"""Volcano Engine TOS file upload utility using official TOS SDK.""" import hashlib import uuid import logging from django.conf import settings logger = logging.getLogger(__name__) CONTENT_TYPE_MAP = { 'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', 'png': 'image/png', 'webp': 'image/webp', 'gif': 'image/gif', 'bmp': 'image/bmp', 'tiff': 'image/tiff', 'mp4': 'video/mp4', 'mov': 'video/quicktime', 'mp3': 'audio/mpeg', 'wav': 'audio/wav', } _client = None def get_tos_client(): import tos global _client if _client is None: endpoint = settings.TOS_ENDPOINT.replace('https://', '').replace('http://', '') _client = tos.TosClientV2( ak=settings.TOS_ACCESS_KEY, sk=settings.TOS_SECRET_KEY, endpoint=endpoint, region=settings.TOS_REGION, ) return _client def upload_file(file_obj, folder='uploads'): """Upload a file to TOS bucket with content-hash dedup, return its public URL. Uses MD5 hash of file content as the object key. If the same file has already been uploaded, the existing URL is returned without re-uploading, saving storage and bandwidth. """ ext = file_obj.name.rsplit('.', 1)[-1].lower() content_type = CONTENT_TYPE_MAP.get(ext, 'application/octet-stream') client = get_tos_client() content = file_obj.read() # Use content hash as key for dedup content_hash = hashlib.sha256(content).hexdigest() key = f'{folder}/{content_hash}.{ext}' url = f'{settings.TOS_CDN_DOMAIN}/{key}' # Check if object already exists — skip upload if so try: client.head_object(bucket=settings.TOS_BUCKET, key=key) logger.info('TOS dedup hit: %s', key) return url except Exception as e: err_str = str(e).lower() if '404' not in err_str and 'not found' not in err_str and 'nosuchkey' not in err_str: logger.warning('TOS head_object unexpected error (proceeding with upload): %s', e) client.put_object( bucket=settings.TOS_BUCKET, key=key, content=content, content_type=content_type, ) return url def upload_from_file_path(file_path, folder='uploads', content_type=None): """Upload a local file to TOS by path (streaming, no full memory load). Returns the permanent CDN URL. """ ext = file_path.rsplit('.', 1)[-1].lower() if '.' in file_path else 'bin' if not content_type: content_type = CONTENT_TYPE_MAP.get(ext, 'application/octet-stream') # Use content hash for dedup h = hashlib.sha256() with open(file_path, 'rb') as f: for chunk in iter(lambda: f.read(8192), b''): h.update(chunk) content_hash = h.hexdigest() key = f'{folder}/{content_hash}.{ext}' url = f'{settings.TOS_CDN_DOMAIN}/{key}' client = get_tos_client() try: client.head_object(bucket=settings.TOS_BUCKET, key=key) logger.info('TOS dedup hit: %s', key) return url except Exception as e: # Only proceed if object not found (404). Re-raise on auth/config errors. err_str = str(e).lower() if '404' not in err_str and 'not found' not in err_str and 'nosuchkey' not in err_str: logger.warning('TOS head_object unexpected error (proceeding with upload): %s', e) with open(file_path, 'rb') as f: client.put_object( bucket=settings.TOS_BUCKET, key=key, content=f, content_type=content_type, ) return url def upload_from_url(source_url, folder='results'): """Download a file from a URL and upload to TOS, return permanent CDN URL.""" import requests as req resp = req.get(source_url, timeout=120, stream=True) resp.raise_for_status() content = resp.content content_type = resp.headers.get('Content-Type', 'video/mp4') ext = 'mp4' # Seedance always returns mp4 key = f'{folder}/{uuid.uuid4().hex}.{ext}' client = get_tos_client() client.put_object( bucket=settings.TOS_BUCKET, key=key, content=content, content_type=content_type, ) return f'{settings.TOS_CDN_DOMAIN}/{key}'