"""Music generation server.

Exposes two HTTP endpoints:

* ``POST /api/create_music`` - asks an LLM "music director" to turn the user's
  text into a style prompt plus lyrics, sends those to the music generation
  API, stores the resulting MP3 (and lyrics) on disk and streams progress to
  the client as Server-Sent Events.
* ``GET /api/playlist`` - lists the stored MP3 files together with their
  lyrics.
"""

import os
import re
import time
import uvicorn
import requests
import json
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY")
VOLCENGINE_API_KEY = os.getenv("VOLCENGINE_API_KEY")

if not MINIMAX_API_KEY:
    print("Warning: MINIMAX_API_KEY not found in .env")

# Initialize FastAPI
app = FastAPI()

# Allow CORS for local frontend.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive; confirm this is only ever deployed for local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Request models
class MusicRequest(BaseModel):
    """Body of ``POST /api/create_music``."""

    # Free-form description of the scene the song should fit.
    text: str
    # Preset mood hint: 'chill', 'happy', 'sleepy', 'random', 'custom'
    mood: str = "custom"


# Minimax constants
MINIMAX_GROUP_ID = "YOUR_GROUP_ID"
BASE_URL_CHAT = "https://api.minimax.chat/v1/text/chatcompletion_v2"
BASE_URL_MUSIC = "https://api.minimaxi.com/v1/music_generation"

# Name of the directory (next to this file) where generated tracks are stored.
MUSIC_DIR_NAME = "Capybara music"

# Load system prompt, falling back to a short built-in one
try:
    with open("prompts/music_director.md", "r", encoding="utf-8") as f:
        SYSTEM_PROMPT = f.read()
except FileNotFoundError:
    SYSTEM_PROMPT = "You are a music director AI. Convert user input into JSON with 'style' (English description) and 'lyrics' (Chinese, structured)."
    print("Warning: prompts/music_director.md not found, using default.")


def sse_event(data):
    """Format a JSON-serializable object as one SSE ``data:`` frame.

    Non-ASCII characters are emitted as-is (``ensure_ascii=False``) and the
    frame is terminated by the blank line that SSE requires.
    """
    return f"data: {json.dumps(data, ensure_ascii=False)}\n\n"


def _log(msg):
    """Print *msg* and flush immediately so logs appear while streaming."""
    print(msg, flush=True)


def _music_dir():
    """Return the path of the directory that holds the generated tracks."""
    return os.path.join(os.path.dirname(__file__) or ".", MUSIC_DIR_NAME)


def _extract_metadata(content_str):
    """Parse the director LLM's reply into a metadata dict.

    Strips an optional Markdown code fence, takes the outermost ``{...}``
    span and decodes it as JSON.

    Raises:
        ValueError: if the reply contains no JSON object, the JSON is
            invalid (``json.JSONDecodeError`` is a ``ValueError``), or the
            ``style`` / ``lyrics`` values are not strings.
    """
    content_str = content_str.strip()
    if content_str.startswith("```"):
        content_str = re.sub(r'^```\w*\n?', '', content_str)
        content_str = re.sub(r'```$', '', content_str).strip()

    json_match = re.search(r'\{[\s\S]*\}', content_str)
    if not json_match:
        raise ValueError(f"No JSON in LLM response: {content_str[:100]}")
    metadata = json.loads(json_match.group())

    # The LLM output is untrusted: reject non-string fields here so the caller
    # can fall back to the default style instead of failing later when the
    # lyrics are stripped or written to disk.
    for key in ("style", "lyrics"):
        if not isinstance(metadata.get(key, ""), str):
            raise ValueError(f"LLM field '{key}' is not a string")
    return metadata


@app.post("/api/create_music")
def create_music(req: MusicRequest):
    """Generate a song for *req* and stream progress as Server-Sent Events.

    Each event is a JSON object with at least ``stage``, ``progress`` and
    ``message``. The final event has ``stage`` set to ``"done"`` (with
    ``file_path`` and ``metadata``) on success or ``"error"`` on failure.
    """
    print(f"[Music] Received request: {req.text} [{req.mood}]", flush=True)

    def event_stream():
        # ── Stage 1: LLM "Music Director" ────────────────────────
        _log("[Stage 1] Starting LLM call...")
        yield sse_event({
            "stage": "lyrics",
            "progress": 10,
            "message": "AI 正在创作词曲..."
        })

        director_input = f"用户场景描述: {req.text}。 (预设氛围参考: {req.mood})"

        try:
            chat_resp = requests.post(
                BASE_URL_CHAT,
                headers={
                    "Authorization": f"Bearer {MINIMAX_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "abab6.5s-chat",
                    "messages": [
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user", "content": director_input}
                    ]
                },
                timeout=30
            )
            chat_data = chat_resp.json()
            _log(f"[Debug] Chat API status: {chat_resp.status_code}, resp keys: {list(chat_data.keys())}")

            if "choices" not in chat_data or not chat_data["choices"]:
                base = chat_data.get("base_resp", {})
                raise ValueError(f"Chat API error ({base.get('status_code')}): {base.get('status_msg')}")

            content_str = chat_data["choices"][0]["message"]["content"]
            _log(f"[Debug] LLM raw output (first 200): {content_str[:200]}")

            metadata = _extract_metadata(content_str)

            style_val = metadata.get("style", "")
            lyrics_val = metadata.get("lyrics", "")
            _log(f"[Director] Style: {style_val[:80]}")
            _log(f"[Director] Lyrics (first 60): {lyrics_val[:60]}")

            yield sse_event({
                "stage": "lyrics_done",
                "progress": 25,
                "message": "词曲创作完成!准备生成音乐..."
            })

        except Exception as e:
            # Deliberate best-effort: any director failure (network, bad JSON,
            # unexpected schema) degrades to a default instrumental style
            # rather than aborting the request.
            _log(f"[Error] Director LLM Failed: {e}")
            metadata = {
                "style": "Lofi hip hop, relaxing, slow tempo, water sounds",
                "lyrics": "[Inst]"
            }
            yield sse_event({
                "stage": "lyrics_fallback",
                "progress": 25,
                "message": "使用默认风格,准备生成音乐..."
            })

        # ── Stage 2: Music Generation ────────────────────────────
        yield sse_event({
            "stage": "music",
            "progress": 30,
            "message": "正在生成音乐,请耐心等待..."
        })

        try:
            raw_lyrics = metadata.get("lyrics") or ""
            # API requires lyrics >= 1 char
            if not raw_lyrics.strip() or "[instrumental]" in raw_lyrics.lower():
                raw_lyrics = "[Inst]"

            music_payload = {
                "model": "music-2.5",
                "prompt": metadata.get("style", "Pop music"),
                "lyrics": raw_lyrics,
                "audio_setting": {
                    "sample_rate": 44100,
                    "bitrate": 256000,
                    "format": "mp3"
                }
            }
            _log(f"[Debug] Music payload prompt: {music_payload['prompt'][:80]}")
            _log(f"[Debug] Music payload lyrics (first 60): {music_payload['lyrics'][:60]}")

            music_resp = requests.post(
                BASE_URL_MUSIC,
                headers={
                    "Authorization": f"Bearer {MINIMAX_API_KEY}",
                    "Content-Type": "application/json"
                },
                json=music_payload,
                timeout=120
            )

            music_data = music_resp.json()
            base_resp = music_data.get("base_resp", {})
            _log(f"[Debug] Music API status: {music_resp.status_code}, base_resp: {base_resp}")

            if music_data.get("data") and music_data["data"].get("audio"):
                # The audio is decoded below with bytes.fromhex, i.e. it is
                # handled as a hex-encoded string.
                hex_audio = music_data["data"]["audio"]
                _log(f"[OK] Music generated! Audio hex length: {len(hex_audio)}")

                # ── Stage 3: Saving ──────────────────────────────
                yield sse_event({
                    "stage": "saving",
                    "progress": 90,
                    "message": "音乐生成完成,正在保存..."
                })

                save_dir = _music_dir()
                os.makedirs(save_dir, exist_ok=True)

                safe_name = re.sub(r'[^\w\u4e00-\u9fff]', '', req.text)[:20] or "ai_song"
                # One stem for both the audio and the lyrics file: the
                # playlist endpoint pairs them by name, so the timestamp must
                # be taken exactly once.
                stem = f"{safe_name}_{int(time.time())}"
                filename = f"{stem}.mp3"
                filepath = os.path.join(save_dir, filename)

                audio_bytes = bytes.fromhex(hex_audio)
                with open(filepath, "wb") as f:
                    f.write(audio_bytes)
                _log(f"[Saved] {filepath}")

                # Save lyrics txt
                lyrics_text = metadata.get("lyrics", "")
                if lyrics_text:
                    lyrics_dir = os.path.join(save_dir, "lyrics")
                    os.makedirs(lyrics_dir, exist_ok=True)
                    lyrics_filename = f"{stem}.txt"
                    with open(os.path.join(lyrics_dir, lyrics_filename), "w", encoding="utf-8") as lf:
                        lf.write(lyrics_text)

                relative_path = f"Capybara music/{filename}"

                # ── Done ─────────────────────────────────────────
                yield sse_event({
                    "stage": "done",
                    "progress": 100,
                    "message": "新歌出炉!",
                    "status": "success",
                    "file_path": relative_path,
                    "metadata": metadata
                })
            else:
                error_msg = base_resp.get("status_msg", "unknown")
                error_code = base_resp.get("status_code", -1)
                _log(f"[Error] Music Gen failed: {error_code} - {error_msg}")
                yield sse_event({
                    "stage": "error",
                    "progress": 0,
                    "message": f"生成失败 ({error_code}): {error_msg}"
                })

        except requests.exceptions.Timeout:
            _log("[Error] Music Gen Timeout")
            yield sse_event({
                "stage": "error",
                "progress": 0,
                "message": "音乐生成超时,请稍后再试"
            })
        except Exception as e:
            # Top-level boundary of the stream: report the failure to the
            # client as an SSE error event instead of breaking the connection.
            _log(f"[Error] API exception: {e}")
            yield sse_event({
                "stage": "error",
                "progress": 0,
                "message": f"服务器错误: {str(e)}"
            })

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive"
        }
    )


@app.get("/api/playlist")
def get_playlist():
    """Scan the music directory and return the full playlist with lyrics.

    Returns:
        ``{"playlist": [...]}`` where each entry has ``title`` (file stem
        without the trailing timestamp), ``audioUrl`` and ``lyrics`` (empty
        string when no lyrics file exists or it cannot be read). Entries are
        ordered by sorted file name.
    """
    music_dir = _music_dir()
    lyrics_dir = os.path.join(music_dir, "lyrics")
    playlist = []

    if not os.path.isdir(music_dir):
        return {"playlist": []}

    for f in sorted(os.listdir(music_dir)):
        if not f.lower().endswith(".mp3"):
            continue

        name = f[:-4]  # strip .mp3

        # Read lyrics if available; a missing or unreadable file just means
        # the track is listed without lyrics.
        lyrics = ""
        lyrics_file = os.path.join(lyrics_dir, name + ".txt")
        if os.path.isfile(lyrics_file):
            try:
                with open(lyrics_file, "r", encoding="utf-8") as lf:
                    lyrics = lf.read()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Warning: could not read lyrics file {lyrics_file}: {e}", flush=True)

        # Display title: strip timestamp suffix like _1770367350
        title = re.sub(r'_\d{10,}$', '', name)

        playlist.append({
            "title": title,
            "audioUrl": f"Capybara music/{f}",
            "lyrics": lyrics
        })

    return {"playlist": playlist}


if __name__ == "__main__":
    print("[Server] Music Server running on http://localhost:3000")
    uvicorn.run(app, host="0.0.0.0", port=3000)