# rtc_prd/server.py

import os
import re
import time
import uvicorn
import requests
import json
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from dotenv import load_dotenv

# Load Environment Variables
# load_dotenv() comes from python-dotenv; presumably it merges a local .env
# file into the process environment before the lookups below.
load_dotenv()
# Sent as the Bearer token on both MiniMax requests (chat and music).
MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY")
# Read here but not referenced anywhere else in the visible part of this file.
VOLCENGINE_API_KEY = os.getenv("VOLCENGINE_API_KEY")

# Only a warning: the server still starts without the key, and outgoing
# requests will then carry the literal header "Bearer None".
if not MINIMAX_API_KEY:
    print("Warning: MINIMAX_API_KEY not found in .env")

# Initialize FastAPI
app = FastAPI()

# Allow CORS for local frontend
# Every origin, method and header is accepted, so any web page can call this
# API from a browser.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins/methods/headers with allow_credentials=True. Nothing in
# this file reads cookies or auth headers, so credentials are probably not
# needed - confirm with the frontend before tightening.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request Models
# Body of POST /api/create_music.
class MusicRequest(BaseModel):
    # Free-form scene description from the user. It is embedded in the LLM
    # prompt and also used (after sanitising) as the saved file's name prefix.
    text: str
    # Mood hint interpolated into the LLM prompt. It is a plain string: the
    # values listed on the right are not enforced by this model.
    mood: str = "custom"  # 'chill', 'happy', 'sleepy', 'random', 'custom'

# Minimax Constants
# Placeholder value; not referenced anywhere else in the visible part of this file.
MINIMAX_GROUP_ID = "YOUR_GROUP_ID"
# Chat-completion endpoint used by the "music director" LLM step.
BASE_URL_CHAT = "https://api.minimax.chat/v1/text/chatcompletion_v2"
# Music-generation endpoint.
# NOTE(review): this host (api.minimaxi.com) differs from the chat host
# (api.minimax.chat) - presumably intentional, confirm against the MiniMax docs.
BASE_URL_MUSIC = "https://api.minimaxi.com/v1/music_generation"

# Load System Prompt
# The prompt file is looked up in two places, in order:
#   1. relative to the current working directory (the original behaviour);
#   2. next to this source file, which is also how the music output directory
#      is located, so the server finds its prompt when started from elsewhere.
# If neither exists, a short built-in prompt is used and a warning is printed.
_PROMPT_CANDIDATES = (
    "prompts/music_director.md",
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "prompts",
        "music_director.md",
    ),
)

SYSTEM_PROMPT = None
for _prompt_path in _PROMPT_CANDIDATES:
    try:
        with open(_prompt_path, "r", encoding="utf-8") as f:
            SYSTEM_PROMPT = f.read()
        break
    except FileNotFoundError:
        # Try the next candidate location.
        continue

if SYSTEM_PROMPT is None:
    SYSTEM_PROMPT = "You are a music director AI. Convert user input into JSON with 'style' (English description) and 'lyrics' (Chinese, structured)."
    print("Warning: prompts/music_director.md not found, using default.")


def sse_event(data):
    """Serialize *data* to JSON and wrap it as one Server-Sent-Events frame.

    Non-ASCII characters are emitted as-is (not ``\\u`` escaped). The frame is
    a single ``data:`` line terminated by the blank line SSE uses as the
    event delimiter.
    """
    payload = json.dumps(data, ensure_ascii=False)
    return "data: " + payload + "\n\n"


def _parse_director_output(content_str):
    """Extract the director's ``style``/``lyrics`` JSON object from raw LLM text.

    Markdown code fences around the answer are removed, then the span from
    the first ``{`` to the last ``}`` is parsed as JSON.

    Args:
        content_str: Raw message content returned by the chat model.

    Returns:
        The parsed dict. ``style`` and ``lyrics``, when present, are strings.

    Raises:
        ValueError: If no JSON object is found, the JSON is malformed
            (``json.JSONDecodeError`` is a ``ValueError``), or ``style`` /
            ``lyrics`` is present but not a string.
    """
    content_str = content_str.strip()
    # Strip markdown code fences if present
    if content_str.startswith("```"):
        content_str = re.sub(r'^```\w*\n?', '', content_str)
        content_str = re.sub(r'```$', '', content_str).strip()

    # Try to extract JSON from response
    json_match = re.search(r'\{[\s\S]*\}', content_str)
    if not json_match:
        raise ValueError(f"No JSON in LLM response: {content_str[:100]}")
    metadata = json.loads(json_match.group())

    # Later stages call str methods on these values and write them to disk;
    # reject anything else here so the caller falls back to the default style
    # instead of failing in the middle of music generation.
    for key in ("style", "lyrics"):
        if not isinstance(metadata.get(key, ""), str):
            raise ValueError(f"LLM field '{key}' is not a string")
    return metadata


@app.post("/api/create_music")
def create_music(req: MusicRequest):
    """Generate a song for the request and stream progress as SSE events.

    The response is returned immediately; the work runs while the client
    consumes the stream. Each event is a JSON object with ``stage``,
    ``progress`` and ``message``:

    * ``lyrics`` -> ``lyrics_done`` (or ``lyrics_fallback`` when the LLM step
      fails and a default instrumental style is used instead),
    * ``music`` while the music API is called,
    * ``saving`` then ``done`` (with ``status``, ``file_path``, ``metadata``)
      on success, or ``error`` if generation or saving fails.

    Args:
        req: Scene description and mood hint from the client.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """
    print(f"[Music] Received request: {req.text} [{req.mood}]", flush=True)

    def event_stream():
        def log(msg):
            # flush=True so log lines appear promptly while the stream is open.
            print(msg, flush=True)

        # -- Stage 1: LLM "Music Director" ---------------------------------
        log("[Stage 1] Starting LLM call...")
        yield sse_event({
            "stage": "lyrics",
            "progress": 10,
            "message": "AI 正在创作词曲..."
        })

        director_input = f"用户场景描述: {req.text}。 (预设氛围参考: {req.mood})"

        try:
            chat_resp = requests.post(
                BASE_URL_CHAT,
                headers={
                    "Authorization": f"Bearer {MINIMAX_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "abab6.5s-chat",
                    "messages": [
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user", "content": director_input}
                    ]
                },
                timeout=30
            )

            chat_data = chat_resp.json()
            log(f"[Debug] Chat API status: {chat_resp.status_code}, resp keys: {list(chat_data.keys())}")
            if "choices" not in chat_data or not chat_data["choices"]:
                base = chat_data.get("base_resp", {})
                raise ValueError(f"Chat API error ({base.get('status_code')}): {base.get('status_msg')}")
            content_str = chat_data["choices"][0]["message"]["content"]
            log(f"[Debug] LLM raw output (first 200): {content_str[:200]}")

            metadata = _parse_director_output(content_str)

            style_val = metadata.get("style", "")
            lyrics_val = metadata.get("lyrics", "")
            log(f"[Director] Style: {style_val[:80]}")
            log(f"[Director] Lyrics (first 60): {lyrics_val[:60]}")

            yield sse_event({
                "stage": "lyrics_done",
                "progress": 25,
                "message": "词曲创作完成！准备生成音乐..."
            })

        except Exception as e:
            # Any failure in the director step (network, bad JSON, unexpected
            # shape) degrades to a default instrumental track instead of
            # aborting the whole request.
            log(f"[Error] Director LLM Failed: {e}")
            metadata = {
                "style": "Lofi hip hop, relaxing, slow tempo, water sounds",
                "lyrics": "[Inst]"
            }
            yield sse_event({
                "stage": "lyrics_fallback",
                "progress": 25,
                "message": "使用默认风格，准备生成音乐..."
            })

        # -- Stage 2: Music Generation -------------------------------------
        yield sse_event({
            "stage": "music",
            "progress": 30,
            "message": "正在生成音乐，请耐心等待..."
        })

        try:
            raw_lyrics = metadata.get("lyrics") or ""
            # API requires lyrics >= 1 char
            if not raw_lyrics.strip() or "[instrumental]" in raw_lyrics.lower():
                raw_lyrics = "[Inst]"

            music_payload = {
                "model": "music-2.5",
                # `or` (not a .get default) so an empty style string also
                # falls back to a usable prompt.
                "prompt": metadata.get("style") or "Pop music",
                "lyrics": raw_lyrics,
                "audio_setting": {
                    "sample_rate": 44100,
                    "bitrate": 256000,
                    "format": "mp3"
                }
            }
            log(f"[Debug] Music payload prompt: {music_payload['prompt'][:80]}")
            log(f"[Debug] Music payload lyrics (first 60): {music_payload['lyrics'][:60]}")

            music_resp = requests.post(
                BASE_URL_MUSIC,
                headers={
                    "Authorization": f"Bearer {MINIMAX_API_KEY}",
                    "Content-Type": "application/json"
                },
                json=music_payload,
                timeout=120
            )

            music_data = music_resp.json()
            base_resp = music_data.get("base_resp", {})
            log(f"[Debug] Music API status: {music_resp.status_code}, base_resp: {base_resp}")

            if music_data.get("data") and music_data["data"].get("audio"):
                hex_audio = music_data["data"]["audio"]
                log(f"[OK] Music generated! Audio hex length: {len(hex_audio)}")

                # -- Stage 3: Saving -----------------------------------------
                yield sse_event({
                    "stage": "saving",
                    "progress": 90,
                    "message": "音乐生成完成，正在保存..."
                })

                save_dir = os.path.join(os.path.dirname(__file__) or ".", "Capybara music")
                os.makedirs(save_dir, exist_ok=True)

                safe_name = re.sub(r'[^\w\u4e00-\u9fff]', '', req.text)[:20] or "ai_song"
                # One shared stem for the audio and the lyrics file: the
                # playlist endpoint pairs them by name, so the timestamp must
                # be taken exactly once.
                file_stem = f"{safe_name}_{int(time.time())}"
                filename = f"{file_stem}.mp3"
                filepath = os.path.join(save_dir, filename)

                # The API returns the audio as a hex string.
                audio_bytes = bytes.fromhex(hex_audio)
                with open(filepath, "wb") as f:
                    f.write(audio_bytes)
                log(f"[Saved] {filepath}")

                # Save lyrics txt
                lyrics_text = metadata.get("lyrics", "")
                if lyrics_text:
                    lyrics_dir = os.path.join(save_dir, "lyrics")
                    os.makedirs(lyrics_dir, exist_ok=True)
                    lyrics_filename = f"{file_stem}.txt"
                    with open(os.path.join(lyrics_dir, lyrics_filename), "w", encoding="utf-8") as lf:
                        lf.write(lyrics_text)

                relative_path = f"Capybara music/{filename}"

                # -- Done ----------------------------------------------------
                yield sse_event({
                    "stage": "done",
                    "progress": 100,
                    "message": "新歌出炉！",
                    "status": "success",
                    "file_path": relative_path,
                    "metadata": metadata
                })
            else:
                error_msg = base_resp.get("status_msg", "unknown")
                error_code = base_resp.get("status_code", -1)
                log(f"[Error] Music Gen failed: {error_code} - {error_msg}")
                yield sse_event({
                    "stage": "error",
                    "progress": 0,
                    "message": f"生成失败 ({error_code}): {error_msg}"
                })

        except requests.exceptions.Timeout:
            log("[Error] Music Gen Timeout")
            yield sse_event({
                "stage": "error",
                "progress": 0,
                "message": "音乐生成超时，请稍后再试"
            })
        except Exception as e:
            # Last-resort boundary: report the failure to the client as an
            # SSE error event rather than breaking the stream.
            log(f"[Error] API exception: {e}")
            yield sse_event({
                "stage": "error",
                "progress": 0,
                "message": f"服务器错误: {str(e)}"
            })

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            # Ask buffering reverse proxies (e.g. nginx) to pass events through
            # immediately.
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive"
        }
    )


@app.get("/api/playlist")
def get_playlist():
    """Return every saved song in ``Capybara music/`` together with its lyrics.

    The directory next to this file is scanned for ``.mp3`` files (sorted by
    file name). For each one, lyrics are read from ``lyrics/<same stem>.txt``
    when that file exists; otherwise the lyrics are an empty string.

    Returns:
        ``{"playlist": [...]}`` where each entry has ``title`` (file stem with
        a trailing ``_<10+ digit timestamp>`` removed), ``audioUrl`` (path
        relative to this file's directory) and ``lyrics``. The list is empty
        when the music directory does not exist.
    """
    music_dir = os.path.join(os.path.dirname(__file__) or ".", "Capybara music")
    lyrics_dir = os.path.join(music_dir, "lyrics")

    if not os.path.isdir(music_dir):
        return {"playlist": []}

    playlist = []
    for entry in sorted(os.listdir(music_dir)):
        if not entry.lower().endswith(".mp3"):
            continue

        name = entry[:-4]  # strip .mp3

        # Read lyrics if available
        lyrics = ""
        lyrics_file = os.path.join(lyrics_dir, name + ".txt")
        if os.path.isfile(lyrics_file):
            try:
                with open(lyrics_file, "r", encoding="utf-8") as lf:
                    lyrics = lf.read()
            except (OSError, UnicodeError) as e:
                # Best effort: a broken lyrics file must not hide the song,
                # but the failure is logged instead of being swallowed.
                print(f"[Warn] Could not read lyrics {lyrics_file}: {e}", flush=True)

        # Display title: strip timestamp suffix like _1770367350
        title = re.sub(r'_\d{10,}$', '', name)

        playlist.append({
            "title": title,
            "audioUrl": f"Capybara music/{entry}",
            "lyrics": lyrics
        })

    return {"playlist": playlist}


# Script entry point: start the server directly when this file is executed
# as a program; nothing here runs when the module is merely imported.
if __name__ == "__main__":
    print("[Server] Music Server running on http://localhost:3000")
    # host="0.0.0.0" listens on all IPv4 interfaces, so the server is
    # reachable from other machines too, not only via the localhost URL
    # printed above.
    uvicorn.run(app, host="0.0.0.0", port=3000)