rtc_prd/server.py
seaislee1209 066eb8f820 feat: music-creation page + MiniMax API integration + Flutter dev setup
Music Creation Page:
- Vinyl 3D flip to view lyrics, tonearm animation, glow rotation effect
- Circular SVG progress ring, speech bubble feedback, confirm dialog
- Playlist modal, free creation input, lyrics formatting optimization
- MiniMax API real music generation with SSE streaming progress

Backend:
- FastAPI proxy server.py for MiniMax API calls
- Music + lyrics file persistence to Capybara music/ directory
- GET /api/playlist endpoint for auto-building playlist from files

UI/UX Refinements:
- frontend-design skill compliance across all pages
- Glassmorphism effects, modal interactions, scroll tap prevention
- iPhone 12 Pro responsive layout (390x844)

Flutter Development Preparation:
- Installed flutter-expert skill with 6 reference docs
- Added 5 Cursor Rules: official Flutter, clean architecture, UI performance, testing, Dart standards

Assets:
- 9 Capybara music MP3 files + lyrics TXT files
- MiniMax API documentation

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-06 18:23:19 +08:00

297 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import time
import uvicorn
import requests
import json
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from dotenv import load_dotenv
# Load environment variables. load_dotenv() is called before the keys are
# read from the process environment.
load_dotenv()
MINIMAX_API_KEY = os.environ.get("MINIMAX_API_KEY")
# NOTE(review): VOLCENGINE_API_KEY is read here but not referenced in the
# visible part of this file — confirm whether it is still needed.
VOLCENGINE_API_KEY = os.environ.get("VOLCENGINE_API_KEY")

# A missing or empty key only produces a warning; start-up continues.
if MINIMAX_API_KEY is None or MINIMAX_API_KEY == "":
    print("Warning: MINIMAX_API_KEY not found in .env")
# Initialize FastAPI
app = FastAPI()

# Allow CORS for the local frontend: any origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this server is only meant to run as a local dev proxy.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
# Request Models
class MusicRequest(BaseModel):
    """JSON body accepted by POST /api/create_music."""

    # User's description of the scene; forwarded to the LLM prompt and also
    # used (sanitized) as the base of the saved file name.
    text: str

    # Preset mood hint: 'chill', 'happy', 'sleepy', 'random', 'custom'.
    mood: str = "custom"
# Minimax Constants
# NOTE(review): MINIMAX_GROUP_ID is still a placeholder and is not referenced
# in the visible part of this file.
MINIMAX_GROUP_ID = "YOUR_GROUP_ID"
BASE_URL_CHAT = "https://api.minimax.chat/v1/text/chatcompletion_v2"
BASE_URL_MUSIC = "https://api.minimaxi.com/v1/music_generation"

# Load System Prompt
# Candidates are tried in order. The working-directory path comes first so
# existing setups behave exactly as before; the second candidate is anchored
# to this file's directory (like the "Capybara music" paths used by the
# endpoints), so starting the server from another directory no longer
# silently drops the custom prompt.
_PROMPT_CANDIDATES = (
    "prompts/music_director.md",
    os.path.join(os.path.dirname(__file__) or ".", "prompts", "music_director.md"),
)

for _prompt_path in _PROMPT_CANDIDATES:
    try:
        with open(_prompt_path, "r", encoding="utf-8") as f:
            SYSTEM_PROMPT = f.read()
        break
    except FileNotFoundError:
        # Only a missing file moves on to the next candidate; any other error
        # (permissions, bad encoding) propagates, as it did originally.
        continue
else:
    # No candidate exists: fall back to the built-in generic prompt.
    SYSTEM_PROMPT = "You are a music director AI. Convert user input into JSON with 'style' (English description) and 'lyrics' (Chinese, structured)."
    print("Warning: prompts/music_director.md not found, using default.")
def sse_event(data):
    """Serialize *data* to JSON and wrap it as a single SSE ``data:`` frame.

    Non-ASCII characters are emitted as-is (not ``\\uXXXX``-escaped), and the
    frame is terminated by the blank line that ends an SSE event.
    """
    payload = json.dumps(data, ensure_ascii=False)
    return "data: " + payload + "\n\n"
@app.post("/api/create_music")
def create_music(req: MusicRequest):
    """Generate a song for *req* and stream progress as Server-Sent Events.

    The response is a ``text/event-stream``; every event is a JSON object with
    ``stage``, ``progress`` and ``message`` keys. Stages are emitted in order:

    * ``lyrics`` then ``lyrics_done`` — or ``lyrics_fallback`` when the chat
      call or the parsing of its JSON answer fails, in which case a default
      instrumental style is used;
    * ``music``, then ``saving`` and ``done`` (``done`` additionally carries
      ``status``, ``file_path`` and ``metadata``) — or ``error``.

    Failures inside either stage are reported in-stream as events rather than
    as HTTP error responses.
    """
    print(f"[Music] Received request: {req.text} [{req.mood}]", flush=True)

    def event_stream():
        def log(msg):
            # flush=True so log lines appear while the stream is still running.
            print(msg, flush=True)

        # ── Stage 1: LLM "Music Director" ────────────────────────
        log("[Stage 1] Starting LLM call...")
        yield sse_event({
            "stage": "lyrics",
            "progress": 10,
            "message": "AI 正在创作词曲..."
        })
        director_input = f"用户场景描述: {req.text}。 (预设氛围参考: {req.mood})"
        try:
            chat_resp = requests.post(
                BASE_URL_CHAT,
                headers={
                    "Authorization": f"Bearer {MINIMAX_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "abab6.5s-chat",
                    "messages": [
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user", "content": director_input}
                    ]
                },
                timeout=30
            )
            chat_data = chat_resp.json()
            log(f"[Debug] Chat API status: {chat_resp.status_code}, resp keys: {list(chat_data.keys())}")
            if "choices" not in chat_data or not chat_data["choices"]:
                base = chat_data.get("base_resp", {})
                raise ValueError(f"Chat API error ({base.get('status_code')}): {base.get('status_msg')}")
            content_str = chat_data["choices"][0]["message"]["content"]
            log(f"[Debug] LLM raw output (first 200): {content_str[:200]}")

            # Strip markdown code fences if present
            content_str = content_str.strip()
            if content_str.startswith("```"):
                content_str = re.sub(r'^```\w*\n?', '', content_str)
                content_str = re.sub(r'```$', '', content_str).strip()

            # Take the outermost {...} span as the JSON answer
            json_match = re.search(r'\{[\s\S]*\}', content_str)
            if not json_match:
                raise ValueError(f"No JSON in LLM response: {content_str[:100]}")
            metadata = json.loads(json_match.group())

            style_val = metadata.get("style", "")
            lyrics_val = metadata.get("lyrics", "")
            # Stage 2 treats both values as strings; reject any other JSON
            # type here so it takes the fallback path below instead of
            # failing later with an AttributeError.
            if not isinstance(style_val, str) or not isinstance(lyrics_val, str):
                raise ValueError("LLM JSON 'style' and 'lyrics' must be strings")
            log(f"[Director] Style: {style_val[:80]}")
            log(f"[Director] Lyrics (first 60): {lyrics_val[:60]}")
            yield sse_event({
                "stage": "lyrics_done",
                "progress": 25,
                "message": "词曲创作完成!准备生成音乐..."
            })
        except Exception as e:
            # Any Stage 1 failure degrades to a default instrumental request
            # so that music generation can still be attempted.
            log(f"[Error] Director LLM Failed: {e}")
            metadata = {
                "style": "Lofi hip hop, relaxing, slow tempo, water sounds",
                "lyrics": "[Inst]"
            }
            yield sse_event({
                "stage": "lyrics_fallback",
                "progress": 25,
                "message": "使用默认风格,准备生成音乐..."
            })

        # ── Stage 2: Music Generation ────────────────────────────
        yield sse_event({
            "stage": "music",
            "progress": 30,
            "message": "正在生成音乐,请耐心等待..."
        })
        try:
            raw_lyrics = metadata.get("lyrics") or ""
            # API requires lyrics >= 1 char
            if not raw_lyrics.strip() or "[instrumental]" in raw_lyrics.lower():
                raw_lyrics = "[Inst]"
            music_payload = {
                "model": "music-2.5",
                # `or` (not a .get default) so an explicit empty style also
                # falls back to the generic prompt.
                "prompt": metadata.get("style") or "Pop music",
                "lyrics": raw_lyrics,
                "audio_setting": {
                    "sample_rate": 44100,
                    "bitrate": 256000,
                    "format": "mp3"
                }
            }
            log(f"[Debug] Music payload prompt: {music_payload['prompt'][:80]}")
            log(f"[Debug] Music payload lyrics (first 60): {music_payload['lyrics'][:60]}")
            music_resp = requests.post(
                BASE_URL_MUSIC,
                headers={
                    "Authorization": f"Bearer {MINIMAX_API_KEY}",
                    "Content-Type": "application/json"
                },
                json=music_payload,
                timeout=120
            )
            music_data = music_resp.json()
            base_resp = music_data.get("base_resp", {})
            log(f"[Debug] Music API status: {music_resp.status_code}, base_resp: {base_resp}")

            if music_data.get("data") and music_data["data"].get("audio"):
                hex_audio = music_data["data"]["audio"]
                log(f"[OK] Music generated! Audio hex length: {len(hex_audio)}")

                # ── Stage 3: Saving ──────────────────────────────
                yield sse_event({
                    "stage": "saving",
                    "progress": 90,
                    "message": "音乐生成完成,正在保存..."
                })
                save_dir = os.path.join(os.path.dirname(__file__) or ".", "Capybara music")
                os.makedirs(save_dir, exist_ok=True)
                # Keep only word characters so the request text cannot inject
                # path separators into the file name.
                safe_name = re.sub(r'[^\w\u4e00-\u9fff]', '', req.text)[:20] or "ai_song"
                # Build the stem once: get_playlist() looks lyrics up as
                # lyrics/<mp3 stem>.txt, so both files must share the exact
                # same timestamp.
                file_stem = f"{safe_name}_{int(time.time())}"
                filename = f"{file_stem}.mp3"
                filepath = os.path.join(save_dir, filename)
                # The API returns the audio hex-encoded.
                audio_bytes = bytes.fromhex(hex_audio)
                with open(filepath, "wb") as f:
                    f.write(audio_bytes)
                log(f"[Saved] {filepath}")

                # Save lyrics txt
                lyrics_text = metadata.get("lyrics", "")
                if lyrics_text:
                    lyrics_dir = os.path.join(save_dir, "lyrics")
                    os.makedirs(lyrics_dir, exist_ok=True)
                    lyrics_filename = f"{file_stem}.txt"
                    with open(os.path.join(lyrics_dir, lyrics_filename), "w", encoding="utf-8") as lf:
                        lf.write(lyrics_text)

                relative_path = f"Capybara music/{filename}"
                # ── Done ─────────────────────────────────────────
                yield sse_event({
                    "stage": "done",
                    "progress": 100,
                    "message": "新歌出炉!",
                    "status": "success",
                    "file_path": relative_path,
                    "metadata": metadata
                })
            else:
                error_msg = base_resp.get("status_msg", "unknown")
                error_code = base_resp.get("status_code", -1)
                log(f"[Error] Music Gen failed: {error_code} - {error_msg}")
                yield sse_event({
                    "stage": "error",
                    "progress": 0,
                    "message": f"生成失败 ({error_code}): {error_msg}"
                })
        except requests.exceptions.Timeout:
            log("[Error] Music Gen Timeout")
            yield sse_event({
                "stage": "error",
                "progress": 0,
                "message": "音乐生成超时,请稍后再试"
            })
        except Exception as e:
            log(f"[Error] API exception: {e}")
            yield sse_event({
                "stage": "error",
                "progress": 0,
                "message": f"服务器错误: {str(e)}"
            })

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive"
        }
    )
@app.get("/api/playlist")
def get_playlist():
    """Scan the "Capybara music" directory and return the playlist with lyrics.

    Returns:
        ``{"playlist": [...]}``, one entry per ``.mp3`` file (case-insensitive
        extension) in sorted file-name order. Each entry has:

        * ``title`` — the file stem with a trailing ``_<10+ digits>``
          timestamp suffix removed;
        * ``audioUrl`` — ``"Capybara music/<file name>"``;
        * ``lyrics`` — the text of ``lyrics/<stem>.txt`` when that file exists
          and is readable, otherwise ``""``.

        The list is empty when the music directory does not exist.
    """
    music_dir = os.path.join(os.path.dirname(__file__) or ".", "Capybara music")
    if not os.path.isdir(music_dir):
        return {"playlist": []}

    lyrics_dir = os.path.join(music_dir, "lyrics")
    playlist = []
    for entry in sorted(os.listdir(music_dir)):
        if not entry.lower().endswith(".mp3"):
            continue
        name = entry[:-4]  # strip .mp3

        # Read lyrics if available. This is best-effort: an unreadable lyrics
        # file is logged and the track is still listed with empty lyrics.
        lyrics = ""
        lyrics_file = os.path.join(lyrics_dir, name + ".txt")
        if os.path.isfile(lyrics_file):
            try:
                with open(lyrics_file, "r", encoding="utf-8") as lf:
                    lyrics = lf.read()
            except (OSError, UnicodeError) as e:
                print(f"[Playlist] Could not read lyrics {lyrics_file}: {e}", flush=True)

        # Display title: strip timestamp suffix like _1770367350
        title = re.sub(r'_\d{10,}$', '', name)
        playlist.append({
            "title": title,
            "audioUrl": f"Capybara music/{entry}",
            "lyrics": lyrics
        })
    return {"playlist": playlist}
if __name__ == "__main__":
    # Script entry point: print the local URL, then hand control to uvicorn.
    # The server binds to 0.0.0.0 (all interfaces) on port 3000; the printed
    # address is the localhost form of that same port.
    print("[Server] Music Server running on http://localhost:3000")
    uvicorn.run(app, port=3000, host="0.0.0.0")