diff --git a/package.json b/package.json index f8e872d..07abbcb 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,7 @@ "@ai-sdk/deepseek": "^2.0.17", "@ai-sdk/google": "^3.0.20", "@ai-sdk/openai": "^3.0.25", + "@ai-sdk/openai-compatible": "^2.0.27", "@aigne/core": "^1.72.0", "@aigne/openai": "^0.16.16", "@langchain/core": "^1.1.15", diff --git a/src/routes/other/testImage.ts b/src/routes/other/testImage.ts index 0ee30cf..c4aa78f 100644 --- a/src/routes/other/testImage.ts +++ b/src/routes/other/testImage.ts @@ -30,26 +30,5 @@ export default router.post( console.error(msg); res.status(500).send(error(msg)); } - - // try { - // const contentStr = await u.ai.generateImage( - // { - // prompt: "2D cat", - // imageBase64: [], - // aspectRatio: "16:9", - // size: "1K", - // }, - // { - // model: modelName, - // apiKey, - // baseURL, - // manufacturer, - // }, - // ); - // res.status(200).send(success(contentStr)); - // } catch (err: any) { - // const message = err?.response?.data?.error?.message || err?.error?.message || "模型调用失败"; - // res.status(500).send(error(message)); - // } }, ); diff --git a/src/routes/other/testVideo.ts b/src/routes/other/testVideo.ts index e699541..79ebc6d 100644 --- a/src/routes/other/testVideo.ts +++ b/src/routes/other/testVideo.ts @@ -25,7 +25,7 @@ export default router.post( savePath: "test.mp4", prompt: "stickman Dances", duration: 4, - resolution: "480p", + resolution: "720p", aspectRatio: "16:9", audio: false, }); diff --git a/src/utils/ai/image/index.ts b/src/utils/ai/image/index.ts index 6c10375..e60b5aa 100644 --- a/src/utils/ai/image/index.ts +++ b/src/utils/ai/image/index.ts @@ -7,6 +7,8 @@ import volcengine from "./owned/volcengine"; import kling from "./owned/kling"; import vidu from "./owned/vidu"; import runninghub from "./owned/runninghub"; +import apimart from "./owned/apimart"; +import other from "./owned/other"; import gemini from "./owned/gemini"; const urlToBase64 = async (url: string): Promise => { @@ -22,7 +24,8 @@ 
const modelInstance = { kling: kling, vidu: vidu, runninghub: runninghub, - apimart: null, + apimart: apimart, + other, } as const; export default async (input: ImageConfig, config?: AIConfig) => { @@ -33,6 +36,30 @@ export default async (input: ImageConfig, config?: AIConfig) => { const owned = modelList.find((m) => m.model === model); if (!owned) throw new Error("不支持的模型"); + // 补充图片的 base64 内容类型字符串 + if (input.imageBase64 && input.imageBase64.length > 0) { + input.imageBase64 = input.imageBase64.map((img) => { + if (img.startsWith("data:image/")) { + return img; + } + // 根据 base64 头部判断图片类型 + if (img.startsWith("/9j/")) { + return `data:image/jpeg;base64,${img}`; + } + if (img.startsWith("iVBORw")) { + return `data:image/png;base64,${img}`; + } + if (img.startsWith("R0lGOD")) { + return `data:image/gif;base64,${img}`; + } + if (img.startsWith("UklGR")) { + return `data:image/webp;base64,${img}`; + } + // 默认使用 png + return `data:image/png;base64,${img}`; + }); + } + let imageUrl = await manufacturerFn(input, { model, apiKey, baseURL }); if (!input.resType) input.resType = "b64"; if (input.resType === "b64" && imageUrl.startsWith("http")) imageUrl = await urlToBase64(imageUrl); diff --git a/src/utils/ai/image/modelList.ts b/src/utils/ai/image/modelList.ts index d5cad4b..1cce6d9 100644 --- a/src/utils/ai/image/modelList.ts +++ b/src/utils/ai/image/modelList.ts @@ -40,6 +40,12 @@ const modelList: Owned[] = [ type: "ti2i", }, //Vidu + { + manufacturer: "vidu", + model: "viduq1", + grid: false, + type: "i2i", + }, { manufacturer: "vidu", model: "viduq2", diff --git a/src/utils/ai/image/owned/apimart.ts b/src/utils/ai/image/owned/apimart.ts new file mode 100644 index 0000000..9ed4b80 --- /dev/null +++ b/src/utils/ai/image/owned/apimart.ts @@ -0,0 +1,31 @@ +import axios from "axios"; +import u from "@/utils"; +import FormData from "form-data"; +import axiosRetry from "axios-retry"; +import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai"; +import 
// Install axios-retry on the shared axios instance: up to 3 retries, fixed delay value of 200.
axiosRetry(axios, { retries: 3, retryDelay: () => 200 });

/**
 * Generates one image through APIMart's asynchronous task API.
 *
 * Flow: POST `/v1/images/generations` to create a task, then poll
 * `/v1/tasks/{taskId}` via `pollTask` until the task reports a terminal status.
 *
 * @param input  Image request; only `prompt`, `aspectRatio` (sent as `size`) and
 *               `size` (sent as `resolution`) are forwarded.
 * @param config Provider configuration; only `apiKey` is read.
 * @returns The first URL of the first generated image.
 * @throws Error when the API key is missing, task creation fails, the task
 *         fails/is cancelled, the task completes without an image URL, or
 *         polling times out.
 *
 * NOTE(review): the model is hard-coded and `config.model` is ignored — confirm this is intended.
 * NOTE(review): `input.imageBase64` is not sent, so reference images are silently dropped.
 * NOTE(review): the "Bearer " prefix is stripped and the bare key is sent as the
 * Authorization header — confirm against the provider's auth scheme.
 */
export default async (input: ImageConfig, config: AIConfig): Promise<string> => {
  if (!config.apiKey) throw new Error("缺少API Key");
  const apiKey = config.apiKey.replace("Bearer ", "");
  const headers = { Authorization: apiKey };

  const taskRes = await axios.post(
    `https://api.apimart.ai/v1/images/generations`,
    { model: "gemini-3-pro-image-preview", prompt: input.prompt, size: input.aspectRatio, n: 1, resolution: input.size },
    { headers },
  );

  const taskId = taskRes.data.data?.[0]?.task_id;
  if (taskRes.data.code !== 200 || !taskId) throw new Error("任务创建失败: " + JSON.stringify(taskRes.data));

  return pollTask(async () => {
    const res = await axios.get(`https://api.apimart.ai/v1/tasks/${taskId}`, { headers, params: { language: "en" } });
    if (res.data.code !== 200) return { completed: false, error: `查询失败: ${JSON.stringify(res.data)}` };

    const { status, result } = res.data.data;
    if (status === "completed") {
      const url = result?.images?.[0]?.url?.[0];
      // A finished task without a URL is terminal: surface it instead of letting
      // the poller spin until its attempt limit and report a misleading timeout.
      if (!url) return { completed: false, error: `任务完成但未返回图片URL: ${JSON.stringify(res.data)}` };
      return { completed: true, url };
    }
    if (status === "failed" || status === "cancelled") return { completed: false, error: `任务${status}` };
    return { completed: false };
  });
};
/**
 * Image generation through a caller-supplied OpenAI-compatible endpoint.
 *
 * Model names containing "gemini" or "nano" are driven through `generateText`
 * and the produced file (or a base64 payload found in the text) is returned as
 * a data URL. Every other model goes through `generateImage`, whose raw
 * `image.base64` is returned as-is (no `data:` prefix).
 *
 * @param input  Image request (prompt, optional systemPrompt, aspectRatio, size, imageBase64).
 * @param config Requires `model`, `apiKey` and `baseURL`.
 * @returns Base64 image content as described above.
 * @throws Error when a required config field is missing or the text path yields neither files nor text.
 *
 * NOTE(review): the two branches return different shapes (data URL vs bare base64) — confirm callers cope.
 * NOTE(review): `input.imageBase64` is only forwarded on the `generateImage` path.
 */
export default async (input: ImageConfig, config: AIConfig): Promise<string> => {
  const { model: modelName, apiKey: rawKey, baseURL } = config;
  if (!modelName) throw new Error("缺少Model名称");
  if (!rawKey) throw new Error("缺少API Key");
  if (!baseURL) throw new Error("缺少baseUrl");

  const provider = createOpenAICompatible({
    name: "xixixi",
    baseURL,
    headers: {
      // Normalise the key so exactly one "Bearer " prefix is sent.
      Authorization: `Bearer ${rawKey.replace("Bearer ", "")}`,
    },
  });

  // System prompt (when present) precedes the user prompt; the fixed suffix is appended on both paths.
  const basePrompt = input.systemPrompt ? `${input.systemPrompt}\n\n${input.prompt}` : input.prompt;
  const promptText = basePrompt + `请直接输出图片`;

  const useTextModel = modelName.includes("gemini") || modelName.includes("nano");

  if (!useTextModel) {
    // Size tier -> pixel size; unknown tiers fall back to 1024x1024.
    const pixelSizes: Record<string, `${number}x${number}`> = {
      "1K": "1024x1024",
      "2K": "2048x2048",
      "4K": "4096x4096",
    };
    const references = input.imageBase64;
    const { image } = await generateImage({
      model: provider.imageModel(modelName),
      prompt: references && references.length ? { text: promptText, images: references } : promptText,
      aspectRatio: input.aspectRatio as "1:1" | "3:4" | "4:3" | "9:16" | "16:9",
      size: pixelSizes[input.size] ?? "1024x1024",
    });
    return image.base64;
  }

  // "gemini-2.5-flash-image" gets only an aspect ratio; all other names also get imageSize.
  const imageConfig =
    modelName == "gemini-2.5-flash-image"
      ? { aspectRatio: input.aspectRatio }
      : { aspectRatio: input.aspectRatio, imageSize: input.size };

  const result = await generateText({
    model: provider.languageModel(modelName),
    prompt: promptText,
    providerOptions: {
      google: {
        imageConfig,
        responseModalities: ["IMAGE"],
      },
    },
  });

  const files = result.files;
  if (files && files.length) {
    // When several files come back, the last one wins.
    const lastFile = files[files.length - 1];
    return `data:${lastFile.mediaType};base64,${lastFile.base64}`;
  }

  if (!result.text) {
    console.error(JSON.stringify(result.response, null, 2));
    throw new Error("图片生成失败");
  }

  // Prefer an embedded "base64,<payload>" fragment; otherwise treat the whole text as the payload.
  const embedded = /base64,([A-Za-z0-9+/=]+)/.exec(result.text);
  const payload = embedded && embedded[1] ? embedded[1] : result.text;
  return "data:image/jpeg;base64," + payload;
};
+59,15 @@ export default async (input: ImageConfig, config: AIConfig): Promise => resolution: size, ...(images.length && { images: images }), }; - console.log("%c Line:27 🍷 body", "background:#6ec1c2", body); + const urlObj = getApiUrl(config.baseURL!); try { const { data } = await axios.post(urlObj.requestUrl, body, { headers: { Authorization: apiKey } }); - console.log("%c Line:35 🥕 data", "background:#93c0a4", data); + const queryUrl = template({ id: data.task_id }, urlObj.queryUrl); - console.log("%c Line:53 🍋 queryUrl", "background:#465975", queryUrl); + return await pollTask(async () => { const { data: queryData } = await axios.get(queryUrl, { headers: { Authorization: apiKey } }); - console.log("%c Line:42 🍐 queryData", "background:#4fff4B", queryData); if (queryData.state !== 0) { return { completed: false, error: queryData.message || "查询任务失败" }; @@ -82,7 +80,7 @@ export default async (input: ImageConfig, config: AIConfig): Promise => } if (state === "succeed") { - return { completed: true, imageUrl: creations?.[0]?.url }; + return { completed: true, url: creations?.[0]?.url }; } return { completed: false }; diff --git a/src/utils/ai/utils.ts b/src/utils/ai/utils.ts index 8e38e1c..55aab17 100644 --- a/src/utils/ai/utils.ts +++ b/src/utils/ai/utils.ts @@ -4,6 +4,7 @@ interface ValidateResult { owned: (typeof modelList)[number]; images: string[]; hasStartEndType: boolean; + hasTextType: boolean; } /** @@ -14,19 +15,15 @@ interface ValidateResult { */ export const validateVideoConfig = (input: VideoConfig, config: AIConfig, customOwned?: (typeof modelList)[number]): ValidateResult => { if (!config.model) throw new Error("缺少Model名称"); - const owned = customOwned ?? modelList.find((m) => m.model === config.model); if (!owned) throw new Error(`不支持的模型: ${config.model}`); - const images = input.imageBase64 ?? 
/**
 * Repeatedly invokes `queryFn` until it reports a finished task and returns the
 * resulting URL.
 *
 * Each attempt first waits `interval` milliseconds, then calls `queryFn`:
 * - a truthy `error` aborts immediately;
 * - `completed` with a `url` resolves with that URL;
 * - `completed` WITHOUT a `url` is treated as a terminal failure (the task is
 *   done, so further polling could only end in a misleading timeout);
 * - anything else triggers the next attempt.
 *
 * @param queryFn     Status probe for the remote task.
 * @param maxAttempts Maximum number of probes (default 500).
 * @param interval    Delay before each probe in milliseconds (default 2000).
 * @returns The URL reported by the first completed probe.
 * @throws Error with `queryFn`'s error text, when a completed result carries no
 *         URL, or when `maxAttempts` probes pass without completion.
 */
export const pollTask = async (
  queryFn: () => Promise<{ completed: boolean; url?: string; error?: string }>,
  maxAttempts = 500,
  interval = 2000,
): Promise<string> => {
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    await new Promise<void>((resolve) => setTimeout(resolve, interval));
    const { completed, url, error } = await queryFn();
    if (error) throw new Error(error);
    if (completed) {
      if (url) return url;
      // Finished but nothing to return: fail now instead of burning the remaining attempts.
      throw new Error("任务已完成但未返回结果URL");
    }
  }
  throw new Error(`任务轮询超时,已尝试 ${maxAttempts} 次`);
};
"endFrameOptional"], audio: true, }, - // doubao-seedance-1-5-pro 图生视频 - { - manufacturer: "volcengine", - model: "doubao-seedance-1-5-pro-251215", - durationResolutionMap: [{ duration: [4, 5, 6, 7, 8, 9, 10, 11, 12], resolution: ["480p", "720p", "1080p"] }], - aspectRatio: [], - type: ["endFrameOptional"], - audio: true, - }, - // doubao-seedance-1-0-pro 文生视频 + // doubao-seedance-1-0-pro 文生视频/图生视频 { manufacturer: "volcengine", model: "doubao-seedance-1-0-pro-250528", durationResolutionMap: [{ duration: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], resolution: ["480p", "720p", "1080p"] }], aspectRatio: ["16:9", "4:3", "1:1", "3:4", "9:16", "21:9"], - type: ["text"], + type: ["text", "endFrameOptional"], audio: false, }, - // doubao-seedance-1-0-pro 图生视频 - { - manufacturer: "volcengine", - model: "doubao-seedance-1-0-pro-250528", - durationResolutionMap: [{ duration: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], resolution: ["480p", "720p", "1080p"] }], - aspectRatio: [], - type: ["endFrameOptional"], - audio: false, - }, - // doubao-seedance-1-0-pro-fast 文生视频 + // doubao-seedance-1-0-pro-fast 文生视频/图生视频 { manufacturer: "volcengine", model: "doubao-seedance-1-0-pro-fast-251015", durationResolutionMap: [{ duration: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], resolution: ["480p", "720p", "1080p"] }], aspectRatio: ["16:9", "4:3", "1:1", "3:4", "9:16", "21:9"], - type: ["text"], - audio: false, - }, - // doubao-seedance-1-0-pro-fast 图生视频 - { - manufacturer: "volcengine", - model: "doubao-seedance-1-0-pro-fast-251015", - durationResolutionMap: [{ duration: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], resolution: ["480p", "720p", "1080p"] }], - aspectRatio: [], - type: ["singleImage"], + type: ["text", "singleImage"], audio: false, }, // doubao-seedance-1-0-lite-i2v 图生视频(仅支持图片模式) @@ -288,7 +261,210 @@ const modelList: Owned[] = [ type: ["singleImage", "reference", "startEndRequired"], audio: false, }, - // ================== sora系列 ================== + // ================== 万象系列 ================== + 
// wan2.6-t2v 文生视频(有声视频) + { + manufacturer: "wan", + model: "wan2.6-t2v", + durationResolutionMap: [{ duration: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], resolution: ["720p", "1080p"] }], + aspectRatio: ["16:9", "9:16", "1:1", "4:3", "3:4"], + type: ["text"], + audio: true, + }, + // wan2.5-t2v-preview 文生视频(有声视频) + { + manufacturer: "wan", + model: "wan2.5-t2v-preview", + durationResolutionMap: [{ duration: [5, 10], resolution: ["480p", "720p", "1080p"] }], + aspectRatio: ["16:9", "9:16", "1:1", "4:3", "3:4"], + type: ["text"], + audio: true, + }, + // wan2.2-t2v-plus 文生视频(无声视频) + { + manufacturer: "wan", + model: "wan2.2-t2v-plus", + durationResolutionMap: [{ duration: [5], resolution: ["480p", "1080p"] }], + aspectRatio: ["16:9", "9:16", "1:1", "4:3", "3:4"], + type: ["text"], + audio: false, + }, + // wanx2.1-t2v-turbo 文生视频(无声视频) + { + manufacturer: "wan", + model: "wanx2.1-t2v-turbo", + durationResolutionMap: [{ duration: [5], resolution: ["480p", "720p"] }], + aspectRatio: ["16:9", "9:16", "1:1", "4:3", "3:4"], + type: ["text"], + audio: false, + }, + // wanx2.1-t2v-plus 文生视频(无声视频) + { + manufacturer: "wan", + model: "wanx2.1-t2v-plus", + durationResolutionMap: [{ duration: [5], resolution: ["720p"] }], + aspectRatio: ["16:9", "9:16", "1:1", "4:3", "3:4"], + type: ["text"], + audio: false, + }, + // wan2.6-i2v-flash 图生视频(有声视频&无声视频) + { + manufacturer: "wan", + model: "wan2.6-i2v-flash", + durationResolutionMap: [{ duration: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], resolution: ["720p", "1080p"] }], + aspectRatio: [], + type: ["singleImage"], + audio: true, + }, + // wan2.6-i2v 图生视频(有声视频) + { + manufacturer: "wan", + model: "wan2.6-i2v", + durationResolutionMap: [{ duration: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], resolution: ["720p", "1080p"] }], + aspectRatio: [], + type: ["singleImage"], + audio: true, + }, + // wan2.5-i2v-preview 图生视频(有声视频) + { + manufacturer: "wan", + model: "wan2.5-i2v-preview", + durationResolutionMap: [{ 
duration: [5, 10], resolution: ["480p", "720p", "1080p"] }], + aspectRatio: [], + type: ["singleImage"], + audio: true, + }, + // wan2.2-i2v-flash 图生视频(无声视频) + { + manufacturer: "wan", + model: "wan2.2-i2v-flash", + durationResolutionMap: [{ duration: [5], resolution: ["480p", "720p", "1080p"] }], + aspectRatio: [], + type: ["singleImage"], + audio: false, + }, + // wan2.2-i2v-plus 图生视频(无声视频) + { + manufacturer: "wan", + model: "wan2.2-i2v-plus", + durationResolutionMap: [{ duration: [5], resolution: ["480p", "1080p"] }], + aspectRatio: [], + type: ["singleImage"], + audio: false, + }, + // wanx2.1-i2v-plus 图生视频(无声视频) + { + manufacturer: "wan", + model: "wanx2.1-i2v-plus", + durationResolutionMap: [{ duration: [5], resolution: ["720p"] }], + aspectRatio: [], + type: ["singleImage"], + audio: false, + }, + // wanx2.1-i2v-turbo 图生视频(无声视频) + { + manufacturer: "wan", + model: "wanx2.1-i2v-turbo", + durationResolutionMap: [{ duration: [3, 4, 5], resolution: ["480p", "720p"] }], + aspectRatio: [], + type: ["singleImage"], + audio: false, + }, + // wan2.2-kf2v-flash 首尾帧生视频(无声视频) + { + manufacturer: "wan", + model: "wan2.2-kf2v-flash", + durationResolutionMap: [{ duration: [5], resolution: ["480p", "720p", "1080p"] }], + aspectRatio: [], + type: ["startEndRequired"], + audio: false, + }, + // wanx2.1-kf2v-plus 首尾帧生视频(无声视频) + { + manufacturer: "wan", + model: "wanx2.1-kf2v-plus", + durationResolutionMap: [{ duration: [5], resolution: ["720p"] }], + aspectRatio: [], + type: ["startEndRequired"], + audio: false, + }, + // ================== Gemini Veo 系列 ================== + // Veo 3.1 预览版(支持音频) + { + manufacturer: "gemini", + model: "veo-3.1-generate-preview", + durationResolutionMap: [ + { duration: [4, 6], resolution: ["720p"] }, + { duration: [8], resolution: ["720p", "1080p"] }, + ], + aspectRatio: ["16:9", "9:16"], + type: ["text", "singleImage", "startEndRequired", "endFrameOptional", "reference"], + audio: true, + }, + // Veo 3.1 Fast 预览版(支持音频) + { + manufacturer: 
"gemini", + model: "veo-3.1-fast-generate-preview", + durationResolutionMap: [ + { duration: [4, 6], resolution: ["720p"] }, + { duration: [8], resolution: ["720p", "1080p"] }, + ], + aspectRatio: ["16:9", "9:16"], + type: ["text", "singleImage", "startEndRequired", "endFrameOptional", "reference"], + audio: true, + }, + // Veo 3 稳定版(支持音频) + { + manufacturer: "gemini", + model: "veo-3.0-generate-preview", + durationResolutionMap: [ + { duration: [4, 6], resolution: ["720p"] }, + { duration: [8], resolution: ["720p", "1080p"] }, + ], + aspectRatio: ["16:9", "9:16"], + type: ["text", "singleImage"], + audio: true, + }, + // Veo 3 Fast 稳定版(支持音频) + { + manufacturer: "gemini", + model: "veo-3.0-fast-generate-preview", + durationResolutionMap: [ + { duration: [4, 6], resolution: ["720p"] }, + { duration: [8], resolution: ["720p", "1080p"] }, + ], + aspectRatio: ["16:9", "9:16"], + type: ["text", "singleImage"], + audio: true, + }, + // Veo 2 稳定版(无音频) + { + manufacturer: "gemini", + model: "veo-2.0-generate-001", + durationResolutionMap: [{ duration: [5, 6, 7, 8], resolution: ["720p"] }], + aspectRatio: ["16:9", "9:16"], + type: ["text", "singleImage"], + audio: false, + }, + // ================== RunningHub 系列 ================== + // sora + { + manufacturer: "runninghub", + model: "sora", + durationResolutionMap: [{ duration: [10, 15], resolution: [] }], + aspectRatio: ["16:9", "9:16"], + type: ["singleImage", "text"], + audio: false, + }, + // sora 2 + { + manufacturer: "runninghub", + model: "sora-2", + durationResolutionMap: [{ duration: [15, 25], resolution: [] }], + aspectRatio: ["16:9", "9:16"], + type: ["singleImage", "text"], + audio: false, + }, ]; export default modelList; diff --git a/src/utils/ai/video/owned/gemini.ts b/src/utils/ai/video/owned/gemini.ts new file mode 100644 index 0000000..d4a7a06 --- /dev/null +++ b/src/utils/ai/video/owned/gemini.ts @@ -0,0 +1,62 @@ +import "../type"; +import fs from "fs"; +import path from "path"; +import axios from 
"axios"; +import { pollTask, validateVideoConfig } from "@/utils/ai/utils"; + +const buildInlineImage = (data: string) => ({ inlineData: { mimeType: "image/png", data } }); + +export default async (input: VideoConfig, config: AIConfig) => { + if (!config.model) throw new Error("缺少Model名称"); + if (!config.apiKey) throw new Error("缺少API Key"); + + const { owned, images, hasStartEndType } = validateVideoConfig(input, config); + const baseUrl = (config.baseURL || "https://generativelanguage.googleapis.com").replace(/\/+$/, ""); + const headers = { "x-goog-api-key": config.apiKey }; + + const instance: Record = { prompt: input.prompt }; + const parameters: Record = { + aspectRatio: input.aspectRatio, + durationSeconds: String(input.duration), + ...(input.resolution !== "720p" && { resolution: input.resolution }), + }; + + // 根据图片数量和模型能力决定图片用法 + const len = images.length; + const hasRef = owned.type.includes("reference"); + const hasSingle = owned.type.includes("singleImage"); + + if (len === 2 && hasStartEndType) { + instance.image = buildInlineImage(images[0]); + parameters.lastFrame = buildInlineImage(images[1]); + } else if (len === 1 && (hasSingle || hasStartEndType)) { + instance.image = buildInlineImage(images[0]); + } else if (len >= 1 && len <= 3 && hasRef) { + parameters.referenceImages = images.map((img) => ({ image: buildInlineImage(img), referenceType: "asset" })); + } + + const { data } = await axios.post( + `${baseUrl}/v1beta/models/${config.model}:predictLongRunning`, + { instances: [instance], parameters }, + { headers: { ...headers, "Content-Type": "application/json" } }, + ); + + if (!data.name) throw new Error("未获取到操作名称"); + + return pollTask(async () => { + const { data: status } = await axios.get(`${baseUrl}/v1beta/${data.name}`, { headers }); + const { done, response, error } = status; + + if (!done) return { completed: false }; + if (error) return { completed: false, error: `任务失败: ${error.message || JSON.stringify(error)}` }; + + const videoUri = 
/**
 * Generates a video through RunningHub's `rhart-video-s` OpenAPI.
 *
 * With no images (and a model that supports text) the text-to-video endpoint is
 * used; otherwise the first image is uploaded and the image-to-video endpoint
 * is used. The task is then polled until it succeeds or fails.
 *
 * `config.baseURL` may carry up to three `|`-separated overrides:
 * `imageToVideoUrl|textToVideoUrl|queryUrl` (the query URL contains `{id}`).
 * Empty or missing segments fall back to the built-in defaults.
 *
 * @param input  Video request (prompt, duration, aspectRatio, imageBase64 …).
 * @param config Provider configuration; `apiKey` is required.
 * @returns URL of the generated video.
 * @throws Error when the API key is missing, validation fails, the image upload
 *         or task submission fails, the task fails, or polling times out.
 */
export default async (input: VideoConfig, config: AIConfig): Promise<string> => {
  if (!config.apiKey) throw new Error("缺少API Key");

  const { owned, images, hasTextType } = validateVideoConfig(input, config);

  const baseUrl = "https://www.runninghub.cn";
  const parts = (config.baseURL || "").split("|");
  // The "sora-2" model is served by the "-pro" variants of the endpoints.
  const suffix = owned.model === "sora-2" ? "-pro" : "";

  const image2videoUrl = parts[0] || `${baseUrl}/openapi/v2/rhart-video-s/image-to-video${suffix}`;
  const text2videoUrl = parts[1] || `${baseUrl}/openapi/v2/rhart-video-s/text-to-video${suffix}`;
  const queryUrl = parts[2] || `${baseUrl}/openapi/v2/rhart-video-s/{id}`;
  const authorization = `Bearer ${config.apiKey}`;

  // Upload a base64 image and return its download URL.
  const uploadImage = async (base64Image: string): Promise<string> => {
    const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, "");
    let buffer: Buffer = Buffer.from(base64Data, "base64");
    const MAX_SIZE = 5 * 1024 * 1024;

    if (buffer.length > MAX_SIZE) {
      // First try progressively lower JPEG quality …
      for (let quality = 90; buffer.length > MAX_SIZE && quality > 10; quality -= 10) {
        buffer = await sharp(buffer).jpeg({ quality, mozjpeg: true }).toBuffer();
      }
      // … then, if still too large, scale the image down proportionally.
      if (buffer.length > MAX_SIZE) {
        const { width = 1920, height = 1080 } = await sharp(buffer).metadata();
        const scale = Math.sqrt(MAX_SIZE / buffer.length);
        buffer = await sharp(buffer)
          .resize({ width: Math.floor(width * scale), height: Math.floor(height * scale), fit: "inside" })
          .jpeg({ quality: 80, mozjpeg: true })
          .toBuffer();
      }
    }

    const formData = new FormData();
    formData.append("file", buffer, { filename: "image.jpg", contentType: "image/jpeg" });

    const { data } = await axios.post(`${baseUrl}/openapi/v2/media/upload/binary`, formData, {
      headers: { Authorization: authorization },
    });

    if (data.code !== 0 || !data.data?.download_url) {
      throw new Error(`图片上传失败: ${JSON.stringify(data)}`);
    }
    return data.data.download_url;
  };

  // Submit the generation task.
  const submitTask = async (url: string, body: Record<string, unknown>) => {
    const { data } = await axios.post(url, body, {
      headers: { "Content-Type": "application/json", Authorization: authorization },
    });
    if (data.status === "FAILED") throw new Error(`任务提交失败: ${data.errorMessage || "未知错误"}`);
    return { taskId: data.taskId, status: data.status, videoUrl: data.results?.[0]?.url };
  };

  const isTextToVideo = images.length === 0 && hasTextType;
  const submitUrl = isTextToVideo ? text2videoUrl : image2videoUrl;
  const requestBody: Record<string, unknown> = {
    prompt: input.prompt,
    duration: String(input.duration),
    aspectRatio: input.aspectRatio,
    ...(isTextToVideo ? {} : { imageUrl: await uploadImage(images[0]) }),
  };

  const { taskId, status, videoUrl } = await submitTask(submitUrl, requestBody);
  // Fast path: the submit call already carries the result. Return the URL itself,
  // matching what the polling path resolves to.
  if (status === "SUCCESS" && videoUrl) return videoUrl;
  // Without a task id the query URL would be built with "undefined".
  if (!taskId) throw new Error("任务提交失败: 未返回taskId");

  return await pollTask(async () => {
    const { data } = await axios.get(queryUrl.replace("{id}", taskId), {
      headers: { Authorization: authorization },
    });
    if (data.status === "SUCCESS") {
      // pollTask reads the `url` field; any other key is ignored and the poll would never resolve.
      return data.results?.length ? { completed: true, url: data.results[0].url } : { completed: false, error: "任务成功但未返回视频链接" };
    }
    if (data.status === "FAILED") return { completed: false, error: `任务失败: ${data.errorMessage || "未知错误"}` };
    if (data.status === "QUEUED" || data.status === "RUNNING") return { completed: false };
    return { completed: false, error: `未知状态: ${data.status}` };
  });
};
axios.post(text2videoUrl, requestBody, {
       headers: {
         "Content-Type": "application/json",
         Authorization: authorization,
@@ -78,7 +81,7 @@ export default async (input: VideoConfig, config: AIConfig) => {
       requestBody.audio = input.audio;
     }
 
-    const response = await axios.post(`${baseUrl}/img2video`, requestBody, {
+    const response = await axios.post(image2videoUrl, requestBody, {
       headers: {
         "Content-Type": "application/json",
         Authorization: authorization,
@@ -89,7 +92,7 @@ export default async (input: VideoConfig, config: AIConfig) => {
 
   // 轮询任务状态
   return await pollTask(async () => {
-    const response = await axios.get(`${baseUrl}/tasks`, {
+    const response = await axios.get(queryUrl, {
       headers: {
         "Content-Type": "application/json",
         Authorization: authorization,
@@ -111,8 +114,7 @@ export default async (input: VideoConfig, config: AIConfig) => {
         const creation = task.creations?.[0];
         return {
           completed: true,
-          videoUrl: creation?.url,
-          coverUrl: creation?.cover_url,
+          url: creation?.url,
         };
       }
       case "failed":
diff --git a/src/utils/ai/video/owned/volcengine.ts b/src/utils/ai/video/owned/volcengine.ts
index dc07fe8..25e1ec4 100644
--- a/src/utils/ai/video/owned/volcengine.ts
+++ b/src/utils/ai/video/owned/volcengine.ts
@@ -59,7 +59,7 @@ export default async (input: VideoConfig, config: AIConfig) => {
 
     switch (status) {
       case "succeeded":
-        return { completed: true, imageUrl: content?.video_url };
+        return { completed: true, url: content?.video_url };
       case "failed":
       case "cancelled":
       case "expired":
diff --git a/src/utils/ai/video/owned/wan.ts b/src/utils/ai/video/owned/wan.ts
new file mode 100644
index 0000000..94e11a4
--- /dev/null
+++ b/src/utils/ai/video/owned/wan.ts
@@ -0,0 +1,194 @@
+import "../type";
+import axios from "axios";
+import { pollTask, validateVideoConfig } from "@/utils/ai/utils";
+
+/**
+ * Resolves a resolution tier plus aspect ratio to the explicit `width*height`
+ * string sent as the `size` parameter of text-to-video requests.
+ *
+ * @param resolution Resolution tier; matched case-insensitively against the table keys.
+ * @param aspectRatio Aspect ratio key such as "16:9".
+ * @returns The size string, e.g. "1280*720" for 720p at 16:9.
+ * @throws Error when the table has no entry for the requested combination.
+ */
+const getSizeFromConfig = (resolution: string, aspectRatio: string): string => {
+  const sizeMap: Record<string, Record<string, string>> = {
+    "480p": {
+      "16:9": "832*480",
+      "9:16": "480*832",
+      "1:1": "624*624",
+    },
+    "720p": {
+      "16:9": "1280*720",
+      "9:16": "720*1280",
+      "1:1": "960*960",
+      "4:3": "1088*832",
+      "3:4": "832*1088",
+    },
+    "1080p": {
+      "16:9": "1920*1080",
+      "9:16": "1080*1920",
+      "1:1": "1440*1440",
+      "4:3": "1632*1248",
+      "3:4": "1248*1632",
+    },
+  };
+
+  const resolutionKey = resolution.toLowerCase();
+  const size = sizeMap[resolutionKey]?.[aspectRatio];
+
+  if (!size) {
+    throw new Error(`不支持的分辨率(${resolution})和宽高比(${aspectRatio})组合`);
+  }
+
+  return size;
+};
+
+/**
+ * Submits an asynchronous video-generation task to the DashScope API and polls
+ * it until it reaches a terminal state.
+ *
+ * `config.baseURL` may hold up to three `|`-separated endpoint overrides in the
+ * order: submit URL (text/image to video), first/last-frame submit URL, task
+ * query URL. Missing or blank segments fall back to the DashScope defaults.
+ *
+ * @param input Prompt, duration, resolution, aspect ratio and optional audio flag.
+ * @param config Model name, API key and optional endpoint overrides.
+ * @returns Whatever `pollTask` resolves to for the per-poll results built below.
+ * @throws Error when the API key is missing, the generation type is unsupported,
+ *   or the submit response carries an error code or no task id.
+ */
+export default async (input: VideoConfig, config: AIConfig) => {
+  if (!config.apiKey) throw new Error("缺少API Key");
+
+  const { owned, images, hasStartEndType, hasTextType } = validateVideoConfig(input, config);
+
+  // Resolve endpoints. `"".split("|")` yields `[""]`, and destructuring defaults
+  // only replace `undefined`, so blank segments are mapped to `undefined` first.
+  const baseUrl = "https://dashscope.aliyuncs.com/api/v1";
+  const [
+    i2vUrl = baseUrl + "/services/aigc/video-generation/video-synthesis",
+    kf2vUrl = baseUrl + "/services/aigc/image2video/video-synthesis",
+    queryUrl = baseUrl + "/tasks",
+  ] = (config.baseURL || "").split("|").map((segment) => segment.trim() || undefined);
+
+  const types = owned.type;
+  const authorization = `Bearer ${config.apiKey}`;
+
+  // Pick the submit endpoint and build the request body.
+  let submitUrl: string;
+  let body: Record<string, any>;
+
+  if (hasTextType && images.length === 0) {
+    // Text to video
+    submitUrl = i2vUrl;
+    body = {
+      model: config.model,
+      input: {
+        prompt: input.prompt,
+      },
+      parameters: {
+        size: getSizeFromConfig(input.resolution, input.aspectRatio),
+        duration: input.duration,
+      },
+    };
+  } else if (types.includes("singleImage")) {
+    // Image to video
+    submitUrl = i2vUrl;
+    body = {
+      model: config.model,
+      input: {
+        prompt: input.prompt,
+        img_url: images[0],
+      },
+      parameters: {
+        resolution: input.resolution.toUpperCase(),
+        duration: input.duration,
+      },
+    };
+    // The audio parameter is only supported by some models.
+    if (owned.audio && input.audio !== undefined) {
+      body.parameters.audio = input.audio;
+    }
+  } else if (hasStartEndType) {
+    // First/last frame to video
+    submitUrl = kf2vUrl;
+    const inputObj: Record<string, any> = {
+      prompt: input.prompt,
+      first_frame_url: images[0],
+    };
+    // Last frame: always sent when required, otherwise only when a second image exists.
+    if (types.includes("startEndRequired")) {
+      inputObj.last_frame_url = images[1];
+    } else if ((types.includes("endFrameOptional") || types.includes("startFrameOptional")) && images.length >= 2) {
+      inputObj.last_frame_url = images[1];
+    }
+    body = {
+      model: config.model,
+      input: inputObj,
+      parameters: {
+        resolution: input.resolution.toUpperCase(),
+        duration: input.duration,
+      },
+    };
+  } else {
+    throw new Error(`不支持的视频生成类型: ${types.join(", ")}`);
+  }
+
+  // Submit the task; the X-DashScope-Async header requests asynchronous handling.
+  const submitResponse = await axios.post(submitUrl, body, {
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: authorization,
+      "X-DashScope-Async": "enable",
+    },
+  });
+
+  const submitData = submitResponse.data;
+  if (submitData.code) {
+    throw new Error(`任务提交失败: [${submitData.code}] ${submitData.message}`);
+  }
+
+  const taskId = submitData.output?.task_id;
+  if (!taskId) {
+    throw new Error("任务提交失败: 未返回task_id");
+  }
+
+  // Poll the task status.
+  return await pollTask(async () => {
+    const response = await axios.get(`${queryUrl}/${taskId}`, {
+      headers: { Authorization: authorization },
+    });
+
+    const data = response.data;
+
+    // Top-level error reported by the API.
+    if (data.code) {
+      return { completed: false, error: `[${data.code}] ${data.message}` };
+    }
+
+    const taskStatus = data.output?.task_status;
+
+    switch (taskStatus) {
+      case "SUCCEEDED": {
+        // A success without a video link is reported as an error rather than an empty result.
+        const url = data.output?.video_url;
+        return url ? { completed: true, url } : { completed: false, error: "任务成功但未返回视频链接" };
+      }
+      case "FAILED":
+        return {
+          completed: false,
+          error: `任务失败: [${data.output?.code || "UNKNOWN"}] ${data.output?.message || "未知错误"}`,
+        };
+      case "CANCELED":
+        return { completed: false, error: "任务已取消" };
+      case "UNKNOWN":
+        return { completed: false, error: "任务不存在或状态未知" };
+      case "PENDING":
+      case "RUNNING":
+        return { completed: false };
+      default:
+        return { completed: false, error: `未知状态: ${taskStatus}` };
+    }
+  });
+};
diff --git a/src/utils/ai/video/type.ts b/src/utils/ai/video/type.ts
index 0706d2d..1687c0b 100644
--- a/src/utils/ai/video/type.ts
+++ b/src/utils/ai/video/type.ts
@@ -1,6 +1,6 @@
 interface
VideoConfig { duration: 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12; - resolution: "480p" | "720p" | "1080p"; + resolution: "480p" | "720p" | "1080p" | "2K" | "4K"; aspectRatio: "16:9" | "9:16"; prompt: string; savePath: string; diff --git a/yarn.lock b/yarn.lock index a00a10f..28551f2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -40,6 +40,14 @@ "@ai-sdk/provider" "3.0.7" "@ai-sdk/provider-utils" "4.0.13" +"@ai-sdk/openai-compatible@^2.0.27": + version "2.0.27" + resolved "https://registry.npmmirror.com/@ai-sdk/openai-compatible/-/openai-compatible-2.0.27.tgz#55c6bf3c59d71e71d9c337dbef8b764fa69e7ccd" + integrity sha512-YpAZe7OQuMkYqcM/m1BMX0xFn4QdhuL4qGo8sNaiLq1VjEeU/pPfz51rnlpCfCvYanUL5TjIZEbdclBUwLooSQ== + dependencies: + "@ai-sdk/provider" "3.0.7" + "@ai-sdk/provider-utils" "4.0.13" + "@ai-sdk/openai@^3.0.25": version "3.0.25" resolved "https://registry.npmmirror.com/@ai-sdk/openai/-/openai-3.0.25.tgz#452c8f8ed597468048569ec9476a0b5641888d2a"