补充模型文件
This commit is contained in:
parent
265dee7729
commit
ec8a459cd9
25
data/models/all-MiniLM-L6-v2/config.json
Normal file
25
data/models/all-MiniLM-L6-v2/config.json
Normal file
@ -0,0 +1,25 @@
|
||||
{
|
||||
"_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"architectures": [
|
||||
"BertModel"
|
||||
],
|
||||
"attention_probs_dropout_prob": 0.1,
|
||||
"classifier_dropout": null,
|
||||
"gradient_checkpointing": false,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.1,
|
||||
"hidden_size": 384,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 1536,
|
||||
"layer_norm_eps": 1e-12,
|
||||
"max_position_embeddings": 512,
|
||||
"model_type": "bert",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 6,
|
||||
"pad_token_id": 0,
|
||||
"position_embedding_type": "absolute",
|
||||
"transformers_version": "4.29.2",
|
||||
"type_vocab_size": 2,
|
||||
"use_cache": true,
|
||||
"vocab_size": 30522
|
||||
}
|
||||
30686
data/models/all-MiniLM-L6-v2/tokenizer.json
Normal file
30686
data/models/all-MiniLM-L6-v2/tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,33 +1,31 @@
|
||||
import { pipeline, env as transformersEnv, FeatureExtractionPipeline } from "@huggingface/transformers";
|
||||
import path from "path";
|
||||
import fs from "fs";
|
||||
import getPath from "@/utils/getPath";
|
||||
|
||||
const modelDir = path.join(
|
||||
typeof process.versions?.electron !== "undefined" ? require("electron").app.getPath("userData") : process.cwd(),
|
||||
"data",
|
||||
"models",
|
||||
"all-MiniLM-L6-v2",
|
||||
);
|
||||
// ── 模型配置 ──
|
||||
const modelOnnxFile = ["all-MiniLM-L6-v2", "onnx", "model_fp16.onnx"]; // Path segments of the ONNX model file, joined onto the models directory; the first segment is also used as the model folder name
|
||||
const modelDtype = "fp16" as const; // Quantization dtype passed to the pipeline: fp16 (matches the model_fp16.onnx file name)
|
||||
|
||||
let extractor: FeatureExtractionPipeline | null = null;
|
||||
|
||||
export async function initEmbedding(): Promise<void> {
|
||||
if (extractor) return;
|
||||
|
||||
const requiredFiles = ["config.json", "tokenizer.json", "onnx/model.onnx"];
|
||||
for (const file of requiredFiles) {
|
||||
const filePath = path.join(modelDir, file);
|
||||
if (!fs.existsSync(filePath)) {
|
||||
throw new Error(`文件不存在: ${filePath}`);
|
||||
}
|
||||
//todo 模型配置放到这里
|
||||
|
||||
const onnxPath = path.join(getPath("models"), ...modelOnnxFile);
|
||||
if (!fs.existsSync(onnxPath)) {
|
||||
throw new Error(`Embedding 模型文件不存在: ${onnxPath}`);
|
||||
}
|
||||
|
||||
transformersEnv.allowRemoteModels = false;
|
||||
transformersEnv.allowLocalModels = true;
|
||||
transformersEnv.localModelPath = path.dirname(modelDir).replace(/\\/g, "/") + "/";
|
||||
transformersEnv.localModelPath = getPath("models").replace(/\\/g, "/") + "/";
|
||||
|
||||
// @ts-ignore
|
||||
extractor = await pipeline("feature-extraction", path.basename(modelDir), { dtype: "fp32" });
|
||||
const modelFolder = modelOnnxFile[0];
|
||||
// @ts-ignore - pipeline 重载联合类型过于复杂
|
||||
extractor = await pipeline("feature-extraction", modelFolder, { dtype: modelDtype });
|
||||
}
|
||||
|
||||
export async function getEmbedding(text: string): Promise<number[]> {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user