// import pdf from 'pdf-parse';
// import fs from 'fs';
import { CloudApi } from 'src/lib/ncloud';
import mammoth from "mammoth";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { Document } from '@langchain/core/documents';
import * as tf from '@tensorflow/tfjs-core';
// import "@tensorflow/tfjs-backend-cpu";
// import '@tensorflow/tfjs-backend-webgpu';
import '@tensorflow/tfjs-backend-webgl';
// import '@tensorflow/tfjs-backend-wasm';
import { TensorFlowEmbeddings } from "@langchain/community/embeddings/tensorflow";

/**
 * Loads a document from a URL, splits its text into chunks and computes
 * embeddings for those chunks with TensorFlow.js.
 *
 * Typical usage: `await loader(url)`, then `await splitter()`, then
 * `await embedings()`.
 */
export class AgentStory {
  /** URL of the most recently loaded document; stays "" until a load yields content. */
  url: string = "";

  /**
   * Text of the loaded document.
   * NOTE: `loader()` stores `null` here at runtime when nothing could be loaded,
   * so check for truthiness before using it.
   */
  content: string = "";

  /** Chunks produced by the last successful `splitter()` call. */
  docList: Array<Document> = [];

  /**
   * Settles once the TensorFlow.js backend selection has finished.
   * Never rejects: an initialisation failure is logged instead, so that
   * constructing the class cannot cause an unhandled promise rejection.
   */
  private readonly backendReady: Promise<void>;

  constructor() {
    this.backendReady = setBackend().catch((err: unknown) => {
      console.error(err);
    });
  }

  /**
   * Loads the text content behind `url` into `this.content`.
   *
   * `.docx` URLs are first parsed locally with mammoth. When that yields no
   * text (or for any other URL) the `agent/loader` cloud endpoint is used.
   *
   * @param url Location of the document to load.
   * @returns The loaded text, or `null` when no content could be obtained.
   */
  async loader(url: string) {
    const api = new CloudApi();
    let result;
    if (url?.endsWith(".docx")) {
      result = await this.loadDocx(url);
    }
    // loadDocx always returns an object, so test the extracted text itself:
    // a failed or empty local extraction falls back to the cloud loader.
    if (!result?.data) {
      result = await api.fetch("agent/loader", { url: url });
    }
    this.content = result?.data || null;
    if (this.content) {
      this.url = url;
    }
    return this.content;
  }

  /**
   * Downloads a `.docx` file and extracts its raw text with mammoth.
   *
   * Download (non-2xx status) and parsing failures are logged and reported as
   * empty data rather than thrown.
   *
   * @param url Location of the `.docx` file.
   * @returns An object whose `data` is the extracted text ("" on failure).
   */
  async loadDocx(url: string) {
    const response = await fetch(url);
    if (!response.ok) {
      console.error(`Failed to fetch file: ${response.status} ${response.statusText}`);
      return { data: "" };
    }
    const arrayBuffer = await response.arrayBuffer();
    let text;
    try {
      // In the browser the ArrayBuffer is passed to mammoth directly.
      text = await mammoth.extractRawText({ arrayBuffer: arrayBuffer });
    } catch (err) {
      console.error(err);
    }
    // let html = mammoth.convertToHtml(buffer)
    const raw: string = text?.value || "";
    // Collapse runs of consecutive newlines into a single newline.
    const data = raw.replace(/\n+/g, "\n");
    return { data };
  }

  /**
   * Splits `this.content` into chunks and stores them in `this.docList`.
   *
   * @param options Optional chunking parameters. `chunkSize` defaults to 500
   *   and `chunkOverlap` to 100; an explicit `chunkOverlap` of 0 is honoured.
   * @returns The resulting documents, or `undefined` when there is no content.
   */
  async splitter(options?: { chunkSize?: number; chunkOverlap?: number }) {
    if (!this.content) return;
    // Default: recursive character text splitter.
    const splitter = new RecursiveCharacterTextSplitter({
      chunkSize: options?.chunkSize || 500,
      chunkOverlap: options?.chunkOverlap ?? 100,
    });
    this.docList = await splitter.splitDocuments([
      new Document({ pageContent: this.content }),
    ]);
    return this.docList;
  }

  /**
   * Extracts embedding vectors for every chunk in `this.docList`.
   *
   * Waits for the TensorFlow.js backend selection started in the constructor
   * before embedding.
   *
   * @see https://js.langchain.com/docs/integrations/text_embedding/tensorflow/
   * @returns One vector per chunk, or `undefined` when `docList` is empty.
   */
  // embedding vector(1536) NOT NULL -- NOTE: 1536 for ChatGPT
  async embedings() {
    if (!this.docList?.length) {
      return;
    }
    await this.backendReady;
    const embeddings = new TensorFlowEmbeddings();
    return embeddings.embedDocuments(this.docList.map((item) => item.pageContent));
  }
}

/**
 * Downloads `url` and returns the response body as a `Buffer`.
 *
 * @param url Location of the file to download.
 * @returns The response body.
 * @throws Error when the response status is not OK.
 */
export async function fetchFileBuffer(url: string): Promise<Buffer> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch file: ${response.status} ${response.statusText}`);
  }
  const arrayBuffer = await response.arrayBuffer();
  return Buffer.from(arrayBuffer);
}

/**
 * Selects a TensorFlow.js backend and waits until TensorFlow.js is ready.
 *
 * Candidates are tried in the order WebGPU, WebGL, WebAssembly, limited to
 * what the current environment appears to support. A candidate that
 * `tf.setBackend` does not accept is skipped. If none is accepted,
 * `tf.ready()` is still awaited and TensorFlow.js keeps its own default.
 */
async function setBackend(): Promise<void> {
  const candidates: string[] = [];

  // `navigator` and `document` are absent outside a browser context.
  const gpu =
    typeof navigator !== 'undefined'
      ? (navigator as Navigator & { gpu?: unknown }).gpu
      : undefined;
  if (gpu) {
    candidates.push("webgpu");
  }

  if (typeof document !== 'undefined') {
    const glcanvas = document.createElement('canvas');
    const webGL = glcanvas.getContext('webgl') || glcanvas.getContext('experimental-webgl');
    if (webGL) {
      candidates.push("webgl");
    }
  }

  if (typeof WebAssembly === 'object' && typeof WebAssembly.instantiate === 'function') {
    candidates.push("wasm");
  }

  for (const name of candidates) {
    try {
      // Resolves to false when the backend could not be activated.
      if (await tf.setBackend(name)) {
        break;
      }
    } catch (err) {
      console.error(err);
    }
  }

  await tf.ready();
}