| 
					
				 | 
			
			
				@@ -1,6 +1,6 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 // import pdf from 'pdf-parse'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 // import fs from 'fs'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import { CloudApi } from 'src/lib/ncloud'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import { CloudApi, CloudObject, CloudQuery } from 'src/lib/ncloud'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import mammoth from "mammoth"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import { Document } from '@langchain/core/documents'; 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -14,12 +14,47 @@ import { TensorFlowEmbeddings } from "@langchain/community/embeddings/tensorflow 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 export class AgentStory{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    url:string = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    content:string = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    docList:Array<Document> = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    constructor(){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    story:CloudObject|undefined 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 文件标题 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    title:string|undefined = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 文档标签 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    tags:Array<string>|undefined 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 文件源地址 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    url:string|undefined = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 文档完整纯文本内容 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    content:string|undefined = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 文档hash唯一值 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    hash:string|undefined = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 文档分割后的列表 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    docList:Array<Document|any> = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Creates a story handle from its source metadata and kicks off
				 * TensorFlow backend initialisation.
				 *
				 * @param metadata.url   Source address of the file.
				 * @param metadata.title Optional document title.
				 * @param metadata.tags  Optional document tags.
				 */
				constructor(metadata:{
				    url:string,
				    title?:string,
				    tags?:Array<string>
				}){
				    this.url = metadata.url
				    this.title = metadata.title
				    this.tags = metadata.tags
				    // A constructor cannot await, so backend setup stays fire-and-forget;
				    // the rejection is handled explicitly so a failed setup is logged
				    // instead of surfacing as an unhandled promise rejection.
				    // NOTE(review): nothing here guarantees the backend is ready before
				    // the first embedding call — confirm callers tolerate that.
				    void setBackend().catch((err: unknown) => {
				        console.error(err);
				    });
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Creates or updates the `Story` cloud record for this document.
				 *
				 * The record is looked up by `hash`; when none with an id is found a
				 * new `Story` object is created. The saved object is kept on
				 * `this.story`. Does nothing while `hash` is empty.
				 */
				async save(){
				    // The hash is the lookup key, so there is nothing to do without it.
				    if(!this.hash){ return }

				    const lookup = new CloudQuery("Story");
				    lookup.equalTo("hash",this.hash);
				    const existing = await lookup.first();

				    // Reuse the stored record when it has an id, otherwise start a new one.
				    const record = existing?.id ? existing : new CloudObject("Story");
				    record.set({
				        title: this.title,
				        url: this.url,
				        content: this.content,
				        hash: this.hash,
				        tags:this.tags
				    })
				    this.story = await record.save();
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     async loader(url:string){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         let api = new CloudApi(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -34,6 +69,7 @@ export class AgentStory{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if(this.content){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             this.url = url 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        this.save(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return this.content 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -49,8 +85,12 @@ export class AgentStory{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } catch (err) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             console.error(err); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        this.hash = await arrayBufferToHASH(arrayBuffer) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         // let html = mammoth.convertToHtml(buffer) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         data = text?.value || ""; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // 正则匹配所有 多个\n换行的字符 替换成一次换行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         data = data.replaceAll(/\n+/g,"\n") // 剔除多余换行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return {data} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -62,7 +102,7 @@ export class AgentStory{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         // 默认:递归字符文本分割器 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         let splitter = new RecursiveCharacterTextSplitter({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             chunkSize: options?.chunkSize || 500, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            chunkOverlap: options?.chunkOverlap || 100, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            chunkOverlap: options?.chunkOverlap || 150, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         }); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				            
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         let docOutput = await splitter.splitDocuments([ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -79,14 +119,51 @@ export class AgentStory{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				      * https://js.langchain.com/docs/integrations/text_embedding/tensorflow/ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				      * @returns  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				      */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    //  embedding vector(1536) NOT NULL -- NOTE: 1536 for ChatGPT 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    //  TensorFlow embedding vector(512) NOT NULL -- NOTE: 512 for Tensorflow 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    //  OpenAI embedding vector(1536) NOT NULL -- NOTE: 1536 for ChatGPT 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     /**
				      * Embeds every chunk in `docList` with TensorFlowEmbeddings and
				      * persists each chunk together with its vector as a `Document`
				      * cloud record, looked up by the SHA-256 hash of its page content.
				      *
				      * The method resolves only after every persistence attempt has
				      * settled. A failed attempt is logged and does not reject the call.
				      *
				      * @returns The embedding vectors in `docList` order, or `undefined`
				      *          when `docList` is empty.
				      */
				     async embedings(){
				         if(!this.docList?.length){return}
				         const embeddings = new TensorFlowEmbeddings();
				         let documentRes = await embeddings.embedDocuments(this.docList?.map(item=>item.pageContent));
				         console.log(documentRes);

				         // Persist the vectors. `map` + `Promise.allSettled` is used instead of
				         // `forEach(async ...)` so the writes are actually awaited.
				         const outcomes = await Promise.allSettled(documentRes.map(async (vector512,index)=>{
				             // Each chunk carries `metadata` and `pageContent`.
				             const chunk = this.docList[index]
				             chunk.vector512 = vector512
				             const hash = await arrayBufferToHASH(stringToArrayBuffer(chunk?.pageContent))

				             // Reuse the record with the same content hash when one exists.
				             const query = new CloudQuery("Document");
				             query.equalTo("hash",hash);
				             let docObj = await query.first()
				             if(!docObj?.id){
				                 docObj = new CloudObject("Document");
				             }
				             docObj.set({
				                 metadata:chunk?.metadata,
				                 pageContent:chunk?.pageContent,
				                 vector512:vector512,
				                 hash:hash,
				                 // NOTE(review): undefined until save() has stored the story — confirm call order.
				                 story:this.story?.toPointer(),
				             })
				             await docObj.save();
				         }))

				         outcomes.forEach((outcome,index)=>{
				             if(outcome.status === "rejected"){
				                 console.error(`Failed to persist document chunk ${index}`, outcome.reason);
				             }
				         })
				         return documentRes;
				     }
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Deletes every `Document` record returned by a query on this story.
				 *
				 * Does nothing when the story has no id. The method resolves only
				 * after every deletion has settled; a failed deletion is logged and
				 * does not reject the call.
				 */
				async destoryAllDocument(){
				    if(!this.story?.id){ return }

				    const query = new CloudQuery("Document");
				    // NOTE(review): embedings() stores `story` via toPointer(), while this
				    // filter compares against the raw id — confirm equalTo matches pointers.
				    query.equalTo("story",this.story?.id);
				    const docList = await query.find();

				    // Await the deletions; forEach would fire them and return immediately.
				    const outcomes = await Promise.allSettled(docList.map(doc=>doc.destroy()));
				    outcomes.forEach((outcome,index)=>{
				        if(outcome.status === "rejected"){
				            console.error(`Failed to destroy document ${index}`, outcome.reason);
				        }
				    })
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 export async function fetchFileBuffer(url: string): Promise<Buffer> { 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -132,4 +209,54 @@ async function setBackend(){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         backend&&await tf.setBackend(backend); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         await tf.ready(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Computes the SHA-256 digest of binary data with the SubtleCrypto API.
				 *
				 * @param arrayBuffer Raw bytes to hash (a buffer or a typed-array/DataView view).
				 * @returns The digest as a lowercase hexadecimal string, two digits per byte.
				 */
				export async function arrayBufferToHASH(arrayBuffer: ArrayBufferLike | ArrayBufferView): Promise<string> {
				    // The parameter is typed wider than BufferSource so views over any
				    // buffer kind keep compiling; digest() takes the bytes unchanged.
				    const hashBuffer = await crypto.subtle.digest('SHA-256', arrayBuffer as BufferSource);
				    // Render each digest byte as exactly two hex digits.
				    return Array.from(new Uint8Array(hashBuffer), byte => byte.toString(16).padStart(2, '0')).join('');
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Encodes a string as UTF-8 and returns the bytes as a buffer.
				 *
				 * The result's length is the UTF-8 byte count, which exceeds the
				 * string length for non-ASCII text.
				 *
				 * @param str Text to encode.
				 * @returns A buffer holding exactly the encoded bytes.
				 */
				export function stringToArrayBuffer(str:string): ArrayBufferLike {
				    const bytes = new TextEncoder().encode(str);
				    // Copy out only the view's own range so the result never exposes
				    // extra bytes if the encoder returns a view over a larger buffer.
				    return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Embeds a single query with TensorFlowEmbeddings.
				 *
				 * A new embeddings instance is created on every call.
				 *
				 * @param str Query to embed; passed straight to `embedQuery`.
				 * @returns The embedding vector for the query.
				 */
				export async function EmbedQuery(str:any):Promise<Array<number>>{
				    const model = new TensorFlowEmbeddings();
				    return await model.embedQuery(str);
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Ranks documents by cosine similarity to a query vector.
				 *
				 * Each document gets a `similarity` property and `docList` is sorted
				 * in place, most similar first. A document whose `vector512` is missing
				 * or has a different length than the query cannot be scored; it gets
				 * the lowest cosine value (-1) so it sorts last, instead of crashing
				 * or injecting NaN into the comparator.
				 *
				 * @param vector1 Query embedding.
				 * @param docList Documents carrying a `vector512` embedding.
				 * @returns The same `docList` array, sorted by descending similarity.
				 * @throws Error when the query or a scored document vector is a zero vector.
				 */
				export function RetriveAllDocument(vector1: Array<number>, docList: Array<any>): Array<any> {
				    const unscoredSimilarity = -1;

				    docList.forEach(doc => {
				        const candidate = doc.vector512;
				        const comparable = candidate != null && candidate.length === vector1.length;
				        doc.similarity = comparable
				            ? cosineSimilarity(vector1, candidate)
				            : unscoredSimilarity;
				    });

				    // Descending order: highest similarity first.
				    docList.sort((a, b) => b.similarity - a.similarity);

				    return docList;
				}

				/** Sum of element-wise products; `vectorB` is read at the indices of `vectorA`. */
				function dotProduct(vectorA: number[], vectorB: number[]): number {
				    return vectorA.reduce((sum, value, index) => sum + value * vectorB[index], 0);
				}

				/** Euclidean length of a vector. */
				function magnitude(vector: number[]): number {
				    return Math.sqrt(vector.reduce((sum, value) => sum + value * value, 0));
				}

				/**
				 * Cosine of the angle between two vectors of equal length.
				 *
				 * @throws Error when the lengths differ or either vector is a zero vector.
				 */
				function cosineSimilarity(vectorA: number[], vectorB: number[]): number {
				    // With unequal lengths the dot product would read past the shorter vector.
				    if (vectorA.length !== vectorB.length) {
				        throw new Error("Vectors have different lengths, cannot compute cosine similarity.");
				    }

				    const dotProd = dotProduct(vectorA, vectorB);
				    const magnitudeA = magnitude(vectorA);
				    const magnitudeB = magnitude(vectorB);

				    if (magnitudeA === 0 || magnitudeB === 0) {
				        throw new Error("One or both vectors are zero vectors, cannot compute cosine similarity.");
				    }

				    return dotProd / (magnitudeA * magnitudeB);
				}
			 |