Browse Source

feat: text embeddings in Angular

ryanemax 3 months ago
parent
commit
ced2f9f061

File diff suppressed because it is too large
+ 1168 - 131
package-lock.json


+ 5 - 0
package.json

@@ -27,18 +27,23 @@
     "@capacitor/keyboard": "6.0.3",
     "@capacitor/status-bar": "6.0.2",
     "@ionic/angular": "^8.0.0",
+    "@langchain/community": "^0.3.19",
     "@langchain/core": "^0.3.23",
     "@langchain/openai": "^0.3.14",
+    "@tensorflow-models/universal-sentence-encoder": "^1.3.3",
     "@tensorflow/tfjs": "^4.22.0",
+    "@tensorflow/tfjs-backend-cpu": "^4.22.0",
     "@tensorflow/tfjs-backend-wasm": "^4.22.0",
     "@tensorflow/tfjs-backend-webgl": "^4.22.0",
     "@tensorflow/tfjs-backend-webgpu": "^4.22.0",
+    "@tensorflow/tfjs-converter": "^4.22.0",
     "@tensorflow/tfjs-core": "^4.22.0",
     "@types/pdf-parse": "^1.1.4",
     "@vladmandic/face-api": "^1.7.14",
     "fmode-ng": "^0.0.63",
     "ionicons": "^7.2.1",
     "langchain": "^0.3.7",
+    "mammoth": "^1.8.0",
     "parse": "^5.3.0",
     "pdf-parse": "^1.1.1",
     "rxjs": "~7.8.0",

+ 29 - 10
rag-server/api/agent/loader/routes.js

@@ -1,14 +1,18 @@
 const express = require('express');
-const pdf = require('pdf-parse');
 const fs = require('fs');
 const path = require('path');
-
 const router = express.Router();
 
+const pdf = require('pdf-parse');
+const mammoth = require("mammoth");
+
+
 // POST /loader 路由
 router.post('/loader', async (req, res) => {
-    const { url } = req.body; // 从请求体中获取 URL
+    let url = req.body?.url; // 从请求体中获取 URL
 
+    console.log(req.body)
+    console.log(req.query)
     if (!url) {
         return res.status(400).json({ error: 'No URL provided' });
     }
@@ -18,20 +22,35 @@ router.post('/loader', async (req, res) => {
         const response = await fetch(url);
 
         if (!response.ok) {
-            throw new Error(`Failed to fetch PDF: ${response.statusText}`);
+            throw new Error(`Failed to fetch: ${response.statusText}`);
         }
 
-        const buffer = await response.buffer(); // 获取响应的 buffer
+        const arrayBuffer = await response.arrayBuffer();
+        const buffer = Buffer.from(arrayBuffer); // 将 ArrayBuffer 转换为 Buffer
+        // console.log(buffer)
 
-        // 解析 PDF 文件
-        const data = await pdf(buffer);
+        let type,data
+        if(url?.endsWith(".pdf")){
+            type = "pdf"
+            data = await pdf(buffer);
+        }
+        if(url?.endsWith(".docx")){
+            type = "docx"
+            let text = await mammoth.extractRawText(buffer)
+            // let html = mammoth.convertToHtml(buffer)
+            data = text?.value || "";
+        }
 
         // 返回解析后的数据
-        res.json(data);
+        res.json({
+            code:200,
+            type:type,
+            data:data,
+        });
     } catch (error) {
         // 处理错误
-        res.status(500).json({ error: 'Failed to parse PDF', details: error.message });
+        res.status(500).json({ error: 'Failed to parse File', details: error.message });
     }
 });
 
-module.exports = router;
+module.exports = router;

+ 4 - 2
rag-server/dev-server.js

@@ -7,8 +7,8 @@ const api = new ParseServer({
   databaseURI: 'postgresql://dev:666@dev.fmode.cn:5432/dev',
   appId: 'dev',
   masterKey: 'devmk',
-          masterKeyIps: ['0.0.0.0/0','::/0'] ,
-          allowClientClassCreation:true,
+  masterKeyIps: ['0.0.0.0/0','::/0'] ,
+  allowClientClassCreation:true,
   allowHeaders:["*"],
   allowOrigin:"*",
   // fileKey: 'myFileKey',
@@ -22,6 +22,8 @@ const api = new ParseServer({
 async function main(){
   await api.start();
 
+  app.use(express.json());
+
   // Serve the Parse API at /parse URL prefix
   app.use('/parse', api.app);
 

+ 183 - 0
rag-server/package-lock.json

@@ -9,6 +9,7 @@
       "version": "1.0.0",
       "license": "ISC",
       "dependencies": {
+        "mammoth": "^1.8.0",
         "parse-server": "^7.3.0",
         "pg-promise": "^11.10.2",
         "shelljs": "^0.8.5"
@@ -2481,6 +2482,15 @@
         "@types/webidl-conversions": "*"
       }
     },
+    "node_modules/@xmldom/xmldom": {
+      "version": "0.8.10",
+      "resolved": "https://registry.npmmirror.com/@xmldom/xmldom/-/xmldom-0.8.10.tgz",
+      "integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.0.0"
+      }
+    },
     "node_modules/abort-controller": {
       "version": "3.0.0",
       "resolved": "https://registry.npmmirror.com/abort-controller/-/abort-controller-3.0.0.tgz",
@@ -2604,6 +2614,15 @@
       "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==",
       "license": "ISC"
     },
+    "node_modules/argparse": {
+      "version": "1.0.10",
+      "resolved": "https://registry.npmmirror.com/argparse/-/argparse-1.0.10.tgz",
+      "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
+      "license": "MIT",
+      "dependencies": {
+        "sprintf-js": "~1.0.2"
+      }
+    },
     "node_modules/array-flatten": {
       "version": "1.1.1",
       "resolved": "https://registry.npmmirror.com/array-flatten/-/array-flatten-1.1.1.tgz",
@@ -2775,6 +2794,12 @@
         "readable-stream": "^3.4.0"
       }
     },
+    "node_modules/bluebird": {
+      "version": "3.4.7",
+      "resolved": "https://registry.npmmirror.com/bluebird/-/bluebird-3.4.7.tgz",
+      "integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==",
+      "license": "MIT"
+    },
     "node_modules/bn.js": {
       "version": "4.12.1",
       "resolved": "https://registry.npmmirror.com/bn.js/-/bn.js-4.12.1.tgz",
@@ -3352,6 +3377,12 @@
         "node": ">=8"
       }
     },
+    "node_modules/dingbat-to-unicode": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmmirror.com/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz",
+      "integrity": "sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==",
+      "license": "BSD-2-Clause"
+    },
     "node_modules/dset": {
       "version": "3.1.4",
       "resolved": "https://registry.npmmirror.com/dset/-/dset-3.1.4.tgz",
@@ -3361,6 +3392,15 @@
         "node": ">=4"
       }
     },
+    "node_modules/duck": {
+      "version": "0.1.12",
+      "resolved": "https://registry.npmmirror.com/duck/-/duck-0.1.12.tgz",
+      "integrity": "sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==",
+      "license": "BSD",
+      "dependencies": {
+        "underscore": "^1.13.1"
+      }
+    },
     "node_modules/dunder-proto": {
       "version": "1.0.0",
       "resolved": "https://registry.npmmirror.com/dunder-proto/-/dunder-proto-1.0.0.tgz",
@@ -4900,6 +4940,12 @@
         "node": ">= 4"
       }
     },
+    "node_modules/immediate": {
+      "version": "3.0.6",
+      "resolved": "https://registry.npmmirror.com/immediate/-/immediate-3.0.6.tgz",
+      "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==",
+      "license": "MIT"
+    },
     "node_modules/import-fresh": {
       "version": "3.3.0",
       "resolved": "https://registry.npmmirror.com/import-fresh/-/import-fresh-3.3.0.tgz",
@@ -5070,6 +5116,12 @@
       "integrity": "sha512-cyA56iCMHAh5CdzjJIa4aohJyeO1YbwLi3Jc35MmRU6poroFjIGZzUzupGiRPOjgHg9TLu43xbpwXk523fMxKA==",
       "license": "MIT"
     },
+    "node_modules/isarray": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmmirror.com/isarray/-/isarray-1.0.0.tgz",
+      "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
+      "license": "MIT"
+    },
     "node_modules/isexe": {
       "version": "2.0.0",
       "resolved": "https://registry.npmmirror.com/isexe/-/isexe-2.0.0.tgz",
@@ -5271,6 +5323,48 @@
         "extsprintf": "^1.2.0"
       }
     },
+    "node_modules/jszip": {
+      "version": "3.10.1",
+      "resolved": "https://registry.npmmirror.com/jszip/-/jszip-3.10.1.tgz",
+      "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==",
+      "license": "(MIT OR GPL-3.0-or-later)",
+      "dependencies": {
+        "lie": "~3.3.0",
+        "pako": "~1.0.2",
+        "readable-stream": "~2.3.6",
+        "setimmediate": "^1.0.5"
+      }
+    },
+    "node_modules/jszip/node_modules/readable-stream": {
+      "version": "2.3.8",
+      "resolved": "https://registry.npmmirror.com/readable-stream/-/readable-stream-2.3.8.tgz",
+      "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
+      "license": "MIT",
+      "dependencies": {
+        "core-util-is": "~1.0.0",
+        "inherits": "~2.0.3",
+        "isarray": "~1.0.0",
+        "process-nextick-args": "~2.0.0",
+        "safe-buffer": "~5.1.1",
+        "string_decoder": "~1.1.1",
+        "util-deprecate": "~1.0.1"
+      }
+    },
+    "node_modules/jszip/node_modules/safe-buffer": {
+      "version": "5.1.2",
+      "resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.1.2.tgz",
+      "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
+      "license": "MIT"
+    },
+    "node_modules/jszip/node_modules/string_decoder": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmmirror.com/string_decoder/-/string_decoder-1.1.1.tgz",
+      "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
+      "license": "MIT",
+      "dependencies": {
+        "safe-buffer": "~5.1.0"
+      }
+    },
     "node_modules/jwa": {
       "version": "2.0.0",
       "resolved": "https://registry.npmmirror.com/jwa/-/jwa-2.0.0.tgz",
@@ -5362,6 +5456,15 @@
         "node": ">= 0.8.0"
       }
     },
+    "node_modules/lie": {
+      "version": "3.3.0",
+      "resolved": "https://registry.npmmirror.com/lie/-/lie-3.3.0.tgz",
+      "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==",
+      "license": "MIT",
+      "dependencies": {
+        "immediate": "~3.0.5"
+      }
+    },
     "node_modules/limiter": {
       "version": "1.1.5",
       "resolved": "https://registry.npmmirror.com/limiter/-/limiter-1.1.5.tgz",
@@ -5493,6 +5596,17 @@
       "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==",
       "license": "Apache-2.0"
     },
+    "node_modules/lop": {
+      "version": "0.4.2",
+      "resolved": "https://registry.npmmirror.com/lop/-/lop-0.4.2.tgz",
+      "integrity": "sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "duck": "^0.1.12",
+        "option": "~0.2.1",
+        "underscore": "^1.13.1"
+      }
+    },
     "node_modules/lru-cache": {
       "version": "10.2.2",
       "resolved": "https://registry.npmmirror.com/lru-cache/-/lru-cache-10.2.2.tgz",
@@ -5533,6 +5647,30 @@
         "es5-ext": "~0.10.2"
       }
     },
+    "node_modules/mammoth": {
+      "version": "1.8.0",
+      "resolved": "https://registry.npmmirror.com/mammoth/-/mammoth-1.8.0.tgz",
+      "integrity": "sha512-pJNfxSk9IEGVpau+tsZFz22ofjUsl2mnA5eT8PjPs2n0BP+rhVte4Nez6FdgEuxv3IGI3afiV46ImKqTGDVlbA==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "@xmldom/xmldom": "^0.8.6",
+        "argparse": "~1.0.3",
+        "base64-js": "^1.5.1",
+        "bluebird": "~3.4.0",
+        "dingbat-to-unicode": "^1.0.1",
+        "jszip": "^3.7.1",
+        "lop": "^0.4.1",
+        "path-is-absolute": "^1.0.0",
+        "underscore": "^1.13.1",
+        "xmlbuilder": "^10.0.0"
+      },
+      "bin": {
+        "mammoth": "bin/mammoth"
+      },
+      "engines": {
+        "node": ">=12.0.0"
+      }
+    },
     "node_modules/math-intrinsics": {
       "version": "1.0.0",
       "resolved": "https://registry.npmmirror.com/math-intrinsics/-/math-intrinsics-1.0.0.tgz",
@@ -5916,6 +6054,12 @@
         "fn.name": "1.x.x"
       }
     },
+    "node_modules/option": {
+      "version": "0.2.4",
+      "resolved": "https://registry.npmmirror.com/option/-/option-0.2.4.tgz",
+      "integrity": "sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==",
+      "license": "BSD-2-Clause"
+    },
     "node_modules/optionator": {
       "version": "0.9.4",
       "resolved": "https://registry.npmmirror.com/optionator/-/optionator-0.9.4.tgz",
@@ -5977,6 +6121,12 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/pako": {
+      "version": "1.0.11",
+      "resolved": "https://registry.npmmirror.com/pako/-/pako-1.0.11.tgz",
+      "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==",
+      "license": "(MIT AND Zlib)"
+    },
     "node_modules/parent-module": {
       "version": "1.0.1",
       "resolved": "https://registry.npmmirror.com/parent-module/-/parent-module-1.0.1.tgz",
@@ -6516,6 +6666,12 @@
         "node": ">= 0.8.0"
       }
     },
+    "node_modules/process-nextick-args": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmmirror.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
+      "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
+      "license": "MIT"
+    },
     "node_modules/process-warning": {
       "version": "2.3.2",
       "resolved": "https://registry.npmmirror.com/process-warning/-/process-warning-2.3.2.tgz",
@@ -7016,6 +7172,12 @@
       "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
       "license": "ISC"
     },
+    "node_modules/setimmediate": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmmirror.com/setimmediate/-/setimmediate-1.0.5.tgz",
+      "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==",
+      "license": "MIT"
+    },
     "node_modules/setprototypeof": {
       "version": "1.2.0",
       "resolved": "https://registry.npmmirror.com/setprototypeof/-/setprototypeof-1.2.0.tgz",
@@ -7228,6 +7390,12 @@
         "node": ">= 10.x"
       }
     },
+    "node_modules/sprintf-js": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmmirror.com/sprintf-js/-/sprintf-js-1.0.3.tgz",
+      "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==",
+      "license": "BSD-3-Clause"
+    },
     "node_modules/sshpk": {
       "version": "1.18.0",
       "resolved": "https://registry.npmmirror.com/sshpk/-/sshpk-1.18.0.tgz",
@@ -7661,6 +7829,12 @@
         "node": ">= 0.6"
       }
     },
+    "node_modules/underscore": {
+      "version": "1.13.7",
+      "resolved": "https://registry.npmmirror.com/underscore/-/underscore-1.13.7.tgz",
+      "integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==",
+      "license": "MIT"
+    },
     "node_modules/undici-types": {
       "version": "6.20.0",
       "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-6.20.0.tgz",
@@ -8025,6 +8199,15 @@
         }
       }
     },
+    "node_modules/xmlbuilder": {
+      "version": "10.1.1",
+      "resolved": "https://registry.npmmirror.com/xmlbuilder/-/xmlbuilder-10.1.1.tgz",
+      "integrity": "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=4.0"
+      }
+    },
     "node_modules/xmlhttprequest": {
       "version": "1.8.0",
       "resolved": "https://registry.npmmirror.com/xmlhttprequest/-/xmlhttprequest-1.8.0.tgz",

+ 1 - 0
rag-server/package.json

@@ -9,6 +9,7 @@
   "license": "ISC",
   "description": "",
   "dependencies": {
+    "mammoth": "^1.8.0",
     "parse-server": "^7.3.0",
     "pg-promise": "^11.10.2",
     "shelljs": "^0.8.5"

+ 408 - 0
src/lib/ncloud.ts

@@ -0,0 +1,408 @@
+// CloudObject.ts
/**
 * Thin client-side wrapper around one object of a Parse Server class.
 *
 * Talks to the REST API at `http://dev.fmode.cn:1337/parse` using the
 * application id `dev`. User fields live in `data`; `id`, `createdAt` and
 * `updatedAt` are kept as separate properties and are never copied into
 * `data` by `set()`.
 */
export class CloudObject {
    /** Name of the Parse class this object belongs to. */
    className: string;
    /** Server-assigned objectId; `null` until the object has been saved. */
    id: string | null = null;
    createdAt: any;
    updatedAt: any;
    /** Field values that `save()` sends to the server. */
    data: Record<string, any> = {};

    constructor(className: string) {
        this.className = className;
    }

    /** Builds a Parse Pointer literal that references this object. */
    toPointer() {
        return { "__type": "Pointer", "className": this.className, "objectId": this.id };
    }

    /**
     * Copies every field of `json` into `data`, except the bookkeeping keys
     * (`objectId`, `id`, `createdAt`, `updatedAt`), which are ignored.
     */
    set(json: Record<string, any>) {
        const reserved = ["objectId", "id", "createdAt", "updatedAt"];
        for (const key of Object.keys(json)) {
            if (reserved.includes(key)) {
                continue;
            }
            this.data[key] = json[key];
        }
    }

    /**
     * Returns the stored value for `key`, or `null` when the field is absent.
     * Uses `??` so that legitimate falsy values (`0`, `false`, `""`) are
     * returned as stored instead of being replaced by `null`.
     */
    get(key: string) {
        return this.data[key] ?? null;
    }

    /**
     * Creates the object (POST) or, when it already has an id, updates it (PUT).
     * A server-side error is logged, not thrown.
     * @returns this object, with `id` filled in after a successful create.
     */
    async save() {
        let method = "POST";
        let url = `http://dev.fmode.cn:1337/parse/classes/${this.className}`;

        // An existing id means this is an update of a stored object.
        if (this.id) {
            url += `/${this.id}`;
            method = "PUT";
        }

        const body = JSON.stringify(this.data);
        const response = await fetch(url, {
            headers: {
                "content-type": "application/json;charset=UTF-8",
                "x-parse-application-id": "dev"
            },
            body: body,
            method: method,
            mode: "cors",
            credentials: "omit"
        });

        const result = await response?.json();
        if (result?.error) {
            console.error(result?.error);
        }
        if (result?.objectId) {
            this.id = result?.objectId;
        }
        return this;
    }

    /**
     * Deletes the object on the server.
     * @returns `undefined` when the object was never saved, `false` when the
     *          server reported an error (the id is kept so the call can be
     *          retried), `true` otherwise.
     */
    async destroy() {
        if (!this.id) return;
        const response = await fetch(`http://dev.fmode.cn:1337/parse/classes/${this.className}/${this.id}`, {
            headers: {
                "x-parse-application-id": "dev"
            },
            body: null,
            method: "DELETE",
            mode: "cors",
            credentials: "omit"
        });

        const result = await response?.json();
        if (result?.error) {
            // The object still exists remotely, so do not forget its id.
            console.error(result.error);
            return false;
        }
        if (result) {
            this.id = null;
        }
        return true;
    }
}
+
+// CloudQuery.ts
/**
 * Minimal query builder for one Parse Server class, executed through the
 * REST API at `http://dev.fmode.cn:1337/parse` with the application id `dev`.
 *
 * Constraints are accumulated in `queryParams` (`where`, `include`) and
 * serialised into the query string by `find()` and `first()`.
 */
export class CloudQuery {
    className: string;
    /** Raw REST query parameters, e.g. `{ where: {...}, include: [...] }`. */
    queryParams: Record<string, any> = {};

    constructor(className: string) {
        this.className = className;
    }

    /** Asks the server to inline the objects behind the given pointer fields. */
    include(...fields: string[]) {
        this.queryParams["include"] = fields;
    }

    /** Adds the constraint `key > value`. */
    greaterThan(key: string, value: any) {
        this.addConstraint(key, "$gt", value);
    }

    /** Adds the constraint `key >= value`. */
    greaterThanAndEqualTo(key: string, value: any) {
        this.addConstraint(key, "$gte", value);
    }

    /** Adds the constraint `key < value`. */
    lessThan(key: string, value: any) {
        this.addConstraint(key, "$lt", value);
    }

    /** Adds the constraint `key <= value`. */
    lessThanAndEqualTo(key: string, value: any) {
        this.addConstraint(key, "$lte", value);
    }

    /** Adds the constraint `key == value`, replacing any earlier constraint on `key`. */
    equalTo(key: string, value: any) {
        if (!this.queryParams["where"]) this.queryParams["where"] = {};
        this.queryParams["where"][key] = value;
    }

    /**
     * Fetches one object by id.
     * @returns the raw JSON the server sent (not a CloudObject), or `{}`.
     */
    async get(id: string) {
        const url = `http://dev.fmode.cn:1337/parse/classes/${this.className}/${id}`;
        const response = await fetch(url, this.requestInit());
        const json = await response?.json();
        return json || {};
    }

    /**
     * Runs the query.
     * @returns every matching row wrapped in a CloudObject (possibly empty).
     */
    async find() {
        const response = await fetch(this.buildUrl(), this.requestInit());
        const json = await response?.json();
        const rows: any[] = json?.results || [];
        return rows.map((row: any) => this.dataToObj(row));
    }

    /**
     * Runs the query and returns only the first match.
     * Works with no constraints at all, and asks the server for a single row.
     * @returns the first matching object, or `null` when nothing matches.
     */
    async first() {
        const response = await fetch(this.buildUrl({ limit: "1" }), this.requestInit());
        const json = await response?.json();
        const firstRow = json?.results?.[0] || null;
        return firstRow ? this.dataToObj(firstRow) : null;
    }

    /** Wraps one raw result row in a CloudObject of this query's class. */
    dataToObj(exists: any): CloudObject {
        const wrapped = new CloudObject(this.className);
        wrapped.set(exists);
        wrapped.id = exists.objectId;
        wrapped.createdAt = exists.createdAt;
        wrapped.updatedAt = exists.updatedAt;
        return wrapped;
    }

    /**
     * Stores `{ [operator]: value }` under `where[key]`, creating `where` and
     * the per-key operator map on demand so that a comparison may be the
     * first constraint added to a query.
     */
    private addConstraint(key: string, operator: string, value: any) {
        if (!this.queryParams["where"]) this.queryParams["where"] = {};
        const where = this.queryParams["where"];
        const current = where[key];
        // A plain value left by equalTo() cannot carry operators; start over.
        if (current === null || typeof current !== "object") {
            where[key] = {};
        }
        where[key][operator] = value;
    }

    /**
     * Serialises `queryParams` (plus `extra`) into a percent-encoded query
     * string. `include` is sent as a comma-separated list, everything else
     * as JSON.
     */
    private buildUrl(extra: Record<string, string> = {}): string {
        const params = new URLSearchParams();
        for (const [key, value] of Object.entries(this.queryParams)) {
            if (value === undefined || value === null) continue;
            if (key === "include") {
                const joined = Array.isArray(value) ? value.join(",") : String(value);
                if (joined) params.set(key, joined);
            } else {
                params.set(key, JSON.stringify(value));
            }
        }
        for (const [key, value] of Object.entries(extra)) {
            params.set(key, value);
        }
        const base = `http://dev.fmode.cn:1337/parse/classes/${this.className}`;
        const query = params.toString();
        return query ? `${base}?${query}` : base;
    }

    /**
     * Options shared by all read requests. No `if-none-match` header is
     * sent: a fixed ETag could make the server answer 304 with an empty
     * body, which cannot be parsed as JSON.
     */
    private requestInit(): RequestInit {
        return {
            headers: {
                "x-parse-application-id": "dev"
            },
            body: null,
            method: "GET",
            mode: "cors",
            credentials: "omit"
        };
    }
}
+
+// CloudUser.ts
/**
 * The current user of the Parse `_User` class.
 *
 * The logged-in user's server record (including its session token) is cached
 * in `localStorage` under `NCloud/dev/User` and restored on construction.
 */
export class CloudUser extends CloudObject {
    /** Session token of the logged-in user; empty or `null` when logged out. */
    sessionToken: string | null = "";

    constructor() {
        super("_User");
        // Restore the cached session, if any.
        let userData: any = null;
        try {
            const userCacheStr = localStorage.getItem("NCloud/dev/User");
            if (userCacheStr) {
                userData = JSON.parse(userCacheStr);
            }
        } catch (err) {
            // A corrupt or unreadable cache must not make construction throw.
            console.error(err);
        }
        if (userData) {
            this.id = userData?.objectId;
            this.sessionToken = userData?.sessionToken;
            this.data = userData;
        }
    }

    /**
     * Returns the locally known user.
     * No request is made; the result reflects the cached session only.
     * @returns this user, or `null` when there is no session token.
     */
    async current() {
        if (!this.sessionToken) {
            console.error("用户未登录");
            return null;
        }
        return this;
    }

    /**
     * Logs in with username and password and caches the returned session.
     * @returns this user, or `null` when the server reported an error.
     */
    async login(username: string, password: string): Promise<CloudUser | null> {
        const response = await fetch(`http://dev.fmode.cn:1337/parse/login`, {
            headers: {
                "x-parse-application-id": "dev",
                "Content-Type": "application/json"
            },
            body: JSON.stringify({ username, password }),
            method: "POST"
        });

        const result = await response?.json();
        if (result?.error) {
            console.error(result?.error);
            return null;
        }

        this.applySession(result);
        return this;
    }

    /**
     * Ends the session on the server and clears the local state and cache.
     * @returns `undefined` when not logged in, `false` when the server
     *          reported an error (local state is kept), `true` on success.
     */
    async logout() {
        if (!this.sessionToken) {
            console.error("用户未登录");
            return;
        }

        const response = await fetch(`http://dev.fmode.cn:1337/parse/logout`, {
            headers: {
                "x-parse-application-id": "dev",
                "x-parse-session-token": this.sessionToken
            },
            method: "POST"
        });

        const result = await response?.json();
        if (result?.error) {
            console.error(result?.error);
            return false;
        }

        localStorage.removeItem("NCloud/dev/User")
        this.id = null;
        this.sessionToken = null;
        this.data = {};
        return true;
    }

    /**
     * Registers a new user and caches the returned session.
     * @param additionalData extra fields merged into the new user record.
     * @returns this user, or `null` when the server reported an error.
     */
    async signUp(username: string, password: string, additionalData: Record<string, any> = {}) {
        const userData = {
            username,
            password,
            ...additionalData
        };

        const response = await fetch(`http://dev.fmode.cn:1337/parse/users`, {
            headers: {
                "x-parse-application-id": "dev",
                "Content-Type": "application/json"
            },
            body: JSON.stringify(userData),
            method: "POST"
        });

        const result = await response?.json();
        if (result?.error) {
            console.error(result?.error);
            return null;
        }

        this.applySession(result);
        return this;
    }

    /**
     * Creates or updates the user record and refreshes the local cache.
     * Server-managed fields (`createdAt`, `updatedAt`, `ACL`, `objectId`)
     * are stripped from the payload. A server-side error is logged, not thrown.
     */
    override async save() {
        let method = "POST";
        let url = `http://dev.fmode.cn:1337/parse/users`;

        // An existing id means this is an update of a stored user.
        if (this.id) {
            url += `/${this.id}`;
            method = "PUT";
        }

        // Work on a deep copy so `this.data` keeps its bookkeeping fields.
        const payload: any = JSON.parse(JSON.stringify(this.data))
        delete payload.createdAt
        delete payload.updatedAt
        delete payload.ACL
        delete payload.objectId
        const body = JSON.stringify(payload);

        const headersOptions: Record<string, string> = {
            "content-type": "application/json;charset=UTF-8",
            "x-parse-application-id": "dev",
        }
        // Only authenticate when there is a token; a null value would be
        // sent as the literal header text "null".
        if (this.sessionToken) {
            headersOptions["x-parse-session-token"] = this.sessionToken;
        }

        const response = await fetch(url, {
            headers: headersOptions,
            body: body,
            method: method,
            mode: "cors",
            credentials: "omit"
        });

        const result = await response?.json();
        if (result?.error) {
            console.error(result?.error);
        }
        if (result?.objectId) {
            this.id = result?.objectId;
        }
        localStorage.setItem("NCloud/dev/User", JSON.stringify(this.data))
        return this;
    }

    /**
     * Adopts a login/sign-up response as the current session and caches it.
     * The response is deliberately not logged: it contains the session token.
     */
    private applySession(result: any) {
        this.id = result?.objectId;
        this.sessionToken = result?.sessionToken;
        this.data = result;
        localStorage.setItem("NCloud/dev/User", JSON.stringify(result))
    }
}
+
/**
 * Small JSON client for the custom `/api/*` routes of the backend.
 */
export class CloudApi {
    /** Origin of the backend, without a trailing slash. */
    host: string;

    /**
     * @param host backend origin. Defaults to the local development server;
     *             pass e.g. `http://dev.fmode.cn:1337` to target the shared one.
     */
    constructor(host: string = "http://127.0.0.1:1337") {
        this.host = host;
    }

    /**
     * Sends a JSON request to `${host}/api/${path}` and parses the JSON reply.
     *
     * @param path    route below `/api/`, e.g. `agent/loader`.
     * @param body    JSON-serialisable payload; when `null`/`undefined`,
     *                `options.body` is used instead.
     * @param options optional overrides; `method` defaults to `POST`.
     * @returns the parsed JSON response body.
     * @throws when the request fails or the response is not valid JSON.
     */
    async fetch(path: string, body: any, options?: {
        method?: string
        body?: any
    }) {
        const reqOpts: any = {
            headers: {
                "x-parse-application-id": "dev",
                "Content-Type": "application/json"
            },
            method: options?.method || "POST",
            mode: "cors",
            credentials: "omit"
        }
        // `??` keeps falsy but valid JSON payloads such as 0, false or "".
        const payload = body ?? options?.body;
        if (payload !== undefined && payload !== null) {
            reqOpts.body = JSON.stringify(payload);
        }
        const url = `${this.host}/api/` + path
        const response = await fetch(url, reqOpts);
        return await response.json();
    }
}

+ 16 - 0
src/modules/story/page-hangzhou/page-hangzhou.component.html

@@ -18,6 +18,22 @@
           <ion-button (click)="preview(file)">预览</ion-button>
           <ion-button (click)="loader(file)">加载</ion-button>
         </ion-item>
+        @if(storyMap[file?.url]){
+          <ion-item>
+            加载:正文{{storyMap[file?.url]?.content?.length}}字符 
+            <ion-button (click)="splitter(storyMap[file?.url])">分割</ion-button>
+          </ion-item>
+        }
+        @if(storyMap[file?.url]?.docList?.length){
+          <div style="display: flex;flex-direction: column;">
+            <ion-button (click)="embedings(storyMap[file?.url])">嵌入</ion-button>
+            分割:文本块{{storyMap[file?.url]?.docList?.length}}个
+            @for(doc of storyMap[file?.url]?.docList;track doc){
+              <p>行{{doc.metadata?.loc?.lines?.from}}至行{{doc.metadata?.loc?.lines?.to}}</p>
+              <p>{{doc.pageContent}}</p>
+            }
+          </div>
+        }
       }
     </ion-list>
   

+ 16 - 6
src/modules/story/page-hangzhou/page-hangzhou.component.ts

@@ -2,7 +2,7 @@ import { Component, OnInit } from '@angular/core';
 import { CommonModule } from '@angular/common';
     
 import { IonContent,IonButton,IonSegment,IonSegmentButton,IonLabel,IonList,IonItem } from "@ionic/angular/standalone";
-import { StoryLoader } from '../story-loader/story.loader';
+import { AgentStory } from '../story-loader/story.loader';
 
 @Component({
   selector: 'app-page-hangzhou',
@@ -31,19 +31,29 @@ export class PageHangzhouComponent  implements OnInit {
   fileList:Array<any> = [
     {
       title:`市委办公厅 市政府办公厅印发《关于服务保障“抓防控促发展”落实“人才生态37条”的补充意见》的通知`,
-      url:`https://app.fmode.cn/dev/jxnu/case/2020%E5%B9%B4%E6%9D%AD%E5%B7%9E%E5%B8%82%E4%BA%BA%E6%89%8D37%E6%9D%A1.pdf`
+      url:`https://app.fmode.cn/dev/jxnu/case/2020%E5%B9%B4%E6%9D%AD%E5%B7%9E%E5%B8%82%E4%BA%BA%E6%89%8D37%E6%9D%A1.docx`
     },
     {
       title:"杭州市余杭区服务保障高层次人才创新创业政策实施细则",
-      url:"https://app.fmode.cn/dev/jxnu/case/2022%E5%B9%B4%E6%9D%AD%E5%B7%9E%E4%BD%99%E6%9D%AD%E4%BA%BA%E6%89%8D%E5%8F%8C%E5%88%9B%E4%BF%9D%E9%9A%9C%E7%BB%86%E5%88%99.pdf"
+      url:"https://app.fmode.cn/dev/jxnu/case/2022年杭州余杭.docx"
     }
   ]
+  storyMap:any = {}
   /** Opens the file's URL in a new browser tab. */
   preview(file:any){
     const target = file.url
     window.open(target,"_blank")
   }
-  loader(file:any){
-    let storyLoader = new StoryLoader();
-    let pdfData = storyLoader.loader(file.url);
+  async loader(file:any){
+    let story = new AgentStory();
+    await story.loader(file.url);
+    console.log(story.content);
+    this.storyMap[file?.url] = story;
+  }
+  async splitter(story:AgentStory){
+
+    await story.splitter();
+  }
+  async embedings(story:AgentStory){
+    await story.embedings()
   }
 
 }

+ 121 - 8
src/modules/story/story-loader/story.loader.ts

@@ -1,12 +1,91 @@
-import pdf from 'pdf-parse';
-import fs from 'fs';
- 
- 
-export class StoryLoader{
+// import pdf from 'pdf-parse';
+// import fs from 'fs';
+import { CloudApi } from 'src/lib/ncloud';
+import mammoth from "mammoth";
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
+import { Document } from '@langchain/core/documents';
 
+import * as tf from '@tensorflow/tfjs-core';
+// import "@tensorflow/tfjs-backend-cpu";
+// import '@tensorflow/tfjs-backend-webgpu';
+import '@tensorflow/tfjs-backend-webgl';
+// import '@tensorflow/tfjs-backend-wasm';
+import { TensorFlowEmbeddings } from "@langchain/community/embeddings/tensorflow";
+
/**
 * One source document moving through the pipeline
 * load text -> split into chunks -> embed chunks.
 */
export class AgentStory{

    /** URL the content was loaded from; set only after a successful load. */
    url:string = ""
    /** Extracted plain text; empty string when nothing has been loaded. */
    content:string = ""
    /** Chunks produced by `splitter()`. */
    docList:Array<Document> = []

    constructor(){
        // A constructor cannot await; log a failure instead of leaving an
        // unhandled promise rejection behind.
        setBackend().catch((err:unknown)=>console.error(err))
    }

    /**
     * Loads the text behind `url` into `content`.
     *
     * `.docx` files are first extracted in the browser; when that yields no
     * text, and for every other file type, the `agent/loader` API is asked
     * to do the extraction.
     *
     * @returns the extracted text, or an empty string when none was obtained.
     */
    async loader(url:string){
        let result:any = null;
        if(url?.endsWith(".docx")){
            result = await this.loadDocx(url)
        }
        // loadDocx always returns an object, so test the text it carries.
        if(!result?.data){
            const api = new CloudApi();
            result = await api.fetch("agent/loader",{url:url})
        }

        const data = result?.data
        if(typeof data === "string"){
            this.content = data
        }else if(typeof data?.text === "string"){
            // NOTE(review): for PDFs the API appears to return the parser's
            // result object; presumably its text is under `text` — confirm.
            this.content = data.text
        }else{
            this.content = ""
        }
        if(this.content){
            this.url = url
        }
        return this.content
    }

    /**
     * Downloads a `.docx` file and extracts its raw text in the browser.
     * Extraction problems are logged, not thrown.
     * @returns `{data}` where `data` is the text with runs of newlines
     *          collapsed to one, or an empty string on failure.
     */
    async loadDocx(url:string){
        const response = await fetch(url);
        if(!response.ok){
            console.error(`Failed to fetch: ${response.statusText}`);
            return {data:""}
        }

        const arrayBuffer = await response.arrayBuffer();

        let text;
        try {
            // In the browser mammoth takes the ArrayBuffer directly.
            text = await mammoth.extractRawText({arrayBuffer:arrayBuffer});
        } catch (err) {
            console.error(err);
        }
        const raw:string = text?.value || "";
        const data = raw.replace(/\n+/g,"\n") // collapse redundant line breaks
        return {data}
    }

    /**
     * Splits `content` into overlapping chunks with a recursive character
     * text splitter and stores them in `docList`.
     *
     * @param options `chunkSize` defaults to 500 and `chunkOverlap` to 100;
     *                an explicit `chunkOverlap` of 0 is honoured.
     * @returns the chunks, or `undefined` when there is no content.
     */
    async splitter(options?:{
        chunkSize?:number,
        chunkOverlap?:number
    }){
        if(!this.content) return
        const textSplitter = new RecursiveCharacterTextSplitter({
            // 0 is not a usable size, so `||` is intended here.
            chunkSize: options?.chunkSize || 500,
            // 0 is a valid overlap, so only fall back when it is missing.
            chunkOverlap: options?.chunkOverlap ?? 100,
        });

        const docOutput = await textSplitter.splitDocuments([
            new Document({ pageContent: this.content }),
        ]);
        console.log(docOutput)
        this.docList = docOutput
        return this.docList
    }

    /**
     * Computes an embedding vector for every chunk in `docList`.
     * @see https://js.langchain.com/docs/integrations/text_embedding/tensorflow/
     * @returns one vector per chunk, or `undefined` when there are no chunks.
     */
    async embedings(){
        if(!this.docList?.length){return}
        const embeddings = new TensorFlowEmbeddings();
        const texts = this.docList.map(item=>item.pageContent);
        const documentRes = await embeddings.embedDocuments(texts);
        console.log(documentRes);
        return documentRes;
    }
}
 
@@ -19,4 +98,38 @@ export async function fetchFileBuffer(url: string): Promise<Buffer> {
 
     const arrayBuffer = await response.arrayBuffer();
     return Buffer.from(arrayBuffer);
-}
+}
+
/**
 * Selects a TensorFlow.js backend and waits until it is ready.
 *
 * Candidates are tried in order of preference (WebGPU, WebGL, WebAssembly),
 * limited to what the browser supports. A candidate that cannot be
 * activated, for example because its backend package is not imported and
 * therefore not registered, is skipped instead of aborting the selection.
 * When no candidate can be activated, TensorFlow.js keeps its own default.
 */
async function setBackend(){
    const candidates:string[] = []

    if ((navigator as any).gpu) {
        candidates.push("webgpu")
    }

    const probe = document.createElement('canvas');
    if (probe.getContext('webgl') || probe.getContext('experimental-webgl')) {
        candidates.push("webgl")
    }

    if (typeof WebAssembly === 'object' && typeof WebAssembly.instantiate === 'function') {
        candidates.push("wasm")
    }

    for (const name of candidates) {
        try {
            // Resolves to true once the backend has been initialised.
            if (await tf.setBackend(name)) break
        } catch (err) {
            console.warn(`TensorFlow.js backend "${name}" is unavailable`, err)
        }
    }

    await tf.ready();
    return
}

Some files were not shown because too many files changed in this diff