123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- /**
- npm install -S langchain
- npm install mammoth -S
- npm i compressing -S
- npm install pdf-parse -S
- */
- var mammoth = require("mammoth");
- const fs = require('fs');
- const pdf = require('pdf-parse');
/**
 * Entry point: runs the PDF loader and the DOCX loader on the sample
 * pgvector documents.
 *
 * Both loaders are started together and then awaited, so the promise
 * returned by `main` settles only after both loader promises have settled.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await Promise.all([
    pdfLoader("../data/pgvector.pdf"),
    docsLoader("../data/pgvector.docx"),
  ]);
}

// Report any failure explicitly instead of leaving an unhandled rejection,
// and mark the process as failed so callers/scripts can detect it.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
/**
 * Converts a .docx file to HTML with mammoth and prints the generated HTML
 * followed by the conversion messages.
 *
 * Errors are logged with `console.error` rather than propagated, so one bad
 * document does not reject the returned promise.
 *
 * @param {string} path - Path of the .docx file; passed to mammoth as `{ path }`.
 * @returns {Promise<string|undefined>} The generated HTML, or `undefined`
 *   when the conversion failed.
 */
async function docsLoader(path) {
  try {
    // Awaiting (instead of a detached .then chain) lets callers know when
    // the conversion has actually finished.
    const { value: html, messages } = await mammoth.convertToHtml({ path });
    console.log(html); // The generated HTML
    console.log(messages); // Any messages, such as warnings during conversion
    return html;
  } catch (error) {
    console.error(error);
    return undefined;
  }
}
/**
 * Reads a PDF file, parses it with pdf-parse, and prints its page counts,
 * info, metadata, PDF.js version and extracted text.
 *
 * Errors from reading or parsing are logged with `console.error` rather than
 * propagated, matching the error handling of `docsLoader`.
 *
 * @param {string} path - Path of the PDF file to read.
 * @returns {Promise<object|undefined>} The object produced by pdf-parse, or
 *   `undefined` when reading or parsing failed.
 */
async function pdfLoader(path) {
  try {
    // Non-blocking read: avoids stalling the event loop inside an async function.
    const dataBuffer = await fs.promises.readFile(path);
    const data = await pdf(dataBuffer);

    // number of pages
    console.log(data.numpages);
    // number of rendered pages
    console.log(data.numrender);
    // PDF info
    console.log(data.info);
    // PDF metadata
    console.log(data.metadata);
    // PDF.js version
    // check https://mozilla.github.io/pdf.js/getting_started/
    console.log(data.version);
    // PDF text
    console.log(data.text);

    return data;
  } catch (error) {
    console.error(error);
    return undefined;
  }
}
|