test.loader.js 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. /**
  2. npm install -S langchain
  3. npm install mammoth -S
  4. npm i compressing -S
  5. npm install pdf-parse -S
  6. */
  7. var mammoth = require("mammoth");
  8. const fs = require('fs');
  9. const pdf = require('pdf-parse');
  /**
   * Entry point: starts the PDF and DOCX sample loaders and waits for both.
   *
   * The loaders are invoked in the same order as before (PDF first, then
   * DOCX); both are started before awaiting so they run concurrently.
   *
   * @returns {Promise<void>} Fulfils once both loader promises have fulfilled;
   *   rejects if either loader promise rejects.
   */
  async function main() {
    // Await the loader promises instead of leaving them floating, so callers
    // of main() can observe completion and failures.
    await Promise.all([
      pdfLoader("../data/pgvector.pdf"),
      docsLoader("../data/pgvector.docx"),
    ]);
  }
  // Run the script. The promise returned by main() gets an explicit rejection
  // handler so a failure is reported and reflected in the process exit code
  // instead of surfacing as an unhandled rejection.
  main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
  /**
   * Converts the document at `path` to HTML with mammoth and prints the
   * generated HTML followed by the conversion messages.
   *
   * Errors are logged with console.error and not rethrown, so the returned
   * promise fulfils even when the conversion fails.
   *
   * @param {string} path - Forwarded to mammoth.convertToHtml as `{ path }`.
   * @returns {Promise<void>} Fulfils after the result (or the error) has been
   *   logged.
   */
  async function docsLoader(path) {
    try {
      // Await the conversion so the promise returned by docsLoader only
      // settles once the output has actually been printed.
      const result = await mammoth.convertToHtml({ path });
      const html = result.value; // The generated HTML
      const messages = result.messages; // Any messages, such as warnings during conversion
      console.log(html);
      console.log(messages);
    } catch (error) {
      console.error(error);
    }
  }
  /**
   * Reads the file at `path`, parses it with pdf-parse and prints the fields
   * of the parse result: numpages, numrender, info, metadata, version, text.
   *
   * Errors from reading or parsing are logged with console.error and not
   * rethrown, matching docsLoader's error handling.
   *
   * @param {string|URL|Buffer} path - Location of the file; passed to
   *   fs.promises.readFile.
   * @returns {Promise<void>} Fulfils after the result (or the error) has been
   *   logged.
   */
  async function pdfLoader(path) {
    try {
      // Non-blocking read: this is an async function, so avoid stalling the
      // event loop with a synchronous file read.
      const dataBuffer = await fs.promises.readFile(path);
      // Await the parse so the promise returned by pdfLoader only settles
      // once the output has been printed.
      const data = await pdf(dataBuffer);
      // number of pages
      console.log(data.numpages);
      // number of rendered pages
      console.log(data.numrender);
      // PDF info
      console.log(data.info);
      // PDF metadata
      console.log(data.metadata);
      // PDF.js version
      // check https://mozilla.github.io/pdf.js/getting_started/
      console.log(data.version);
      // PDF text
      console.log(data.text);
    } catch (error) {
      console.error(error);
    }
  }