// routes.js (1.5 KB)

  const express = require('express');
  const fs = require('fs');
  const path = require('path');
  const router = express.Router();
  const pdf = require('pdf-parse');
  const mammoth = require("mammoth");
  // File extensions this route can parse, mapped to the `type` label that is
  // echoed back in the response.
  const SUPPORTED_TYPES = new Map([
    ['.pdf', 'pdf'],
    ['.docx', 'docx'],
  ]);

  /**
   * Decides which parser should handle the document behind a URL.
   *
   * The extension of the URL *path* is checked first (case-insensitively), so
   * links such as `https://host/a.PDF?token=1` are recognised. If the path has
   * no supported extension, the raw URL suffix is checked as a fallback so
   * links that only end in `.pdf` / `.docx` inside the query string keep
   * working.
   *
   * @param {URL} target - Parsed URL of the remote document.
   * @returns {string|undefined} `'pdf'`, `'docx'`, or `undefined` when the
   *   type is not supported.
   */
  function detectFileType(target) {
    const fromPath = SUPPORTED_TYPES.get(path.posix.extname(target.pathname).toLowerCase());
    if (fromPath) {
      return fromPath;
    }
    const rawUrl = target.href.toLowerCase();
    for (const [extension, type] of SUPPORTED_TYPES) {
      if (rawUrl.endsWith(extension)) {
        return type;
      }
    }
    return undefined;
  }

  /**
   * POST /loader
   *
   * Downloads the document at `req.body.url` and extracts its content.
   *
   * Responses:
   *   200 `{ code: 200, type, data }` - `type` is `'pdf'` or `'docx'`; `data` is
   *        the value resolved by `pdf-parse` for PDFs, or the raw text string
   *        (`.value` of mammoth's result, `''` if empty) for DOCX files.
   *   400 `{ error }`          - `url` missing, not a string, unparsable, or
   *                              not http/https.
   *   415 `{ error, details }` - the URL does not point at a .pdf or .docx file.
   *   500 `{ error, details }` - the download or the parsing step failed.
   */
  router.post('/loader', async (req, res) => {
    const url = req.body?.url;
    if (!url) {
      return res.status(400).json({ error: 'No URL provided' });
    }

    // Validate before touching the network: the URL comes straight from the
    // client, so reject anything that is not a well-formed http(s) URL.
    let target = null;
    if (typeof url === 'string') {
      try {
        target = new URL(url);
      } catch {
        target = null;
      }
    }
    if (!target || !['http:', 'https:'].includes(target.protocol)) {
      return res.status(400).json({ error: 'Invalid URL' });
    }
    // NOTE(security): this endpoint still fetches arbitrary hosts on behalf of
    // the caller (SSRF risk, e.g. internal/metadata addresses). Consider an
    // allow-list or private-range blocking before exposing it publicly.

    // Reject unsupported types up front instead of downloading the file and
    // answering 200 with no data.
    const type = detectFileType(target);
    if (!type) {
      return res.status(415).json({
        error: 'Unsupported file type',
        details: 'Only .pdf and .docx files are supported',
      });
    }

    try {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`Failed to fetch: ${response.statusText}`);
      }
      // pdf-parse and mammoth both work on Node Buffers, not ArrayBuffers.
      const buffer = Buffer.from(await response.arrayBuffer());

      let data;
      if (type === 'pdf') {
        data = await pdf(buffer);
      } else {
        // mammoth takes an options object describing the input; passing the
        // Buffer itself is rejected by the library.
        const result = await mammoth.extractRawText({ buffer });
        data = result?.value || '';
      }

      // Return the parsed content.
      return res.json({ code: 200, type, data });
    } catch (error) {
      // Any download or parsing failure is reported as a server error.
      return res.status(500).json({ error: 'Failed to parse File', details: error.message });
    }
  });

  module.exports = router;