const express = require('express');
const fs = require('fs');
const path = require('path');
const router = express.Router();
const pdf = require('pdf-parse');
const mammoth = require('mammoth');

// File types this loader can parse, keyed by URL extension (without the dot).
const SUPPORTED_EXTENSIONS = Object.freeze(['pdf', 'docx']);

/**
 * Determine the document type from a URL's extension.
 *
 * The URL pathname is checked first so query strings and fragments
 * (e.g. `file.pdf?token=abc`) do not hide the extension; the raw string is
 * checked second so URLs that merely end with the extension still match.
 * Matching is case-insensitive.
 *
 * @param {string} rawUrl - URL supplied by the client.
 * @returns {string|undefined} 'pdf', 'docx', or undefined when unsupported.
 */
function detectFileType(rawUrl) {
  let pathname = '';
  try {
    pathname = new URL(rawUrl).pathname;
  } catch {
    // Not an absolute URL: only the raw-string check below applies.
  }

  const candidates = [pathname, rawUrl].map((value) => value.toLowerCase());
  for (const candidate of candidates) {
    const match = SUPPORTED_EXTENSIONS.find((ext) => candidate.endsWith(`.${ext}`));
    if (match) {
      return match;
    }
  }
  return undefined;
}

/**
 * Parse a downloaded document.
 *
 * @param {string} type - 'pdf' or 'docx'.
 * @param {Buffer} buffer - Raw file contents.
 * @returns {Promise<*>} For 'pdf', whatever pdf-parse resolves with; for
 *   'docx', the extracted raw text as a string ('' when none).
 */
async function parseDocument(type, buffer) {
  if (type === 'pdf') {
    return pdf(buffer);
  }
  // mammoth expects an options object ({ buffer } or { path }), not a bare Buffer.
  const { value } = await mammoth.extractRawText({ buffer });
  return value ?? '';
}

/**
 * POST /loader
 *
 * Reads `url` from the request body, downloads the file it points to and
 * returns its parsed content.
 *
 * Responses:
 *   200 { code: 200, type: 'pdf' | 'docx', data }
 *   400 { error } when the URL is missing or the file type is unsupported
 *   500 { error, details } when the download or the parsing fails
 */
router.post('/loader', async (req, res) => {
  const url = req.body?.url;

  if (typeof url !== 'string' || url.trim() === '') {
    return res.status(400).json({ error: 'No URL provided' });
  }

  // Reject unsupported types before downloading anything.
  const type = detectFileType(url);
  if (!type) {
    return res.status(400).json({
      error: 'Unsupported file type',
      details: `Supported extensions: ${SUPPORTED_EXTENSIONS.join(', ')}`,
    });
  }

  try {
    // NOTE(security): the URL is client-controlled, so this server-side fetch
    // can reach internal hosts (SSRF). Consider an allow-list of hosts before
    // exposing this route to untrusted callers.
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
    }

    // The parsers operate on a Node Buffer rather than an ArrayBuffer.
    const buffer = Buffer.from(await response.arrayBuffer());
    const data = await parseDocument(type, buffer);

    return res.json({ code: 200, type, data });
  } catch (error) {
    console.error('POST /loader failed:', error);
    return res.status(500).json({ error: 'Failed to parse File', details: error.message });
  }
});

module.exports = router;