1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
const express = require('express');
const fs = require('fs');
const path = require('path');
const router = express.Router();
const pdf = require('pdf-parse');
const mammoth = require("mammoth");

/**
 * Infers the supported document type from a URL's file extension.
 *
 * The URL pathname is checked first so that query strings and fragments
 * (e.g. `report.pdf?token=abc`) do not hide the extension; the raw URL string
 * is checked second to stay compatible with callers that relied on the URL
 * itself ending in the extension. Matching is case-insensitive.
 *
 * @param {URL} parsedUrl - The parsed form of the requested URL.
 * @param {string} rawUrl - The URL string exactly as supplied by the client.
 * @returns {'pdf' | 'docx' | null} The detected type, or null when unsupported.
 */
function detectFileType(parsedUrl, rawUrl) {
  const candidates = [parsedUrl.pathname.toLowerCase(), rawUrl.toLowerCase()];
  for (const candidate of candidates) {
    if (candidate.endsWith('.pdf')) {
      return 'pdf';
    }
    if (candidate.endsWith('.docx')) {
      return 'docx';
    }
  }
  return null;
}

/**
 * POST /loader
 *
 * Downloads the document at `req.body.url` and extracts its content.
 *
 * Responses:
 * - 200 `{ code: 200, type, data }` where `type` is "pdf" or "docx". For PDFs
 *   `data` is the value resolved by `pdf(buffer)`; for DOCX files it is the
 *   raw text string (`value`) produced by mammoth, or "" when empty.
 * - 400 when the URL is missing, not a string, malformed, or not http(s).
 * - 415 when the URL does not point at a .pdf or .docx file.
 * - 500 when the download or the parsing fails.
 */
router.post('/loader', async (req, res) => {
  const url = req.body?.url; // URL of the document to load, taken from the request body
  if (!url) {
    return res.status(400).json({ error: 'No URL provided' });
  }
  if (typeof url !== 'string') {
    return res.status(400).json({ error: 'URL must be a string' });
  }

  let parsedUrl;
  try {
    parsedUrl = new URL(url);
  } catch {
    return res.status(400).json({ error: 'Invalid URL' });
  }

  // NOTE(security): this endpoint fetches a caller-supplied URL (SSRF risk).
  // Only http(s) is accepted here; consider adding a host allowlist as well.
  if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
    return res.status(400).json({ error: 'Only http and https URLs are supported' });
  }

  // Reject unsupported files before downloading anything.
  const type = detectFileType(parsedUrl, url);
  if (type === null) {
    return res.status(415).json({ error: 'Unsupported file type' });
  }

  try {
    // Download the remote file.
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch: ${response.statusText}`);
    }
    // The parsers expect a Node.js Buffer rather than an ArrayBuffer.
    const buffer = Buffer.from(await response.arrayBuffer());

    let data;
    if (type === 'pdf') {
      data = await pdf(buffer);
    } else {
      // mammoth takes an options object ({ buffer } or { path }), not a bare Buffer.
      const result = await mammoth.extractRawText({ buffer });
      data = result?.value || "";
    }

    // Return the parsed content.
    return res.json({
      code: 200,
      type,
      data,
    });
  } catch (error) {
    console.error('POST /loader failed:', error);
    return res.status(500).json({ error: 'Failed to parse File', details: error.message });
  }
});

module.exports = router;
|