const cheerio = require('cheerio');
const { GushiHrefList } = require("./href");
// Load the Parse JS SDK.
const Parse = require("parse/node");
Parse.initialize("dev"); // set the applicationId
Parse.serverURL = "http://web2023.fmode.cn:9999/parse"; // set the serverURL

// Selector prefixes shared by every field scraped from a poem page.
const PAGE_COLUMN = 'body > div.container.basic-page > div > div.more-container.col-md-8';
const HEADER_BOX = `${PAGE_COLUMN} > div:nth-child(1)`;
const DETAIL_BOX = `${PAGE_COLUMN} > div:nth-child(5)`;

// Crawl every page and store the results; a rejected run is reported instead
// of being left as an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

/**
 * Fetches each poem page, skips incomplete or already-stored poems, and saves
 * the remaining ones as "Shige" objects on the Parse server.
 *
 * Pages are processed one at a time so that the duplicate check of one poem
 * always sees the saves made for the previous ones, and so that the remote
 * site is not hit with every request at once.
 *
 * @param {string[]} [hrefs=GushiHrefList] - Page URLs to crawl. Pass a short
 *   list (e.g. `[GushiHrefList[0]]`) to try the crawler on a single page.
 * @returns {Promise<void>} Resolves once every URL has been processed.
 */
async function main(hrefs = GushiHrefList) {
  const Shige = Parse.Object.extend("Shige");

  for (const href of hrefs) {
    try {
      const gushi = await getGushiDataFromHref(href);
      // A poem without a title or an author cannot be de-duplicated; skip it.
      if (!gushi?.title || !gushi?.author) continue;

      // Duplicate check.
      const exists = await checkExists(gushi);
      if (exists?.id) continue;

      // Insert the new record and wait for the server to confirm it.
      const sg = new Shige();
      sg.set(gushi);
      await sg.save();
    } catch (err) {
      // One failing page must not abort the remaining ones.
      console.log("失败:", href, err);
    }
  }
}

/**
 * Looks up a stored "Shige" object with the same title, author and dynasty.
 *
 * @param {{title?: string, author?: string, dynasty?: string}} gushi - Scraped poem data.
 * @returns {Promise<Parse.Object|undefined>} The first matching object, or
 *   `undefined` when no object matches.
 */
async function checkExists(gushi) {
  const query = new Parse.Query("Shige");
  query.equalTo("title", gushi?.title);
  query.equalTo("author", gushi?.author);
  query.equalTo("dynasty", gushi?.dynasty);
  return query.first();
}

/**
 * Downloads one poem page and extracts its fields with cheerio.
 *
 * @param {string} href - URL of the poem page.
 * @returns {Promise<object>} An object with `title`, `author`, `dynasty`
 *   (element text) and `content`, `intro`, `yiwen`, `note`, `review` (inner
 *   HTML, `null` when the element is missing). An empty object is returned
 *   when the page could not be downloaded.
 */
async function getGushiDataFromHref(href) {
  let html;
  try {
    const response = await fetch(href);
    // fetch() only rejects on network errors; treat HTTP error statuses as failures too.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    html = await response.text();
  } catch (err) {
    console.log("失败:", href, err.message);
    return {};
  }

  const $ = cheerio.load(html);
  const gushi = {
    title: $(`${HEADER_BOX} > h1`).text(),
    author: $(`${HEADER_BOX} > div.author-simple-info > span:nth-child(3) > a`).text(),
    dynasty: $(`${HEADER_BOX} > div.author-simple-info > span:nth-child(1) > a`).text(),
    content: $(`${HEADER_BOX} > div.shici-content.check-more`).html(),
    // The detail sections are read from the even-numbered children of the detail box.
    intro: $(`${DETAIL_BOX} > div:nth-child(2)`).html(),
    yiwen: $(`${DETAIL_BOX} > div:nth-child(4)`).html(),
    note: $(`${DETAIL_BOX} > div:nth-child(6)`).html(),
    review: $(`${DETAIL_BOX} > div:nth-child(8)`).html(),
  };

  console.log(href, gushi);
  return gushi;
}