1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
// Goal: collect the admission score lines of multiple universities.
// Reference site: http://www.dxsbb.com/news/list_168.html
// HTML parsing is done with cheerio.
- const cheerio = require("cheerio");
- const { title } = require("process");
/**
 * Downloads a URL and returns the response body as text.
 *
 * @param {string|URL} url - Address to request with the global `fetch`.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function fetchPageText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with HTTP status ${response.status}`);
  }
  return response.text();
}

/**
 * Collects `{ href, title }` records from every `<li>` of a loaded document.
 *
 * @param {Function} $ - Selector function returned by `cheerio.load`.
 * @returns {{href: string, title: string}[]} Entries in document order.
 */
function extractSchoolList($) {
  const schoolList = [];
  $("li").each((index, element) => {
    const entry = $(element);
    const href = entry.find("a").attr("href");
    const title = entry.find(".b h3").text();
    // Items without a link or without an <h3> title cannot be fetched or reported.
    if (!href || !title) return;
    // Titles containing "多少" are skipped; presumably these are generic
    // question-style articles rather than a single school's page — confirm.
    if (title.includes("多少")) return;
    schoolList.push({ href, title });
  });
  return schoolList;
}

/**
 * Fetches the school list page, logs the extracted school entries, then
 * downloads every school page twice — first one at a time, then all
 * concurrently — logging each page's title and text length.
 *
 * @returns {Promise<void>} Resolves only after every download of both
 *   approaches has finished.
 * @throws {Error} When any request fails or returns a non-2xx status.
 */
async function main() {
  const schoolHost = "http://www.dxsbb.com";
  const schoolListUrl = "http://www.dxsbb.com/news/list_168.html";

  // Fetch the list page and parse it into a queryable DOM.
  const listHtml = await fetchPageText(schoolListUrl);
  const $ = cheerio.load(listHtml);
  const schoolList = extractSchoolList($);
  console.log(schoolList);

  const reportSchoolPage = async (school) => {
    // new URL resolves relative hrefs against the host and leaves absolute ones intact.
    const pageHtml = await fetchPageText(new URL(school.href, schoolHost));
    console.log(`解析${school.title},文本量${pageHtml.length}`);
  };

  // Approach 1: sequential — each request starts after the previous one finished.
  for (const school of schoolList) {
    await reportSchoolPage(school);
  }
  console.log("-------------------------------------------------------------------------------------");
  // Approach 2: concurrent — all requests start at once. Promise.all is awaited
  // so main() does not resolve early and a failed request is not left unhandled
  // (an async callback passed to forEach is never awaited).
  await Promise.all(schoolList.map((school) => reportSchoolPage(school)));
}
// Run the scraper. Report any failure and exit with a non-zero status instead
// of leaving the returned promise's rejection unhandled.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|