// test-fetch.js

// Goal: fetch the admission score lines of multiple universities.
// Reference site: http://www.dxsbb.com/news/list_168.html
// HTML parsing is done with cheerio.
  4. const cheerio = require("cheerio");
  5. const { title } = require("process");
  6. async function main() {
  7. //获取学校列表页面html文本
  8. let schoolHost = "http://www.dxsbb.com"
  9. let schoolListUrl="http://www.dxsbb.com/news/list_168.html";
  10. let response1 = await fetch(schoolListUrl);
  11. let html1=await response1?.text();
  12. //html文本解析为可控制的DOM选择器
  13. const $ = cheerio.load(html1);
  14. // console.log($.html());
  15. // console.log("----------------------------------------------")
  16. // console.log($("body>div.listBody>div.listareaL>div>div:nth-child(1)>div>ul>li").html());
  17. let schoolList=[];
  18. //遍历每个li元素
  19. $('li').each((index,element)=>{
  20. const aHref=$(element).find('a').attr('href');//获取a标签的href
  21. const h3Text=$(element).find('.b h3').text();//获取h3标签的文本
  22. //将结果存入数组
  23. if(!h3Text) return
  24. if(h3Text.indexOf("多少")>-1) return
  25. schoolList.push({href:aHref,title:h3Text});
  26. });
  27. console.log(schoolList);
  28. //方法一
  29. for(let index=0;index<schoolList.length;index++){
  30. let school = schoolList[index];
  31. let response2 = await fetch(`${schoolHost}${school.href}`)
  32. let html2 =await response2.text();
  33. console.log(`解析${school.title},文本量${html2.length}`)
  34. }
  35. console.log("-------------------------------------------------------------------------------------")
  36. //方法二
  37. schoolList.forEach(async school=>{
  38. let response2 = await fetch(`${schoolHost}${school.href}`)
  39. let html2 =await response2.text();
  40. console.log(`解析${school.title},文本量${html2.length}`)
  41. })
  42. }
  43. main();