Node.js学习(七)爬虫系统

742 阅读1分钟

主要的库

  • express Node.js 的 Web 服务框架,用来创建服务器、定义路由
  • request 发起 HTTP 请求,回调中拿到的是页面的 HTML 字符串(注意:该库已被标记为弃用 deprecated,但仍可安装使用)
  • cheerio node 版的 jquery:加载 request 返回的 HTML 后,可以用类似 jquery 的方式选取和操作节点

代码

const express = require('express');
const app = express();
const request = require("request");
const cheerio = require('cheerio');

/**
 * GET / — fetch the Baidu News front page and respond with its headline links.
 *
 * On success, responds with a JSON object that maps each headline's text to
 * its href, taken from the anchors matched by `.ulist .bold-item a`.
 * If the upstream request fails, responds with HTTP 502 and an `error` field.
 */
app.get("/", function (req, res) {
    request('https://news.baidu.com/', (err, response, body) => {
        if (err) {
            console.log(`热点新闻失败 ${err}`);
            // Always answer the client; without this the request hangs until it times out.
            res.status(502).json({ error: 'failed to fetch https://news.baidu.com/' });
            return;
        }

        const $ = cheerio.load(body);
        const obj = {};
        $('.ulist .bold-item a').each((index, item) => {
            const anchor = $(item);
            // text() copes with anchors that are empty or wrap their label in a
            // child element, where item.children[0].data would throw or be undefined.
            const newsTitle = anchor.text().trim();
            const newsLink = anchor.attr('href');
            if (!newsTitle || !newsLink) {
                // Skip incomplete anchors (returning undefined continues the loop).
                return;
            }
            obj[newsTitle] = newsLink;
            console.log(newsTitle, newsLink);
        });
        res.json(obj);
    });
});
// Start the HTTP server on port 3000 and log once it is accepting connections.
const announceServerReady = () => {
    console.log("server is running...");
};
app.listen(3000, announceServerReady);

// Expose the configured Express app to other modules.
module.exports = app;