数据抓取
基于 Node.js 请求 GitHub Trending 页面并解析数据,以 API 形式返回结果。
接口测试:test.52shujun.cn
抱歉,拉低【掘金】代码质量。
安装库
npm i cheerio express superagent -S
app.js
const express = require('express')
const app = express()
const { getTrending, getNationality, getLanguage, getDate } = require('./util')

/**
 * Wraps an async route handler so that a rejected promise (or a synchronous
 * throw) produces an HTTP 500 JSON response. Without this, a failed scrape
 * becomes an unhandled rejection and the client never receives a reply.
 *
 * @param {Function} handler - Async `(req, res)` route handler.
 * @returns {Function} Express-compatible `(req, res)` handler.
 */
function withErrorHandling(handler) {
  return (req, res) => {
    Promise.resolve()
      .then(() => handler(req, res))
      .catch((err) => {
        // The scrapers may reject with an Error or with a plain string.
        const message = err instanceof Error ? err.message : String(err)
        console.error(err)
        // Only reply if the handler did not already start a response.
        if (!res.headersSent) {
          res.status(500).json({ error: message })
        }
      })
  }
}

// Spoken-language ("nationality") filter options
app.get('/nationality', withErrorHandling(async (req, res) => {
  res.json(await getNationality())
}))

// Programming-language filter options
app.get('/language', withErrorHandling(async (req, res) => {
  res.json(await getLanguage())
}))

// Date-range filter options
app.get('/date', withErrorHandling(async (req, res) => {
  res.json(await getDate())
}))

// Trending repositories; every filter defaults to 'any' when not supplied
app.get('/', withErrorHandling(async (req, res) => {
  const { language = 'any', since = 'any', nationality = 'any' } = req.query
  const data = await getTrending(language, since, nationality)
  res.send(data)
}))

app.listen(3000, () => {
  console.log('服务已运行')
})
util.js
// jQuery-like library used to load and query the fetched HTML
const cheerio = require('cheerio')
// HTTP request library for Node (ajax-style API)
const request = require('superagent')
// Base URL of the page being scraped
const trending = `https://github.com/trending`
/**
 * Fetches the trending page and extracts one record per `.Box-row` element.
 *
 * Written as a plain async function (instead of `new Promise(async ...)`) so
 * that request failures and parsing errors reject the returned promise rather
 * than leaving it pending forever.
 *
 * @param {string} language - Path segment appended to the trending URL, or 'any' to omit it.
 * @param {string} since - Value of the `since` query parameter, or 'any' to omit it.
 * @param {string} nationality - Value of the `spoken_language_code` query parameter, or 'any' to omit it.
 * @returns {Promise<Array<{title: string, author: string, avatarUrl: (string|undefined),
 *   discription: string, language: string, star: string, url: string}>>}
 *   Parsed rows. Fields whose markup is missing come back as '' (avatarUrl as undefined).
 * @throws {Error} When the request fails or the page contains no rows.
 */
async function getTrending(language, since, nationality) {
  // Build the request URL and query string; 'any' means "no filter".
  const queryData = {}
  let pageUrl = trending
  if (language !== 'any') {
    pageUrl = `${trending}/${language}`
  }
  if (since !== 'any') {
    queryData.since = since
  }
  if (nationality !== 'any') {
    queryData.spoken_language_code = nationality
  }

  const response = await request.get(pageUrl).query(queryData)
  const $ = cheerio.load(response.text)
  const list = []

  $('.Box-row').each((index, item) => {
    const row = $(item)
    const link = row.find('h1 > a')

    // Link text has the form "author / title"; tolerate a missing part.
    const nameParts = link.text().split('/')
    const author = (nameParts[0] || '').trim()
    const title = (nameParts[1] || '').trim()

    const discription = row.find('p').text().trim()
    const programmingLanguage = row
      .find('div [itemprop="programmingLanguage"]')
      .text()

    // Accept thousands separators ("1,234") and return plain digits;
    // a row without a count yields '' instead of throwing.
    const starMatch = row
      .find('.d-inline-block.float-sm-right')
      .text()
      .match(/\d[\d,]*/)
    const star = starMatch ? starMatch[0].replace(/,/g, '') : ''

    const avatarUrl = row.find('img.avatar').attr('src')
    const href = link.attr('href')
    const url = href ? `https://github.com${href}` : ''

    list.push({
      title,
      author,
      avatarUrl,
      discription,
      language: programmingLanguage,
      star,
      url,
    })
  })

  if (list.length === 0) {
    throw new Error('no trending repositories found')
  }
  return list
}
/**
 * Fetches the trending page and lists the spoken-language filter options.
 *
 * Written as a plain async function (instead of `new Promise(async ...)`) so
 * that request failures and parsing errors reject the returned promise rather
 * than leaving it pending forever.
 *
 * @returns {Promise<Array<{key: string, value: (string|undefined)}>>}
 *   `key` is the trimmed text of the anchor's child span; `value` is the part
 *   of the anchor's href between the first and second '='.
 * @throws {Error} When the request fails or no options are found.
 */
async function getNationality() {
  const response = await request.get(trending)
  const $ = cheerio.load(response.text)
  const list = []

  $('[data-filterable-for="text-filter-field-spoken-language"] a').each(
    (index, item) => {
      const link = $(item)
      const href = link.attr('href')
      // An anchor without an href carries no filter value; skip it.
      if (typeof href !== 'string') {
        return
      }
      list.push({
        key: link.find('> span').text().trim(),
        value: href.split('=')[1],
      })
    }
  )

  if (list.length === 0) {
    throw new Error('no spoken-language options found')
  }
  return list
}
/**
 * Fetches the trending page and lists the programming-language filter options.
 *
 * Written as a plain async function (instead of `new Promise(async ...)`) so
 * that request failures and parsing errors reject the returned promise rather
 * than leaving it pending forever.
 *
 * @returns {Promise<Array<{key: string, value: string}>>}
 *   `key` is the trimmed text of the anchor's child span; `value` is the href
 *   path segment that immediately precedes the '?'.
 * @throws {Error} When the request fails or no options are found.
 */
async function getLanguage() {
  const response = await request.get(trending)
  const $ = cheerio.load(response.text)
  const list = []

  $('#languages-menuitems > div > a').each((index, item) => {
    const link = $(item)
    const href = link.attr('href')
    // Matches the slash-free run of characters that sits right before a '?'.
    const slug = typeof href === 'string'
      ? href.match(/[^\/]+(\s\S)*(?=\?)/)
      : null
    // Anchors without an href, or without a '?' in it, have no usable value.
    if (slug === null) {
      return
    }
    list.push({
      key: link.find('> span').text().trim(),
      value: slug[0],
    })
  })

  if (list.length === 0) {
    throw new Error('no programming-language options found')
  }
  return list
}
/**
 * Fetches the trending page and lists the date-range filter options.
 *
 * Written as a plain async function (instead of `new Promise(async ...)`) so
 * that request failures and parsing errors reject the returned promise rather
 * than leaving it pending forever.
 *
 * @returns {Promise<Array<{key: string, value: (string|undefined)}>>}
 *   `key` is the trimmed text of the anchor's child span; `value` is the part
 *   of the anchor's href between the first and second '='.
 * @throws {Error} When the request fails or no options are found.
 */
async function getDate() {
  const response = await request.get(trending)
  const $ = cheerio.load(response.text)
  const list = []

  $('.select-menu-list > a.select-menu-item').each((index, item) => {
    const link = $(item)
    const href = link.attr('href')
    // An anchor without an href carries no filter value; skip it.
    if (typeof href !== 'string') {
      return
    }
    list.push({
      key: link.find('> span').text().trim(),
      value: href.split('=')[1],
    })
  })

  if (list.length === 0) {
    throw new Error('no date-range options found')
  }
  return list
}
// Public API of util.js: the four scraper functions required by app.js
module.exports = { getDate, getLanguage, getNationality, getTrending }