爬取图片
废话不多说，直接上代码：
const cheerio = require('cheerio');
const fs = require('fs');
var iconv = require('iconv-lite');
const request = require('request');
const Utils = require('./utils')
// Site root. Paths scraped from listing/detail pages are appended to it to form request URLs.
const baseUrl = 'https://pic.netbian.com'
// Runs once at load time, before the crawl starts, on the directory the images are written to.
// NOTE(review): presumably the deleteDir helper exported by ./utils empties and recreates it — confirm.
Utils.deleteDir('./pic/')
// Highest listing page number that is crawled.
const LAST_PAGE = 58;
// Pause between two consecutive listing pages, in milliseconds.
const PAGE_INTERVAL_MS = 20000;

/**
 * Crawls listing page `index` and schedules the following page after a pause.
 *
 * Page 1 lives at `/4kmeinv/index.html`; every other page at
 * `/4kmeinv/index_<n>.html`. Calls with an index beyond the last page do nothing.
 *
 * @param {number} index - 1-based listing page number to fetch.
 * @returns {void}
 */
function start(index) {
  const page = Number(index);
  if (page > LAST_PAGE) return;

  const url =
    page === 1
      ? baseUrl + '/4kmeinv/index.html'
      : baseUrl + `/4kmeinv/index_${page}.html`;

  console.log(`------------第${page}页--------------`);
  requestSrc(url);

  // Only arm the timer when there is a next page; otherwise the process would
  // be kept alive for a full interval just to hit the guard above.
  if (page < LAST_PAGE) {
    setTimeout(() => start(page + 1), PAGE_INTERVAL_MS);
  }
}
/**
 * Downloads one listing page and hands its HTML to `findHref`.
 *
 * Failures (transport errors and non-200 responses) are logged and otherwise
 * ignored so that one bad page does not stop the crawl.
 *
 * @param {string} url - Absolute URL of the listing page.
 * @returns {Promise<void>} Settles as soon as the request has been issued, not
 *   when the response arrives; kept async for backward compatibility.
 */
const requestSrc = async (url) => {
  request(
    {
      url,
      method: 'GET',
      timeout: 5000,
    },
    (err, res, body) => {
      if (err) {
        console.error(err);
        return;
      }
      if (res.statusCode !== 200) {
        // Previously dropped silently, which made blocked or missing pages invisible.
        console.error(`Listing page returned HTTP ${res.statusCode}: ${url}`);
        return;
      }
      findHref(body);
    },
  );
};
/**
 * Walks every anchor matched by `.slist .clearfix a` in a listing page and
 * passes its `href` attribute to `requestHref`.
 *
 * @param {string} body - HTML of a listing page.
 * @returns {void}
 */
function findHref(body) {
  const $ = cheerio.load(body);
  const anchors = $('.slist .clearfix a').toArray();
  for (const anchor of anchors) {
    const link = $(anchor).attr('href');
    requestHref(link);
  }
}
/**
 * Downloads one detail page and hands its decoded HTML to `findImg`.
 *
 * The body is fetched as raw bytes (`encoding: null`) and decoded as gb2312.
 * Failures are logged and otherwise ignored so the crawl keeps going.
 *
 * @param {string} href - Link taken from a listing page; relative links are
 *   resolved against `baseUrl`, absolute links are used as they are.
 * @returns {void}
 */
function requestHref(href) {
  if (!href) {
    // Anchors without an href would otherwise become a request for ".../undefined".
    console.error('Skipping link without href');
    return;
  }

  let url;
  try {
    url = new URL(href, baseUrl).href;
  } catch (err) {
    console.error(`Skipping malformed link: ${href}`);
    return;
  }

  request(
    {
      url,
      encoding: null,
      // Same limit as the listing request, so a stalled connection cannot hang forever.
      timeout: 5000,
    },
    (err, res, body) => {
      if (err) {
        console.error(err);
        return;
      }
      if (res.statusCode !== 200) {
        console.error(`Detail page returned HTTP ${res.statusCode}: ${url}`);
        return;
      }
      findImg(iconv.decode(body, 'gb2312'));
    },
  );
}
/**
 * Extracts the image matched by `.photo-pic img` from a detail page and passes
 * its source path and a target file name to `saveImgFile`.
 *
 * The file name is `<title attribute>.<extension of src>`; when the image has
 * no title, the last path segment of `src` is used instead.
 *
 * @param {string} body - Decoded HTML of a detail page.
 * @returns {void}
 */
function findImg(body) {
  const $ = cheerio.load(body);
  const image = $('.photo-pic img');
  const src = image.attr('src');
  if (!src) {
    // This runs inside a request callback: throwing here would take the whole process down.
    console.error('No image found on detail page');
    return;
  }

  const extension = src.split('.').pop();
  const rawTitle = image.attr('title');
  const title = rawTitle ? rawTitle + '.' + extension : src.split('/').pop();
  saveImgFile(src, title);
}
/**
 * Streams an image from the site into `./pic/<title>`.
 *
 * @param {string} src - Image path as found in the page; resolved against `baseUrl`.
 * @param {string} title - Target file name including extension. Characters that
 *   are not allowed in file names are replaced with `_`.
 * @returns {void}
 */
function saveImgFile(src, title) {
  // A title containing a path separator would point outside ./pic/ or at a missing directory.
  const safeTitle = String(title).replace(/[\\/:*?"<>|]/g, '_');
  const writeStream = fs.createWriteStream('./pic/' + safeTitle);

  writeStream.on('finish', () => {
    console.log('文件写入成功:', title);
  });
  // Without an 'error' listener a failed write is an uncaught exception.
  writeStream.on('error', (err) => {
    console.error(err);
  });

  request(new URL(src, baseUrl).href)
    .on('error', (err) => {
      console.error(err);
      // pipe() does not close the destination when the source fails.
      writeStream.destroy();
    })
    .pipe(writeStream);
}
// Entry point: begin crawling at listing page 1; start() schedules the following pages itself.
start(1)
const fs = require('fs')
/**
 * Lists the entries of a directory synchronously.
 *
 * @param {string} path - Directory to read.
 * @returns {string[]|false} Entry names, or `false` when the directory could
 *   not be read (the error is logged).
 */
function readDir(path) {
  // readdirSync reports failures by throwing; it never invokes a callback.
  try {
    return fs.readdirSync(path);
  } catch (err) {
    console.error(err);
    return false;
  }
}
/**
 * Resets a directory: removes it together with everything inside, then
 * creates it again empty. A path that does not exist yet is simply created.
 *
 * @param {string} path - Directory to reset, with or without a trailing separator.
 * @returns {void}
 */
function deleteDir(path) {
  // recursive: sub-directories are removed too; force: a missing path is not an error.
  fs.rmSync(path, { recursive: true, force: true });
  // recursive: missing parent directories are created as well.
  fs.mkdirSync(path, { recursive: true });
}
// Public API of this helper module: directory listing and directory reset.
module.exports = {
readDir,
deleteDir
}
结果展示
