NestJS 爬虫

393 阅读1分钟

需要提前安装两个包

npm i axios
npm i cheerio     // 用于在 Node 环境中解析和操作 DOM
import { Injectable } from '@nestjs/common';
import { CreateReptileDto } from './dto/create-reptile.dto';
import { UpdateReptileDto } from './dto/update-reptile.dto';
// import  axios from 'axios';
import * as cheerio from 'cheerio';
import * as fs from 'fs'
import * as path from 'path'
const axios = require('axios');


@Injectable()
export class ReptileService {
  /**
   * Crawls every page of the hard-coded gallery, collects the image URLs found
   * in the article body, and saves the images to disk.
   *
   * Pages are fetched one after another: the first page has no suffix, later
   * pages use `_1`, `_2`, ... The crawl stops at the first page whose first
   * pagination bar has no "next page" link.
   *
   * @returns The fixed string `cos` once crawling and saving have finished.
   * @throws Propagates the axios error if fetching a gallery page fails.
   */
  async findAll(): Promise<string> {
    const baseUrl = 'https://www.jpmn5.com';
    const nextPage = '下一页';
    const urls: string[] = [];

    for (let index = 0; ; index++) {
      console.log(index);
      const suffix = index ? '_' + index : '';
      const res = await axios.get(`${baseUrl}/Cosplay/Cosplay10444${suffix}.html`);
      const $ = cheerio.load(res.data);

      // Collect images on EVERY page, including the last one (which has no
      // "next page" link and was previously skipped entirely).
      $('.article-content p img').each((_, img) => {
        const src = $(img).attr('src');
        if (src) {
          // Resolve against the site root so both relative and absolute
          // `src` values yield a valid URL; images without `src` are ignored.
          urls.push(new URL(src, baseUrl).href);
        }
      });

      // Read the link labels of the first pagination bar.
      const pageLabels = $('.pagination')
        .eq(0)
        .find('a')
        .toArray()
        .map((link) => $(link).text());
      if (!pageLabels.includes(nextPage)) {
        break;
      }
    }

    console.log(urls);
    // Wait for the downloads so failures are handled instead of being lost.
    await this.writeFiles(urls);
    return `cos`;
  }

  /**
   * Downloads each URL and writes it as a `.jpg` file into the `images`
   * directory next to this module's parent folder (created when missing).
   *
   * Downloads run concurrently. A failed download or write is logged and
   * skipped so that one bad URL does not abort the rest.
   *
   * @param urls Absolute image URLs to download.
   * @returns A promise that resolves when every download has been attempted.
   * @throws If the output directory cannot be created.
   */
  async writeFiles(urls: string[]): Promise<void> {
    const outputDir = path.join(__dirname, '../images');
    await fs.promises.mkdir(outputDir, { recursive: true });

    // One timestamp per batch plus the position in the list keeps names
    // unique even when several downloads complete in the same millisecond.
    const batchStamp = Date.now();

    await Promise.all(
      urls.map(async (url, position) => {
        try {
          const res = await axios.get(url, { responseType: 'arraybuffer' });
          const target = path.join(outputDir, `${batchStamp}_${position}.jpg`);
          // writeFile opens, writes and closes the file, so no stream is leaked.
          await fs.promises.writeFile(target, res.data);
        } catch (err) {
          const reason = err instanceof Error ? err.message : String(err);
          console.error(`Failed to save ${url}: ${reason}`);
        }
      }),
    );
  }
}