node爬虫请求同步延迟示例

102 阅读1分钟

Python 好久不用,有点生疏了。突然有个想法:用 axios 写爬虫,然后把结果保存为 .json 或者 Excel 文件。


const fs = require('fs');
const path = require('path');
const _ = require("lodash")
const axios = require('axios');

// Headers sent with every request made through the shared axios instance below.
const requestHeaders = {
  'Content-Type': 'application/json',
  'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x6309080f)XWEB/8461",
  'Cookie': "xxxx",
};

// Shared axios instance: one base URL and one set of default headers for all requests.
const server = axios.create({
  baseURL: "https://xxx.com/",
  headers: requestHeaders,
});


/**
 * Fetch every entry of `finalList` strictly one after another.
 *
 * `finalList` is the final data set to crawl; each entry provides
 * `{ url, filePath }`: the URL to request and the path of the file to write.
 * Before each request the function waits a random, fractional number of
 * seconds between 1 and 3, then stores the response body with `saveJson`.
 *
 * The whole loop sits inside a single try/catch: the first error is logged
 * and the remaining entries are not processed.
 */
const fetchWithDelay = async () => {
  try {
    for (const entry of finalList) {
      const { url, filePath } = entry;

      // Randomised pause so requests are not sent at a fixed rhythm.
      const seconds = _.random(1, 3, true);
      await delay(seconds * 1000);
      console.log(`delay:${seconds}s ,接口 ${url}   filePath:${filePath}`);

      const { data } = await server.get(url);
      saveJson(filePath, data);
    }
  } catch (error) {
    console.error('请求出错:', error);
  }
};

/**
 * Serialize `data` as JSON and write it to a file derived from `pathStr`.
 *
 * `pathStr` is split on "/". Up to the first two segments form the target
 * directory (created when missing). Every remaining segment is flattened into
 * a single file name in which `"`, `/` and `;` are each replaced by a space.
 * The last segment is always treated as (part of) the file name, so short
 * paths such as "dir/file.json" or "file.json" are written as files instead
 * of being turned into a directory.
 *
 * @param {string} pathStr - "/"-separated target path, e.g. "out/category/name.json".
 * @param {*} data - Value passed to JSON.stringify.
 */
const saveJson = (pathStr, data) => {
  const parts = pathStr.split('/');

  // At most two leading segments are the directory, but never the last one.
  const dirDepth = Math.min(2, parts.length - 1);
  let parentPath = parts.slice(0, dirDepth).join('/');
  if (parentPath === '' && dirDepth > 0) {
    // pathStr looked like "/name": the only directory segment is the root.
    parentPath = '/';
  }

  // Create the parent directory; recursive mode tolerates an existing one.
  if (parentPath !== '') {
    fs.mkdirSync(parentPath, { recursive: true });
  }

  // Build the file path: flatten the remaining segments into one safe name.
  const fileName = parts
    .slice(dirDepth)
    .join('/')
    .replaceAll("\"", " ")
    .replaceAll("/", " ")
    .replaceAll(";", " ");
  const filePath = path.join(parentPath, fileName);

  // Write the file.
  fs.writeFileSync(filePath, JSON.stringify(data), 'utf-8');
};

/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 * The wait is non-blocking; callers pause by awaiting the returned promise.
 *
 * @param {number} ms - Delay in milliseconds, forwarded to setTimeout.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}


上述代码剔除了爬虫业务相关的部分(例如 finalList 的构造),只保留了核心的顺序延迟请求以及保存文件的示例。