Python 好久不用,有点生疏了。突然有个想法:用 axios 写爬虫,然后把结果保存为 .json 或者 Excel 文件。
const fs = require('fs');
const path = require('path');
const _ = require("lodash")
const axios = require('axios');
// Create a shared axios instance so every request made through `server`
// uses the same base URL and default request headers.
const server = axios.create({
// Relative URLs passed to `server.get(...)` are resolved against this base.
// NOTE(review): "https://xxx.com/" looks like a placeholder — confirm the real host.
baseURL: "https://xxx.com/",
headers: {
'Content-Type': 'application/json',
// User-Agent string that identifies the client as the WeChat (MicroMessenger)
// mini-program environment on Windows.
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x6309080f)XWEB/8461",
// NOTE(review): "xxxx" is presumably a redacted placeholder for a real session cookie — confirm.
'Cookie': "xxxx",
}
});
/**
 * Walks `finalList` one entry at a time: waits a random 1-3 seconds, GETs the
 * entry's `url` through the shared `server` axios instance, and hands the
 * response body to `saveJson` together with the entry's `filePath`.
 *
 * `finalList` is not defined in this block; it is expected to be an iterable
 * of `{ url, filePath }` objects (request URL and the output path to generate).
 *
 * Any error thrown inside the loop is logged and ends the loop; entries after
 * the failing one are not processed.
 *
 * @returns {Promise<void>}
 */
const fetchWithDelay = async () => {
  try {
    for (const entry of finalList) {
      const { url: targetUrl, filePath: outputPath } = entry;

      // Fractional pause between 1 and 3 seconds (third argument asks lodash for a float).
      const waitSeconds = _.random(1, 3, true);
      await delay(waitSeconds * 1000);

      console.log(`delay:${waitSeconds}s ,接口 ${targetUrl} filePath:${outputPath}`);

      const result = await server.get(targetUrl);
      saveJson(outputPath, result.data);
    }
  } catch (error) {
    console.error('请求出错:', error);
  }
};
/**
 * Serializes `data` as JSON and writes it to a file derived from `pathStr`.
 *
 * `pathStr` is split on "/". The first two segments form the target directory
 * (created recursively when missing). All remaining segments are flattened
 * into a single file name, with every `"`, `/` and `;` replaced by a space,
 * so `out/dir/a"b/c.json` is written to `out/dir/a b c.json`.
 *
 * @param {string} pathStr - Slash-separated target such as `out/dir/name.json`.
 * @param {*} data - Value passed to `JSON.stringify`; written as UTF-8.
 * @throws {Error} If nothing usable is left for the file name after the first
 *   two segments (for example `"out/dir"` or `"out/dir/"`).
 */
const saveJson = (pathStr, data) => {
  const segments = pathStr.split('/');
  const parentPath = segments.slice(0, 2).join('/');

  // Everything after the directory part collapses into one flat file name.
  const fileName = segments.slice(2).join('/')
    .replaceAll('"', ' ')
    .replaceAll('/', ' ')
    .replaceAll(';', ' ');

  // Fail before touching the disk: with an empty, "." or ".." name the write
  // would target a directory and die with an opaque EISDIR after the folder
  // had already been created.
  if (fileName === '' || fileName === '.' || fileName === '..') {
    throw new Error(`saveJson: "${pathStr}" has no file name after the first two path segments`);
  }

  // `recursive: true` is a no-op when the directory already exists, so no
  // separate existence check is needed.
  fs.mkdirSync(parentPath, { recursive: true });

  fs.writeFileSync(path.join(parentPath, fileName), JSON.stringify(data), 'utf-8');
};
/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of `ms`
 * milliseconds fires. `await` it inside an async function to pause that
 * function without blocking the event loop.
 *
 * @param {number} ms - Timeout in milliseconds handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
上述代码剔除了爬虫相关的业务逻辑,只保留核心的延迟等待(基于 Promise 的异步延迟)以及保存文件的示例。