const cluster = require('cluster');
const cpuNums = require('os').cpus().length;
const redisLock = require('./redis');
const spider = require('./action');
// Settings applied to every worker forked by this master: workers execute
// work.js and are started with the arguments `--use http`.
const workerSettings = {
  args: ['--use', 'http'],
  exec: 'work.js',
};
cluster.setupMaster(workerSettings);
/**
 * Tallies how often each id appears in `arr`.
 *
 * @param {Array} arr - Ids to count.
 * @returns {Object} Plain object whose keys are `'work' + id` and whose
 *   values are the number of times that id occurred.
 */
function analyzeArr(arr) {
  return arr.reduce((tally, id) => {
    const key = 'work' + id;
    // A key that has not been seen yet starts from zero.
    tally[key] = (tally[key] || 0) + 1;
    return tally;
  }, {});
}
// Number of pages to crawl; sent to every worker and compared against the
// count of worker messages to detect completion.
const pageLenth = 100;
// Number of messages received from workers so far.
let pageNum = 0;
// Start timestamp in milliseconds, used to report the total elapsed time.
const startTime = Date.now();
// Worker id of every message received, in arrival order.
const widArr = [];

// Reset the page counter in Redis BEFORE forking any worker. Forking without
// waiting would let a worker read a stale `page` value, and a failed reset
// would surface only as an unhandled promise rejection.
redisLock.set('page', '0')
  .then(() => {
    for (let i = 0; i < cpuNums; ++i) {
      const work = cluster.fork();
      // Job description for the worker: [worker index, worker count, page limit].
      work.send([i, cpuNums, pageLenth]);
      console.log(`[master] : fork worker ${i + 1}\n`);
    }
  })
  .catch((err) => {
    // Without a reset counter (or without workers) the run cannot proceed.
    console.error('[master] : failed to start workers', err);
    process.exit(1);
  });
// Every message from a worker is counted as one finished page. Once
// `pageLenth` messages have arrived, print the elapsed time and the number of
// messages handled per worker, then shut down.
cluster.on('message', (worker) => {
  ++pageNum;
  widArr.push(worker.id);
  if (pageNum !== pageLenth) {
    return;
  }
  console.log(`-----------------Time:${Date.now() - startTime}-------------------\n`);
  console.log(analyzeArr(widArr));
  // Wait one second before disconnecting the workers.
  setTimeout(() => {
    cluster.disconnect();
    // Close the master's own Redis connection as well; an open client socket
    // would otherwise keep the master process alive after the run.
    redisLock.redisClient.quit();
  }, 1000);
});
const redis = require('redis')
// Lua script that acquires the lock: sets KEYS[1] to ARGV[1] only if the key
// does not exist yet (NX), with an expiry of ARGV[2] milliseconds (PX).
const lockScript = 'return redis.call("set", KEYS[1], ARGV[1], "NX", "PX", ARGV[2])';
// Lua script that releases the lock: deletes KEYS[1] only when it still holds
// ARGV[1]; returns 0 when the stored value differs.
const unlockScript = 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end';
// Declared with `const`: the bare assignment created an implicit global and
// throws a ReferenceError in strict mode.
const redisClient = redis.createClient();
// Log client-level errors.
redisClient.on("error", function (err) {
  console.log("Error " + err);
});
/**
 * Makes one lock attempt on a later turn of the event loop.
 *
 * The attempt is deferred with a zero-delay timer so repeated attempts yield
 * to other pending work between tries.
 *
 * @param {string} resourse - Key of the lock.
 * @param {string} uniqueStr - Token identifying the lock holder.
 * @param {string|number} [ttl] - Expiry passed through to `lock`.
 * @returns {Promise<boolean>} `true` when `lock` returned a truthy reply,
 *   `false` otherwise.
 * @throws Rejects with the error from `lock`. Previously such an error was
 *   raised inside the timer callback, leaving this promise pending forever.
 */
async function loop(resourse, uniqueStr, ttl) {
  await new Promise((resolve) => {
    setTimeout(resolve, 0);
  });
  const result = await lock(resourse, uniqueStr, ttl);
  return Boolean(result);
}
/**
 * Keeps calling `loop` until an attempt succeeds.
 *
 * @param {string} resourse - Key of the lock.
 * @param {string} uniqueStr - Token identifying the lock holder.
 * @param {string|number} [ttl] - Expiry passed through to `loop`.
 * @returns {Promise<boolean>} Resolves with `true` once an attempt succeeded.
 */
async function attemplock(resourse, uniqueStr, ttl) {
  let acquired = await loop(resourse, uniqueStr, ttl);
  while (!acquired) {
    // Not acquired yet: try again with the same arguments.
    acquired = await loop(resourse, uniqueStr, ttl);
  }
  return true;
}
/**
 * Makes a single attempt to take the lock by running `lockScript`.
 *
 * @param {string} resourse - Key of the lock.
 * @param {string} uniqueStr - Value stored under the key to identify the holder.
 * @param {string|number} [ttl] - Expiry handed to the script; any falsy value
 *   is replaced by "30000".
 * @returns {Promise<*>} Resolves with the script's reply; rejects (after
 *   logging) with the error reported by the client.
 */
async function lock(resourse, uniqueStr, ttl) {
  const expiry = ttl || "30000";
  return new Promise((resolve, reject) => {
    const onReply = (err, reply) => {
      if (!err) {
        resolve(reply);
        return;
      }
      console.error(err);
      reject(err);
    };
    redisClient.eval(lockScript, 1, resourse, uniqueStr, expiry, onReply);
  });
}
/**
 * Releases the lock by running `unlockScript` with the holder's token.
 *
 * @param {string} resourse - Key of the lock.
 * @param {string} uniqueStr - Token that was used when the lock was taken.
 * @returns {Promise<*>} Resolves with the script's reply; rejects (after
 *   logging) with the error reported by the client.
 */
async function unlock(resourse, uniqueStr) {
  return new Promise((resolve, reject) => {
    const onReply = (err, reply) => {
      if (!err) {
        resolve(reply);
        return;
      }
      console.error(err);
      reject(err);
    };
    redisClient.eval(unlockScript, 1, resourse, uniqueStr, onReply);
  });
}
/**
 * Promise wrapper around the client's callback-style `get`.
 *
 * @param {string} key - Key to read.
 * @returns {Promise<*>} Resolves with the client's reply; rejects (after
 *   logging) with the error reported by the client.
 */
async function get(key) {
  return new Promise((resolve, reject) => {
    const onReply = (err, reply) => {
      if (!err) {
        resolve(reply);
        return;
      }
      console.error(err);
      reject(err);
    };
    redisClient.get(key, onReply);
  });
}
/**
 * Promise wrapper around the client's callback-style `set`.
 *
 * @param {string} key - Key to write.
 * @param {string} value - Value to store under the key.
 * @returns {Promise<*>} Resolves with the client's reply; rejects (after
 *   logging) with the error reported by the client.
 */
async function set(key, value) {
  return new Promise((resolve, reject) => {
    const onReply = (err, reply) => {
      if (!err) {
        resolve(reply);
        return;
      }
      console.error(err);
      reject(err);
    };
    redisClient.set(key, value, onReply);
  });
}
// Public interface of this module: the Redis client itself plus the
// promise-returning lock helpers (attemplock, lock, loop, unlock) and the
// key helpers (get, set).
module.exports = {
redisClient,
attemplock,
set,
get,
unlock,
lock,
loop
}
const spider = require('./action');
const crypto = require('crypto');
const redisLock = require('./redis');
// Print this worker's process id.
console.log(process.pid);

/**
 * Crawls pages until the counter stored under the Redis key `page` reaches
 * the limit in `param[2]`, then exits the process.
 *
 * Each cycle takes the `lock:page` lock with a fresh random token, reads the
 * counter, crawls that page, reports it to the parent process, stores the
 * incremented counter and releases the lock. The release sits in `finally`
 * so a failing crawl or Redis call no longer leaves the lock held until its
 * expiry.
 *
 * NOTE(review): the crawl itself runs while the lock is held, so only one
 * worker crawls at a time. This matches the previous behaviour; confirm it
 * is intended.
 *
 * @param {Array} param - Job message; only `param[2]` (the page limit) is read.
 */
process.on('message', async (param) => {
  const resourse = 'lock:page';
  const pageLimit = param[2];
  for (;;) {
    const uniqueStr = crypto.randomBytes(10).toString('hex');
    await redisLock.attemplock(resourse, uniqueStr);
    try {
      const page = Number(await redisLock.get('page'));
      // Written as a negated `<` so a NaN counter also stops the worker.
      if (!(page < pageLimit)) {
        break;
      }
      await spider(page);
      process.send(page);
      await redisLock.set('page', String(page + 1));
    } finally {
      await redisLock.unlock(resourse, uniqueStr);
    }
  }
  process.exit(0);
});
const axios = require('axios')
const api = 'http://10.35.93.10:801/document/picture/list';
module.exports = (pageStart) => {
return new Promise((resolve, reject) => {
axios({
method: 'get',
params: {
pageStart
},
url: api,
contentType: "application/json; charset=UTF-8"
}).then(data => {
console.log(`-------------子进程 ${process.pid} 成功爬去第${pageStart + 1}页数据-------------\n`)
resolve(data.data.data.data);
}).catch(err => {
console.log(err)
reject(err)
})
});
};