目标
应用场景如下:
自动发布QQZone图文说说
调研
场景分析及技术考虑
粗略排除难点及技术障碍
最大障碍,验证码缺口识别
技术支持
难点总结
- puppeteer 处理 iframe
- 验证码如何正确判断缺口位置
- await page.evaluate() 如何正确返回 promise
业务实现
把大象装进冰箱
初始设置 puppeteer 启动
// Launch headless Chromium with a phone-sized default viewport.
let browser = await puppeteer.launch({
  headless: true,
  defaultViewport: {
    width: 320,
    height: 568,
    isMobile: true
  },
  timeout: 100000,
  // Chromium switches start with two ASCII hyphens; the previous value used an
  // en-dash ("–no-sandbox"), which Chromium does not recognise as a switch.
  args: ['--no-sandbox']
})
let page = await browser.newPage()
await page.setViewport({
  width: 320,
  height: 568
})
// Interception is switched on here; every request must then be continued or
// aborted by a 'request' handler registered on the page.
await page.setRequestInterception(true)
// Apply the device descriptor held in `iPhone` (defined outside this snippet).
await page.emulate(iPhone)
console.log(`info: browser connect success...`)
打开登录页输入 QQ 账户及密码
输入时设置 delay: 100(每个字符间隔 100ms),模拟真实用户的输入速度
// Opening https://h5.qzone.qq.com/mqzone/index directly redirects to the login
// URL below, so go there straight away. `t` is a millisecond timestamp.
const loginQuery =
  'pt_hide_ad=1&style=9&appid=549000929&pt_no_auth=1&pt_wxtest=1&daid=5&s_url=https://h5.qzone.qq.com/mqzone/index'
let url = `https://ui.ptlogin2.qq.com/cgi-bin/login?${loginQuery}&t=${Date.now()}`
const navigationOptions = { timeout: 0, waitUntil: 'networkidle0' }
await page.goto(url, navigationOptions)
await page.waitFor('#web_login')
// Type the account and then the password with a 100 ms delay per keystroke.
const keystroke = { delay: 100 }
const credentials = [
  ['#u', '567890'],
  ['#p', '123456']
]
for (const [selector, text] of credentials) {
  await page.type(selector, text, keystroke)
}
await page.click('#go')
// What shows up next is not your space but a captcha...
计算验证码图片缺口位置
- 下载验证图片到本地
- 计算滑块缺口位置
- 移动滑块到指定位置
- 客户端思路
原思路是在 evaluate 函数里处理图片,得到宽高并转化成 base64,进而用 rgba 找到图片缺口的位置;但是中途遇到 HTML canvas 跨域问题,本人未找到解决方案,遂放弃。
// Check whether the captcha was triggered: it counts as shown when the
// #new_vcode element is rendered with `display: block`.
let verification = await page.evaluate(() => {
  const newVcode = document.querySelector('#new_vcode')
  // querySelector returns null when the element is not in the DOM, and
  // getComputedStyle(null) throws, so treat a missing element as "no captcha".
  if (!newVcode) {
    return false
  }
  return window.getComputedStyle(newVcode).display === 'block'
})
console.log(`info:verification: ${verification}`)
// 如果有验证码拦截,做如下处理
if (verification) {
console.log('info: start drag...')
// 等待验证码加载出来=>此处可由程序判断是否加载处理验证码
await page.waitFor(10000)
// iframe 加载出来=>puppeteer与iframe的处理
const eh = await page.$('#tcaptcha_iframe')
const frame = await eh.contentFrame()
const dragMove = async () => {
// Fetch the captcha background image's src and rendered width from inside the
// iframe. On success `src` is a base64 PNG data URL redrawn at rendered size;
// if the image fails to load, `src` falls back to the element's src attribute.
let { src, width } = await frame.evaluate(
  () =>
    new Promise(resolve => {
      const bgImg = document.querySelector('#slideBg')
      const originalSrc = bgImg.getAttribute('src')
      const width = parseInt(window.getComputedStyle(bgImg).width)
      const height = parseInt(window.getComputedStyle(bgImg).height)
      const probe = new Image()
      // Attempt to work around the canvas cross-origin restriction; this does
      // not appear to actually solve the problem.
      probe.setAttribute('crossOrigin', 'Anonymous')
      probe.src = originalSrc
      probe.onload = () => {
        const canvas = document.createElement('canvas')
        canvas.width = width
        canvas.height = height
        canvas.getContext('2d').drawImage(probe, 0, 0, width, height)
        resolve({ src: canvas.toDataURL('image/png'), width })
      }
      probe.onerror = err => {
        console.log('浏览器控制台=>', err)
        resolve({ src: bgImg.getAttribute('src'), width })
      }
    })
)
console.log(`info: src: ${src}`)
console.log(`info: width: ${width}`)
// 获取缺口左x坐标->canvas跨域???
// Canvas是不支持跨域的,改浏览器启动参数只是让自己电脑支持而已,解决不了真正的跨域问题
let distance = await page.evaluate(
async (base64, actualWidth) => {
/**
 * Composite one RGBA colour over another and return the blended colour.
 * Colours are `[r, g, b, a]`; the alpha components are used as 0..1 weights.
 * @param rgba1 base colour
 * @param rgba2 mask colour laid over the base
 * @returns {number[]} blended `[r, g, b, a]`
 */
function combineRgba(rgba1, rgba2) {
  const [baseR, baseG, baseB, baseA] = rgba1
  const [maskR, maskG, maskB, maskA] = rgba2
  const outA = baseA + maskA - baseA * maskA
  // The same blend formula is applied to each colour channel.
  const blend = (base, mask) =>
    (base * baseA + mask * maskA - base * baseA * maskA) / outA
  return [blend(baseR, maskR), blend(baseG, maskG), blend(baseB, maskB), outA]
}
/**
 * Report whether two colours are similar: each of the r, g and b channels of
 * `rgba1` must lie strictly within `t` of the same channel of `rgba2`.
 * Alpha is not compared.
 * @param rgba1
 * @param rgba2
 * @param t allowed difference per channel (exclusive bound)
 * @returns {boolean}
 */
function tolerance(rgba1, rgba2, t) {
  for (const channel of [0, 1, 2]) {
    const value = rgba1[channel]
    const target = rgba2[channel]
    const inRange = value > target - t && value < target + t
    if (!inRange) {
      return false
    }
  }
  return true
}
// Create a w x h canvas with id "CursorLayer", append it to <body>, and return it.
function createCanvas(w, h) {
  const layer = document.createElement('canvas')
  Object.assign(layer, { id: 'CursorLayer', width: w, height: h })
  const [pageBody] = document.getElementsByTagName('body')
  pageBody.appendChild(layer)
  return layer
}
// get-x
return await new Promise((resolve, reject) => {
const canvas = createCanvas(1000, 1000)
const ctx = canvas.getContext('2d')
const img = new Image()
img.setAttribute('crossOrigin', 'Anonymous')
img.src = base64
img.onload = () => {
console.log('loaded......')
const width = img.naturalWidth
const height = img.naturalHeight
ctx.drawImage(img, 0, 0)
const maskRgba = [0, 0, 0, 0.65]
const t = 5 // 色差容忍值
let prevPixelRgba = null
for (let x = 0; x < width; x++) {
// 重新开始一列,清除上个像素的色值
prevPixelRgba = null
for (let y = 0; y < height; y++) {
const rgba = ctx.getImageData(x, y, 1, 1).data
if (prevPixelRgba) {
// 所有原图中的 alpha 通道值都是1
prevPixelRgba[3] = 1
const maskedPrevPixel = combineRgba(prevPixelRgba, maskRgba)
// 只要找到了一个色值匹配的像素点则直接返回,因为是自上而下,自左往右的查找,第一个像素点已经满足"最近"的条件
if (tolerance(maskedPrevPixel, rgba, t)) {
resolve((x * actualWidth) / width)
}
} else {
prevPixelRgba = rgba
}
}
}
// 没有找到任何符合条件的像素点
resolve(0)
}
img.onerror = () => {
console.log('error......')
resolve(0)
}
})
},
src,
width
)
await page.waitFor(10000)
console.log('--->distance', distance)
// 无法转成base64 计算出来的distance错误
// ....
}
}
- 服务端思路
后采用 npm 的 canvas 工具包在 nodejs 里面计算缺口位置
// Check whether the captcha was triggered: it counts as shown when the
// #new_vcode element is rendered with `display: block`.
let verification = await page.evaluate(() => {
  const newVcode = document.querySelector('#new_vcode')
  // querySelector returns null when the element is not in the DOM, and
  // getComputedStyle(null) throws, so treat a missing element as "no captcha".
  if (!newVcode) {
    return false
  }
  return window.getComputedStyle(newVcode).display === 'block'
})
console.log(`info:verification: ${verification}`)
// 如果有验证码拦截,做如下处理
if (verification) {
console.log('info: start cacle ...')
// 等待验证码加载出来=>此处可由程序判断是否加载处理验证码
await page.waitFor(10000)
// iframe 加载出来=>puppeteer与iframe的处理
const eh = await page.$('#tcaptcha_iframe')
const frame = await eh.contentFrame()
/**
 * Perform one slider-captcha attempt: download the captcha background image,
 * compute the gap position in Node, drag the slider thumb that far, then check
 * the result.
 * Uses `page` and `frame` (the captcha iframe) from the enclosing scope.
 * @returns {Promise<boolean>} true when `#page-content button.input-btn` is
 *   present on the page after the drag, false otherwise
 */
const dragMove = async () => {
  // Read the displayed image's src and rendered size inside the iframe.
  let { src, width } = await frame.evaluate(async () => {
    let img = document.querySelector('#slideBg')
    let src = img.getAttribute('src')
    // The computed sizes are strings such as "280px"; parse them as base 10.
    let width = parseInt(window.getComputedStyle(img).width, 10)
    let height = parseInt(window.getComputedStyle(img).height, 10)
    return {
      src,
      width,
      height
    }
  })
  console.log(`info: src: ${src}`)
  console.log(`info: width: ${width}`)
  // Where the captcha image is cached on disk.
  const codePath = './code.jpg'
  // Cache the image locally.
  const downloadImage = require('./../utils/downloadImage')
  await downloadImage(src, codePath)
  console.log('info: downloadImage success')
  const calcSliderDistance = require('./../utils/getVerifyPosition')
  let { distance, realWidth } = calcSliderDistance(codePath)
  console.log(`info: calcSliderDistance success`)
  console.log(`info: distance ${distance}`)
  console.log(`info: realWidth ${realWidth}`)
  // Scale the distance from image pixels to rendered pixels. The values are
  // already numbers, so truncate with Math.trunc rather than parseInt.
  distance = Math.trunc((width / realWidth) * distance)
  console.log(`info: distance ${distance}`)
  // Width of the puzzle piece.
  const matchWidth = await frame.evaluate(() => {
    let matchRect = document.querySelector('#slideBlock')
    const { width } = matchRect.getBoundingClientRect()
    return width
  })
  // Subtract the piece width.
  distance = Math.trunc(distance - matchWidth)
  console.log(`info: distance ${distance}`)
  // Allow for the PNG shadow of the piece, roughly 8px on each side.
  distance = distance - 2 * 8
  console.log(`info: distance ${distance}`)
  // Slider thumb position.
  // NOTE(review): getBoundingClientRect here is relative to the iframe's own
  // viewport, while page.mouse is driven from the page object — confirm the
  // iframe sits at the page origin, or add its offset.
  const dragBtnPosition = await frame.evaluate(() => {
    let dragBtn = document.querySelector('#tcaptcha_drag_thumb')
    const { x, y, width, height } = dragBtn.getBoundingClientRect()
    console.log({ x, y, width, height })
    return { x, y, width, height }
  })
  // Serialise the object; interpolating it directly printed "[object Object]".
  console.log(`info: dragBtnPosition ${JSON.stringify(dragBtnPosition)}`)
  // Press at the centre of the thumb.
  const x = dragBtnPosition.x + dragBtnPosition.width / 2
  const y = dragBtnPosition.y + dragBtnPosition.height / 2
  console.log('info: start move...')
  if (distance > 10) {
    // Long enough: split the drag into two stages to imitate a human.
    const distance1 = distance - 10
    const distance2 = 10
    await page.mouse.move(x, y)
    await page.mouse.down()
    // First stage.
    await page.mouse.move(x + distance1, y, { steps: 30 })
    await page.waitFor(500)
    // Second stage.
    await page.mouse.move(x + distance1 + distance2, y, { steps: 20 })
    await page.waitFor(500)
    await page.mouse.up()
  } else {
    // Otherwise slide straight to the target position.
    await page.mouse.move(x, y)
    await page.mouse.down()
    await page.mouse.move(x + distance, y, { steps: 30 })
    await page.mouse.up()
  }
  // Wait for the verification result.
  await page.waitFor(10000)
  const success = await page.$('#page-content button.input-btn')
  return !!success
}
/**
 * Keep calling `dragMove()` until it reports success or the attempts run out.
 * Each attempt is awaited, so awaiting `loop()` covers every retry; the
 * previous recursive call was fired without `await`, which let the caller
 * continue while retries were still running.
 * @param {number} [maxAttempts=10] upper bound on attempts
 * @returns {Promise<boolean>} true if an attempt succeeded, false otherwise
 */
const loop = async (maxAttempts = 10) => {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const success = await dragMove()
    console.log(`info: success: ${success}`)
    if (success) {
      return true
    }
    console.log(`fail: loop...`)
  }
  return false
}
await loop()
}
console.log(`info: end move....`)
utils
下载缺口图片到本地
const fs = require('fs')
const path = require('path')
const https = require('https')
/**
 * Download `url` over HTTPS into `savePath`, resolved relative to this
 * module's directory.
 * @param {string} url HTTPS URL of the image
 * @param {string} savePath destination path, relative to `__dirname`
 * @returns {Promise<void>} resolves once the file has been fully written;
 *   rejects on a request error, a non-2xx response, or a write error
 */
const downloadImage = (url, savePath) =>
  new Promise((resolve, reject) => {
    const target = path.resolve(__dirname, savePath)
    const request = https.get(url, res => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // Drain the response so the socket is released, then fail.
        res.resume()
        reject(new Error(`download failed: HTTP ${res.statusCode} for ${url}`))
        return
      }
      const file = fs.createWriteStream(target)
      // Resolve on 'finish' (all data flushed), not when the response merely
      // starts, so callers can read the file immediately afterwards.
      file.on('finish', resolve)
      file.on('error', reject)
      res.on('error', reject)
      res.pipe(file)
    })
    request.on('error', reject)
  })
// !(async () => {
// console.log('start')
// await downloadImage(`https://img.alicdn.com/tfs/TB1Z5RLd9SD3KVjSZFKXXb10VXa-2277-645.png`, `./code.jpg`)
// console.log('end')
// })()
module.exports = downloadImage
计算验证图片缺口位置
const path = require('path')
const fs = require('fs')
const { createCanvas, Image } = require('canvas')
/**
 * Estimate the x position of the captcha gap in a saved image.
 * Rows in the middle half of the image are scanned from the right edge towards
 * the centre; the first near-white hit per row is recorded, and the largest
 * recorded column is returned.
 * @param {string} imgPath image path, resolved relative to this module's directory
 * @returns {{distance: number, realWidth: number}} `distance` is the matched
 *   column in image pixels (0 when nothing matched); `realWidth` is the
 *   image's width
 */
const calcSliderDistance = imgPath => {
  const buf = fs.readFileSync(path.resolve(__dirname, imgPath))
  const img = new Image()
  img.src = buf
  const width = img.width
  const height = img.height
  // Size the canvas to the image so pictures larger than a fixed 1000x1000
  // canvas are not clipped.
  const canvas = createCanvas(width, height)
  const ctx = canvas.getContext('2d')
  ctx.drawImage(img, 0, 0, width, height)
  const imgData = ctx.getImageData(0, 0, width, height).data
  const arr = []
  const minHeight = Math.floor(height / 4)
  const maxHeight = minHeight * 3
  const minWidth = Math.floor(width / 2)
  for (let i = minHeight; i < maxHeight; i++) {
    // Start at the last valid column: index `width` belongs to the next row.
    for (let j = width - 1; j > minWidth; j--) {
      // Byte offset of this pixel's red channel (4 bytes per pixel).
      const p = (width * i + j) << 2
      // NOTE(review): p - 1 is the previous pixel's alpha byte and p + 1 is
      // this pixel's green byte, so this tests "red and green near 255". If
      // red, green and blue were intended, use p, p + 1 and p + 2 — confirm.
      if (
        255 - imgData[p] < 10 &&
        255 - imgData[p - 1] < 10 &&
        255 - imgData[p + 1] < 10
      ) {
        arr.push(j)
        break
      }
    }
  }
  return {
    // Math.max() of an empty list is -Infinity; report 0 when nothing matched.
    distance: arr.length > 0 ? Math.max(...arr) : 0,
    realWidth: width
  }
}
// console.log(calcSliderDistance('./code.jpg'))
module.exports = calcSliderDistance
发布图文说说
// Publish a text + image post, then read the newest feed entry back as a check.
console.log(`open ... https://h5.qzone.qq.com/mqzone/index`)
await page.waitFor('#page-content')
await page.click('#page-content button.input-btn')
await page.waitFor('#form-0')
// Take one timestamp so hour, minute and second come from the same instant.
const sentAt = new Date()
await page.type(
  '.J_textareaWrapper textarea',
  `auto send ${sentAt.getHours()}:${sentAt.getMinutes()}:${sentAt.getSeconds()}`,
  {
    delay: 100
  }
)
const fileInput = await page.waitForSelector('#addphoto')
await fileInput.uploadFile(path.resolve(__dirname, `a.png`))
// Start waiting for the navigation before clicking. Waiting only after the
// click can miss a navigation that already finished, and with `timeout: 0`
// that wait would never end.
await Promise.all([
  page.waitForNavigation({
    timeout: 0,
    waitUntil: 'networkidle0'
  }),
  page.click('#form-0-submit')
])
// // Jump to the list of published posts
// await page.goto(`https://h5.qzone.qq.com/mqzone/index#3153926643/info/all`, {
//   timeout: 0,
//   waitUntil: 'networkidle0'
// })
console.log(
  `info: go to https://h5.qzone.qq.com/mqzone/index#3153926643/info/all`
)
// await page.waitForResponse
await page.waitFor(5000)
await page.waitFor('#feed_list_cot_all')
// Extract the text and image URL of the first feed item.
let res = await page.evaluate(() => {
  let item = document.querySelectorAll('.feed.dataItem')
  let curItem = item[0]
  let txt = curItem.querySelector('.feed-bd .txt').innerText
  // Example image URL:
  // https://m.qpic.cn/psb?/V11BqWbN0U76uG/Z4CagcTIjhyNmwtAhRazKo3OE..BnJFvsbvROSe6Jv8!/c/dMUAAAAAAAAA&ek=1&kp=1&pt=0&bo=bAJ*AWwCfwERADc!&tl=1&su=013035857&vuin=3153926643&tm=1566374400
  let domImg = curItem.querySelector('.feed-bd .img')
  // The image is a CSS background; strip the url("...") wrapper.
  let img = domImg
    ? domImg.style.backgroundImage.replace('url("', '').replace('")', '')
    : ''
  return {
    txt,
    img
  }
})
console.log(`info: auto send success...`)
console.log(res)
await browser.close()
错误处理与重启
注意设置上限,多次超时或者多次重启失败,给自己发告警邮件
// Run the job and restart it once on failure. `run()` is awaited inside the
// try block so both synchronous throws and rejected promises are caught; a
// plain try/catch around an un-awaited call cannot see asynchronous failures.
// Attempts are capped so repeated failures stop instead of looping forever.
const MAX_RUN_ATTEMPTS = 2
const runWithRestart = async () => {
  for (let attempt = 1; attempt <= MAX_RUN_ATTEMPTS; attempt++) {
    try {
      await run()
      return
    } catch (error) {
      console.error(`run failed (attempt ${attempt}/${MAX_RUN_ATTEMPTS}):`, error)
      if (attempt < MAX_RUN_ATTEMPTS) {
        console.log(`再次 run...`)
      }
    }
  }
}
runWithRestart()
踩坑
canvas 安装报错
需要依赖,根据提示安装即可(暂未验证)
# linux
# System libraries to install when the canvas package fails to install
# (per the note above, this command has not been verified yet).
sudo yum install cairo cairo-devel cairomm-devel libjpeg-turbo-devel pango pango-devel pangomm pangomm-devel giflib-devel
- 20191113 在 centos7.4 上部署出现问题=>(centos 深坑)
- 参考-linux(CentOS6)安装 canvas(nodejs)的踩坑之旅
- centos6.5 升级 gcc 到 4.9
- centos 7 下 Paraviewweb 环境的安装和配置 和 npm install canvas
解决如下
# Install the basic build toolchain (-y keeps the steps non-interactive,
# matching the other yum calls below)
yum -y groupinstall "Development Tools"
yum -y install glibc-static libstdc++-static
yum -y install gcc gcc-c++ kernel-devel
# Download the gcc source tarball into /usr/local/src and unpack it
cd /usr/local/src
wget ftp://gcc.gnu.org/pub/gcc/releases/gcc-5.4.0/gcc-5.4.0.tar.gz
# The tarball must be extracted before the gcc-5.4.0 directory exists
tar -xzf gcc-5.4.0.tar.gz
# Download the prerequisites gcc needs
cd /usr/local/src/gcc-5.4.0
./contrib/download_prerequisites
# Create a build directory under /usr/local/src/ and enter it
cd /usr/local/src/
mkdir -p build
cd build
/usr/local/src/gcc-5.4.0/configure --enable-checking=release --enable-languages=c,c++ --disable-multilib
# Compile and install gcc
make
make install
# make failed with errors here
# Tempted to switch to Ubuntu...
# Install cairo and related packages
su -c 'yum install cairo cairo-devel cairomm-devel libjpeg-turbo-devel pango pango-devel pangomm pangomm-devel'
# Install canvas
npm install canvas --save
无关请求会阻塞
url.includes粗暴处理,但是肯定有更优解
// Abort requests whose URL contains a blocked fragment and let every other
// request through, so unrelated requests do not block the page.
page.on('request', request => {
  const blockedFragments = ['google.com', 'baidu.com']
  const target = request.url()
  const isBlocked = blockedFragments.some(fragment => target.includes(fragment))
  if (isBlocked) {
    request.abort()
    return
  }
  request.continue()
})