puppeteer应用-自动登录QQ发布图文说说

1,497 阅读3分钟

目标

应用场景如下:
自动发布QQZone图文说说

调研

场景分析及技术考虑
粗略排除难点及技术障碍
最大障碍,验证码缺口识别

技术支持

  1. 核心js,nodejs
  2. 大将puppeteer
  3. 工具canvas

难点总结

  1. puppeteer 处理 iframe
  2. 验证码如何正确判断缺口位置
  3. await page.evaluate()正确返回promise

业务实现

把大象装进冰箱

初始设置 puppeteer 启动

// Launch headless Chromium with a phone-sized viewport.
let browser = await puppeteer.launch({
  headless: true,
  defaultViewport: {
    width: 320,
    height: 568,
    isMobile: true
  },
  timeout: 100000,
  // Chromium switches start with two ASCII hyphens; the previous value used an
  // en dash ('–no-sandbox'), so the switch was never recognised.
  args: ['--no-sandbox']
})
let page = await browser.newPage()
await page.setViewport({
  width: 320,
  height: 568
})
// With interception enabled, every request has to be continued or aborted by a
// 'request' listener; without one the page's requests stay pending.
await page.setRequestInterception(true)
// `iPhone` is a device descriptor defined outside this snippet.
await page.emulate(iPhone)
console.log(`info: browser connect success...`)

打开登录页输入 QQ 账户及密码

delay: 100模拟真实用户

// Opening `https://h5.qzone.qq.com/mqzone/index` redirects to the login URL below,
// so the login page is requested directly; `t` carries the current timestamp.
let url = `https://ui.ptlogin2.qq.com/cgi-bin/login?pt_hide_ad=1&style=9&appid=549000929&pt_no_auth=1&pt_wxtest=1&daid=5&s_url=https://h5.qzone.qq.com/mqzone/index&t=${Date.now()}`
await page.goto(url, { waitUntil: 'networkidle0', timeout: 0 })
await page.waitFor('#web_login')
// Fill in account then password, typing with a 100 ms delay between keystrokes.
for (const [selector, text] of [
  ['#u', '567890'],
  ['#p', '123456']
]) {
  await page.type(selector, text, { delay: 100 })
}
await page.click('#go')
// What shows up next is not the Qzone home page but the captcha...

计算验证码图片缺口位置

  1. 下载验证图片到本地
  2. 计算滑块缺口位置
  3. 移动滑块到指定位置
  • 客户端思路

    原思路是在 evaluate 函数里处理图片得到宽高以及转化成 base64,进而用 rgba 找到图片缺口的位置,但是中途遇到 html 的 canvas 跨域问题,且本人未找到解决方案,遂放弃...

// Check whether the captcha container is currently displayed
let verification = await page.evaluate(() => {
  let newVcode = document.querySelector('#new_vcode')
  // A missing #new_vcode element means no captcha is shown; passing null to
  // getComputedStyle would throw instead of reporting false.
  return !!newVcode && window.getComputedStyle(newVcode).display === 'block'
})
console.log(`info:verification: ${verification}`)

// 如果有验证码拦截,做如下处理
if (verification) {
  console.log('info: start drag...')
  // 等待验证码加载出来=>此处可由程序判断是否加载处理验证码
  await page.waitFor(10000)
  // iframe 加载出来=>puppeteer与iframe的处理
  const eh = await page.$('#tcaptcha_iframe')
  const frame = await eh.contentFrame()
  const dragMove = async () => {
    // 获取显示图片的src和宽度
    let { src, width } = await frame.evaluate(async () => {
      return await new Promise((resolve, reject) => {
        let img = document.querySelector('#slideBg')
        let src = img.getAttribute('src')
        let width = parseInt(window.getComputedStyle(img).width)
        let height = parseInt(window.getComputedStyle(img).height)
        let _img = new Image()
        // 处理canvas跨域,但是貌似仍未解决问题
        _img.setAttribute('crossOrigin', 'Anonymous')
        _img.src = src
        _img.onload = () => {
          let canvas = document.createElement('canvas')
          // let body = document.getElementsByTagName('body')[0]
          // body.appendChild(canvas)
          canvas.width = width
          canvas.height = height
          let ctx = canvas.getContext('2d')
          ctx.drawImage(_img, 0, 0, width, height)
          let src = canvas.toDataURL('image/png')
          resolve({ src, width })
        }
        _img.onerror = err => {
          console.log('浏览器控制台=>', err)
          let src = img.getAttribute('src')
          resolve({ src, width })
        }
      })
    })
    console.log(`info: src: ${src}`)
    console.log(`info: width: ${width}`)
    // 获取缺口左x坐标->canvas跨域???
    // Canvas是不支持跨域的,改浏览器启动参数只是让自己电脑支持而已,解决不了真正的跨域问题
    let distance = await page.evaluate(
      async (base64, actualWidth) => {
        /**
         * Composite a mask colour over a base colour; colours are [r, g, b, a].
         * Alpha is combined as a1 + a2 - a1 * a2, i.e. it is treated as a fraction.
         * @param rgba1 base colour
         * @param rgba2 mask colour drawn on top of the base
         * @returns {number[]} the blended [r, g, b, a]
         */
        function combineRgba(rgba1, rgba2) {
          const baseAlpha = rgba1[3]
          const maskAlpha = rgba2[3]
          const a = baseAlpha + maskAlpha - baseAlpha * maskAlpha
          // Same formula for each of the three colour channels
          const blend = channel =>
            (rgba1[channel] * baseAlpha +
              rgba2[channel] * maskAlpha -
              rgba1[channel] * baseAlpha * maskAlpha) /
            a
          return [blend(0), blend(1), blend(2), a]
        }
        /**
         * Tell whether two colours are similar: every one of the r, g and b
         * channels of rgba1 must lie strictly within t of the same channel of rgba2.
         * @param rgba1 first colour, [r, g, b, ...]
         * @param rgba2 second colour, [r, g, b, ...]
         * @param t allowed per-channel difference (exclusive)
         * @returns {boolean}
         */
        function tolerance(rgba1, rgba2, t) {
          return [0, 1, 2].every(
            channel =>
              rgba1[channel] > rgba2[channel] - t &&
              rgba1[channel] < rgba2[channel] + t
          )
        }
        // createCanvas: build a w x h <canvas> with id 'CursorLayer', append it
        // to the document body and hand it back.
        function createCanvas(w, h) {
          const layer = Object.assign(document.createElement('canvas'), {
            id: 'CursorLayer',
            width: w,
            height: h
          })
          document.getElementsByTagName('body')[0].appendChild(layer)
          return layer
        }
        // get-x
        return await new Promise((resolve, reject) => {
          const canvas = createCanvas(1000, 1000)
          const ctx = canvas.getContext('2d')
          const img = new Image()
          img.setAttribute('crossOrigin', 'Anonymous')
          img.src = base64
          img.onload = () => {
            console.log('loaded......')
            const width = img.naturalWidth
            const height = img.naturalHeight
            ctx.drawImage(img, 0, 0)
            const maskRgba = [0, 0, 0, 0.65]
            const t = 5 // 色差容忍值
            let prevPixelRgba = null
            for (let x = 0; x < width; x++) {
              // 重新开始一列,清除上个像素的色值
              prevPixelRgba = null
              for (let y = 0; y < height; y++) {
                const rgba = ctx.getImageData(x, y, 1, 1).data
                if (prevPixelRgba) {
                  // 所有原图中的 alpha 通道值都是1
                  prevPixelRgba[3] = 1
                  const maskedPrevPixel = combineRgba(prevPixelRgba, maskRgba)
                  // 只要找到了一个色值匹配的像素点则直接返回,因为是自上而下,自左往右的查找,第一个像素点已经满足"最近"的条件
                  if (tolerance(maskedPrevPixel, rgba, t)) {
                    resolve((x * actualWidth) / width)
                  }
                } else {
                  prevPixelRgba = rgba
                }
              }
            }
            // 没有找到任何符合条件的像素点
            resolve(0)
          }
          img.onerror = () => {
            console.log('error......')
            resolve(0)
          }
        })
      },
      src,
      width
    )
    await page.waitFor(10000)
    console.log('--->distance', distance)
    // 无法转成base64 计算出来的distance错误
    // ....
  }
}
  • 服务端思路

    后采用 npm 的 canvas 工具包在 nodejs 里面计算缺口位置

// Check whether the captcha container is currently displayed
let verification = await page.evaluate(() => {
  let newVcode = document.querySelector('#new_vcode')
  // A missing #new_vcode element means no captcha is shown; passing null to
  // getComputedStyle would throw instead of reporting false.
  return !!newVcode && window.getComputedStyle(newVcode).display === 'block'
})
console.log(`info:verification: ${verification}`)

// 如果有验证码拦截,做如下处理
if (verification) {
  console.log('info: start cacle ...')
  // 等待验证码加载出来=>此处可由程序判断是否加载处理验证码
  await page.waitFor(10000)
  // iframe 加载出来=>puppeteer与iframe的处理
  const eh = await page.$('#tcaptcha_iframe')
  const frame = await eh.contentFrame()
  /**
   * Make one attempt at the slider captcha: download the background image,
   * compute the gap offset in Node, drag the slider thumb by that offset and
   * report whether the attempt succeeded.
   * @returns {Promise<boolean>} true when `#page-content button.input-btn` is
   *   present on the page after the drag, false otherwise
   */
  const dragMove = async () => {
    // Read the displayed image's src and rendered width inside the captcha iframe
    let { src, width } = await frame.evaluate(async () => {
      let img = document.querySelector('#slideBg')
      let src = img.getAttribute('src')
      // Computed sizes are strings such as "280px"; parse them as base-10
      let width = parseInt(window.getComputedStyle(img).width, 10)
      let height = parseInt(window.getComputedStyle(img).height, 10)
      return {
        src,
        width,
        height
      }
    })
    console.log(`info: src: ${src}`)
    console.log(`info: width: ${width}`)

    // Where the captcha image is cached
    const codePath = './code.jpg'
    // Cache the image on disk
    const downloadImage = require('./../utils/downloadImage')
    await downloadImage(src, codePath)
    console.log('info: downloadImage success')
    const calcSliderDistance = require('./../utils/getVerifyPosition')
    let { distance, realWidth } = calcSliderDistance(codePath)
    console.log(`info: calcSliderDistance success`)
    console.log(`info: distance ${distance}`)
    console.log(`info: realWidth ${realWidth}`)
    // Scale the offset from image pixels to rendered pixels.
    // Math.trunc replaces parseInt(number): parseInt stringifies its argument
    // first, which misreads very small or very large values.
    distance = Math.trunc((width / realWidth) * distance)
    console.log(`info: distance ${distance}`)
    // Width of the sliding puzzle piece
    const matchWidth = await frame.evaluate(() => {
      let matchRect = document.querySelector('#slideBlock')
      const { width } = matchRect.getBoundingClientRect()
      return width
    })
    // Subtract the piece width
    distance = Math.trunc(distance - matchWidth)
    console.log(`info: distance ${distance}`)
    // Allow for the PNG shadow of the piece, roughly 8px on each side
    distance = distance - 2 * 8
    console.log(`info: distance ${distance}`)
    // Slider thumb geometry
    // NOTE(review): getBoundingClientRect is evaluated inside the iframe, while
    // page.mouse takes page coordinates — confirm the iframe sits at the page origin.
    const dragBtnPosition = await frame.evaluate(() => {
      let dragBtn = document.querySelector('#tcaptcha_drag_thumb')
      const { x, y, width, height } = dragBtn.getBoundingClientRect()
      console.log({ x, y, width, height })
      return { x, y, width, height }
    })
    // Serialise the object; interpolating it directly printed "[object Object]"
    console.log(`info: dragBtnPosition ${JSON.stringify(dragBtnPosition)}`)
    // Press at the centre of the thumb
    const x = dragBtnPosition.x + dragBtnPosition.width / 2
    const y = dragBtnPosition.y + dragBtnPosition.height / 2
    console.log('info: start move...')
    await page.mouse.move(x, y)
    await page.mouse.down()
    if (distance > 10) {
      // Long enough: split the drag into two segments to imitate a human
      const distance1 = distance - 10
      const distance2 = 10
      // First segment
      await page.mouse.move(x + distance1, y, { steps: 30 })
      await page.waitFor(500)
      // Second segment
      await page.mouse.move(x + distance1 + distance2, y, { steps: 20 })
      await page.waitFor(500)
    } else {
      // Otherwise slide straight to the target
      await page.mouse.move(x + distance, y, { steps: 30 })
    }
    await page.mouse.up()
    // Wait for the verification result
    await page.waitFor(10000)
    const success = await page.$('#page-content button.input-btn')
    return !!success
  }
  /**
   * Repeat dragMove() until it reports success.
   * The previous version retried with a non-awaited recursive call, so the
   * caller's `await loop()` returned after the first failed attempt while the
   * retries were still running in the background.
   * @param {number} [maxAttempts=Infinity] upper bound on attempts; the default
   *   keeps the original retry-until-success behaviour
   * @returns {Promise<void>} resolves once an attempt succeeds
   * @throws {Error} when maxAttempts attempts have all failed
   */
  const loop = async (maxAttempts = Infinity) => {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const success = await dragMove()
      console.log(`info: success: ${success}`)
      if (success) {
        return
      }
      console.log(`fail: loop...`)
    }
    throw new Error(`captcha not solved after ${maxAttempts} attempts`)
  }
  await loop()
}
console.log(`info: end move....`)

utils

下载缺口图片到本地

const fs = require('fs')
const path = require('path')
const https = require('https')

/**
 * Download `url` over HTTPS and save it to `savePath`, resolved against this
 * module's directory.
 * The promise settles only after the file has been completely written; the
 * previous version resolved as soon as the response arrived, so a caller could
 * read a missing or partial file.
 * @param {string} url HTTPS address of the image
 * @param {string} savePath destination path, relative to `__dirname`
 * @returns {Promise<void>} resolves when the write stream has finished
 * @throws rejects on request, response or file errors and on non-2xx status codes
 */
const downloadImage = (url, savePath) =>
  new Promise((resolve, reject) => {
    const target = path.resolve(__dirname, savePath)
    const request = https.get(url, res => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // Drain the body so the socket is released, then report the failure
        res.resume()
        reject(new Error(`download failed: HTTP ${res.statusCode} for ${url}`))
        return
      }
      const file = fs.createWriteStream(target)
      file.on('finish', () => resolve())
      file.on('error', reject)
      res.on('error', err => {
        file.destroy()
        reject(err)
      })
      res.pipe(file)
    })
    request.on('error', reject)
  })
// !(async () => {
//   console.log('start')
//   await downloadImage(`https://img.alicdn.com/tfs/TB1Z5RLd9SD3KVjSZFKXXb10VXa-2277-645.png`, `./code.jpg`)
//   console.log('end')
// })()
module.exports = downloadImage

计算验证图片缺口位置

const path = require('path')
const fs = require('fs')
const { createCanvas, Image } = require('canvas')

/**
 * Estimate the x position of the captcha gap in an image file.
 * For every row in the middle half of the image, the right half is scanned
 * from right to left and the first bright pixel's column is recorded; the
 * largest recorded column is returned.
 * @param {string} imgPath image path, relative to this module's directory
 * @returns {{distance: number, realWidth: number}} `distance` is the column in
 *   image pixels (0 when no bright pixel was found), `realWidth` the image width
 */
const calcSliderDistance = imgPath => {
  const buf = fs.readFileSync(path.resolve(__dirname, imgPath))
  const img = new Image()
  // The dimensions are read right after assigning src, i.e. this relies on the
  // Image implementation decoding a Buffer synchronously.
  img.src = buf
  const width = img.width
  const height = img.height
  // Size the canvas from the image so nothing is clipped (it was a fixed 1000x1000)
  const canvas = createCanvas(width, height)
  const ctx = canvas.getContext('2d')
  ctx.drawImage(img, 0, 0, width, height)

  const imgData = ctx.getImageData(0, 0, width, height).data
  const arr = []
  const minHeight = Math.floor(height / 4)
  const maxHeight = minHeight * 3
  const minWidth = Math.floor(width / 2)
  for (let i = minHeight; i < maxHeight; i++) {
    // Start at the last column of the row: starting at `width` addressed the
    // first pixel of the NEXT row and could record an out-of-range column.
    for (let j = width - 1; j > minWidth; j--) {
      // Byte offset of this pixel's first channel (4 bytes per pixel)
      const p = (width * i + j) << 2
      // NOTE(review): p is red and p + 1 green, but p - 1 is the previous
      // pixel's alpha rather than this pixel's blue — kept as is; confirm intent.
      if (
        255 - imgData[p] < 10 &&
        255 - imgData[p - 1] < 10 &&
        255 - imgData[p + 1] < 10
      ) {
        arr.push(j)
        break
      }
    }
  }
  return {
    // Math.max() of an empty list is -Infinity; report 0 when nothing matched
    distance: arr.length > 0 ? Math.max(...arr) : 0,
    realWidth: width
  }
}
// console.log(calcSliderDistance('./code.jpg'))
module.exports = calcSliderDistance

发布图文说说

// Publish a text + image post, then read the newest feed item back.
console.log(`open ... https://h5.qzone.qq.com/mqzone/index`)
await page.waitFor('#page-content')
await page.click('#page-content button.input-btn')

// Fill in the post form: text first, then the picture
await page.waitFor('#form-0')
await page.type(
  '.J_textareaWrapper textarea',
  `auto send ${new Date().getHours()}:${new Date().getMinutes()}:${new Date().getSeconds()}`,
  {
    delay: 100
  }
)
const fileInput = await page.waitForSelector('#addphoto')
await fileInput.uploadFile(path.resolve(__dirname, `a.png`))
// Start waiting for the navigation BEFORE clicking: waiting only after the
// click races with a navigation that may already be over, and with
// `timeout: 0` a missed navigation would block forever.
await Promise.all([
  page.waitForNavigation({
    timeout: 0,
    waitUntil: 'networkidle0'
  }),
  page.click('#form-0-submit')
])
// // Jump to the list of published posts
// await page.goto(`https://h5.qzone.qq.com/mqzone/index#3153926643/info/all`, {
//   timeout: 0,
//   waitUntil: 'networkidle0'
// })
console.log(
  `info: go to https://h5.qzone.qq.com/mqzone/index#3153926643/info/all`
)
// await page.waitForResponse
await page.waitFor(5000)
await page.waitFor('#feed_list_cot_all')
// Read the text and the background-image URL of the first feed item
let res = await page.evaluate(() => {
  let item = document.querySelectorAll('.feed.dataItem')
  let curItem = item[0]
  let txt = curItem.querySelector('.feed-bd .txt').innerText
  // https://m.qpic.cn/psb?/V11BqWbN0U76uG/Z4CagcTIjhyNmwtAhRazKo3OE..BnJFvsbvROSe6Jv8!/c/dMUAAAAAAAAA&ek=1&kp=1&pt=0&bo=bAJ*AWwCfwERADc!&tl=1&su=013035857&vuin=3153926643&tm=1566374400
  let domImg = curItem.querySelector('.feed-bd .img')
  // Strip the `url("` prefix and `")` suffix from the CSS value
  let img = domImg
    ? domImg.style.backgroundImage.replace('url("', '').replace('")', '')
    : ''
  return {
    txt,
    img
  }
})
console.log(`info: auto send success...`)
console.log(res)
await browser.close()

错误处理与重启

注意设置上限,多次超时或者多次重启失败,给自己发告警邮件

/**
 * Call run() and retry when it fails.
 * run() is awaited inside the try block: a synchronous try/catch around a
 * non-awaited call cannot catch a promise rejection, so with an asynchronous
 * run() the retry branch was never reached.
 * @param {number} [maxAttempts=2] total number of runs (the original retried once)
 * @returns {Promise<void>} resolves after the first successful run
 * @throws rethrows the last error once every attempt has failed
 */
const runWithRetry = async (maxAttempts = 2) => {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      await run()
      return
    } catch (error) {
      if (attempt === maxAttempts) {
        throw error
      }
      console.log(`再次 run...`)
    }
  }
}

runWithRetry().catch(error => {
  // Every attempt failed: surface the error and mark the process as failed
  console.error(error)
  process.exitCode = 1
})

踩坑

canvas 安装报错

canvas 依赖 cairo、pango 等系统库;安装报错时按提示补装缺失的依赖即可(以下命令暂未验证)

# Linux (yum): system packages to install when the canvas installation fails
sudo yum install cairo cairo-devel cairomm-devel libjpeg-turbo-devel pango pango-devel pangomm pangomm-devel giflib-devel

解决如下

# Install the basic build environment
yum groupinstall "Development Tools"
yum -y install glibc-static libstdc++-static
yum -y install gcc gcc-c++ kernel-devel

# Download the gcc sources and unpack them into /usr/local/src
cd /usr/local/src
wget ftp://gcc.gnu.org/pub/gcc/releases/gcc-5.4.0/gcc-5.4.0.tar.gz
# The archive has to be extracted, otherwise /usr/local/src/gcc-5.4.0 does not exist
tar -xzf gcc-5.4.0.tar.gz

# Download the prerequisites gcc needs
cd /usr/local/src/gcc-5.4.0
./contrib/download_prerequisites

# Create a build directory under /usr/local/src/ and configure from inside it
cd /usr/local/src/
mkdir build
cd build
/usr/local/src/gcc-5.4.0/configure --enable-checking=release --enable-languages=c,c++ --disable-multilib

# Compile and install gcc
make
make install

# make reported errors at this point
# (tempted to switch to Ubuntu...)

# Install cairo and the related packages
su -c 'yum install cairo cairo-devel cairomm-devel libjpeg-turbo-devel pango pango-devel pangomm pangomm-devel'

# Install canvas
npm install canvas --save

无关请求会阻塞

url.includes粗暴处理,但是肯定有更优解

// Abort requests to hosts the flow does not need and let everything else through.
// Only the hostname is tested: matching the whole URL string also aborted
// requests that merely mention one of these domains in a path or query value.
page.on('request', request => {
  const blockedDomains = ['google.com', 'baidu.com']
  let hostname = ''
  try {
    hostname = new URL(request.url()).hostname
  } catch (error) {
    // A URL that cannot be parsed is treated as not blocked
  }
  // console.log(hostname)
  if (blockedDomains.some(domain => hostname.includes(domain))) {
    request.abort()
  } else {
    request.continue()
  }
})

参考资料