puppeteer+sharp实现网页截图

493 阅读2分钟

本文主要讲述如何通过 puppeteer + sharp 实现网页的整页(长页面)截图

0. 相关文档

  1. 【puppeteer中文文档】

  2. 【sharp官网文档】

1. 入门级引用

const puppeteer = require('puppeteer');

// Launch a browser, open one page, take a screenshot and always shut the browser down.
const browser = await puppeteer.launch({
    // headless: false, // uncomment to watch the browser window while debugging
    args: [
        `--proxy-server=${address.ip}:${address.port}`, // send requests through a proxy
    ],
});

const page = await browser.newPage();

try {
    // Size the viewport BEFORE navigating so the page is laid out (and any
    // size-dependent scripts run) at the final dimensions instead of the defaults.
    await page.setViewport({
        width: 1920,
        height: 2160,
    });

    // timeout is in milliseconds; the default is 30 seconds and `0` means wait forever.
    // waitUntil decides when navigation counts as finished; the default is the `load` event.
    // A page.on('load') listener must therefore be registered before page.goto to fire.
    await page.goto(item.url, { timeout: 30000, waitUntil: 'load' });

    await page.screenshot({
        path: 'screenshot.png',
        // fullPage: true, // alternative: let puppeteer capture the whole page itself
    });
} finally {
    // Close the browser even when navigation or the screenshot throws,
    // otherwise the browser process is left running.
    await browser.close();
}
const sharp = require('sharp');

// Describe a blank 100x100 base image filled with half-transparent red.
const baseImageOptions = {
    width: 100,
    height: 100,
    channels: 4, // four colour channels, i.e. including alpha
    background: { r: 255, g: 0, b: 0, alpha: 0.5 },
};

// Create the base image and encode it as a PNG buffer.
const semiTransparentRedPng = await sharp({ create: baseImageOptions })
    .png()
    .toBuffer();

2. 正题

  • Q: 实现网页全截图,为啥需要用到sharp组件?
  • A: 对于强 JS 交互的页面(如滚动时在同一位置展示不同模块),无论是 1. 使用 puppeteer 的 fullPage 选项,还是 2. 将 setViewport 的 height 设为 document.body.scrollHeight,都不能很好地捕获整个页面;因此这里采用“分段截图,再用 sharp 拼接”的方式
// Read the page's original full height.
const $bodyScrollHeight = await page.$eval('body', el => el.scrollHeight);

// Capture the page in viewport-sized segments and stitch them together afterwards.
// Every segment after the first skips the top `offsetHeight` pixels of the viewport
// so that a sticky header is not captured again in each segment.
const offsetHeight = 150;
// Height that each non-first segment adds; this is also the scroll step.
const stepHeight = snapshotCnf.height - offsetHeight;
if (stepHeight <= 0) {
    // Without this guard the loop below would never advance.
    throw new Error(`snapshotCnf.height must be greater than ${offsetHeight}px`);
}

let currentHeight = 0; // scroll position used for the current segment
let clipTop = 0; // page Y coordinate where the current segment's clip starts
const screenshotList = [];
// Loop on the clip start, not the scroll position: once the clip start reaches the
// bottom of the page there is nothing left to capture, and asking for a clip with a
// zero or negative height makes page.screenshot fail.
while (clipTop < $bodyScrollHeight) {
    // page.evaluate can pass arguments into the page function.
    // Note: logging inside that function goes to the browser console, which is not
    // visible here; listen to page.on('console') to receive it.
    await page.evaluate(scrollTop => {
        window.scroll({
            top: scrollTop,
            left: 0,
        });
    }, currentHeight);
    await sleep(3000);
    const isFirst = currentHeight === 0;
    // Clamp to the remaining page height so the last segment has no blank tail.
    const remainingHeight = $bodyScrollHeight - clipTop;
    const segmentHeight = isFirst ? snapshotCnf.height : stepHeight;
    const imgBuffer = await page.screenshot({
        type: 'jpeg',
        quality: 50, // default is 100
        clip: {
            x: 0,
            y: clipTop,
            width: snapshotCnf.width,
            height: Math.min(segmentHeight, remainingHeight),
        },
    });
    // Advance to the next segment.
    currentHeight += stepHeight;
    console.log('currentHeight', currentHeight, item.brand);
    screenshotList.push(imgBuffer);
    clipTop = currentHeight + offsetHeight;
}

// Stitch the segments into one image.
const imageFullSharp = await joinImage(screenshotList.map(o => sharp(o)), {
    mode: 'vertical',
});

// Write the result to disk.
await imageFullSharp.toFile('output.jpg');

/**
 * Join several images into a single image, either side by side or stacked.
 *
 * Images are placed in list order with no gap. The canvas is as long as the sum of
 * the images along the join axis and as wide as the largest image across it; any
 * uncovered area shows the background colour.
 *
 * @param {Array<Sharp>} imgList - sharp instances to join, in order; must not be empty.
 * @param {Object} [options]
 * @param {string} [options.mode='vertical'] - 'horizontal' joins left to right; any
 *   other value (including the default) stacks top to bottom.
 * @param {Object} [options.background] - canvas background passed to sharp's `create`;
 *   defaults to opaque white.
 * @returns {Promise<Sharp>} sharp instance of the composed image.
 * @throws {Error} when imgList is empty.
 */
async function joinImage(imgList, { mode = 'vertical', background } = {}) {
    if (!imgList || imgList.length === 0) {
        throw new Error('joinImage: imgList must contain at least one image');
    }

    // Decide the axis once so offsets and canvas size can never disagree.
    const isHorizontal = mode === 'horizontal';
    const compositeList = [];
    let totalWidth = 0;
    let totalHeight = 0;
    let maxWidth = 0;
    let maxHeight = 0;
    // Running offset along the join axis: where the next image starts.
    let cursor = 0;

    for (const img of imgList) {
        const { width, height } = await img.metadata();
        compositeList.push({
            input: await img.toBuffer(),
            left: isHorizontal ? cursor : 0,
            top: isHorizontal ? 0 : cursor,
        });
        cursor += isHorizontal ? width : height;
        totalWidth += width;
        totalHeight += height;
        maxWidth = Math.max(maxWidth, width);
        maxHeight = Math.max(maxHeight, height);
    }

    const baseOpt = {
        width: isHorizontal ? totalWidth : maxWidth,
        height: isHorizontal ? maxHeight : totalHeight,
        channels: 4,
        background: background || {
            r: 255, g: 255, b: 255, alpha: 1,
        },
    };

    return sharp({
        create: baseOpt,
    }).composite(compositeList);
}

3. 杂谈

page.exposeFunction:将 Node 侧的函数(API)挂载到页面的 window 上,供页面内脚本调用
// To trigger events such as click, look the element up first and act on the handle.
// NOTE(review): per the Puppeteer docs page.$ resolves to null when nothing matches,
// in which case the click() call below would throw — confirm the selector always matches.
const inputElement = await page.$('input[type=submit]');

await inputElement.click();
// Scroll through the page step by step so lazily loaded modules get triggered.
// The returned promise settles once a scroll step no longer moves the page.
await page.evaluate(async () => {
    await new Promise((resolve, reject) => {
        const maxScroll = Number.MAX_SAFE_INTEGER;
        let lastScroll = 0;
        const interval = setInterval(() => {
            // Error handling sits inside the timer callback: a try/catch around
            // setInterval itself would not see errors thrown by later ticks, which
            // would leave this promise pending forever with the timer still running.
            try {
                window.scrollBy(0, 100);
                const scrollTop = document.documentElement.scrollTop || window.scrollY;
                // Stop when the bottom is reached (position unchanged) or the
                // position reaches the largest safe integer.
                if (scrollTop >= maxScroll || scrollTop === lastScroll) {
                    clearInterval(interval);
                    resolve();
                } else {
                    lastScroll = scrollTop;
                }
            } catch (err) {
                clearInterval(interval);
                console.log(err);
                reject(err);
            }
        }, 100); // run every 100 milliseconds
    });
});

sharp 适配的 node 版本,可以通过 require('sharp/package.json').engines.node 获取;

sharp 在安装时会按当前平台动态下载预编译的 libvips;如果 CI 流程使用的镜像与部署的容器镜像不一致(例如 glibc 与 musl 不同),则会导致运行失败;参考相关 issue

可以尝试在安装时通过环境变量(npm_config_*)指定目标环境,从而安装对应环境的 libvips,例如:

env npm_config_libc=glibc npm i