0. 相关文档
1. 入门级引用
const puppeteer = require('puppeteer');
// Launch the browser; all page traffic is routed through the given proxy.
const browser = await puppeteer.launch({
  // headless: false,
  args: [
    `--proxy-server=${address.ip}:${address.port}`, // send requests through the proxy
  ],
});
try {
  const page = await browser.newPage();
  // Set the viewport BEFORE navigating: resizing after load forces a re-layout
  // and many pages do not expect their size to change once rendered.
  await page.setViewport({
    width: 1920,
    height: 2160,
  });
  // timeout is in milliseconds, defaults to 30 seconds; pass `0` to wait forever.
  // waitUntil decides which condition marks the navigation as finished;
  // the default is the `load` event.
  // Consequently a page.on('load') listener must be registered before
  // page.goto, otherwise its callback will not fire.
  await page.goto(item.url, { timeout: 30000, waitUntil: 'load' });
  await page.screenshot({
    path: 'screenshot.png',
    // fullPage: true,
  });
} finally {
  // Always release the browser process, even when navigation or capture fails.
  await browser.close();
}
const sharp = require('sharp');
// Build a base image: a 100x100 canvas filled with half-transparent red,
// encoded as PNG and returned as a Buffer.
const baseImageOptions = {
  create: {
    width: 100,
    height: 100,
    channels: 4, // colour channels; 4 means an alpha channel is included
    background: { r: 255, g: 0, b: 0, alpha: 0.5 },
  },
};
const baseImagePipeline = sharp(baseImageOptions).png();
const semiTransparentRedPng = await baseImagePipeline.toBuffer();
2. 正题
- Q: 实现网页全截图,为什么需要用到 sharp 组件?
- A: 对于强 js 交互的页面(如滚动页面时,同一个位置会展示不同模块),无论是 1. 采用 puppeteer 的 fullPage 属性,还是 2. 把 setViewport 的 height 设为 document.body.scrollHeight,都不能很好地捕获整个页面;因此需要分段截图,再用 sharp 把各段拼接成一张完整的图片。
// Measure the full height of the page.
const $bodyScrollHeight = await page.$eval('body', (el) => el.scrollHeight);
// Capture the page segment by segment and stitch the pieces together afterwards.
let currentHeight = 0; // scroll position used for the current segment
let capturedHeight = 0; // number of page pixels (from the top) already captured
// Every segment after the first is clipped 150px below the scroll position,
// so a sticky header is not captured again in each segment.
const offsetHeight = 150;
const screenshotList = [];
do {
  // page.evaluate can forward arguments into the page function.
  // Note: logging inside that function goes to the browser's console, which is
  // not visible here; listen to page.on('console') to read it.
  await page.evaluate((scrollTop) => {
    window.scroll({
      top: scrollTop,
      left: 0,
    });
  }, currentHeight);
  await sleep(3000);
  const isFirst = currentHeight === 0;
  // The first segment uses the whole viewport; later ones skip the top offset.
  const segmentHeight = isFirst ? snapshotCnf.height : snapshotCnf.height - offsetHeight;
  // Never clip past the end of the page (avoids a blank tail on the last segment).
  const clipHeight = Math.min(segmentHeight, $bodyScrollHeight - capturedHeight);
  const imgBuffer = await page.screenshot({
    type: 'jpeg',
    quality: 50, // default is 100
    clip: {
      x: 0,
      // Equals currentHeight + offsetHeight for every segment after the first.
      y: capturedHeight,
      width: snapshotCnf.width,
      height: clipHeight,
    },
  });
  capturedHeight += clipHeight;
  // Scroll position for the next segment.
  currentHeight += (snapshotCnf.height - offsetHeight);
  console.log('currentHeight', currentHeight, item.brand);
  screenshotList.push(imgBuffer);
  // Loop on the captured height rather than the scroll position: the scroll
  // position lags the captured area by offsetHeight, so testing it could start
  // one extra iteration with a zero or negative clip height.
} while (capturedHeight < $bodyScrollHeight);
// Compose the segments into one image.
const imageFullSharp = await joinImage(screenshotList.map((o) => sharp(o)), {
  mode: 'vertical',
});
// Write the result to disk.
await imageFullSharp.toFile('output.jpg');
/**
 * Stitch several images into one, either side by side or stacked.
 *
 * Each image is placed directly after the previous one along the stitching
 * axis; on the other axis every image is aligned to 0 and the canvas is as
 * large as the largest image.
 *
 * @param {Array<Sharp>} imgList sharp instances to join, in order
 * @param {Object} [options]
 * @param {string} [options.mode='vertical'] 'horizontal' joins left to right;
 *   any other value is treated as 'vertical' (top to bottom)
 * @param {Object} [options.background] canvas colour as { r, g, b, alpha };
 *   defaults to opaque white
 * @returns {Promise<Sharp>} sharp instance holding the composed image
 * @throws {Error} when imgList is empty or missing
 */
async function joinImage(imgList, { mode = 'vertical', background } = {}) {
  if (!imgList || imgList.length === 0) {
    throw new Error('joinImage: imgList must contain at least one image');
  }
  // Decide the axis once so that offsets and canvas size can never disagree.
  const isHorizontal = mode === 'horizontal';
  let totalWidth = 0;
  let totalHeight = 0;
  let maxWidth = 0;
  let maxHeight = 0;
  const compositeList = [];
  for (const img of imgList) {
    const { width, height } = await img.metadata();
    // The running totals are exactly the offset at which this image starts.
    compositeList.push({
      input: await img.toBuffer(),
      left: isHorizontal ? totalWidth : 0,
      top: isHorizontal ? 0 : totalHeight,
    });
    totalWidth += width;
    totalHeight += height;
    maxWidth = Math.max(maxWidth, width);
    maxHeight = Math.max(maxHeight, height);
  }
  const baseOpt = {
    width: isHorizontal ? totalWidth : maxWidth,
    height: isHorizontal ? maxHeight : totalHeight,
    channels: 4,
    background: background || {
      r: 255, g: 255, b: 255, alpha: 1,
    },
  };
  return sharp({
    create: baseOpt,
  }).composite(compositeList);
}
3. 杂谈
page.exposeFunction:把 Node 端的函数挂载到页面的 window 上,供页面内的脚本调用
// To trigger click (or similar) events, obtain an element handle first.
const inputElement = await page.$('input[type=submit]');
// page.$ resolves to null when no element matches; fail with a clear message
// instead of a "cannot read properties of null" TypeError.
if (!inputElement) {
  throw new Error('No element matches selector: input[type=submit]');
}
await inputElement.click();
// Scroll through the whole page so that lazy-loaded modules get rendered.
await page.evaluate(async () => {
  await new Promise((resolve, reject) => {
    const maxScroll = Number.MAX_SAFE_INTEGER;
    let lastScroll = 0;
    const interval = setInterval(() => {
      // The try/catch lives inside the timer callback: an exception thrown here
      // would not be seen by a try block wrapped around setInterval itself, and
      // the promise would then never settle.
      try {
        window.scrollBy(0, 100);
        const scrollTop = document.documentElement.scrollTop || window.scrollY;
        // Stop at the bottom (position no longer changes) or once the position
        // reaches the largest safe integer.
        if (scrollTop >= maxScroll || scrollTop === lastScroll) {
          clearInterval(interval);
          resolve();
        } else {
          lastScroll = scrollTop;
        }
      } catch (err) {
        clearInterval(interval);
        console.log(err);
        reject(err);
      }
    }, 100); // run every 100 milliseconds
  });
});
- sharp 适配的 node 版本,可以通过 require('sharp/package.json').engines.node 获取;
- sharp 在安装时会动态下载 libvips,如果 ci 流程的镜像和部署的容器镜像不一致,则会导致运行失败;可参考相关 issue,尝试通过修改 process.env(环境变量),安装指定环境的 libvips:
env npm_config_libc=glibc npm i