Spring Boot 使用 Playwright 抓取草料二维码活码内容

8 阅读2分钟

GitHub地址

NoneSL/cl-qrcode: 草料二维码活码解析 (github.com)

相关代码

<!-- Dependencies to add to pom.xml -->
<!-- Playwright: supplies the Playwright/Browser/Page API used to render the page in headless Chromium -->
<dependency>
    <groupId>com.microsoft.playwright</groupId>
    <artifactId>playwright</artifactId>
    <version>1.45.0</version>
</dependency>
<!-- jsoup: parses the rendered HTML so the document title can be read -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.17.1</version>
</dependency>
//实体类
package com.example.demo.entity;


/**
 * Immutable snapshot of a page after the browser has finished loading it.
 *
 * @param finalUrl the URL the page reports once navigation has completed
 * @param html     the HTML content of the page at that point
 */
public record ResolveResult(String finalUrl, String html) {
}
//无头浏览器实例管理类
package com.example.demo.utils;

import com.microsoft.playwright.Browser;
import com.microsoft.playwright.BrowserContext;
import com.microsoft.playwright.BrowserType;
import com.microsoft.playwright.Playwright;
import jakarta.annotation.PreDestroy;
import org.springframework.stereotype.Component;

import java.util.Arrays;

/**
 * Owns the single Playwright driver and headless Chromium browser shared by the application.
 *
 * <p>The browser is launched once when the bean is constructed and shut down when the bean is
 * destroyed. Callers obtain a fresh, isolated {@link BrowserContext} per task via
 * {@link #newContext()} and are responsible for closing it.
 *
 * <p>Playwright's Java API is documented as not thread-safe: callers that share this bean
 * across threads must make sure only one thread talks to the browser at a time.
 */
@Component
public class PlaywrightManager {

    private final Playwright playwright;
    private final Browser browser;

    /**
     * Starts the Playwright driver and launches headless Chromium.
     *
     * <p>If the browser fails to launch, the already-started driver is closed before the
     * failure is rethrown so that no driver process is left behind.
     */
    public PlaywrightManager() {
        Playwright createdPlaywright = Playwright.create();
        Browser launchedBrowser;
        try {
            launchedBrowser = createdPlaywright.chromium().launch(
                    new BrowserType.LaunchOptions()
                            .setHeadless(true)
                            .setSlowMo(0) // no artificial delay between operations
                            .setArgs(Arrays.asList(
                                    "--disable-gpu", // no GPU acceleration (lower resource usage)
                                    // Intended as a second layer of image blocking on top of request
                                    // routing. NOTE(review): confirm Chromium actually honours this switch.
                                    "--disable-images",
                                    "--disable-extensions", // no extensions
                                    "--disable-plugins", // no plugins
                                    "--disable-notifications", // no notification prompts
                                    // Sandbox off (intended as a speed-up on Linux).
                                    // NOTE(review): weakens isolation when loading untrusted pages.
                                    "--no-sandbox",
                                    // Cross-origin checks off (marked optional by the original author).
                                    // NOTE(review): consider removing; it is not needed to read a page's HTML.
                                    "--disable-web-security"
                            ))
            );
        } catch (RuntimeException | Error launchFailure) {
            // Do not leak the driver when the browser could not be started.
            try {
                createdPlaywright.close();
            } catch (RuntimeException closeFailure) {
                launchFailure.addSuppressed(closeFailure);
            }
            throw launchFailure;
        }
        playwright = createdPlaywright;
        browser = launchedBrowser;
    }

    /**
     * Creates a new browser context on the shared browser.
     *
     * @return a new context; the caller must close it
     */
    public BrowserContext newContext() {
        return browser.newContext();
    }

    /**
     * Shuts down the browser and then the Playwright driver.
     *
     * <p>The driver is closed even if closing the browser throws.
     */
    @PreDestroy
    public void close() {
        try {
            browser.close();
        } finally {
            playwright.close();
        }
    }
}
//接口
package com.example.demo.service;


import com.example.demo.entity.ResolveResult;

/**
 * Resolves the page behind a QR code URL and extracts its content.
 */
public interface QRCodeService {

    /**
     * Loads the given URL and captures the resulting page.
     *
     * @param url the URL encoded in the QR code
     * @return the page's final URL together with its HTML
     */
    ResolveResult decodeQRCode(String url);

    /**
     * Loads the given URL and returns the content extracted from the page as text.
     *
     * @param url the URL encoded in the QR code
     * @return the extracted content
     */
    String getContent(String url);
}
//接口实现类
package com.example.demo.service.impl;

import com.example.demo.entity.ResolveResult;
import com.example.demo.service.QRCodeService;
import com.example.demo.utils.PlaywrightManager;

import com.microsoft.playwright.BrowserContext;
import com.microsoft.playwright.Page;
import com.microsoft.playwright.Route;
import com.microsoft.playwright.options.WaitUntilState;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import tools.jackson.core.JacksonException;
import tools.jackson.databind.ObjectMapper;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * {@link QRCodeService} implementation that renders the target page in headless Chromium
 * (through {@link PlaywrightManager}) and extracts labelled fields from the page title.
 */
@Service
public class QRCodeServiceImpl implements QRCodeService {

    /** Field labels recognised in the page title, as a regex alternation. */
    private static final String FIELD_NAMES = "产品名称|物料编码|批号|生产日期|数量";

    /**
     * Matches {@code <label>:<value>} pairs. The value runs lazily up to the next
     * {@code <label>:} or the end of the input, so a value may contain characters that also
     * occur inside a label (for example {@code 日用品}). Both ASCII and full-width colons are
     * accepted as the separator.
     */
    private static final Pattern FIELD_PATTERN = Pattern.compile(
            "(" + FIELD_NAMES + ")[:：](.*?)(?=(?:" + FIELD_NAMES + ")[:：]|\\z)",
            Pattern.DOTALL);

    /** Shared mapper; building one per call is unnecessary work. */
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    @Autowired
    private PlaywrightManager manager;

    /**
     * Opens the URL in a fresh browser context, waits until the network is idle and captures
     * the page's final URL and HTML.
     *
     * <p>Image, font and media requests are aborted and stylesheets are answered with an empty
     * body, since only the HTML is needed.
     *
     * @param url an absolute {@code http} or {@code https} URL
     * @return the final URL and HTML of the loaded page
     * @throws IllegalArgumentException if {@code url} is null or not an http(s) URL
     */
    @Override
    public ResolveResult decodeQRCode(String url) {
        String target = requireHttpUrl(url);

        // Playwright for Java is not thread-safe and the browser is shared by all request
        // threads, so only one thread may drive it at a time.
        synchronized (manager) {
            try (BrowserContext context = manager.newContext()) {
                Page page = context.newPage();
                page.route("**/*", QRCodeServiceImpl::handleRoute);
                page.navigate(
                        target,
                        new Page.NavigateOptions().setWaitUntil(WaitUntilState.NETWORKIDLE));
                return new ResolveResult(page.url(), page.content());
            }
        }
    }

    /**
     * Loads the URL, reads the page title and returns the labelled fields found in it as
     * pretty-printed JSON, in the order they appear.
     *
     * @param url an absolute {@code http} or {@code https} URL
     * @return a JSON object mapping each recognised label to its value
     * @throws IllegalArgumentException if {@code url} is null or not an http(s) URL
     * @throws RuntimeException if the fields cannot be serialised to JSON
     */
    @Override
    public String getContent(String url) {
        ResolveResult result = decodeQRCode(url);
        Document doc = Jsoup.parse(result.html());

        // Adapt this step to the business case; doc.title() alone may already be enough.
        Map<String, String> fields = parseFields(doc.title());

        try {
            return JSON_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(fields);
        } catch (JacksonException e) {
            throw new RuntimeException("Failed to serialize QR code fields to JSON", e);
        }
    }

    /** Rejects anything but an absolute http(s) URL, so a request cannot point the browser at local files or other schemes. */
    private static String requireHttpUrl(String url) {
        if (url == null) {
            throw new IllegalArgumentException("url must not be null");
        }
        String trimmed = url.strip();
        boolean http = trimmed.regionMatches(true, 0, "http://", 0, 7);
        boolean https = trimmed.regionMatches(true, 0, "https://", 0, 8);
        if (!http && !https) {
            throw new IllegalArgumentException("url must be an absolute http or https URL");
        }
        return trimmed;
    }

    /** Drops image/font/media requests, answers stylesheets with empty CSS and lets everything else through. */
    private static void handleRoute(Route route) {
        String type = route.request().resourceType();
        if ("image".equals(type) || "font".equals(type) || "media".equals(type)) {
            route.abort();
        } else if ("stylesheet".equals(type)) {
            // Answer instead of aborting so scripts that wait for the stylesheet are not blocked.
            route.fulfill(new Route.FulfillOptions()
                    .setStatus(200)
                    .setContentType("text/css")
                    .setBody(""));
        } else {
            route.resume();
        }
    }

    /** Extracts {@code label -> value} pairs from the title, keeping first-seen order and skipping labels with no value. */
    private static Map<String, String> parseFields(String title) {
        Map<String, String> fields = new LinkedHashMap<>();
        Matcher matcher = FIELD_PATTERN.matcher(title);
        while (matcher.find()) {
            String rawValue = matcher.group(2);
            if (rawValue.isEmpty()) {
                continue; // a label directly followed by the next label carries no value
            }
            fields.put(matcher.group(1).trim(), rawValue.trim());
        }
        return fields;
    }

}
// controller层
package com.example.demo.controller;

import com.example.demo.service.QRCodeService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

@RestController
@RequestMapping("/QRCode")
public class QRCodeController {

    @Autowired
    private QRCodeService qrCodeService;

    @PostMapping("/getQRCodeByURl")
    public String getQRCodeByURl(@RequestParam String url) {
        return qrCodeService.getContent(url);
    }
}
//调用
POST http://localhost:8080/QRCode/getQRCodeByURl?
    url=xxx