Tech Deep Dive (3): Real-Time Text Detection with the Camera in WeChat Mini Programs




Use Case

While browsing the WeChat developer documentation, I noticed that Mini Programs expose an OCR detection capability. Since I happened to be researching OCR anyway, this was worth a try. The API supports two modes: detecting text in a static image you pass in manually, or detecting text in real time through the camera. I decided to try the real-time camera mode and see how well it works.
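For completeness, the static-image mode (mode: 2) hands an image to the session yourself. A minimal sketch, assuming the runOCR call described in the VKSession docs, where frameBuffer is the image's raw pixel buffer (verify the exact signature against the current documentation):

const session = wx.createVKSession({
  track: {
    OCR: { mode: 2 } // mode 2: pass in images manually
  },
  version: 'v1'
})

session.start(err => {
  if (err) return
  // feed a single image; results still arrive via the updateAnchors event
  session.runOCR({
    frameBuffer, // ArrayBuffer of raw pixel data for the image
    width,       // image width in px
    height,      // image height in px
  })
})

The rest of this post sticks with the camera mode.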

Code Analysis

WeChat's official sample code (heads-up: there is a pitfall here):

const session = wx.createVKSession({
  track: {
    OCR: { mode: 1 } // mode 1: use the camera; mode 2: pass in images manually
  },
  version: 'v1'
})

// in real-time camera mode, the updateAnchors event fires continuously
// while text is detected (once per frame)
session.on('updateAnchors', anchors => {
  console.log('anchors.text', "".concat(anchors.map(anchor => anchor.text)))
})

// the removeAnchors event fires when the text region leaves the camera view
session.on('removeAnchors', () => {
  console.log('removeAnchors')
})

As anyone who has used the WeChat docs knows, they are riddled with pitfalls. Sure enough, copying this into a project produced no effect at all. So I dug into the createVKSession function and discovered how it is actually meant to be used.

First, the initialization parameters are different:

// the following demo uses v2 as an example
// create the session object
const session = wx.createVKSession({
  track: {
    plane: { mode: 3 },
  },
  version: 'v2',
  gl, // WebGLRenderingContext
})
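Unlike the first sample, the session now requires a WebGLRenderingContext. A minimal sketch of where gl comes from, mirroring the selector query used in the full implementation below:

wx.createSelectorQuery()
  .select('#webgl')
  .node()
  .exec(res => {
    const canvas = res[0].node
    const gl = canvas.getContext('webgl')
    // now safe to call wx.createVKSession({ ..., gl })
  })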

After creating the session, call start to launch it:

// analyze frame by frame
const onFrame = timestamp => {
  // developers can control the frame rate themselves
  const frame = session.getVKFrame(canvasWidth, canvasHeight)
  if (frame) {
    // analysis finished; we now have a frame object
    doRender(frame)
  }

  session.requestAnimationFrame(onFrame)
}
session.start(err => {
  if (!err) session.requestAnimationFrame(onFrame)
})

// render function
const doRender = frame => {
  // ...
}
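The comment above ("developers can control the frame rate themselves") means you decide how often a frame is pulled for analysis. A minimal sketch that skips analysis unless roughly 100 ms have passed, assuming the timestamp argument behaves like the browser's requestAnimationFrame timestamp:

let lastAnalysis = 0
const onFrame = timestamp => {
  // analyze at most ~10 times per second; keep the loop scheduled every frame
  if (timestamp - lastAnalysis > 100) {
    lastAnalysis = timestamp
    const frame = session.getVKFrame(canvasWidth, canvasHeight)
    if (frame) doRender(frame)
  }
  session.requestAnimationFrame(onFrame)
}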

session.requestAnimationFrame(onFrame) works just like the browser's requestAnimationFrame.

The key point: OCR analysis is only triggered by calling const frame = session.getVKFrame(canvasWidth, canvasHeight) on each frame.
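Each frame object carries the camera image as two YUV textures plus a display transform matrix. Condensed from the full implementation below, the render step consumes them like this:

const { yTexture, uvTexture } = frame.getCameraTexture(gl, 'yuv')
const displayTransform = frame.getDisplayTransform()
// bind both textures, upload displayTransform to the shader,
// then draw a fullscreen quad to show the camera feed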

Implementation

The implementation uses Taro to create a Canvas with type set to webgl.

HTML (JSX) code:

<Canvas
  type="webgl"
  id="webgl"
  style={{
    width: "100vw",
    height: "100vh",
  }}
></Canvas>
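One caveat: the CSS size (100vw/100vh) styles the element but does not set the canvas drawing buffer, and getVKFrame below is called with canvas.width / canvas.height. A sketch of sizing the buffer to the screen, assuming the standard Taro.getSystemInfoSync API (without this the buffer typically stays at the 300x150 default):

const info = Taro.getSystemInfoSync()
canvas.width = info.windowWidth * info.pixelRatio
canvas.height = info.windowHeight * info.pixelRatio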

JS code:

useEffect(() => {
    Taro.createSelectorQuery()
      .select("#webgl")
      .node()
      .exec((res) => {
        const canvas = res[0].node;
        const gl = canvas.getContext("webgl");

        //@ts-ignore
        const session = Taro.createVKSession({
          track: {
            plane: {
              mode: 3,
            },
            OCR: {
              mode: 1,
            },
          },
          version: "v1",
          gl,
        });
        session.start((err) => {
          if (err) return console.error("VK error: ", err);

          const onFrame = (timestamp) => {
            const frame = session.getVKFrame(canvas.width, canvas.height);
            if (frame) {
              gl.disable(gl.DEPTH_TEST);
              const { yTexture, uvTexture } = frame.getCameraTexture(gl, "yuv");
              const displayTransform = frame.getDisplayTransform();
              draw(gl);
              function draw(gl) {
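                // NOTE: for brevity this demo recreates the shaders, program,
                // buffers and VAO on every frame; in a real app you would
                // create them once outside onFrame and reuse them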
                // compile the vertex and fragment shaders
                const currentProgram = gl.getParameter(gl.CURRENT_PROGRAM);
                const vs = `
                attribute vec2 a_position;
                attribute vec2 a_texCoord;
                uniform mat3 displayTransform;
                varying vec2 v_texCoord;
                void main() {
                  vec3 p = displayTransform * vec3(a_position, 0);
                  gl_Position = vec4(p, 1);
                  v_texCoord = a_texCoord;
                }
                `;
                const fs = `
                precision highp float;
              
                uniform sampler2D y_texture;
                uniform sampler2D uv_texture;
                varying vec2 v_texCoord;
                void main() {
                  vec4 y_color = texture2D(y_texture, v_texCoord);
                  vec4 uv_color = texture2D(uv_texture, v_texCoord);
              
                  float Y, U, V;
                  float R ,G, B;
                  Y = y_color.r;
                  U = uv_color.r - 0.5;
                  V = uv_color.a - 0.5;
                  
                  R = Y + 1.402 * V;
                  G = Y - 0.344 * U - 0.714 * V;
                  B = Y + 1.772 * U;
                  
                  gl_FragColor = vec4(R, G, B, 1.0);
                }
                `;
                const vertShader = gl.createShader(gl.VERTEX_SHADER);
                gl.shaderSource(vertShader, vs);
                gl.compileShader(vertShader);
                const fragShader = gl.createShader(gl.FRAGMENT_SHADER);
                gl.shaderSource(fragShader, fs);
                gl.compileShader(fragShader);

                const program = gl.createProgram();
                gl.attachShader(program, vertShader);
                gl.attachShader(program, fragShader);
                gl.deleteShader(vertShader);
                gl.deleteShader(fragShader);
                gl.linkProgram(program);
                gl.useProgram(program);

                const uniformYTexture = gl.getUniformLocation(
                  program,
                  "y_texture"
                );
                gl.uniform1i(uniformYTexture, 5);
                const uniformUVTexture = gl.getUniformLocation(
                  program,
                  "uv_texture"
                );
                gl.uniform1i(uniformUVTexture, 6);

                const dt = gl.getUniformLocation(program, "displayTransform");
                gl.useProgram(currentProgram);
                // initialize the VAO (vertex array object)
                const ext = gl.getExtension("OES_vertex_array_object");
                const currentVAO = gl.getParameter(gl.VERTEX_ARRAY_BINDING);
                const vao = ext.createVertexArrayOES();

                ext.bindVertexArrayOES(vao);

                const posAttr = gl.getAttribLocation(program, "a_position");
                const pos = gl.createBuffer();
                gl.bindBuffer(gl.ARRAY_BUFFER, pos);
                gl.bufferData(
                  gl.ARRAY_BUFFER,
                  new Float32Array([1, 1, -1, 1, 1, -1, -1, -1]),
                  gl.STATIC_DRAW
                );
                gl.vertexAttribPointer(posAttr, 2, gl.FLOAT, false, 0, 0);
                gl.enableVertexAttribArray(posAttr);
                vao.posBuffer = pos;

                const texcoordAttr = gl.getAttribLocation(
                  program,
                  "a_texCoord"
                );
                const texcoord = gl.createBuffer();
                gl.bindBuffer(gl.ARRAY_BUFFER, texcoord);
                gl.bufferData(
                  gl.ARRAY_BUFFER,
                  new Float32Array([1, 1, 0, 1, 1, 0, 0, 0]),
                  gl.STATIC_DRAW
                );
                gl.vertexAttribPointer(texcoordAttr, 2, gl.FLOAT, false, 0, 0);
                gl.enableVertexAttribArray(texcoordAttr);
                vao.texcoordBuffer = texcoord;

                ext.bindVertexArrayOES(currentVAO);

                if (yTexture && uvTexture) {
                  const currentProgram = gl.getParameter(gl.CURRENT_PROGRAM);
                  const currentActiveTexture = gl.getParameter(
                    gl.ACTIVE_TEXTURE
                  );
                  const currentVAO = gl.getParameter(gl.VERTEX_ARRAY_BINDING);

                  gl.useProgram(program);
                  ext.bindVertexArrayOES(vao);

                  // upload the display transform matrix
                  gl.uniformMatrix3fv(dt, false, displayTransform);
                  gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);

                  // bind the Y-plane (luma) texture
                  gl.activeTexture(gl.TEXTURE0 + 5);
                  const bindingTexture5 = gl.getParameter(
                    gl.TEXTURE_BINDING_2D
                  );
                  gl.bindTexture(gl.TEXTURE_2D, yTexture);

                  // bind the UV-plane (chroma) texture
                  gl.activeTexture(gl.TEXTURE0 + 6);
                  const bindingTexture6 = gl.getParameter(
                    gl.TEXTURE_BINDING_2D
                  );
                  gl.bindTexture(gl.TEXTURE_2D, uvTexture);

                  gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);

                  gl.bindTexture(gl.TEXTURE_2D, bindingTexture6);
                  gl.activeTexture(gl.TEXTURE0 + 5);
                  gl.bindTexture(gl.TEXTURE_2D, bindingTexture5);

                  gl.useProgram(currentProgram);
                  gl.activeTexture(currentActiveTexture);
                  ext.bindVertexArrayOES(currentVAO);
                }
              }
            }

            session.requestAnimationFrame(onFrame);
          };
          session.requestAnimationFrame(onFrame);

          session.on("addAnchors", () => {
            console.log("anchor add");
          });
          // in real-time camera mode, updateAnchors fires continuously
          // (once per frame) while text is detected
          session.on("updateAnchors", (anchors) => {
            throttleHandle(() => {
              setTextList(
                anchors.map((anchor) => {
                  console.log("--->206:", anchor);
                  return anchor.text;
                })
              );
            });
          });

          // removeAnchors fires when the text region leaves the camera view
          session.on("removeAnchors", () => {
            console.log("removeAnchors");
          });
        });
      });
  }, []);
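Two helpers used above are not shown in the snippet: setTextList is the setter from a React useState hook holding the recognized strings, and throttleHandle rate-limits the per-frame updateAnchors callback so state is not set on every single frame. A minimal sketch of both; the names match the usage above, but the bodies are my own assumption, not the original author's code:

import { useRef, useState } from "react";

function OcrScanner() {
  const [textList, setTextList] = useState([]);
  const lastRun = useRef(0);

  // invoke fn at most once every `wait` ms; calls in between are dropped
  const throttleHandle = (fn, wait = 300) => {
    const now = Date.now();
    if (now - lastRun.current >= wait) {
      lastRun.current = now;
      fn();
    }
  };

  // ...the useEffect above lives here, and textList is rendered below the Canvas
}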

The result: the camera feed renders on the canvas, and the detected text updates in real time as the camera moves.