uni-app 小程序接入火山引擎语音转文字

133 阅读9分钟

以下代码由火山引擎 API 文档中的 Java 示例代码转换而来：[www.volcengine.com/docs/6561/1…]


// Base64 alphabet; the padding character '=' is deliberately appended at
// index 64 so the decoder can recognise padding through the lookup table.
const b64ch = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';
// Alphabet as an array: sextet value -> character.
const b64chs = Array.from(b64ch);
// Accepts base64 text made of 4-character groups, with an optional final
// group of 2 or 3 characters whose padding may be present or omitted.
const b64re = /^(?:[A-Za-z\d+\/]{4})*?(?:[A-Za-z\d+\/]{2}(?:==)?|[A-Za-z\d+\/]{3}=?)?$/;
// Reverse lookup: character -> sextet value ('=' maps to 64).
const b64tab = b64chs.reduce((table, symbol, position) => {
    table[symbol] = position;
    return table;
}, {});
// Shorthand for String.fromCharCode that can be called without a receiver.
const _fromCC = (...codes) => String.fromCharCode(...codes);

/**
 * Fallback implementation of `btoa`: encodes a binary string as base64.
 *
 * @param {string} bin String whose characters all have code units <= 255.
 * @returns {string} Base64 text, padded with '=' to a multiple of four.
 * @throws {TypeError} If a character with a code unit above 255 is found.
 */
function btoaPolyfill(bin) {
    const tailLength = bin.length % 3;
    let encoded = '';
    let index = 0;

    while (index < bin.length) {
        // Read up to three bytes; reading past the end yields NaN, which the
        // shifts below treat as 0 (the surplus output is trimmed afterwards).
        const first = bin.charCodeAt(index++);
        if (first > 255) {
            throw new TypeError('invalid character found');
        }
        const second = bin.charCodeAt(index++);
        if (second > 255) {
            throw new TypeError('invalid character found');
        }
        const third = bin.charCodeAt(index++);
        if (third > 255) {
            throw new TypeError('invalid character found');
        }

        // Pack the three bytes into 24 bits and emit them as four sextets.
        const packed = (first << 16) | (second << 8) | third;
        encoded += [18, 12, 6, 0]
            .map((shift) => b64chs[(packed >> shift) & 63])
            .join('');
    }

    // Replace the characters produced from missing bytes with '=' padding.
    return tailLength
        ? encoded.slice(0, tailLength - 3) + "===".substring(tailLength)
        : encoded;
}

/**
 * Fallback implementation of `atob`: decodes base64 text to a binary string.
 *
 * Whitespace is ignored and missing trailing padding is tolerated.
 *
 * @param {string} asc Base64 text.
 * @returns {string} Binary string with one character per decoded byte.
 * @throws {TypeError} If the text is not well-formed base64.
 */
function atobPolyfill(asc) {
    const compact = asc.replace(/\s+/g, '');
    if (!b64re.test(compact)) {
        throw new TypeError('malformed base64.');
    }

    // Restore omitted padding so the input splits into 4-character groups.
    const padded = compact + '=='.slice(2 - (compact.length & 3));
    const pieces = [];

    for (let offset = 0; offset < padded.length; offset += 4) {
        const s0 = b64tab[padded.charAt(offset)];
        const s1 = b64tab[padded.charAt(offset + 1)];
        const s2 = b64tab[padded.charAt(offset + 2)];
        const s3 = b64tab[padded.charAt(offset + 3)];

        const triple = s0 << 18 | s1 << 12 | s2 << 6 | s3;
        const high = triple >> 16 & 255;
        const middle = triple >> 8 & 255;
        const low = triple & 255;

        // A table value of 64 is the '=' padding marker.
        if (s2 === 64) {
            pieces.push(_fromCC(high));
        } else if (s3 === 64) {
            pieces.push(_fromCC(high, middle));
        } else {
            pieces.push(_fromCC(high, middle, low));
        }
    }

    return pieces.join('');
}

// Protocol constants. Every value is a 4-bit field that is packed into one
// nibble of the 4-byte frame header.
const PROTOCOL_VERSION = 0x1;
const DEFAULT_HEADER_SIZE = 0x1;

// Message types
const FULL_CLIENT_REQUEST = 0x1;
const AUDIO_ONLY_REQUEST = 0x2;
const FULL_SERVER_RESPONSE = 0x9;
const SERVER_ACK = 0xB;
const SERVER_ERROR_RESPONSE = 0xF;

// Message-type-specific flags
const NO_SEQUENCE = 0x0; // no sequence check
const POS_SEQUENCE = 0x1;
const NEG_SEQUENCE = 0x2;
const NEG_WITH_SEQUENCE = 0x3;
const NEG_SEQUENCE_1 = 0x3; // same value as NEG_WITH_SEQUENCE

// Payload serialization methods
const NO_SERIALIZATION = 0x0;
const JSON_SERIALIZATION = 0x1;

// Payload compression methods
const NO_COMPRESSION = 0x0;
const GZIP = 0x1;

/**
 * Streaming speech-recognition WebSocket client.
 *
 * Talks to the ByteDance/Volcengine "bigmodel" streaming ASR endpoint using
 * binary frames laid out as:
 *
 *   header (4 bytes) | sequence (int32, big-endian) | payload size (int32,
 *   big-endian) | payload
 *
 * The socket is created through `wx.connectSocket`, `uni.connectSocket` or
 * the global `WebSocket`, whichever is available (checked in that order).
 */
class AsrWsClient {
    /**
     * @param {Object} [options] Client configuration.
     * @param {string} [options.url] WebSocket endpoint.
     * @param {string} [options.appId] Sent as the `X-Api-App-Key` header.
     * @param {string} [options.token] Sent as the `X-Api-Access-Key` header.
     * @param {string} [options.resourceId] Sent as the `X-Api-Resource-Id` header.
     * @param {string} [options.uid] User id placed in the initial request (default `'test'`).
     * @param {Function} [options.onOpen] Called once the socket is open and the initial request was sent.
     * @param {Function} [options.onMessage] Called with the parsed frame for server responses and ACKs.
     * @param {Function} [options.onError] Called with a parsed error frame, a transport error, or an
     *   `Error` describing a malformed frame.
     * @param {Function} [options.onClose] Called when the socket closes.
     * @param {Object} [options.audioConfig] Audio description sent in the initial request.
     */
    constructor(options = {}) {
        this.url = options.url || 'wss://openspeech.bytedance.com/api/v3/sauc/bigmodel';
        this.appId = options.appId || '';
        this.token = options.token || '';
        this.resourceId = options.resourceId || 'volc.bigasr.sauc.duration';
        this.uid = options.uid || 'test';

        this.socket = null;
        // Which API created `socket` ('wx' | 'uni' | 'browser'); recorded at
        // connect time so that send/close use the matching API.
        this.transport = null;
        this.seq = 0;
        // Not used inside this class; kept because callers may rely on them.
        this.buffer = null;
        this.bufferSize = 0;

        // Callbacks
        this.onOpen = options.onOpen || (() => { });
        this.onMessage = options.onMessage || (() => { });
        this.onError = options.onError || (() => { });
        this.onClose = options.onClose || (() => { });

        // Audio configuration
        this.audioConfig = options.audioConfig || {
            format: 'wav',
            sampleRate: 16000,
            bits: 16,
            channel: 1,
            codec: 'raw'
        };
    }

    /**
     * Builds the 4-byte frame header.
     * @param {number} messageType Message type (upper nibble of byte 1).
     * @param {number} messageTypeSpecificFlags Message-type-specific flags (lower nibble of byte 1).
     * @param {number} serialMethod Serialization method (upper nibble of byte 2).
     * @param {number} compressionType Compression type (lower nibble of byte 2).
     * @param {number} reservedData Reserved byte.
     * @returns {Uint8Array} The header bytes.
     */
    getHeader(messageType, messageTypeSpecificFlags, serialMethod, compressionType, reservedData = 0) {
        const header = new Uint8Array(4);
        header[0] = (PROTOCOL_VERSION << 4) | DEFAULT_HEADER_SIZE; // protocol version | header size
        header[1] = (messageType << 4) | messageTypeSpecificFlags; // message type | flags
        header[2] = (serialMethod << 4) | compressionType; // serialization | compression
        header[3] = reservedData; // reserved
        return header;
    }

    /**
     * Encodes a 32-bit integer as four big-endian bytes (two's complement
     * for negative values).
     * @param {number} num Integer to encode.
     * @returns {Uint8Array} Four bytes, most significant first.
     */
    intToBytes(num) {
        return new Uint8Array([
            (num >> 24) & 0xFF,
            (num >> 16) & 0xFF,
            (num >> 8) & 0xFF,
            num & 0xFF
        ]);
    }

    /**
     * Decodes four big-endian bytes into a signed 32-bit integer.
     * @param {Uint8Array} bytes Exactly four bytes.
     * @returns {number} The decoded integer.
     * @throws {Error} If `bytes` is missing or not four bytes long.
     */
    bytesToInt(bytes) {
        if (!bytes || bytes.length !== 4) {
            throw new Error('Invalid byte array');
        }
        return ((bytes[0] & 0xFF) << 24) |
            ((bytes[1] & 0xFF) << 16) |
            ((bytes[2] & 0xFF) << 8) |
            (bytes[3] & 0xFF);
    }

    /**
     * Encodes the sequence-number field that precedes the payload size.
     * @param {number} seq Sequence number.
     * @returns {Uint8Array} Four big-endian bytes.
     */
    generateBeforePayload(seq) {
        return this.intToBytes(seq);
    }

    /**
     * Encodes a string as UTF-8.
     * @param {string} str Text to encode.
     * @returns {ArrayBuffer} The UTF-8 bytes.
     */
    stringToArrayBuffer(str) {
        try {
            // Prefer TextEncoder when the runtime provides it.
            if (typeof TextEncoder !== 'undefined') {
                return new TextEncoder().encode(str).buffer;
            }

            // encodeURIComponent percent-encodes the UTF-8 bytes; turn each
            // %XX escape back into a single byte-valued character.
            const escaped = encodeURIComponent(str);
            const binary = escaped.replace(/%([0-9A-F]{2})/g, (match, hex) => {
                return String.fromCharCode(parseInt(hex, 16));
            });
            const bytes = new Uint8Array(binary.length);
            for (let i = 0; i < binary.length; i++) {
                bytes[i] = binary.charCodeAt(i);
            }
            return bytes.buffer;
        } catch (e) {
            console.error('stringToArrayBuffer error:', e);
            // Last resort: encode each UTF-16 code unit by hand (1-3 bytes
            // each, so surrogate pairs are not combined).
            const buf = new ArrayBuffer(str.length * 3);
            const view = new Uint8Array(buf);
            let pos = 0;
            for (let i = 0; i < str.length; i++) {
                const code = str.charCodeAt(i);
                if (code < 128) {
                    view[pos++] = code;
                } else if (code < 2048) {
                    view[pos++] = (code >> 6) | 192;
                    view[pos++] = (code & 63) | 128;
                } else {
                    view[pos++] = (code >> 12) | 224;
                    view[pos++] = ((code >> 6) & 63) | 128;
                    view[pos++] = (code & 63) | 128;
                }
            }
            return buf.slice(0, pos);
        }
    }

    /**
     * Decodes UTF-8 bytes into a string.
     * @param {ArrayBuffer|Uint8Array} buffer Bytes to decode.
     * @returns {string} The decoded text.
     */
    arrayBufferToString(buffer) {
        try {
            // Prefer TextDecoder when the runtime provides it.
            if (typeof TextDecoder !== 'undefined') {
                return new TextDecoder('utf-8').decode(buffer instanceof Uint8Array ? buffer : new Uint8Array(buffer));
            }

            // Build a byte-valued string and let decodeURIComponent(escape())
            // reinterpret it as UTF-8.
            const bytes = new Uint8Array(buffer);
            let binary = '';
            for (let i = 0; i < bytes.length; i++) {
                binary += String.fromCharCode(bytes[i]);
            }
            return decodeURIComponent(escape(binary));
        } catch (e) {
            console.error('arrayBufferToString error:', e);
            // Last resort: manual decoding of 1-, 2- and 3-byte sequences.
            const bytes = new Uint8Array(buffer);
            let result = '';
            let i = 0;
            while (i < bytes.length) {
                if (bytes[i] < 128) {
                    result += String.fromCharCode(bytes[i]);
                    i++;
                } else if (bytes[i] > 191 && bytes[i] < 224) {
                    result += String.fromCharCode(((bytes[i] & 31) << 6) | (bytes[i + 1] & 63));
                    i += 2;
                } else {
                    result += String.fromCharCode(((bytes[i] & 15) << 12) | ((bytes[i + 1] & 63) << 6) | (bytes[i + 2] & 63));
                    i += 3;
                }
            }
            return result;
        }
    }

    /**
     * GZIP compression placeholder: currently logs a warning and returns
     * `data` unchanged in every environment.
     * @param {Uint8Array} data Data to compress.
     * @param {number} length Number of bytes to compress (currently unused).
     * @returns {Uint8Array} The input data, uncompressed.
     */
    gzipCompress(data, length = data.length) {
        // A real implementation needs a library such as pako, e.g.
        // `pako.gzip(data.slice(0, length))`.
        try {
            if (typeof wx !== 'undefined' && wx.compressImage) {
                // WeChat mini-program environment
                console.warn('GZIP compression not fully implemented in wx environment');
                return data;
            } else if (typeof uni !== 'undefined' && uni.compressImage) {
                // uni-app environment
                console.warn('GZIP compression not fully implemented in uni-app environment');
                return data;
            } else {
                // Browser or Node.js environment
                console.warn('GZIP compression not implemented, returning original data');
                return data;
            }
        } catch (e) {
            console.error('Compression error:', e);
            return data;
        }
    }

    /**
     * GZIP decompression placeholder. Delegates to `wx.uncompress` when that
     * function exists; otherwise logs a warning and returns `data` unchanged.
     * @param {Uint8Array} data Compressed data.
     * @returns {Uint8Array} Decompressed data, or the input when no decompressor is available.
     */
    gzipDecompress(data) {
        if (!data || data.length === 0) {
            return new Uint8Array(0);
        }

        try {
            // A real implementation needs a library such as pako, e.g.
            // `pako.ungzip(data)`.
            if (typeof wx !== 'undefined' && wx.uncompress) {
                // NOTE(review): `wx.uncompress` does not appear to be a
                // documented mini-program API - confirm before relying on it.
                return wx.uncompress(data);
            } else if (typeof uni !== 'undefined' && uni.uncompress) {
                // uni-app environment
                console.warn('GZIP decompression not fully implemented in uni-app environment');
                return data;
            } else {
                // Browser or Node.js environment
                console.warn('GZIP decompression not implemented, returning original data');
                return data;
            }
        } catch (e) {
            console.error('Decompression error:', e);
            return data;
        }
    }

    /**
     * Parses one server frame and dispatches it to `onMessage` / `onError`.
     *
     * @param {ArrayBuffer} response Raw frame received from the socket.
     * @returns {number} The frame's sequence field (for error frames this is
     *   the error code), or -1 when the frame is empty or too short to hold
     *   the 12-byte prefix. Callers treat a negative value as end of stream.
     */
    parseResponse(response) {
        const res = new Uint8Array(response);
        if (res.length === 0) {
            return -1;
        }

        // header (4) + sequence (4) + payload size (4)
        const prefixLength = 12;
        if (res.length < prefixLength) {
            // Report instead of throwing from inside the socket message handler.
            this.onError(new Error(`Malformed response frame: expected at least ${prefixLength} bytes, got ${res.length}`));
            return -1;
        }

        const nibble = 0b00001111;
        const result = {};

        // Header (4 bytes)
        result.protocol_version = (res[0] >> 4) & nibble;
        result.header_size = res[0] & nibble;
        result.message_type = (res[1] >> 4) & nibble;
        result.message_type_specific_flags = res[1] & nibble;
        result.serialization_method = (res[2] >> 4) & nibble;
        result.message_compression = res[2] & nibble;
        result.reserved = res[3];

        const sequence = this.bytesToInt(res.subarray(4, 8));
        const payloadSize = this.bytesToInt(res.subarray(8, 12));
        const payload = res.slice(prefixLength);

        const isServerResponse = result.message_type === FULL_SERVER_RESPONSE;
        if (isServerResponse || result.message_type === SERVER_ACK) {
            const body = isServerResponse && result.message_compression === GZIP
                ? this.gzipDecompress(payload)
                : payload;
            // Pass the bytes themselves rather than `.buffer`, so a typed-array
            // view returned by the decompressor is decoded with its own
            // offset and length.
            const payloadStr = this.arrayBufferToString(body);
            console.log('===>payload:', payloadStr);
            result.payload_size = payloadSize;
            result.payload = payloadStr;

            this.onMessage(result);
        } else if (result.message_type === SERVER_ERROR_RESPONSE) {
            // For error frames the sequence field carries the error code and
            // the payload carries the error message.
            result.code = sequence;
            result.error_msg = this.arrayBufferToString(payload);

            this.onError(result);
        }

        return sequence;
    }

    /**
     * Reports which socket API is available, using the same precedence that
     * `startRecognition` uses to open the connection.
     * @returns {string} `'wx'`, `'uni'` or `'browser'`.
     * @private
     */
    _detectTransport() {
        if (typeof wx !== 'undefined' && wx.connectSocket) {
            return 'wx';
        }
        if (typeof uni !== 'undefined' && uni.connectSocket) {
            return 'uni';
        }
        return 'browser';
    }

    /**
     * Opens the WebSocket and sends the initial request once it is open.
     *
     * @param {Object} options Options.
     * @param {Object} [options.audioConfig] Overrides merged into `this.audioConfig`.
     * @returns {Promise} Resolves with the socket after the initial request
     *   was sent; rejects if connecting fails or the socket reports an error.
     */
    startRecognition(options = {}) {
        return new Promise((resolve, reject) => {
            // Merge audio configuration overrides.
            this.audioConfig = { ...this.audioConfig, ...options.audioConfig };

            const headers = {
                'X-Api-App-Key': this.appId,
                'X-Api-Access-Key': this.token,
                'X-Api-Resource-Id': this.resourceId,
                'X-Api-Connect-Id': this.generateRequestId()
            };

            // Event handlers shared by all three transports.
            const handleOpen = () => {
                this._sendInitialRequest();
                resolve(this.socket);
                this.onOpen();
            };
            const handleMessage = (data) => {
                // A negative sequence marks the last package of the stream.
                if (this.parseResponse(data) < 0) {
                    console.log('===>识别完成');
                    this.close();
                }
            };
            const handleError = (err) => {
                console.error('WebSocket错误:', err);
                this.onError(err);
                reject(err);
            };
            const handleClose = (res) => {
                console.log('WebSocket已关闭:', res);
                this.onClose(res);
            };

            // Options object understood by wx.connectSocket / uni.connectSocket.
            const connectOptions = {
                url: this.url,
                header: headers,
                success: () => {
                    console.log('WebSocket连接已建立');
                },
                fail: (err) => {
                    console.error('WebSocket连接失败:', err);
                    reject(err);
                }
            };

            this.transport = this._detectTransport();

            if (this.transport === 'wx') {
                // WeChat mini-program: events are registered on the SocketTask.
                this.socket = wx.connectSocket(connectOptions);
                this.socket.onOpen(handleOpen);
                this.socket.onMessage((res) => handleMessage(res.data));
                this.socket.onError(handleError);
                this.socket.onClose(handleClose);
            } else if (this.transport === 'uni') {
                // uni-app: events are registered through the global socket API.
                this.socket = uni.connectSocket(connectOptions);
                uni.onSocketOpen(handleOpen);
                uni.onSocketMessage((res) => handleMessage(res.data));
                uni.onSocketError(handleError);
                uni.onSocketClose(handleClose);
            } else {
                // Browser
                try {
                    this.socket = new WebSocket(this.url);

                    // The standard WebSocket API has no way to set handshake
                    // headers; this only has an effect on implementations
                    // that expose a `setRequestHeader` method.
                    Object.keys(headers).forEach(key => {
                        this.socket.setRequestHeader && this.socket.setRequestHeader(key, headers[key]);
                    });

                    this.socket.binaryType = 'arraybuffer';
                    this.socket.onopen = handleOpen;
                    this.socket.onmessage = (event) => handleMessage(event.data);
                    this.socket.onerror = handleError;
                    this.socket.onclose = handleClose;
                } catch (err) {
                    console.error('创建WebSocket失败:', err);
                    reject(err);
                }
            }
        });
    }

    /**
     * Assembles one frame: header + sequence + payload size + payload.
     * @param {Uint8Array} header Four header bytes.
     * @param {number} seq Sequence number written after the header.
     * @param {Uint8Array} payloadBytes Payload bytes.
     * @returns {Uint8Array} The complete frame.
     * @private
     */
    _buildFrame(header, seq, payloadBytes) {
        const parts = [
            header,
            this.generateBeforePayload(seq),
            this.intToBytes(payloadBytes.length),
            payloadBytes
        ];
        const total = parts.reduce((sum, part) => sum + part.length, 0);
        const frame = new Uint8Array(total);
        let offset = 0;
        parts.forEach((part) => {
            frame.set(part, offset);
            offset += part.length;
        });
        return frame;
    }

    /**
     * Sends the initial full client request (uncompressed JSON describing
     * the user, the audio and the request) and resets the sequence to 1.
     * @private
     */
    _sendInitialRequest() {
        const payload = {
            user: {
                uid: this.uid
            },
            audio: this.audioConfig,
            request: {
                model_name: 'bigmodel',
                enable_punc: true
            }
        };

        const payloadStr = JSON.stringify(payload);
        console.log(payloadStr);

        // No compression: the JSON text is sent as plain UTF-8 bytes.
        const payloadBytes = new Uint8Array(this.stringToArrayBuffer(payloadStr));
        const header = this.getHeader(FULL_CLIENT_REQUEST, POS_SEQUENCE, JSON_SERIALIZATION, NO_COMPRESSION, 0);

        this.seq = 1;
        const fullClientRequest = this._buildFrame(header, this.seq, payloadBytes);

        this._sendBinaryData(fullClientRequest.buffer);
    }

    /**
     * Sends one chunk of audio as an audio-only request.
     *
     * @param {ArrayBuffer|ArrayBufferView} audioData Raw audio bytes. Typed-array
     *   views (e.g. `Int16Array`) are sent byte-for-byte.
     * @param {boolean} isLast Whether this is the final chunk; the final chunk
     *   is sent with a negated sequence number.
     * @returns {boolean} Whether the frame was handed to the socket.
     */
    sendAudioData(audioData, isLast = false) {
        if (!this.socket) {
            console.error('WebSocket未连接');
            return false;
        }

        this.seq++;
        console.log('seq:', this.seq);

        const seq = isLast ? -this.seq : this.seq;
        const messageTypeSpecificFlags = isLast ? NEG_WITH_SEQUENCE : POS_SEQUENCE;
        const header = this.getHeader(AUDIO_ONLY_REQUEST, messageTypeSpecificFlags, JSON_SERIALIZATION, NO_COMPRESSION, 0);

        // `new Uint8Array(typedArray)` converts element by element, which
        // would truncate 16-bit samples; reinterpret the view's bytes instead.
        const payloadBytes = ArrayBuffer.isView(audioData)
            ? new Uint8Array(audioData.buffer, audioData.byteOffset, audioData.byteLength)
            : new Uint8Array(audioData);

        const audioOnlyRequest = this._buildFrame(header, seq, payloadBytes);
        return this._sendBinaryData(audioOnlyRequest.buffer);
    }

    /**
     * Sends a binary frame through the API that created the socket.
     *
     * @param {ArrayBuffer} data Frame to send.
     * @returns {boolean} `true` when the frame was handed to the socket API
     *   without a synchronous error; asynchronous failures are only logged.
     * @private
     */
    _sendBinaryData(data) {
        if (!this.socket) {
            console.error('WebSocket未连接');
            return false;
        }

        const callbacks = {
            success: () => {
                console.log('发送成功');
            },
            fail: (err) => {
                console.error('发送失败:', err);
            }
        };
        // Fall back to detection when the socket was not opened through
        // startRecognition.
        const transport = this.transport || this._detectTransport();

        try {
            if (transport === 'wx') {
                // WeChat mini-program SocketTask
                this.socket.send({ data, ...callbacks });
            } else if (transport === 'uni') {
                // uni-app global socket API
                uni.sendSocketMessage({ data, ...callbacks });
            } else {
                // Browser WebSocket
                this.socket.send(data);
            }
            return true;
        } catch (err) {
            console.error('发送数据失败:', err);
            return false;
        }
    }

    /**
     * Closes the connection (code 1000, reason 'finished') and forgets the
     * socket. Does nothing when no socket is open.
     */
    close() {
        if (!this.socket) {
            return;
        }

        const closeOptions = {
            code: 1000,
            reason: 'finished',
            success: () => {
                console.log('WebSocket已关闭');
            },
            fail: (err) => {
                console.error('关闭WebSocket失败:', err);
            }
        };
        const transport = this.transport || this._detectTransport();

        try {
            if (transport === 'wx') {
                // WeChat mini-program SocketTask
                this.socket.close(closeOptions);
            } else if (transport === 'uni') {
                // uni-app global socket API
                uni.closeSocket(closeOptions);
            } else {
                // Browser WebSocket
                this.socket.close(1000, 'finished');
            }
        } finally {
            // Drop the reference even if closing throws, so later sends
            // report "not connected" instead of using a dead socket.
            this.socket = null;
            this.transport = null;
        }
    }

    /**
     * Generates a random identifier in UUID v4 layout (based on
     * `Math.random`, so not cryptographically secure).
     * @returns {string} The identifier.
     */
    generateRequestId() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }

    /**
     * Computes the byte size of one audio chunk when the audio is cut into
     * ten pieces.
     * @param {number} frameLength Total number of sample frames.
     * @param {number} sampleSizeInBits Bits per sample.
     * @param {number} channels Number of channels.
     * @returns {number} Chunk size in bytes (not rounded, so it may be fractional).
     */
    calculateChunkSize(frameLength, sampleSizeInBits, channels) {
        // For a positive frameLength this is always frameLength / 10.
        const frames = Math.min(frameLength, frameLength / 10);
        return (sampleSizeInBits / 8) * channels * frames;
    }

    /**
     * Concatenates several ArrayBuffers into one.
     * @param {ArrayBuffer[]} buffers Buffers to concatenate, in order.
     * @returns {ArrayBuffer} A new buffer holding all bytes.
     */
    mergeArrayBuffers(buffers) {
        const totalLength = buffers.reduce((sum, buffer) => sum + buffer.byteLength, 0);
        const result = new Uint8Array(totalLength);

        let offset = 0;
        buffers.forEach(buffer => {
            result.set(new Uint8Array(buffer), offset);
            offset += buffer.byteLength;
        });

        return result.buffer;
    }
}

// 导出AsrWsClient类
export default AsrWsClient;