现象更新: 我在手机的浏览器上试验了一下,也会有这个问题。所以这看起来是讯飞SDK的问题?
微信小程序webview模式下使用讯飞流式语音识别,发现有噪音效果很差

我们的产品使用了微信小程序的webview的模式。 在接入讯飞的流式语音服务(https://www.xfyun.cn/doc/asr/voicedictation/API.htm)时,语音识别精度普遍不如预期。尤其是在语音输入较长时,识别精度的下降更为明显。这一点在其他用户中也会出现。进一步分析发现,发送给讯飞的原始pcm格式的音频其实有噪音,这可能是识别效果差的原因,但是噪音是如何来的不得而知。因为同样的代码在谷歌浏览器上就很正常, 如果有任何其他需要提供的信息,随时联系我,希望能够得到你们的帮助。

问题音频:https://lunar-fireplant-d5c.notion.site/80b77725a7c64ef98c55c2e19ed8224a

核心代码:

/**
 * Concatenates the given ArrayBuffers, in order, into one contiguous
 * ArrayBuffer.
 *
 * @param arrayBuffers Buffers to join; an empty list yields an empty buffer.
 * @returns A new ArrayBuffer holding the bytes of every input buffer.
 */
function mergeArrayBuffers(arrayBuffers: ArrayBuffer[]) {
  const totalLength = arrayBuffers.reduce(
    (acc, buffer) => acc + buffer.byteLength,
    0,
  );
  const mergedBuffer = new Uint8Array(totalLength);
  let offset = 0;
  for (const buffer of arrayBuffers) {
    mergedBuffer.set(new Uint8Array(buffer), offset);
    offset += buffer.byteLength;
  }
  return mergedBuffer.buffer;
}

/**
 * Encodes the raw bytes of an ArrayBuffer as a base64 string.
 *
 * @param buffer Bytes to encode.
 * @returns The base64 representation produced by `window.btoa`.
 */
function toBase64(buffer: ArrayBuffer) {
  const bytes = new Uint8Array(buffer);
  const len = bytes.byteLength;
  // btoa expects a "binary string": one character per byte.
  let binary = '';
  for (let i = 0; i < len; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return window.btoa(binary);
}

/**
 * Starts the shared `recorder`, streams every recorded frame to the
 * WebSocket at `options.url`, and reports progress through the callbacks
 * on `options`.
 *
 * Frames recorded before the socket is open are queued and sent as soon as
 * it opens. Recording is stopped automatically after 60 seconds.
 *
 * @param options Endpoint URL plus the lifecycle/result callbacks.
 * @returns A function that stops the recording; calling it more than once
 *          has no further effect.
 */
export function startRecorder(options: RecorderOptions) {
  const startTime = Date.now();
  let blobParts: BlobPart[] = [];
  // Frames recorded while the socket is not open yet; null once flushed.
  let queueFrames: Frame[] | null = [];
  let ws: WebSocket | null = null;
  let stopped = false;
  // Text confirmed so far, and the latest provisional text shown to the user.
  let resultText = '';
  let resultTextTemp = '';
  let ignoreStopCallback = false;

  // Hard limit: stop recording after 60 seconds.
  const timer = setTimeout(() => {
    stopRecorder();
    options.onTimeout?.();
  }, 60000);

  /** Handles one server message: updates the transcript and closes the socket when done or on error. */
  function parseResult(resultData: string) {
    const jsonData = JSON.parse(resultData);
    const result = jsonData.data?.result;
    if (result) {
      let str = '';
      for (const word of result.ws ?? []) {
        // Take the first candidate of every word; skip entries without one.
        str += word.cw?.[0]?.w ?? '';
      }
      // `pgs` is only present when wpgs is enabled (dynamic correction must
      // be turned on in the console). "apd" means this segment is appended
      // to the previous final result; "rpl" means it replaces part of the
      // previous results, the range being given by the `rg` field.
      if (result.pgs) {
        if (result.pgs === 'apd') {
          // Promote the provisional text to the confirmed text.
          resultText = resultTextTemp;
        }
        // Keep the new provisional text separately.
        resultTextTemp = resultText + str;
      } else {
        resultText = resultText + str;
      }
      options.onResult?.(resultTextTemp || resultText || '');
    }
    // Close on any error code, or once the server reports status 2.
    if (jsonData.code !== 0 || jsonData.data?.status === 2) {
      ws?.close();
    }
  }

  /** Sends one recorded frame as a base64 audio message (status 2 for the last frame, 1 otherwise). */
  function sendFrame(socket: WebSocket, frame: Frame) {
    socket.send(
      JSON.stringify({
        data: {
          status: frame.isLastFrame ? 2 : 1,
          format: 'audio/L16;rate=16000',
          encoding: 'raw',
          audio: toBase64(frame.frameBuffer),
        },
      }),
    );
  }

  /** Sends every queued frame once the socket is open, then disables queueing. */
  function flushQueuedFrames() {
    const socket = ws;
    if (!queueFrames || !socket || socket.readyState !== WebSocket.OPEN) {
      return;
    }
    const pending = queueFrames;
    queueFrames = null;
    for (const item of pending) {
      sendFrame(socket, item);
    }
  }

  /** Opens the WebSocket and wires its handlers to the recorder state. */
  function initWS() {
    const socket = new WebSocket(options.url);
    ws = socket;
    socket.onopen = () => {
      // First message (status 0) carries the session parameters, no audio.
      socket.send(
        JSON.stringify({
          common: {
            app_id: APPID,
          },
          business: {
            // NOTE(review): accent 'mandarin' is sent together with
            // language 'en_us' — confirm this combination is intended.
            language: 'en_us', // 'en_us' or 'zh_cn'
            domain: 'iat',
            accent: 'mandarin',
            vad_eos: 5000,
            dwa: 'wpgs',
          },
          data: {
            status: 0,
            format: 'audio/L16;rate=16000',
            encoding: 'raw',
          },
        }),
      );
      // Send what was recorded while connecting. Without this, frames queued
      // before the socket opened were only sent when a later frame arrived,
      // so a recording that ended early was never transmitted.
      flushQueuedFrames();
    };
    socket.onerror = (err) => {
      ignoreStopCallback = true;
      options.onError(err);
    };
    socket.onmessage = (evt) => {
      parseResult(evt.data);
    };
    socket.onclose = () => {
      stopRecorder();
      console.log({ blobParts });
      if (!ignoreStopCallback) {
        // NOTE(review): the blob is labelled ogg/opus while the frames are
        // sent as raw L16 PCM — confirm what format the recorder returns.
        options.onStop(
          new Blob(blobParts, {
            type: 'audio/ogg; codecs=opus',
          }),
        );
      }
    };
  }

  /** Stops the recorder once; reports a cancel when no socket was ever opened. */
  function stopRecorder() {
    if (stopped) {
      return;
    }
    clearTimeout(timer);
    stopped = true;
    recorder.stop();
    if (!ws) {
      options.onCancel?.();
    }
  }

  recorder.onStart = () => {
    options.onStart?.();
  };

  recorder.onStop = (audioBuffers: ArrayBuffer[]) => {
    blobParts = audioBuffers;
    clearTimeout(timer);
    if (options.minDuration && Date.now() - startTime < options.minDuration) {
      ignoreStopCallback = true;
      options.onTimeShortage?.();
    }
    // The recorder instance is a shared object, so its handlers have to be
    // cleared manually to release memory.
    recorder.onStart = null;
    recorder.onStop = null;
    recorder.onFrameRecorded = null;
  };

  recorder.onFrameRecorded = (frame: Frame) => {
    if (queueFrames) {
      // Still queueing: append, then flush if the socket has opened.
      queueFrames.push(frame);
      flushQueuedFrames();
      return;
    }
    if (ws?.readyState === WebSocket.OPEN) {
      sendFrame(ws, frame);
    }
  };

  recorder
    .start({ sampleRate: 16000, frameSize: 1280 })
    .then(() => {
      if (stopped) {
        return;
      }
      // If more than 300 ms passed between the request and the response,
      // treat it as "permission granted after a prompt": the system
      // permission dialog interrupts page interaction, so this flag lets
      // callers handle that case specifically.
      // Note: this is only a latency heuristic and is not 100% reliable.
      const isPrompt = Date.now() - startTime > 300;
      if (isPrompt && options.cancelOnPrompt) {
        ignoreStopCallback = true;
        // No socket exists yet, so stopRecorder() already reports onCancel;
        // calling it again here would notify the caller twice.
        stopRecorder();
        return;
      }
      initWS();
    })
    .catch((err: any) => {
      if (stopped) {
        return;
      }
      clearTimeout(timer);
      options.onDenied?.(err);
    });

  return stopRecorder;
}
2023-11-09