/// <summary>
/// Extracts the recognition text: decodes every item of a recognition result as UTF-8
/// and appends it to <c>_sbResult</c>.
/// </summary>
/// <param name="result">Recognition result structure.</param>
private void GetAsrResult(ASR_RECOG_RESULT result)
{
    int sizeOfItem = Marshal.SizeOf(typeof(ASR_RECOG_RESULT_ITEM));
    int itemCount = (int)result.uiResultItemCount;

    for (int i = 0; i < itemCount; ++i)
    {
        // Offset the native pointer at full pointer width. The previous
        // ToInt32()-based arithmetic throws OverflowException in a 64-bit
        // process whenever the address does not fit in 32 bits.
        IntPtr itemPtr = IntPtr.Add(result.psResultItemList, i * sizeOfItem);
        ASR_RECOG_RESULT_ITEM item = (ASR_RECOG_RESULT_ITEM)Marshal.PtrToStructure(itemPtr, typeof(ASR_RECOG_RESULT_ITEM));

        // NOTE(review): 100 is passed as FindNullIndex's second argument; presumably an
        // upper bound on the terminator scan, which would truncate longer results - confirm.
        int len = FindNullIndex(item.pszResult, 100);

        // Copy the native bytes into managed memory before decoding them.
        byte[] data = new byte[len];
        Marshal.Copy(item.pszResult, data, 0, len);
        _sbResult.Append(Encoding.UTF8.GetString(data));
    }
}
// Externally implemented (static extern) function that receives a recognition result
// structure by reference and returns an int.
// In this file it is called right after the contents of a result have been read.
// NOTE(review): presumably releases the native memory referenced by recogResult and
// returns an HCI error code - confirm against the SDK header; visible callers ignore
// the return value.
public static extern int hci_asr_free_recog_result(ref ASR_RECOG_RESULT recogResult);
// Externally implemented (static extern) recognition call.
//   sessionId   - identifier of the session the call belongs to.
//   data        - audio bytes for this call; callers in this file pass null (with
//                 dataLen 0) when they want to fetch the result of the audio sent so far.
//   dataLen     - number of bytes in data.
//   congfig     - (sic) name kept as-is because renaming a parameter would break
//                 named-argument callers; callers in this file pass null.
//                 NOTE(review): presumably a per-call configuration string - confirm.
//   grammarData - callers in this file pass null.
//                 NOTE(review): presumably grammar text for grammar-based recognition - confirm.
//   recogResult - result structure passed by reference; callers read it after the call.
// The int return value is compared against HCI_ERR_CODE members by callers in this file.
public static extern int hci_asr_recog(int sessionId, byte[] data, uint dataLen, string congfig, string grammarData, ref ASR_RECOG_RESULT recogResult);
/// <summary>
/// Speech recognition: feeds the audio buffer to the engine chunk by chunk and
/// collects every result the engine produces along the way.
/// </summary>
/// <param name="audioData">Audio data.</param>
/// <param name="sessionId">Session ID.</param>
/// <returns>
/// true when the final result request made after all audio has been sent returns
/// HCI_ERR_NONE; false otherwise.
/// </returns>
private bool Recog(byte[] audioData, int sessionId)
{
    const int ChunkSize = 6400; // bytes handed to the engine per call (0.2 s)

    int status = 0;
    ASR_RECOG_RESULT result = new ASR_RECOG_RESULT();

    // Simulate real-time recognition: split the audio into chunks and pass them
    // to the recognition interface one at a time. "offset" is the number of bytes
    // already consumed; it only advances when the engine asks for more audio.
    for (int offset = 0; offset < audioData.Length; )
    {
        // Send a full chunk while enough data remains, otherwise the remainder.
        int chunkLen = Math.Min(ChunkSize, audioData.Length - offset);
        byte[] chunk = new byte[chunkLen];
        Buffer.BlockCopy(audioData, offset, chunk, 0, chunkLen);

        status = hci_api.hci_asr_recog(sessionId, chunk, (uint)chunkLen, null, null, ref result);
        if (result.uiResultItemCount > 0)
        {
            GetAsrResult(result);
            hci_api.hci_asr_free_recog_result(ref result);
        }

        if (status == (int)HCI_ERR_CODE.HCI_ERR_ASR_REALTIME_END)
        {
            // End of speech detected: fetch the result for this segment.
            // The offset is intentionally left where it is, as in the original flow.
            status = FlushRecogSegment(sessionId, ref result);
            if (status != (int)HCI_ERR_CODE.HCI_ERR_NONE)
            {
                break;
            }

            continue;
        }

        if (status == (int)HCI_ERR_CODE.HCI_ERR_ASR_REALTIME_WAITING
            || status == (int)HCI_ERR_CODE.HCI_ERR_ASR_REALTIME_NO_VOICE_INPUT)
        {
            // In continuous recognition these two states are ignored and the
            // remaining audio keeps being recognized.
            offset += chunkLen;
            continue;
        }

        // Recognition failed.
        _errMsg = FormatRecogError(status);
        break;

        // TODO: test whether a Sleep() call is needed here to improve recognition speed.
        // Thread.Sleep(200); // simulate real speaking pace: wait 200 ms after sending 200 ms of data
    }

    // If all data has been sent while the engine is still waiting, or an end point
    // was reported, tell the engine that input is complete and fetch the result.
    if (status == (int)HCI_ERR_CODE.HCI_ERR_ASR_REALTIME_WAITING
        || status == (int)HCI_ERR_CODE.HCI_ERR_ASR_REALTIME_END)
    {
        return FlushRecogSegment(sessionId, ref result) == (int)HCI_ERR_CODE.HCI_ERR_NONE;
    }

    return false;
}

/// <summary>
/// Calls the recognition interface with no audio to request the pending result.
/// On HCI_ERR_NONE the result is collected and freed; on any other code the
/// error message is stored in <c>_errMsg</c>.
/// </summary>
/// <param name="sessionId">Session ID.</param>
/// <param name="result">Result structure reused across engine calls.</param>
/// <returns>The code returned by the recognition call.</returns>
private int FlushRecogSegment(int sessionId, ref ASR_RECOG_RESULT result)
{
    int status = hci_api.hci_asr_recog(sessionId, null, 0, null, null, ref result);
    if (status == (int)HCI_ERR_CODE.HCI_ERR_NONE)
    {
        GetAsrResult(result);
        hci_api.hci_asr_free_recog_result(ref result);
    }
    else
    {
        _errMsg = FormatRecogError(status);
    }

    return status;
}

/// <summary>
/// Builds the error text stored in <c>_errMsg</c> for a failed recognition call.
/// </summary>
/// <param name="errCode">Code returned by the recognition call.</param>
/// <returns>The formatted error message.</returns>
private string FormatRecogError(int errCode)
{
    return string.Format("hci_asr_recog return {0}:{1}", errCode, hci_api.hci_get_error_info(errCode));
}