예제 #1
0
        /// <summary>
        /// Handles a recognition event: writes an empty line to the console, converts the
        /// properties of the event's result with <c>GetResponse</c> and adds the outcome to
        /// <c>SpeechResult</c>.
        /// </summary>
        /// <param name="sender">The event source (not used).</param>
        /// <param name="e">Event data carrying the recognition result.</param>
        private void Recognized(object sender, SpeechRecognitionEventArgs e)
        {
            Console.WriteLine();

            // Convert and store in one step; no intermediate local is required.
            SpeechResult.Add(GetResponse(e.Result.Properties));
        }
예제 #2
0
        /// <summary>
        /// Demo entry point: configures a recognizer with the en-us acoustic model and
        /// dictionary, switches the decoder's search manager to "allphoneSearchManager",
        /// feeds it a bundled WAV resource and walks every recognition result and its words.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        // NOTE(review): every "[email protected](...)" token below is not valid C#. It looks
        // like an e-mail-obfuscation artifact that replaced the original console-output
        // expression when this snippet was published — TODO restore from the upstream source.
        public static void main(string[] args)
        {
            [email protected]("Loading models...");
            Configuration configuration = new Configuration();

            configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
            configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
            Context context = new Context(configuration);

            context.setLocalProperty("decoder->searchManager", "allphoneSearchManager");
            Recognizer  recognizer       = (Recognizer)context.getInstance(ClassLiteral <Recognizer> .Value);
            InputStream resourceAsStream = ClassLiteral <AllphoneDemo> .Value.getResourceAsStream("/edu/cmu/sphinx/demo/aligner/10001-90210-01803.wav");

            // Skip the first 44 bytes of the resource — presumably the WAV header; confirm.
            resourceAsStream.skip((long)((ulong)44));
            recognizer.allocate();
            context.setSpeechSource(resourceAsStream, TimeFrame.INFINITE);
            Result result;

            // Keep recognizing until the recognizer returns null.
            while ((result = recognizer.recognize()) != null)
            {
                SpeechResult speechResult = new SpeechResult(result);
                [email protected]("Hypothesis: %s\n", new object[]
                {
                    speechResult.getHypothesis()
                });
                [email protected]("List of recognized words and their times:");
                Iterator iterator = speechResult.getWords().iterator();
                while (iterator.hasNext())
                {
                    WordResult wordResult = (WordResult)iterator.next();
                    [email protected](wordResult);
                }
            }
            // NOTE(review): deallocate() is skipped if anything above throws, and the
            // resource stream is never closed in this method.
            recognizer.deallocate();
        }
예제 #3
0
 /// <summary>
 /// Adds the next recognition result received from the speech recognition service.
 /// </summary>
 /// <param name="recognitionResult">The recognition result to append.</param>
 /// <remarks>
 /// The recognition results are accumulated as they are received from the speech service, and
 /// may be used at a later time to build the partial and final speech recognition results.
 /// </remarks>
 internal void AppendResult(SpeechResult recognitionResult)
 {
     // An incoming partial result, or a final result whose status is Success, replaces the
     // last stored result when that last result is itself a partial one.
     // NOTE(review): the indexer reads element Count - 1 with no emptiness check visible
     // here — confirm the list can never be empty at this point.
     if ((recognitionResult is PartialRecognitionResult || (recognitionResult is RecognitionResult result && result.RecognitionStatus == RecognitionStatus.Success)) &&
         this.recognitionResults[this.recognitionResults.Count - 1] is PartialRecognitionResult)
     {
         // replace the immediately preceding partial result
         this.recognitionResults[this.recognitionResults.Count - 1] = recognitionResult;
     }
예제 #4
0
        /// <summary>
        /// Runs speech recognition and, when it succeeds, opens the task-creation flyout
        /// with the recognized text used as the new task's title.
        /// </summary>
        private async void SpeechExecute()
        {
            SpeechResult recognition = await this.speechService.RecognizeAsync(null, null);

            // Nothing happens when recognition did not succeed.
            if (!recognition.IsSuccess)
            {
                return;
            }

            var creationParameters = this.SelectedFolder.GetTaskCreationParameters();
            creationParameters.Title = recognition.Text;

            this.NavigationService.FlyoutTo(ViewLocator.CreateEditTaskPageNew, creationParameters);
        }
예제 #5
0
파일: Recognition.cs 프로젝트: Ouay/MVP
        /// <summary>
        /// Stops recording, runs the recognizer over <c>mem</c> from its start, prints the
        /// hypothesis to the console and then waits for two key presses.
        /// </summary>
        private void recognize()
        {
            waveInStream.StopRecording();

            // Rewind so that recognition reads from the first byte.
            mem.Position = 0;
            _recognizer.StartRecognition(mem, new TimeFrame(mem.Length));

            SpeechResult outcome = _recognizer.GetResult();
            _recognizer.StopRecognition();

            Console.WriteLine(string.Concat("result: ", outcome.GetHypothesis()));

            // Two key presses are consumed before the method returns.
            for (int remaining = 2; remaining > 0; remaining--)
            {
                Console.ReadKey();
            }
        }
예제 #6
0
        /// <summary>
        /// Handles release of the record button: stops the audio recorder, runs speech
        /// recognition on <c>filepath</c> and displays the recognized text.
        /// </summary>
        /// <param name="sender">The event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        async void OnRecordReleased(object sender, EventArgs e)
        {
            var recorder = DependencyService.Get <IAudioRecorder>();
            recorder.Stop();

            // Caption shown while recognition is running ("recognizing").
            btnRecord.Text = "正识别中";

            SpeechResult recognition = await mmx.Speech.Asr(filepath);
            OutputText.Text = recognition.text;

            // Put the button caption back once the text is displayed.
            btnRecord.Text = "录音识别";
        }
예제 #7
0
 /// <summary>
 /// Callback for results returned by the Watson API. Converts the first result, passes it
 /// to the registered text-result handler (when one is set) and stores it as the last result.
 /// </summary>
 /// <param name="results">List of speech-to-text results.</param>
 protected void OnSpeechToTextResult(SpeechResultList results)
 {
     // Ignore callbacks that carry no result.
     if (!results.HasResult())
     {
         return;
     }

     // Only the first entry of the list is used.
     var converted = m_WatsonSpeechToTextComponent.CreateSpeechToTextResult(results.Results[0]);

     if (m_OnTextResult != null)
     {
         m_OnTextResult(converted);
     }

     m_LastResult = converted;
 }
예제 #8
0
        /// <summary>
        /// Speech recognition (speech to text) for the audio file at <paramref name="filepath"/>.
        /// </summary>
        /// <param name="filepath">
        /// Path of the audio file; its bytes are passed to the recognizer with format "amr"
        /// and rate argument 16000.
        /// </param>
        /// <returns>
        /// A <see cref="SpeechResult"/> with <c>status</c> 0 and <c>text</c> set on success, or
        /// <c>status</c> 1 and <c>error</c> set when the file does not exist or the service
        /// reports a non-zero error number.
        /// </returns>
        public static async Task <SpeechResult> Asr(string filepath)
        {
            SpeechResult outcome = new SpeechResult();

            // Without an audio file there is nothing to recognize.
            if (!File.Exists(filepath))
            {
                outcome.status = 1;
                outcome.error  = "语音错误:无语音";
                return(outcome);
            }

            // Credentials come from the APP_ID / API_KEY / SECRET_KEY members; literal keys
            // must not be kept in this method, not even inside comments.
            var client = new Baidu.Aip.Speech.Asr(APP_ID, API_KEY, SECRET_KEY);
            client.Timeout = 60000;  // timeout in ms; a larger value is advisable for long audio

            var audio = File.ReadAllBytes(filepath);

            // Recognition language: 1737 is English.
            var options = new Dictionary <string, object>
            {
                { "dev_pid", 1737 }
            };

            // The blocking client call runs on a thread-pool thread.
            await Task.Run(() =>
            {
                var response = client.Recognize(audio, "amr", 16000, options);
                MResult parsed = JsonConvert.DeserializeObject <MResult>(response.ToString());

                if (parsed.err_no != 0)
                {
                    // Failure payload.
                    outcome.status = 1;
                    outcome.error  = "语音错误:" + parsed.err_no.ToString();
                }
                else
                {
                    // Success payload: the first entry of the result list is the text.
                    outcome.status = 0;
                    outcome.text   = parsed.result[0].ToString();
                }
            });

            return(outcome);
        }
예제 #9
0
 /// <summary>
 /// Handles a received output line: while holding <c>LOCK</c> it deserializes the data into
 /// <c>result</c> and logs the raw message. Any exception is logged and not rethrown.
 /// </summary>
 /// <param name="sender">The event source (not used).</param>
 /// <param name="e">Event data whose <c>Data</c> holds the received message.</param>
 private void OnOutputReceived(object sender, DataReceivedEventArgs e)
 {
     try
     {
         var payload = e.Data;

         lock (LOCK)
         {
             result = SpeechResult.Deserialize(payload);
             VRLog.Info("RECEIVED MESSAGE: " + payload);
         }
     }
     catch (Exception failure)
     {
         VRLog.Error(failure);
     }
 }
예제 #10
0
        /// <summary>
        /// Builds a <see cref="SpeechToTextResult"/> from a Watson <see cref="SpeechResult"/>,
        /// copying the final flag and converting every alternative.
        /// </summary>
        /// <param name="watsonResult">Watson SpeechResult object to convert.</param>
        /// <returns>The populated SpeechToTextResult object.</returns>
        public SpeechToTextResult CreateSpeechToTextResult(SpeechResult watsonResult)
        {
            var converted = new SpeechToTextResult();
            converted.IsFinal = watsonResult.Final;

            var sources = watsonResult.Alternatives;
            var targets = new TextAlternative[sources.Length];

            int slot = 0;
            foreach (SpeechAlt source in sources)
            {
                // One output alternative per Watson alternative, in the same order.
                targets[slot++] = new WatsonTextAlternative
                {
                    Text                 = source.Transcript,
                    Confidence           = (float)source.Confidence,
                    TimeStamps           = source.Timestamps,
                    WordConfidenceValues = source.WordConfidence
                };
            }

            converted.TextAlternatives = targets;
            return(converted);
        }
예제 #11
0
        /// <summary>
        /// Runs speech recognition. On success the task-creation flyout opens with the
        /// recognized text as the title; otherwise a warning message box is shown.
        /// </summary>
        private async void QuickSpeechExecute()
        {
            SpeechResult recognition = await this.speechService.RecognizeAsync(null, null);

            // Failure path first: tell the user and stop.
            if (!recognition.IsSuccess)
            {
                await this.messageBoxService.ShowAsync(
                    StringResources.Message_Warning,
                    StringResources.Speech_ErrorDuringRecognitionFormat.TryFormat(recognition.Text));
                return;
            }

            var creationParameters = this.SelectedFolder.GetTaskCreationParameters();
            creationParameters.Title = recognition.Text;

            this.NavigationService.FlyoutTo(ViewLocator.CreateEditTaskPageNew, creationParameters);
        }
예제 #12
0
        /// <summary>
        /// Speech synthesis (text to speech).
        /// </summary>
        /// <param name="text">Text to synthesize.</param>
        /// <param name="spd">Speaking speed, sent as the "spd" option.</param>
        /// <param name="pit">Pitch, sent as the "pit" option.</param>
        /// <returns>
        /// A <see cref="SpeechResult"/> with <c>status</c> 0 and <c>speech</c> holding the
        /// synthesized data on success, or <c>status</c> 1 and <c>error</c> set when the
        /// service returns a non-zero error code.
        /// </returns>
        public static async Task <SpeechResult> Tts(string text, int spd, int pit)
        {
            SpeechResult outcome = new SpeechResult();

            // Credentials come from the API_KEY / SECRET_KEY members; literal keys must not
            // be kept in this method, not even inside comments.
            var client = new Baidu.Aip.Speech.Tts(API_KEY, SECRET_KEY);
            client.Timeout = 60000;  // adjusted timeout

            // Optional synthesis parameters.
            var options = new Dictionary <string, object>();
            options["spd"] = spd; // speed
            options["vol"] = 7;   // volume
            options["pit"] = pit; // pitch
            // Speaker: 0 = standard female voice, 1 = standard male voice,
            // 3 and 4 = the two "emotional synthesis" voices; the standard female voice is the default.
            options["per"] = 0;
            options["aue"] = 3;

            // The blocking client call runs on a thread-pool thread.
            await Task.Run(() =>
            {
                var synthesis = client.Synthesis(text, options);

                if (synthesis.ErrorCode != 0)
                {
                    // Failure payload.
                    outcome.status = 1;
                    outcome.error  = "错误:" + synthesis.ErrorCode.ToString();
                }
                else
                {
                    // Success payload: hand the synthesized audio back to the caller.
                    outcome.status = 0;
                    outcome.speech = synthesis.Data;
                }
            });

            return(outcome);
        }
예제 #13
0
        /// <summary>
        /// Runs speech recognition and passes non-blank recognized text to
        /// <paramref name="onSuccess"/>; otherwise shows a warning message box.
        /// </summary>
        /// <param name="onSuccess">Callback that receives the recognized text.</param>
        /// <param name="isNote">
        /// When <c>true</c>, <c>NoteSpeechRecognitionCompleted</c> is also raised (if it has
        /// subscribers) with the recognized text.
        /// </param>
        private async Task SpeechExecute(Action <string> onSuccess, bool isNote)
        {
            SpeechResult recognition = await this.speechService.RecognizeAsync(null, null);

            bool usable = recognition.IsSuccess && !string.IsNullOrWhiteSpace(recognition.Text);
            if (!usable)
            {
                // Recognition failed or produced only whitespace.
                await this.messageBoxService.ShowAsync(
                    StringResources.Message_Warning,
                    StringResources.Speech_ErrorDuringRecognitionFormat.TryFormat(recognition.Text));
                return;
            }

            onSuccess(recognition.Text);

            if (!isNote)
            {
                return;
            }

            if (this.NoteSpeechRecognitionCompleted != null)
            {
                this.NoteSpeechRecognitionCompleted(this, new EventArgs <string>(recognition.Text));
            }
        }
예제 #14
0
        /// <summary>
        /// Toggles audio recording. Starting shows the <c>recording</c> indicator and hides
        /// <c>record</c>; stopping reverses that, passes <c>Constants.AudioFilename</c> to
        /// the speech service and forwards any non-blank recognized text to
        /// <c>ProcessResponse</c>.
        /// </summary>
        /// <remarks>
        /// This is an <c>async void</c> handler, so exceptions must not escape it. They are
        /// caught and written to the debug output rather than being discarded silently.
        /// </remarks>
        async void Record_OnClick()
        {
            try
            {
                var audioRecordingService = DependencyService.Get <IAudioRecorderService>();
                if (!isRecording)
                {
                    recording.IsVisible = true;
                    record.IsVisible    = false;
                    audioRecordingService.StartRecording();
                }
                else
                {
                    recording.IsVisible = false;
                    record.IsVisible    = true;
                    audioRecordingService.StopRecording();
                }

                isRecording = !isRecording;
                if (isRecording)
                {
                    // Recording has just started; recognition runs on the next click.
                    return;
                }

                speechResult = await bingSpeechService.RecognizeSpeechAsync(Constants.AudioFilename);

                if (!string.IsNullOrWhiteSpace(speechResult.DisplayText))
                {
                    var result = new SpeechItem();
                    result.SpeechText = speechResult.DisplayText;
                    result.Speaker    = " - ";
                    ProcessResponse(result);
                }
            }
            catch (Exception ex)
            {
                // Make the failure visible (debug builds) instead of swallowing it.
                System.Diagnostics.Debug.WriteLine(ex);
            }
        }
예제 #15
0
        /// <summary>
        /// Runs a free-form dictation recognition: streams <c>_freeFormAudio</c> to the
        /// recognizer as "audio/x-wav" and prints the recognized text and confidence, or
        /// the error when stopping the stream throws.
        /// </summary>
        public void runFreeForm()
        {
            var recognizer = new iSpeechRecognizer(_api, _production);
            recognizer.setFreeForm(iSpeechRecognizer.FREEFORM_DICTATION);

            StreamingBuffer upload = recognizer.startStreamingRecognize("audio/x-wav", this);
            uploadFile(_freeFormAudio, upload);

            try
            {
                SpeechResult outcome = recognizer.stopStreaming();
                Console.WriteLine("Text = {0} Confidence = {1}", outcome.Text, outcome.Confidence);
            }
            catch (Exception failure)
            {
                Console.WriteLine("error " + failure);
            }
        }
예제 #16
0
        /// <summary>
        /// Queries the LUIS endpoint with the URL-encoded <c>Lexical</c> text of
        /// <paramref name="speech"/> and stores the deserialized response in its
        /// <c>IntentResult</c>.
        /// </summary>
        /// <param name="speech">Speech result to query for; it is modified and returned.</param>
        /// <param name="appId">Value sent as the "id" query parameter.</param>
        /// <param name="key">Value sent as the "subscription-key" query parameter.</param>
        /// <returns>The same <paramref name="speech"/> instance with <c>IntentResult</c> set.</returns>
        // NOTE(review): the default argument values look like literal credentials committed to
        // source — confirm and move them to configuration if so.
        public SpeechResult GetIntent(SpeechResult speech, string appId = "db72800d-c433-46fa-83e6-1e07648fa698", string key = "c9baf714974b4a7792324a7c4d3a916e")
        {
            string encodedQuery = HttpUtility.UrlEncode(speech.Lexical);
            string endpoint     = string.Format(
                "https://api.projectoxford.ai/luis/v1/application?subscription-key={0}&id={1}&q={2}",
                key,
                appId,
                encodedQuery);

            var request = (HttpWebRequest)WebRequest.Create(endpoint);
            request.AutomaticDecompression = DecompressionMethods.GZip;

            // Read the whole response body as text.
            string payload;
            using (var response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (var reader = new StreamReader(body))
            {
                payload = reader.ReadToEnd();
            }

            speech.IntentResult = JsonConvert.DeserializeObject <IntentResult>(payload);
            return(speech);
        }
예제 #17
0
        /// <summary>
        /// Runs a command-list recognition with free-form mode disabled: registers the
        /// "NAMES" alias and the "call %NAMES%" command, streams <c>_listAudio</c> as
        /// "audio/x-wav" and prints the recognized text and confidence, or the error when
        /// stopping the stream throws.
        /// </summary>
        public void runList()
        {
            var recognizer = new iSpeechRecognizer(_api, _production);
            recognizer.setFreeForm(iSpeechRecognizer.FREEFORM_DISABLED);

            // The command refers to the alias by its name wrapped in percent signs.
            var callees = new[] { "jane", "bob", "john" };
            recognizer.addAlias("NAMES", callees);
            recognizer.addCommand("call %NAMES%");

            StreamingBuffer upload = recognizer.startStreamingRecognize("audio/x-wav", this);
            uploadFile(_listAudio, upload);

            try
            {
                SpeechResult outcome = recognizer.stopStreaming();
                Console.WriteLine("Text = {0} Confidence = {1}", outcome.Text, outcome.Confidence);
            }
            catch (Exception failure)
            {
                Console.WriteLine("error " + failure);
            }
        }
예제 #18
0
        /// <summary>
        /// Converts a parsed "recognize" response into a <see cref="SpeechResultList"/>.
        /// </summary>
        /// <param name="resp">
        /// Parsed response dictionary; its "results" entry is read as a list of result
        /// dictionaries, each with "final" and "alternatives" entries.
        /// </param>
        /// <returns>
        /// The parsed results, or <c>null</c> when <paramref name="resp"/> is <c>null</c>, has
        /// no "results" list, or an exception occurs while reading it (the exception is logged).
        /// </returns>
        private SpeechResultList ParseRecognizeResponse(IDictionary resp)
        {
            if (resp == null)
            {
                return(null);
            }

            try
            {
                List <SpeechResult> results = new List <SpeechResult>();
                IList iresults = resp["results"] as IList;
                if (iresults == null)
                {
                    return(null);
                }

                foreach (var r in iresults)
                {
                    IDictionary iresult = r as IDictionary;
                    // Skip entries that are not dictionaries. The check must look at the
                    // per-entry cast, not at the enclosing list, or a malformed entry would
                    // throw below and abort the whole parse.
                    if (iresult == null)
                    {
                        continue;
                    }

                    SpeechResult result = new SpeechResult();
                    result.Final = (bool)iresult["final"];

                    // A result without an alternatives list is dropped entirely.
                    IList ialternatives = iresult["alternatives"] as IList;
                    if (ialternatives == null)
                    {
                        continue;
                    }

                    List <SpeechAlt> alternatives = new List <SpeechAlt>();
                    foreach (var a in ialternatives)
                    {
                        IDictionary ialternative = a as IDictionary;
                        if (ialternative == null)
                        {
                            continue;
                        }

                        SpeechAlt alternative = new SpeechAlt();
                        alternative.Transcript = (string)ialternative["transcript"];
                        // "confidence", "timestamps" and "word_confidence" are optional.
                        if (ialternative.Contains("confidence"))
                        {
                            alternative.Confidence = (double)ialternative["confidence"];
                        }

                        if (ialternative.Contains("timestamps"))
                        {
                            IList itimestamps = ialternative["timestamps"] as IList;

                            // Each timestamp is read as a list: [word, start, end]. A slot whose
                            // entry is not a list keeps its default value.
                            TimeStamp[] timestamps = new TimeStamp[itimestamps.Count];
                            for (int i = 0; i < itimestamps.Count; ++i)
                            {
                                IList itimestamp = itimestamps[i] as IList;
                                if (itimestamp == null)
                                {
                                    continue;
                                }

                                TimeStamp ts = new TimeStamp();
                                ts.Word       = (string)itimestamp[0];
                                ts.Start      = (double)itimestamp[1];
                                ts.End        = (double)itimestamp[2];
                                timestamps[i] = ts;
                            }

                            alternative.Timestamps = timestamps;
                        }
                        if (ialternative.Contains("word_confidence"))
                        {
                            IList iconfidence = ialternative["word_confidence"] as IList;

                            // Each word confidence is read as a list: [word, confidence]. A slot
                            // whose entry is not a list keeps its default value.
                            WordConfidence[] confidence = new WordConfidence[iconfidence.Count];
                            for (int i = 0; i < iconfidence.Count; ++i)
                            {
                                IList iwordconf = iconfidence[i] as IList;
                                if (iwordconf == null)
                                {
                                    continue;
                                }

                                WordConfidence wc = new WordConfidence();
                                wc.Word       = (string)iwordconf[0];
                                wc.Confidence = (double)iwordconf[1];
                                confidence[i] = wc;
                            }

                            alternative.WordConfidence = confidence;
                        }

                        alternatives.Add(alternative);
                    }
                    result.Alternatives = alternatives.ToArray();
                    results.Add(result);
                }

                return(new SpeechResultList(results.ToArray()));
            }
            catch (Exception e)
            {
                // Any cast or lookup failure abandons the parse: log it and report no result.
                Log.Error("SpeechToText", "ParseJsonResponse exception: {0}", e.ToString());
                return(null);
            }
        }
예제 #19
0
        /// <summary>
        /// Walks the token chain of the best hypothesis in <paramref name="result"/>, from
        /// the best token back through its predecessors, and for every emitting HMM state
        /// adds that frame's contribution to the <c>regLs</c> and <c>regRs</c> accumulators.
        /// </summary>
        /// <param name="result">Speech result whose best token starts the walk.</param>
        /// <exception cref="Exception">Thrown when the result has no best token.</exception>
        // NOTE(review): the num8/array5 temporaries look like decompiler output for a plain
        // "array[index] += value" statement.
        public virtual void collect(SpeechResult result)
        {
            Token token = result.getResult().getBestToken();

            if (token == null)
            {
                string text = "Best token not found!";

                throw new Exception(text);
            }
            do
            {
                FloatData   floatData   = (FloatData)token.getData();
                SearchState searchState = token.getSearchState();
                // Tokens that are not emitting HMM states contribute nothing; just step back.
                if (!(searchState is HMMSearchState) || !searchState.isEmitting())
                {
                    token = token.getPredecessor();
                }
                else
                {
                    // One more emitting frame has been processed.
                    this.nFrames++;
                    float[] array  = token.calculateComponentScore(floatData);
                    float[] values = FloatData.toFloatData(floatData).getValues();
                    int     num    = (int)((HMMSearchState)token.getSearchState()).getHMMState().getMixtureId();
                    // With a Sphinx3Loader that has tied mixtures, the mixture id is remapped
                    // through the senone-to-CI table.
                    if (this.loader is Sphinx3Loader && this.loader.hasTiedMixtures())
                    {
                        num = this.loader.getSenone2Ci()[num];
                    }
                    int[]   vectorLength         = this.loader.getVectorLength();
                    int     numStreams           = this.loader.getNumStreams();
                    int     numGaussiansPerState = this.loader.getNumGaussiansPerState();
                    float[] array2 = this.computePosterios(array, numStreams);
                    // num2 is the offset of stream i's values inside "values".
                    int     num2   = 0;
                    for (int i = 0; i < numStreams; i++)
                    {
                        for (int j = 0; j < numGaussiansPerState; j++)
                        {
                            // Index of Gaussian j of stream i for this mixture:
                            // num * numStreams * numGaussiansPerState + i * numGaussiansPerState + j.
                            int   classIndex = this.means.getClassIndex(num * numStreams * numGaussiansPerState + i * numGaussiansPerState + j);
                            float num3       = array2[i * numGaussiansPerState + j];
                            // Only Gaussians with a positive posterior contribute.
                            if ((double)num3 > (double)0f)
                            {
                                float[] array3 = (float[])this.loader.getMeansPool().get(num * numStreams * numGaussiansPerState + i * numGaussiansPerState + j);
                                for (int k = 0; k < vectorLength[i]; k++)
                                {
                                    // num4: posterior times the observed value of dimension k.
                                    // num5 / num6: num4 and the posterior, each times the variance-pool entry for dimension k.
                                    float    num4 = array2[i * numGaussiansPerState + j] * values[k + num2];
                                    float    num5 = num4 * ((float[])this.loader.getVariancePool().get(num * numStreams * numGaussiansPerState + i * numGaussiansPerState + j))[k];
                                    float    num6 = num3 * ((float[])this.loader.getVariancePool().get(num * numStreams * numGaussiansPerState + i * numGaussiansPerState + j))[k];
                                    int      num8;
                                    double[] array5;
                                    for (int l = 0; l < vectorLength[i]; l++)
                                    {
                                        float num7 = num6 * array3[l];
                                        // Only entries with m >= l are updated in row l.
                                        for (int m = l; m < vectorLength[i]; m++)
                                        {
                                            double[] array4 = this.regLs[classIndex][i][k][l];
                                            num8          = m;
                                            array5        = array4;
                                            array5[num8] += (double)(num7 * array3[m]);
                                        }
                                        // Extra column at index vectorLength[i] of row l.
                                        double[] array6 = this.regLs[classIndex][i][k][l];
                                        num8          = vectorLength[i];
                                        array5        = array6;
                                        array5[num8] += (double)num7;
                                        double[] array7 = this.regRs[classIndex][i][k];
                                        num8          = l;
                                        array5        = array7;
                                        array5[num8] += (double)(num5 * array3[l]);
                                    }
                                    // Last row / last column entries at index vectorLength[i].
                                    double[] array8 = this.regLs[classIndex][i][k][vectorLength[i]];
                                    num8          = vectorLength[i];
                                    array5        = array8;
                                    array5[num8] += (double)num6;
                                    double[] array9 = this.regRs[classIndex][i][k];
                                    num8          = vectorLength[i];
                                    array5        = array9;
                                    array5[num8] += (double)num5;
                                }
                            }
                        }
                        num2 += vectorLength[i];
                    }
                    token = token.getPredecessor();
                }
            }while (token != null);
        }
예제 #20
0
        /// <summary>
        /// Builds the reply for a recognized utterance by looking for known Korean command
        /// phrases inside <c>DisplayText</c>. The first matching command wins.
        /// </summary>
        /// <param name="turnContext">Turn context whose activity the reply is created from.</param>
        /// <param name="speechResult">Recognition result to interpret; may be <c>null</c>.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="speechResult"/> is <c>null</c>; otherwise a reply
        /// carrying the attachment of the matched command, or an "Unknown command" reply
        /// whose <c>Value</c> holds the recognized text.
        /// </returns>
        private async Task <IMessageActivity> GetResponseFromSpeechResultAsync(ITurnContext turnContext,
                                                                               SpeechResult speechResult)
        {
            if (speechResult == null)
            {
                return(null);
            }

            var reply = turnContext.Activity.CreateReply();

            // True when the recognized text contains at least one of the given phrases.
            bool Mentions(params string[] phrases)
            {
                foreach (string phrase in phrases)
                {
                    if (speechResult.DisplayText.Contains(phrase))
                    {
                        return(true);
                    }
                }

                return(false);
            }

            string attachmentKey;
            string replyText;

            if (Mentions("도움"))
            {
                // Help.
                attachmentKey = "도움말";
                replyText     = "help message";
            }
            else if (Mentions("운전시작", "운전 시작"))
            {
                // Start operation.
                attachmentKey = "운전시작";
                replyText     = "run message";
            }
            else if (Mentions("운전종료", "운전 종료"))
            {
                // Stop operation.
                attachmentKey = "운전종료";
                replyText     = "stop message";
            }
            else if (Mentions("오늘 날씨", "오늘날씨", "오늘의날씨"))
            {
                // Today's weather.
                // Author's note (intent, not implemented in this method): query the weather API,
                // store the result under a date-time based file name, and reuse an existing
                // audio file for that date-time or generate and upload one to BLOB storage.
                attachmentKey = "오늘날씨";
                replyText     = "today weather message";
            }
            else if (Mentions("미세 먼지 정보", "미세먼지 정보", "미세먼지정보", "미세먼지", "미세 먼지"))
            {
                // Fine-dust information.
                // Author's note (intent, not implemented in this method): same date-time based
                // caching of generated audio as for the weather command.
                attachmentKey = "미세먼지정보";
                replyText     = "today dust message";
            }
            else
            {
                // No known phrase: echo the recognized text back in Value.
                reply.Text  = "Unknown command";
                reply.Value = speechResult.DisplayText;
                return(reply);
            }

            var attachment = await MakeAttachmentAsync(attachmentKey);

            reply.Text = replyText;
            reply.Attachments.Add(attachment);

            return(reply);
        }
예제 #21
0
        /// <summary>
        /// Walks the token chain of the best hypothesis in <paramref name="result"/>, from
        /// the best token back through its predecessors, and for every emitting HMM state
        /// adds that frame's contribution to the <c>RegLs</c> and <c>RegRs</c> accumulators.
        /// </summary>
        /// <param name="result">Speech result whose best token starts the walk.</param>
        /// <exception cref="Exception">Thrown when the result has no best token.</exception>
        public void Collect(SpeechResult result)
        {
            Token token = result.Result.GetBestToken();

            float[] componentScore, featureVector, posteriors, tmean;
            int[]   len;
            float   dnom, wtMeanVar, wtDcountVar, wtDcountVarMean, mean;
            int     mId, cluster;
            int     numStreams, gauPerState;

            if (token == null)
            {
                throw new Exception("Best token not found!");
            }

            do
            {
                FloatData    feature = (FloatData)token.Data;
                ISearchState ss      = token.SearchState;

                // Tokens that are not emitting HMM states contribute nothing. "continue" in a
                // do-while jumps to the loop condition, so the predecessor is null-checked there.
                if (!(ss is IHMMSearchState && ss.IsEmitting))
                {
                    token = token.Predecessor;
                    continue;
                }

                componentScore = token.CalculateComponentScore(feature);
                featureVector  = FloatData.ToFloatData(feature).Values;
                mId            = (int)((IHMMSearchState)token.SearchState).HmmState
                                 .GetMixtureId();
                if (_loader is Sphinx3Loader && _loader.HasTiedMixtures())
                {
                    // use CI phone ID for tied mixture model
                    mId = _loader.Senone2Ci[mId];
                }
                len         = _loader.VectorLength;
                numStreams  = _loader.NumStreams;
                gauPerState = _loader.NumGaussiansPerState;
                posteriors  = ComputePosterios(componentScore, numStreams);
                // Offset of stream i's values inside featureVector.
                int featVectorStartIdx = 0;

                for (int i = 0; i < numStreams; i++)
                {
                    for (int j = 0; j < gauPerState; j++)
                    {
                        // Index of Gaussian j of stream i for this mixture:
                        // mId * numStreams * gauPerState + i * gauPerState + j.
                        cluster = _means.GetClassIndex(mId * numStreams
                                                       * gauPerState + i * gauPerState + j);
                        dnom = posteriors[i * gauPerState + j];
                        // Only Gaussians with a positive posterior contribute.
                        if (dnom > 0f)
                        {
                            tmean = _loader.MeansPool.Get(
                                mId * numStreams * gauPerState + i
                                * gauPerState + j);

                            for (int k = 0; k < len[i]; k++)
                            {
                                // mean: posterior times the observed value of dimension k.
                                // wtMeanVar / wtDcountVar: mean and the posterior, each times
                                // the variance-pool entry for dimension k.
                                mean = posteriors[i * gauPerState + j]
                                       * featureVector[k + featVectorStartIdx];
                                wtMeanVar = mean
                                            * _loader.VariancePool.Get(
                                    mId * numStreams * gauPerState + i
                                    * gauPerState + j)[k];
                                wtDcountVar = dnom
                                              * _loader.VariancePool.Get(
                                    mId * numStreams * gauPerState + i
                                    * gauPerState + j)[k];

                                for (int p = 0; p < len[i]; p++)
                                {
                                    wtDcountVarMean = wtDcountVar * tmean[p];

                                    // Only entries with q >= p are updated in row p.
                                    for (int q = p; q < len[i]; q++)
                                    {
                                        RegLs[cluster][i][k][p][q] += wtDcountVarMean
                                                                      * tmean[q];
                                    }
                                    // Extra column at index len[i] of row p.
                                    RegLs[cluster][i][k][p][len[i]] += wtDcountVarMean;
                                    RegRs[cluster][i][k][p]         += wtMeanVar * tmean[p];
                                }
                                // Last row / last column entries at index len[i].
                                RegLs[cluster][i][k][len[i]][len[i]] += wtDcountVar;
                                RegRs[cluster][i][k][len[i]]         += wtMeanVar;
                            }
                        }
                    }
                    featVectorStartIdx += len[i];
                }
                token = token.Predecessor;
            } while (token != null);
        }
예제 #22
0
        /// <summary>
        /// Converts a <see cref="SpeechResult"/> to a <see cref="StreamingSpeechRecognitionResult"/>.
        /// </summary>
        /// <param name="result">The <see cref="SpeechResult"/>.</param>
        /// <param name="rankOrder">
        /// When <c>true</c> (the default) the alternates keep the order in which the service
        /// returned them; when <c>false</c> they are sorted by descending confidence.
        /// </param>
        /// <returns>The <see cref="StreamingSpeechRecognitionResult"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="result"/> is neither a <see cref="PartialRecognitionResult"/>
        /// nor a <see cref="RecognitionResult"/>.
        /// </exception>
        // NOTE(review): both branches pass false as the first constructor argument — confirm
        // that this is intended for completed RecognitionResult values as well.
        internal static StreamingSpeechRecognitionResult ToStreamingSpeechRecognitionResult(this SpeechResult result, bool rankOrder = true)
        {
            if (result is PartialRecognitionResult partialResult)
            {
                // A partial result has a single alternate: its own text with confidence 1.0.
                return(new StreamingSpeechRecognitionResult(
                           false,
                           partialResult.Text,
                           1.0,
                           new SpeechRecognitionAlternate[] { new SpeechRecognitionAlternate(partialResult.Text, 1.0) },
                           null,
                           result.Duration));
            }
            else if (result is RecognitionResult recoResult)
            {
                var candidates = recoResult.Results?.Select(r => new SpeechRecognitionAlternate(r.LexicalForm, r.Confidence)) ?? new[] { new SpeechRecognitionAlternate(string.Empty, 1.0) };

                if (!rankOrder)
                {
                    // order by confidence score
                    candidates = candidates.OrderByDescending(r => r.Confidence);
                }

                // Materialize once so the projection (and the sort) is not re-run for every
                // later enumeration of the alternates.
                var alternates = candidates.ToArray();
                if (alternates.Length == 0)
                {
                    // An empty result list gets the same single empty alternate as a missing one.
                    alternates = new[] { new SpeechRecognitionAlternate(string.Empty, 1.0) };
                }

                var topAlternate = alternates[0];
                return(new StreamingSpeechRecognitionResult(
                           false,
                           topAlternate.Text,
                           topAlternate.Confidence,
                           alternates,
                           null,
                           result.Duration));
            }
            else
            {
                throw new InvalidOperationException("Unexpected recognition result type!");
            }
        }
예제 #23
0
 /// <summary>
 /// Creates the event arguments and stores <paramref name="result"/> in <c>Result</c>.
 /// </summary>
 /// <param name="result">The speech result exposed through <c>Result</c>.</param>
 public SpeechRecognizedEventArgs(SpeechResult result)
 {
     this.Result = result;
 }
예제 #24
0
        /// <summary>
        /// Walks the token chain of <paramref name="result"/>, starting at its best token and following
        /// predecessors, and adds the contribution of every emitting HMM state to the
        /// <c>regLs</c> and <c>regRs</c> accumulators.
        /// </summary>
        /// <param name="result">Recognition result whose best token starts the walk.</param>
        /// <exception cref="Exception">The result has no best token.</exception>
        public void collect(SpeechResult result)
        {
            Token current = result.getResult().getBestToken();

            if (current == null)
            {
                throw new Exception("Best token not found!");
            }

            // Skipped and processed tokens alike step to their predecessor at the end of the iteration.
            for (; current != null; current = current.getPredecessor())
            {
                FloatData    frame = (FloatData)current.getData();
                ISearchState state = current.getSearchState();

                // Only emitting HMM states contribute to the accumulators.
                bool emittingHmmState = state is IHMMSearchState && state.isEmitting();
                if (!emittingHmmState)
                {
                    continue;
                }

                float[] scores      = current.calculateComponentScore(frame);
                float[] observation = FloatData.toFloatData(frame).getValues();
                int     mixtureId   = (int)((IHMMSearchState)current.getSearchState()).getHMMState()
                                      .getMixtureId();
                if (loader is Sphinx3Loader && ((Sphinx3Loader)loader).hasTiedMixtures())
                {
                    // use CI phone ID for tied mixture model
                    mixtureId = ((Sphinx3Loader)loader).getSenone2Ci()[mixtureId];
                }

                int[]   streamLengths     = loader.getVectorLength();
                int     streamCount       = loader.getNumStreams();
                int     gaussiansPerState = loader.getNumGaussiansPerState();
                float[] posteriors        = this.computePosterios(scores, streamCount);

                // Offset of the current stream's first component inside the observation vector.
                int streamOffset = 0;

                for (int stream = 0; stream < streamCount; stream++)
                {
                    int dim = streamLengths[stream];

                    for (int gaussian = 0; gaussian < gaussiansPerState; gaussian++)
                    {
                        // Index of this Gaussian in the means/variance pools.
                        int poolIdx = mixtureId * streamCount * gaussiansPerState
                                      + stream * gaussiansPerState + gaussian;
                        int   cluster = means.getClassIndex(poolIdx);
                        float weight  = posteriors[stream * gaussiansPerState + gaussian];

                        // Gaussians without a positive posterior add nothing.
                        if (!(weight > 0f))
                        {
                            continue;
                        }

                        float[] gaussianMean = loader.getMeansPool().get(poolIdx);

                        for (int row = 0; row < dim; row++)
                        {
                            float weightedObs = weight * observation[row + streamOffset];
                            float wtMeanVar   = weightedObs
                                                * loader.getVariancePool().get(poolIdx)[row];
                            float wtDcountVar = weight
                                                * loader.getVariancePool().get(poolIdx)[row];

                            for (int p = 0; p < dim; p++)
                            {
                                float wtDcountVarMean = wtDcountVar * gaussianMean[p];

                                // Only entries with q >= p are filled.
                                for (int q = p; q < dim; q++)
                                {
                                    regLs[cluster][stream][row][p][q] += wtDcountVarMean
                                                                         * gaussianMean[q];
                                }

                                // Index dim is the extra last column/entry, accumulated without a mean factor.
                                regLs[cluster][stream][row][p][dim] += wtDcountVarMean;
                                regRs[cluster][stream][row][p]      += wtMeanVar * gaussianMean[p];
                            }

                            regLs[cluster][stream][row][dim][dim] += wtDcountVar;
                            regRs[cluster][stream][row][dim]      += wtMeanVar;
                        }
                    }

                    streamOffset += dim;
                }
            }
        }
예제 #25
0
        /// <summary>
        /// Converts text to speech by posting it to the Baidu text2audio HTTP endpoint.
        /// </summary>
        /// <param name="text">Text to convert to speech.</param>
        /// <param name="vol">Volume (0-15; 5 is medium).</param>
        /// <param name="per">Speaker: 0 standard female, 1 standard male, 3 and 4 are emotional-synthesis voices.</param>
        /// <param name="spd">Speaking rate (0-15; 5 is medium).</param>
        /// <param name="pit">Pitch (0-15; 5 is medium).</param>
        /// <param name="token">Access token, obtained before calling this method; sent as the <c>tok</c> field.</param>
        /// <returns>
        /// A <see cref="SpeechResult"/> with <c>status</c> 0 and the response bytes in <c>speech</c> on success,
        /// or <c>status</c> 1 and a message in <c>error</c> on failure. Exceptions raised while building or
        /// sending the request are caught and reported as a failure result.
        /// </returns>
        public static async Task <SpeechResult> TextToSpeech1(string text, string vol, string per, string spd, string pit, string token)
        {
            SpeechResult result = new SpeechResult();

            // Baidu TTS request address.
            // NOTE(review): plain http sends the access token unencrypted — confirm whether https can be used.
            string url = "http://tsn.baidu.com/text2audio";

            Dictionary <string, string> param = new Dictionary <string, string>()
            {
                { "lan", "zh" },
                { "ctp", "1" },
                { "vol", vol },               // volume
                { "per", per },               // speaker
                { "spd", spd },               // speaking rate
                { "pit", pit },               // pitch
                { "aue", "3" },               // fixed value, does not need to change
                { "tex", text },              // text to convert
                { "cuid", "delafqmisspeed" }, // unique user identifier
                { "tok", token }              // access_token
            };

            try
            {
                using (HttpClient client = new HttpClient())
                using (FormUrlEncodedContent content = new FormUrlEncodedContent(param))
                {
                    client.Timeout = new TimeSpan(0, 1, 0); // one-minute timeout

                    using (HttpResponseMessage response = await client.PostAsync(url, content))
                    {
                        if (response.StatusCode != HttpStatusCode.OK)
                        {
                            result.status = 1;
                            result.error  = "请求失败";
                            return result;
                        }

                        // A JSON body on a 200 response is an error report, not audio. Compare the media
                        // type only, so a "; charset=..." parameter does not hide the error.
                        string mediaType = response.Content.Headers.ContentType?.MediaType;
                        if (string.Equals(mediaType, "application/json", StringComparison.OrdinalIgnoreCase))
                        {
                            string res  = await response.Content.ReadAsStringAsync();
                            var    item = JsonConvert.DeserializeObject <TResult>(res);

                            // Return here so the success assignments below cannot overwrite the failure.
                            result.status = 1;
                            result.error  = item.err_no.ToString();
                            return result;
                        }

                        result.speech = await response.Content.ReadAsByteArrayAsync();
                        result.status = 0;
                    }
                }
            }
            catch (Exception ex)
            {
                // Report failures through the result instead of throwing.
                result.status = 1;
                result.error  = ex.Message;
            }

            return result;
        }
        /// <summary>
        /// Synthesizes <paramref name="text"/> through the Bing Speech synthesize endpoint and returns the
        /// outcome as JSON.
        /// </summary>
        /// <param name="text">Text to speak; a placeholder sentence is used when this is <c>null</c>.</param>
        /// <param name="no">Part number, copied into the result's <c>TextPartNo</c>.</param>
        /// <returns>
        /// A <see cref="JsonResult"/> whose <c>Data</c> is a <see cref="SpeechResult"/> holding the text, the part
        /// number, the access token and either the audio bytes (<c>SpeechEventData</c>) or an
        /// <c>ErrorMessage</c>. Authentication failures are reported through <c>ErrorMessage</c> rather than thrown.
        /// </returns>
        public async Task <ActionResult> GetTextToSpeech(string text, int no)
        {
            // Stays under the 85,000-byte large-object-heap threshold; the previous 20 MB buffer was
            // allocated on every audio event.
            const int CopyBufferSize = 80 * 1024;

            if (text == null)
            {
                text = "Kein text angekommen.";
            }

            Debug.WriteLine(no);

            SpeechResult speechResult = new SpeechResult();

            speechResult.TextPartNo = no;
            speechResult.Text       = text;

            #region Bing Speech Authentication

            // Note: The way to get api key:
            // Free: https://www.microsoft.com/cognitive-services/en-us/subscriptions?productId=/products/Bing.Speech.Preview
            // Paid: https://portal.azure.com/#create/Microsoft.CognitiveServices/apitype/Bing.Speech/pricingtier/S0
            string bingApiKey = ConfigurationManager.AppSettings["BingSpeechApiKey"];

            Authentication auth = new Authentication(bingApiKey);
            string         accessToken;

            try
            {
                accessToken = auth.GetAccessToken();
                speechResult.AccessToken = accessToken;
            }
            catch (Exception ex)
            {
                // Without a token there is nothing to synthesize; return the error to the caller as JSON.
                speechResult.ErrorMessage = "GetTextToSpeech --> Failed authentication --> " + ex.Message + " " + ex.ToString();

                JsonResult result = new JsonResult()
                {
                    Data = speechResult
                };
                return(result);
            }

            #endregion

            #region Text-To-Speech

            string requestUri = "https://speech.platform.bing.com/synthesize";

            var cortana = new Synthesize();

            // Copy the delivered audio into memory so it can be returned inside the JSON payload.
            cortana.OnAudioAvailable += (s, args) => {
                var input = args.EventData;

                byte[] buffer = new byte[CopyBufferSize];
                using (MemoryStream ms = new MemoryStream())
                {
                    int read;
                    while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        ms.Write(buffer, 0, read);
                    }

                    speechResult.SpeechEventData = ms.ToArray();
                }
            };

            // Synthesis errors are reported through the result object.
            cortana.OnError += (s, e) =>
            {
                var message = $"Unable to complete the TTS request: {e.ToString()}";
                message += e.EventData.Message;
                speechResult.ErrorMessage = message;
            };

            await cortana.Speak(CancellationToken.None,
                                new Synthesize.InputOptions()
            {
                RequestUri = new Uri(requestUri),
                // Text to be spoken.
                Text      = text,
                VoiceType = Gender.Female,
                // Refer to the documentation for complete list of supported locales.
                Locale = ConfigurationManager.AppSettings["DefaultLocale"],     // "de-DE",
                // You can also customize the output voice. Refer to the documentation to view the different
                // voices that the TTS service can output.
                VoiceName = ConfigurationManager.AppSettings["VoiceName"],     // "Microsoft Server Speech Text to Speech Voice (de-De, Hedda)",
                // Service can return audio in different output format.
                OutputFormat       = AudioOutputFormat.Riff16Khz16BitMonoPcm,
                AuthorizationToken = "Bearer " + accessToken,
            });

            JsonResult jsonResult = new JsonResult()
            {
                Data = speechResult
            };

            #endregion

            return(jsonResult);
        }