// Handles a click on the speak button: synthesizes the sentence from the text box,
// captures it to a temporary WAV file for analysis, speaks it audibly with the
// selected voice, then trims leading/trailing silence and shows the waveform.
private void speakButton_Click(object sender, EventArgs e)
{
    string sentence = sentenceTextBox.Text;
    if (string.IsNullOrEmpty(sentence)) { return; }  // Nothing to speak.
    // Validate the voice selection up front: dereferencing a null SelectedItem
    // would otherwise throw a NullReferenceException after the visualizer state
    // had already been cleared.
    object selectedVoice = voiceSelectionComboBox.SelectedItem;
    if (selectedVoice == null) { return; }
    string voiceName = selectedVoice.ToString();
    speechVisualizer.MarkerList = new List<SoundMarker>();
    speechVisualizer.SetPitchPeriodSpecification(null);
    const string tempOutputPath = "./tmpOutput.wav";  // Scratch file, overwritten on every click.
    // First pass: render the utterance into the WAV file so it can be analyzed.
    speechSynthesizer.SetOutputToWaveFile(tempOutputPath,
        new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Mono));
    speechSynthesizer.Speak(sentence);
    // Second pass: speak audibly with the chosen voice. Switching back to the
    // default audio device also closes/flushes the wave file written above.
    speechSynthesizer.SetOutputToDefaultAudioDevice();
    speechSynthesizer.SelectVoice(voiceName);
    speechSynthesizer.Speak(sentence);
    currentSound = new WAVSound();
    currentSound.LoadFromFile(tempOutputPath);
    // Trim silence at both ends before visualizing.
    double startTime = currentSound.GetFirstTimeAboveThreshold(0, 10, 20);
    double endTime = currentSound.GetLastTimeAboveThreshold(0, 10, 20);
    currentSound = currentSound.Extract(startTime, endTime);
    speechVisualizer.SetRange(0, currentSound.GetDuration(), -32768, 32768);  // Full 16-bit sample range.
    speechVisualizer.SetSound(currentSound);
    speechVisualizer.Invalidate();
    soundTypeIdentificationButton.Enabled = true;
    playSoundButton.Enabled = true;
    modifySoundButton.Enabled = true;
    saveSoundToolStripMenuItem.Enabled = true;
}
// Runs single-word recognition on the given sound: trims it to the voiced region,
// extracts the configured feature series, and — when average feature sets are
// available — ranks them by deviation from this sound (ascending, best first).
// Returns null if no voiced region remains after extraction.
// NOTE: mutates the input sound (mean subtraction) as in the original design.
public IWRRecognitionResult RecognizeSingle(WAVSound sound)
{
    sound.SubtractMean();
    double extractionStart = sound.GetFirstTimeAboveThreshold(0, soundExtractionMovingAverageLength, soundExtractionThreshold);
    double extractionEnd = sound.GetLastTimeAboveThreshold(0, soundExtractionMovingAverageLength, soundExtractionThreshold);
    WAVSound trimmedSound = sound.Extract(extractionStart, extractionEnd);
    if (trimmedSound == null)
    {
        return null; // 20170114
    }
    trimmedSound.PreEmphasize(preEmphasisThresholdFrequency);

    // Split into overlapping frames and window each one.
    WAVFrameSet frames = new WAVFrameSet(trimmedSound, frameDuration, frameShift);
    frames.ApplyHammingWindows(alpha);

    // Gather the per-frame feature series: autocorrelation, LPC + cepstral
    // coefficients, and the relative number of zero crossings.
    SoundFeatureSet featureSet = new SoundFeatureSet();
    featureSet.FeatureList.AddRange(frames.GetAutoCorrelationSeries("AutoCorrelation", autoCorrelationOrder));
    featureSet.FeatureList.AddRange(frames.GetLPCAndCepstralSeries("LPC", lpcOrder, "Cepstral", cepstralOrder));
    featureSet.FeatureList.Add(frames.GetRelativeNumberOfZeroCrossingsSeries("RNZC"));

    // Normalize the time axis and resample to a fixed number of values so
    // feature sets of different durations are comparable.
    featureSet.SetNormalizedTime();
    featureSet.Interpolate(numberOfValuesPerFeature);

    IWRRecognitionResult result = new IWRRecognitionResult();
    result.SoundFeatureSet = featureSet;
    if (averageSoundFeatureSetList != null)
    {
        foreach (SoundFeatureSet average in averageSoundFeatureSetList)
        {
            double deviation = SoundFeatureSet.GetDeviation(average, featureSet, weightList);
            result.DeviationList.Add(new Tuple<string, double>(average.Information, deviation));
        }
        result.DeviationList.Sort((left, right) => left.Item2.CompareTo(right.Item2));
    }
    return result;
}
// Builds a set of (possibly overlapping) frames from the given sound.
// Each frame is frameDuration seconds long and consecutive frames start
// frameShift seconds apart; the start time of each frame is recorded in
// startTimeList. If the sound is shorter than one frame, no frames are produced.
public WAVFrameSet(WAVSound sound, double frameDuration, double frameShift)
{
    this.frameDuration = frameDuration;
    this.frameShift = frameShift;
    this.frameList = new List<WAVSound>();
    this.startTimeList = new List<double>();

    // Number of complete frames that fit within the sound.
    double totalDuration = sound.GetDuration();
    int frameCount = (int)Math.Truncate((totalDuration - frameDuration + frameShift) / frameShift);
    for (int frameIndex = 0; frameIndex < frameCount; frameIndex++)
    {
        double frameStart = frameIndex * frameShift;
        WAVSound frame = sound.Extract(frameStart, frameStart + frameDuration);
        frameList.Add(frame);
        startTimeList.Add(frameStart);
    }
}
// Displays the given sound, clipped to the most recent viewingInterval seconds
// when it is longer than that. Always clears displayBusy when done so the
// display loop can run again.
private void ShowSound(WAVSound sound)
{
    ClearHistory();
    try // This (using try-catch) is ugly, but appears to be necessary for some hardware configurations
    {
        double totalDuration = sound.GetDuration();
        // Show only the tail of the sound when it exceeds the viewing window.
        WAVSound soundToShow = (totalDuration > viewingInterval)
            ? sound.Extract(totalDuration - viewingInterval, totalDuration)
            : sound;
        SetSound(soundToShow);
    }
    catch
    {
        // Nothing to do here..
    }
    displayBusy = false;
}
// Background recognition loop. Repeatedly drains the recorder, optionally shows
// the live sound stream, detects utterance start/end by a moving-average
// threshold, and hands each completed utterance to the recognizer.
// Runs until the 'running' field is cleared.
private void RunLoop()
{
    Thread.Sleep(1);
    DateTime utteranceStartDateTime = DateTime.Now;     // Just needed for initialization.
    DateTime utteranceEndDateTime = DateTime.MinValue;  // Just needed for initialization.
    DateTime previousUtteranceEndDateTime = DateTime.MinValue;
    DateTime recordingStartDateTime;
    DateTime recordingEndDateTime;
    double utteranceStartTime = 0; // In seconds, measured from the start of the current recording. (=0 just for initialization).
    while (running)
    {
        Thread.Sleep(millisecondRecordingInterval);
        byte[] soundData = wavRecorder.GetAllRecordedBytes(out recordingStartDateTime, out recordingEndDateTime);
        if (soundData != null)
        {
            if (soundData.Length > 0)
            {
                WAVSound sound = new WAVSound("", wavRecorder.SampleRate, wavRecorder.NumberOfChannels, wavRecorder.NumberOfBitsPerSample);
                sound.AppendSamplesAsBytes(soundData);
                if (showSoundStream)
                {
                    if (!displayBusy)
                    {
                        // 20171207: Make a new copy here, since the code below may process
                        // the sound before visualization is completed.
                        WAVSound soundToDisplay = sound.Copy();
                        if (InvokeRequired)
                        {
                            this.BeginInvoke(new MethodInvoker(() => ShowSound(soundToDisplay)));
                        }
                        else
                        {
                            ShowSound(soundToDisplay);
                        }
                    }
                }
                // Next, remove all parts of the sound that have already been recognized, if any:
                if (previousUtteranceEndDateTime > recordingStartDateTime)
                {
                    double extractionStartTime = (previousUtteranceEndDateTime - recordingStartDateTime).TotalSeconds;
                    double extractionEndTime = sound.GetDuration();
                    // NOTE(review): Extract may return null if the recognition thread is
                    // stalled (e.g. stopped on a breakpoint); the code below assumes it does not.
                    sound = sound.Extract(extractionStartTime, extractionEndTime);
                }
                if (!inUtterance)
                {
                    // Look for the start of a new utterance in the unprocessed sound.
                    utteranceStartTime = sound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (utteranceStartTime > 0)
                    {
                        double duration = sound.GetDuration();
                        double timeToEnd = duration - utteranceStartTime;
                        // Fixed: scale to ticks BEFORE truncating to long. The previous form,
                        // TICKS_PER_SECOND * (long)(timeToEnd), discarded the fractional
                        // seconds first, making the start estimate up to one second late
                        // (same class of error as the one corrected 20170907 below).
                        long ticksToEnd = (long)(TICKS_PER_SECOND * timeToEnd);
                        utteranceStartDateTime = recordingEndDateTime.Subtract(new TimeSpan(ticksToEnd));
                        if (utteranceStartDateTime > previousUtteranceEndDateTime) // True (by construction) the FIRST time.
                        {
                            inUtterance = true;
                            long utteranceStartTimeAsTicks = (long)(TICKS_PER_SECOND * utteranceStartTime); // Corrected 20170907 (1000000 -> 10000000)
                            utteranceStartDateTime = recordingStartDateTime.Add(new TimeSpan(utteranceStartTimeAsTicks));
                        }
                    }
                }
                else
                {
                    // In an utterance: check whether the trailing detectionSilenceInterval
                    // seconds are silent, which marks the end of the utterance.
                    double duration = sound.GetDuration();
                    WAVSound endOfSound = sound.Extract(duration - detectionSilenceInterval, duration);
                    double startTimeInEndOfSound = endOfSound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (startTimeInEndOfSound < 0) // <=> silence at the end of the sound
                    {
                        inUtterance = false;
                        utteranceEndDateTime = recordingEndDateTime;
                        previousUtteranceEndDateTime = utteranceEndDateTime;
                        if (!recognizerBusy)
                        {
                            recognizerBusy = true;
                            // Include a small margin before the detected start of the utterance.
                            WAVSound soundToRecognize = sound.Extract(utteranceStartTime - extractionMargin, duration).Copy();
                            RunRecognizer(soundToRecognize);
                        }
                    }
                }
            }
        }
    }
}