/// <summary>
/// Runs one blocking recognition pass over <paramref name="soundToRecognize"/>.
/// If the engine returns a result it is forwarded to <c>OnSoundRecognized</c>; if
/// <c>extractDetectedSounds</c> is set, a copy of the sound is forwarded to
/// <c>OnSoundDetected</c>. The <c>recognizerBusy</c> flag is cleared on every exit path.
/// </summary>
/// <param name="soundToRecognize">The sound whose WAV memory stream is fed to the recognition engine.</param>
private void RecognitionLoop(WAVSound soundToRecognize)
{
    // Monitor.Enter(recognitionLockObject);
    try
    {
        soundToRecognize.GenerateMemoryStream();
        speechRecognitionEngine.SetInputToWaveStream(soundToRecognize.WAVMemoryStream);

        RecognitionResult result = speechRecognitionEngine.Recognize();
        if (result != null)
        {
            OnSoundRecognized(result);
        }

        if (extractDetectedSounds)
        {
            OnSoundDetected(soundToRecognize.Copy());
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: the try-catch avoids (rare) crashes when the WAVE stream
        // cannot be found. The failure is traced (debug builds only) instead of being
        // discarded silently, so it can at least be diagnosed.
        System.Diagnostics.Debug.WriteLine("RecognitionLoop: recognition failed: " + ex);
    }
    finally
    {
        // Single place where the flag is released; covers both the normal and the failing path.
        recognizerBusy = false;
    }
    // Monitor.Exit(recognitionLockObject);
}
/// <summary>
/// Classifies one sound frame as <c>SpeechType.Silence</c>, <c>SpeechType.Voiced</c>
/// or <c>SpeechType.Unvoiced</c>.
/// </summary>
/// <remarks>
/// The channel, the low-pass cutoff frequency and all thresholds are not parameters;
/// they are read from state declared outside this method.
/// </remarks>
/// <param name="frame">The frame to classify. It is not filtered in place; filtering is applied to a copy.</param>
/// <returns>The speech type assigned to the frame.</returns>
public virtual SpeechType GetFrameSpeechType(WAVSound frame)
{
    double crossingRate = frame.GetRelativeNumberOfZeroCrossings(channel);
    double frameEnergy = frame.GetAverageEnergy(channel);

    // Guard: frames below the silence threshold are classified without further analysis.
    if (frameEnergy < silenceThreshold)
    {
        return SpeechType.Silence;
    }

    // Ratio between the energy of the low-pass filtered frame and the energy of the raw frame.
    WAVSound filteredFrame = frame.Copy();
    filteredFrame.LowPassFilter(lowPassCutoffFrequency);
    double filteredEnergy = filteredFrame.GetAverageEnergy(channel);
    double energyRatio = filteredEnergy / frameEnergy;

    bool hasLowCrossingRate = crossingRate < zeroCrossingRateThreshold;
    bool meetsAllVoicedCriteria = (energyRatio > lowPassRatioThreshold)
                                  && (frameEnergy > energyThreshold)
                                  && hasLowCrossingRate;

    // NOTE(review): the combined criterion already requires a low zero-crossing rate, so it is
    // subsumed by the zero-crossing test alone; as written, the energy ratio and energy
    // threshold never change the result. Behaviour is kept as-is - confirm the intended rule.
    if (meetsAllVoicedCriteria || hasLowCrossingRate)
    {
        return SpeechType.Voiced;
    }

    return SpeechType.Unvoiced;
}
/// <summary>
/// Click handler for the "modify sound" button. Reads the modification settings from the
/// form controls, applies <c>speechModifier.Modify</c> to <c>currentSound</c>, plays the
/// result synchronously, shows it in the visualizer, and stores a copy of it as the new
/// <c>currentSound</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void modifySoundButton_Click(object sender, EventArgs e)
{
    speechVisualizer.MarkerList = new List<SoundMarker>();

    // Read the user-specified settings. Parse failures propagate as exceptions, as before.
    speechModifier.TopFraction = double.Parse(topFractionTextBox.Text);
    double relativeStartPitch = double.Parse(relativeStartPitchTextBox.Text);
    double relativeEndPitch = double.Parse(relativeEndPitchTextBox.Text);
    Boolean adjustDuration = Boolean.Parse(adjustDurationComboBox.SelectedItem.ToString());
    double relativeDuration = double.Parse(relativeDurationTextBox.Text); // Only relevant if adjustDuration = true.

    WAVSound modifiedSound = speechModifier.Modify(currentSound, relativeStartPitch, relativeEndPitch,
                                                   adjustDuration, relativeDuration);
    // Optional post-processing, currently disabled:
    // modifiedSound.MedianFilter(5);
    // modifiedSound.LowPassFilter(1500);
    // modifiedSound.SetMaximumNonClippingVolume();

    modifiedSound.GenerateMemoryStream();
    modifiedSound.WAVMemoryStream.Position = 0; // Manually rewind stream

    // SoundPlayer is IDisposable; PlaySync blocks until playback has finished, so the
    // player can be released as soon as the call returns.
    using (SoundPlayer soundPlayer = new SoundPlayer())
    {
        soundPlayer.Stream = modifiedSound.WAVMemoryStream;
        soundPlayer.PlaySync();
    }

    speechVisualizer.SetRange(0, modifiedSound.GetDuration(), -32768, 32768);
    speechVisualizer.SetPitchPeriodSpecification(null);
    speechVisualizer.SetSound(modifiedSound);
    currentSound = modifiedSound.Copy();

    // Re-enable only the first analysis step for the modified sound; the other two are disabled.
    soundTypeIdentificationButton.Enabled = true;
    findPitchPeriodsButton.Enabled = false;
    findPitchMarksButton.Enabled = false;
}
/// <summary>
/// Polling loop that runs while <c>running</c> is set. On each pass it fetches everything
/// recorded so far, optionally displays it, discards the part that precedes the end of the
/// previous utterance, and then either looks for the start of a new utterance or - when
/// already inside one - for trailing silence that marks its end. When an utterance ends and
/// the recognizer is not busy, the utterance is extracted and passed to <c>RunRecognizer</c>.
/// </summary>
private void RunLoop()
{
    Thread.Sleep(1);
    DateTime utteranceStartDateTime = DateTime.Now; // Just needed for initialization.
    DateTime utteranceEndDateTime = DateTime.MinValue; // Just needed for initialization.
    DateTime previousUtteranceStartDateTime = DateTime.MinValue; // Written below, not read in this method.
    DateTime previousUtteranceEndDateTime = DateTime.MinValue;
    DateTime recordingStartDateTime;
    DateTime recordingEndDateTime;
    double utteranceStartTime = 0; // In seconds, measured from the start of the current recording. (=0 just for initialization).

    while (running)
    {
        Thread.Sleep(millisecondRecordingInterval);
        byte[] soundData = wavRecorder.GetAllRecordedBytes(out recordingStartDateTime, out recordingEndDateTime);
        if ((soundData == null) || (soundData.Length == 0))
        {
            continue; // Nothing recorded; poll again.
        }

        WAVSound sound = new WAVSound("", wavRecorder.SampleRate, wavRecorder.NumberOfChannels, wavRecorder.NumberOfBitsPerSample);
        sound.AppendSamplesAsBytes(soundData);

        if (showSoundStream && !displayBusy)
        {
            // 20171207: Make a new copy here, since the code below may process the sound
            // before visualization is completed.
            WAVSound soundToDisplay = sound.Copy();
            if (InvokeRequired)
            {
                this.BeginInvoke(new MethodInvoker(() => ShowSound(soundToDisplay)));
            }
            else
            {
                ShowSound(soundToDisplay);
            }
        }

        // Next, remove all parts of the sound that have already been recognized, if any:
        if (previousUtteranceEndDateTime > recordingStartDateTime)
        {
            double extractionStartTime = (previousUtteranceEndDateTime - recordingStartDateTime).TotalSeconds;
            double extractionEndTime = sound.GetDuration();
            sound = sound.Extract(extractionStartTime, extractionEndTime);
            if (sound == null)
            {
                // Per the original debug note this should not happen unless the recognition
                // thread is stopped using a breakpoint; skip this pass instead of
                // dereferencing null below.
                continue;
            }
        }

        if (!inUtterance)
        {
            utteranceStartTime = sound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
            if (utteranceStartTime > 0)
            {
                double duration = sound.GetDuration();
                double timeToEnd = duration - utteranceStartTime;
                // Scale to ticks BEFORE truncating to an integer; casting the seconds first
                // would silently drop the fractional part of timeToEnd.
                long ticksToEnd = (long)(TICKS_PER_SECOND * timeToEnd);
                utteranceStartDateTime = recordingEndDateTime.Subtract(new TimeSpan(ticksToEnd));
                if (utteranceStartDateTime > previousUtteranceEndDateTime) // True (by construction) the FIRST time.
                {
                    inUtterance = true;
                    long utteranceStartTimeAsTicks = (long)(TICKS_PER_SECOND * utteranceStartTime); // Corrected 20170907 (1000000 -> 10000000)
                    utteranceStartDateTime = recordingStartDateTime.Add(new TimeSpan(utteranceStartTimeAsTicks));
                }
            }
        }
        else
        {
            double duration = sound.GetDuration();
            WAVSound endOfSound = sound.Extract(duration - detectionSilenceInterval, duration);
            double startTimeInEndOfSound = endOfSound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
            if (startTimeInEndOfSound < 0) // <=> silence at the end of the sound
            {
                inUtterance = false;
                utteranceEndDateTime = recordingEndDateTime; // recordingStartDateTime.Add(new TimeSpan(utteranceEndTimeAsTicks));
                previousUtteranceStartDateTime = utteranceStartDateTime;
                previousUtteranceEndDateTime = utteranceEndDateTime;
                // Monitor.Enter(recognitionLockObject);
                if (!recognizerBusy)
                {
                    // The flag is set here, before the recognizer is started.
                    recognizerBusy = true;
                    WAVSound soundToRecognize = sound.Extract(utteranceStartTime - extractionMargin, duration).Copy();
                    // Monitor.Exit(recognitionLockObject);
                    RunRecognizer(soundToRecognize);
                }
            }
        }
    }
}
/// <summary>
/// Creates the event arguments and stores the result of <c>sound.Copy()</c>, not the
/// passed-in instance itself.
/// </summary>
/// <param name="sound">The sound to attach to the event.</param>
public WAVSoundEventArgs(WAVSound sound)
{
    WAVSound snapshot = sound.Copy();
    this.sound = snapshot;
}