/// <summary>
/// Runs one recognition pass over <paramref name="soundToRecognize"/>: the sound is
/// turned into a WAVE memory stream, handed to the speech recognition engine, and the
/// result (if any) is forwarded via <c>OnSoundRecognized</c>. When
/// <c>extractDetectedSounds</c> is set, a copy of the sound is also forwarded via
/// <c>OnSoundDetected</c>. The <c>recognizerBusy</c> flag is always cleared on exit.
/// </summary>
/// <param name="soundToRecognize">The sound to feed to the recognition engine.</param>
private void RecognitionLoop(WAVSound soundToRecognize)
 {
     //      Monitor.Enter(recognitionLockObject);
     try
     {
         soundToRecognize.GenerateMemoryStream();
         speechRecognitionEngine.SetInputToWaveStream(soundToRecognize.WAVMemoryStream);
         RecognitionResult r = speechRecognitionEngine.Recognize();
         if (r != null)
         {
             OnSoundRecognized(r);
         }
         if (extractDetectedSounds)
         {
             OnSoundDetected(soundToRecognize.Copy());
         }
     }
     catch (System.Exception ex)
     {
         // Deliberately best-effort: a failed pass (e.g. the rare case where the WAVE stream
         // cannot be found) must not crash the caller. The failure is still traced in debug
         // builds so that it does not go completely unnoticed.
         System.Diagnostics.Debug.WriteLine("RecognitionLoop: recognition pass failed: " + ex);
     }
     finally
     {
         // Single place where the busy flag is released, on success and on failure alike.
         recognizerBusy = false;
     }
     //   Monitor.Exit(recognitionLockObject);
 }
Esempio n. 2
0
        /// <summary>
        /// Classifies a single frame as silence, voiced speech or unvoiced speech, based on
        /// its average energy, its relative number of zero crossings and the energy that
        /// remains after low-pass filtering a copy of the frame.
        /// </summary>
        /// <remarks>
        /// The channel, cutoff frequency and all thresholds are read from members of the
        /// enclosing type rather than being passed in as parameters.
        /// </remarks>
        /// <param name="frame">The frame to classify.</param>
        /// <returns>
        /// <c>SpeechType.Silence</c> when the average energy is below the silence threshold;
        /// otherwise <c>SpeechType.Voiced</c> or <c>SpeechType.Unvoiced</c>.
        /// </returns>
        public virtual SpeechType GetFrameSpeechType(WAVSound frame)
        {
            double crossingRate = frame.GetRelativeNumberOfZeroCrossings(channel);
            double meanEnergy   = frame.GetAverageEnergy(channel);

            // Guard clause: frames with too little energy are silence, no further analysis needed.
            if (meanEnergy < silenceThreshold)
            {
                return SpeechType.Silence;
            }

            // Measure how much of the frame's energy survives low-pass filtering (done on a copy).
            WAVSound filteredCopy = frame.Copy();
            filteredCopy.LowPassFilter(lowPassCutoffFrequency);
            double filteredMeanEnergy = filteredCopy.GetAverageEnergy(channel);
            double lowBandRatio       = filteredMeanEnergy / meanEnergy;

            bool passesAllVoicedTests = (lowBandRatio > lowPassRatioThreshold)
                                        && (meanEnergy > energyThreshold)
                                        && (crossingRate < zeroCrossingRateThreshold);
            if (passesAllVoicedTests)
            {
                return SpeechType.Voiced;
            }

            // NOTE(review): this second test makes the combined test above redundant - as written,
            // the voiced/unvoiced decision depends on the zero-crossing rate only, and the low-pass
            // ratio and energy threshold have no effect on the result. Confirm whether that is intended.
            if (crossingRate < zeroCrossingRateThreshold)
            {
                return SpeechType.Voiced;
            }

            return SpeechType.Unvoiced;
        }
        /// <summary>
        /// Click handler for the "modify sound" button: reads the modification settings from
        /// the input controls, applies the speech modifier to the current sound, plays the
        /// result synchronously, shows it in the visualizer and makes a copy of it the new
        /// current sound.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void modifySoundButton_Click(object sender, EventArgs e)
        {
            // Markers refer to the sound that is about to be replaced, so start with an empty list.
            speechVisualizer.MarkerList = new List <SoundMarker>();

            speechModifier.TopFraction = double.Parse(topFractionTextBox.Text);
            double  relativeStartPitch = double.Parse(relativeStartPitchTextBox.Text);
            double  relativeEndPitch   = double.Parse(relativeEndPitchTextBox.Text);
            Boolean adjustDuration     = Boolean.Parse(adjustDurationComboBox.SelectedItem.ToString());
            double  relativeDuration   = double.Parse(relativeDurationTextBox.Text); // Only relevant if adjustDuration = true.

            WAVSound modifiedSound = speechModifier.Modify(currentSound, relativeStartPitch, relativeEndPitch, adjustDuration, relativeDuration);

            // Optional post-processing that is currently disabled:
            //   modifiedSound.MedianFilter(5);
            //   modifiedSound.LowPassFilter(1500);
            //   modifiedSound.SetMaximumNonClippingVolume();

            modifiedSound.GenerateMemoryStream();
            modifiedSound.WAVMemoryStream.Position = 0; // Manually rewind stream

            // SoundPlayer is a disposable component; PlaySync blocks until playback has finished,
            // so the player can safely be released as soon as the using block ends.
            using (SoundPlayer soundPlayer = new SoundPlayer())
            {
                soundPlayer.Stream = modifiedSound.WAVMemoryStream;
                soundPlayer.PlaySync();
            }

            speechVisualizer.SetRange(0, modifiedSound.GetDuration(), -32768, 32768);
            speechVisualizer.SetPitchPeriodSpecification(null);
            speechVisualizer.SetSound(modifiedSound);

            currentSound = modifiedSound.Copy();

            // Only sound type identification is enabled for the new sound; the two pitch buttons are disabled.
            soundTypeIdentificationButton.Enabled = true;
            findPitchPeriodsButton.Enabled        = false;
            findPitchMarksButton.Enabled          = false;
        }
        /// <summary>
        /// Polling loop that runs while <c>running</c> is true. In each pass it fetches all
        /// recorded bytes from the recorder, optionally displays them, discards the part of
        /// the recording that lies before the end of the previous utterance, and then either
        /// looks for the start of a new utterance or, when already inside one, for trailing
        /// silence that ends it. A finished utterance is handed to <c>RunRecognizer</c>
        /// unless the recognizer is still busy.
        /// </summary>
        private void RunLoop()
        {
            Thread.Sleep(1);
            DateTime utteranceStartDateTime       = DateTime.Now;      // Just needed for initialization.
            DateTime previousUtteranceEndDateTime = DateTime.MinValue;
            DateTime recordingStartDateTime;
            DateTime recordingEndDateTime;
            double   utteranceStartTime = 0; // In seconds, measured from the start of the current recording.  (=0 just for initialization).

            while (running)
            {
                Thread.Sleep(millisecondRecordingInterval);
                byte[] soundData = wavRecorder.GetAllRecordedBytes(out recordingStartDateTime, out recordingEndDateTime);
                if ((soundData == null) || (soundData.Length == 0))
                {
                    continue; // Nothing recorded yet.
                }

                WAVSound sound = new WAVSound("", wavRecorder.SampleRate, wavRecorder.NumberOfChannels, wavRecorder.NumberOfBitsPerSample);
                sound.AppendSamplesAsBytes(soundData);

                if (showSoundStream && !displayBusy)
                {
                    WAVSound soundToDisplay = sound.Copy(); // 20171207: Make a new copy here, since the code below may process the sound before visualization is completed.
                    if (InvokeRequired)
                    {
                        this.BeginInvoke(new MethodInvoker(() => ShowSound(soundToDisplay)));
                    }
                    else
                    {
                        ShowSound(soundToDisplay);
                    }
                }

                // Next, remove all parts of the sound that have already been recognized, if any:
                if (previousUtteranceEndDateTime > recordingStartDateTime)
                {
                    double extractionStartTime = (previousUtteranceEndDateTime - recordingStartDateTime).TotalSeconds;
                    double extractionEndTime   = sound.GetDuration();
                    sound = sound.Extract(extractionStartTime, extractionEndTime);
                    if (sound == null)
                    {
                        // Extract has been seen to return null here (e.g. when the recognition thread
                        // was paused at a breakpoint). There is nothing to analyze in this pass, so
                        // skip it instead of failing on a null reference below.
                        continue;
                    }
                }

                if (!inUtterance)
                {
                    utteranceStartTime = sound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (utteranceStartTime > 0)
                    {
                        double duration  = sound.GetDuration();
                        double timeToEnd = duration - utteranceStartTime;
                        // Multiply before converting to long, so that the fractional part of timeToEnd
                        // is kept; casting first would round the offset down to whole seconds.
                        long ticksToEnd = (long)(TICKS_PER_SECOND * timeToEnd);
                        utteranceStartDateTime = recordingEndDateTime.Subtract(new TimeSpan(ticksToEnd));
                        if (utteranceStartDateTime > previousUtteranceEndDateTime) // True (by construction) the FIRST time.
                        {
                            inUtterance = true;
                            long utteranceStartTimeAsTicks = (long)(TICKS_PER_SECOND * utteranceStartTime);  // Corrected 20170907 (1000000 -> 10000000)
                            utteranceStartDateTime = recordingStartDateTime.Add(new TimeSpan(utteranceStartTimeAsTicks));
                        }
                    }
                }
                else
                {
                    // Inside an utterance: inspect only the last detectionSilenceInterval seconds.
                    double   duration              = sound.GetDuration();
                    WAVSound endOfSound            = sound.Extract(duration - detectionSilenceInterval, duration);
                    double   startTimeInEndOfSound = endOfSound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (startTimeInEndOfSound < 0)  //  <=> silence at the end of the sound
                    {
                        inUtterance                  = false;
                        previousUtteranceEndDateTime = recordingEndDateTime; // The utterance is taken to end where the recording ends.
                        //    Monitor.Enter(recognitionLockObject);
                        if (!recognizerBusy)
                        {
                            recognizerBusy = true;
                            WAVSound soundToRecognize = sound.Extract(utteranceStartTime - extractionMargin, duration).Copy();
                            //    Monitor.Exit(recognitionLockObject);
                            RunRecognizer(soundToRecognize);
                        }
                    }
                }
            }
        }
Esempio n. 5
0
 /// <summary>
 /// Creates the event arguments for a sound event. The value stored is the result of
 /// <c>sound.Copy()</c>, not the reference that was passed in.
 /// </summary>
 /// <param name="sound">The sound to attach to the event.</param>
 public WAVSoundEventArgs(WAVSound sound)
 {
     var snapshot = sound.Copy();
     this.sound = snapshot;
 }