Example #1
        public void FindSpeechTypeVariation(WAVSound sound)
        // Frame parameters (frameDuration, frameShift) and the classification thresholds are fields of the enclosing class.
        {
            WAVFrameSet frameSet = new WAVFrameSet(sound, frameDuration, frameShift);

            speechTypeSpecification = new SpeechTypeSpecification();
            double time = 0;

            for (int ii = 0; ii < frameSet.FrameList.Count; ii++)
            {
                WAVSound   frame      = frameSet.FrameList[ii];
                SpeechType speechType = this.GetFrameSpeechType(frame); // Classify the frame using the class-level thresholds.
                time = frameSet.StartTimeList[ii] + frameDuration / 2;  // The speech type is assigned to the center of the frame
                speechTypeSpecification.TimeSpeechTypeTupleList.Add(new Tuple <double, SpeechType>(time, speechType));
            }
            // Finally, to make sure that the speech type can be interpolated over the entire sound, set the
            // end values:
            SpeechType firstSpeechType = speechTypeSpecification.TimeSpeechTypeTupleList[0].Item2;

            speechTypeSpecification.TimeSpeechTypeTupleList.Insert(0, new Tuple <double, SpeechType>(0, firstSpeechType));
            SpeechType lastSpeechType = speechTypeSpecification.TimeSpeechTypeTupleList.Last().Item2;
            double     duration       = sound.GetDuration();

            if (speechTypeSpecification.TimeSpeechTypeTupleList.Last().Item1 < duration) // Will ALMOST always be the case, unless the duration is an exact multiple of the frame shift
            {
                speechTypeSpecification.TimeSpeechTypeTupleList.Add(new Tuple <double, SpeechType>(duration, lastSpeechType));
            }
            for (int jj = 0; jj < numberOfAdjustmentSteps; jj++)
            {
                Adjust();
            }
        }
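FindSpeechTypeVariation reads its frame parameters (frameDuration, frameShift), the classification thresholds, and numberOfAdjustmentSteps from fields of its enclosing class, and writes its result into the speechTypeSpecification field. A minimal usage sketch, assuming a hypothetical enclosing class name (SpeechTypeDetector) and a hypothetical helper that returns a WAVSound; neither name appears in the source:

        // Hypothetical usage; SpeechTypeDetector and LoadUtterance() are assumed names.
        WAVSound sound = LoadUtterance();                 // assumed helper returning a WAVSound
        SpeechTypeDetector detector = new SpeechTypeDetector();
        detector.FindSpeechTypeVariation(sound);
        // Afterwards, the detector's speechTypeSpecification holds one (time, SpeechType)
        // tuple per frame center, plus end-point entries so the speech type can be
        // interpolated over the entire sound.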
Example #2
        public IWRRecognitionResult RecognizeSingle(WAVSound sound)
        {
            // Compute the features of the current sound
            sound.SubtractMean();
            double startTime = sound.GetFirstTimeAboveThreshold(0, soundExtractionMovingAverageLength,
                                                                soundExtractionThreshold);
            double endTime = sound.GetLastTimeAboveThreshold(0, soundExtractionMovingAverageLength,
                                                             soundExtractionThreshold);
            WAVSound extractedInstance = sound.Extract(startTime, endTime);

            if (extractedInstance == null) // Nothing in the sound exceeded the extraction threshold.
            {
                return(null);
            }
            extractedInstance.PreEmphasize(preEmphasisThresholdFrequency);
            WAVFrameSet frameSet = new WAVFrameSet(extractedInstance, frameDuration, frameShift);

            frameSet.ApplyHammingWindows(alpha);
            SoundFeatureSet     soundFeatureSet            = new SoundFeatureSet();
            List <SoundFeature> autoCorrelationFeatureList = frameSet.GetAutoCorrelationSeries("AutoCorrelation", autoCorrelationOrder);

            soundFeatureSet.FeatureList.AddRange(autoCorrelationFeatureList);
            List <SoundFeature> lpcAndCepstralFeatureList = frameSet.GetLPCAndCepstralSeries("LPC", lpcOrder, "Cepstral", cepstralOrder);

            soundFeatureSet.FeatureList.AddRange(lpcAndCepstralFeatureList);
            SoundFeature relativeNumberOfZeroCrossingsFeature = frameSet.GetRelativeNumberOfZeroCrossingsSeries("RNZC");

            soundFeatureSet.FeatureList.Add(relativeNumberOfZeroCrossingsFeature);

            // Normalize the time axis and resample each feature to a fixed number of
            // values, so that sounds of different duration can be compared.
            soundFeatureSet.SetNormalizedTime();
            soundFeatureSet.Interpolate(numberOfValuesPerFeature);

            IWRRecognitionResult recognitionResult = new IWRRecognitionResult();

            recognitionResult.SoundFeatureSet = soundFeatureSet;
            if (averageSoundFeatureSetList != null)
            {
                foreach (SoundFeatureSet averageSoundFeatureSet in averageSoundFeatureSetList)
                {
                    double deviation = SoundFeatureSet.GetDeviation(averageSoundFeatureSet, soundFeatureSet, weightList);
                    string soundName = averageSoundFeatureSet.Information;
                    recognitionResult.DeviationList.Add(new Tuple <string, double>(soundName, deviation));
                }
                recognitionResult.DeviationList.Sort((a, b) => a.Item2.CompareTo(b.Item2)); // Ascending: smallest deviation (best match) first.
            }
            return(recognitionResult);
        }
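RecognizeSingle returns null when no part of the sound rises above the extraction threshold; otherwise it returns a result whose DeviationList is sorted by ascending deviation from the stored average feature sets. A brief usage sketch, assuming a hypothetical recognizer instance that has already been trained via AppendSound (see Example #3) and a hypothetical helper that returns a WAVSound:

        // Hypothetical usage; "recognizer" and RecordUtterance() are assumed names.
        WAVSound utterance = RecordUtterance();           // assumed helper returning a WAVSound
        IWRRecognitionResult result = recognizer.RecognizeSingle(utterance);
        if (result != null && result.DeviationList.Count > 0)
        {
            // The deviation list is sorted in ascending order, so the first
            // entry is the closest-matching stored sound.
            Console.WriteLine("Best match: " + result.DeviationList[0].Item1 +
                              " (deviation " + result.DeviationList[0].Item2 + ")");
        }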
Example #3
        public void AppendSound(string name, List <WAVSound> instanceList)
        {
            List <SoundFeatureSet> soundFeatureSetList = new List <SoundFeatureSet>();

            // Compute the sound feature set for each instance
            foreach (WAVSound soundInstance in instanceList)
            {
                soundInstance.SubtractMean();
                double startTime = soundInstance.GetFirstTimeAboveThreshold(0, soundExtractionMovingAverageLength,
                                                                            soundExtractionThreshold);
                double endTime = soundInstance.GetLastTimeAboveThreshold(0, soundExtractionMovingAverageLength,
                                                                         soundExtractionThreshold);
                WAVSound extractedInstance = soundInstance.Extract(startTime, endTime);
                if (extractedInstance == null) { continue; } // Skip instances with no samples above the extraction threshold.
                extractedInstance.PreEmphasize(preEmphasisThresholdFrequency);
                WAVFrameSet frameSet = new WAVFrameSet(extractedInstance, frameDuration, frameShift);
                frameSet.ApplyHammingWindows(alpha);
                SoundFeatureSet     soundFeatureSet            = new SoundFeatureSet();
                List <SoundFeature> autoCorrelationFeatureList = frameSet.GetAutoCorrelationSeries("AutoCorrelation", autoCorrelationOrder);
                soundFeatureSet.FeatureList.AddRange(autoCorrelationFeatureList);
                List <SoundFeature> lpcAndCepstralFeatureList = frameSet.GetLPCAndCepstralSeries("LPC", lpcOrder, "Cepstral", cepstralOrder);
                soundFeatureSet.FeatureList.AddRange(lpcAndCepstralFeatureList);
                SoundFeature relativeNumberOfZeroCrossingsFeature = frameSet.GetRelativeNumberOfZeroCrossingsSeries("RNZC");
                soundFeatureSet.FeatureList.Add(relativeNumberOfZeroCrossingsFeature);

                soundFeatureSet.SetNormalizedTime();
                soundFeatureSet.Interpolate(numberOfValuesPerFeature);
                soundFeatureSetList.Add(soundFeatureSet);
            }
            SoundFeatureSet averageSoundFeatureSet = SoundFeatureSet.GenerateAverage(soundFeatureSetList);

            averageSoundFeatureSet.Information = name; // The name of the stored sound.
            if (averageSoundFeatureSetList == null)
            {
                averageSoundFeatureSetList = new List <SoundFeatureSet>();
            }
            averageSoundFeatureSetList.Add(averageSoundFeatureSet);
            averageSoundFeatureSetList.Sort((a, b) => a.Information.CompareTo(b.Information)); // Perhaps remove?
            OnAvailableSoundsChanged();
        }
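AppendSound trains one vocabulary entry: each recorded instance is passed through the same feature pipeline as in RecognizeSingle (mean subtraction, endpoint extraction, pre-emphasis, Hamming-windowed frames, autocorrelation, LPC, cepstral and zero-crossing features, time normalization and interpolation), the resulting feature sets are averaged, and the average is stored under the given name in averageSoundFeatureSetList, which RecognizeSingle later compares against. A short usage sketch with assumed helper names:

        // Hypothetical usage; "recognizer" and RecordInstances() are assumed names.
        List<WAVSound> instances = RecordInstances("hello", 5); // e.g. five recordings of the word "hello"
        recognizer.AppendSound("hello", instances);
        // "hello" is now a stored sound that RecognizeSingle can return as its best match.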