/// <summary>
/// Classifies every frame of <paramref name="sound"/> and stores the resulting
/// (time, speech type) sequence in <c>speechTypeSpecification</c>. The sequence is
/// padded with an entry at time 0 and, when needed, an entry at the end of the sound,
/// after which <c>Adjust()</c> is run <c>numberOfAdjustmentSteps</c> times.
/// </summary>
/// <param name="sound">The sound whose speech type variation is to be determined.</param>
/// <remarks>
/// An earlier variant of this method took the analysis settings (channel, frameDuration,
/// frameShift, lowPassCutoffFrequency, lowPassRatioThreshold, energyThreshold,
/// silenceThreshold) as arguments; here they are not parameters and are read from
/// outside the method instead.
/// NOTE(review): if the frame set contains no frames, reading the first entry below
/// will fail - confirm whether callers can pass sounds that short.
/// </remarks>
public void FindSpeechTypeVariation(WAVSound sound)
{
    WAVFrameSet frames = new WAVFrameSet(sound, frameDuration, frameShift);
    speechTypeSpecification = new SpeechTypeSpecification();
    var timeline = speechTypeSpecification.TimeSpeechTypeTupleList;

    for (int frameIndex = 0; frameIndex < frames.FrameList.Count; frameIndex++)
    {
        SpeechType frameSpeechType = GetFrameSpeechType(frames.FrameList[frameIndex]);

        // Each speech type is attached to the center of its frame.
        double centerTime = frames.StartTimeList[frameIndex] + frameDuration / 2;
        timeline.Add(new Tuple<double, SpeechType>(centerTime, frameSpeechType));
    }

    // Pin the end points so that the speech type can be interpolated over the
    // entire sound: first repeat the earliest speech type at time 0 ...
    SpeechType leadingSpeechType = timeline[0].Item2;
    timeline.Insert(0, new Tuple<double, SpeechType>(0, leadingSpeechType));

    // ... then repeat the latest speech type at the very end of the sound.
    Tuple<double, SpeechType> tail = timeline.Last();
    double soundDuration = sound.GetDuration();
    bool tailStopsShort = tail.Item1 < soundDuration;
    if (tailStopsShort)
    {
        // Original author's note: this is ALMOST always the case, unless the
        // duration is an exact multiple of the frame shift.
        timeline.Add(new Tuple<double, SpeechType>(soundDuration, tail.Item2));
    }

    for (int step = 0; step < numberOfAdjustmentSteps; step++)
    {
        Adjust();
    }
}
/// <summary>
/// Computes the feature set of <paramref name="sound"/> and measures its deviation
/// from every stored average feature set.
/// </summary>
/// <param name="sound">
/// The sound to recognize. <c>SubtractMean()</c> is called on this instance before
/// the active segment is extracted.
/// </param>
/// <returns>
/// A result holding the computed feature set and, when stored sounds exist, a list of
/// (sound name, deviation) pairs sorted by ascending deviation; <c>null</c> if no
/// segment could be extracted from the sound.
/// </returns>
public IWRRecognitionResult RecognizeSingle(WAVSound sound)
{
    // Isolate the part of the sound lying between the first and last threshold crossings.
    sound.SubtractMean();
    double segmentStart = sound.GetFirstTimeAboveThreshold(0, soundExtractionMovingAverageLength, soundExtractionThreshold);
    double segmentEnd = sound.GetLastTimeAboveThreshold(0, soundExtractionMovingAverageLength, soundExtractionThreshold);
    WAVSound segment = sound.Extract(segmentStart, segmentEnd);
    if (segment == null)
    {
        // Guard added 20170114: nothing to analyze when the extraction fails.
        return null;
    }

    // Frame the segment and window each frame.
    segment.PreEmphasize(preEmphasisThresholdFrequency);
    WAVFrameSet frames = new WAVFrameSet(segment, frameDuration, frameShift);
    frames.ApplyHammingWindows(alpha);

    // Gather the feature series in a fixed order: autocorrelation, LPC and cepstral,
    // relative number of zero crossings.
    SoundFeatureSet features = new SoundFeatureSet();
    features.FeatureList.AddRange(frames.GetAutoCorrelationSeries("AutoCorrelation", autoCorrelationOrder));
    features.FeatureList.AddRange(frames.GetLPCAndCepstralSeries("LPC", lpcOrder, "Cepstral", cepstralOrder));
    features.FeatureList.Add(frames.GetRelativeNumberOfZeroCrossingsSeries("RNZC"));
    features.SetNormalizedTime();
    features.Interpolate(numberOfValuesPerFeature);

    IWRRecognitionResult result = new IWRRecognitionResult { SoundFeatureSet = features };
    if (averageSoundFeatureSetList == null)
    {
        // No stored sounds to compare against: return the features only.
        return result;
    }

    foreach (SoundFeatureSet storedAverage in averageSoundFeatureSetList)
    {
        double deviation = SoundFeatureSet.GetDeviation(storedAverage, features, weightList);
        result.DeviationList.Add(new Tuple<string, double>(storedAverage.Information, deviation));
    }

    // Smallest deviation first.
    result.DeviationList.Sort((left, right) => left.Item2.CompareTo(right.Item2));
    return result;
}
/// <summary>
/// Computes a feature set for every instance of a sound, averages them, and stores the
/// average under <paramref name="name"/> in <c>averageSoundFeatureSetList</c>.
/// </summary>
/// <param name="name">The name under which the averaged feature set is stored.</param>
/// <param name="instanceList">
/// Recorded instances of the sound. <c>SubtractMean()</c> is called on each instance
/// before its active segment is extracted.
/// </param>
/// <remarks>
/// Instances for which <c>Extract</c> returns <c>null</c> are skipped (RecognizeSingle
/// checks for <c>null</c> after the identical call). If no instance yields a feature set,
/// nothing is stored and <c>OnAvailableSoundsChanged()</c> is not raised.
/// </remarks>
public void AppendSound(string name, List<WAVSound> instanceList)
{
    List<SoundFeatureSet> soundFeatureSetList = new List<SoundFeatureSet>();

    // Compute the sound feature set for each instance
    foreach (WAVSound soundInstance in instanceList)
    {
        soundInstance.SubtractMean();
        double startTime = soundInstance.GetFirstTimeAboveThreshold(0, soundExtractionMovingAverageLength, soundExtractionThreshold);
        double endTime = soundInstance.GetLastTimeAboveThreshold(0, soundExtractionMovingAverageLength, soundExtractionThreshold);
        WAVSound extractedInstance = soundInstance.Extract(startTime, endTime);
        if (extractedInstance == null)
        {
            // No usable segment in this instance: skip it instead of failing with a
            // NullReferenceException and discarding the remaining instances.
            continue;
        }

        extractedInstance.PreEmphasize(preEmphasisThresholdFrequency);
        WAVFrameSet frameSet = new WAVFrameSet(extractedInstance, frameDuration, frameShift);
        frameSet.ApplyHammingWindows(alpha);

        // Feature order matches RecognizeSingle: autocorrelation, LPC and cepstral,
        // relative number of zero crossings.
        SoundFeatureSet soundFeatureSet = new SoundFeatureSet();
        List<SoundFeature> autoCorrelationFeatureList = frameSet.GetAutoCorrelationSeries("AutoCorrelation", autoCorrelationOrder);
        soundFeatureSet.FeatureList.AddRange(autoCorrelationFeatureList);
        List<SoundFeature> lpcAndCepstralFeatureList = frameSet.GetLPCAndCepstralSeries("LPC", lpcOrder, "Cepstral", cepstralOrder);
        soundFeatureSet.FeatureList.AddRange(lpcAndCepstralFeatureList);
        SoundFeature relativeNumberOfZeroCrossingsFeature = frameSet.GetRelativeNumberOfZeroCrossingsSeries("RNZC");
        soundFeatureSet.FeatureList.Add(relativeNumberOfZeroCrossingsFeature);
        soundFeatureSet.SetNormalizedTime();
        soundFeatureSet.Interpolate(numberOfValuesPerFeature);
        soundFeatureSetList.Add(soundFeatureSet);
    }

    if (soundFeatureSetList.Count == 0)
    {
        // Nothing to average, so do not store an entry for this name.
        return;
    }

    SoundFeatureSet averageSoundFeatureSet = SoundFeatureSet.GenerateAverage(soundFeatureSetList);
    averageSoundFeatureSet.Information = name; // The name of the stored sound.

    if (averageSoundFeatureSetList == null)
    {
        averageSoundFeatureSetList = new List<SoundFeatureSet>();
    }
    averageSoundFeatureSetList.Add(averageSoundFeatureSet);

    // Keep the stored sounds ordered by name (the original author marked this sort
    // as possibly removable).
    averageSoundFeatureSetList.Sort((a, b) => a.Information.CompareTo(b.Information));
    OnAvailableSoundsChanged();
}