public int Recognize(ISoundSignalReader signal, HiddenMarkovClassifier hmm, out string name, SignalVisitor voiceVisitor = null)
{
    var featureUtility = new FeatureUtility(_engineParameters);
    signal.Reset();

    // Extract the first voiced segment and quantize its feature vectors against the codebook.
    var features = featureUtility.ExtractFeatures(signal, voiceVisitor).First();
    var observations = _codeBook.Quantize(features.Select(item => new Point(item)).ToArray());

    double[] responsibilities;
    var ret = hmm.Compute(observations, out responsibilities);

    // Report the label of the model with the highest likelihood for the observation sequence.
    var models = hmm.Models;
    var likelihoodValue = Double.MinValue;
    name = string.Empty;

    foreach (var model in models)
    {
        var val = model.Evaluate(observations);
        if (val > likelihoodValue)
        {
            likelihoodValue = val;
            name = model.Tag.ToString();
        }
    }

    return ret;
}
public int Recognize(ISoundSignalReader signal, HiddenMarkovModel[] models, out string name, SignalVisitor voiceVisitor = null)
{
    var featureUtility = new FeatureUtility(_engineParameters);
    signal.Reset();

    // Extract the first voiced segment and quantize its feature vectors against the codebook.
    var features = featureUtility.ExtractFeatures(signal, voiceVisitor).First();
    var observations = _codeBook.Quantize(features.Select(item => new Point(item)).ToArray());

    // Pick the model with the highest likelihood; return its index and report its label.
    var likelihoodValue = Double.MinValue;
    name = string.Empty;
    var index = 0;
    var ret = 0;

    foreach (var model in models)
    {
        var val = model.Evaluate(observations);
        if (val > likelihoodValue)
        {
            likelihoodValue = val;
            name = model.Tag.ToString();
            ret = index;
        }
        index++;
    }

    return ret;
}
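// The two Recognize overloads above share the same selection pattern: evaluate every candidate
// model against the quantized observation sequence and keep the one with the highest
// log-likelihood. Below is a minimal self-contained sketch of that pattern; IScoredModel and
// BestModelSelector are illustrative stand-ins, not types from this code base.
using System.Collections.Generic;

interface IScoredModel
{
    string Label { get; }
    double Evaluate(int[] observations); // assumed to return a (log-)likelihood
}

static class BestModelSelector
{
    // Returns the index of the highest-scoring model and reports its label,
    // mirroring the foreach/if loop used by both Recognize overloads.
    public static int SelectBest(IReadOnlyList<IScoredModel> models, int[] observations, out string label)
    {
        var best = double.MinValue;
        var bestIndex = 0;
        label = string.Empty;

        for (var i = 0; i < models.Count; i++)
        {
            var score = models[i].Evaluate(observations);
            if (score > best)
            {
                best = score;
                bestIndex = i;
                label = models[i].Label;
            }
        }

        return bestIndex;
    }
}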
public VoiceActivityDetection(ISoundSignalReader signal, int frameSize, int emptyFrames = 3,
    VoiceActivationDetectionEnhancement enhancements = VoiceActivationDetectionEnhancement.All)
{
    _signal = signal;
    _frameSize = frameSize;
    _emptyFrames = emptyFrames;
    _enhancements = enhancements;
    Init();
}
public VoiceActivitySignalReader(ISoundSignalReader signal, int frameSize, int emptyFrames = 3,
    VoiceActivationDetectionEnhancement enhancements = VoiceActivationDetectionEnhancement.All)
{
    _voiceActivityDetection = new VoiceActivityDetection(signal, frameSize, emptyFrames, enhancements);
    signal.Reset();
    _soundSignal = signal;

    // Register the voice-activity property with an initial value of false.
    _properties = new Dictionary<string, object> { { VoiceProperty, false } };
    SupportedPropertiesSet.Add(VoiceProperty);
}
public void ExtractFeaturesAsync(ISoundSignalReader signal, Action<List<double[]>> action, SignalVisitor voiceVisitor = null)
{
    // Hand each extracted feature batch to the caller asynchronously
    // (Delegate.BeginInvoke requires .NET Framework).
    Action<List<double[]>> addFeatures = features => { action.BeginInvoke(features, null, null); };
    ExtractFeaturesInternalUsingVad(signal, addFeatures, voiceVisitor);
}
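// Delegate.BeginInvoke, used above to dispatch each feature batch, is only supported on
// .NET Framework. A minimal alternative for other runtimes, assuming the same
// Action<List<double[]>> callback shape, is to queue the callback on the thread pool;
// FeatureDispatch is a hypothetical helper, not part of this code base.
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

static class FeatureDispatch
{
    // Wraps a feature callback so each invocation runs on a thread-pool thread.
    public static Action<List<double[]>> OnThreadPool(Action<List<double[]>> action)
    {
        return features => Task.Run(() => action(features));
    }
}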
public IEnumerable<List<double[]>> ExtractFeatures(ISoundSignalReader signal, SignalVisitor voiceVisitor = null)
{
    // Collect the feature vectors of every voiced segment synchronously.
    var allObservables = new List<List<double[]>>();
    Action<List<double[]>> addFeatures = features => { allObservables.Add(features); };
    ExtractFeaturesInternalUsingVad(signal, addFeatures, voiceVisitor);
    return allObservables;
}
public static IFeatureProvider GetProvider(FeatureProviderParameters parameters, ISoundSignalReader signal)
{
    // Choose the feature provider that matches the concrete parameter type.
    var lpcParam = parameters as LpcCepstrumParameters;
    if (lpcParam != null)
    {
        return new LpcCepstrumProvider(lpcParam.LpcOrder, lpcParam.NumberOfCoeff);
    }

    var melParam = parameters as MellFreqProviderParameters;
    if (melParam != null)
    {
        return new MellFrequencyCepstrumProvider(melParam.NumberOfFilterBanks, samplingRate: signal.SampleRate,
            lowerfrequency: 50, numberOfCoef: melParam.NumberOfExtractedMelItems + 1);
    }

    return null;
}
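// A hedged usage sketch for the factory above, selecting MFCC features. Whether
// MellFreqProviderParameters exposes settable properties (rather than constructor arguments)
// is an assumption, and the 26/12 values are for illustration only.
var melParameters = new MellFreqProviderParameters
{
    NumberOfFilterBanks = 26,        // illustrative value
    NumberOfExtractedMelItems = 12   // illustrative value
};
IFeatureProvider provider = FeaturesProviderFactory.GetProvider(melParameters, signal);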
public void RecognizeAsync(ISoundSignalReader signal, HiddenMarkovClassifier hmm, Action<string> handleMessage, SignalVisitor voiceVisitor = null)
{
    Action<List<double[]>> action = features =>
    {
        var observations = _codeBook.Quantize(features.Select(item => new Point(item)).ToArray());

        double[] responsibilities;
        var ret = hmm.Compute(observations, out responsibilities);

        // The classifier's decision (ret) selects the reported label; the loop below only
        // tracks the best raw likelihood and does not affect the message.
        var models = hmm.Models;
        var likelihoodValue = Double.MinValue;
        foreach (var model in models)
        {
            var val = model.Evaluate(observations);
            if (val > likelihoodValue)
            {
                likelihoodValue = val;
            }
        }

        handleMessage(hmm[ret].Tag.ToString());
    };

    var featureUtility = new FeatureUtility(_engineParameters);
    featureUtility.ExtractFeaturesAsync(signal, action, voiceVisitor);
}
public PreemphasisFilter(ISoundSignalReader signalReader, float filterRatio)
{
    _signalReader = signalReader;
    _filterRatio = filterRatio;
    Length = signalReader.Length;
}
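// The constructor above only stores the filter ratio; the exact difference equation applied by
// PreemphasisFilter is not shown here. The standard first-order pre-emphasis it most likely
// corresponds to (given the 0.95 ratio used by the feature extractor below) is sketched as an
// assumption, not as this repository's implementation.
static float[] Preemphasize(float[] samples, float ratio = 0.95f)
{
    var output = new float[samples.Length];
    if (samples.Length == 0)
        return output;

    output[0] = samples[0];
    for (var n = 1; n < samples.Length; n++)
        output[n] = samples[n] - ratio * samples[n - 1]; // y[n] = x[n] - a * x[n-1]
    return output;
}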
public void RecognizeAsync(ISoundSignalReader signal, Action<string> handleMessage, SignalVisitor visitor = null)
{
    Action<List<double[]>> action = features =>
    {
        var observations = _codeBook.Quantize(features.Select(item => new Point(item)).ToArray());

        var likelihoodValue = Double.MinValue;
        HiddenMarkovModel bestFit = null;

        // Only consider models whose mean feature-sequence length is within 10% of the observed length.
        var modelsToSearchFor = _models.Values.Where(item =>
        {
            var idProp = (IdentificationProperties)item.Tag;
            var rateLength = Math.Abs(idProp.MeanFeaturesLength - observations.Length) / idProp.MeanFeaturesLength;
            return rateLength < 0.1;
        }).ToArray();

        foreach (var model in modelsToSearchFor)
        {
            var val = model.Evaluate(observations);
            if (val > likelihoodValue)
            {
                likelihoodValue = val;
                bestFit = model;
            }
        }

        if (bestFit != null)
        {
            var idProp = (IdentificationProperties)bestFit.Tag;
            handleMessage(idProp.Label);
        }
    };

    var featureUtility = new FeatureUtility(_engineParameters);
    featureUtility.ExtractFeaturesAsync(signal, action, visitor);
}
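// The Where clause above gates candidate models by utterance length before any HMM evaluation:
// a model is considered only if its mean feature-sequence length is within 10% of the observed
// sequence length. A self-contained restatement of that check (the 0.1 threshold matches the code):
using System;

static bool LengthMatches(double meanFeaturesLength, int observationCount, double tolerance = 0.1)
{
    return Math.Abs(meanFeaturesLength - observationCount) / meanFeaturesLength < tolerance;
}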
public int Recognize(ISoundSignalReader signal, out string name, SignalVisitor visitor = null)
{
    var featureUtility = new FeatureUtility(_engineParameters);
    signal.Reset();

    var features = featureUtility.ExtractFeatures(signal, visitor).First();
    var observations = _codeBook.Quantize(features.Select(item => new Point(item)).ToArray());

    // Evaluate every registered model and keep the one with the highest likelihood.
    var likelihoodValue = Double.MinValue;
    HiddenMarkovModel bestFit = null;
    var index = 0;
    var ret = 0;

    foreach (var model in _models.Values)
    {
        var val = model.Evaluate(observations);
        if (val > likelihoodValue)
        {
            likelihoodValue = val;
            bestFit = model;
            ret = index;
        }
        index++;
    }

    if (bestFit != null)
    {
        var idProp = (IdentificationProperties)bestFit.Tag;
        name = idProp.Label;
    }
    else
    {
        name = string.Empty;
    }

    return ret;
}
private void ExtractFeaturesInternalUsingVad(ISoundSignalReader signal, Action<List<double[]>> featureExtracted, SignalVisitor voiceVisitor)
{
    var featureProvider = FeaturesProviderFactory.GetProvider(ProviderParameters, signal);

    // Convert the configured frame/step durations (in milliseconds) into sample counts.
    var frameSize = (int)Math.Floor(signal.SampleRate * FrameSizeMiliseconds / 1000.0);
    var stepSize = (int)Math.Floor(signal.SampleRate * StepSizeMiliseconds / 1000.0);

    // Pre-emphasize the signal and wrap it in a voice-activity-aware reader.
    var filteredSignal = new PreemphasisFilter(signal, 0.95f);
    float[] frame;
    var voiceStream = new VoiceActivitySignalReader(filteredSignal, frameSize, 8);
    if (voiceVisitor != null)
    {
        voiceStream.Accept(voiceVisitor);
    }

    int index = 0, silentSamples = 0, noOfItems = ProviderParameters.NumberOfCoeff - 1;
    var observables = new List<double[]>();
    bool isVoice;

    while (voiceStream.Read(frameSize, stepSize, out frame, out isVoice))
    {
        if (isVoice)
        {
            // Voiced frame: extract features and update delta coefficients for preceding frames.
            bool isEmpty;
            var features = featureProvider.Extract(frame, out isEmpty);
            silentSamples = 0;
            observables.Add(features);

            if (featureProvider.ComputeDelta)
            {
                ComputeDelta(observables, index - 1, 1, noOfItems);
                ComputeDelta(observables, index - 2, noOfItems + 1, noOfItems, true);
            }

            index++;
        }
        else if (observables.Count > 0 && silentSamples > SilenceThreshHold)
        {
            // Enough trailing silence: close the current word if it is long enough, then reset.
            if (index >= MinWordLength)
            {
                if (featureProvider.ComputeDelta)
                {
                    ComputeDelta(observables, index - 1, 1, noOfItems);
                    ComputeDelta(observables, index - 2, noOfItems + 1, noOfItems, true);
                    ComputeDelta(observables, index - 1, noOfItems + 1, noOfItems, true);
                }
                featureExtracted(observables);
            }

            observables = new List<double[]>();
            index = 0;
        }
        else
        {
            silentSamples++;
        }
    }
}
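// Worked example of the frame/step sizing at the top of the method. The 16 kHz rate and the
// 25 ms / 10 ms durations are illustrative values, not the engine's actual configuration.
using System;

static class FrameSizingExample
{
    static void Main()
    {
        const int sampleRate = 16000;
        var frameSize = (int)Math.Floor(sampleRate * 25 / 1000.0); // 400 samples per frame
        var stepSize = (int)Math.Floor(sampleRate * 10 / 1000.0);  // 160 samples per hop
        Console.WriteLine($"frame = {frameSize} samples, step = {stepSize} samples");
    }
}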