/// <summary>
/// Loads the audio file referenced by <paramref name="row"/>, shows it in the wave viewer and
/// positions both sliders from the row's Start/End values.
/// </summary>
/// <param name="row">Row supplying the path (appended to the training audio directory), the word and the Start/End values.</param>
private void UpdateRow(TrainFilesCarrier.TrainFileRow row)
{
    string audioPath = VCDir.Instance.TrainDirAudio + row.Path;
    if (!File.Exists(audioPath))
    {
        return;
    }

    _yourChanged = true;
    _yourPath = audioPath;
    _yourWav = new WavFileWrapper(_yourPath);
    if (!_yourWav.Load())
    {
        return;
    }

    _yourWav.NormalizeWave(1.0);
    LogUtil.Info("Load Wave: {0} -- OK\n", _yourPath);

    option = VCContext.Instance.MFCCOptions;
    if (option.ShiftSampleToZero)
    {
        LogUtil.Info("Shift Sample To Zero: -- -- OK\n");
        _yourWav.ShifToZero();
    }

    waveViewer.WaveData = _yourWav.FullData;
    waveViewer.FitToScreen();
    _label = row.Word;

    // Slider positions are the row's Start/End divided by the number of loaded samples.
    int sampleCount = _yourWav.FullData.Count;
    _startSelected = (float)row.Start / sampleCount;
    _endSelected = (float)row.End / sampleCount;
    waveViewer.LeftSlider = _startSelected;
    waveViewer.RightSlider = _endSelected;
}
/// <summary>
/// Creates the control: takes the MFCC options from <c>VCContext.Instance</c>, runs the designer
/// initialization and caches the current chart selection.
/// </summary>
public MainTestControlTest()
{
    // Options come from the shared context.
    option = VCContext.Instance.MFCCOptions;
    //ExtractionWrapper.OptionWrapper.SetLog(option.EnableLog);

    InitializeComponent();

    // showChart is only read after InitializeComponent() has run.
    selectedChart = showChart.Selected;
}
/// <summary>
/// Settings-changed handler: stores the new options and flags both sides as changed.
/// </summary>
/// <param name="obj">Event sender (unused).</param>
/// <param name="e">Event data carrying the new options in <c>Option</c>.</param>
private void SettingChanged(object obj, SettingEventArgs e)
{
    option = e.Option;
    //ExtractionWrapper.OptionWrapper.SetLog(option.EnableLog);

    leftChanged = true;
    rightChanged = true;
}
/// <summary>
/// Writes the MFCC values of <paramref name="opt"/> into the corresponding text boxes.
/// When <c>InvokeRequired</c> is true the call is re-dispatched through <c>Invoke</c>.
/// </summary>
/// <param name="opt">Options whose values are displayed.</param>
private void SetValueOnGUI(MfccOptions opt)
{
    if (InvokeRequired)
    {
        Invoke(new Action<MfccOptions>(SetValueOnGUI), new object[] { opt });
        return;
    }

    // Every field is rewritten unconditionally; the former per-field "value differs" checks are disabled.
    const string integerFormat = "{0:0}";
    const string decimalFormat = "{0:0.000}";

    cepfilter_tb.Text = String.Format(integerFormat, opt.CepFilter);
    numceps_tb.Text = String.Format(integerFormat, opt.NumCeps);
    lowfreq_tb.Text = String.Format(decimalFormat, opt.LowFreq);
    highfreq_tb.Text = String.Format(decimalFormat, opt.HighFreq);
    timeframe_tb.Text = String.Format(decimalFormat, opt.TimeFrame);
    timeshift_tb.Text = String.Format(decimalFormat, opt.TimeShift);
}
/// <summary>
/// Settings-changed handler: stores the new options, flags both the reference and the user
/// audio as changed, and forwards the new log level to the extraction wrapper.
/// </summary>
/// <param name="obj">Event sender (unused).</param>
/// <param name="e">Event data carrying the new options in <c>Option</c>.</param>
private void SettingChanged(object obj, SettingEventArgs e)
{
    _refChanged = true;
    _yourChanged = true;

    option = e.Option;

    var logLevel = e.Option.LogLevel;
    ExtractionWrapper.OptionWrapper.SetLog(logLevel);
}
/// <summary>
/// Settings-changed handler: stores the new options, flags both the reference and the user
/// audio as changed, and forwards the EnableLog flag to the extraction wrapper.
/// </summary>
/// <param name="obj">Event sender (unused).</param>
/// <param name="e">Event data carrying the new options in <c>Option</c>.</param>
private void SettingChanged(object obj, SettingEventArgs e)
{
    refChanged = true;
    yourChanged = true;

    option = e.Option;

    var enableLog = option.EnableLog;
    ExtractionWrapper.OptionWrapper.SetLog(enableLog);
}
/// <summary>
/// Creates the control with a fresh <c>MfccOptions</c> instance, pushes its EnableLog flag to the
/// extraction wrapper, runs the designer initialization, caches the chart selection and
/// calls <c>initListWords()</c>.
/// </summary>
public MainTestControl()
{
    option = new MfccOptions();

    var enableLog = option.EnableLog;
    ExtractionWrapper.OptionWrapper.SetLog(enableLog);

    InitializeComponent();

    // Both steps below run only after InitializeComponent().
    selectedChart = showChart.Selected;
    initListWords();
}
/// <summary>
/// Copies every value of <paramref name="opt"/> into the matching control of the settings form
/// (MFCC, pitch, thresholds, training and logging).
/// </summary>
/// <param name="opt">Options to display.</param>
private void Update(MfccOptions opt)
{
    // MFCC
    cepfilter_tb.Text = opt.CepFilter.ToString();
    highfreq_tb.Text = opt.HighFreq.ToString();
    numceps_tb.Text = opt.NumCeps.ToString();
    lowfreq_tb.Text = opt.LowFreq.ToString();
    timeshift_tb.Text = opt.TimeShift.ToString();
    timeframe_tb.Text = opt.TimeFrame.ToString();
    useStandardization_cbx.Checked = opt.UseStandardization;

    // Pitch
    pitchtype_cbx.SelectedIndex = opt.PitchType;
    yinThreshold_tbx.Text = opt.YinThreshhold.ToString();
    hightFreq_tbx.Text = opt.PitchHighFreq.ToString();
    lowFreq_tbx.Text = opt.PitchLowFreq.ToString();
    timeshift_tbl.Text = opt.PitchTimeShift.ToString();
    timeframe_tbl.Text = opt.PitchTimeFrame.ToString();
    median_cb.Checked = opt.UseMedian;
    median_tbl.Text = opt.MedianWindow.ToString();
    removeUnpitch_cb.Checked = opt.DropUnPitch;

    // Thresholds
    pitch_tbx.Text = opt.PitchThreshold.ToString();
    energy_txb.Text = opt.EnergyThreshold.ToString();

    // Train tab
    hmmStateNum_tbx.Text = opt.TrainHMMState.ToString();
    gmmCompNum_tbx.Text = opt.TrainGMMComponent.ToString();
    dataType_cbx.SelectedIndex = (int)opt.TrainCofficientType;
    gmmCoVarType_cbx.SelectedIndex = (int)opt.TrainGMMCovVar;
    normal_audio_cbx.Checked = opt.NormalizeAudio;
    remove_noise_cbx.Checked = opt.RemoveNoiseYourAudio;
    shiftToZero_cbx.Checked = opt.ShiftSampleToZero;

    // Log: the combo box is first reset to entry 0 and then moved to the entry matching the log level.
    enanblelog_cbx.Checked = opt.EnableLog;
    logLevel_cbx.SelectedIndex = 0;

    int level = opt.LogLevel;
    int levelIndex =
        level == (int)LOGLEVEL.STEP ? 1 :
        level == (int)LOGLEVEL.INFORMATION ? 2 :
        level == (int)LOGLEVEL.DETAIL ? 3 :
        level == (int)LOGLEVEL.DATA ? 4 :
        0;

    if (levelIndex != 0)
    {
        logLevel_cbx.SelectedIndex = levelIndex;
    }
}
/// <summary>
/// Creates the main control: takes the options from <c>VCContext.Instance</c>, runs the designer
/// initialization, subscribes to the wave viewer's time-selection event, creates the NAudio
/// output device and fills the sample-rate and device lists.
/// </summary>
public MainControl()
{
    option = VCContext.Instance.MFCCOptions;
    //ExtractionWrapper.OptionWrapper.SetLog(option.EnableLog);
    recoding = false;

    InitializeComponent();

    // Everything below touches designer-created members, so it runs after InitializeComponent().
    waveViewer.TimeSelectedChanged += SelectedTimeEventHandler;
    _waveOut = new NAudio.Wave.WaveOut();
    _selectedChart = showChart.Selected;

    initSampleRate_cbx();
    //initListWords();
    FreshListDevices();
}
/// <summary>
/// Initializes the context: watches the XML directory for changes, loads the MFCC options from
/// the settings file and loads the word list.
/// </summary>
private VCContext()
{
    _watcherListFile = new FileSystemWatcher(VCDir.Instance.XMLDir);
    _watcherListFile.NotifyFilter = NotifyFilters.LastWrite;

    // The filter is a wildcard pattern: ".xml" would only match a file literally named ".xml",
    // so the handler would never fire for ordinary XML files.
    _watcherListFile.Filter = "*.xml";
    _watcherListFile.Changed += FileSystemChangeEventHandler;

    option = new MfccOptions();
    option.LoadFromXML(VCDir.Instance.SettingFile);

    PaserWordTask listWordParser = new PaserWordTask();
    if (listWordParser.LoadData(VCDir.Instance.ListWordDir))
    {
        _listAmTiet = listWordParser.ListAmTiet;
    }

    // Start raising events only once the instance is fully initialized, so a change notification
    // cannot reach the handler while the options or the word list are still unset.
    _watcherListFile.EnableRaisingEvents = true;
}
/// <summary>
/// Runs when the background worker finishes: refreshes the charts, clears the changed flags,
/// re-enables the process button and, when the worker reported <c>true</c>, the recognition button.
/// If separate logging is enabled and either input changed for this run, starts a new log section.
/// </summary>
/// <param name="sender">The background worker (unused).</param>
/// <param name="e">Completion data; <c>Result</c> is only read when the worker neither failed nor was cancelled.</param>
private void bgw_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    Debug.WriteLine("Complete Procesing: At - {0}", DateTime.Now);

    // Capture the flags BEFORE resetting them; testing them after the reset made the
    // SeparateLog() branch unreachable.
    bool inputsChanged = _refChanged || _yourChanged;

    ShowChart();
    _refChanged = false;
    _yourChanged = false;
    process_btn.Enabled = true;
    option = VCContext.Instance.MFCCOptions;

    if (e.Error != null)
    {
        // Reading e.Result would rethrow the worker's exception here.
        Debug.WriteLine("Processing failed: " + e.Error);
    }
    else if (!e.Cancelled && e.Result is bool && (bool)e.Result)
    {
        reg_btn.Enabled = true;
    }

    if (option.SeparateLog && option.EnableLog && inputsChanged)
    {
        ExtractionWrapper.OptionWrapper.SeparateLog();
    }
}
/// <summary>
/// Builds the test control: fills the eight-entry <c>locationChart</c> table, creates the wave and
/// MFCC chart forms, runs the designer initialization, subscribes the Display* handlers to the
/// ShowChart* events of <c>selectShowChart</c> and creates a fresh <c>MfccOptions</c> instance.
/// </summary>
// Layout: locationChart[0] is the primary screen's working-area origin shifted by 10 on both axes;
// the remaining entries are offsets from it built from yourvoice_freq's Width/Height plus gaps of 10.
// NOTE(review): locationChart[6] and locationChart[7] use exactly the same expressions as
// locationChart[4] and locationChart[5] - confirm whether a further row was intended.
// NOTE(review): confirm whether the `//` after the locationChart[0] assignment is an empty comment
// or is meant to disable `yourvoice_wave = new WaveViewerForm();`.
public TestControl() { locationChart = new Point[8]; locationChart[0] = new Point(Screen.PrimaryScreen.WorkingArea.X + 10, Screen.PrimaryScreen.WorkingArea.Y + 10); // yourvoice_wave = new WaveViewerForm(); refvoice_wave = new WaveViewerForm(); yourvoice_freq = new MfccChartForm(); refvoice_freq = new MfccChartForm(); locationChart[1] = new Point(locationChart[0].X + yourvoice_freq.Width + 10, locationChart[0].Y); locationChart[2] = new Point(locationChart[0].X, locationChart[0].Y + yourvoice_freq.Height + 10); locationChart[3] = new Point(locationChart[0].X + yourvoice_freq.Width + 10, locationChart[0].Y + yourvoice_freq.Height + 10); yourvoice_mfcc = new MfccChartForm(); refvoice_mfcc = new MfccChartForm(); locationChart[4] = new Point(locationChart[0].X, locationChart[0].Y + 2 * yourvoice_freq.Height + 20); locationChart[5] = new Point(locationChart[0].X + yourvoice_freq.Width + 10, locationChart[0].Y + 2 * yourvoice_freq.Height + 20); yourvoice_detal = new MfccChartForm(); refvoice_detal = new MfccChartForm(); locationChart[6] = new Point(locationChart[0].X, locationChart[0].Y + 2 * yourvoice_freq.Height + 20); locationChart[7] = new Point(locationChart[0].X + yourvoice_freq.Width + 10, locationChart[0].Y + 2 * yourvoice_freq.Height + 20); yourvoice_double = new MfccChartForm(); refvoice_double = new MfccChartForm(); InitializeComponent(); selectShowChart.ShowChartYourWave += DisplayYourWaveChart; selectShowChart.ShowChartYourFreq += DisplayYourFreqChart; selectShowChart.ShowChartYourMfcc += DisplayYourMfccChart; selectShowChart.ShowChartYourDetal += DisplayYourDetalChart; selectShowChart.ShowChartYourDouble += DisplayYourDoubleChart; selectShowChart.ShowChartRefWave += DisplayRefWaveChart; selectShowChart.ShowChartRefFreq += DisplayRefFreqChart; selectShowChart.ShowChartRefMfcc += DisplayRefMfccChart; selectShowChart.ShowChartRefDetal += DisplayRefDetalChart; selectShowChart.ShowChartRefDouble += DisplayRefDoubleChart; mfcc_setting = new MfccOptions(); }
/// <summary>
/// Feeds the same white-noise signal to an MFCC extractor in one piece and to an online extractor
/// in growing blocks, then asserts that the per-vector sums of coefficient differences are zero
/// within 1e-7.
/// </summary>
public void TestOnlineFeatureExtractor()
{
    var mfccOptions = new MfccOptions
    {
        SamplingRate = 8000,
        FeatureCount = 5,
        FrameSize = 256,
        HopSize = 50,
        FilterBankSize = 8
    };

    var signal = new WhiteNoiseBuilder().OfLength(1000).Build();

    // Reference: whole signal at once.
    var offlineVectors = new MfccExtractor(mfccOptions).ComputeFrom(signal);

    // Online: the same signal delivered block by block.
    var onlineExtractor = new OnlineFeatureExtractor(new MfccExtractor(mfccOptions));
    var onlineVectors = new List<float[]>();

    for (var offset = 0; offset < signal.Length;)
    {
        // emulating online blocks with different sizes:
        var blockSize = (offset + 1) * 15;
        var block = signal.Samples.Skip(offset).Take(blockSize).ToArray();

        onlineVectors.AddRange(onlineExtractor.ComputeFrom(block));

        offset += blockSize;
    }

    var diff = offlineVectors.Zip(
        onlineVectors,
        (expected, actual) => expected.Zip(actual, (f1, f2) => f1 - f2).Sum());

    Assert.That(diff, Is.All.EqualTo(0).Within(1e-7f));
}
/// <summary>
/// Runs voice-activity detection on the loaded wave, selects the longest detected segment in the
/// wave viewer and shows the energy threshold and the zero-rate curve.
/// Does nothing beyond refreshing <c>option</c> when no valid wave is loaded or detection fails.
/// </summary>
private void vadVoice()
{
    option = VCContext.Instance.MFCCOptions;

    if (_yourWav == null || !_yourWav.IsValid)
    {
        return;
    }

    VadWrapper vad = new VadWrapper(_yourWav);
    ZeroRateWrapper zrc = new ZeroRateWrapper(_yourWav, 0.02f, 0.01f, true);
    vad.UseEnergy(0.015f, 0.01f, true, 3, false);
    zrc.Process();

    if (!vad.Process(option.EnergyThreshold))
    {
        return;
    }

    // Find the longest segment; 'longest' stays 0 when no segment has a positive length.
    float longestStart = 0;
    float longestEnd = 0;
    uint longest = 0;
    for (uint i = 0; i < vad.GetSizeOfSegment(); i++)
    {
        var segmentStart = vad.GetStartSegment(i);
        var segmentEnd = vad.GetEndSegment(i);
        if (segmentEnd - segmentStart > longest)
        {
            longest = segmentEnd - segmentStart;
            longestStart = (float)segmentStart;
            longestEnd = (float)segmentEnd;
        }
    }

    if (longest == 0)
    {
        return;
    }

    // Slider positions are fractions of the total sample count.
    int sampleCount = _yourWav.FullData.Count;
    _startSelected = longestStart / sampleCount;
    _endSelected = longestEnd / sampleCount;
    waveViewer.LeftSlider = _startSelected;
    waveViewer.RightSlider = _endSelected;
    waveViewer.ThresholdChart = (float)vad.ThresholdEnergy;
    //waveViewer.Chart = vad.SmoothEnergies;
    waveViewer.ChartBlue = zrc.ZeroRate;
}
/// <summary>
/// Walks every sub-folder of "Dataset" under the current directory (folder name = genre label),
/// extracts averaged MFCC and time-domain features plus eight spectral descriptors from each file
/// and writes one CSV row per file to "Data.csv".
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int mfcc_no = 24;
    const int tdFeatureCount = 4;
    const int samplingRate = 16000;
    const int fftSize = 1024;

    // All numbers are written with the invariant culture so that a decimal comma in the
    // machine's regional settings cannot collide with the CSV separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var mfccOptions = new MfccOptions
    {
        SamplingRate = samplingRate,
        FeatureCount = mfcc_no,
        FrameDuration = 0.025 /*sec*/,
        HopDuration = 0.010 /*sec*/,
        PreEmphasis = 0.97,
        Window = WindowTypes.Hamming
    };
    var opts = new MultiFeatureOptions
    {
        SamplingRate = samplingRate,
        FrameDuration = 0.025,
        HopDuration = 0.010
    };
    var tdExtractor = new TimeDomainFeaturesExtractor(opts);
    var mfccExtractor = new MfccExtractor(mfccOptions);

    // The frequency axis of the spectrum is the same for every file.
    var resolution = (float)samplingRate / fftSize;
    var frequencies = Enumerable.Range(0, fftSize / 2 + 1)
                                .Select(f => f * resolution)
                                .ToArray();

    var folders = Directory.GetDirectories(Path.Combine(Environment.CurrentDirectory, "Dataset"));
    Console.WriteLine($"Started!");

    using (var writer = File.CreateText(Path.Combine(Environment.CurrentDirectory, "Data.csv")))
    {
        // Header
        var main_header = "genre,";
        main_header += String.Join(",", mfccExtractor.FeatureDescriptions);
        main_header += ",";
        main_header += String.Join(",", tdExtractor.FeatureDescriptions);
        main_header += ",centroid,spread,flatness,noiseness,roloff,crest,decrease,spectral_entropy";
        writer.WriteLine(main_header);

        foreach (var folder in folders)
        {
            var f_name = new DirectoryInfo(folder).Name;

            // GetDirectories/GetFiles already return full paths, so no re-combining is needed.
            var files = Directory.GetFiles(folder);
            Console.WriteLine($"{f_name}");

            foreach (var filename in files)
            {
                string specFeatures;

                // Read-only access is enough and also works for write-protected files.
                using (var stream = new FileStream(filename, FileMode.Open, FileAccess.Read))
                {
                    var waveFile = new WaveFile(stream);
                    DiscreteSignal signal = waveFile[Channels.Average];

                    tdVectors = tdExtractor.ComputeFrom(signal);
                    mfccVectors = mfccExtractor.ComputeFrom(signal);

                    var spectrum = new Fft(fftSize).MagnitudeSpectrum(signal).Samples;
                    var centroid = Spectral.Centroid(spectrum, frequencies);
                    var spread = Spectral.Spread(spectrum, frequencies);
                    var flatness = Spectral.Flatness(spectrum, 0);
                    var noiseness = Spectral.Noiseness(spectrum, frequencies, 3000);
                    var rolloff = Spectral.Rolloff(spectrum, frequencies, 0.85f);
                    var crest = Spectral.Crest(spectrum);
                    var decrease = Spectral.Decrease(spectrum);
                    var entropy = Spectral.Entropy(spectrum);

                    specFeatures = string.Format(
                        invariant,
                        "{0},{1},{2},{3},{4},{5},{6},{7}",
                        centroid, spread, flatness, noiseness, rolloff, crest, decrease, entropy);
                }

                // Average every feature over all frames of the file.
                var avg_vec_mfcc = new float[mfcc_no];
                var avg_vec_td = new float[tdFeatureCount];

                foreach (var inst in mfccVectors)
                {
                    for (var i = 0; i < mfcc_no; i++)
                    {
                        avg_vec_mfcc[i] += inst[i];
                    }
                }
                foreach (var inst in tdVectors)
                {
                    for (var i = 0; i < tdFeatureCount; i++)
                    {
                        avg_vec_td[i] += inst[i];
                    }
                }
                for (var i = 0; i < mfcc_no; i++)
                {
                    avg_vec_mfcc[i] /= mfccVectors.Count;
                }
                for (var i = 0; i < tdFeatureCount; i++)
                {
                    avg_vec_td[i] /= tdVectors.Count;
                }

                // Row: label, MFCC averages, time-domain averages, spectral descriptors.
                var feature_string = $"{f_name},";
                feature_string += String.Join(",", avg_vec_mfcc.Select(v => v.ToString(invariant)));
                feature_string += ",";
                feature_string += String.Join(",", avg_vec_td.Select(v => v.ToString(invariant)));
                feature_string += ",";
                feature_string += specFeatures;
                writer.WriteLine(feature_string);

                var file_name = Path.GetFileName(filename);
                Console.WriteLine($"{file_name}");
            }
        }
    }

    Console.WriteLine($"DONE");
    Console.ReadLine();
}
/// <summary>
/// Creates the settings control with a new <c>MfccOptions</c> instance on which <c>Reset()</c>
/// has been called, then runs the designer initialization.
/// </summary>
public MfccSetting()
{
    // The options object is created and reset before any designer control exists.
    options = new MfccOptions();
    options.Reset();

    InitializeComponent();
}
/// <summary>
/// Constructs the extractor from <paramref name="options"/> (forwarded to the base constructor)
/// and prepares a Hamming window of <c>FrameSize</c> samples.
/// </summary>
/// <param name="options">Extractor configuration.</param>
public MfccExtractorTestHtk(MfccOptions options)
    : base(options)
{
    // FrameSize is read only after the base constructor has run.
    _hammingWin = Window.OfType(WindowTypes.Hamming, FrameSize);
}
/// <summary>
/// Constructs extractor from configuration <paramref name="options"/>.
/// </summary>
/// <param name="options">
/// MFCC configuration: feature count, filter bank (or the parameters to build a mel bank),
/// liftering, energy handling, DCT type, non-linearity and spectrum type.
/// </param>
public MfccExtractor(MfccOptions options) : base(options)
{
    FeatureCount = options.FeatureCount;

    var bandCount = options.FilterBankSize;

    if (options.FilterBank != null)
    {
        // A ready-made filter bank dictates both the number of bands and the FFT size.
        FilterBank = options.FilterBank;
        bandCount = FilterBank.Length;
        _blockSize = 2 * (FilterBank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }
    else
    {
        // No filter bank given: build triangular mel bands over the requested frequency range.
        _blockSize = options.FftSize > FrameSize
                   ? options.FftSize
                   : MathUtils.NextPowerOfTwo(FrameSize);

        var melBands = FilterBanks.MelBands(bandCount, SamplingRate, options.LowFrequency, options.HighFrequency);
        FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
    }

    _fft = new RealFft(_blockSize);

    // reserve memory for reusable blocks
    _spectrum = new float[_blockSize / 2 + 1];
    _melSpectrum = new float[bandCount];

    _lifterSize = options.LifterSize;
    _lifterCoeffs = null;
    if (_lifterSize > 0)
    {
        _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
    }

    _includeEnergy = options.IncludeEnergy;
    _logEnergyFloor = options.LogEnergyFloor;

    // setup DCT: the first character selects the type, a trailing 'N' selects the normalized variant

    _dctType = options.DctType;

    switch (_dctType[0])
    {
        case '1':
            _dct = new Dct1(bandCount);
            break;
        case '3':
            _dct = new Dct3(bandCount);
            break;
        case '4':
            _dct = new Dct4(bandCount);
            break;
        default:
            _dct = new Dct2(bandCount);
            break;
    }

    var normalizedDct = _dctType.EndsWith("N", StringComparison.OrdinalIgnoreCase);
    if (!normalizedDct)
    {
        _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
    }
    else
    {
        _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
    }

    // setup spectrum post-processing

    _logFloor = options.LogFloor;
    _nonLinearityType = options.NonLinearity;

    switch (_nonLinearityType)
    {
        case NonLinearityType.Log10:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.LogE:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.ToDecibel:
            _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.CubicRoot:
            _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
            break;
        default:
            _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
            break;
    }

    // setup spectrum computation: magnitude vs. power, optionally normalized
    // (any other spectrum type falls back to the non-normalized power spectrum)

    _spectrumType = options.SpectrumType;

    var useMagnitude = _spectrumType == SpectrumType.Magnitude
                    || _spectrumType == SpectrumType.MagnitudeNormalized;
    var normalize = _spectrumType == SpectrumType.MagnitudeNormalized
                 || _spectrumType == SpectrumType.PowerNormalized;

    if (useMagnitude)
    {
        _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, normalize);
    }
    else
    {
        _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, normalize);
    }
}
/// <summary>
/// Builds an <c>MfccOptions</c> instance from the current state of the settings form.
/// Every numeric field is parsed independently with the current culture: a field that is
/// empty or cannot be parsed keeps the default of a new <c>MfccOptions</c> and no longer
/// prevents the remaining fields from being read.
/// </summary>
/// <returns>The options read from the form; never null.</returns>
private MfccOptions Get()
{
    MfccOptions options = new MfccOptions();
    try
    {
        uint unsignedValue;
        float floatValue;
        int intValue;

        // MFCC
        if (uint.TryParse(cepfilter_tb.Text, out unsignedValue))
        {
            options.CepFilter = unsignedValue;
        }
        if (uint.TryParse(numceps_tb.Text, out unsignedValue))
        {
            options.NumCeps = unsignedValue;
        }
        if (float.TryParse(lowfreq_tb.Text, out floatValue))
        {
            options.LowFreq = floatValue;
        }
        if (float.TryParse(highfreq_tb.Text, out floatValue))
        {
            options.HighFreq = floatValue;
        }
        if (float.TryParse(timeframe_tb.Text, out floatValue))
        {
            options.TimeFrame = floatValue;
        }
        if (float.TryParse(timeshift_tb.Text, out floatValue))
        {
            options.TimeShift = floatValue;
        }
        options.UseStandardization = useStandardization_cbx.Checked;

        // Pitch
        options.PitchType = pitchtype_cbx.SelectedIndex;
        if (float.TryParse(yinThreshold_tbx.Text, out floatValue))
        {
            options.YinThreshhold = floatValue;
        }
        if (float.TryParse(hightFreq_tbx.Text, out floatValue))
        {
            options.PitchHighFreq = floatValue;
        }
        if (float.TryParse(timeshift_tbl.Text, out floatValue))
        {
            options.PitchTimeShift = floatValue;
        }
        if (float.TryParse(timeframe_tbl.Text, out floatValue))
        {
            options.PitchTimeFrame = floatValue;
        }
        if (float.TryParse(lowFreq_tbx.Text, out floatValue))
        {
            options.PitchLowFreq = floatValue;
        }
        options.UseMedian = median_cb.Checked;
        if (int.TryParse(median_tbl.Text, out intValue))
        {
            options.MedianWindow = intValue;
        }
        options.DropUnPitch = removeUnpitch_cb.Checked;

        // VAD
        if (float.TryParse(energy_txb.Text, out floatValue))
        {
            options.EnergyThreshold = floatValue;
        }
        if (float.TryParse(pitch_tbx.Text, out floatValue))
        {
            options.PitchThreshold = floatValue;
        }

        // Noise and normalize
        options.NormalizeAudio = normal_audio_cbx.Checked;
        options.RemoveNoiseYourAudio = remove_noise_cbx.Checked;
        options.ShiftSampleToZero = shiftToZero_cbx.Checked;

        // Log: combo-box entries 1..4 map to STEP, INFORMATION, DETAIL, DATA; anything else is NONE.
        options.EnableLog = enanblelog_cbx.Checked;
        switch (logLevel_cbx.SelectedIndex)
        {
            case 1:
                options.LogLevel = (int)LOGLEVEL.STEP;
                break;
            case 2:
                options.LogLevel = (int)LOGLEVEL.INFORMATION;
                break;
            case 3:
                options.LogLevel = (int)LOGLEVEL.DETAIL;
                break;
            case 4:
                options.LogLevel = (int)LOGLEVEL.DATA;
                break;
            default:
                options.LogLevel = (int)LOGLEVEL.NONE;
                break;
        }

        // Train: negative numbers and "nothing selected" (-1) are skipped instead of being
        // cast to a huge unsigned value.
        if (int.TryParse(hmmStateNum_tbx.Text, out intValue) && intValue >= 0)
        {
            options.TrainHMMState = (uint)intValue;
        }
        if (int.TryParse(gmmCompNum_tbx.Text, out intValue) && intValue >= 0)
        {
            options.TrainGMMComponent = (uint)intValue;
        }
        if (dataType_cbx.SelectedIndex >= 0)
        {
            options.TrainCofficientType = (uint)dataType_cbx.SelectedIndex;
        }
        if (gmmCoVarType_cbx.SelectedIndex >= 0)
        {
            options.TrainGMMCovVar = (uint)gmmCoVarType_cbx.SelectedIndex;
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (whatever was read so far is returned), but no longer silent.
        System.Diagnostics.Debug.WriteLine("Failed to read MFCC options from the form: " + ex);
    }
    return options;
}
/// <summary>
/// Creates the event data for a settings change.
/// </summary>
/// <param name="opt">Options stored in <c>Option</c>.</param>
public SettingEventArgs(MfccOptions opt)
{
    this.Option = opt;
}
/// <summary>
/// Creates the event data for a settings change.
/// </summary>
/// <param name="value">Options stored in <c>Value</c>.</param>
public SettingChangedEventArgs(MfccOptions value)
{
    this.Value = value;
}
/// <summary>
/// Loads the wave at <c>_yourPath</c>, shows it in the wave viewer and runs voice-activity
/// detection. In recognition mode it additionally normalizes the wave, cuts it to the selected
/// range, extracts MFCC features and displays the recognition result in <c>reg_lb</c>.
/// </summary>
private void showWaveSound()
{
    _yourWav = new WavFileWrapper(_yourPath);
    if (!_yourWav.Load())
    {
        return;
    }

    waveViewer.WaveData = _yourWav.FullData;
    waveViewer.FitToScreen();
    vadVoice();

    if (!_regMode)
    {
        return;
    }

    // The selection is stored as fractions of the sample count; convert back to sample indices.
    uint size = (uint)_yourWav.FullData.Count;
    uint begin = (uint)(_startSelected * size);
    uint end = (uint)(_endSelected * size);

    _yourWav.NormalizeWave(1.0f);
    option = VCContext.Instance.MFCCOptions;
    LogUtil.Info("Load Wave: {0} -- OK\n", _yourPath);
    if (option.ShiftSampleToZero)
    {
        LogUtil.Info("Shift Sample To Zero: -- -- OK\n");
        _yourWav.ShifToZero();
    }

    Debug.WriteLine("Select Data voice: Start {0} End {1}", begin, end);
    _yourWav.SelectedWave(begin, end);

    _yourMfcc = new MFCCWrapper(_yourWav, option.TimeFrame, option.TimeShift, option.CepFilter,
                                option.LowFreq, option.HighFreq, option.NumCeps, 4);
    _yourMfcc.UserStandardization = option.UseStandardization;

    if (_yourMfcc.Process())
    {
        // Pick the coefficient set selected for training; stays null for any other value.
        List<List<double>> data = null;
        switch (VCContext.Instance.MFCCOptions.TrainCofficientType)
        {
            case 0:
                data = _yourMfcc.Mfcc;
                break;
            case 1:
                data = _yourMfcc.DetalMfcc;
                break;
            case 2:
                data = _yourMfcc.DoubleDetalMfcc;
                break;
        }

        Action act = new Action(() => { reg_lb.Text = _trainTask.Reg(data); });
        Invoke(act);
    }
    else
    {
        // MessageBox.Show(text, caption) does not format its arguments: the path used to end up
        // as the window caption while the text showed a literal "{0}".
        MessageBox.Show(string.Format(" Cant Extraction file {0}\n", _yourPath));
    }
}
/// <summary>
/// Extracts MFCC, time-domain and spectral features from the wave file at <c>_filePath</c> and,
/// for roughly one frame per second of audio, runs the prediction model on that frame's feature
/// row, appending the resulting score to <c>featureTimeList</c>.
/// Does nothing when <c>_filePath</c> is null.
/// </summary>
public void extractFeatures()
{
    if (_filePath == null)
    {
        return;
    }

    const int mfcc_no = 24;
    const int tdFeatureCount = 4;
    const int samplingRate = 44100;
    const int fftSize = 1024;

    // The feature row is parsed back as comma-separated text, so numbers must never be
    // written with a culture-specific decimal comma.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var mfccOptions = new MfccOptions
    {
        SamplingRate = samplingRate,
        FeatureCount = mfcc_no,
        FrameDuration = 0.025 /*sec*/,
        HopDuration = 0.010 /*sec*/,
        PreEmphasis = 0.97,
        Window = WindowTypes.Hamming
    };
    var opts = new MultiFeatureOptions
    {
        SamplingRate = samplingRate,
        FrameDuration = 0.025,
        HopDuration = 0.010
    };
    var tdExtractor = new TimeDomainFeaturesExtractor(opts);
    var mfccExtractor = new MfccExtractor(mfccOptions);

    featureString = $"green,";

    Console.WriteLine($"{tag} Reading from file");
    using (var stream = new FileStream(_filePath, FileMode.Open, FileAccess.Read))
    {
        var waveFile = new WaveFile(stream);
        DiscreteSignal signal = waveFile[channel: Channels.Left];

        mfccVectors = mfccExtractor.ComputeFrom(signal.Samples);
        tdVectors = tdExtractor.ComputeFrom(signal.Samples);

        var resolution = (float)samplingRate / fftSize;
        var frequencies = Enumerable.Range(0, fftSize / 2 + 1)
                                    .Select(f => f * resolution)
                                    .ToArray();
        var spectrum = new Fft(fftSize).MagnitudeSpectrum(signal).Samples;
        var centroid = Spectral.Centroid(spectrum, frequencies);
        var spread = Spectral.Spread(spectrum, frequencies);
        var flatness = Spectral.Flatness(spectrum, 0);
        var noiseness = Spectral.Noiseness(spectrum, frequencies, 3000);
        var rolloff = Spectral.Rolloff(spectrum, frequencies, 0.85f);
        var crest = Spectral.Crest(spectrum);
        var decrease = Spectral.Decrease(spectrum);
        var entropy = Spectral.Entropy(spectrum);
        string specFeatures = string.Format(
            invariant,
            "{0},{1},{2},{3},{4},{5},{6},{7}",
            centroid, spread, flatness, noiseness, rolloff, crest, decrease, entropy);

        Console.WriteLine($"{tag} All features ready");

        // Step through the frames about once per second of audio. The step is clamped to at
        // least 1: a zero duration used to divide by zero and a short file could produce a
        // step of 0 (endless loop) or a negative step (invalid index).
        int step = Math.Max(1, mfccVectors.Count / Math.Max(1, duration) - 1);
        int frameCount = Math.Min(mfccVectors.Count, tdVectors.Count);

        string tempPath = Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "temp");
        MLContext mLContext = new MLContext();

        for (int calibC = 0; calibC < frameCount; calibC += step)
        {
            var mfccPart = string.Join(",", mfccVectors[calibC].Take(mfcc_no).Select(v => v.ToString(invariant)));
            var tdPart = string.Join(",", tdVectors[calibC].Take(tdFeatureCount).Select(v => v.ToString(invariant)));
            featureString = mfccPart + "," + tdPart + "," + specFeatures;
            Console.WriteLine($"{tag} Feature String ready {featureString}");

            // WriteAllText replaces an existing file, so no separate delete is needed.
            File.WriteAllText(tempPath, featureString);

            IDataView dataView = mLContext.Data.LoadFromTextFile<ModelInput>(
                path: tempPath,
                hasHeader: false,
                separatorChar: ',',
                allowQuoting: true,
                allowSparse: false);

            // The temp file holds exactly one row: use it as the model input.
            ModelInput sampleForPrediction = mLContext.Data.CreateEnumerable<ModelInput>(dataView, false)
                                                           .First();
            ModelOutput opm = ConsumeModel.Predict(sampleForPrediction);
            featureTimeList.Add(opm.Score);
            Console.WriteLine($"{tag} Feature vs time list ready");
        }
        //Console.WriteLine($"{tag} MFCC: {mfccVectors.Count}");
        //Console.WriteLine($"{tag} TD: {tdVectors.Count}");
        //Console.WriteLine($"{tag} featureTimeArray: {featureTimeList.Count} {featureString}");
    }
}
/// <summary>
/// Starts playback of <paramref name="sf"/>, reads the track duration, extracts MFCC,
/// time-domain and spectral features from the wave file at <paramref name="_filepath"/> and runs
/// the prediction model on summary statistics gathered window by window, appending each score to
/// <c>featureTimeList</c>. Finishes by calling <c>playAudio()</c>.
/// </summary>
/// <param name="_filepath">Path of the wave file to analyse; feature extraction is skipped when null.</param>
/// <param name="sf">Storage file used as the media source and for the duration lookup.</param>
public async void extractFeatures(string _filepath, StorageFile sf)
{
    op = new float[10];
    tdVectors = new List<float[]>();
    mfccVectors = new List<float[]>();
    featureTimeList = new List<float[]>();

    FilePath = _filepath;
    PredictedLabel = "Ready!.";
    //player.Load(GetStreamFromFile(FilePath));
    //player.Play();
    mMedia.Source = MediaSource.CreateFromStorageFile(sf);
    mMedia.AutoPlay = true;

    MusicProperties properties = await sf.Properties.GetMusicPropertiesAsync();
    TimeSpan myTrackDuration = properties.Duration;
    duration = Convert.ToInt32(myTrackDuration.TotalSeconds);

    if (FilePath != null)
    {
        const int mfcc_no = 24;
        const int tdFeatureCount = 4;
        const int samplingRate = 44100;
        const int fftSize = 1024;

        // The feature row is parsed back as comma-separated text, so numbers must never be
        // written with a culture-specific decimal comma.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var mfccOptions = new MfccOptions
        {
            SamplingRate = samplingRate,
            FeatureCount = mfcc_no,
            FrameDuration = 0.025 /*sec*/,
            HopDuration = 0.010 /*sec*/,
            PreEmphasis = 0.97,
            Window = WindowTypes.Hamming
        };
        var opts = new MultiFeatureOptions
        {
            SamplingRate = samplingRate,
            FrameDuration = 0.025,
            HopDuration = 0.010
        };
        var tdExtractor = new TimeDomainFeaturesExtractor(opts);
        var mfccExtractor = new MfccExtractor(mfccOptions);

        featureString = $"green,";

        // One value list per MFCC coefficient / time-domain feature.
        var mfccList = new List<List<double>>();
        var tdList = new List<List<double>>();
        for (var i = 0; i < mfcc_no; i++)
        {
            mfccList.Add(new List<double>());
        }
        for (var i = 0; i < tdFeatureCount; i++)
        {
            tdList.Add(new List<double>());
        }

        // Nine summary statistics per value list, in the column order the feature row uses.
        Func<List<double>, double[]> summarize = values => new[]
        {
            Statistics.Mean(values),
            Statistics.Minimum(values),
            Statistics.Variance(values),
            Statistics.StandardDeviation(values),
            Statistics.Median(values),
            Statistics.LowerQuartile(values),
            Statistics.UpperQuartile(values),
            Statistics.Skewness(values),
            Statistics.Kurtosis(values)
        };

        Console.WriteLine($"{tag} Reading from file");
        using (var stream = new FileStream(FilePath, FileMode.Open, FileAccess.Read))
        {
            var waveFile = new WaveFile(stream);
            DiscreteSignal signal = waveFile[channel: Channels.Left];

            mfccVectors = mfccExtractor.ComputeFrom(signal.Samples);
            tdVectors = tdExtractor.ComputeFrom(signal.Samples);

            var resolution = (float)samplingRate / fftSize;
            var frequencies = Enumerable.Range(0, fftSize / 2 + 1)
                                        .Select(f => f * resolution)
                                        .ToArray();
            var spectrum = new Fft(fftSize).MagnitudeSpectrum(signal).Samples;
            var centroid = Spectral.Centroid(spectrum, frequencies);
            var spread = Spectral.Spread(spectrum, frequencies);
            var flatness = Spectral.Flatness(spectrum, 0);
            var noiseness = Spectral.Noiseness(spectrum, frequencies, 3000);
            var rolloff = Spectral.Rolloff(spectrum, frequencies, 0.85f);
            var crest = Spectral.Crest(spectrum);
            var decrease = Spectral.Decrease(spectrum);
            var entropy = Spectral.Entropy(spectrum);
            string specFeatures = string.Format(
                invariant,
                "{0},{1},{2},{3},{4},{5},{6},{7}",
                centroid, spread, flatness, noiseness, rolloff, crest, decrease, entropy);

            Console.WriteLine($"{tag} All features ready");

            // Frames consumed per prediction (about one second of audio). Clamped to at least 1:
            // a zero duration used to divide by zero, and a window of 0 frames never advanced
            // the frame index, so the outer loop never terminated.
            int windowSize = Math.Max(1, mfccVectors.Count / Math.Max(1, duration) - 1);
            int frameCount = Math.Min(mfccVectors.Count, tdVectors.Count);

            string tempPath = Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "temp");
            MLContext mLContext = new MLContext();

            for (int calibC = 0; calibC < frameCount;)
            {
                // NOTE(review): mfccList/tdList are not cleared between windows, so each
                // prediction summarizes all frames seen so far - confirm this is intended.
                for (var j = 0; j < windowSize && calibC < frameCount; j++, calibC++)
                {
                    for (var i = 0; i < mfcc_no; i++)
                    {
                        mfccList[i].Add(mfccVectors[calibC][i]);
                    }
                    for (var i = 0; i < tdFeatureCount; i++)
                    {
                        tdList[i].Add(tdVectors[calibC][i]);
                    }
                }

                var mfcc_statistics = mfccList.SelectMany(values => summarize(values));
                var td_statistics = tdList.SelectMany(values => summarize(values));

                // Row: MFCC statistics, time-domain statistics, spectral descriptors.
                featureString =
                    string.Join(",", mfcc_statistics.Select(v => v.ToString(invariant))) + "," +
                    string.Join(",", td_statistics.Select(v => v.ToString(invariant))) + "," +
                    specFeatures;
                Console.WriteLine($"{tag} Feature String ready {featureString}");

                // WriteAllText replaces an existing file, so no separate delete is needed.
                File.WriteAllText(tempPath, featureString);

                IDataView dataView = mLContext.Data.LoadFromTextFile<ModelInput>(
                    path: tempPath,
                    hasHeader: false,
                    separatorChar: ',',
                    allowQuoting: true,
                    allowSparse: false);

                // The temp file holds exactly one row: use it as the model input.
                ModelInput sampleForPrediction = mLContext.Data.CreateEnumerable<ModelInput>(dataView, false)
                                                               .First();
                ModelOutput opm = ConsumeModel.Predict(sampleForPrediction);
                featureTimeList.Add(opm.Score);
                Console.WriteLine($"{tag} Feature vs time list ready");
            }
            //Console.WriteLine($"{tag} MFCC: {mfccVectors.Count}");
            //Console.WriteLine($"{tag} TD: {tdVectors.Count}");
            //Console.WriteLine($"{tag} featureTimeArray: {featureTimeList.Count} {featureString}");
        }
    }

    playAudio();
}
/// <summary>
/// Background-worker body: when the user audio changed, reloads it and recomputes MFCC and pitch
/// features for the selected range; then refreshes the chart data and, if requested through the
/// worker argument, runs recognition and shows the result in <c>reg_lb</c>.
/// </summary>
/// <param name="sender">The background worker (unused).</param>
/// <param name="e">
/// <c>Argument</c> must be a boxed <c>bool</c> telling whether to run recognition; the same value
/// is handed back in <c>Result</c>.
/// </param>
private void bgw_DoWork(object sender, DoWorkEventArgs e)
{
    bool recognize = (bool)e.Argument;

    if (_yourChanged)
    {
        _yourWav = new WavFileWrapper(_yourPath);
        if (_yourWav.Load())
        {
            _yourWav.NormalizeWave(1.0);
            option = VCContext.Instance.MFCCOptions;
            LogUtil.Info("Load Wave: {0} -- OK\n", _yourPath);
            if (option.ShiftSampleToZero)
            {
                LogUtil.Info("Shift Sample To Zero: -- -- OK\n");
                _yourWav.ShifToZero();
            }

            // The selection is stored as fractions of the sample count; convert back to sample indices.
            int sampleCount = _yourWav.FullData.Count;
            uint firstSample = (uint)(_startSelected * sampleCount);
            uint lastSample = (uint)(_endSelected * sampleCount);
            Debug.WriteLine("Select Data voice: Start {0} End {1}", firstSample, lastSample);
            _yourWav.SelectedWave(firstSample, lastSample);

            _yourMfcc = new MFCCWrapper(_yourWav, option.TimeFrame, option.TimeShift, option.CepFilter,
                                        option.LowFreq, option.HighFreq, option.NumCeps, 4);
            _yourMfcc.UserStandardization = option.UseStandardization;
            _yourMfcc.Process();

            _yourPitch = new PitchWrapper(_yourWav, option.PitchTimeFrame, option.PitchTimeShift,
                                          option.PitchLowFreq, option.PitchHighFreq, option.PitchType,
                                          option.DropUnPitch);
            if (option.UseMedian)
            {
                _yourPitch.SetMedianWindowSize(option.MedianWindow);
            }
            _yourPitch.Process();
        }
    }

    bool featuresReady = _yourMfcc != null && _yourMfcc.ProcessDone;
    if (featuresReady)
    {
        // TO DO: Process Bar
        var chartTags = new[]
        {
            FormTag.YOUR_WAVE,
            FormTag.YOUR_MFCC,
            FormTag.YOUR_FREQ,
            FormTag.YOUR_DOUBLE,
            FormTag.YOUR_DETAL,
            FormTag.YOUR_PITCH
        };
        foreach (var chartTag in chartTags)
        {
            SetDataChart(chartTag);
        }
    }

    e.Result = recognize;

    if (!recognize || _yourMfcc == null)
    {
        return;
    }

    // Pick the coefficient set selected for training; stays null for any other value.
    List<List<double>> data = null;
    switch (VCContext.Instance.MFCCOptions.TrainCofficientType)
    {
        case 0:
            data = _yourMfcc.Mfcc;
            break;
        case 1:
            data = _yourMfcc.DetalMfcc;
            break;
        case 2:
            data = _yourMfcc.DoubleDetalMfcc;
            break;
    }

    Action showResult = new Action(() => { reg_lb.Text = _trainTask.Reg(data); });
    Invoke(showResult);
}