/// <summary>
/// Loads the audio file referenced by a training row, shows it in the wave viewer
/// and places the two selection sliders on the row's start/end positions.
/// </summary>
/// <param name="row">Training row supplying the relative audio path, the word label and the start/end positions.</param>
/// <remarks>
/// Nothing happens when the file does not exist. When the file exists but cannot be
/// loaded, the changed flag, path and wave wrapper fields are still updated; only the
/// viewer, label and selection are left untouched.
/// </remarks>
private void UpdateRow(TrainFilesCarrier.TrainFileRow row)
{
    string audioPath = VCDir.Instance.TrainDirAudio + row.Path;
    if (!File.Exists(audioPath))
    {
        return;
    }

    // The new source is recorded before loading, so these fields change even if Load() fails.
    _yourChanged = true;
    _yourPath = audioPath;
    _yourWav = new WavFileWrapper(_yourPath);
    if (!_yourWav.Load())
    {
        return;
    }

    _yourWav.NormalizeWave(1.0);
    LogUtil.Info("Load Wave: {0} -- OK\n", _yourPath);

    option = VCContext.Instance.MFCCOptions;
    if (option.ShiftSampleToZero)
    {
        LogUtil.Info("Shift Sample To Zero: -- -- OK\n");
        _yourWav.ShifToZero();
    }

    waveViewer.WaveData = _yourWav.FullData;
    waveViewer.FitToScreen();
    _label = row.Word;

    // The selection is stored as a fraction of FullData.Count and mirrored onto the sliders.
    int sampleCount = _yourWav.FullData.Count;
    _startSelected = (float)row.Start / sampleCount;
    _endSelected = (float)row.End / sampleCount;
    waveViewer.LeftSlider = _startSelected;
    waveViewer.RightSlider = _endSelected;
}
/// <summary>
/// Recognizes the word in a segment of a wave file: extracts the configured
/// coefficient set from the segment, scores it against every model in
/// <c>_models</c> and returns the word whose model gives the highest log-probability.
/// </summary>
/// <param name="path">Path of the wave file to load.</param>
/// <param name="start">Segment start, cast to <c>uint</c> and passed to <c>SelectedWave</c>.</param>
/// <param name="end">Segment end, cast to <c>uint</c> and passed to <c>SelectedWave</c>.</param>
/// <returns>
/// The best-scoring entry of <c>_words</c>; <c>" Models Failed"</c> when there are no
/// words or no models to score against; <c>"NONE"</c> when the wave cannot be loaded
/// or MFCC processing fails.
/// </returns>
public string Reg(string path, int start, int end)
{
    // _words and _models are indexed in parallel below, so both must be non-empty.
    // Checking only _words let an empty or null _models crash on _models[0].
    if (_words == null || _words.Count == 0 || _models == null || _models.Count == 0)
    {
        return(" Models Failed");
    }

    WavFileWrapper wav = new WavFileWrapper(path);
    LogUtil.Info("*******************Reg*******************\n");
    LogUtil.Info("Load Wave: {0}\n", path);
    if (!wav.Load())
    {
        // The load failure is reported here; previously this message was only
        // emitted when MFCC processing failed and a failed load was silent.
        LogUtil.Info("Load Wave: {0} -- FAILED\n", path);
        return("NONE");
    }

    wav.NormalizeWave(1.0f);
    LogUtil.Info("Load Wave: {0} -- OK\n", path);
    if (VCContext.Instance.MFCCOptions.ShiftSampleToZero)
    {
        LogUtil.Info("Shift Sample To Zero: -- -- OK\n");
        wav.ShifToZero();
    }
    wav.SelectedWave((uint)start, (uint)end);

    MFCCWrapper mfcc = new MFCCWrapper(
        wav,
        VCContext.Instance.MFCCOptions.TimeFrame,
        VCContext.Instance.MFCCOptions.TimeShift,
        VCContext.Instance.MFCCOptions.CepFilter,
        VCContext.Instance.MFCCOptions.LowFreq,
        VCContext.Instance.MFCCOptions.HighFreq,
        VCContext.Instance.MFCCOptions.NumCeps,
        2);

    // Apply the same standardization option that Train() applies when it extracts
    // features, so recognition input is prepared the same way as the training input.
    mfcc.UserStandardization = VCContext.Instance.MFCCOptions.UseStandardization;

    if (!mfcc.Process())
    {
        LogUtil.Info("Load Wave: {0} Process MFCC -- FAILED\n", path);
        return("NONE");
    }

    // Pick the coefficient set selected by the options; unknown values fall back to plain MFCC.
    List<List<double>> data;
    switch (VCContext.Instance.MFCCOptions.TrainCofficientType)
    {
    case 1:
        data = mfcc.DetalMfcc;
        break;

    case 2:
        data = mfcc.DoubleDetalMfcc;
        break;

    default:
        data = mfcc.Mfcc;
        break;
    }
    LogUtil.Info("Load Wave: {0} Process MFCC -- OK\n", path);

    // Only indices valid in BOTH lists are scored, so a length mismatch cannot
    // throw while looking up the word that belongs to a model.
    int candidateCount = _models.Count < _words.Count ? _models.Count : _words.Count;

    int reg = 0;
    double max = _models[reg].LogProbability(data);
    LogUtil.Info("HModel = {0} Log Value = {1}\n", _words[reg], max);
    for (int i = 1; i < candidateCount; i++)
    {
        double cur = _models[i].LogProbability(data);
        LogUtil.Info("HModel = {0} Log Value = {1}\n", _words[i], cur);
        if (max < cur)
        {
            max = cur;
            reg = i;
        }
    }

    LogUtil.Info("HModel = {0} Log Value = {1} Reg = {2}\n", _words[reg], max, _words[reg]);
    return(_words[reg]);
}
/// <summary>
/// Rebuilds the word list and the HMM models from the rows in <c>tbEntry</c>:
/// for each distinct word, extracts and saves MFCC / delta / double-delta files for
/// every row of that word, trains one HMM on the configured coefficient files and,
/// on success, saves the model and registers it.
/// </summary>
/// <returns>Always <c>true</c>; per-word failures are only reported through the log.</returns>
/// <remarks>
/// <c>_words</c>, <c>_hmms</c> and <c>_models</c> are kept index-aligned: a word is
/// added to all three only when its model trained successfully. <c>Reg</c> looks up
/// <c>_words[i]</c> for <c>_models[i]</c>, so a word without a model must not occupy a slot.
/// </remarks>
public bool Train()
{
    _words = new List<string>();
    _hmms = new List<string>();
    _models = new List<HMMWrapper>();

    // Distinct words in first-seen order. These are only candidates; a word
    // reaches _words once its model has been trained.
    List<string> candidates = new List<string>();
    foreach (TrainFilesCarrier.TrainFileRow ent in tbEntry)
    {
        if (!candidates.Contains(ent.Word))
        {
            candidates.Add(ent.Word);
            LogUtil.Info("Word : {0}\n", ent.Word);
        }
    }

    foreach (string word in candidates)
    {
        List<string> files = new List<string>();
        LogUtil.Info("List File for Word : {0}\n", word);
        VCDir.CreateDirectory(VCDir.Instance.TrainDirMFCC + word);
        VCDir.CreateDirectory(VCDir.Instance.TrainDirHMM);

        foreach (TrainFilesCarrier.TrainFileRow ent in tbEntry)
        {
            if (!word.Equals(ent.Word))
            {
                continue;
            }

            string audioPath = VCDir.Instance.TrainDirAudio + ent.Path;
            string featureBase = VCDir.Instance.TrainDirMFCC + ent.Path;
            WavFileWrapper wav = new WavFileWrapper(audioPath);
            if (!wav.Load())
            {
                continue;
            }

            wav.NormalizeWave(1.0f);
            if (VCContext.Instance.MFCCOptions.ShiftSampleToZero)
            {
                wav.ShifToZero();
            }
            wav.SelectedWave((uint)ent.Start, (uint)ent.End);

            MFCCWrapper extractor = new MFCCWrapper(
                wav,
                VCContext.Instance.MFCCOptions.TimeFrame,
                VCContext.Instance.MFCCOptions.TimeShift,
                VCContext.Instance.MFCCOptions.CepFilter,
                VCContext.Instance.MFCCOptions.LowFreq,
                VCContext.Instance.MFCCOptions.HighFreq,
                VCContext.Instance.MFCCOptions.NumCeps,
                2);
            extractor.UserStandardization = VCContext.Instance.MFCCOptions.UseStandardization;

            string mfccFile = featureBase + ".Mfcc" + ".xml";
            string deltaFile = featureBase + ".Delta" + ".xml";
            string doubleFile = featureBase + ".Double" + ".xml";

            // Non-short-circuit &= on purpose: all three save calls run even after a failure.
            bool res = extractor.Process();
            res &= extractor.SaveMFCC(mfccFile);
            res &= extractor.SaveDeltaMFCC(deltaFile);
            res &= extractor.SaveDoubleMFCC(doubleFile);

            if (res)
            {
                switch (VCContext.Instance.MFCCOptions.TrainCofficientType)
                {
                case 1:
                    files.Add(deltaFile);
                    break;

                case 2:
                    files.Add(doubleFile);
                    break;

                default:
                    // Case 0 and any unrecognised value train on plain MFCC, the same
                    // fallback Reg uses when it selects the coefficients to score.
                    files.Add(mfccFile);
                    break;
                }
            }

            LogUtil.Info("File : W - {0} Path - {1} MFCC process - {2}\n", ent.Word, VCDir.Instance.TrainDirMFCC + ent.Path + ".xml", (res) ? "Completed" : "Failed");
        }

        HMMWrapper hmm = new HMMWrapper(
            VCContext.Instance.MFCCOptions.TrainHMMState,
            VCContext.Instance.MFCCOptions.TrainGMMComponent,
            VCContext.Instance.MFCCOptions.TrainGMMCovVar);
        bool ok = hmm.Trainning(files);
        LogUtil.Info("Train word: {0} - {1}\n", word, (ok) ? "Completed" : "Failed");

        if (ok)
        {
            string modelPath = VCDir.Instance.TrainDirHMM + word + ".xml";
            hmm.Save(modelPath);

            // Register word, model and model file together so the three lists stay aligned.
            _words.Add(word);
            _models.Add(hmm);
            _hmms.Add(word + ".xml");
            LogUtil.Info("Save {0} Model to {1}\n", word, modelPath);
        }
    }

    Save(VCDir.Instance.TrainXmlFile);
    return(true);
}
/// <summary>
/// Background-worker handler: when the current wave was flagged as changed, reloads it
/// and recomputes its MFCC and pitch data; then refreshes the charts and, if requested,
/// runs recognition and writes the result to <c>reg_lb</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">
/// <c>e.Argument</c> must be a boxed <c>bool</c> saying whether recognition should run;
/// the same value is copied to <c>e.Result</c>.
/// </param>
private void bgw_DoWork(object sender, DoWorkEventArgs e)
{
    bool reg = (bool)e.Argument;

    if (_yourChanged)
    {
        _yourWav = new WavFileWrapper(_yourPath);
        if (_yourWav.Load())
        {
            _yourWav.NormalizeWave(1.0);
            option = VCContext.Instance.MFCCOptions;
            LogUtil.Info("Load Wave: {0} -- OK\n", _yourPath);
            if (option.ShiftSampleToZero)
            {
                LogUtil.Info("Shift Sample To Zero: -- -- OK\n");
                _yourWav.ShifToZero();
            }

            // The selection is stored as fractions of the data length; convert it back to positions.
            int size = _yourWav.FullData.Count;
            uint startPnt = (uint)(_startSelected * size);
            uint endPnt = (uint)(_endSelected * size);
            Debug.WriteLine("Select Data voice: Start {0} End {1}", startPnt, endPnt);
            _yourWav.SelectedWave(startPnt, endPnt);

            _yourMfcc = new MFCCWrapper(_yourWav, option.TimeFrame, option.TimeShift, option.CepFilter, option.LowFreq, option.HighFreq, option.NumCeps, 4);
            _yourMfcc.UserStandardization = option.UseStandardization;
            _yourMfcc.Process();

            _yourPitch = new PitchWrapper(_yourWav, option.PitchTimeFrame, option.PitchTimeShift, option.PitchLowFreq, option.PitchHighFreq, option.PitchType, option.DropUnPitch);
            if (option.UseMedian)
            {
                _yourPitch.SetMedianWindowSize(option.MedianWindow);
            }
            _yourPitch.Process();
        }
    }

    if (_yourMfcc != null && _yourMfcc.ProcessDone)
    {
        // TO DO: Process Bar
        SetDataChart(FormTag.YOUR_WAVE);
        SetDataChart(FormTag.YOUR_MFCC);
        SetDataChart(FormTag.YOUR_FREQ);
        SetDataChart(FormTag.YOUR_DOUBLE);
        SetDataChart(FormTag.YOUR_DETAL);
        SetDataChart(FormTag.YOUR_PITCH);
    }

    e.Result = reg;

    if (reg && _yourMfcc != null)
    {
        List<List<double>> data;
        switch (VCContext.Instance.MFCCOptions.TrainCofficientType)
        {
        case 1:
            data = _yourMfcc.DetalMfcc;
            break;

        case 2:
            data = _yourMfcc.DoubleDetalMfcc;
            break;

        default:
            // Case 0 and any unrecognised value use plain MFCC, so the recognizer
            // is never handed a null feature list.
            data = _yourMfcc.Mfcc;
            break;
        }

        // The label update is marshalled through Invoke because DoWork does not run on the UI thread.
        Action act = new Action(() => { reg_lb.Text = _trainTask.Reg(data); });
        Invoke(act);
    }
}
/// <summary>
/// Loads the wave at <c>_yourPath</c>, shows it in the wave viewer and calls
/// <c>vadVoice()</c>; in recognition mode it then extracts MFCC data from the current
/// selection and writes the recognition result to <c>reg_lb</c>.
/// </summary>
/// <remarks>
/// Nothing is shown when the wave cannot be loaded. When MFCC processing fails in
/// recognition mode a message box naming the file is displayed.
/// </remarks>
private void showWaveSound()
{
    _yourWav = new WavFileWrapper(_yourPath);
    if (!_yourWav.Load())
    {
        return;
    }

    waveViewer.WaveData = _yourWav.FullData;
    waveViewer.FitToScreen();
    vadVoice();

    if (!_regMode)
    {
        return;
    }

    // The selection is stored as fractions of the data length; convert it back to positions.
    uint size = (uint)_yourWav.FullData.Count;
    uint begin = (uint)(_startSelected * size);
    uint end = (uint)(_endSelected * size);

    _yourWav.NormalizeWave(1.0f);
    option = VCContext.Instance.MFCCOptions;
    LogUtil.Info("Load Wave: {0} -- OK\n", _yourPath);
    if (option.ShiftSampleToZero)
    {
        LogUtil.Info("Shift Sample To Zero: -- -- OK\n");
        _yourWav.ShifToZero();
    }

    Debug.WriteLine("Select Data voice: Start {0} End {1}", begin, end);
    _yourWav.SelectedWave(begin, end);

    _yourMfcc = new MFCCWrapper(_yourWav, option.TimeFrame, option.TimeShift, option.CepFilter, option.LowFreq, option.HighFreq, option.NumCeps, 4);
    _yourMfcc.UserStandardization = option.UseStandardization;

    if (!_yourMfcc.Process())
    {
        // Format the message explicitly: MessageBox.Show(string, string) treats the second
        // argument as the caption, so the "{0}" placeholder was shown literally before.
        MessageBox.Show(string.Format(" Cant Extraction file {0}\n", _yourPath));
        return;
    }

    List<List<double>> data;
    switch (VCContext.Instance.MFCCOptions.TrainCofficientType)
    {
    case 1:
        data = _yourMfcc.DetalMfcc;
        break;

    case 2:
        data = _yourMfcc.DoubleDetalMfcc;
        break;

    default:
        // Case 0 and any unrecognised value use plain MFCC, so the recognizer
        // is never handed a null feature list.
        data = _yourMfcc.Mfcc;
        break;
    }

    Action act = new Action(() => { reg_lb.Text = _trainTask.Reg(data); });
    Invoke(act);
}