/// <summary>
/// Classifies a window of audio samples by running features from
/// <c>MfccAccord.ComputeMfcc12D</c> through <c>_network</c>.
/// </summary>
/// <param name="samples">The audio window to classify; its length is passed to <c>MfccAccord</c> as the window size.</param>
/// <param name="sampleRate">Sample rate of <paramref name="samples"/>, passed to <c>MfccAccord</c>.</param>
/// <returns>The index reported by <c>Max</c> over the network output, cast to <see cref="VowelType"/>.</returns>
public override VowelType Decide(ReadOnlySpan<float> samples, int sampleRate)
{
    // NOTE(review): 0 / 8000 / 24 are presumably the lower/upper frequency bounds (Hz)
    // and the mel filter count - confirm against the MfccAccord constructor.
    var extractor = new MfccAccord(sampleRate, samples.Length, 0, 8000, 24);
    var features = extractor.ComputeMfcc12D(samples);

    var activations = this._network.Compute(features);

    // Only the index written to the out parameter is used (presumably the arg-max); the returned value is discarded.
    activations.Max(out var winner);
    return (VowelType)winner;
}
private static void MelFilteredSpectrum() { const int windowSize = 2048; const int sampleRate = 44100; var mfccComputer = new MfccAccord(sampleRate, windowSize, 0, 8000, 8); var hzAxis = mfccComputer.HzAxisOfMelSpectrum(); float[] ReadSamples(string fileName, double secs) { float[] xs; using (var wavReader = new WaveFileReader(Path.Combine(CommonUtils.GetTrainingDataDirectory(), fileName))) { var provider = wavReader.ToSampleProvider().Skip(TimeSpan.FromSeconds(secs)).ToMono(); if (provider.WaveFormat.SampleRate != sampleRate) { throw new Exception(); } xs = new float[windowSize]; for (var readSamples = 0; readSamples < xs.Length;) { var count = provider.Read(xs, readSamples, xs.Length - readSamples); if (count == 0) { break; } readSamples += count; } } return(xs); } void ShowSpectrum(string title, params (string, double)[] inputs)
/// <summary>
/// Reads a single 2048-sample window from a fixed training recording and writes each value
/// returned by <c>MfccAccord.ComputeMfcc12D</c> to the console, one per line.
/// </summary>
/// <exception cref="EndOfStreamException">
/// The recording runs out of samples before the window has been filled.
/// </exception>
private static void Mfcc()
{
    const int windowSize = 2048;

    var window = new float[windowSize];
    var wavPath = Path.Combine(CommonUtils.GetTrainingDataDirectory(), "あいうえお 2017-12-18 00-17-09.wav");
    int sampleRate;

    // Fetch 2048 samples starting from the 2.5 s mark.
    using (var wav = new WaveFileReader(wavPath))
    {
        var source = wav.ToSampleProvider()
                        .Skip(TimeSpan.FromSeconds(2.5))
                        .ToMono();
        sampleRate = source.WaveFormat.SampleRate;

        // Read may return fewer samples than requested, so keep going until the window is full.
        var filled = 0;
        while (filled < window.Length)
        {
            var read = source.Read(window, filled, window.Length - filled);
            if (read == 0)
            {
                throw new EndOfStreamException();
            }

            filled += read;
        }
    }

    var extractor = new MfccAccord(sampleRate, windowSize, 0, 8000, 24);
    var coefficients = extractor.ComputeMfcc12D(window);
    foreach (var coefficient in coefficients)
    {
        Console.WriteLine(coefficient);
    }
}
/// <summary>
/// Loads the .wav file that sits next to <paramref name="csvFileName"/> and, for every CSV record
/// whose class is not <c>VowelType.Other</c>, queues a task that computes a mean-subtracted
/// <c>MelSpectrum</c> of a 2048-sample window and appends it to <c>TrainingData</c>.
/// </summary>
/// <param name="csvFileName">
/// Path of the CSV file; its "Time" and "Class" columns are read, and the same path with a
/// ".wav" extension is opened as the audio source.
/// </param>
/// <returns>A task that completes once every queued feature-extraction task has completed.</returns>
public override Task AddTrainingDataAsync(string csvFileName)
{
    const int windowSize = 2048;

    float[] samples;
    int rate;

    // Load the .wav file that shares its name with the CSV.
    var wavPath = Path.ChangeExtension(csvFileName, ".wav");
    using (var wav = new WaveFileReader(wavPath))
    {
        var mono = wav.ToSampleProvider().ToMono();
        rate = mono.WaveFormat.SampleRate;

        // Just pull every sample into memory.
        samples = new float[wav.SampleCount];
        var filled = 0;
        while (filled < samples.Length)
        {
            var read = mono.Read(samples, filled, samples.Length - filled);
            if (read == 0)
            {
                // Source ran dry early; whatever remains of the buffer stays zeroed.
                break;
            }

            filled += read;
        }
    }

    var pending = new List<Task>();
    using (var csv = new CsvReader(csvFileName, true))
    {
        // A single extractor instance is shared by all queued tasks.
        var extractor = new MfccAccord(rate, windowSize, 0, 6000, 8);

        while (csv.ReadNextRecord())
        {
            var time = double.Parse(csv["Time"], CultureInfo.InvariantCulture);
            var vowelType = ParseVowelType(csv["Class"]);

            // Not sure yet what to do with these, so they are skipped.
            if (vowelType == VowelType.Other)
            {
                continue;
            }

            // Fire the work off in parallel, one task per record.
            var job = Task.Run(() =>
            {
                // Window begins at the sample index corresponding to the record's time.
                var window = new ReadOnlySpan<float>(samples, (int)(time * rate), windowSize);
                var features = extractor.MelSpectrum(window);

                // Subtract the mean from the features, writing the result back into the same array.
                features.Subtract(features.Mean(), features);

                lock (this.TrainingData)
                {
                    this.TrainingData.Add((features, vowelType));
                }
            });
            pending.Add(job);
        }
    }

    return Task.WhenAll(pending);
}
/// <summary>
/// Classifies a window of audio samples by passing its <c>MfccAccord.MelSpectrum</c> output
/// to the model held by <c>_teacher</c>.
/// </summary>
/// <param name="samples">The audio window to classify; its length is passed to <c>MfccAccord</c> as the window size.</param>
/// <param name="sampleRate">Sample rate of <paramref name="samples"/>, passed to <c>MfccAccord</c>.</param>
/// <returns>The model's decision cast to <see cref="VowelType"/>.</returns>
public override VowelType Decide(ReadOnlySpan<float> samples, int sampleRate)
{
    // NOTE(review): the AddTrainingDataAsync visible in this file subtracts the mean from
    // MelSpectrum output built with the same (0, 6000, 8) parameters; no such step happens
    // here. Confirm whether both belong to the same classifier and whether that is intended.
    var extractor = new MfccAccord(sampleRate, samples.Length, 0, 6000, 8);
    var spectrum = extractor.MelSpectrum(samples);

    var decision = this._teacher.Model.Decide(spectrum);
    return (VowelType)decision;
}
/// <summary>
/// Classifies a window of audio samples by passing features from
/// <c>MfccAccord.ComputeMfcc12D</c> to the model held by <c>_teacher</c>.
/// </summary>
/// <param name="samples">The audio window to classify; its length is passed to <c>MfccAccord</c> as the window size.</param>
/// <param name="sampleRate">Sample rate of <paramref name="samples"/>, passed to <c>MfccAccord</c>.</param>
/// <returns>The model's decision cast to <see cref="VowelType"/>.</returns>
public override VowelType Decide(ReadOnlySpan<float> samples, int sampleRate)
{
    // NOTE(review): 0 / 8000 / 24 are presumably the lower/upper frequency bounds (Hz)
    // and the mel filter count - confirm against the MfccAccord constructor.
    var extractor = new MfccAccord(sampleRate, samples.Length, 0, 8000, 24);
    var features = extractor.ComputeMfcc12D(samples);

    var decision = this._teacher.Model.Decide(features);
    return (VowelType)decision;
}