/// <summary>
/// Manual experiment: reads a single 1024-sample mono window from a hard-coded audio file,
/// plots the log of its squared FFT magnitudes and its NSDF, and prints the estimated
/// fundamental frequency to the console.
/// </summary>
/// <exception cref="EndOfStreamException">
/// The provider returned no more samples before a full window was read.
/// </exception>
private static void BasicTest()
{
    const int windowSize = 1024;
    var window = new float[windowSize];
    int sampleRate;

    // Grab 1024 samples starting at the skip offset used below (5.2 s into the file).
    using (var reader = new AudioFileReader(@"C:\Users\azyob\Documents\Jupyter\chroma\BEYOND THE STARLIGHT.wav"))
    {
        var skipped = reader.ToSampleProvider().Skip(TimeSpan.FromSeconds(5.2));
        var mono = skipped.ToMono();
        sampleRate = mono.WaveFormat.SampleRate;

        // Read may hand back fewer samples than requested, so keep going until the window is full.
        var filled = 0;
        while (filled < window.Length)
        {
            var got = mono.Read(window, filled, window.Length - filled);
            if (got == 0)
            {
                throw new EndOfStreamException();
            }

            filled += got;
        }
    }

    // Spectrum plot: only the first half of the FFT bins is drawn.
    var spectrum = Array.ConvertAll(window, sample => (Complex)sample);
    FourierTransform2.FFT(spectrum, FourierTransform.Direction.Forward);

    var halfSpectrum = spectrum.Take(spectrum.Length / 2);
    var spectrumSeries = new LineSeries();
    spectrumSeries.Points.AddRange(
        halfSpectrum.Select((bin, index) => new DataPoint(index, Math.Log(bin.SquaredMagnitude()))));
    ShowPlot(new PlotModel() { Title = "スペクトル", Series = { spectrumSeries } });

    // NSDF plot for the same window.
    var nsdfValues = McLeodPitchMethod.NormalizedSquareDifference(window);
    var nsdfSeries = new LineSeries();
    nsdfSeries.Points.AddRange(nsdfValues.Select((value, index) => new DataPoint(index, value)));
    ShowPlot(new PlotModel() { Title = "NSDF", Series = { nsdfSeries } });

    var estimate = McLeodPitchMethod.EstimateFundamentalFrequency(sampleRate, window);
    Console.WriteLine("{0} Hz", estimate);
}
/// <summary>
/// Lazily analyses an audio file in units of 4096 mono samples and yields one
/// <c>PitchUnit</c> (unit index, RMS, normalized pitch) for every unit whose median pitch
/// estimate could be normalized by <c>NormalizePitch</c>.
/// </summary>
/// <param name="fileName">Path of the audio file to analyse (it is opened twice: once for playback, once for analysis).</param>
/// <param name="play">
/// When true, the file is also played through a <c>WaveOutEvent</c> and each unit is delayed
/// until its position in the audio has been reached on the wall clock.
/// </param>
/// <returns>A deferred sequence that ends when the analysis reader runs out of samples.</returns>
private static IEnumerable<PitchUnit> LoadAudioFile(string fileName, bool play)
{
    const int analysisUnit = 4096;
    const int pitchWindowSize = 1024;
    const int pitchOffsetDelta = 512;

    using (var playerReader = new AudioFileReader(fileName))
    using (var player = new WaveOutEvent())
    {
        if (play)
        {
            player.Init(playerReader);
            player.Play();
        }

        var startTime = Environment.TickCount;

        using (var reader = new AudioFileReader(fileName))
        {
            var provider = reader.ToSampleProvider().ToMono();
            var sampleRate = provider.WaveFormat.SampleRate;
            var samples = new float[analysisUnit];

            for (var unitIndex = 0; ; unitIndex++)
            {
                if (play)
                {
                    // Sleep until the moment this unit starts in the played-back audio.
                    var dueTick = (int)(startTime + unitIndex * analysisUnit * 1000.0 / sampleRate);
                    var waitTime = dueTick - Environment.TickCount;
                    if (waitTime > 0)
                    {
                        Thread.Sleep(waitTime);
                    }
                }

                // Fill one whole unit; a trailing partial unit ends the sequence.
                var filled = 0;
                while (filled < samples.Length)
                {
                    var got = provider.Read(samples, filled, samples.Length - filled);
                    if (got == 0)
                    {
                        yield break;
                    }

                    filled += got;
                }

                // Root mean square of the unit.
                var squared = 0.0;
                foreach (var sample in samples)
                {
                    squared += sample * sample;
                }

                var rms = Math.Sqrt(squared / samples.Length);

                // Pitch detection on 1024-sample windows, sliding by 512 samples.
                var f0s = new List<double>((analysisUnit - pitchOffsetDelta) / pitchOffsetDelta);
                var windowCount = (analysisUnit - pitchWindowSize) / pitchOffsetDelta + 1;
                for (var window = 0; window < windowCount; window++)
                {
                    var offset = window * pitchOffsetDelta;
                    var estimate = McLeodPitchMethod.EstimateFundamentalFrequency(
                        sampleRate,
                        new ReadOnlySpan<float>(samples, offset, pitchWindowSize));
                    if (estimate.HasValue)
                    {
                        f0s.Add(estimate.Value);
                    }
                }

                if (f0s.Count > 0)
                {
                    f0s.Sort();
                    var median = f0s[f0s.Count / 2];

                    var normalizedPitch = NormalizePitch(median);
                    if (normalizedPitch.HasValue)
                    {
                        yield return new PitchUnit(unitIndex, rms, normalizedPitch.Value);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Manual experiment: estimates pitch over half-overlapping 1024-sample windows of a training
/// recording, smooths the estimates with a median over the latest 16 values, and shows the
/// result as a scatter plot of MIDI note number against time in seconds.
/// </summary>
/// <remarks>
/// If the recording is shorter than one window the method returns without showing a plot.
/// </remarks>
private static void PitchGraph()
{
    const int windowSize = 1024;
    const int hopSize = windowSize / 2;
    const int medianSpan = 16;

    var series = new ScatterSeries();

    using (var reader = new WaveFileReader(Path.Combine(CommonUtils.GetTrainingDataDirectory(), "校歌 2018-01-17 15-10-46.wav")))
    {
        var provider = reader.ToSampleProvider().ToMono();
        var rate = provider.WaveFormat.SampleRate;
        var history = new LinkedList<double>();
        var data = new float[windowSize];

        // First pass: fill the whole window.
        var filled = 0;
        while (filled < data.Length)
        {
            var got = provider.Read(data, filled, data.Length - filled);
            if (got == 0)
            {
                return;
            }

            filled += got;
        }

        var firstPitch = McLeodPitchMethod.EstimateFundamentalFrequency(rate, data);
        if (firstPitch.HasValue)
        {
            history.AddLast(firstPitch.Value);
        }

        var endOfStream = false;
        for (var position = windowSize; ; position += hopSize)
        {
            // Slide by half a window: keep the newer half, then read a fresh second half.
            Array.Copy(data, hopSize, data, 0, hopSize);

            filled = hopSize;
            while (filled < data.Length)
            {
                var got = provider.Read(data, filled, data.Length - filled);
                if (got == 0)
                {
                    endOfStream = true;
                    break;
                }

                filled += got;
            }

            if (endOfStream)
            {
                break;
            }

            var pitch = McLeodPitchMethod.EstimateFundamentalFrequency(rate, data);
            if (!pitch.HasValue)
            {
                continue;
            }

            history.AddLast(pitch.Value);
            if (history.Count < medianSpan)
            {
                continue;
            }

            // Keep at most 16 estimates and plot their median.
            if (history.Count > medianSpan)
            {
                history.RemoveFirst();
            }

            var sorted = history.ToArray();
            Array.Sort(sorted);
            var median = sorted[sorted.Length / 2];
            series.Points.Add(new ScatterPoint((double)position / rate, CommonUtils.HzToMidiNote(median)));
        }
    }

    ShowPlot(new PlotModel() { Title = "ピッチ", Series = { series } });
}
/// <summary>
/// Manual experiment: loads a training recording into memory, and for each 4096-sample unit
/// decides a vowel by majority vote and a pitch by median, then shows the result as an interval
/// bar plot (time on the value axis, MIDI note as category, vowel as the bar title).
/// Adjacent units with the same note and vowel are merged into a single bar.
/// </summary>
private static void PitchAndLyric()
{
    float[] samples;
    int rate;

    using (var wavReader = new WaveFileReader(Path.Combine(CommonUtils.GetTrainingDataDirectory(), "校歌 2018-01-17 15-10-46.wav")))
    {
        var provider = wavReader.ToSampleProvider().ToMono();
        rate = provider.WaveFormat.SampleRate;
        samples = new float[wavReader.SampleCount];

        // Read until the buffer is full or the provider runs dry; any unread tail stays zero.
        var loaded = 0;
        while (loaded < samples.Length)
        {
            var got = provider.Read(samples, loaded, samples.Length - loaded);
            if (got == 0)
            {
                break;
            }

            loaded += got;
        }
    }

    const int analysisUnit = 4096; // 4096 samples form one analysis unit
    const int vowelWindowSize = 2048;
    const int pitchWindowSize = 1024;
    const int vowelOffsetDelta = 512;
    const int pitchOffsetDelta = 512;

    var classifier = PrepareVowelClassifier();
    var series = new IntervalBarSeries();
    var secsPerAnalysisUnit = (double)analysisUnit / rate;
    var analysisUnitCount = samples.Length / analysisUnit;

    for (var unit = 0; unit < analysisUnitCount; unit++)
    {
        var startIndex = analysisUnit * unit;
        var endIndex = startIndex + analysisUnit;

        // Largest positive sample in the unit (the first and last sample are not inspected).
        var maxPower = 0f;
        for (var pos = startIndex + 1; pos < endIndex - 1; pos++)
        {
            var sample = samples[pos];
            if (sample > maxPower)
            {
                maxPower = sample;
            }
        }

        // Too quiet.
        if (maxPower < 0.15)
        {
            continue;
        }

        // Vowel recognition on 2048-sample windows, sliding by 512 samples.
        var votes = new int[(int)VowelType.Other + 1];
        var lastVowelOffset = endIndex - vowelWindowSize;
        for (var offset = startIndex; offset <= lastVowelOffset; offset += vowelOffsetDelta)
        {
            var decided = classifier.Decide(new ReadOnlySpan<float>(samples, offset, vowelWindowSize), rate);
            votes[(int)decided]++;
        }

        // Strict majority vote: a tie for first place leaves the vowel undecided.
        var vowelCandidate = default(VowelType?);
        var topVotes = 0;
        for (var kind = 0; kind < votes.Length; kind++)
        {
            var count = votes[kind];
            if (count > topVotes)
            {
                topVotes = count;
                vowelCandidate = (VowelType)kind;
            }
            else if (count == topVotes)
            {
                vowelCandidate = null;
            }
        }

        // No usable vowel for this unit, skip it.
        var hasUsableVowel = vowelCandidate.HasValue && vowelCandidate.Value != VowelType.Other;
        if (!hasUsableVowel)
        {
            continue;
        }

        // Pitch detection on 1024-sample windows, sliding by 512 samples.
        var basicFreqs = new List<double>(analysisUnit / pitchOffsetDelta);
        var lastPitchOffset = endIndex - pitchWindowSize;
        for (var offset = startIndex; offset <= lastPitchOffset; offset += pitchOffsetDelta)
        {
            var estimate = McLeodPitchMethod.EstimateFundamentalFrequency(
                rate,
                new ReadOnlySpan<float>(samples, offset, pitchWindowSize));
            if (estimate.HasValue)
            {
                basicFreqs.Add(estimate.Value);
            }
        }

        // Pitch detection failed for every window, skip the unit.
        if (basicFreqs.Count == 0)
        {
            continue;
        }

        basicFreqs.Sort();
        var medianFreq = basicFreqs[basicFreqs.Count / 2];
        var noteNum = CommonUtils.HzToMidiNote(medianFreq);

        var plotItem = new IntervalBarItem()
        {
            Start = secsPerAnalysisUnit * unit,
            End = secsPerAnalysisUnit * (unit + 1),
            Title = vowelCandidate.ToString(),
            CategoryIndex = noteNum
        };

        var items = series.Items;
        var previous = items.Count > 0 ? items[items.Count - 1] : null;
        var mergeable = previous != null
            && previous.End == plotItem.Start
            && previous.CategoryIndex == plotItem.CategoryIndex
            && previous.Title == plotItem.Title;

        if (mergeable)
        {
            // Same note and vowel directly after the previous bar: extend that bar instead.
            previous.End = plotItem.End;
        }
        else
        {
            items.Add(plotItem);
        }
    }

    // One category label per note number 0..127: pitch-class name followed by (number / 12).
    var categoryAxis = new CategoryAxis() { Position = AxisPosition.Left };
    var noteNames = new[] { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" };
    for (var note = 0; note <= 127; note++)
    {
        var octave = (note / 12).ToString(CultureInfo.InvariantCulture);
        categoryAxis.Labels.Add(noteNames[note % 12] + octave);
    }

    ShowPlot(new PlotModel() { Title = "ピッチと母音", Axes = { categoryAxis }, Series = { series } });
}
/// <summary>
/// Analyses a sample stream unit by unit (<c>Logics.AnalysisUnit</c> samples each) and builds a
/// list of note blocks, each carrying the unit index, the MIDI note of the median pitch and the
/// vowel chosen by majority vote. Consecutive blocks are merged when
/// <c>NoteBlockModel.MergeIfPossible</c> accepts them.
/// </summary>
/// <param name="classifierType">Selects which vowel classifier task to await and use.</param>
/// <param name="provider">Source of samples; it is converted to mono before reading.</param>
/// <returns>
/// The collected blocks together with the number of complete units that were read before the
/// provider ran out of samples.
/// </returns>
public async ValueTask<VoiceAnalysisResult> Analyze(VowelClassifierType classifierType, ISampleProvider provider)
{
    // TODO: far too much redundant recomputation here; also parallelize this.
    const int vowelWindowSize = 2048;
    const int pitchWindowSize = 1024;
    const int vowelOffsetDelta = 512;
    const int pitchOffsetDelta = 512;

    provider = provider.ToMono();
    var sampleRate = provider.WaveFormat.SampleRate;
    var blocks = new List<NoteBlockModel>();
    var classifier = await this._vowelClassifierTasks[classifierType].ConfigureAwait(false);
    var samples = new float[Logics.AnalysisUnit];

    for (var unitCount = 0; ; unitCount++)
    {
        // Read one full analysis unit; running out of samples ends the analysis.
        var filled = 0;
        while (filled < samples.Length)
        {
            var got = provider.Read(samples, filled, samples.Length - filled);
            if (got == 0)
            {
                return new VoiceAnalysisResult(blocks.ToImmutableArray(), unitCount);
            }

            filled += got;
        }

        // Largest positive sample in the unit.
        var maxPower = 0f;
        for (var pos = 0; pos < samples.Length; pos++)
        {
            if (samples[pos] > maxPower)
            {
                maxPower = samples[pos];
            }
        }

        // Too quiet.
        if (maxPower < 0.15)
        {
            continue;
        }

        // Vowel recognition on 2048-sample windows, sliding by 512 samples.
        var votes = new int[(int)VowelType.Other + 1];
        var lastVowelOffset = Logics.AnalysisUnit - vowelWindowSize;
        for (var offset = 0; offset <= lastVowelOffset; offset += vowelOffsetDelta)
        {
            votes[(int)classifier.Decide(new ReadOnlySpan<float>(samples, offset, vowelWindowSize), sampleRate)]++;
        }

        // Strict majority vote: a tie for first place leaves the vowel undecided.
        var vowelCandidate = default(VowelType?);
        var topVotes = 0;
        for (var kind = 0; kind < votes.Length; kind++)
        {
            var count = votes[kind];
            if (count > topVotes)
            {
                topVotes = count;
                vowelCandidate = (VowelType)kind;
            }
            else if (count == topVotes)
            {
                vowelCandidate = null;
            }
        }

        // No usable vowel for this unit, skip it.
        var hasUsableVowel = vowelCandidate.HasValue && vowelCandidate.Value != VowelType.Other;
        if (!hasUsableVowel)
        {
            continue;
        }

        // Pitch detection on 1024-sample windows, sliding by 512 samples.
        var basicFreqs = new List<double>((Logics.AnalysisUnit - pitchOffsetDelta) / pitchOffsetDelta);
        var lastPitchOffset = Logics.AnalysisUnit - pitchWindowSize;
        for (var offset = 0; offset <= lastPitchOffset; offset += pitchOffsetDelta)
        {
            var estimate = McLeodPitchMethod.EstimateFundamentalFrequency(
                sampleRate,
                new ReadOnlySpan<float>(samples, offset, pitchWindowSize));
            if (estimate.HasValue)
            {
                basicFreqs.Add(estimate.Value);
            }
        }

        // Pitch detection failed for every window, skip the unit.
        if (basicFreqs.Count == 0)
        {
            continue;
        }

        basicFreqs.Sort();
        var medianFreq = basicFreqs[basicFreqs.Count / 2];
        var noteNum = CommonUtils.HzToMidiNote(medianFreq);

        // Extend the previous block when it accepts this one; otherwise start a new block.
        var block = new NoteBlockModel(unitCount, noteNum, vowelCandidate.Value);
        var merged = blocks.Count > 0 && blocks[blocks.Count - 1].MergeIfPossible(block);
        if (!merged)
        {
            blocks.Add(block);
        }
    }
}
/// <summary>
/// Consumes every complete 2048-sample window currently available in the sample buffer,
/// estimates the fundamental frequency of each, and then updates the store and the harmony
/// pitch shifter from the most recent estimate.
/// </summary>
/// <remarks>
/// While key estimation is running, each window with a pitch adds its RMS to the chroma bin
/// of that pitch, and the estimated key is recomputed from the chroma vector. When a harmony
/// is enabled and the sung note lies in the estimated key's scale, the pitch factor is set to
/// the ratio between the scale note a third away and the sung frequency.
/// </remarks>
private void EstimatePitch()
{
    var gotPitch = false;
    double? f0 = null;

    while (true)
    {
        const int pitchWindowLength = 2048;
        var rms = 0.0;
        var ok = this._sampleBuffer.Read(pitchWindowLength, segment =>
        {
            foreach (var x in segment)
            {
                rms += x * x;
            }

            rms = Math.Sqrt(rms / pitchWindowLength);
            f0 = McLeodPitchMethod.EstimateFundamentalFrequency(this._waveIn.WaveFormat.SampleRate, segment);
        });

        if (!ok)
        {
            break; // the buffer has been read completely
        }

        gotPitch = true;

        if (f0.HasValue && this._store.IsKeyEstimationRunning)
        {
            // Accumulate this window into the chroma vector, weighted by its RMS.
            var m = CommonUtils.ToChromaIndex(CommonUtils.HzToMidiNote(f0.Value));
            this._chromaVector[m] += rms;
        }
    }

    if (gotPitch)
    {
        // Publishes the estimate of the last window read (null when that window had no pitch).
        this._store.EstimatedPitch = f0;
    }

    if (gotPitch && this._store.IsKeyEstimationRunning)
    {
        // Key estimation.
        this._store.EstimatedKey = KeyFinding.FindKey(this._chromaVector);
    }

    if (f0.HasValue && (this._store.IsUpperHarmonyEnabled || this._store.IsLowerHarmonyEnabled))
    {
        var key = this._store.EstimatedKey;
        if (key.HasValue)
        {
            var baseScale = key.Value.Mode == KeyMode.Major ? s_majorScale : s_minorScale;

            // NOTE(review): if the scale tables are tonic-relative (index 0 = tonic), membership of
            // chroma i would be baseScale[(i - Tonic + 12) % 12]; this rotates the other way.
            // Confirm against the definitions of s_majorScale / s_minorScale and Tonic.
            var scale = new bool[12];
            for (var i = 0; i < scale.Length; i++)
            {
                scale[i] = baseScale[(key.Value.Tonic + i) % 12];
            }

            var noteNum = CommonUtils.HzToMidiNote(f0.Value);
            var m = CommonUtils.ToChromaIndex(noteNum);

            if (scale[m])
            {
                // The sung note is in the scale, so work out the pitch to harmonize with.
                if (this._store.IsUpperHarmonyEnabled)
                {
                    // Find the note a third above within the scale: the second in-scale note
                    // above the sung one. Step first and test afterwards so the loop stops ON
                    // that note; incrementing in the for-iterator left it one semitone too high.
                    var harmonyNoteNum = noteNum;
                    for (var found = 0; found < 2;)
                    {
                        harmonyNoteNum++;
                        if (scale[CommonUtils.ToChromaIndex(harmonyNoteNum)])
                        {
                            found++;
                        }
                    }

                    var freq = CommonUtils.MidiNoteToHz(harmonyNoteNum);
                    this._upperHarmonyPlayer.SetPitchFactor((float)(freq / f0.Value));
                }

                if (this._store.IsLowerHarmonyEnabled)
                {
                    // Find the note a third below within the scale, with the same
                    // step-then-test loop so it does not overshoot by a semitone.
                    var harmonyNoteNum = noteNum;
                    for (var found = 0; found < 2;)
                    {
                        harmonyNoteNum--;
                        if (scale[CommonUtils.ToChromaIndex(harmonyNoteNum)])
                        {
                            found++;
                        }
                    }

                    var freq = CommonUtils.MidiNoteToHz(harmonyNoteNum);

                    // NOTE(review): this drives _upperHarmonyPlayer from the lower-harmony branch,
                    // overwriting the factor set above when both harmonies are enabled. It looks
                    // like a copy-paste slip; confirm whether a separate lower-harmony player
                    // exists and should be used here.
                    this._upperHarmonyPlayer.SetPitchFactor((float)(freq / f0.Value));
                }
            }
        }
    }
}