public WAVSound Modify(WAVSound sound, double relativeStartPitch, double relativeEndPitch, Boolean adjustDuration, double relativeDuration)
{
    // First, find the speech type variation:
    speechTypeEstimator.FindSpeechTypeVariation(sound);
    SpeechTypeSpecification speechTypeSpecification = speechTypeEstimator.SpeechTypeSpecification;

    // Next, find the pitch periods:
    PitchPeriodEstimator pitchPeriodEstimator = new PitchPeriodEstimator();
    pitchPeriodEstimator.ComputePitchPeriods(sound, 0.0, sound.GetDuration());
    pitchPeriodEstimator.AdjustAndInterpolate(speechTypeSpecification);
    PitchPeriodSpecification pitchPeriodSpecification = pitchPeriodEstimator.PitchPeriodSpecification;

    // Then, find the pitch marks:
    pitchMarkEstimator = new PitchMarkEstimator();
    pitchMarkEstimator.FindPitchMarks(sound, speechTypeSpecification, pitchPeriodSpecification);
    List <double> pitchMarkTimeList = pitchMarkEstimator.PitchMarkTimeList;

    // Then, change the pitch of the sound:
    double originalDuration = sound.GetDuration();
    double desiredDuration = originalDuration * relativeDuration;
    double actualRelativeDuration = relativeDuration;  // Valid if the pitch is unchanged ...
    WAVSound pitchChangedSound;
    if ((Math.Abs(relativeStartPitch - 1) > double.Epsilon) || (Math.Abs(relativeEndPitch - 1) > double.Epsilon))  // To save some time, if only the duration is to be changed.
    {
        pitchChangedSound = ChangePitch(sound, pitchMarkTimeList, relativeStartPitch, relativeEndPitch);
        // The pitch change also changes the duration of the sound:
        double newDuration = pitchChangedSound.GetDuration();
        actualRelativeDuration = desiredDuration / newDuration;  // ... but if the pitch is changed, the duration changes too.
    }
    else
    {
        pitchChangedSound = sound;                     // No copying needed here, a reference is sufficient.
        modifiedPitchMarkTimeList = pitchMarkTimeList; // No pitch change => use the original pitch marks.
    }

    // If adjustDuration is true, change the duration, using the stored pitch mark time list (to avoid repeating the three steps above):
    if (adjustDuration)
    {
        WAVSound durationChangedSound = ChangeDuration(pitchChangedSound, modifiedPitchMarkTimeList, actualRelativeDuration);
        return(durationChangedSound);
    }
    else
    {
        return(pitchChangedSound);
    }
}
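// Usage sketch (illustrative only, not part of the original source): the Modify pipeline above could be
// driven as follows, assuming the containing class is named SpeechModifier, has a parameterless constructor,
// and initializes its frame, pitch-period and pitch-mark settings internally. The file name and parameter
// values are hypothetical.
/*
WAVSound sound = new WAVSound();
sound.LoadFromFile("./tmpOutput.wav");                                  // Load a previously synthesized or recorded sound.
SpeechModifier speechModifier = new SpeechModifier();
speechModifier.TopFraction = 0.45;                                      // Fraction of each pitch period kept unmodified (hypothetical value).
WAVSound modified = speechModifier.Modify(sound, 1.2, 1.2, true, 0.9);  // Raise the pitch by 20% and shorten the duration to 90%.
modified.GenerateMemoryStream();                                        // Prepare the result for playback, as in the GUI code.
*/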
public void SetSound(WAVSound sound)
{
    this.sound = sound;
    if (sound == null) { return; }  // 20160912
    if (soundSequenceList == null) { soundSequenceList = new List <WAVSound>(); }
    soundSequenceList.Add(this.sound.Copy());
    this.xMin = 0;
    this.xMax = (float)sound.GetDuration();
    scrollbarVisible = false;
    SetRange(xMin, xMax, MINIMUM_SAMPLE_VALUE, MAXIMUM_SAMPLE_VALUE);
    OnViewingAreaChanged();
    horizontalTickMarkList = new List <float>();
    float tickMarkPosition = (float)xMin;
    while (tickMarkPosition <= xMax)
    {
        horizontalTickMarkList.Add(tickMarkPosition);
        tickMarkPosition += tickMarkSpacing;
    }
    OnAssignedSoundChanged();
    Invalidate();
}
public void SetSound(WAVSound sound)
{
    this.sound = sound;
    if (sound == null) { Refresh(); return; }
    SetRange(0, sound.GetDuration(), -32768, 32768);
    horizontalTickMarkList = new List <double>();
    if (!pitchPanelVisible) { soundPanelFraction = 1; }
    nominalTopPanelHeight = (int)Math.Round(soundPanelFraction * this.Height);
    soundPanelHeight = nominalTopPanelHeight - dividerHeight;
    pitchPanelHeight = this.Height - soundPanelHeight - dividerHeight;
    double tickMarkPosition = xMin;
    while (tickMarkPosition <= xMax)
    {
        horizontalTickMarkList.Add(tickMarkPosition);
        tickMarkPosition += tickMarkSpacing;
    }
    this.zoomLevel = 1;
    scrollbarVisible = false;
    PlotSoundAndPitch();
}
public void FindSpeechTypeVariation(WAVSound sound)
{
    WAVFrameSet frameSet = new WAVFrameSet(sound, frameDuration, frameShift);
    speechTypeSpecification = new SpeechTypeSpecification();
    double time = 0;
    for (int ii = 0; ii < frameSet.FrameList.Count; ii++)
    {
        WAVSound frame = frameSet.FrameList[ii];
        SpeechType speechType = this.GetFrameSpeechType(frame);
        time = frameSet.StartTimeList[ii] + frameDuration / 2;  // The speech type is assigned to the center of the frame.
        speechTypeSpecification.TimeSpeechTypeTupleList.Add(new Tuple <double, SpeechType>(time, speechType));
    }
    // Finally, to make sure that the speech type can be interpolated over the entire sound, set the end values:
    SpeechType firstSpeechType = speechTypeSpecification.TimeSpeechTypeTupleList[0].Item2;
    speechTypeSpecification.TimeSpeechTypeTupleList.Insert(0, new Tuple <double, SpeechType>(0, firstSpeechType));
    SpeechType lastSpeechType = speechTypeSpecification.TimeSpeechTypeTupleList.Last().Item2;
    double duration = sound.GetDuration();
    if (speechTypeSpecification.TimeSpeechTypeTupleList.Last().Item1 < duration)  // Will ALMOST always be the case, unless the duration is an exact multiple of the frame shift.
    {
        speechTypeSpecification.TimeSpeechTypeTupleList.Add(new Tuple <double, SpeechType>(duration, lastSpeechType));
    }
    for (int jj = 0; jj < numberOfAdjustmentSteps; jj++) { Adjust(); }
}
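// Usage sketch (illustrative, not part of the original source): assuming frameDuration, frameShift and the
// various thresholds of the SpeechTypeEstimator are set elsewhere (e.g. in its constructor), the method above
// could be used as follows to inspect the estimated speech type over time:
/*
SpeechTypeEstimator speechTypeEstimator = new SpeechTypeEstimator();
speechTypeEstimator.FindSpeechTypeVariation(sound);
SpeechTypeSpecification specification = speechTypeEstimator.SpeechTypeSpecification;
foreach (Tuple <double, SpeechType> timeSpeechTypeTuple in specification.TimeSpeechTypeTupleList)
{
    Console.WriteLine(timeSpeechTypeTuple.Item1.ToString("F3") + ": " + timeSpeechTypeTuple.Item2.ToString());
}
*/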
public void SetSound(WAVSound sound, List <double> pitchList)
{
    this.sound = sound;
    this.pitchList = pitchList;
    SetRange(0, sound.GetDuration(), 0, 1);
    PlotSoundAndPitch();
}
private void speakButton_Click(object sender, EventArgs e)
{
    string sentence = sentenceTextBox.Text;
    if (sentence != "")
    {
        speechVisualizer.MarkerList = new List <SoundMarker>();
        speechVisualizer.SetPitchPeriodSpecification(null);
        string voiceName = voiceSelectionComboBox.SelectedItem.ToString();
        speechSynthesizer.SetOutputToWaveFile("./tmpOutput.wav", new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Mono));
        speechSynthesizer.Speak(sentence);
        speechSynthesizer.SetOutputToDefaultAudioDevice();
        speechSynthesizer.SelectVoice(voiceName);
        speechSynthesizer.Speak(sentence);
        currentSound = new WAVSound();
        currentSound.LoadFromFile("./tmpOutput.wav");
        double startTime = currentSound.GetFirstTimeAboveThreshold(0, 10, 20);
        double endTime = currentSound.GetLastTimeAboveThreshold(0, 10, 20);
        currentSound = currentSound.Extract(startTime, endTime);
        speechVisualizer.SetRange(0, currentSound.GetDuration(), -32768, 32768);
        speechVisualizer.SetSound(currentSound);
        speechVisualizer.Invalidate();
        soundTypeIdentificationButton.Enabled = true;
        playSoundButton.Enabled = true;
        modifySoundButton.Enabled = true;
        saveSoundToolStripMenuItem.Enabled = true;
    }
}
private void modifySoundButton_Click(object sender, EventArgs e)
{
    speechVisualizer.MarkerList = new List <SoundMarker>();
    speechModifier.TopFraction = double.Parse(topFractionTextBox.Text);
    double relativeStartPitch = double.Parse(relativeStartPitchTextBox.Text);
    double relativeEndPitch = double.Parse(relativeEndPitchTextBox.Text);
    Boolean adjustDuration = Boolean.Parse(adjustDurationComboBox.SelectedItem.ToString());
    double relativeDuration = double.Parse(relativeDurationTextBox.Text);  // Only relevant if adjustDuration = true.
    WAVSound modifiedSound = speechModifier.Modify(currentSound, relativeStartPitch, relativeEndPitch, adjustDuration, relativeDuration);
    // Optional post-processing, currently disabled:
    // modifiedSound.MedianFilter(5);
    // modifiedSound.LowPassFilter(1500);
    // modifiedSound.SetMaximumNonClippingVolume();
    SoundPlayer soundPlayer = new SoundPlayer();
    modifiedSound.GenerateMemoryStream();
    modifiedSound.WAVMemoryStream.Position = 0;  // Manually rewind the stream.
    soundPlayer.Stream = null;
    soundPlayer.Stream = modifiedSound.WAVMemoryStream;
    soundPlayer.PlaySync();
    speechVisualizer.SetRange(0, modifiedSound.GetDuration(), -32768, 32768);
    speechVisualizer.SetPitchPeriodSpecification(null);
    speechVisualizer.SetSound(modifiedSound);
    currentSound = modifiedSound.Copy();
    soundTypeIdentificationButton.Enabled = true;
    findPitchPeriodsButton.Enabled = false;
    findPitchMarksButton.Enabled = false;
}
private void findPitchPeriodsButton_Click(object sender, EventArgs e)
{
    findPitchPeriodsButton.Enabled = false;
    PitchPeriodEstimator pitchPeriodEstimator = speechModifier.PitchPeriodEstimator;
    pitchPeriodEstimator.ComputePitchPeriods(currentSound, 0.0, currentSound.GetDuration());
    pitchPeriodEstimator.AdjustAndInterpolate(speechTypeSpecification);
    speechVisualizer.SetPitchPeriodSpecification(pitchPeriodEstimator.PitchPeriodSpecification);
    pitchPeriodSpecification = pitchPeriodEstimator.PitchPeriodSpecification;
    findPitchMarksButton.Enabled = true;
}
public WAVFrameSet(WAVSound sound, double frameDuration, double frameShift)
{
    double soundDuration = sound.GetDuration();
    this.frameDuration = frameDuration;
    this.frameShift = frameShift;
    this.frameList = new List <WAVSound>();
    int numberOfFrames = (int)Math.Truncate((soundDuration - frameDuration + frameShift) / frameShift);
    this.startTimeList = new List <double>();
    for (int ii = 0; ii < numberOfFrames; ii++)
    {
        double startTime = ii * frameShift;
        double endTime = startTime + frameDuration;
        WAVSound frame = sound.Extract(startTime, endTime);
        frameList.Add(frame);
        startTimeList.Add(startTime);
    }
}
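// Worked example (added for clarity, values are illustrative): for a 1.00 s sound with frameDuration = 0.03 s
// and frameShift = 0.01 s, the formula above gives Truncate((1.00 - 0.03 + 0.01) / 0.01) = 98 frames, starting
// at 0.00, 0.01, ..., 0.97 s, the last one covering [0.97, 1.00] s. (Floating-point rounding in the division may
// occasionally shave off the final frame.)
/*
WAVFrameSet frameSet = new WAVFrameSet(sound, 0.03, 0.01);   // 30 ms frames, shifted by 10 ms.
foreach (double startTime in frameSet.StartTimeList)
{
    // Each frame covers [startTime, startTime + 0.03] s of the original sound.
}
*/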
public void ComputePitchPeriods(WAVSound sound, double startTime, double endTime)
{
    pitchPeriodSpecification = new PitchPeriodSpecification();
    double time = startTime;
    double actualEndTime = endTime;
    double duration = sound.GetDuration();
    // At least two maximum pitch periods are required for the analysis:
    if (actualEndTime > (duration - 2 * maximumPitchPeriod)) { actualEndTime = duration - 2 * maximumPitchPeriod; }
    while (time <= actualEndTime)
    {
        double pitchPeriod = ComputeFramePitchPeriod(sound, time);
        Tuple <double, double> timePitchPeriodTuple = new Tuple <double, double>(time, pitchPeriod);
        pitchPeriodSpecification.TimePitchPeriodTupleList.Add(timePitchPeriodTuple);
        time += frameShift;
    }
}
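// Usage sketch (illustrative, not part of the original source): assuming minimumPitchPeriod, maximumPitchPeriod
// and frameShift are fields of the PitchPeriodEstimator set elsewhere, the pitch period contour of a sound could
// be obtained as follows (see also the Modify method of the speech modifier):
/*
PitchPeriodEstimator pitchPeriodEstimator = new PitchPeriodEstimator();
pitchPeriodEstimator.ComputePitchPeriods(sound, 0.0, sound.GetDuration());
pitchPeriodEstimator.AdjustAndInterpolate(speechTypeSpecification);   // Uses the speech type estimate from FindSpeechTypeVariation.
foreach (Tuple <double, double> timePitchPeriodTuple in pitchPeriodEstimator.PitchPeriodSpecification.TimePitchPeriodTupleList)
{
    double pitchPeriod = timePitchPeriodTuple.Item2;                  // Pitch period in seconds at time Item1.
}
*/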
private void ShowSound(WAVSound sound)
{
    ClearHistory();
    try  // This (using try-catch) is ugly, but appears to be necessary for some hardware configurations.
    {
        double duration = sound.GetDuration();
        if (duration > viewingInterval)
        {
            WAVSound visibleSound = sound.Extract(duration - viewingInterval, duration);
            SetSound(visibleSound);
        }
        else
        {
            SetSound(sound);
        }
    }
    catch
    {
        // Nothing to do here.
    }
    displayBusy = false;
}
private void RunLoop()
{
    Thread.Sleep(1);
    DateTime utteranceStartDateTime = DateTime.Now;      // Just needed for initialization.
    DateTime utteranceEndDateTime = DateTime.MinValue;   // Just needed for initialization.
    DateTime previousUtteranceStartDateTime = DateTime.MinValue;
    DateTime previousUtteranceEndDateTime = DateTime.MinValue;
    DateTime recordingStartDateTime;
    DateTime recordingEndDateTime;
    double utteranceStartTime = 0;  // In seconds, measured from the start of the current recording (= 0 just for initialization).
    while (running)
    {
        Thread.Sleep(millisecondRecordingInterval);
        byte[] soundData = wavRecorder.GetAllRecordedBytes(out recordingStartDateTime, out recordingEndDateTime);
        if (soundData != null)
        {
            if (soundData.Length > 0)
            {
                WAVSound sound = new WAVSound("", wavRecorder.SampleRate, wavRecorder.NumberOfChannels, wavRecorder.NumberOfBitsPerSample);
                sound.AppendSamplesAsBytes(soundData);
                if (showSoundStream)
                {
                    if (!displayBusy)
                    {
                        WAVSound soundToDisplay = sound.Copy();  // 20171207: Make a new copy here, since the code below may process the sound before visualization is completed.
                        if (InvokeRequired) { this.BeginInvoke(new MethodInvoker(() => ShowSound(soundToDisplay))); }
                        else { ShowSound(soundToDisplay); }
                    }
                }
                // Next, remove all parts of the sound that have already been recognized, if any:
                if (previousUtteranceEndDateTime > recordingStartDateTime)
                {
                    double extractionStartTime = (previousUtteranceEndDateTime - recordingStartDateTime).TotalSeconds;
                    double extractionEndTime = sound.GetDuration();
                    sound = sound.Extract(extractionStartTime, extractionEndTime);
                }
                if (!inUtterance)
                {
                    utteranceStartTime = sound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (utteranceStartTime > 0)
                    {
                        double duration = sound.GetDuration();
                        double timeToEnd = duration - utteranceStartTime;
                        long ticksToEnd = (long)(TICKS_PER_SECOND * timeToEnd);  // Convert to ticks before truncating, to keep sub-second precision.
                        utteranceStartDateTime = recordingEndDateTime.Subtract(new TimeSpan(ticksToEnd));
                        if (utteranceStartDateTime > previousUtteranceEndDateTime)  // True (by construction) the FIRST time.
                        {
                            inUtterance = true;
                            long utteranceStartTimeAsTicks = (long)(TICKS_PER_SECOND * utteranceStartTime);  // Corrected 20170907 (1000000 -> 10000000).
                            utteranceStartDateTime = recordingStartDateTime.Add(new TimeSpan(utteranceStartTimeAsTicks));
                        }
                    }
                }
                else
                {
                    double duration = sound.GetDuration();
                    WAVSound endOfSound = sound.Extract(duration - detectionSilenceInterval, duration);
                    double startTimeInEndOfSound = endOfSound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (startTimeInEndOfSound < 0)  // <=> silence at the end of the sound.
                    {
                        inUtterance = false;
                        utteranceEndDateTime = recordingEndDateTime;
                        previousUtteranceStartDateTime = utteranceStartDateTime;
                        previousUtteranceEndDateTime = utteranceEndDateTime;
                        // Monitor.Enter(recognitionLockObject);
                        if (!recognizerBusy)
                        {
                            recognizerBusy = true;
                            WAVSound soundToRecognize = sound.Extract(utteranceStartTime - extractionMargin, duration).Copy();
                            // Monitor.Exit(recognitionLockObject);
                            RunRecognizer(soundToRecognize);
                        }
                    }
                }
            }
        }
    }
}
// Note: it is assumed that both channels (left and right) are equal.
public WAVSound ChangePitch(WAVSound sound, List <double> pitchMarkTimeList, double relativeStartPitch, double relativeEndPitch)
{
    // First find the pitch mark indices in the original sound:
    List <int> originalPitchMarkIndexList = new List <int>();
    foreach (double pitchMarkTime in pitchMarkTimeList)
    {
        int originalPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTime);
        originalPitchMarkIndexList.Add(originalPitchMarkIndex);
    }

    // Next, compute the index spacings of the pitch marks in the modified sound:
    double originalSoundDuration = sound.GetDuration();
    List <int> modifiedPitchMarkIndexSpacingList = new List <int>();
    modifiedPitchMarkTimeList = new List <double>();
    double firstModifiedPitchMarkTime = pitchMarkTimeList[0];  // First pitch mark unchanged.
    modifiedPitchMarkTimeList.Add(firstModifiedPitchMarkTime);
    for (int ii = 1; ii < originalPitchMarkIndexList.Count; ii++)
    {
        int originalPitchMarkSpacing = originalPitchMarkIndexList[ii] - originalPitchMarkIndexList[ii - 1];
        double relativePitch = relativeStartPitch + (pitchMarkTimeList[ii] / originalSoundDuration) * (relativeEndPitch - relativeStartPitch);
        int modifiedPitchMarkIndexSpacing = (int)Math.Round(originalPitchMarkSpacing / relativePitch);
        modifiedPitchMarkIndexSpacingList.Add(modifiedPitchMarkIndexSpacing);
        double modifiedPitchMarkTime = modifiedPitchMarkTimeList.Last() + (double)modifiedPitchMarkIndexSpacing / (double)sound.SampleRate;
        modifiedPitchMarkTimeList.Add(modifiedPitchMarkTime);
    }

    // Now build the sound, keeping the original sound data over a fraction (topFraction) of the pitch periods
    // and interpolating between pitch periods:
    List <short> newSamples = new List <short>();

    // Special treatment of the first pitch period:
    int firstPitchMarkIndex = originalPitchMarkIndexList[0];                        // Position of the first pitch mark in the original sound.
    int firstModifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[0];  // Spacing between the first and second pitch marks in the modified sound.
    int firstTopEndIndex = firstPitchMarkIndex + (int)Math.Round(topFraction * firstModifiedPitchMarkIndexSpacing);
    for (int ii = 0; ii < firstTopEndIndex; ii++)
    {
        newSamples.Add(sound.Samples[0][ii]);
    }

    for (int iPitchMark = 1; iPitchMark < originalPitchMarkIndexList.Count; iPitchMark++)
    {
        // First add samples for the transition from the previous pitch period to the current one:
        int modifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[iPitchMark - 1];  // -1 since there are n-1 spacings for n pitch marks.
        int transitionIndexDuration = (int)Math.Round((1 - 2 * topFraction) * modifiedPitchMarkIndexSpacing);
        int previousPitchMarkIndex = originalPitchMarkIndexList[iPitchMark - 1];
        int previousTopEndIndex = previousPitchMarkIndex + (int)Math.Round(topFraction * modifiedPitchMarkIndexSpacing);
        int startIndexPreviousPitchPeriod = previousTopEndIndex;
        int currentPitchMarkIndex = originalPitchMarkIndexList[iPitchMark];
        int currentTopStartIndex = currentPitchMarkIndex - (int)Math.Round(topFraction * modifiedPitchMarkIndexSpacing);
        for (int ii = 0; ii < transitionIndexDuration; ii++)
        {
            double alpha = (double)ii / (double)(transitionIndexDuration - 1);
            int previousPitchPeriodSampleIndex = previousTopEndIndex + ii;
            int currentPitchPeriodSampleIndex = currentTopStartIndex - transitionIndexDuration + ii;
            short newSample = (short)Math.Round(((1 - alpha) * sound.Samples[0][previousPitchPeriodSampleIndex] + alpha * sound.Samples[0][currentPitchPeriodSampleIndex]));
            newSamples.Add(newSample);
        }
        // Next, add samples around the top of the current pitch period:
        if (iPitchMark < (originalPitchMarkIndexList.Count - 1))
        {
            int nextModifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[iPitchMark];
            int currentTopEndIndex = currentPitchMarkIndex + (int)Math.Round(topFraction * nextModifiedPitchMarkIndexSpacing);
            for (int ii = currentTopStartIndex; ii < currentTopEndIndex; ii++)
            {
                newSamples.Add(sound.Samples[0][ii]);
            }
        }
        else  // Special treatment of the final pitch period:
        {
            int endIndex = sound.Samples[0].Count;
            for (int ii = currentTopStartIndex; ii < endIndex; ii++)
            {
                newSamples.Add(sound.Samples[0][ii]);
            }
        }
    }

    // Finally, build the sound from the new samples (both channels are assigned the same data):
    WAVSound newSound = new WAVSound(sound.Name, sound.SampleRate, sound.NumberOfChannels, sound.BitsPerSample);
    newSound.GenerateFromSamples(new List <List <short> >() { newSamples, newSamples });
    return(newSound);
}
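// Worked example (added for clarity, values are illustrative): at a sample rate of 16000 Hz, an original pitch
// mark spacing of 160 samples corresponds to a 10 ms pitch period (100 Hz). With relativePitch = 1.25 at that
// point in the sound, the modified spacing becomes Round(160 / 1.25) = 128 samples, i.e. an 8 ms pitch period
// (125 Hz), so the fundamental frequency is raised by the requested factor while the samples around each pitch
// mark (the topFraction part) are copied unchanged and the remaining samples are cross-faded linearly.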