// 20170803

#region Comments
// This method uses Steps 2-4 of the YIN method (de Cheveigné and Kawahara, 2002).
// (Step 1 is only used (in the paper) for comparison, and Steps 5-6 only provide
// minor absolute improvements.)
#endregion
/* public double GetPitchPeriod(WAVSound sound, double time, double maximumPitchPeriod, double threshold)
 * {
 *     int sampleIndex = sound.GetSampleIndexAtTime(time);
 *     int maximumIndexDuration = (int)Math.Round(maximumPitchPeriod * sound.SampleRate);
 *     double minimum = double.MaxValue;
 *     double pitchPeriod = 0;
 *     List<double> periodList = new List<double>();
 *     List<double> shiftedSquareDifferenceList = new List<double>();
 *     List<double> normalizedShiftedSquaredDifferenceList = new List<double>();
 *     periodList.Add(0);
 *     shiftedSquareDifferenceList.Add(0);
 *     normalizedShiftedSquaredDifferenceList.Add(1);
 *
 *     for (int ii = 1; ii <= maximumIndexDuration; ii++)
 *     {
 *         int indexDuration = ii;
 *         double shiftedSquareDifference = sound.GetShiftedSquareDifference(sampleIndex, ii, maximumIndexDuration);
 *         shiftedSquareDifferenceList.Add(shiftedSquareDifference);
 *         double period = indexDuration / (double)sound.SampleRate;
 *         periodList.Add(period);
 *         double average = shiftedSquareDifferenceList.Average();
 *         double normalizedShiftedSquareDifference = shiftedSquareDifference / average;
 *         if (normalizedShiftedSquareDifference < minimum)
 *         {
 *             minimum = normalizedShiftedSquareDifference;
 *             pitchPeriod = period;
 *         }
 *         normalizedShiftedSquaredDifferenceList.Add(normalizedShiftedSquareDifference);
 *     }
 *     int minimumIndex = FindFirstMinimum(normalizedShiftedSquaredDifferenceList, threshold);
 *     if (minimumIndex > 0) // Otherwise use the global minimum, computed above.
 *     {
 *         pitchPeriod = minimumIndex / (double)sound.SampleRate;
 *     }
 *     return pitchPeriod;
 * }
 */

// Estimates the pitch period of the frame at the given time by finding the lag (between
// minimumPitchPeriod and maximumPitchPeriod, presumably class-level fields given the
// commented-out parameters) that minimizes the average magnitude difference.
public double ComputeFramePitchPeriod(WAVSound sound, double time) // , double minimumPitchPeriod, double maximumPitchPeriod)
{
    int sampleIndex = sound.GetSampleIndexAtTime(time);
    int minimumIndexDuration = (int)Math.Round(minimumPitchPeriod * sound.SampleRate);
    int maximumIndexDuration = (int)Math.Round(maximumPitchPeriod * sound.SampleRate);
    double minimumAverageMagnitudeDifference = double.MaxValue;
    int indexDurationAtMinimum = 0;
    for (int ii = minimumIndexDuration; ii <= maximumIndexDuration; ii++)
    {
        double averageMagnitudeDifference = sound.GetAbsoluteMagnitudeDifference(sampleIndex, ii, maximumIndexDuration);
        if (averageMagnitudeDifference < minimumAverageMagnitudeDifference)
        {
            minimumAverageMagnitudeDifference = averageMagnitudeDifference;
            indexDurationAtMinimum = ii;
        }
    }
    double pitchPeriod = indexDurationAtMinimum / (double)sound.SampleRate;
    return (pitchPeriod);
}
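// For reference, ComputeFramePitchPeriod relies on WAVSound.GetAbsoluteMagnitudeDifference, which is
// defined elsewhere. The commented-out sketch below shows an average magnitude difference function
// (AMDF) of the kind presumably involved; the window handling and method name are assumptions made
// for illustration, not necessarily what WAVSound actually does:
/* private static double AverageMagnitudeDifferenceSketch(List<short> samples, int startIndex, int lag, int windowLength)
 * {
 *     // d(lag) = (1/W) * sum_{jj = 0}^{W-1} |x[startIndex + jj] - x[startIndex + jj + lag]|
 *     double sum = 0;
 *     for (int jj = 0; jj < windowLength; jj++)
 *     {
 *         sum += Math.Abs(samples[startIndex + jj] - samples[startIndex + jj + lag]);
 *     }
 *     return sum / windowLength;
 * }
 */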
// Changes the pitch of the sound by a factor varying linearly from relativeStartPitch (at the start
// of the sound) to relativeEndPitch (at its end), by moving the pitch marks closer together (higher
// pitch) or further apart (lower pitch). The original samples are kept over a fraction (topFraction,
// defined elsewhere in the class) of each pitch period around the pitch marks, with linear
// interpolation in between.
// Note: it is assumed that both channels (left and right) are equal.
public WAVSound ChangePitch(WAVSound sound, List<double> pitchMarkTimeList, double relativeStartPitch, double relativeEndPitch)
{
    // First find the pitch mark indices in the original sound:
    List<int> originalPitchMarkIndexList = new List<int>();
    foreach (double pitchMarkTime in pitchMarkTimeList)
    {
        int originalPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTime);
        originalPitchMarkIndexList.Add(originalPitchMarkIndex);
    }
    // Next, compute the index spacings of the pitch marks in the modified sound:
    double originalSoundDuration = sound.GetDuration();
    List<int> modifiedPitchMarkIndexSpacingList = new List<int>();
    modifiedPitchMarkTimeList = new List<double>();
    double firstModifiedPitchMarkTime = pitchMarkTimeList[0]; // First pitch mark unchanged
    modifiedPitchMarkTimeList.Add(firstModifiedPitchMarkTime);
    for (int ii = 1; ii < originalPitchMarkIndexList.Count; ii++)
    {
        int originalPitchMarkSpacing = originalPitchMarkIndexList[ii] - originalPitchMarkIndexList[ii - 1];
        double relativePitch = relativeStartPitch + (pitchMarkTimeList[ii] / originalSoundDuration) * (relativeEndPitch - relativeStartPitch);
        int modifiedPitchMarkIndexSpacing = (int)Math.Round(originalPitchMarkSpacing / relativePitch);
        modifiedPitchMarkIndexSpacingList.Add(modifiedPitchMarkIndexSpacing);
        double modifiedPitchMarkTime = modifiedPitchMarkTimeList.Last() + (double)modifiedPitchMarkIndexSpacing / (double)sound.SampleRate;
        modifiedPitchMarkTimeList.Add(modifiedPitchMarkTime);
    }
    // Now build the sound, keeping the original sound data over a fraction (topFraction) of the pitch periods
    // and interpolating between pitch periods:
    List<short> newSamples = new List<short>();
    // Special treatment of the first pitch period:
    int firstPitchMarkIndex = originalPitchMarkIndexList[0]; // Position of the first pitch mark in the original sound
    int firstModifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[0]; // Spacing between the first and second pitch mark in the modified sound
    int firstTopEndIndex = firstPitchMarkIndex + (int)Math.Round(topFraction * firstModifiedPitchMarkIndexSpacing);
    for (int ii = 0; ii < firstTopEndIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
    for (int iPitchMark = 1; iPitchMark < originalPitchMarkIndexList.Count; iPitchMark++)
    {
        // First add samples for the transition from the previous pitch period to the current one:
        int modifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[iPitchMark - 1]; // -1 since there are n-1 spacings for n pitch marks
        int transitionIndexDuration = (int)Math.Round((1 - 2 * topFraction) * modifiedPitchMarkIndexSpacing);
        int previousPitchMarkIndex = originalPitchMarkIndexList[iPitchMark - 1];
        int previousTopEndIndex = previousPitchMarkIndex + (int)Math.Round(topFraction * modifiedPitchMarkIndexSpacing);
        int startIndexPreviousPitchPeriod = previousTopEndIndex;
        int currentPitchMarkIndex = originalPitchMarkIndexList[iPitchMark];
        int currentTopStartIndex = currentPitchMarkIndex - (int)Math.Round(topFraction * modifiedPitchMarkIndexSpacing);
        for (int ii = 0; ii < transitionIndexDuration; ii++)
        {
            double alpha = (double)ii / (double)(transitionIndexDuration - 1);
            int previousPitchPeriodSampleIndex = previousTopEndIndex + ii;
            int currentPitchPeriodSampleIndex = currentTopStartIndex - transitionIndexDuration + ii;
            short newSample = (short)Math.Round((1 - alpha) * sound.Samples[0][previousPitchPeriodSampleIndex] + alpha * sound.Samples[0][currentPitchPeriodSampleIndex]);
            newSamples.Add(newSample);
        }
        // Next, add samples around the top of the current pitch period:
        if (iPitchMark < (originalPitchMarkIndexList.Count - 1))
        {
            int nextModifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[iPitchMark];
            int currentTopEndIndex = currentPitchMarkIndex + (int)Math.Round(topFraction * nextModifiedPitchMarkIndexSpacing);
            for (int ii = currentTopStartIndex; ii < currentTopEndIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
        }
        else // Special treatment of the final pitch period:
        {
            int endIndex = sound.Samples[0].Count;
            for (int ii = currentTopStartIndex; ii < endIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
        }
    }
    // Finally, build the sound from the new samples:
    WAVSound newSound = new WAVSound(sound.Name, sound.SampleRate, sound.NumberOfChannels, sound.BitsPerSample);
    newSound.GenerateFromSamples(new List<List<short>>() { newSamples, newSamples });
    return (newSound);
}
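// Usage sketch (hedged, assuming a WAVSound instance named sound and pitch marks already obtained,
// e.g. via FindPitchMarks below, which fills the pitchMarkTimeList field; the factors are
// illustrative only):
//
//     WAVSound raisedSound = ChangePitch(sound, pitchMarkTimeList, 1.2, 1.1);  // ~20% higher at the start, ~10% higher at the end
//     WAVSound loweredSound = ChangePitch(sound, pitchMarkTimeList, 0.9, 0.9); // uniformly ~10% lower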
// Finds pitch marks (times of signal peaks, roughly one per pitch period) over the whole sound,
// starting from the largest peak of each voiced segment and stepping outwards by the local pitch period.
public void FindPitchMarks(WAVSound sound, SpeechTypeSpecification speechTypeSpecification, PitchPeriodSpecification pitchPeriodSpecification)
// , double peakSearchTimeRange, double adjustmentTimeRange, double relativePeakThreshold, double energyComputationTimeRange)
{
    List<Tuple<int, int, SpeechType>> segmentTypeList = speechTypeSpecification.GetSegmentTypes();
    List<int> absoluteSampleList = sound.GetAbsoluteSamples(0);
    pitchMarkTimeList = new List<double>();
    for (int iSegment = 0; iSegment < segmentTypeList.Count; iSegment++)
    {
        SpeechType segmentType = segmentTypeList[iSegment].Item3;
        if (segmentType == SpeechType.Voiced)
        {
            int startIndex = segmentTypeList[iSegment].Item1;
            int endIndex = segmentTypeList[iSegment].Item2;
            double startTime = speechTypeSpecification.TimeSpeechTypeTupleList[startIndex].Item1;
            double endTime = speechTypeSpecification.TimeSpeechTypeTupleList[endIndex].Item1;
            int startSearchIndex = sound.GetSampleIndexAtTime(startTime);
            int endSearchIndex = sound.GetSampleIndexAtTime(endTime);
            int peakIndexSearchRange = (int)Math.Round(peakSearchTimeRange * sound.SampleRate);
            int adjustmentIndexRange = (int)Math.Round(adjustmentTimeRange * sound.SampleRate);
            int indexOfAbsoluteMaximum = sound.GetIndexOfAbsoluteMaximum(startSearchIndex, endSearchIndex);
            int adjustedMainPitchMarkIndex = AdjustPitchMark(sound, indexOfAbsoluteMaximum, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
            double adjustedMainPitchMarkTime = sound.GetTimeAtSampleIndex(adjustedMainPitchMarkIndex);
            pitchMarkTimeList.Add(adjustedMainPitchMarkTime);
            Boolean inVoicedSegment = true;
            double previousPitchMarkTime = adjustedMainPitchMarkTime;
            // Next, move forward until the end of the voiced segment
            while (inVoicedSegment)
            {
                double pitchPeriod = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                int deltaSample = (int)Math.Round(pitchPeriod * sound.SampleRate);
                int previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                int pitchSampleIndex = previousSampleIndex + deltaSample;
                if (pitchSampleIndex + 2 * peakIndexSearchRange >= sound.Samples[0].Count) { break; }
                int currentSampleIndex = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                double currentTime = sound.GetTimeAtSampleIndex(currentSampleIndex);
                int adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                if (adjustedSampleIndex <= previousSampleIndex)
                {
                    adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                }
                double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex); // Make an incursion into the non-voiced segment
                pitchMarkTimeList.Add(adjustedTime);
                previousPitchMarkTime = adjustedTime;
                if (speechTypeSpecification.GetSpeechType(currentTime) != SpeechType.Voiced) { inVoicedSegment = false; }
                /* if (speechTypeSpecification.GetSpeechType(currentTime) == SpeechType.Voiced)
                 * {
                 *     pitchMarkTimeList.Add(adjustedTime);
                 *     previousPitchMarkTime = adjustedTime;
                 * }
                 * else { inVoicedSegment = false; }
                 */
            }
            double voicedEndTime = pitchMarkTimeList.Last();
            // Then continue half-way through any non-voiced segment followed by another voiced segment,
            // or until the end of the sound if no voiced segment follows:
            if (iSegment < segmentTypeList.Count)
            {
                double subsequenceVoicedSegmentStartTime = 0;
                Boolean hasSubsequentVoicedSegment = false;
                if (iSegment + 1 < segmentTypeList.Count)
                {
                    for (int kk = iSegment + 1; kk < segmentTypeList.Count; kk++)
                    {
                        if (segmentTypeList[kk].Item3 == SpeechType.Voiced)
                        {
                            hasSubsequentVoicedSegment = true;
                            int startSegmentIndex = segmentTypeList[kk].Item1;
                            subsequenceVoicedSegmentStartTime = speechTypeSpecification.TimeSpeechTypeTupleList[startSegmentIndex].Item1;
                            break;
                        }
                    }
                }
                if (!hasSubsequentVoicedSegment)
                {
                    // No following voiced segment: Just continue to the end of the sound
                    Boolean endReached = false;
                    while (!endReached)
                    {
                        double pitchPeriod = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                        int deltaSample = (int)Math.Round(pitchPeriod * sound.SampleRate);
                        int previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                        int pitchSampleIndex = previousSampleIndex + deltaSample;
                        if (pitchSampleIndex + 2 * peakIndexSearchRange >= sound.Samples[0].Count) { endReached = true; break; }
                        int currentSampleIndex = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                        double currentTime = sound.GetTimeAtSampleIndex(currentSampleIndex);
                        int adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                        if (adjustedSampleIndex <= previousSampleIndex)
                        {
                            adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                        }
                        double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                        pitchMarkTimeList.Add(adjustedTime);
                        previousPitchMarkTime = adjustedTime;
                    }
                }
                else // Proceed to the half-way mark of the interval from the end of the current voiced segment to the beginning of the next.
                {
                    double stopTime = voicedEndTime + (subsequenceVoicedSegmentStartTime - voicedEndTime) / 2;
                    int stopTimeIndex = sound.GetSampleIndexAtTime(stopTime);
                    Boolean endReached = false;
                    while (!endReached)
                    {
                        double pitchPeriod = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                        int deltaSample = (int)Math.Round(pitchPeriod * sound.SampleRate);
                        int previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                        int pitchSampleIndex = previousSampleIndex + deltaSample;
                        if (pitchSampleIndex + 2 * peakIndexSearchRange >= stopTimeIndex) { endReached = true; break; }
                        int currentSampleIndex = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                        double currentTime = sound.GetTimeAtSampleIndex(currentSampleIndex);
                        int adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                        if (adjustedSampleIndex <= previousSampleIndex)
                        {
                            adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                        }
                        double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                        pitchMarkTimeList.Add(adjustedTime);
                        previousPitchMarkTime = adjustedTime;
                    }
                }
            }
            // Then move backward until the beginning of the voiced segment
            inVoicedSegment = true;
            previousPitchMarkTime = adjustedMainPitchMarkTime;
            double voicedStartTime = 0;
            while (inVoicedSegment)
            {
                double pitch = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                int deltaSample = -(int)Math.Round(pitch * sound.SampleRate);
                int previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                int pitchSampleIndex = previousSampleIndex + deltaSample;
                if (pitchSampleIndex - 2 * peakIndexSearchRange < 0) { break; }
                int currentSampleIndex = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                double currentTime = sound.GetTimeAtSampleIndex(currentSampleIndex);
                int adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, peakIndexSearchRange); // , relativePeakThreshold, energyComputationTimeRange);
                if (adjustedSampleIndex >= previousSampleIndex) // Reversed inequality relative to the forward search, since the indices decrease here
                {
                    adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                }
                double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex); // Make an incursion into the non-voiced segment
                pitchMarkTimeList.Add(adjustedTime);
                previousPitchMarkTime = adjustedTime;
                if (speechTypeSpecification.GetSpeechType(currentTime) != SpeechType.Voiced) { inVoicedSegment = false; voicedStartTime = adjustedTime; }
            }
            // Then continue half-way through any non-voiced segment preceded by another voiced segment,
            // or until the beginning of the sound if no voiced segment precedes:
            if (iSegment > 0)
            {
                double priorVoicedSegmentEndTime = 0;
                Boolean hasPriorVoicedSegment = false;
                if (iSegment - 1 > 0)
                {
                    for (int kk = iSegment - 1; kk >= 0; kk--)
                    {
                        if (segmentTypeList[kk].Item3 == SpeechType.Voiced)
                        {
                            hasPriorVoicedSegment = true;
                            int endSegmentIndex = segmentTypeList[kk].Item2;
                            priorVoicedSegmentEndTime = speechTypeSpecification.TimeSpeechTypeTupleList[endSegmentIndex].Item1;
                            break;
                        }
                    }
                }
                if (!hasPriorVoicedSegment)
                {
                    // No prior voiced segment: Just continue to the beginning of the sound
                    Boolean endReached = false;
                    while (!endReached)
                    {
                        double pitchPeriod = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                        int deltaSample = -(int)Math.Round(pitchPeriod * sound.SampleRate);
                        int previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                        int pitchSampleIndex = previousSampleIndex + deltaSample;
                        if (pitchSampleIndex - 2 * peakIndexSearchRange < 0) { endReached = true; break; }
                        int currentSampleIndex = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                        double currentTime = sound.GetTimeAtSampleIndex(currentSampleIndex);
                        int adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                        if (adjustedSampleIndex >= previousSampleIndex) // Reversed inequality for the backward search
                        {
                            adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                        }
                        double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                        pitchMarkTimeList.Add(adjustedTime);
                        previousPitchMarkTime = adjustedTime;
                    }
                }
                else // Proceed to the half-way mark of the interval from the end of the prior voiced segment to the beginning of the current one.
                {
                    double stopTime = voicedStartTime - (voicedStartTime - priorVoicedSegmentEndTime) / 2;
                    int stopTimeIndex = sound.GetSampleIndexAtTime(stopTime);
                    Boolean endReached = false;
                    while (!endReached)
                    {
                        double pitchPeriod = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                        int deltaSample = -(int)Math.Round(pitchPeriod * sound.SampleRate);
                        int previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                        int pitchSampleIndex = previousSampleIndex + deltaSample;
                        if (pitchSampleIndex - 2 * peakIndexSearchRange <= stopTimeIndex) { endReached = true; break; }
                        int currentSampleIndex = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                        double currentTime = sound.GetTimeAtSampleIndex(currentSampleIndex);
                        int adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                        if (adjustedSampleIndex >= previousSampleIndex) // Reversed inequality for the backward search
                        {
                            adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                        }
                        double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                        pitchMarkTimeList.Add(adjustedTime);
                        previousPitchMarkTime = adjustedTime;
                    }
                }
            }
        }
    }
    pitchMarkTimeList.Sort();
    // Finally, remove any pitch marks that are too close (should only happen in non-voiced segments)
    double minimumPitchPeriod = pitchPeriodSpecification.GetMinimumPitchPeriod(); // Note: not used below; the constant MINIMUM_UNVOICED_PITCHMARK_SPACING is used instead.
    int index = 1;
    while (index < pitchMarkTimeList.Count)
    {
        double previousTime = pitchMarkTimeList[index - 1];
        double currentTime = pitchMarkTimeList[index];
        SpeechType previousSpeechType = speechTypeSpecification.GetSpeechType(previousTime);
        SpeechType currentSpeechType = speechTypeSpecification.GetSpeechType(currentTime);
        if ((previousSpeechType != SpeechType.Voiced) && (currentSpeechType != SpeechType.Voiced))
        {
            double deltaTime = currentTime - previousTime;
            if (deltaTime < MINIMUM_UNVOICED_PITCHMARK_SPACING) { pitchMarkTimeList.RemoveAt(index); }
            else { index++; }
        }
        else { index++; }
    }
}
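// Sanity-check sketch (hedged, for debugging only): in voiced regions, the spacing between consecutive
// pitch marks should stay close to the local pitch period from pitchPeriodSpecification. A commented-out
// check of that kind, with an arbitrary 50% tolerance, to be placed at the end of FindPitchMarks where
// pitchPeriodSpecification is in scope:
/* for (int ii = 1; ii < pitchMarkTimeList.Count; ii++)
 * {
 *     double spacing = pitchMarkTimeList[ii] - pitchMarkTimeList[ii - 1];
 *     double expectedPitchPeriod = pitchPeriodSpecification.GetPitchPeriod(pitchMarkTimeList[ii - 1]);
 *     if (Math.Abs(spacing - expectedPitchPeriod) > 0.5 * expectedPitchPeriod)
 *     {
 *         Console.WriteLine("Irregular pitch mark spacing at t = " + pitchMarkTimeList[ii - 1]);
 *     }
 * }
 */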
// This method makes an approximation, namely that the pitch mark interval is roughly constant.
// Usually, this will give a duration accurate to a few per cent (sufficient!) relative to the desired duration.
// For relativeDuration < 1, roughly every removalStepInterval-th pitch period is dropped (e.g.
// relativeDuration = 0.75 gives removalStepInterval = 4, so every fourth period is skipped); for
// relativeDuration > 1, roughly every additionStepInterval-th pitch period is duplicated.
public WAVSound ChangeDuration(WAVSound sound, List<double> pitchMarkTimeList, double relativeDuration)
{
    /* List<double> pitchPeriodList = new List<double>();
     * for (int ii = 1; ii < pitchMarkTimeList.Count; ii++)
     * {
     *     double pitchPeriod = pitchMarkTimeList[ii] - pitchMarkTimeList[ii - 1];
     *     pitchPeriodList.Add(pitchPeriod);
     * }
     * double averagePitchPeriod = pitchPeriodList.Average();
     */
    List<short> newSamples = new List<short>();
    int firstPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTimeList[0]);
    for (int ii = 0; ii < firstPitchMarkIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
    if (relativeDuration <= 1)
    {
        // Guard against division by zero for relativeDuration == 1: use a step interval so large
        // that no pitch period is ever removed.
        int removalStepInterval = (relativeDuration < 1) ? (int)Math.Round(1 / (1 - relativeDuration)) : int.MaxValue;
        int pitchIndex = 1;
        while (pitchIndex < pitchMarkTimeList.Count)
        {
            if ((pitchIndex % removalStepInterval) == 0)
            {
                // Nothing to do here: Simply avoid adding these samples
            }
            else
            {
                int previousPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTimeList[pitchIndex - 1]);
                int currentPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTimeList[pitchIndex]);
                for (int ii = previousPitchMarkIndex; ii < currentPitchMarkIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
            }
            pitchIndex++;
        }
    }
    else // relativeDuration > 1
    {
        int additionStepInterval = (int)Math.Round(1 / (relativeDuration - 1));
        int pitchIndex = 1;
        while (pitchIndex < pitchMarkTimeList.Count)
        {
            if ((pitchIndex % additionStepInterval) == 0)
            {
                // Insert the samples for this pitch period twice:
                int previousPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTimeList[pitchIndex - 1]);
                int currentPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTimeList[pitchIndex]);
                for (int ii = previousPitchMarkIndex; ii < currentPitchMarkIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
                for (int ii = previousPitchMarkIndex; ii < currentPitchMarkIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
            }
            else
            {
                int previousPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTimeList[pitchIndex - 1]);
                int currentPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTimeList[pitchIndex]);
                for (int ii = previousPitchMarkIndex; ii < currentPitchMarkIndex; ii++) { newSamples.Add(sound.Samples[0][ii]); }
            }
            pitchIndex++;
        }
    }
    // Finally, build the sound from the new samples:
    WAVSound newSound = new WAVSound(sound.Name, sound.SampleRate, sound.NumberOfChannels, sound.BitsPerSample);
    newSound.GenerateFromSamples(new List<List<short>>() { newSamples, newSamples });
    return (newSound);
}
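// Usage sketch (hedged, assuming a WAVSound instance named sound and pitch marks already computed
// for it with FindPitchMarks; the stretch factors are illustrative only). As noted above, the
// resulting duration is only accurate to within a few per cent of the requested one:
//
//     WAVSound slowerSound = ChangeDuration(sound, pitchMarkTimeList, 1.25); // roughly 25% longer
//     WAVSound fasterSound = ChangeDuration(sound, pitchMarkTimeList, 0.80); // roughly 20% shorter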