Ejemplo n.º 1
0
        /// <summary>
        /// Classifies every frame of the sound with GetFrameSpeechType and stores the result, as a list of
        /// (time, speech type) tuples, in speechTypeSpecification. The frame duration, the frame shift and
        /// the number of adjustment passes are read from members of this class rather than from parameters.
        /// </summary>
        /// <param name="sound">The sound to analyse.</param>
        public void FindSpeechTypeVariation(WAVSound sound)
        {
            WAVFrameSet frameSet = new WAVFrameSet(sound, frameDuration, frameShift);

            speechTypeSpecification = new SpeechTypeSpecification();
            var timeTypeList = speechTypeSpecification.TimeSpeechTypeTupleList;

            // Each frame's speech type is attributed to the center of that frame.
            double halfFrameDuration = frameDuration / 2;

            for (int frameIndex = 0; frameIndex < frameSet.FrameList.Count; frameIndex++)
            {
                SpeechType frameType  = this.GetFrameSpeechType(frameSet.FrameList[frameIndex]);
                double     centerTime = frameSet.StartTimeList[frameIndex] + halfFrameDuration;
                timeTypeList.Add(Tuple.Create(centerTime, frameType));
            }

            // Pad both ends so that the speech type can be looked up over the entire sound.
            // At time 0, repeat the type of the first frame:
            SpeechType firstType = timeTypeList[0].Item2;
            timeTypeList.Insert(0, Tuple.Create(0.0, firstType));

            // At the end of the sound, repeat the type of the last frame. The padding is skipped only
            // when the last entry already lies at (or beyond) the total duration.
            var    lastEntry     = timeTypeList[timeTypeList.Count - 1];
            double totalDuration = sound.GetDuration();
            if (lastEntry.Item1 < totalDuration)
            {
                timeTypeList.Add(Tuple.Create(totalDuration, lastEntry.Item2));
            }

            // Run the configured number of adjustment passes over the raw classification.
            for (int step = 0; step < numberOfAdjustmentSteps; step++)
            {
                Adjust();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Changes the pitch and, optionally, the duration of a sound. The method first estimates the
        /// speech type variation, the pitch periods and the pitch marks of the sound, then applies
        /// ChangePitch (only if a pitch factor differs from 1) and ChangeDuration (only if requested).
        /// </summary>
        /// <param name="sound">The sound to modify.</param>
        /// <param name="relativeStartPitch">Relative start pitch forwarded to ChangePitch; 1 means unchanged.</param>
        /// <param name="relativeEndPitch">Relative end pitch forwarded to ChangePitch; 1 means unchanged.</param>
        /// <param name="adjustDuration">If true, the duration is changed after the pitch step.</param>
        /// <param name="relativeDuration">Desired duration as a factor of the original duration. Only used
        /// when <paramref name="adjustDuration"/> is true.</param>
        /// <returns>
        /// The modified sound. When neither pitch factor differs from 1 and
        /// <paramref name="adjustDuration"/> is false, the input instance itself is returned (no copy).
        /// </returns>
        public WAVSound Modify(WAVSound sound, double relativeStartPitch, double relativeEndPitch, Boolean adjustDuration, double relativeDuration)
        {
            // Tolerance for treating a relative pitch as "unchanged". double.Epsilon must not be used
            // here: it is the smallest positive subnormal double, so comparing against it is an exact
            // inequality test and rounding noise in the factor would trigger a full pitch change.
            const double pitchTolerance = 1e-9;

            // First, find the speech type variation (all settings are held by the estimator itself):
            speechTypeEstimator.FindSpeechTypeVariation(sound);
            SpeechTypeSpecification speechTypeSpecification = speechTypeEstimator.SpeechTypeSpecification;

            // Next, find the pitch periods:
            PitchPeriodEstimator pitchPeriodEstimator = new PitchPeriodEstimator();

            pitchPeriodEstimator.ComputePitchPeriods(sound, 0.0, sound.GetDuration());
            pitchPeriodEstimator.AdjustAndInterpolate(speechTypeSpecification);
            PitchPeriodSpecification pitchPeriodSpecification = pitchPeriodEstimator.PitchPeriodSpecification;

            // Then, find the pitch marks:
            pitchMarkEstimator = new PitchMarkEstimator();
            pitchMarkEstimator.FindPitchMarks(sound, speechTypeSpecification, pitchPeriodSpecification);
            List <double> pitchMarkTimeList = pitchMarkEstimator.PitchMarkTimeList;

            // Then, change the pitch of the sound
            double   originalDuration       = sound.GetDuration();
            double   desiredDuration        = originalDuration * relativeDuration;
            double   actualRelativeDuration = relativeDuration; // Valid if the pitch is unchanged ...
            WAVSound pitchChangedSound;

            Boolean pitchIsChanged = (Math.Abs(relativeStartPitch - 1) > pitchTolerance) ||
                                     (Math.Abs(relativeEndPitch - 1) > pitchTolerance);

            if (pitchIsChanged) // To save some time, if only duration is to be changed..
            {
                // NOTE(review): modifiedPitchMarkTimeList is not assigned in this branch; presumably
                // ChangePitch updates it as a side effect - confirm.
                pitchChangedSound = ChangePitch(sound, pitchMarkTimeList, relativeStartPitch, relativeEndPitch);
                // The pitch change also changes the duration of the sound, so the duration factor must
                // be expressed relative to the pitch-changed sound in order to reach desiredDuration:
                double newDuration = pitchChangedSound.GetDuration();
                actualRelativeDuration = desiredDuration / newDuration;
            }
            else
            {
                pitchChangedSound         = sound;             // No copying needed here, a reference is sufficient.
                modifiedPitchMarkTimeList = pitchMarkTimeList; // No pitch change => use original pitch marks.
            }

            // If adjustDuration is true, change the duration, using the stored pitch mark time list
            // (to avoid repeating the three estimation steps above):
            if (adjustDuration)
            {
                WAVSound durationChangedSound = ChangeDuration(pitchChangedSound, modifiedPitchMarkTimeList, actualRelativeDuration);
                return(durationChangedSound);
            }
            else
            {
                return(pitchChangedSound);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Post-processes the raw pitch period estimates held in pitchPeriodSpecification:
        /// (1) median-filters them with a window of three values, (2) extends them to the last time
        /// stamp of the speech type specification, (3) resamples them by linear interpolation and,
        /// (4) if setUnvoicedPitch is set, overwrites the values in non-voiced stretches with the pitch
        /// period of the neighbouring voiced parts and smooths the result with an averaging filter.
        /// The resampling step (deltaTime) and the setUnvoicedPitch flag are read from members of this
        /// class. The pitch period list must contain at least one entry, since element 0 is read
        /// unconditionally.
        /// </summary>
        /// <param name="speechTypeSpecification">Time-stamped speech types. Its last time stamp defines
        /// how far the pitch period specification is extended, and GetSpeechType is used to locate the
        /// non-voiced stretches.</param>
        public void AdjustAndInterpolate(SpeechTypeSpecification speechTypeSpecification)
        {
            // Carry out median filtering (window of three values) to remove single errors
            List <double> correctedPitchValues = new List <double>();

            correctedPitchValues.Add(pitchPeriodSpecification.TimePitchPeriodTupleList[0].Item2);
            for (int ii = 1; ii < pitchPeriodSpecification.TimePitchPeriodTupleList.Count - 1; ii++)
            {
                List <double> rawPitchValues = new List <double>()
                {
                    pitchPeriodSpecification.TimePitchPeriodTupleList[ii - 1].Item2,
                    pitchPeriodSpecification.TimePitchPeriodTupleList[ii].Item2,
                    pitchPeriodSpecification.TimePitchPeriodTupleList[ii + 1].Item2
                };
                rawPitchValues.Sort();
                correctedPitchValues.Add(rawPitchValues[1]); // Median
            }
            // Finally adjust the end points (which are not touched by the initial median filtering),
            // using the median of the three first and the three last values, respectively.
            if (pitchPeriodSpecification.TimePitchPeriodTupleList.Count > 2)
            {
                List <double> rawPitchValues = new List <double>()
                {
                    pitchPeriodSpecification.TimePitchPeriodTupleList[0].Item2,
                    pitchPeriodSpecification.TimePitchPeriodTupleList[1].Item2,
                    pitchPeriodSpecification.TimePitchPeriodTupleList[2].Item2
                };
                rawPitchValues.Sort();
                correctedPitchValues[0] = rawPitchValues[1];
                int lastIndex = pitchPeriodSpecification.TimePitchPeriodTupleList.Count - 1;
                rawPitchValues = new List <double>()
                {
                    pitchPeriodSpecification.TimePitchPeriodTupleList[lastIndex].Item2,
                    pitchPeriodSpecification.TimePitchPeriodTupleList[lastIndex - 1].Item2,
                    pitchPeriodSpecification.TimePitchPeriodTupleList[lastIndex - 2].Item2
                };
                rawPitchValues.Sort();
                correctedPitchValues.Add(rawPitchValues[1]);
            }
            // Write back every corrected value. The bound is the number of corrected values (not
            // Count - 1), so that the corrected last end point computed above is applied as well.
            // For lists with one or two entries only element 0 has a corrected value.
            for (int ii = 0; ii < correctedPitchValues.Count; ii++)
            {
                pitchPeriodSpecification.TimePitchPeriodTupleList[ii] =
                    new Tuple <double, double>(pitchPeriodSpecification.TimePitchPeriodTupleList[ii].Item1,
                                               correctedPitchValues[ii]);
            }
            // Extend (extrapolate) the pitch period specification so that it runs to the end of the sound:
            double lastTime       = speechTypeSpecification.TimeSpeechTypeTupleList.Last().Item1;
            int    lastPitchIndex = pitchPeriodSpecification.TimePitchPeriodTupleList.Count - 1;
            double lastPitchTime  = pitchPeriodSpecification.TimePitchPeriodTupleList[lastPitchIndex].Item1;

            if (lastTime > lastPitchTime) // Should always be the case, but just to be sure ...
            {
                double lastPitch = pitchPeriodSpecification.TimePitchPeriodTupleList[lastPitchIndex].Item2;
                pitchPeriodSpecification.TimePitchPeriodTupleList.Add(new Tuple <double, double>(lastTime, lastPitch));
            }

            // Next, resample (upsample) the pitch period specification
            List <double> timeList  = new List <double>();
            List <double> pitchList = new List <double>();

            for (int ii = 0; ii < pitchPeriodSpecification.TimePitchPeriodTupleList.Count; ii++)
            {
                double time  = pitchPeriodSpecification.TimePitchPeriodTupleList[ii].Item1;
                double pitch = pitchPeriodSpecification.TimePitchPeriodTupleList[ii].Item2;
                timeList.Add(time);
                pitchList.Add(pitch);
            }
            List <List <double> > timePitchList = new List <List <double> >()
            {
                timeList, pitchList
            };
            int numberOfPoints = (int)Math.Round(lastTime / deltaTime);
            List <List <double> > interpolatedTimePitchList = LinearInterpolation.Interpolate(timePitchList, numberOfPoints);

            // Replace the specification with the interpolated (time, pitch period) points:
            pitchPeriodSpecification = new PitchPeriodSpecification();
            for (int ii = 0; ii < interpolatedTimePitchList[0].Count; ii++)
            {
                double time  = interpolatedTimePitchList[0][ii];
                double pitch = interpolatedTimePitchList[1][ii];
                pitchPeriodSpecification.TimePitchPeriodTupleList.Add(new Tuple <double, double>(time, pitch));
            }

            // Optionally (usually true) hard-set the (anyway rather arbitrary) pitch period for
            // unvoiced parts of the sound, by extending the pitch period from surrounding
            // voiced parts. This might cause occasional jumps (in the middle of an unvoiced
            // section), but those jumps are removed in the subsequent lowpass filtering.
            // NOTE(review): the lowpass filtering below is inside this if-block, i.e. it only runs
            // when setUnvoicedPitch is true - confirm that this is intended.

            if (setUnvoicedPitch)
            {
                double     firstTime          = pitchPeriodSpecification.TimePitchPeriodTupleList[0].Item1;
                SpeechType previousSpeechType = speechTypeSpecification.GetSpeechType(firstTime);
                int        firstChangeIndex   = 0; // Index of the first point of the current non-voiced stretch.
                int        lastChangeIndex    = -1;
                double     previousPitch;          // Must define here for use after the loop as well.
                for (int ii = 1; ii < pitchPeriodSpecification.TimePitchPeriodTupleList.Count; ii++)
                {
                    double     time       = pitchPeriodSpecification.TimePitchPeriodTupleList[ii].Item1;
                    SpeechType speechType = speechTypeSpecification.GetSpeechType(time);
                    if ((previousSpeechType == SpeechType.Voiced) && (speechType != SpeechType.Voiced))
                    {
                        // Voiced -> not voiced: a non-voiced stretch starts here.
                        firstChangeIndex = ii;
                        lastChangeIndex  = -1; // Not yet assigned. The value -1 is used for handling cases where the
                                               // sound remains not voiced until the end (see below).
                    }
                    else if ((previousSpeechType != SpeechType.Voiced) && (speechType == SpeechType.Voiced))
                    {
                        // Not voiced -> voiced: the non-voiced stretch ended at the previous point.
                        lastChangeIndex = ii - 1;
                        int middleIndex = (firstChangeIndex + lastChangeIndex) / 2; // integer division

                        // Assign the preceding pitch to the first half of the interval (unless
                        // firstChangeIndex = 0, meaning that the sound started with an unvoiced segment),
                        // and the subsequent pitch to the second half of the interval.
                        // NOTE(review): the "subsequent" pitch is read at lastChangeIndex, which is the last
                        // non-voiced point, not at ii (the first voiced point) - confirm that this is intended.
                        double subsequentPitch = pitchPeriodSpecification.TimePitchPeriodTupleList[lastChangeIndex].Item2;
                        previousPitch = subsequentPitch;
                        if (firstChangeIndex > 0)
                        {
                            previousPitch = pitchPeriodSpecification.TimePitchPeriodTupleList[firstChangeIndex - 1].Item2;
                        }
                        for (int jj = firstChangeIndex; jj < middleIndex; jj++)
                        {
                            double fillTime = pitchPeriodSpecification.TimePitchPeriodTupleList[jj].Item1;
                            pitchPeriodSpecification.TimePitchPeriodTupleList[jj] = new Tuple <double, double>(fillTime, previousPitch);
                        }
                        for (int jj = middleIndex; jj <= lastChangeIndex; jj++)
                        {
                            double fillTime = pitchPeriodSpecification.TimePitchPeriodTupleList[jj].Item1;
                            pitchPeriodSpecification.TimePitchPeriodTupleList[jj] = new Tuple <double, double>(fillTime, subsequentPitch);
                        }
                    }
                    previousSpeechType = speechType;
                }
                // At the end, if lastChangeIndex = -1, then the sound remained not voiced from the latest
                // change until the end. Thus, extend the last voiced pitch period to the end:
                if ((lastChangeIndex == -1) && (firstChangeIndex > 0))
                {
                    previousPitch = pitchPeriodSpecification.TimePitchPeriodTupleList[firstChangeIndex - 1].Item2;
                    for (int jj = firstChangeIndex; jj < pitchPeriodSpecification.TimePitchPeriodTupleList.Count; jj++)
                    {
                        double time = pitchPeriodSpecification.TimePitchPeriodTupleList[jj].Item1;
                        pitchPeriodSpecification.TimePitchPeriodTupleList[jj] = new Tuple <double, double>(time, previousPitch);
                    }
                }

                // Then, finally, low-pass filter the interpolated list, and assign the result.
                // (A first-order low-pass filter with alpha = 0.9 was previously tried here as an
                // alternative to the averaging filter.)
                AveragingFilter averagingFilter = new AveragingFilter();
                List <double>   inputList       = new List <double>();
                for (int ii = 0; ii < pitchPeriodSpecification.TimePitchPeriodTupleList.Count; ii++)
                {
                    double input = pitchPeriodSpecification.TimePitchPeriodTupleList[ii].Item2;
                    inputList.Add(input);
                }
                List <double> outputList = averagingFilter.Run(inputList);
                for (int ii = 0; ii < pitchPeriodSpecification.TimePitchPeriodTupleList.Count; ii++)
                {
                    double filteredPitch = outputList[ii];
                    double time          = pitchPeriodSpecification.TimePitchPeriodTupleList[ii].Item1;
                    pitchPeriodSpecification.TimePitchPeriodTupleList[ii] = new Tuple <double, double>(time, filteredPitch);
                }
            }
        }
        public void FindPitchMarks(WAVSound sound, SpeechTypeSpecification speechTypeSpecification, PitchPeriodSpecification pitchPeriodSpecification)
        //  , double peakSearchTimeRange,
        //  double adjustmentTimeRange, double relativePeakThreshold, double energyComputationTimeRange)
        {
            List <Tuple <int, int, SpeechType> > segmentTypeList = speechTypeSpecification.GetSegmentTypes();
            List <int> absoluteSampleList = sound.GetAbsoluteSamples(0);

            pitchMarkTimeList = new List <double>();
            for (int iSegment = 0; iSegment < segmentTypeList.Count; iSegment++)
            {
                SpeechType segmentType = segmentTypeList[iSegment].Item3;
                if (segmentType == SpeechType.Voiced)
                {
                    int    startIndex             = segmentTypeList[iSegment].Item1;
                    int    endIndex               = segmentTypeList[iSegment].Item2;
                    double startTime              = speechTypeSpecification.TimeSpeechTypeTupleList[startIndex].Item1;
                    double endTime                = speechTypeSpecification.TimeSpeechTypeTupleList[endIndex].Item1;
                    int    startSearchIndex       = sound.GetSampleIndexAtTime(startTime);
                    int    endSearchIndex         = sound.GetSampleIndexAtTime(endTime);
                    int    peakIndexSearchRange   = (int)Math.Round(peakSearchTimeRange * sound.SampleRate);
                    int    adjustmentIndexRange   = (int)Math.Round(adjustmentTimeRange * sound.SampleRate);
                    int    indexOfAbsoluteMaximum = sound.GetIndexOfAbsoluteMaximum(startSearchIndex, endSearchIndex);

                    int    adjustedMainPitchMarkIndex = AdjustPitchMark(sound, indexOfAbsoluteMaximum, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                    double adjustedMainPitchMarkTime  = sound.GetTimeAtSampleIndex(adjustedMainPitchMarkIndex);
                    pitchMarkTimeList.Add(adjustedMainPitchMarkTime);


                    Boolean inVoicedSegment       = true;
                    double  previousPitchMarkTime = adjustedMainPitchMarkTime;

                    // Next, move forward until the end of the voiced segment
                    while (inVoicedSegment)
                    {
                        double pitchPeriod         = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                        int    deltaSample         = (int)Math.Round(pitchPeriod * sound.SampleRate);
                        int    previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                        int    pitchSampleIndex    = previousSampleIndex + deltaSample;
                        if (pitchSampleIndex + 2 * peakIndexSearchRange >= sound.Samples[0].Count)
                        {
                            break;
                        }
                        int    currentSampleIndex  = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                        double currentTime         = sound.GetTimeAtSampleIndex(currentSampleIndex);
                        int    adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); //, relativePeakThreshold, energyComputationTimeRange);
                        if (adjustedSampleIndex <= previousSampleIndex)
                        {
                            adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                        }
                        double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                        // Make an incursion into the non-voiced segment
                        pitchMarkTimeList.Add(adjustedTime);
                        previousPitchMarkTime = adjustedTime;
                        if (speechTypeSpecification.GetSpeechType(currentTime) != SpeechType.Voiced)
                        {
                            inVoicedSegment = false;
                        }

                        /*       if (speechTypeSpecification.GetSpeechType(currentTime) == SpeechType.Voiced)
                         *     {
                         *         pitchMarkTimeList.Add(adjustedTime);
                         *         previousPitchMarkTime = adjustedTime;
                         *     }
                         *     else { inVoicedSegment = false; }  */
                    }
                    double voicedEndTime = pitchMarkTimeList.Last();
                    // Then continue half-way through any non-voiced segment followed by another voiced segment,
                    // or until the end of the sound if no voiced segment follows:
                    if (iSegment < segmentTypeList.Count)
                    {
                        double  subsequenceVoicedSegmentStartTime = 0;
                        Boolean hasSubsequentVoicedSegment        = false;
                        if (iSegment + 1 < segmentTypeList.Count)
                        {
                            for (int kk = iSegment + 1; kk < segmentTypeList.Count; kk++)
                            {
                                if (segmentTypeList[kk].Item3 == SpeechType.Voiced)
                                {
                                    hasSubsequentVoicedSegment = true;
                                    int startSegmentIndex = segmentTypeList[kk].Item1;
                                    subsequenceVoicedSegmentStartTime = speechTypeSpecification.TimeSpeechTypeTupleList[startSegmentIndex].Item1;
                                    break;
                                }
                            }
                        }
                        if (!hasSubsequentVoicedSegment)
                        {
                            // No following voiced segment: Just continue to the end
                            Boolean endReached = false;
                            while (!endReached)
                            {
                                double pitchPeriod         = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                                int    deltaSample         = (int)Math.Round(pitchPeriod * sound.SampleRate);
                                int    previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                                int    pitchSampleIndex    = previousSampleIndex + deltaSample;
                                if (pitchSampleIndex + 2 * peakIndexSearchRange >= sound.Samples[0].Count)
                                {
                                    endReached = true;
                                    break;
                                }
                                int    currentSampleIndex  = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                                double currentTime         = sound.GetTimeAtSampleIndex(currentSampleIndex);
                                int    adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); //, relativePeakThreshold, energyComputationTimeRange);
                                if (adjustedSampleIndex <= previousSampleIndex)
                                {
                                    adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                                }
                                double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                                pitchMarkTimeList.Add(adjustedTime);
                                previousPitchMarkTime = adjustedTime;
                            }
                        }
                        else  // Proceed to the half-way mark of the interval from the end of the current voice segment to the beginning of the next.
                        {
                            double  stopTime      = voicedEndTime + (subsequenceVoicedSegmentStartTime - voicedEndTime) / 2;
                            int     stopTimeIndex = sound.GetSampleIndexAtTime(stopTime);
                            Boolean endReached    = false;
                            while (!endReached)
                            {
                                double pitchPeriod         = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                                int    deltaSample         = (int)Math.Round(pitchPeriod * sound.SampleRate);
                                int    previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                                int    pitchSampleIndex    = previousSampleIndex + deltaSample;
                                if (pitchSampleIndex + 2 * peakIndexSearchRange >= stopTimeIndex)
                                {
                                    endReached = true;
                                    break;
                                }
                                int    currentSampleIndex  = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                                double currentTime         = sound.GetTimeAtSampleIndex(currentSampleIndex);
                                int    adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                                if (adjustedSampleIndex <= previousSampleIndex)
                                {
                                    adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                                }
                                double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                                pitchMarkTimeList.Add(adjustedTime);
                                previousPitchMarkTime = adjustedTime;
                            }
                        }
                    }

                    // Then move backward until the beginning of the voiced segment
                    inVoicedSegment       = true;
                    previousPitchMarkTime = adjustedMainPitchMarkTime;
                    double voicedStartTime = 0;
                    while (inVoicedSegment)
                    {
                        double pitch               = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                        int    deltaSample         = -(int)Math.Round(pitch * sound.SampleRate);
                        int    previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                        int    pitchSampleIndex    = previousSampleIndex + deltaSample;
                        if (pitchSampleIndex - 2 * peakIndexSearchRange < 0)
                        {
                            break;
                        }
                        int    currentSampleIndex  = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                        double currentTime         = sound.GetTimeAtSampleIndex(currentSampleIndex);
                        int    adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, peakIndexSearchRange); // , relativePeakThreshold, energyComputationTimeRange);
                        if (adjustedSampleIndex <= previousSampleIndex)
                        {
                            adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                        }
                        double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                        // Make an incursion into the non-voiced segment
                        pitchMarkTimeList.Add(adjustedTime);
                        previousPitchMarkTime = adjustedTime;
                        if (speechTypeSpecification.GetSpeechType(currentTime) != SpeechType.Voiced)
                        {
                            inVoicedSegment = false;
                            voicedStartTime = adjustedTime;
                        }
                    }

                    // Then continue half-way through any non-voiced segment preceded by another voiced segment,
                    // or until the beginning of the sound if no voiced segment follows:
                    if (iSegment > 0)
                    {
                        double  priorVoicedSegmentEndTime = 0;
                        Boolean hasPriorVoicedSegment     = false;
                        if (iSegment - 1 > 0)
                        {
                            for (int kk = iSegment - 1; kk >= 0; kk--)
                            {
                                if (segmentTypeList[kk].Item3 == SpeechType.Voiced)
                                {
                                    hasPriorVoicedSegment = true;
                                    int endSegmentIndex = segmentTypeList[kk].Item2;
                                    priorVoicedSegmentEndTime = speechTypeSpecification.TimeSpeechTypeTupleList[endSegmentIndex].Item1;
                                    break;
                                }
                            }
                        }
                        if (!hasPriorVoicedSegment)
                        {
                            // No following voiced segment: Just continue to the end
                            Boolean endReached = false;
                            while (!endReached)
                            {
                                double pitchPeriod         = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                                int    deltaSample         = -(int)Math.Round(pitchPeriod * sound.SampleRate);
                                int    previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                                int    pitchSampleIndex    = previousSampleIndex + deltaSample;
                                if (pitchSampleIndex - 2 * peakIndexSearchRange < 0)
                                {
                                    endReached = true;
                                    break;
                                }
                                int    currentSampleIndex  = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                                double currentTime         = sound.GetTimeAtSampleIndex(currentSampleIndex);
                                int    adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                                if (adjustedSampleIndex <= previousSampleIndex)
                                {
                                    adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                                }
                                double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                                pitchMarkTimeList.Add(adjustedTime);
                                previousPitchMarkTime = adjustedTime;
                            }
                        }
                        else  // Proceed to the half-way mark of the interval from the end of the current voice segment to the beginning of the next.
                        {
                            double  stopTime      = voicedStartTime - (voicedStartTime - priorVoicedSegmentEndTime) / 2;
                            int     stopTimeIndex = sound.GetSampleIndexAtTime(stopTime);
                            Boolean endReached    = false;
                            while (!endReached)
                            {
                                double pitchPeriod         = pitchPeriodSpecification.GetPitchPeriod(previousPitchMarkTime);
                                int    deltaSample         = -(int)Math.Round(pitchPeriod * sound.SampleRate);
                                int    previousSampleIndex = sound.GetSampleIndexAtTime(previousPitchMarkTime);
                                int    pitchSampleIndex    = previousSampleIndex + deltaSample;
                                if (pitchSampleIndex - 2 * peakIndexSearchRange <= stopTimeIndex)
                                {
                                    endReached = true;
                                    break;
                                }
                                int    currentSampleIndex  = sound.GetIndexOfAbsoluteMaximum(pitchSampleIndex - peakIndexSearchRange, pitchSampleIndex + peakIndexSearchRange);
                                double currentTime         = sound.GetTimeAtSampleIndex(currentSampleIndex);
                                int    adjustedSampleIndex = AdjustPitchMark(sound, currentSampleIndex, adjustmentIndexRange); // , relativePeakThreshold, energyComputationTimeRange);
                                if (adjustedSampleIndex <= previousSampleIndex)
                                {
                                    adjustedSampleIndex = currentSampleIndex; // Emergency fallback in cases where the search gets stuck (can happen if the pitch period is too small relative to the search range)
                                }
                                double adjustedTime = sound.GetTimeAtSampleIndex(adjustedSampleIndex);
                                pitchMarkTimeList.Add(adjustedTime);
                                previousPitchMarkTime = adjustedTime;
                            }
                        }
                    }
                }
            }
            pitchMarkTimeList.Sort();

            // Finally, remove any pitch marks that are too close (should only happen in non-voiced segments)
            double minimumPitchPeriod = pitchPeriodSpecification.GetMinimumPitchPeriod();
            int    index = 1;

            while (index < pitchMarkTimeList.Count)
            {
                double     previousTime       = pitchMarkTimeList[index - 1];
                double     currentTime        = pitchMarkTimeList[index];
                SpeechType previousSpeechType = speechTypeSpecification.GetSpeechType(previousTime);
                SpeechType currentSpeechType  = speechTypeSpecification.GetSpeechType(currentTime);
                if ((previousSpeechType != SpeechType.Voiced) && (currentSpeechType != SpeechType.Voiced))
                {
                    double deltaTime = currentTime - previousTime;
                    if (deltaTime < MINIMUM_UNVOICED_PITCHMARK_SPACING)
                    {
                        pitchMarkTimeList.RemoveAt(index);
                    }
                    else
                    {
                        index++;
                    }
                }
                else
                {
                    index++;
                }
            }
        }
Ejemplo n.º 5
0
        // Smooths the speech type sequence held in speechTypeSpecification:
        //   1. The (time, speech type) list is split into runs of consecutive entries sharing one speech type.
        //   2. Runs with fewer than adjustmentMinimumIndexDuration entries are relabelled (via SetUniformSpeechType)
        //      with the speech type of a neighbouring run.
        //   3. The resulting speech types are written back, entry by entry, with SetSpeechType.
        // Does nothing if the list is empty. The times of the entries are never changed here.
        private void Adjust()
        {
            var allTuples = speechTypeSpecification.TimeSpeechTypeTupleList;
            if (allTuples.Count == 0)
            {
                return;
            }

            // Step 1: split into runs of identical speech type. Every run is stored as its own
            // SpeechTypeSpecification holding copies of the tuples that belong to it.
            List<SpeechTypeSpecification> runList = new List<SpeechTypeSpecification>();
            SpeechTypeSpecification       run     = new SpeechTypeSpecification();
            SpeechType                    runType = allTuples[0].Item2;
            run.TimeSpeechTypeTupleList.Add(new Tuple<double, SpeechType>(allTuples[0].Item1, runType));
            for (int kk = 1; kk < allTuples.Count; kk++)
            {
                SpeechType entryType = allTuples[kk].Item2;
                double     entryTime = allTuples[kk].Item1;
                if (entryType != runType)
                {
                    // The speech type changed: close the current run and open a new one.
                    runList.Add(run);
                    run     = new SpeechTypeSpecification();
                    runType = entryType;
                }
                run.TimeSpeechTypeTupleList.Add(new Tuple<double, SpeechType>(entryTime, entryType));
            }
            runList.Add(run); // The run that was still open when the list ended (it always holds at least one entry)

            // Step 2: relabel runs that are too short. With exactly two runs the interior loop below
            // does not execute, and the first-run and last-run rules cannot both apply, so at most one
            // of the two runs is relabelled.
            int runCount = runList.Count;
            if (runCount >= 2)
            {
                // First run: it only has a right-hand neighbour, which is used if that neighbour is long enough.
                SpeechTypeSpecification firstRun  = runList[0];
                SpeechTypeSpecification secondRun = runList[1];
                bool firstIsShort  = firstRun.TimeSpeechTypeTupleList.Count < adjustmentMinimumIndexDuration;
                bool secondIsShort = secondRun.TimeSpeechTypeTupleList.Count < adjustmentMinimumIndexDuration;
                if (firstIsShort && !secondIsShort)
                {
                    firstRun.SetUniformSpeechType(secondRun.TimeSpeechTypeTupleList[0].Item2);
                }

                // Interior runs: a short run takes the speech type of the neighbour with more entries
                // (the following run wins a tie). Runs are processed left to right, so a neighbour's
                // speech type may itself have been relabelled already.
                for (int mm = 1; mm < runCount - 1; mm++)
                {
                    SpeechTypeSpecification candidate = runList[mm];
                    if (candidate.TimeSpeechTypeTupleList.Count >= adjustmentMinimumIndexDuration)
                    {
                        continue;
                    }
                    SpeechTypeSpecification before = runList[mm - 1];
                    SpeechTypeSpecification after  = runList[mm + 1];
                    SpeechTypeSpecification donor  =
                        (before.TimeSpeechTypeTupleList.Count > after.TimeSpeechTypeTupleList.Count) ? before : after;
                    candidate.SetUniformSpeechType(donor.TimeSpeechTypeTupleList[0].Item2);
                }

                // Last run: it only has a left-hand neighbour, which is used if that neighbour is long enough.
                SpeechTypeSpecification lastRun        = runList[runCount - 1];
                SpeechTypeSpecification penultimateRun = runList[runCount - 2];
                bool lastIsShort        = lastRun.TimeSpeechTypeTupleList.Count < adjustmentMinimumIndexDuration;
                bool penultimateIsShort = penultimateRun.TimeSpeechTypeTupleList.Count < adjustmentMinimumIndexDuration;
                if (lastIsShort && !penultimateIsShort)
                {
                    lastRun.SetUniformSpeechType(penultimateRun.TimeSpeechTypeTupleList[0].Item2);
                }
            }

            // Step 3: copy the (possibly relabelled) speech types back. The runs are stored in their
            // original order, so a running counter addresses the matching entry of the overall list.
            int targetIndex = 0;
            foreach (SpeechTypeSpecification finishedRun in runList)
            {
                foreach (Tuple<double, SpeechType> entry in finishedRun.TimeSpeechTypeTupleList)
                {
                    speechTypeSpecification.SetSpeechType(targetIndex, entry.Item2);
                    targetIndex++;
                }
            }
        }