Exemple #1
0
        /// <summary>
        /// Runs the analysis chain (speech type, pitch periods, pitch marks) on <paramref name="sound"/>,
        /// then optionally changes its pitch and, if requested, its duration.
        /// </summary>
        /// <param name="sound">The sound to analyse and modify.</param>
        /// <param name="relativeStartPitch">Relative pitch passed to ChangePitch for the start of the sound (1 = unchanged).</param>
        /// <param name="relativeEndPitch">Relative pitch passed to ChangePitch for the end of the sound (1 = unchanged).</param>
        /// <param name="adjustDuration">When true, ChangeDuration is applied after the (optional) pitch change.</param>
        /// <param name="relativeDuration">Desired duration relative to the original duration; only used if <paramref name="adjustDuration"/> is true.</param>
        /// <returns>The duration-changed sound if <paramref name="adjustDuration"/> is true, otherwise the pitch-changed sound
        /// (which is the input reference itself when neither relative pitch differs from 1).</returns>
        public WAVSound Modify(WAVSound sound, double relativeStartPitch, double relativeEndPitch, Boolean adjustDuration, double relativeDuration)
        {
            // Step 1: speech type variation over the sound.
            speechTypeEstimator.FindSpeechTypeVariation(sound);
            SpeechTypeSpecification speechTypes = speechTypeEstimator.SpeechTypeSpecification;

            // Step 2: pitch periods over the whole sound, adjusted using the speech types.
            PitchPeriodEstimator periodEstimator = new PitchPeriodEstimator();
            periodEstimator.ComputePitchPeriods(sound, 0.0, sound.GetDuration());
            periodEstimator.AdjustAndInterpolate(speechTypes);
            PitchPeriodSpecification pitchPeriods = periodEstimator.PitchPeriodSpecification;

            // Step 3: pitch marks (the estimator is kept in a field).
            pitchMarkEstimator = new PitchMarkEstimator();
            pitchMarkEstimator.FindPitchMarks(sound, speechTypes, pitchPeriods);
            List<double> pitchMarkTimes = pitchMarkEstimator.PitchMarkTimeList;

            // Step 4: pitch change. The duration factor starts out as the requested one,
            // which is correct as long as the pitch is left untouched.
            double targetDuration = sound.GetDuration() * relativeDuration;
            double durationFactor = relativeDuration;
            bool   pitchIsChanged = (Math.Abs(relativeStartPitch - 1) > double.Epsilon) ||
                                    (Math.Abs(relativeEndPitch - 1) > double.Epsilon);
            WAVSound result;

            if (!pitchIsChanged)
            {
                // Nothing to do for the pitch: reuse the input reference and the original pitch marks.
                result = sound;
                modifiedPitchMarkTimeList = pitchMarkTimes;
            }
            else
            {
                result = ChangePitch(sound, pitchMarkTimes, relativeStartPitch, relativeEndPitch);
                // Changing the pitch alters the duration, so the factor must be measured
                // against the duration of the pitch-changed sound.
                durationFactor = targetDuration / result.GetDuration();
            }

            // Step 5: optional duration change, reusing the stored (modified) pitch mark times.
            if (!adjustDuration)
            {
                return result;
            }
            return ChangeDuration(result, modifiedPitchMarkTimeList, durationFactor);
        }
        /// <summary>
        /// Assigns the sound to display. For a non-null sound, a copy is appended to the sound sequence list,
        /// the viewing range is reset to the full duration, the horizontal tick marks are rebuilt and the control is invalidated.
        /// </summary>
        /// <param name="sound">The sound to show; when null, only the reference is stored.</param>
        public void SetSound(WAVSound sound)
        {
            this.sound = sound;
            if (sound == null)
            {
                return;
            }

            // Create the history list lazily, then store a copy of the assigned sound.
            soundSequenceList = soundSequenceList ?? new List<WAVSound>();
            soundSequenceList.Add(sound.Copy());

            // Show the entire sound, without a scrollbar.
            xMin = 0;
            xMax = (float)sound.GetDuration();
            scrollbarVisible = false;
            SetRange(xMin, xMax, MINIMUM_SAMPLE_VALUE, MAXIMUM_SAMPLE_VALUE);
            OnViewingAreaChanged();

            // Tick marks from xMin up to (and including) xMax, spaced by tickMarkSpacing.
            horizontalTickMarkList = new List<float>();
            for (float tick = (float)xMin; tick <= xMax; tick += tickMarkSpacing)
            {
                horizontalTickMarkList.Add(tick);
            }

            OnAssignedSoundChanged();
            Invalidate();
        }
Exemple #3
0
        /// <summary>
        /// Assigns the sound to display. A null sound only triggers a Refresh; otherwise the range,
        /// panel heights and tick marks are recomputed, the zoom is reset and the sound and pitch are plotted.
        /// </summary>
        /// <param name="sound">The sound to show, or null.</param>
        public void SetSound(WAVSound sound)
        {
            this.sound = sound;
            if (sound != null)
            {
                SetRange(0, sound.GetDuration(), -32768, 32768);
                horizontalTickMarkList = new List<double>();

                // Without a pitch panel, the sound panel takes the whole height.
                if (!pitchPanelVisible)
                {
                    soundPanelFraction = 1;
                }
                nominalTopPanelHeight = (int)Math.Round(soundPanelFraction * this.Height);
                soundPanelHeight      = nominalTopPanelHeight - dividerHeight;
                pitchPanelHeight      = this.Height - soundPanelHeight - dividerHeight;

                // Tick marks from xMin up to (and including) xMax, spaced by tickMarkSpacing.
                for (double tick = xMin; tick <= xMax; tick += tickMarkSpacing)
                {
                    horizontalTickMarkList.Add(tick);
                }

                this.zoomLevel   = 1;
                scrollbarVisible = false;
                PlotSoundAndPitch();
            }
            else
            {
                Refresh();
            }
        }
Exemple #4
0
        /// <summary>
        /// Splits the sound into frames (using the frameDuration and frameShift members), classifies each frame with
        /// GetFrameSpeechType and stores (time, speech type) tuples in a new speech type specification.
        /// End points at time 0 and at the sound duration are added, after which Adjust is run
        /// numberOfAdjustmentSteps times.
        /// </summary>
        /// <param name="sound">The sound to analyse.</param>
        public void FindSpeechTypeVariation(WAVSound sound)
        {
            WAVFrameSet frames = new WAVFrameSet(sound, frameDuration, frameShift);

            speechTypeSpecification = new SpeechTypeSpecification();

            int frameIndex = 0;
            foreach (WAVSound frame in frames.FrameList)
            {
                SpeechType frameType = this.GetFrameSpeechType(frame);
                // The speech type is assigned to the center of the frame.
                double centerTime = frames.StartTimeList[frameIndex] + frameDuration / 2;
                speechTypeSpecification.TimeSpeechTypeTupleList.Add(Tuple.Create(centerTime, frameType));
                frameIndex++;
            }

            // Make sure the speech type can be interpolated over the entire sound by adding end values.
            var entries = speechTypeSpecification.TimeSpeechTypeTupleList;

            // Start value: the first frame's type, placed at time 0.
            SpeechType leadingType = entries[0].Item2;
            entries.Insert(0, Tuple.Create(0.0, leadingType));

            // End value: the last frame's type, placed at the sound duration
            // (skipped only when the last entry already reaches the duration).
            SpeechType trailingType = entries.Last().Item2;
            double     totalTime    = sound.GetDuration();
            if (entries.Last().Item1 < totalTime)
            {
                entries.Add(Tuple.Create(totalTime, trailingType));
            }

            int step = 0;
            while (step < numberOfAdjustmentSteps)
            {
                Adjust();
                step++;
            }
        }
 /// <summary>
 /// Stores the sound and its pitch list, sets the range to [0, duration] x [0, 1] and plots both.
 /// </summary>
 /// <param name="sound">The sound to show.</param>
 /// <param name="pitchList">The pitch values stored alongside the sound.</param>
 public void SetSound(WAVSound sound, List<double> pitchList)
 {
     this.pitchList = pitchList;
     this.sound     = sound;

     double soundLength = sound.GetDuration();
     SetRange(0, soundLength, 0, 1);
     PlotSoundAndPitch();
 }
        /// <summary>
        /// Synthesizes the sentence in the text box: once to a temporary wave file (loaded, trimmed and shown in the
        /// visualizer as the current sound) and once to the default audio device.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void speakButton_Click(object sender, EventArgs e)
        {
            string sentence = sentenceTextBox.Text;

            if (string.IsNullOrEmpty(sentence))
            {
                return;
            }

            speechVisualizer.MarkerList = new List <SoundMarker>();
            speechVisualizer.SetPitchPeriodSpecification(null);

            // Select the voice BEFORE synthesizing to file, so that the recorded (analysed) sound and the
            // sound played on the audio device use the same voice. If no voice is selected in the combo box,
            // the synthesizer's current voice is kept instead of failing with a NullReferenceException.
            object selectedVoice = voiceSelectionComboBox.SelectedItem;
            if (selectedVoice != null)
            {
                speechSynthesizer.SelectVoice(selectedVoice.ToString());
            }

            // First pass: render to a temporary wave file (16 kHz, 16 bit, mono).
            speechSynthesizer.SetOutputToWaveFile("./tmpOutput.wav", new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Mono));
            speechSynthesizer.Speak(sentence);

            // Second pass: play the sentence on the default audio device.
            speechSynthesizer.SetOutputToDefaultAudioDevice();
            speechSynthesizer.Speak(sentence);

            currentSound = new WAVSound();
            currentSound.LoadFromFile("./tmpOutput.wav");

            // Trim the sound to the interval between the first and last times above the threshold.
            double startTime = currentSound.GetFirstTimeAboveThreshold(0, 10, 20);
            double endTime   = currentSound.GetLastTimeAboveThreshold(0, 10, 20);
            currentSound = currentSound.Extract(startTime, endTime);
            speechVisualizer.SetRange(0, currentSound.GetDuration(), -32768, 32768);
            speechVisualizer.SetSound(currentSound);
            speechVisualizer.Invalidate();

            soundTypeIdentificationButton.Enabled = true;
            playSoundButton.Enabled            = true;
            modifySoundButton.Enabled          = true;
            saveSoundToolStripMenuItem.Enabled = true;
        }
        /// <summary>
        /// Reads the modification settings from the form, applies them to the current sound via the speech modifier,
        /// plays the result synchronously, shows it in the visualizer and makes a copy of it the new current sound.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void modifySoundButton_Click(object sender, EventArgs e)
        {
            speechVisualizer.MarkerList = new List <SoundMarker>();

            speechModifier.TopFraction = double.Parse(topFractionTextBox.Text);
            double  relativeStartPitch = double.Parse(relativeStartPitchTextBox.Text);
            double  relativeEndPitch   = double.Parse(relativeEndPitchTextBox.Text);
            Boolean adjustDuration     = Boolean.Parse(adjustDurationComboBox.SelectedItem.ToString());
            double  relativeDuration   = double.Parse(relativeDurationTextBox.Text); // Only relevant if adjustDuration = true.

            WAVSound modifiedSound = speechModifier.Modify(currentSound, relativeStartPitch, relativeEndPitch, adjustDuration, relativeDuration);

            modifiedSound.GenerateMemoryStream();
            modifiedSound.WAVMemoryStream.Position = 0; // Manually rewind stream

            // SoundPlayer is IDisposable: release it once the (blocking) playback has finished.
            using (SoundPlayer soundPlayer = new SoundPlayer())
            {
                soundPlayer.Stream = modifiedSound.WAVMemoryStream;
                soundPlayer.PlaySync();
            }

            speechVisualizer.SetRange(0, modifiedSound.GetDuration(), -32768, 32768);
            speechVisualizer.SetPitchPeriodSpecification(null);
            speechVisualizer.SetSound(modifiedSound);

            currentSound = modifiedSound.Copy();

            // The analysis must restart from the speech type step for the new sound.
            soundTypeIdentificationButton.Enabled = true;
            findPitchPeriodsButton.Enabled        = false;
            findPitchMarksButton.Enabled          = false;
        }
        /// <summary>
        /// Computes the pitch periods of the current sound with the speech modifier's pitch period estimator,
        /// shows them in the visualizer, stores the result and enables the pitch mark button.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void findPitchPeriodsButton_Click(object sender, EventArgs e)
        {
            findPitchPeriodsButton.Enabled = false;

            PitchPeriodEstimator estimator = speechModifier.PitchPeriodEstimator;
            double analysisEndTime = currentSound.GetDuration();

            // Analyse the whole sound, then adjust/interpolate using the stored speech types.
            estimator.ComputePitchPeriods(currentSound, 0.0, analysisEndTime);
            estimator.AdjustAndInterpolate(speechTypeSpecification);

            speechVisualizer.SetPitchPeriodSpecification(estimator.PitchPeriodSpecification);
            pitchPeriodSpecification = estimator.PitchPeriodSpecification;

            findPitchMarksButton.Enabled = true;
        }
Exemple #9
0
        /// <summary>
        /// Builds a set of frames of length <paramref name="frameDuration"/>, with successive frames starting
        /// <paramref name="frameShift"/> apart, by extracting them from <paramref name="sound"/>.
        /// </summary>
        /// <param name="sound">The sound from which frames are extracted.</param>
        /// <param name="frameDuration">Duration of every frame.</param>
        /// <param name="frameShift">Time between the starts of successive frames.</param>
        public WAVFrameSet(WAVSound sound, double frameDuration, double frameShift)
        {
            double totalDuration = sound.GetDuration();

            this.frameDuration = frameDuration;
            this.frameShift    = frameShift;
            this.frameList     = new List<WAVSound>();
            this.startTimeList = new List<double>();

            // Number of complete frames that fit in the sound.
            int frameCount = (int)Math.Truncate((totalDuration - frameDuration + frameShift) / frameShift);

            int frameIndex = 0;
            while (frameIndex < frameCount)
            {
                double frameStart = frameIndex * frameShift;
                double frameEnd   = frameStart + frameDuration;
                frameList.Add(sound.Extract(frameStart, frameEnd));
                startTimeList.Add(frameStart);
                frameIndex++;
            }
        }
Exemple #10
0
        // First identifies the first index at which the data point is below the
        // threshold. Then finds the subsequent minimum.

        /*     public int FindFirstMinimum(List<double> dataList, double threshold)
         *   {
         *       int startIndex = dataList.FindIndex(d => d < threshold);
         *       if (startIndex < 0) { return -1; }
         *       else
         *       {
         *           double currentValue = dataList[startIndex];
         *           double minimum = dataList[startIndex];
         *           int minimumIndex = startIndex;
         *           int ii = minimumIndex + 1;
         *           while ((currentValue < threshold) && (ii < dataList.Count))
         *           {
         *               currentValue = dataList[ii];
         *               if (currentValue < minimum)
         *               {
         *                   minimum = currentValue;
         *                   minimumIndex = ii;
         *               }
         *               ii++;
         *           }
         *           return minimumIndex;
         *       }
         *   }  */


        /// <summary>
        /// Fills a new pitch period specification with (time, pitch period) tuples, computed with
        /// ComputeFramePitchPeriod at times startTime, startTime + frameShift, ... up to the (possibly reduced) end time.
        /// </summary>
        /// <param name="sound">The sound to analyse.</param>
        /// <param name="startTime">Time of the first analysis point.</param>
        /// <param name="endTime">Requested time of the last analysis point; reduced so that two maximum
        /// pitch periods remain before the end of the sound.</param>
        public void ComputePitchPeriods(WAVSound sound, double startTime, double endTime)
        {
            pitchPeriodSpecification = new PitchPeriodSpecification();

            // At least two maximum pitch periods are required for the analysis,
            // so the last analysis time may not be later than this.
            double soundLength   = sound.GetDuration();
            double lastValidTime = soundLength - 2 * maximumPitchPeriod;
            double stopTime      = endTime;
            if (stopTime > lastValidTime)
            {
                stopTime = lastValidTime;
            }

            for (double analysisTime = startTime; analysisTime <= stopTime; analysisTime += frameShift)
            {
                double framePitchPeriod = ComputeFramePitchPeriod(sound, analysisTime);
                pitchPeriodSpecification.TimePitchPeriodTupleList.Add(new Tuple<double, double>(analysisTime, framePitchPeriod));
            }
        }
 /// <summary>
 /// Clears the history and shows the sound; if it is longer than viewingInterval, only its last
 /// viewingInterval seconds are shown. Clears the displayBusy flag when done.
 /// </summary>
 /// <param name="sound">The sound to display.</param>
 private void ShowSound(WAVSound sound)
 {
     ClearHistory();
     try  // Swallowing exceptions here is ugly, but appears to be necessary for some hardware configurations
     {
         double totalTime = sound.GetDuration();
         WAVSound soundToShow = (totalTime > viewingInterval)
             ? sound.Extract(totalTime - viewingInterval, totalTime)
             : sound;
         SetSound(soundToShow);
     }
     catch
     {
         // Deliberately ignored (best-effort display).
     }
     displayBusy = false;
 }
        /// <summary>
        /// Polling loop that runs while <c>running</c> is true. In each pass it fetches all recorded bytes,
        /// optionally displays them, removes the part preceding the end of the previous utterance, and then
        /// either looks for the start of a new utterance (first time above the detection threshold) or, while
        /// inside an utterance, checks for silence at the end of the sound and hands the utterance to RunRecognizer.
        /// </summary>
        private void RunLoop()
        {
            Thread.Sleep(1);
            DateTime utteranceStartDateTime         = DateTime.Now;      // Just needed for initialization.
            DateTime utteranceEndDateTime           = DateTime.MinValue; // Just needed for initialization.
            DateTime previousUtteranceStartDateTime = DateTime.MinValue;
            DateTime previousUtteranceEndDateTime   = DateTime.MinValue;
            DateTime recordingStartDateTime;
            DateTime recordingEndDateTime;
            double   utteranceStartTime = 0; // In seconds, measured from the start of the current recording.  (=0 just for initialization).

            while (running)
            {
                Thread.Sleep(millisecondRecordingInterval);
                byte[] soundData = wavRecorder.GetAllRecordedBytes(out recordingStartDateTime, out recordingEndDateTime);
                if ((soundData == null) || (soundData.Length == 0))
                {
                    continue; // Nothing recorded yet.
                }

                WAVSound sound = new WAVSound("", wavRecorder.SampleRate, wavRecorder.NumberOfChannels, wavRecorder.NumberOfBitsPerSample);
                sound.AppendSamplesAsBytes(soundData);
                if (showSoundStream && !displayBusy)
                {
                    // 20171207: Make a new copy here, since the code below may process the sound before visualization is completed.
                    WAVSound soundToDisplay = sound.Copy();
                    if (InvokeRequired)
                    {
                        this.BeginInvoke(new MethodInvoker(() => ShowSound(soundToDisplay)));
                    }
                    else
                    {
                        ShowSound(soundToDisplay);
                    }
                }

                // Next, remove all parts of the sound that have already been recognized, if any:
                if (previousUtteranceEndDateTime > recordingStartDateTime)
                {
                    double extractionStartTime = (previousUtteranceEndDateTime - recordingStartDateTime).TotalSeconds;
                    double extractionEndTime   = sound.GetDuration();
                    sound = sound.Extract(extractionStartTime, extractionEndTime);
                }

                if (!inUtterance)
                {
                    utteranceStartTime = sound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (utteranceStartTime > 0)
                    {
                        double duration  = sound.GetDuration();
                        double timeToEnd = duration - utteranceStartTime;
                        // Scale to ticks BEFORE truncating to an integer; casting timeToEnd to long first
                        // would discard the fractional seconds (an error of up to one second).
                        long ticksToEnd = (long)(TICKS_PER_SECOND * timeToEnd);
                        utteranceStartDateTime = recordingEndDateTime.Subtract(new TimeSpan(ticksToEnd));
                        if (utteranceStartDateTime > previousUtteranceEndDateTime) // True (by construction) the FIRST time.
                        {
                            inUtterance = true;
                            long utteranceStartTimeAsTicks = (long)(TICKS_PER_SECOND * utteranceStartTime);  // Corrected 20170907 (1000000 -> 10000000)
                            utteranceStartDateTime = recordingStartDateTime.Add(new TimeSpan(utteranceStartTimeAsTicks));
                        }
                    }
                }
                else
                {
                    // Inside an utterance: inspect the last detectionSilenceInterval seconds of the sound.
                    double   duration              = sound.GetDuration();
                    WAVSound endOfSound            = sound.Extract(duration - detectionSilenceInterval, duration);
                    double   startTimeInEndOfSound = endOfSound.GetFirstTimeAboveThreshold(0, movingAverageLength, detectionThreshold);
                    if (startTimeInEndOfSound < 0)  //  <=> silence at the end of the sound
                    {
                        inUtterance                    = false;
                        utteranceEndDateTime           = recordingEndDateTime;
                        previousUtteranceStartDateTime = utteranceStartDateTime;
                        previousUtteranceEndDateTime   = utteranceEndDateTime;
                        if (!recognizerBusy)
                        {
                            recognizerBusy = true;
                            WAVSound soundToRecognize = sound.Extract(utteranceStartTime - extractionMargin, duration).Copy();
                            RunRecognizer(soundToRecognize);
                        }
                    }
                }
            }
        }
Exemple #13
0
        /// <summary>
        /// Builds a pitch-changed version of <paramref name="sound"/>. The spacing between successive pitch marks is
        /// divided by a relative pitch that varies linearly with the pitch mark time, from
        /// <paramref name="relativeStartPitch"/> at time 0 to <paramref name="relativeEndPitch"/> at the sound duration.
        /// Around every pitch mark the original samples are kept (over a fraction topFraction of the modified spacing
        /// on either side); between pitch marks the samples are a cross-fade of the previous and the current pitch period.
        /// The modified pitch mark times are stored in modifiedPitchMarkTimeList.
        /// Note: only channel 0 is read, and the same samples are written to both output channels,
        /// i.e. it is assumed that both channels (left and right) are equal.
        /// </summary>
        /// <param name="sound">The original sound.</param>
        /// <param name="pitchMarkTimeList">Pitch mark times of the original sound; at least two are required.</param>
        /// <param name="relativeStartPitch">Relative pitch at the start of the sound.</param>
        /// <param name="relativeEndPitch">Relative pitch at the end of the sound.</param>
        /// <returns>A new sound with the same name, sample rate, channel count and bit depth as the original.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if fewer than two pitch marks are supplied.</exception>
        public WAVSound ChangePitch(WAVSound sound, List <double> pitchMarkTimeList, double relativeStartPitch, double relativeEndPitch)
        {
            // With fewer than two pitch marks there is no pitch period to rescale. Fail with a clear message
            // (same exception type as the list indexer would otherwise raise further down).
            if (pitchMarkTimeList.Count < 2)
            {
                throw new ArgumentOutOfRangeException("pitchMarkTimeList", "At least two pitch marks are required to change the pitch.");
            }

            // First find the pitch mark indices in the original sound:
            List <int> originalPitchMarkIndexList = new List <int>();

            foreach (double pitchMarkTime in pitchMarkTimeList)
            {
                int originalPitchMarkIndex = sound.GetSampleIndexAtTime(pitchMarkTime);
                originalPitchMarkIndexList.Add(originalPitchMarkIndex);
            }

            // Next, compute the index spacings of the pitch marks in the modified sound:
            double     originalSoundDuration             = sound.GetDuration();
            List <int> modifiedPitchMarkIndexSpacingList = new List <int>();

            modifiedPitchMarkTimeList = new List <double>();
            double firstModifiedPitchMarkTime = pitchMarkTimeList[0]; // First pitch mark unchanged

            modifiedPitchMarkTimeList.Add(firstModifiedPitchMarkTime);
            for (int ii = 1; ii < originalPitchMarkIndexList.Count; ii++)
            {
                int    originalPitchMarkSpacing      = originalPitchMarkIndexList[ii] - originalPitchMarkIndexList[ii - 1];
                // Linear interpolation of the relative pitch over the duration of the sound.
                double relativePitch                 = relativeStartPitch + (pitchMarkTimeList[ii] / originalSoundDuration) * (relativeEndPitch - relativeStartPitch);
                int    modifiedPitchMarkIndexSpacing = (int)Math.Round(originalPitchMarkSpacing / relativePitch);
                modifiedPitchMarkIndexSpacingList.Add(modifiedPitchMarkIndexSpacing);
                double modifiedPitchMarkTime = modifiedPitchMarkTimeList.Last() + (double)modifiedPitchMarkIndexSpacing / (double)sound.SampleRate;
                modifiedPitchMarkTimeList.Add(modifiedPitchMarkTime);
            }

            // Now build the sound, keeping the original sound data over a fraction (topFraction) of the pitch periods
            // and interpolating between pitch periods:
            List <short> newSamples = new List <short>();

            //  Special treatment of the first pitch period: copy everything up to the end of its top.
            int firstPitchMarkIndex = originalPitchMarkIndexList[0];                       // Position of the first pitch mark in the original sound
            int firstModifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[0]; // Spacing between the first and second pitch mark in the modified sound
            int firstTopEndIndex = firstPitchMarkIndex + (int)Math.Round(topFraction * firstModifiedPitchMarkIndexSpacing);

            for (int ii = 0; ii < firstTopEndIndex; ii++)
            {
                newSamples.Add(sound.Samples[0][ii]);
            }

            for (int iPitchMark = 1; iPitchMark < originalPitchMarkIndexList.Count; iPitchMark++)
            {
                // First add samples for the transition from the previous pitch period to the current one:
                int modifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[iPitchMark - 1]; // -1 since there are n-1 spacings for n pitch marks
                int transitionIndexDuration       = (int)Math.Round((1 - 2 * topFraction) * modifiedPitchMarkIndexSpacing);
                int topIndexWidth                 = (int)Math.Round(topFraction * modifiedPitchMarkIndexSpacing);
                int previousPitchMarkIndex        = originalPitchMarkIndexList[iPitchMark - 1];
                int previousTopEndIndex           = previousPitchMarkIndex + topIndexWidth;
                int currentPitchMarkIndex         = originalPitchMarkIndexList[iPitchMark];
                int currentTopStartIndex          = currentPitchMarkIndex - topIndexWidth;
                for (int ii = 0; ii < transitionIndexDuration; ii++)
                {
                    // Cross-fade weight, running from 0 to 1 over the transition. A one-sample transition
                    // would give 0/0 = NaN, so in that case the previous pitch period's sample is used (alpha = 0).
                    double alpha = (transitionIndexDuration > 1) ? (double)ii / (double)(transitionIndexDuration - 1) : 0.0;
                    int    previousPitchPeriodSampleIndex = previousTopEndIndex + ii;
                    int    currentPitchPeriodSampleIndex  = currentTopStartIndex - transitionIndexDuration + ii;
                    short  newSample = (short)Math.Round(((1 - alpha) * sound.Samples[0][previousPitchPeriodSampleIndex] +
                                                          alpha * sound.Samples[0][currentPitchPeriodSampleIndex]));
                    newSamples.Add(newSample);
                }
                // Next, add samples around the top of the current pitch period:
                if (iPitchMark < (originalPitchMarkIndexList.Count - 1))
                {
                    int nextModifiedPitchMarkIndexSpacing = modifiedPitchMarkIndexSpacingList[iPitchMark];
                    int currentTopEndIndex = currentPitchMarkIndex + (int)Math.Round(topFraction * nextModifiedPitchMarkIndexSpacing);
                    for (int ii = currentTopStartIndex; ii < currentTopEndIndex; ii++)
                    {
                        newSamples.Add(sound.Samples[0][ii]);
                    }
                }
                else // Special treatment of the final pitch period: copy everything up to the end of the sound.
                {
                    int endIndex = sound.Samples[0].Count;
                    for (int ii = currentTopStartIndex; ii < endIndex; ii++)
                    {
                        newSamples.Add(sound.Samples[0][ii]);
                    }
                }
            }

            // Finally, build the sound from the new samples (the same list is used for both channels):
            WAVSound newSound = new WAVSound(sound.Name, sound.SampleRate, sound.NumberOfChannels, sound.BitsPerSample);

            newSound.GenerateFromSamples(new List <List <short> >()
            {
                newSamples, newSamples
            });

            return(newSound);
        }