Example #1
        // NewDetection

        // Detects the maximum noise amplitude in a silent sample file
        public static long GetSilenceAmplitude(Stream assetStream, AudioLibPCMFormat audioPCMFormat)
        {
            CancelOperation = false;
            //m_AudioAsset = RefAsset.AudioMediaData;
            // the reader exists only so the stream can be closed at the end;
            // GetAvragePeakValue reads from assetStream directly
            BinaryReader brRef = new BinaryReader(assetStream);

            // block-aligned clip size in bytes (only used by the commented-out length adjustment below)
            //long lSize = RefAsset.AudioMediaData.PCMFormat.Data.ConvertTimeToBytes(RefAsset.AudioMediaData.AudioDuration.AsLocalUnits);
            long lSize = audioPCMFormat.AdjustByteToBlockAlignFrameSize(assetStream.Length);

            // Block size of the audio chunk; the smallest unit of detection
            int Block;

            // determine the block size from the sample rate
            if (audioPCMFormat.SampleRate > 22500)
            {
                Block = 192;
            }
            else
            {
                Block = 96;
            }

            // the stream's read position is expected to sit just past the header

            long lLargest = 0;
            long lBlockSum;

            // adjust lSize to avoid reading beyond the file length
            //lSize = ((lSize / Block) * Block) - 4;

            // Experiment starts here
            double BlockTime     = 25;
            double assetTimeInMS = audioPCMFormat.ConvertBytesToTime(assetStream.Length) / AudioLibPCMFormat.TIME_UNIT;
            //Console.WriteLine("assetTimeInMS " + assetTimeInMS);
            long Iterations  = Convert.ToInt64(assetTimeInMS / BlockTime);
            long SampleCount = Convert.ToInt64((int)audioPCMFormat.SampleRate / (1000 / BlockTime));
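            // Worked example, at 44,100 Hz with BlockTime = 25 ms:
            //   Iterations  = assetTimeInMS / 25  (one iteration per 25 ms block)
            //   SampleCount = 44100 / (1000 / 25) = 44100 / 40 = 1102 frames
            //   (Convert.ToInt64 rounds the exact value 1102.5 half-to-even, giving 1102)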

            long lCurrentSum = 0;
            long lSumPrev    = 0;


            for (long j = 0; j < Iterations - 1; j++)
            {
                // lBlockSum averages the current block's peak value with the previous block's
                //lCurrentSum  = GetAverageSampleValue(brRef, SampleCount)  ;
                lCurrentSum = GetAvragePeakValue(assetStream, SampleCount, audioPCMFormat);
                lBlockSum   = Convert.ToInt64((lCurrentSum + lSumPrev) / 2);
                lSumPrev    = lCurrentSum;

                if (lLargest < lBlockSum)
                {
                    lLargest = lBlockSum;
                }
                if (CancelOperation)
                {
                    break;
                }
            }
            brRef.Close();

            return lLargest;
        }
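A minimal usage sketch for GetSilenceAmplitude. It assumes System.IO is imported, that the file holds raw PCM with the position already past any header, and that the caller obtains an AudioLibPCMFormat instance elsewhere; MeasureNoiseFloor is a hypothetical helper, not part of the library:

        // Hypothetical caller: measure the noise floor of a silent reference
        // recording, to be used as the threshold for the detection methods below.
        public static long MeasureNoiseFloor(string rawPcmPath, AudioLibPCMFormat format)
        {
            using (FileStream stream = File.OpenRead(rawPcmPath))
            {
                // largest averaged peak value found across all 25 ms blocks
                return GetSilenceAmplitude(stream, format);
            }
        }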
Example #2
        public static long RemoveSilenceFromEnd(Stream assetStream, AudioLibPCMFormat audioPCMFormat, long threshold, double GapLength, double before)
        {
            GapLength = audioPCMFormat.ConvertTimeToBytes((long)GapLength);
            before    = audioPCMFormat.ConvertTimeToBytes((long)before);

            CancelOperation = false;
            double assetTimeInMS = audioPCMFormat.ConvertBytesToTime(assetStream.Length) / AudioLibPCMFormat.TIME_UNIT;

            GapLength = audioPCMFormat.AdjustByteToBlockAlignFrameSize((long)GapLength);
            before    = audioPCMFormat.AdjustByteToBlockAlignFrameSize((long)before);

            long   lSum = 0;
            double detectedSilenceTime = 0;

            // flags to indicate phrases and silence
            bool boolPhraseDetected = false;


            double BlockTime = 25; // milliseconds

            long   Iterations  = Convert.ToInt64(assetTimeInMS / BlockTime);
            long   SampleCount = Convert.ToInt64(audioPCMFormat.SampleRate / (1000 / BlockTime));
            double errorCompensatingCoefficient = GetErrorCompensatingConstant(SampleCount, audioPCMFormat);

            long lCurrentSum       = 0;
            long lSumPrev          = 0;
            bool IsSilenceDetected = false;
            long phraseMarkTimeForDeletingSilence = 0;

            BinaryReader br = new BinaryReader(assetStream);

            for (long j = 0; j < Iterations - 1; j++)
            {
                if (CancelOperation)
                {
                    return 0;
                }
                // decode the audio chunk inside the block
                lCurrentSum = GetAvragePeakValue(assetStream, SampleCount, audioPCMFormat);
                lSum        = (lCurrentSum + lSumPrev) / 2;
                lSumPrev    = lCurrentSum;

                if (lSum < threshold)
                {
                    if (!IsSilenceDetected)
                    {
                        phraseMarkTimeForDeletingSilence = audioPCMFormat.ConvertBytesToTime(Convert.ToInt64(errorCompensatingCoefficient * (j)) * SampleCount * audioPCMFormat.BlockAlign) / AudioLibPCMFormat.TIME_UNIT;
                        IsSilenceDetected = true;
                    }
                }
                else
                {
                    IsSilenceDetected = false;
                    phraseMarkTimeForDeletingSilence = 0;
                }
            }
            br.Close();

            if (phraseMarkTimeForDeletingSilence != 0)
            {
                boolPhraseDetected = true;

                // 'before' was converted to bytes above; convert it back to milliseconds
                // so both operands of the addition share the same unit
                double beforeInMS = audioPCMFormat.ConvertBytesToTime((long)before) / AudioLibPCMFormat.TIME_UNIT;
                detectedSilenceTime = phraseMarkTimeForDeletingSilence + beforeInMS;
            }
            if (!boolPhraseDetected)
            {
                return 0;
            }

            return Convert.ToInt64(detectedSilenceTime * AudioLibPCMFormat.TIME_UNIT);
        }
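A sketch of how the return value might be consumed. The threshold would come from GetSilenceAmplitude above; the gap and 'before' values are illustrative, and the assumption that ConvertTimeToBytes expects the same local time units that ConvertBytesToTime produces is inferred from the code, not confirmed by library documentation:

        // Hypothetical caller: find where trailing silence begins, in milliseconds.
        public static double FindTrailingSilenceMS(Stream stream, AudioLibPCMFormat format, long threshold)
        {
            // 300 ms gap and 100 ms lead-in, expressed in local time units (assumed)
            double gap    = 300 * AudioLibPCMFormat.TIME_UNIT;
            double before = 100 * AudioLibPCMFormat.TIME_UNIT;

            long cutPointInTimeUnits = RemoveSilenceFromEnd(stream, format, threshold, gap, before);

            // zero means no qualifying silence was found (or the operation was cancelled)
            return (double)cutPointInTimeUnits / AudioLibPCMFormat.TIME_UNIT;
        }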
Example #3
        /// <summary>
        /// Detects phrases in the asset whose stream is provided and returns the timing list of detected phrases in local time units
        /// </summary>
        /// <param name="assetStream">raw PCM stream of the audio asset</param>
        /// <param name="audioPCMFormat">PCM format describing the stream</param>
        /// <param name="threshold">amplitude below which a block counts as silence</param>
        /// <param name="GapLength">minimum silence gap that separates phrases, in bytes</param>
        /// <param name="before">amount of leading audio to keep before each detected phrase, in bytes</param>
        /// <returns>phrase start times in local time units, or null when nothing is detected or the operation is cancelled</returns>
        private static List <long> ApplyPhraseDetection(Stream assetStream, AudioLibPCMFormat audioPCMFormat, long threshold, double GapLength, double before)
        {
            CancelOperation = false;
            //m_AudioAsset = ManagedAsset.AudioMediaData;
            double assetTimeInMS = audioPCMFormat.ConvertBytesToTime(assetStream.Length) / AudioLibPCMFormat.TIME_UNIT;

            GapLength = audioPCMFormat.AdjustByteToBlockAlignFrameSize((long)GapLength);
            before    = audioPCMFormat.AdjustByteToBlockAlignFrameSize((long)before);

            int Block = 0;

            // determine the block size from the sample rate
            if (audioPCMFormat.SampleRate > 22500)
            {
                Block = 192;
            }
            else
            {
                Block = 96;
            }


            // number of silent chunks required to trigger phrase detection
            // (rough initial estimate; recomputed from BlockTime further below)
            long          lCountSilGap             = (long)(2 * GapLength) / Block;
            long          lSum                     = 0;
            List <double> detectedPhraseTimingList = new List <double>();
            long          lCheck                   = 0;

            // flags to indicate phrases and silence
            bool boolPhraseDetected      = false;
            bool boolBeginPhraseDetected = false;


            double BlockTime        = 25; // milliseconds
            double BeforePhraseInMS = audioPCMFormat.ConvertBytesToTime((long)before) / AudioLibPCMFormat.TIME_UNIT;

            //Console.WriteLine ("before , silgap " + BeforePhraseInMS+" , " + GapLength  ) ;
            lCountSilGap = Convert.ToInt64((audioPCMFormat.ConvertBytesToTime((long)GapLength) / AudioLibPCMFormat.TIME_UNIT) / BlockTime);

            long   Iterations  = Convert.ToInt64(assetTimeInMS / BlockTime);
            long   SampleCount = Convert.ToInt64(audioPCMFormat.SampleRate / (1000 / BlockTime));
            double errorCompensatingCoefficient = GetErrorCompensatingConstant(SampleCount, audioPCMFormat);
            long   SpeechBlockCount             = 0;

            long lCurrentSum = 0;
            long lSumPrev    = 0;

            BinaryReader br = new BinaryReader(assetStream);

            bool PhraseNominated = false;
            long SpeechChunkSize = 5;
            long Counter         = 0;

            for (long j = 0; j < Iterations - 1; j++)
            {
                if (CancelOperation)
                {
                    return null;
                }
                // decode the audio chunk inside the block
                //lCurrentSum = GetAverageSampleValue(br, SampleCount);
                lCurrentSum = GetAvragePeakValue(assetStream, SampleCount, audioPCMFormat);
                lSum        = (lCurrentSum + lSumPrev) / 2;
                lSumPrev    = lCurrentSum;

                // conditional triggering of phrase detection
                if (lSum < threshold)
                {
                    lCheck++;

                    SpeechBlockCount = 0;
                }
                else
                {
                    if (j < lCountSilGap && boolBeginPhraseDetected == false)
                    {
                        boolBeginPhraseDetected = true;
                        detectedPhraseTimingList.Add(0);
                        boolPhraseDetected = true;
                        lCheck             = 0;
                    }


                    // checks the length of silence
                    if (lCheck > lCountSilGap)
                    {
                        PhraseNominated = true;
                        lCheck          = 0;
                    }
                    if (PhraseNominated)
                    {
                        SpeechBlockCount++;
                    }

                    if (SpeechBlockCount >= SpeechChunkSize && Counter >= 4)
                    {
                        //sets the detection flag
                        boolPhraseDetected = true;

                        // the following time calculation reduces accumulation of rounding errors
                        //alPhrases.Add(((j - Counter) * BlockTime) - BeforePhraseInMS);
                        //double phraseMarkTime = ObiCalculationFunctions.ConvertByteToTime (Convert.ToInt64(errorCompensatingCoefficient  * (j - Counter)) * SampleCount * m_AudioAsset.PCMFormat.Data.BlockAlign,
                        //(int) m_AudioAsset.PCMFormat.Data.SampleRate,
                        //(int) m_AudioAsset.PCMFormat.Data.BlockAlign);
                        long phraseMarkTime = audioPCMFormat.ConvertBytesToTime(Convert.ToInt64(errorCompensatingCoefficient * (j - Counter)) * SampleCount * audioPCMFormat.BlockAlign) / AudioLibPCMFormat.TIME_UNIT;
                        //Console.WriteLine("mark time :" + phraseMarkTime);
                        detectedPhraseTimingList.Add(phraseMarkTime - BeforePhraseInMS);

                        SpeechBlockCount = 0;
                        Counter          = 0;
                        PhraseNominated  = false;
                    }
                    lCheck = 0;
                }
                if (PhraseNominated)
                {
                    Counter++;
                }
                // end outer For
            }
            br.Close();

            List <long> detectedPhraseTimingsInTimeUnits = new List <long>();

            if (!boolPhraseDetected)
            {
                return null;
            }

            for (int i = 0; i < detectedPhraseTimingList.Count; i++)
            {
                detectedPhraseTimingsInTimeUnits.Add(Convert.ToInt64(detectedPhraseTimingList[i] * AudioLibPCMFormat.TIME_UNIT));
            }

            return detectedPhraseTimingsInTimeUnits;
        }
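A sketch showing how the returned list might be consumed; since ApplyPhraseDetection is private, the hypothetical caller below would live in the same class, with System and System.Collections.Generic assumed imported:

        // Hypothetical caller: print detected phrase start times in milliseconds.
        private static void PrintPhraseTimings(Stream stream, AudioLibPCMFormat format,
                                               long threshold, double gapLength, double before)
        {
            List<long> timings = ApplyPhraseDetection(stream, format, threshold, gapLength, before);

            if (timings == null)
            {
                Console.WriteLine("no phrases detected (or operation cancelled)");
                return;
            }
            foreach (long t in timings)
            {
                Console.WriteLine("phrase at " + (t / AudioLibPCMFormat.TIME_UNIT) + " ms");
            }
        }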
Example #4
        public void PlayBytes(StreamProviderDelegate currentAudioStreamProvider,
                              long dataLength, AudioLibPCMFormat pcmInfo,
                              long bytesFrom, long bytesTo)
        {
            if (pcmInfo == null)
            {
                // pass the parameter name where ArgumentNullException expects it,
                // not the message (the original code swapped the two)
                throw new ArgumentNullException(nameof(pcmInfo), "PCM format cannot be null !");
            }

            if (currentAudioStreamProvider == null)
            {
                throw new ArgumentNullException(nameof(currentAudioStreamProvider), "Stream provider cannot be null !");
            }
            if (dataLength <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(dataLength), "Data length cannot be <= 0 !");
            }

            if (CurrentState == State.NotReady)
            {
                return;
            }

            if (CurrentState != State.Stopped)
            {
                Debug.Fail("Attempting to play when not stopped ? " + CurrentState);
                return;
            }


#if USE_SOUNDTOUCH
            // the 'false &&' deliberately short-circuits this branch: SoundTouch
            // stereo handling is broken (see the TODO below), so it stays disabled
            if (false && pcmInfo.NumberOfChannels > 1)
            {
                m_UseSoundTouch = false; //TODO: stereo all scrambled with SoundTouch !!
            }
            else
            {
                m_UseSoundTouch = m_UseSoundTouchBackup;
            }
#endif // USE_SOUNDTOUCH

            m_CurrentAudioStreamProvider = currentAudioStreamProvider;
            m_CurrentAudioStream         = m_CurrentAudioStreamProvider();
            m_CurrentAudioPCMFormat      = pcmInfo;
            m_CurrentAudioDataLength     = dataLength;



            long startPosition = 0;
            if (bytesFrom > 0)
            {
                startPosition = m_CurrentAudioPCMFormat.AdjustByteToBlockAlignFrameSize(bytesFrom);
            }

            long endPosition = 0;
            if (bytesTo > 0)
            {
                endPosition = m_CurrentAudioPCMFormat.AdjustByteToBlockAlignFrameSize(bytesTo);
            }

            if (m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(startPosition, 0))
            {
                startPosition = 0;
            }

            if (m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(endPosition, dataLength))
            {
                endPosition = dataLength;
            }

            if (m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(endPosition, 0))
            {
                endPosition = 0;
            }

            if (endPosition != 0 &&
                m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(endPosition, startPosition))
            {
                return;
            }

            if (startPosition >= 0 &&
                (endPosition == 0 || startPosition < endPosition) &&
                endPosition <= dataLength)
            {
                if (m_FwdRwdRate == 0)
                {
                    startPlayback(startPosition, endPosition);
                    Console.WriteLine("starting playback ");
                }
                else if (m_FwdRwdRate > 0)
                {
                    FastForward(startPosition);
                    Console.WriteLine("fast forward ");
                }
                else if (m_FwdRwdRate < 0)
                {
                    if (startPosition == 0)
                    {
                        startPosition = m_CurrentAudioStream.Length;
                    }
                    Rewind(startPosition);
                    Console.WriteLine("Rewind ");
                }
            }
            else
            {
                //throw new Exception("Start/end positions out of bounds of audio asset.");
                DebugFix.Assert(false);
            }
        }
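A usage sketch for PlayBytes. It assumes StreamProviderDelegate is a parameterless delegate returning a Stream (as its invocation above implies), that the helper lives in the same player class, and that ConvertTimeToBytes takes local time units; the path handling and the ten-second figure are illustrative:

        // Hypothetical helper in the same player class: play the first ten
        // seconds of a raw PCM file.
        public void PlayFirstTenSeconds(string rawPcmPath, AudioLibPCMFormat format)
        {
            // re-opening the file on each call matches how the provider is invoked above
            StreamProviderDelegate provider = () => File.OpenRead(rawPcmPath);
            long totalBytes = new FileInfo(rawPcmPath).Length;

            // 10,000 ms expressed in the library's local time units (assumed)
            long tenSecondsInBytes = format.ConvertTimeToBytes(10000 * AudioLibPCMFormat.TIME_UNIT);

            PlayBytes(provider, totalBytes, format, 0, Math.Min(tenSecondsInBytes, totalBytes));
        }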