// NewDetection
/// <summary>
/// Scans a (presumed silent) sample file and returns the largest smoothed
/// per-block peak amplitude found, usable as a silence threshold for the
/// phrase-detection routines. The stream is analysed in fixed 25 ms blocks,
/// each smoothed with the previous block (two-block moving average).
/// </summary>
/// <param name="assetStream">PCM audio stream to analyse; closed before returning.</param>
/// <param name="audioPCMFormat">PCM format describing the stream (sample rate, block alignment).</param>
/// <returns>The maximum two-block moving average of peak values over the whole stream.</returns>
public static long GetSilenceAmplitude(Stream assetStream, AudioLibPCMFormat audioPCMFormat)
{
    CancelOperation = false;

    // Fixed analysis granularity: 25 ms per block.
    double BlockTime = 25;
    double assetTimeInMS = audioPCMFormat.ConvertBytesToTime(assetStream.Length) / AudioLibPCMFormat.TIME_UNIT;
    //Console.WriteLine("assetTimeInMS " + assetTimeInMS);

    long Iterations = Convert.ToInt64(assetTimeInMS / BlockTime);
    // Number of sample frames contained in one 25 ms block.
    long SampleCount = Convert.ToInt64(audioPCMFormat.SampleRate / (1000 / BlockTime));

    long lLargest = 0;
    long lCurrentSum = 0;
    long lSumPrev = 0;

    for (long j = 0; j < Iterations - 1; j++)
    {
        // Average peak amplitude of the current block, smoothed with the
        // previous block's value to suppress single-block spikes.
        lCurrentSum = GetAvragePeakValue(assetStream, SampleCount, audioPCMFormat);
        long lBlockSum = (lCurrentSum + lSumPrev) / 2;
        lSumPrev = lCurrentSum;

        if (lLargest < lBlockSum)
        {
            lLargest = lBlockSum;
        }

        if (CancelOperation)
        {
            break;
        }
    }

    // The original wrapped the stream in a BinaryReader solely to close it;
    // closing the stream directly has the identical effect.
    assetStream.Close();
    return lLargest;
}
/// <summary>
/// Detects where trailing silence begins in the asset, so silence at the end
/// can be truncated. Analyses the stream in 25 ms blocks (two-block moving
/// average of peak values) and remembers the start of the last run of blocks
/// below the threshold; only a run extending to the end of the asset survives.
/// </summary>
/// <param name="assetStream">PCM audio stream to analyse; closed before returning.</param>
/// <param name="audioPCMFormat">PCM format of the stream.</param>
/// <param name="threshold">Amplitude below which a block counts as silence.</param>
/// <param name="GapLength">Minimum silence gap in local time units (not used by this algorithm; kept for signature compatibility).</param>
/// <param name="before">Safety margin in local time units, added after the detected silence start.</param>
/// <returns>Silence start time in local time units, or 0 if no trailing silence was found or the operation was cancelled.</returns>
public static long RemoveSilenceFromEnd(Stream assetStream, AudioLibPCMFormat audioPCMFormat, long threshold, double GapLength, double before)
{
    CancelOperation = false;

    double assetTimeInMS = audioPCMFormat.ConvertBytesToTime(assetStream.Length) / AudioLibPCMFormat.TIME_UNIT;

    // Round the margin to a block-aligned byte count, then express it in
    // milliseconds. BUGFIX: the original added the raw BYTE count to
    // phraseMarkTimeForDeletingSilence, which is in MILLISECONDS — a unit
    // mismatch (compare ApplyPhraseDetection, which uses BeforePhraseInMS).
    before = audioPCMFormat.ConvertTimeToBytes((long)before);
    before = audioPCMFormat.AdjustByteToBlockAlignFrameSize((long)before);
    double beforeInMS = audioPCMFormat.ConvertBytesToTime((long)before) / (double)AudioLibPCMFormat.TIME_UNIT;

    double BlockTime = 25; // milliseconds per analysis block
    long Iterations = Convert.ToInt64(assetTimeInMS / BlockTime);
    long SampleCount = Convert.ToInt64(audioPCMFormat.SampleRate / (1000 / BlockTime));

    // Compensates for rounding in SampleCount so block indices map back to
    // accurate byte offsets when converting j to a timestamp.
    double errorCompensatingCoefficient = GetErrorCompensatingConstant(SampleCount, audioPCMFormat);

    long lCurrentSum = 0;
    long lSumPrev = 0;
    bool IsSilenceDetected = false;
    long phraseMarkTimeForDeletingSilence = 0;

    for (long j = 0; j < Iterations - 1; j++)
    {
        if (CancelOperation)
        {
            return 0;
        }

        // Two-block moving average of the peak amplitude.
        lCurrentSum = GetAvragePeakValue(assetStream, SampleCount, audioPCMFormat);
        long lSum = (lCurrentSum + lSumPrev) / 2;
        lSumPrev = lCurrentSum;

        if (lSum < threshold)
        {
            // Record where this run of silence started (in ms); a later
            // non-silent block resets it, so only trailing silence survives.
            if (!IsSilenceDetected)
            {
                phraseMarkTimeForDeletingSilence = audioPCMFormat.ConvertBytesToTime(Convert.ToInt64(errorCompensatingCoefficient * j) * SampleCount * audioPCMFormat.BlockAlign) / AudioLibPCMFormat.TIME_UNIT;
                IsSilenceDetected = true;
            }
        }
        else
        {
            IsSilenceDetected = false;
            phraseMarkTimeForDeletingSilence = 0;
        }
    }

    // The original wrapped the stream in a BinaryReader solely to close it;
    // closing the stream directly has the identical effect.
    assetStream.Close();

    if (phraseMarkTimeForDeletingSilence == 0)
    {
        // No trailing silence detected.
        return 0;
    }

    // Add the safety margin (now correctly in ms) and convert back to local time units.
    double detectedSilenceTime = phraseMarkTimeForDeletingSilence + beforeInMS;
    return Convert.ToInt64(detectedSilenceTime * AudioLibPCMFormat.TIME_UNIT);
}
/// <summary>
/// Detects phrases of the asset for which stream is provided and returns timing list of detected phrases in local units
/// </summary>
/// <param name="assetStream">PCM audio stream to analyse; closed before returning.</param>
/// <param name="audioPCMFormat">PCM format of the stream (sample rate, block alignment).</param>
/// <param name="threshold">Amplitude below which a 25 ms block counts as silence.</param>
/// <param name="GapLength">Minimum silence gap that triggers a phrase boundary — appears to be in bytes here (it is byte-aligned and passed to ConvertBytesToTime); TODO confirm against callers.</param>
/// <param name="before">Margin subtracted before each detected phrase start — appears to be in bytes here; TODO confirm against callers.</param>
/// <returns>List of phrase start times in local time units, or null if no phrase was detected or the operation was cancelled.</returns>
private static List <long> ApplyPhraseDetection(Stream assetStream, AudioLibPCMFormat audioPCMFormat, long threshold, double GapLength, double before)
{
    CancelOperation = false;
    //m_AudioAsset = ManagedAsset.AudioMediaData;

    double assetTimeInMS = audioPCMFormat.ConvertBytesToTime(assetStream.Length) / AudioLibPCMFormat.TIME_UNIT;

    // Round both byte quantities to whole PCM frames.
    GapLength = audioPCMFormat.AdjustByteToBlockAlignFrameSize((long)GapLength);
    before = audioPCMFormat.AdjustByteToBlockAlignFrameSize((long)before);

    int Block = 0;
    // determine the Block size
    if (audioPCMFormat.SampleRate > 22500)
    {
        Block = 192;
    }
    else
    {
        Block = 96;
    }

    // count chunck of silence which trigger phrase detection
    // NOTE: this value is recomputed below from BlockTime; this initial
    // byte-based computation is effectively superseded.
    long lCountSilGap = (long)(2 * GapLength) / Block; // multiplied by two because j counter is incremented by 2

    long lSum = 0;
    List <double> detectedPhraseTimingList = new List <double>();
    long lCheck = 0;

    // flags to indicate phrases and silence
    bool boolPhraseDetected = false;
    bool boolBeginPhraseDetected = false;

    double BlockTime = 25; // milliseconds
    double BeforePhraseInMS = audioPCMFormat.ConvertBytesToTime((long)before) / AudioLibPCMFormat.TIME_UNIT;
    //Console.WriteLine ("before , silgap " + BeforePhraseInMS+" , " + GapLength ) ;

    // Re-express the silence gap as a number of 25 ms blocks.
    lCountSilGap = Convert.ToInt64((audioPCMFormat.ConvertBytesToTime((long)GapLength) / AudioLibPCMFormat.TIME_UNIT) / BlockTime);

    long Iterations = Convert.ToInt64(assetTimeInMS / BlockTime);
    // Sample frames per 25 ms block.
    long SampleCount = Convert.ToInt64(audioPCMFormat.SampleRate / (1000 / BlockTime));
    // Compensates for rounding in SampleCount when mapping block index j back to a byte offset.
    double errorCompensatingCoefficient = GetErrorCompensatingConstant(SampleCount, audioPCMFormat);

    long SpeechBlockCount = 0;
    long lCurrentSum = 0;
    long lSumPrev = 0;
    BinaryReader br = new BinaryReader(assetStream);
    bool PhraseNominated = false;
    long SpeechChunkSize = 5;
    long Counter = 0;

    for (long j = 0; j < Iterations - 1; j++)
    {
        if (CancelOperation)
        {
            return(null);
        }

        // decodes audio chunck inside block
        //lCurrentSum = GetAverageSampleValue(br, SampleCount);
        lCurrentSum = GetAvragePeakValue(assetStream, SampleCount, audioPCMFormat);
        // Two-block moving average to smooth out single-block spikes.
        lSum = (lCurrentSum + lSumPrev) / 2;
        lSumPrev = lCurrentSum;

        // conditional triggering of phrase detection
        if (lSum < threshold)
        {
            // Silent block: extend the current silence run, reset speech counting.
            lCheck++;
            SpeechBlockCount = 0;
        }
        else
        {
            // If sound starts before a full silence gap has elapsed, the asset
            // begins with a phrase at time 0 (recorded exactly once).
            if (j < lCountSilGap && boolBeginPhraseDetected == false)
            {
                boolBeginPhraseDetected = true;
                detectedPhraseTimingList.Add(Convert.ToInt64(0));
                boolPhraseDetected = true;
                lCheck = 0;
            }

            // checks the length of silence
            if (lCheck > lCountSilGap)
            {
                PhraseNominated = true;
                lCheck = 0;
            }
            if (PhraseNominated)
            {
                SpeechBlockCount++;
            }

            // Confirm the nominated phrase only after enough consecutive speech
            // blocks (SpeechChunkSize) and enough elapsed blocks since
            // nomination (Counter >= 4) — filters out brief noise bursts.
            if (SpeechBlockCount >= SpeechChunkSize && Counter >= 4)
            {
                //sets the detection flag
                boolPhraseDetected = true;

                // changing following time calculations to reduce concatination of rounding off errors
                //alPhrases.Add(((j - Counter) * BlockTime) - BeforePhraseInMS);
                //double phraseMarkTime = ObiCalculationFunctions.ConvertByteToTime (Convert.ToInt64(errorCompensatingCoefficient * (j - Counter)) * SampleCount * m_AudioAsset.PCMFormat.Data.BlockAlign,
                //(int) m_AudioAsset.PCMFormat.Data.SampleRate,
                //(int) m_AudioAsset.PCMFormat.Data.BlockAlign);

                // Phrase start = block where nomination happened (j - Counter),
                // converted to ms via byte offset for accuracy.
                long phraseMarkTime = audioPCMFormat.ConvertBytesToTime(Convert.ToInt64(errorCompensatingCoefficient * (j - Counter)) * SampleCount * audioPCMFormat.BlockAlign) / AudioLibPCMFormat.TIME_UNIT;
                //Console.WriteLine("mark time :" + phraseMarkTime);
                detectedPhraseTimingList.Add(phraseMarkTime - BeforePhraseInMS);

                SpeechBlockCount = 0;
                Counter = 0;
                PhraseNominated = false;
            }
            // Any non-silent block resets the silence-run length.
            lCheck = 0;
        }

        // Count blocks elapsed since the phrase was nominated.
        if (PhraseNominated)
        {
            Counter++;
        }
        // end outer For
    }
    br.Close();

    List <long> detectedPhraseTimingsInTimeUnits = new List <long>();
    if (boolPhraseDetected == false)
    {
        return(null);
    }
    else
    {
        // Convert the collected timings from milliseconds to local time units.
        for (int i = 0; i < detectedPhraseTimingList.Count; i++)
        {
            detectedPhraseTimingsInTimeUnits.Add(Convert.ToInt64(detectedPhraseTimingList[i] * AudioLibPCMFormat.TIME_UNIT));
        }
    }
    return(detectedPhraseTimingsInTimeUnits);
}
/// <summary>
/// Starts playback of a byte range of the given PCM audio data, honoring the
/// current fast-forward/rewind rate (m_FwdRwdRate). No-op unless the player is
/// in the Stopped state.
/// </summary>
/// <param name="currentAudioStreamProvider">Factory returning the audio stream to play; must not be null.</param>
/// <param name="dataLength">Total length of the audio data in bytes; must be positive.</param>
/// <param name="pcmInfo">PCM format of the audio data; must not be null.</param>
/// <param name="bytesFrom">Start byte offset; block-aligned internally; values &lt;= 0 mean "from the beginning".</param>
/// <param name="bytesTo">End byte offset; block-aligned internally; values &lt;= 0 mean "to the end".</param>
/// <exception cref="ArgumentNullException">When pcmInfo or currentAudioStreamProvider is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">When dataLength is not positive.</exception>
public void PlayBytes(StreamProviderDelegate currentAudioStreamProvider, long dataLength, AudioLibPCMFormat pcmInfo, long bytesFrom, long bytesTo)
{
    // BUGFIX: the single-string constructors of ArgumentNullException and
    // ArgumentOutOfRangeException take a PARAMETER NAME, not a message — the
    // original rendered as "Parameter name: PCM format cannot be null !".
    // Use the (paramName, message) overloads instead.
    if (pcmInfo == null)
    {
        throw new ArgumentNullException("pcmInfo", "PCM format cannot be null !");
    }
    if (currentAudioStreamProvider == null)
    {
        throw new ArgumentNullException("currentAudioStreamProvider", "Stream cannot be null !");
    }
    if (dataLength <= 0)
    {
        throw new ArgumentOutOfRangeException("dataLength", "Duration cannot be <= 0 !");
    }

    if (CurrentState == State.NotReady)
    {
        return;
    }
    if (CurrentState != State.Stopped)
    {
        Debug.Fail("Attempting to play when not stopped ? " + CurrentState);
        return;
    }

#if USE_SOUNDTOUCH
    // NOTE: the stereo branch is deliberately disabled ("false &&") — see the
    // TODO below; kept in place so it can be re-enabled once fixed.
    if (false && pcmInfo.NumberOfChannels > 1)
    {
        m_UseSoundTouch = false; //TODO: stereo all scrambled with SoundTouch !!
    }
    else
    {
        m_UseSoundTouch = m_UseSoundTouchBackup;
    }
#endif // USE_SOUNDTOUCH

    m_CurrentAudioStreamProvider = currentAudioStreamProvider;
    m_CurrentAudioStream = m_CurrentAudioStreamProvider();
    m_CurrentAudioPCMFormat = pcmInfo;
    m_CurrentAudioDataLength = dataLength;

    // Align the requested positions to PCM frame boundaries.
    long startPosition = 0;
    if (bytesFrom > 0)
    {
        startPosition = m_CurrentAudioPCMFormat.AdjustByteToBlockAlignFrameSize(bytesFrom);
    }

    long endPosition = 0;
    if (bytesTo > 0)
    {
        endPosition = m_CurrentAudioPCMFormat.AdjustByteToBlockAlignFrameSize(bytesTo);
    }

    // Snap positions that are within a millisecond of the natural boundaries.
    if (m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(startPosition, 0))
    {
        startPosition = 0;
    }
    if (m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(endPosition, dataLength))
    {
        endPosition = dataLength;
    }
    if (m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(endPosition, 0))
    {
        endPosition = 0;
    }

    // Effectively zero-length selection: nothing to play.
    if (endPosition != 0 && m_CurrentAudioPCMFormat.BytesAreEqualWithMillisecondsTolerance(endPosition, startPosition))
    {
        return;
    }

    if (startPosition >= 0 && (endPosition == 0 || startPosition < endPosition) && endPosition <= dataLength)
    {
        if (m_FwdRwdRate == 0)
        {
            startPlayback(startPosition, endPosition);
            Console.WriteLine("starting playback ");
        }
        else if (m_FwdRwdRate > 0)
        {
            FastForward(startPosition);
            Console.WriteLine("fast forward ");
        }
        else if (m_FwdRwdRate < 0)
        {
            // Rewinding from "the beginning" means starting at the very end.
            if (startPosition == 0)
            {
                startPosition = m_CurrentAudioStream.Length;
            }
            Rewind(startPosition);
            Console.WriteLine("Rewind ");
        }
    }
    else
    {
        //throw new Exception("Start/end positions out of bounds of audio asset.");
        DebugFix.Assert(false);
    }
}