/// <summary>
/// Splits a sampled signal into consecutive, non-overlapping frames of equal size.
/// </summary>
/// <param name="pSignal">Samples of the signal to split.</param>
/// <param name="pSamplingRate">Sampling rate in samples per second.</param>
/// <param name="pSignalLengthInMilliSeconds">
/// Declared duration of the signal. Kept for interface compatibility only: the number of
/// frames is derived from the actual sample count so that every sample is covered and no
/// read goes past the end of <paramref name="pSignal"/>.
/// </param>
/// <param name="pFrameLengthinMilliSeconds">Duration of a single frame in milliseconds.</param>
/// <returns>
/// One frame per <c>frameSize</c> samples. A trailing partial frame is zero-padded to
/// <c>frameSize</c>. An empty signal yields an empty array.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="pSignal"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The sampling rate and frame length give a frame of less than one sample.
/// </exception>
public static SignalFrame[] DivideSignalToFrames(double[] pSignal, int pSamplingRate, double pSignalLengthInMilliSeconds, double pFrameLengthinMilliSeconds)
{
    if (pSignal == null)
    {
        throw new ArgumentNullException("pSignal");
    }

    // Samples per frame: rate is per second, frame length is in milliseconds.
    int frameSize = (int)(pSamplingRate * pFrameLengthinMilliSeconds / 1000.0);
    if (frameSize <= 0)
    {
        throw new ArgumentOutOfRangeException(
            "pFrameLengthinMilliSeconds",
            "Sampling rate and frame length must yield at least one sample per frame.");
    }

    // Count frames from the real data instead of the declared duration. This covers
    // surplus samples, never produces a negative copy length when the signal is shorter
    // than declared, and does not append an all-zero frame when the sample count is an
    // exact multiple of the frame size.
    int remainingDataSize = pSignal.Length % frameSize;
    int numberOfFrames = pSignal.Length / frameSize + (remainingDataSize > 0 ? 1 : 0);

    SignalFrame[] frames = new SignalFrame[numberOfFrames];
    int signalIndex = 0;
    for (int i = 0; i < numberOfFrames; i++)
    {
        frames[i] = new SignalFrame();
        frames[i].Data = new double[frameSize];

        // Only the last frame can be short; its tail keeps the default value 0.
        int samplesToCopy = Math.Min(frameSize, pSignal.Length - signalIndex);
        Array.Copy(pSignal, signalIndex, frames[i].Data, 0, samplesToCopy);
        signalIndex += samplesToCopy;
    }

    return frames;
}
/// <summary>
/// Cuts a sampled signal into back-to-back frames that all hold the same number of samples.
/// </summary>
/// <param name="pSignal">Samples of the signal to cut.</param>
/// <param name="pSamplingRate">Sampling rate in samples per second.</param>
/// <param name="pSignalLengthInMilliSeconds">
/// Declared duration of the signal. Retained so existing callers keep compiling; the frame
/// count is computed from <c>pSignal.Length</c>, which is the only value that guarantees
/// every sample is used and no copy runs past the end of the array.
/// </param>
/// <param name="pFrameLengthinMilliSeconds">Duration of one frame in milliseconds.</param>
/// <returns>
/// The frames in signal order. If the sample count is not a multiple of the frame size the
/// last frame is padded with zeros. An empty signal produces an empty array.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="pSignal"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The computed frame size is smaller than one sample.
/// </exception>
public static SignalFrame[] DivideSignalToFrames(double[] pSignal, int pSamplingRate, double pSignalLengthInMilliSeconds, double pFrameLengthinMilliSeconds)
{
    if (pSignal == null)
    {
        throw new ArgumentNullException("pSignal");
    }

    // Milliseconds -> seconds, then seconds * rate -> samples (fraction truncated).
    int frameSize = (int)(pSamplingRate * pFrameLengthinMilliSeconds / 1000.0);
    if (frameSize <= 0)
    {
        throw new ArgumentOutOfRangeException(
            "pFrameLengthinMilliSeconds",
            "Sampling rate and frame length must yield at least one sample per frame.");
    }

    int fullFrames = pSignal.Length / frameSize;
    int remainingDataSize = pSignal.Length - fullFrames * frameSize;

    // A partial frame is added only when there really are leftover samples; the previous
    // compensation logic always added one, producing a silent all-zero frame for signals
    // whose length is an exact multiple of the frame size.
    int numberOfFrames = remainingDataSize > 0 ? fullFrames + 1 : fullFrames;

    SignalFrame[] frames = new SignalFrame[numberOfFrames];
    for (int i = 0; i < numberOfFrames; i++)
    {
        frames[i] = new SignalFrame();
        frames[i].Data = new double[frameSize];
    }

    // Full frames first.
    int signalIndex = 0;
    for (int i = 0; i < fullFrames; i++)
    {
        Array.Copy(pSignal, signalIndex, frames[i].Data, 0, frameSize);
        signalIndex += frameSize;
    }

    // Leftover samples go to the final frame; the rest of it stays zero.
    if (remainingDataSize > 0)
    {
        Array.Copy(pSignal, signalIndex, frames[fullFrames].Data, 0, remainingDataSize);
    }

    return frames;
}
/// <summary>
/// Splits a sampled signal into consecutive, non-overlapping, equally sized frames.
/// </summary>
/// <param name="pSignal">Samples of the signal to split.</param>
/// <param name="pSamplingRate">Sampling rate in samples per second.</param>
/// <param name="pSignalLengthInMilliSeconds">
/// Declared duration of the signal. Kept for interface compatibility; the frame count is
/// taken from the actual sample count so no sample is dropped and no read overruns
/// <paramref name="pSignal"/>.
/// </param>
/// <param name="pFrameLengthinMilliSeconds">Duration of a single frame in milliseconds.</param>
/// <returns>
/// The frames in signal order. A trailing partial frame is zero-padded to the frame size.
/// An empty signal yields an empty array.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="pSignal"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The sampling rate and frame length give a frame of less than one sample.
/// </exception>
public static SignalFrame[] DivideSignalToFrames(double[] pSignal, int pSamplingRate, double pSignalLengthInMilliSeconds, double pFrameLengthinMilliSeconds)
{
    if (pSignal == null)
    {
        throw new ArgumentNullException("pSignal");
    }

    // Frame length is in milliseconds, so divide by 1000 (not 100) to get seconds
    // before multiplying by the per-second sampling rate.
    int frameSize = (int)(pSamplingRate * pFrameLengthinMilliSeconds / 1000.0);
    if (frameSize <= 0)
    {
        throw new ArgumentOutOfRangeException(
            "pFrameLengthinMilliSeconds",
            "Sampling rate and frame length must yield at least one sample per frame.");
    }

    // Round up so the samples after the last full frame are kept instead of discarded.
    int remainingDataSize = pSignal.Length % frameSize;
    int numberOfFrames = pSignal.Length / frameSize + (remainingDataSize > 0 ? 1 : 0);

    SignalFrame[] frames = new SignalFrame[numberOfFrames];
    int signalIndex = 0;
    for (int i = 0; i < numberOfFrames; i++)
    {
        // Each slot must be instantiated before its Data buffer can be assigned.
        frames[i] = new SignalFrame();
        frames[i].Data = new double[frameSize];

        // Copy this frame's slice of the signal (source offset advances, destination
        // always starts at 0). Only the last frame can be short; its tail stays 0.
        int samplesToCopy = Math.Min(frameSize, pSignal.Length - signalIndex);
        Array.Copy(pSignal, signalIndex, frames[i].Data, 0, samplesToCopy);
        signalIndex += samplesToCopy;
    }

    return frames;
}
/// <summary>
/// Voice Activation Detection (VAD): scores every frame, thresholds and smooths the scores,
/// and returns copies of the frames that remain marked as active.
/// </summary>
/// <param name="pFrames">Frames to filter. The input frames are not modified.</param>
/// <returns>
/// New <c>SignalFrame</c> instances holding copies of the data of the active frames, in their
/// original order. Empty when <paramref name="pFrames"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="pFrames"/> is null.</exception>
public static SignalFrame[] RemoveSilentSegments(SignalFrame[] pFrames)
{
    if (pFrames == null)
    {
        throw new ArgumentNullException("pFrames");
    }

    // Nothing to score; also avoids passing empty arrays to the statistics helpers.
    if (pFrames.Length == 0)
    {
        return new SignalFrame[0];
    }

    double[] framesWeights = new double[pFrames.Length];
    for (int frameIndex = 0; frameIndex < pFrames.Length; frameIndex++)
    {
        double[] samples = pFrames[frameIndex].Data;
        if (samples.Length == 0)
        {
            // An empty frame would give 0/0 = NaN and poison the mean/std below;
            // leave its weight at 0 instead.
            continue;
        }

        double squareMean = 0;
        double avgZeroCrossing = 0;
        for (int i = 0; i < samples.Length; i++)
        {
            // Energy uses every sample, including the last one.
            squareMean += samples[i] * samples[i];

            // Neighbour term needs samples[i + 1], so it stops one element earlier.
            // It accumulates half the absolute change in magnitude between neighbours.
            if (i + 1 < samples.Length)
            {
                avgZeroCrossing += Math.Abs(Math.Abs(samples[i + 1]) - Math.Abs(samples[i])) / 2.0;
            }
        }

        squareMean /= samples.Length;
        avgZeroCrossing /= samples.Length;
        framesWeights[frameIndex] = squareMean * (1 - avgZeroCrossing) * 1000;
    }

    double avgWeights = mean(framesWeights);
    double stdWeights = std(framesWeights);

    // gamma * std is 0.2 * std^0.2 mathematically, but evaluating it as written gives
    // Infinity * 0 = NaN when std is 0. Fall back to the plain mean in that case.
    double activationThreshold = avgWeights;
    if (stdWeights > 0)
    {
        double gamma = 0.2 * Math.Pow(stdWeights, -0.8);
        activationThreshold = avgWeights + gamma * stdWeights;
    }

    // NOTE(review): threshold/smooth are defined elsewhere; the steps below rely on them
    // leaving exactly 1 in the entries of active frames - confirm against the helpers.
    //threshold weights.
    threshold(framesWeights, activationThreshold);
    //smooth weights to remove short silences.
    smooth(framesWeights);
    //set anything more than 0 with 1.
    threshold(framesWeights, 0);

    // Size the result with the same test that is used to fill it, so the array can never
    // be too small or end with unassigned slots.
    int numberOfActiveFrames = 0;
    for (int i = 0; i < framesWeights.Length; i++)
    {
        if (framesWeights[i] == 1)
        {
            numberOfActiveFrames++;
        }
    }

    SignalFrame[] activeFrames = new SignalFrame[numberOfActiveFrames];
    int activeFramesIndex = 0;
    for (int i = 0; i < pFrames.Length; i++)
    {
        if (framesWeights[i] == 1)
        {
            activeFrames[activeFramesIndex] = new SignalFrame();
            activeFrames[activeFramesIndex].Data = new double[pFrames[i].Data.Length];
            pFrames[i].Data.CopyTo(activeFrames[activeFramesIndex].Data, 0);
            activeFramesIndex++;
        }
    }

    return activeFrames;
}
/// <summary>
/// Voice Activation Detection (VAD): weights each frame, thresholds and smooths the weights,
/// and returns copies of the frames that are still flagged as active afterwards.
/// </summary>
/// <param name="pFrames">Frames to filter. The input frames are left untouched.</param>
/// <returns>
/// Freshly allocated <c>SignalFrame</c> objects carrying copies of the active frames' data,
/// in input order. Empty when <paramref name="pFrames"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="pFrames"/> is null.</exception>
public static SignalFrame[] RemoveSilentSegments(SignalFrame[] pFrames)
{
    if (pFrames == null)
    {
        throw new ArgumentNullException("pFrames");
    }

    // No frames means no statistics to compute; return early with an empty result.
    if (pFrames.Length == 0)
    {
        return new SignalFrame[0];
    }

    double[] framesWeights = new double[pFrames.Length];
    for (int frameIndex = 0; frameIndex < pFrames.Length; frameIndex++)
    {
        double[] samples = pFrames[frameIndex].Data;
        int sampleCount = samples.Length;
        if (sampleCount == 0)
        {
            // Dividing by a zero length would yield NaN and contaminate mean/std;
            // an empty frame simply keeps weight 0.
            continue;
        }

        // Mean of squared samples over the whole frame (last sample included).
        double squareMean = 0;
        for (int i = 0; i < sampleCount; i++)
        {
            squareMean += samples[i] * samples[i];
        }
        squareMean /= sampleCount;

        // Half the absolute change in magnitude between neighbouring samples, summed over
        // the sampleCount - 1 pairs and divided by sampleCount.
        double avgZeroCrossing = 0;
        for (int i = 0; i < sampleCount - 1; i++)
        {
            avgZeroCrossing += Math.Abs(Math.Abs(samples[i + 1]) - Math.Abs(samples[i])) / 2.0;
        }
        avgZeroCrossing /= sampleCount;

        framesWeights[frameIndex] = squareMean * (1 - avgZeroCrossing) * 1000;
    }

    double avgWeights = mean(framesWeights);
    double stdWeights = std(framesWeights);

    // With std == 0, Math.Pow(std, -0.8) is +Infinity and gamma * std becomes NaN.
    // The product tends to 0 as std tends to 0, so use the mean alone in that case.
    double activationThreshold = avgWeights;
    if (stdWeights > 0)
    {
        double gamma = 0.2 * Math.Pow(stdWeights, -0.8);
        activationThreshold = avgWeights + gamma * stdWeights;
    }

    // NOTE(review): threshold/smooth live outside this block; the code below assumes they
    // leave exactly 1 in the slots of active frames - verify against their definitions.
    //threshold weights.
    threshold(framesWeights, activationThreshold);
    //smooth weights to remove short silences.
    smooth(framesWeights);
    //set anything more than 0 with 1.
    threshold(framesWeights, 0);

    // Count with the same predicate used for copying so the result array is sized exactly.
    int numberOfActiveFrames = 0;
    foreach (double weight in framesWeights)
    {
        if (weight == 1)
        {
            numberOfActiveFrames++;
        }
    }

    SignalFrame[] activeFrames = new SignalFrame[numberOfActiveFrames];
    int activeFramesIndex = 0;
    for (int i = 0; i < pFrames.Length; i++)
    {
        if (framesWeights[i] != 1)
        {
            continue;
        }

        double[] copy = new double[pFrames[i].Data.Length];
        pFrames[i].Data.CopyTo(copy, 0);

        activeFrames[activeFramesIndex] = new SignalFrame();
        activeFrames[activeFramesIndex].Data = copy;
        activeFramesIndex++;
    }

    return activeFrames;
}