/// <summary>
/// Finds the non-speech regions ("spaces") in audio data based on per-window
/// zero-crossing rate and energy descriptors.
/// </summary>
/// <remarks>
/// The waveform is cut into consecutive one-second windows. A window is treated as
/// non-speech when its zero-crossing rate or its energy is exactly zero, or when its
/// zero-crossing rate is above 80% of the average rate while its energy is below 40%
/// of the average energy. Each run of consecutive non-speech windows becomes one space,
/// including a run that lasts until the end of the analysed audio.
/// </remarks>
/// <param name="waveform">Waveform to get spaces from.</param>
/// <returns>
/// The detected spaces in chronological order. The list is empty when the waveform is
/// too short to contain a single complete analysis window.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="waveform"/> is null.</exception>
public static List<Space> FindSpaces(Waveform waveform)
{
    if (waveform == null)
    {
        throw new ArgumentNullException("waveform");
    }

    var spaces = new List<Space>();

    float waveDurationSeconds = waveform.Header.DataSize / (float)waveform.Header.ByteRate;

    // NOTE(review): the 40/80 divisor presumably mirrors how waveform.Data was
    // down-sampled when it was read; confirm against the code that fills Data.
    int ratio = waveform.Header.Channels == 2 ? 40 : 80;
    double samplesPerSecond = waveform.Header.SampleRate * (waveform.Header.BlockAlign / (double)ratio);

    const int windowSize = 1; // length of one analysis window, in seconds
    int samplesPerWindow = (int)samplesPerSecond;

    // Per-window sound descriptors; index i describes window i.
    var zcrHistogram = new List<float>();
    var energyHistogram = new List<float>();

    // Go through each window and calculate the audio descriptors.
    for (int window = 0; window < waveDurationSeconds; window++)
    {
        var start = (int)Math.Round(window * (windowSize * samplesPerSecond));

        // Only analyse windows that fit completely inside the sample data.
        if (start + samplesPerSecond < waveform.Data.Count)
        {
            List<short> bin = waveform.Data.GetRange(start, samplesPerWindow);
            zcrHistogram.Add(Zcr(bin));
            energyHistogram.Add(Energy(bin));
        }
    }

    // Average() throws on an empty sequence, so bail out when nothing could be analysed.
    if (zcrHistogram.Count == 0)
    {
        return spaces;
    }

    const double zcrFactor = 0.80;
    const double energyFactor = 0.4;
    double zcrAvg = zcrFactor * zcrHistogram.Average();
    double energyAvg = energyFactor * energyHistogram.Average();

    // Window indices are converted to times by multiplying by 1000
    // (milliseconds, given the one-second window).
    const int windowMilliseconds = windowSize * 1000;

    // Single pass over the windows: remember where a non-speech run starts and
    // emit a space as soon as the first speech window after it is seen.
    int spaceStart = -1;
    for (int i = 0; i < zcrHistogram.Count; i++)
    {
        bool isNonSpeech = zcrHistogram[i] == 0
                           || energyHistogram[i] == 0
                           || (zcrHistogram[i] > zcrAvg && energyHistogram[i] < energyAvg);

        if (isNonSpeech)
        {
            if (spaceStart < 0)
            {
                spaceStart = i;
            }
        }
        else if (spaceStart >= 0)
        {
            spaces.Add(new Space(spaceStart * windowMilliseconds, i * windowMilliseconds));
            spaceStart = -1;
        }
    }

    // A non-speech run that lasts to the end of the analysed audio is a space too;
    // it ends where the last analysed window ends.
    if (spaceStart >= 0)
    {
        spaces.Add(new Space(spaceStart * windowMilliseconds, zcrHistogram.Count * windowMilliseconds));
    }

    return spaces;
}
/// <summary>
/// Closes the video control and resets the media state held by this view model.
/// Per the original author's note, this is called by the main control.
/// </summary>
private void CloseMediaControlViewModel()
{
    // Drop the reference to the currently held waveform.
    _waveform = null;

    // Clear the video path, then stop and close the player, in that order.
    // NOTE(review): the setters/methods of _mediaVideo are not visible here, so the
    // ordering may be significant; keep it unless the player type says otherwise.
    _mediaVideo.Path = null;
    _mediaVideo.Stop();
    _mediaVideo.Close();

    // Mark the player as having no video loaded.
    _mediaVideo.CurrentState = LiveDescribeVideoStates.VideoNotLoaded;
}
/// <summary>
/// Strips the audio from the project's video on a <see cref="BackgroundWorker"/>,
/// builds the project's waveform from it, optionally runs space detection, writes the
/// waveform header and data through <c>FileWriter</c>, and then continues loading the project.
/// Progress is reported to the loading view model under the title "Importing Video".
/// </summary>
/// <param name="project">Project whose video audio is stripped and which is loaded afterwards.</param>
/// <exception cref="System.InvalidOperationException">
/// Raised from the completion handler when the background work failed; the original
/// failure is available as the inner exception.
/// </exception>
private void StripAudioAnContinueLoadingProject(Project project)
{
    var worker = new BackgroundWorker
    {
        WorkerReportsProgress = true,
    };

    // Filled in by DoWork and consumed by RunWorkerCompleted.
    Waveform waveform = null;

    // Strip the audio from the given project video.
    worker.DoWork += (sender, args) =>
    {
        Log.Info("Beginning to strip audio");
        var audioOperator = new AudioUtility(project);
        audioOperator.StripAudio(worker);
        var waveFormData = audioOperator.ReadWavData(worker);
        var audioHeader = audioOperator.Header;
        waveform = new Waveform(audioHeader, waveFormData);
        Log.Info("Audio stripped");
    };

    worker.RunWorkerCompleted += (sender, args) =>
    {
        // BackgroundWorker captures any exception thrown by DoWork in args.Error.
        // In that case waveform was never assigned; surface the real cause instead of
        // failing below with a NullReferenceException.
        if (args.Error != null)
        {
            throw new System.InvalidOperationException(
                "Stripping the audio from the project video failed.", args.Error);
        }

        project.Waveform = waveform;

        if (Settings.Default.AutoGenerateSpaces)
        {
            List<Space> spaceData = AudioAnalyzer.FindSpaces(waveform);
            OnSpacesAudioAnalysisCompleted(project, spaceData);
            Log.Info("Spaces found");
        }
        else
        {
            Log.Info("Spaces not auto-generated");
        }

        FileWriter.WriteWaveFormHeader(project, waveform.Header);
        FileWriter.WriteWaveFormFile(project, waveform.Data);

        ContinueLoadingProject(project);
    };

    worker.ProgressChanged += (sender, args) =>
        _loadingViewModel.SetProgress("Importing Video", args.ProgressPercentage);

    // Show the loading view at 0% before the worker starts.
    _loadingViewModel.SetProgress("Importing Video", 0);
    _loadingViewModel.Visible = true;

    worker.RunWorkerAsync();
}