Code Example #1
        public void LocalSpectralPeakTest()
        {
            var configPath    = @"SpectralPeakTrackingConfig.yml";
            var recordingPath = @"SM27 22 Sep 2018 3.30 am.wav";
            var imagePath     = @"image_whistle_peaks_1500_3500_100_250_6.bmp";
            //var trackImagePath = @"trackImage.bmp";
            var pathToCsvFile = @"PeakTrackInfo_SM27 22 Sep 2018 3.30 am.csv";

            var configFile = configPath.ToFileInfo();

            if (configFile == null)
            {
                throw new ArgumentException("No config file argument provided");
            }
            else if (!configFile.Exists)
            {
                throw new FileNotFoundException($"Config file {configFile.FullName} not found");
            }

            var configuration = ConfigFile.Deserialize <SpectralPeakTrackingConfig>(configFile);

            var recording = new AudioRecording(recordingPath);

            // get the nyquist value from the recording (reuse the recording created above)
            int           nyquist         = recording.Nyquist;
            int           frameSize       = configuration.FrameWidth;
            double        frameOverlap    = configuration.FrameOverlap;
            int           finalBinCount   = 512;
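            // note: nyquist and finalBinCount are both ints, so the next line is integer division (any fractional Hz per bin is truncated)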
            var           hertzPerFreqBin = nyquist / finalBinCount;
            FreqScaleType scaleType       = FreqScaleType.Linear;

            var spectrogramSettings = new SpectrogramSettings()
            {
                WindowSize    = frameSize,
                WindowOverlap = frameOverlap,
                //DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
                //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType = NoiseReductionType.None,
            };


            var sonoConfig = new SonogramConfig()
            {
                WindowSize    = frameSize,
                WindowOverlap = frameOverlap,
                //DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
                //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType = NoiseReductionType.None,
            };

            var frameStep       = frameSize * (1 - frameOverlap);
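            // nyquist * 2 equals the sample rate, so the next line converts the frame step from samples to seconds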
            var secondsPerFrame = frameStep / (nyquist * 2);

            //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            var amplitudeSpectrogram = new AmplitudeSpectrogram(spectrogramSettings, recording.WavReader);
            var energySpectrogram    = new EnergySpectrogram(amplitudeSpectrogram);
            var decibelSpectrogram   = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // Noise Reduction
            //var noiseReducedSpectrogram = SNR.NoiseReduce_Standard(energySpectrogram.Data);

            var output = SpectralPeakTracking2018.SpectralPeakTracking(energySpectrogram.Data, configuration.SptSettings, hertzPerFreqBin, secondsPerFrame);

            // draw the local peaks
            double[,] hits = SpectralPeakTracking2018.MakeHitMatrix(energySpectrogram.Data, output.TargetPeakBinsIndex, output.BandIndex);
            var image = SpectralPeakTracking2018.DrawSonogram(decibelSpectrogram, hits);

            image.Save(imagePath);

            string[] header  = new[] { "Frame No", "Start Time", "Bin No", "Freq", "Score", "Detection" };
            var      csv     = new StringBuilder();
            string   content = string.Empty;

            foreach (var entry in header)
            {
                content += entry + ",";
            }

            csv.AppendLine(content);

            foreach (var entry in output.peakTrackInfoList)
            {
                content = string.Empty;
                foreach (var value in entry)
                {
                    content += value.ToString() + ",";
                }

                csv.AppendLine(content);
            }

            File.WriteAllText(pathToCsvFile, csv.ToString());

            //Csv.WriteMatrixToCsv(pathToCsvFile.ToFileInfo(), output.peakTrackInfoList);

            // draw spectral tracks
            //var trackImage = SpectralPeakTracking2018.DrawTracks(decibelSpectrogram, hits, output.SpecTracks);
            //trackImage.Save(trackImagePath, ImageFormat.Bmp);
        }
Code Example #2
        public static void GenerateSpectrograms()
        {
            var recordingDir = @"M:\Liz\SupervisedPatchSamplingSet\Recordings\";
            var resultDir    = @"M:\Liz\SupervisedPatchSamplingSet\";

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(recordingDir, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            int           frameSize     = 1024;
            int           finalBinCount = 256;
            FreqScaleType scaleType     = FreqScaleType.Mel;
            var           settings      = new SpectrogramSettings()
            {
                WindowSize = frameSize,

                // With the default frame size of 1024 samples, each frame spans 0.04644 seconds.
                // The question is how many single-frame patches (i.e., patch height = 1) are needed to cover one second;
                // "WindowOverlap" is set so that 24 single frames span 1 second.
                // Note that "WindowOverlap" must be recalculated if the frame size changes;
                // this has not yet been handled in the config file!
                // (One way of deriving this value is sketched after this method.)
                WindowOverlap           = 0.10725204,
                DoMelScale              = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount             = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            foreach (string filePath in Directory.GetFiles(recordingDir, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    settings.SourceFileName = recording.BaseName;

                    var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recording.WavReader);

                    var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);

                    // DO NOISE REDUCTION
                    decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);

                    // draw the spectrogram
                    var attributes = new SpectrogramAttributes()
                    {
                        NyquistFrequency = decibelSpectrogram.Attributes.NyquistFrequency,
                        Duration         = decibelSpectrogram.Attributes.Duration,
                    };

                    Image  image = DecibelSpectrogram.DrawSpectrogramAnnotated(decibelSpectrogram.Data, settings, attributes);
                    string pathToSpectrogramFiles = Path.Combine(resultDir, "Spectrograms", settings.SourceFileName + ".bmp");
                    image.Save(pathToSpectrogramFiles);

                    // write the matrix to a csv file
                    string pathToMatrixFiles = Path.Combine(resultDir, "Matrices", settings.SourceFileName + ".csv");
                    Csv.WriteMatrixToCsv(pathToMatrixFiles.ToFileInfo(), decibelSpectrogram.Data);
                }
            }
        }
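The "WindowOverlap" constant above is hard-coded. As a rough illustration of where such a value can come from, the sketch below derives an overlap from an assumed sample rate of 22050 Hz (consistent with the 0.04644 s frame duration mentioned in the comments) and a target of 24 frames per second; both inputs are assumptions for illustration rather than values taken from the original code or config, and the result (about 0.1028) is close to, but not identical to, the constants used in these examples.

        // Minimal derivation sketch (not part of the original class). The sample rate and the
        // target frame rate are assumed values used only for illustration.
        public static double CalculateWindowOverlap(int sampleRate, int frameSize, double framesPerSecond)
        {
            // frame step (hop) in samples needed to produce the requested number of frames per second
            double frameStep = sampleRate / framesPerSecond;

            // the overlap is the fraction of each window shared with the previous frame
            return 1.0 - (frameStep / frameSize);
        }

        // Example: CalculateWindowOverlap(22050, 1024, 24) ≈ 0.1028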
Code Example #3
        /// <summary>
        /// Apply the feature learning process to a set of target (1-minute) recordings (inputPath),
        /// using a set of centroids learned previously through the feature learning process.
        /// Output feature vectors (outputPath).
        /// </summary>
        public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config, List <double[][]> allCentroids,
                                                         string inputPath, string outputPath)
        {
            var           simVecDir     = Directory.CreateDirectory(Path.Combine(outputPath, "SimilarityVectors"));
            int           frameSize     = config.FrameSize;
            int           finalBinCount = config.FinalBinCount;
            FreqScaleType scaleType     = config.FrequencyScaleType;
            var           settings      = new SpectrogramSettings()
            {
                WindowSize = frameSize,

                // With the default frame size of 1024 samples, each frame spans 0.04644 seconds.
                // The question is how many single-frame patches (i.e., patch height = 1) are needed to cover one second;
                // "WindowOverlap" is set so that 24 single frames span 1 second.
                // Note that "WindowOverlap" must be recalculated if the frame size changes;
                // this has not yet been handled in the config file!
                WindowOverlap           = 0.10725204,
                DoMelScale              = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount             = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };
            double frameStep   = frameSize * (1 - settings.WindowOverlap);
            int    minFreqBin  = config.MinFreqBin;
            int    maxFreqBin  = config.MaxFreqBin;
            int    numFreqBand = config.NumFreqBand;
            int    patchWidth  = (maxFreqBin - minFreqBin + 1) / numFreqBand;
            int patchHeight = config.PatchHeight;

            // the number of frames whose feature vectors will be concatenated in order to preserve temporal information
            int frameWindowLength = config.FrameWindowLength;

            // the step size to make a window of frames
            int stepSize = config.StepSize;

            // the factor of downsampling
            int maxPoolingFactor = config.MaxPoolingFactor;

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            //*****
            // lists of features for all processed files
            // the key is the file name, and the value is the list of features for the different frequency bands
            Dictionary <string, List <double[, ]> > allFilesMinFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesMeanFeatureVectors     = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesMaxFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesStdFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesSkewnessFeatureVectors = new Dictionary <string, List <double[, ]> >();

            double[,] inputMatrix;
            List <AudioRecording> recordings = new List <AudioRecording>();

            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    settings.SourceFileName = recording.BaseName;

                    if (config.DoSegmentation)
                    {
                        recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
                    }
                    else
                    {
                        recordings.Add(recording);
                    }

                    for (int s = 0; s < recordings.Count; s++)
                    {
                        string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + "-" + s.ToString() + ".csv");
                        var    amplitudeSpectrogram        = new AmplitudeSpectrogram(settings, recordings[s].WavReader);
                        var    decibelSpectrogram          = new DecibelSpectrogram(amplitudeSpectrogram);

                        // DO RMS NORMALIZATION
                        //sonogram.Data = SNR.RmsNormalization(sonogram.Data);

                        // DO NOISE REDUCTION
                        if (config.DoNoiseReduction)
                        {
                            decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
                        }

                        // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
                        if (minFreqBin != 1 || maxFreqBin != finalBinCount)
                        {
                            inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
                        }
                        else
                        {
                            inputMatrix = decibelSpectrogram.Data;
                        }

                        // creating matrices from different freq bands of the source spectrogram
                        List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
                        double[][,] matrices2 = allSubmatrices2.ToArray();
                        List <double[, ]> allSequentialPatchMatrix = new List <double[, ]>();
                        for (int i = 0; i < matrices2.GetLength(0); i++)
                        {
                            // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
                            double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor);

                            int rows              = downsampledMatrix.GetLength(0);
                            int columns           = downsampledMatrix.GetLength(1);
                            var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
                            allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
                        }

                        // +++++++++++++++++++++++++++++++++++Feature Transformation
                        // to do the feature transformation, we normalize centroids and
                        // sequential patches from the input spectrogram to unit length
                        // Then, we calculate the dot product of each patch with the centroids' matrix
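                        // (because both the centroids and the patches are unit vectors, each dot product
                        // is simply the cosine similarity between a patch and a centroid)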

                        List <double[][]> allNormCentroids = new List <double[][]>();
                        for (int i = 0; i < allCentroids.Count; i++)
                        {
                            // double check the index of the list
                            double[][] normCentroids = new double[allCentroids.ToArray()[i].GetLength(0)][];
                            for (int j = 0; j < allCentroids.ToArray()[i].GetLength(0); j++)
                            {
                                normCentroids[j] = ART_2A.NormaliseVector(allCentroids.ToArray()[i][j]);
                            }

                            allNormCentroids.Add(normCentroids);
                        }

                        List <double[][]> allFeatureTransVectors = new List <double[][]>();

                        // processing the sequential patch matrix for each band
                        for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
                        {
                            List <double[]> featureTransVectors = new List <double[]>();
                            double[][]      similarityVectors   = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][];

                            for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++)
                            {
                                // normalize each patch to unit length
                                var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
                                var normVector  = inputVector;

                                // to avoid vectors with NaN values, only normalize vectors whose norm is non-zero
                                if (inputVector.Euclidean() != 0)
                                {
                                    normVector = ART_2A.NormaliseVector(inputVector);
                                }

                                similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
                            }

                            Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix());

                            // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames
                            // using FrameWindowLength

                            // patchId refers to the patch id that has been processed so far according to the step size.
                            // if we want no overlap between different frame windows, then stepSize = frameWindowLength
                            int patchId = 0;
                            while (patchId + frameWindowLength - 1 < similarityVectors.GetLength(0))
                            {
                                List <double[]> patchGroup = new List <double[]>();
                                for (int k = 0; k < frameWindowLength; k++)
                                {
                                    patchGroup.Add(similarityVectors[k + patchId]);
                                }

                                featureTransVectors.Add(DataTools.ConcatenateVectors(patchGroup));
                                patchId = patchId + stepSize;
                            }

                            allFeatureTransVectors.Add(featureTransVectors.ToArray());
                        }

                        // +++++++++++++++++++++++++++++++++++Feature Transformation

                        // +++++++++++++++++++++++++++++++++++Temporal Summarization
                        // Based on the resolution chosen for the features, the "numFrames" parameter is set.
                        // Each 24 single-frame patches form 1 second.
                        // For each group of 24 patches, we generate 5 vectors: min, mean, std, max, and skewness (via Accord.NET).
                        // The pre-assumption is that each input recording is 1 minute long.

                        // store features of different bands in lists
                        List <double[, ]> allMinFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allMeanFeatureVectors     = new List <double[, ]>();
                        List <double[, ]> allMaxFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allStdFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allSkewnessFeatureVectors = new List <double[, ]>();

                        // Each 24 frames form 1 second (given the "WindowOverlap" above);
                        // factors such as stepSize and maxPoolingFactor must be taken into account in the temporal summarization.
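                        // Illustrative example (values assumed, not taken from a config): with patchHeight = 1,
                        // stepSize = 1 and maxPoolingFactor = 1, numFrames = 24 (one summary vector per second);
                        // with maxPoolingFactor = 2 the integer division gives numFrames = 12.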
                        int numFrames = 24 / (patchHeight * stepSize * maxPoolingFactor);

                        foreach (var freqBandFeature in allFeatureTransVectors)
                        {
                            List <double[]> minFeatureVectors      = new List <double[]>();
                            List <double[]> meanFeatureVectors     = new List <double[]>();
                            List <double[]> maxFeatureVectors      = new List <double[]>();
                            List <double[]> stdFeatureVectors      = new List <double[]>();
                            List <double[]> skewnessFeatureVectors = new List <double[]>();

                            int c = 0;
                            while (c + numFrames <= freqBandFeature.GetLength(0))
                            {
                                // First, make a list of patches that would be equal to the needed resolution (1 second, 60 second, etc.)
                                List <double[]> sequencesOfFramesList = new List <double[]>();
                                for (int i = c; i < c + numFrames; i++)
                                {
                                    sequencesOfFramesList.Add(freqBandFeature[i]);
                                }

                                List <double> min      = new List <double>();
                                List <double> mean     = new List <double>();
                                List <double> std      = new List <double>();
                                List <double> max      = new List <double>();
                                List <double> skewness = new List <double>();

                                double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

                                // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
                                for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
                                {
                                    double[] temp = new double[sequencesOfFrames.GetLength(0)];
                                    for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                                    {
                                        temp[k] = sequencesOfFrames[k, j];
                                    }

                                    min.Add(temp.GetMinValue());
                                    mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                                    std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                                    max.Add(temp.GetMaxValue());
                                    skewness.Add(temp.Skewness());
                                }

                                minFeatureVectors.Add(min.ToArray());
                                meanFeatureVectors.Add(mean.ToArray());
                                maxFeatureVectors.Add(max.ToArray());
                                stdFeatureVectors.Add(std.ToArray());
                                skewnessFeatureVectors.Add(skewness.ToArray());
                                c += numFrames;
                            }

                            // when (freqBandFeature.GetLength(0) % numFrames) != 0, a number of frames (< numFrames)
                            // (or possibly the whole recording) at the end of the target recording is left unprocessed.
                            // This is problematic when the resolution used to generate the feature vector is 1 min,
                            // but the length of the target recording is a bit less than one minute.
                            if (freqBandFeature.GetLength(0) % numFrames != 0 && freqBandFeature.GetLength(0) % numFrames > 1)
                            {
                                // First, make a list of patches that would be less than the required resolution
                                List <double[]> sequencesOfFramesList = new List <double[]>();
                                int             unprocessedFrames     = freqBandFeature.GetLength(0) % numFrames;
                                for (int i = freqBandFeature.GetLength(0) - unprocessedFrames;
                                     i < freqBandFeature.GetLength(0);
                                     i++)
                                {
                                    sequencesOfFramesList.Add(freqBandFeature[i]);
                                }

                                List <double> min      = new List <double>();
                                List <double> mean     = new List <double>();
                                List <double> std      = new List <double>();
                                List <double> max      = new List <double>();
                                List <double> skewness = new List <double>();

                                double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

                                // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
                                for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
                                {
                                    double[] temp = new double[sequencesOfFrames.GetLength(0)];
                                    for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                                    {
                                        temp[k] = sequencesOfFrames[k, j];
                                    }

                                    min.Add(temp.GetMinValue());
                                    mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                                    std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                                    max.Add(temp.GetMaxValue());
                                    skewness.Add(temp.Skewness());
                                }

                                minFeatureVectors.Add(min.ToArray());
                                meanFeatureVectors.Add(mean.ToArray());
                                maxFeatureVectors.Add(max.ToArray());
                                stdFeatureVectors.Add(std.ToArray());
                                skewnessFeatureVectors.Add(skewness.ToArray());
                            }

                            allMinFeatureVectors.Add(minFeatureVectors.ToArray().ToMatrix());
                            allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix());
                            allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix());
                            allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix());
                            allSkewnessFeatureVectors.Add(skewnessFeatureVectors.ToArray().ToMatrix());
                        }

                        //*****
                        // The keys of the following dictionaries are the file name plus the subsegment index.
                        // Each value is a List<double[,]> whose Count equals the number of frequency bands
                        // (a user-defined parameter); each 2D array contains the feature vectors for that band.
                        allFilesMinFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMinFeatureVectors);
                        allFilesMeanFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMeanFeatureVectors);
                        allFilesMaxFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMaxFeatureVectors);
                        allFilesStdFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allStdFeatureVectors);
                        allFilesSkewnessFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allSkewnessFeatureVectors);

                        // +++++++++++++++++++++++++++++++++++Temporal Summarization
                    }
                }
            }

            // ++++++++++++++++++++++++++++++++++Writing features to one file
            // First, concatenate the min, mean, max, std and skewness features for each time unit.
            // Then, write the features of each pre-defined frequency band into a separate CSV file.
            var filesName        = allFilesMeanFeatureVectors.Keys.ToArray();
            var minFeatures      = allFilesMinFeatureVectors.Values.ToArray();
            var meanFeatures     = allFilesMeanFeatureVectors.Values.ToArray();
            var maxFeatures      = allFilesMaxFeatureVectors.Values.ToArray();
            var stdFeatures      = allFilesStdFeatureVectors.Values.ToArray();
            var skewnessFeatures = allFilesSkewnessFeatureVectors.Values.ToArray();

            // The number of elements in each list is the number of frequency bands.
            // The size of each element is the number of files for which features were generated.
            // The dimensions of each matrix are the number of feature vectors per file and the length of the feature vector.
            var allMins     = new List <double[][, ]>();
            var allMeans    = new List <double[][, ]>();
            var allMaxs     = new List <double[][, ]>();
            var allStds     = new List <double[][, ]>();
            var allSkewness = new List <double[][, ]>();

            // looping over freq bands
            for (int i = 0; i < meanFeatures[0].Count; i++)
            {
                var mins       = new List <double[, ]>();
                var means      = new List <double[, ]>();
                var maxs       = new List <double[, ]>();
                var stds       = new List <double[, ]>();
                var skewnesses = new List <double[, ]>();

                // looping over all files
                for (int k = 0; k < meanFeatures.Length; k++)
                {
                    mins.Add(minFeatures[k].ToArray()[i]);
                    means.Add(meanFeatures[k].ToArray()[i]);
                    maxs.Add(maxFeatures[k].ToArray()[i]);
                    stds.Add(stdFeatures[k].ToArray()[i]);
                    skewnesses.Add(skewnessFeatures[k].ToArray()[i]);
                }

                allMins.Add(mins.ToArray());
                allMeans.Add(means.ToArray());
                allMaxs.Add(maxs.ToArray());
                allStds.Add(stds.ToArray());
                allSkewness.Add(skewnesses.ToArray());
            }

            // each element of meanFeatures array is a list of features for different frequency bands.
            // looping over the number of freq bands
            for (int i = 0; i < allMeans.ToArray().GetLength(0); i++)
            {
                // creating output feature file based on the number of freq bands
                var outputFeatureFile = Path.Combine(outputPath, "FeatureVectors-" + i.ToString() + ".csv");

                // creating the header for CSV file
                List <string> header = new List <string>();
                header.Add("file name");

                for (int j = 0; j < allMins.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("min" + j.ToString());
                }

                for (int j = 0; j < allMeans.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("mean" + j.ToString());
                }

                for (int j = 0; j < allMaxs.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("max" + j.ToString());
                }

                for (int j = 0; j < allStds.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("std" + j.ToString());
                }

                for (int j = 0; j < allSkewness.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("skewness" + j.ToString());
                }

                var    csv     = new StringBuilder();
                string content = string.Empty;
                foreach (var entry in header.ToArray())
                {
                    content += entry.ToString() + ",";
                }

                csv.AppendLine(content);

                var allFilesFeatureVectors = new Dictionary <string, double[, ]>();

                // looping over files
                for (int j = 0; j < allMeans.ToArray()[i].GetLength(0); j++)
                {
                    // concatenating the min, mean, max, std and skewness vectors for the pre-defined resolution
                    List <double[]> featureVectors = new List <double[]>();
                    for (int k = 0; k < allMeans.ToArray()[i][j].ToJagged().GetLength(0); k++)
                    {
                        List <double[]> featureList = new List <double[]>
                        {
                            allMins.ToArray()[i][j].ToJagged()[k],
                            allMeans.ToArray()[i][j].ToJagged()[k],
                            allMaxs.ToArray()[i][j].ToJagged()[k],
                            allStds.ToArray()[i][j].ToJagged()[k],
                            allSkewness.ToArray()[i][j].ToJagged()[k],
                        };
                        double[] featureVector = DataTools.ConcatenateVectors(featureList);
                        featureVectors.Add(featureVector);
                    }

                    allFilesFeatureVectors.Add(filesName[j], featureVectors.ToArray().ToMatrix());
                }

                // writing feature vectors to CSV file
                foreach (var entry in allFilesFeatureVectors)
                {
                    content  = string.Empty;
                    content += entry.Key.ToString() + ",";
                    foreach (var cent in entry.Value)
                    {
                        content += cent.ToString() + ",";
                    }

                    csv.AppendLine(content);
                }

                File.WriteAllText(outputFeatureFile, csv.ToString());
            }
        }
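Both this extraction method and the learning method in Code Example #6 downsample each band matrix with a max-pooling step (FeatureLearning.MaxPooling) before cutting patches. That implementation is not shown in this listing, so the following is only a sketch of what max pooling along the time (row) axis by a factor n could look like; the method name and the assumption that rows correspond to time frames are illustrative, not taken from the original code.

        // Sketch only: an assumed implementation of time-axis max pooling by a factor of n.
        // Each group of n consecutive frames (rows) is collapsed into one frame holding the
        // element-wise maximum; any leftover frames at the end are discarded.
        public static double[,] MaxPoolTimeAxis(double[,] matrix, int n)
        {
            int rows = matrix.GetLength(0);
            int cols = matrix.GetLength(1);
            int pooledRows = rows / n;
            var pooled = new double[pooledRows, cols];

            for (int r = 0; r < pooledRows; r++)
            {
                for (int c = 0; c < cols; c++)
                {
                    double max = double.MinValue;
                    for (int k = 0; k < n; k++)
                    {
                        max = Math.Max(max, matrix[(r * n) + k, c]);
                    }

                    pooled[r, c] = max;
                }
            }

            return pooled;
        }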
Code Example #4
        public void TestSpectrograms()
        {
            var recordingPath                = PathHelper.ResolveAsset("Recordings", "SM304264_0+1_20160421_004539_47-48min.wav"); //    "SM304264_0+1_20160421_094539_37-38min.wav"
            var resultDir                    = PathHelper.ResolveAssetPath("SpectrogramTestResults");
            var outputAmpSpecImagePath       = Path.Combine(resultDir, "AmplitudeSpectrogram.bmp");
            var outputDecibelSpecImagePath   = Path.Combine(resultDir, "DecibelSpectrogram.bmp");
            var outputEnergySpecImagePath    = Path.Combine(resultDir, "EnergySpectrogram.bmp");
            var outputLogEnergySpecImagePath = Path.Combine(resultDir, "LogEnergySpectrogram.bmp");
            var outputLinScaImagePath        = Path.Combine(resultDir, "LinearScaleSpectrogram.bmp");
            var outputMelScaImagePath        = Path.Combine(resultDir, "MelScaleSpectrogram.bmp");
            var outputNormalizedImagePath    = Path.Combine(resultDir, "NormalizedSpectrogram.bmp");
            var outputNoiseReducedImagePath  = Path.Combine(resultDir, "NoiseReducedSpectrogram.bmp");
            var outputLogPsdImagePath        = Path.Combine(resultDir, "Psd.bmp");

            var recording     = new AudioRecording(recordingPath);
            int nyquist       = recording.Nyquist; // 11025;
            int frameSize     = 1024;
            int finalBinCount = 512;               //256; //128; //  100; // 40; // 200; //
            int hertzInterval = 1000;

            //FreqScaleType scaleType = FreqScaleType.Linear;
            var scaleType = FreqScaleType.Mel;

            //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
            //var fst = freqScale.ScaleType;

            var settings = new SpectrogramSettings()
            {
                WindowSize         = frameSize,
                WindowOverlap      = 0.1028,
                DoMelScale         = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount        = 256, //(scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType = NoiseReductionType.None,
                //NoiseReductionType = NoiseReductionType.Median,
            };

            //settings.NoiseReductionParameter = 0.0; // backgroundNeighbourhood noise reduction in dB

            settings.SourceFileName = recording.BaseName;
            //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            var sonogram = new EnergySpectrogram(settings, recording.WavReader);

            sonogram.Data = MatrixTools.Matrix2LogValues(sonogram.Data);

            var attributes = new SpectrogramAttributes()
            {
                NyquistFrequency = sonogram.Attributes.NyquistFrequency,
                Duration         = sonogram.Attributes.Duration,
            };

            Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes);

            //image.Save(outputLogEnergySpecImagePath, ImageFormat.Bmp);

            //var logSonogramData = MatrixTools.Matrix2LogValues(sonogram.Data);
            //var dbSpectrogram = new DecibelSpectrogram(settings, recording.WavReader);
            //dbSpectrogram.DrawSpectrogram(outputMelScaImagePath);

            //var energySpectro = new EnergySpectrogram(settings, recording.WavReader);

            //var image = SpectrogramTools.GetImage(sonogram.Data, nyquist, settings.DoMelScale);
            //var specImage = SpectrogramTools.GetImageFullyAnnotated(image, "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, settings.Duration);

            //var logSonogramData = MatrixTools.Matrix2LogValues(sonogram.Data);

            //var image = SpectrogramTools.GetImage(logSonogramData, nyquist, settings.DoMelScale);
            //var specImage = SpectrogramTools.GetImageFullyAnnotated(image, "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration);

            //specImage.Save(outputMelScaImagePath);
            //specImage.Save(outputAmpSpecImagePath);

            // DO RMS NORMALIZATION
            //sonogram.Data = SNR.RmsNormalization(sonogram.Data);
            //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data);

            //dbSpectrogram.DrawSpectrogram(outputNormalizedImagePath);
            //var image2 = SpectrogramTools.GetImage(dbSpectrogram.Data, nyquist, settings.DoMelScale);
            //var normImage = SpectrogramTools.GetImageFullyAnnotated(image2, "NORMALIZEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration);
            //normImage.Save(outputNormalizedImagePath);

            // DO NOISE REDUCTION
            sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);
            //dbSpectrogram.DrawSpectrogram(outputNoiseReducedImagePath);
            //var image3 = SpectrogramTools.GetImage(dbSpectrogram.Data, nyquist, settings.DoMelScale);
            //var noiseReducedImage = SpectrogramTools.GetImageFullyAnnotated(image3, "NOISEREDUCEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration);
            //noiseReducedImage.Save(outputNoiseReducedImagePath);
            Image image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes);
            //image2.Save(outputNoiseReducedImagePath, ImageFormat.Bmp);

            //energySpectro.DrawLogPsd(outputLogPsdImagePath);

            /*
             * var fst = FreqScaleType.Linear;
             * var freqScale = new FrequencyScale(fst);
             * var recording = new AudioRecording(recordingPath);
             *
             * var sonoConfig = new SonogramConfig
             * {
             *  WindowSize = freqScale.FinalBinCount * 2,
             *  WindowOverlap = 0.2,
             *  SourceFName = recording.BaseName,
             *  NoiseReductionType = NoiseReductionType.None,
             *  NoiseReductionParameter = 0.0,
             * };
             *
             * // GENERATE AMPLITUDE SPECTROGRAM
             * var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
             * amplitudeSpectrogram.Configuration.WindowSize = freqScale.WindowSize;
             *
             * var image = amplitudeSpectrogram.GetImageFullyAnnotated(amplitudeSpectrogram.GetImage(), "AmplitudeSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
             * image.Save(outputAmpSpecImagePath);
             *
             * // DO RMS NORMALIZATION
             * amplitudeSpectrogram.Data = SNR.RmsNormalization(amplitudeSpectrogram.Data);
             * var normImage = amplitudeSpectrogram.GetImageFullyAnnotated(amplitudeSpectrogram.GetImage(), "NORMAmplitudeSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
             * normImage.Save(outputNormAmpImagePath);
             *
             * // CONVERT NORMALIZED AMPLITUDE SPECTROGRAM TO dB SPECTROGRAM
             * var sonogram = new SpectrogramStandard(amplitudeSpectrogram);
             * var standImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "LinearScaleSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
             * standImage.Save(outputLinScaImagePath);
             *
             * // DO NOISE REDUCTION
             * sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);
             * //SNR.NoiseReduce_Standard(sonogram.Data);
             * var noiseReducedImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "NOISEREDUCEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
             * noiseReducedImage.Save(outputNoiseReducedImagePath);
             */
        }
Code Example #5
        public void PowerSpectrumDensityTest()
        {
            var inputPath                 = @"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\";
            var resultPsdPath             = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD.bmp";
            var resultNoiseReducedPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD_NoiseReduced.bmp";

            //var inputPath =Path.Combine(inputDir, "TrainSet"); // directory of the one-min recordings of one day (21 and 23 Apr - Black Rail Data)

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            // get the nyquist value from the first wav file in the folder of recordings
            int           nyquist       = new AudioRecording(Directory.GetFiles(inputPath, "*.wav")[0]).Nyquist; // 11025;
            int           frameSize     = 1024;
            int           finalBinCount = 512; //256; //
            int           hertzInterval = 1000;
            FreqScaleType scaleType     = FreqScaleType.Linear;
            //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
            //var fst = freqScale.ScaleType;
            //var fst = FreqScaleType.Linear;
            //var freqScale = new FrequencyScale(fst);

            var settings = new SpectrogramSettings()
            {
                WindowSize    = frameSize,
                WindowOverlap = 0.1028,

                //DoMelScale = false,
                DoMelScale  = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount = 256, //(scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,

                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            var attributes = new SpectrogramAttributes()
            {
                NyquistFrequency = nyquist,
                Duration         = TimeSpan.FromMinutes(1440),
            };

            List <double[]> psd = new List <double[]>();

            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);

                    //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
                    //var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
                    // save the matrix
                    // skip normalisation
                    // skip mel
                    settings.SourceFileName = recording.BaseName;

                    var spectrogram = new EnergySpectrogram(settings, recording.WavReader);
                    //var sonogram = new AmplitudeSpectrogram(settings, recording.WavReader);

                    //var energySpectrogram = new EnergySpectrogram(sonoConfig, amplitudeSpectrogram.Data);
                    //var energySpectrogram = new EnergySpectrogram(sonoConfig, recording.WavReader);
                    //var energySpectrogram = new EnergySpectrogram(settings, recording.WavReader);

                    // square the FFT coefficients to get an energy spectrogram
                    // double[,] energySpectrogram = PowerSpectrumDensity.GetEnergyValues(amplitudeSpectrogram.Data);

                    // RMS NORMALIZATION
                    //double[,] normalizedValues = SNR.RmsNormalization(energySpectro.Data);
                    //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data);

                    // Median Noise Reduction
                    //spectrogram.Data = PcaWhitening.NoiseReduction(spectrogram.Data);
                    //spectrogram.Data = SNR.NoiseReduce_Standard(spectrogram.Data);

                    //double[] psd = PowerSpectralDensity.GetPowerSpectrum(noiseReducedValues);
                    //psd.Add(energySpectro.GetLogPsd());
                    psd.Add(MatrixTools.GetColumnAverages(spectrogram.Data));

                    //psd.Add(SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(normalizedValues));
                    //psd.Add(PowerSpectralDensity.GetPowerSpectrum(normalizedValues));
                }
            }

            // writing psd matrix to csv file
            //Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\psd.csv"), psd.ToArray().ToMatrix());
            //Image imagePsd = DecibelSpectrogram.DrawSpectrogramAnnotated(psd.ToArray().ToMatrix(), settings, attributes);
            //imagePsd.Save(resultPsdPath, ImageFormat.Bmp);
            var psdMatrix = psd.ToArray().ToMatrix();

            // calculate the log of matrix
            var logPsd = MatrixTools.Matrix2LogValues(psdMatrix);

            Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd.csv"), logPsd);

            var image = DecibelSpectrogram.DrawSpectrogramAnnotated(logPsd, settings, attributes);

            image.Save(resultPsdPath);

            var noiseReducedLogPsd = PcaWhitening.NoiseReduction(logPsd); //SNR.NoiseReduce_Standard(logPsd); //SNR.NoiseReduce_Mean(logPsd, 0.0);//SNR.NoiseReduce_Median(logPsd, 0.0); //

            Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd_NoiseReduced.csv"), logPsd);

            var image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(noiseReducedLogPsd, settings, attributes);

            image2.Save(resultNoiseReducedPsdPath);

            //ImageTools.DrawMatrix(psd.ToArray().ToMatrix(), resultPath);
            //ImageTools.DrawReversedMatrix(psd.ToArray().ToMatrix(), resultPath);
            //var data = MatrixTools.Matrix2LogValues(psd.ToArray().ToMatrix());
            //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(data);
            //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(logPsd);
        }
Code Example #6
        /// <summary>
        /// Apply the feature learning process to a patch sampling set in an unsupervised manner.
        /// Output clusters.
        /// </summary>
        public static List <KmeansClustering.Output> UnsupervisedFeatureLearning(FeatureLearningSettings config, string inputPath)
        {
            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            int           frameSize     = config.FrameSize;
            int           finalBinCount = config.FinalBinCount;
            FreqScaleType scaleType     = config.FrequencyScaleType;
            var           settings      = new SpectrogramSettings()
            {
                WindowSize = frameSize,

                // With the default frame size of 1024 samples, each frame spans 0.04644 seconds.
                // The question is how many single-frame patches (i.e., patch height = 1) are needed to cover one second;
                // "WindowOverlap" is set so that 24 single frames span 1 second.
                // Note that "WindowOverlap" must be recalculated if the frame size changes;
                // this has not yet been handled in the config file!
                WindowOverlap           = 0.10725204,
                DoMelScale              = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount             = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };
            double frameStep   = frameSize * (1 - settings.WindowOverlap);
            int    minFreqBin  = config.MinFreqBin;
            int    maxFreqBin  = config.MaxFreqBin;
            int    numFreqBand = config.NumFreqBand;
            int    patchWidth  = (maxFreqBin - minFreqBin + 1) / numFreqBand;
            int patchHeight      = config.PatchHeight;
            int numRandomPatches = config.NumRandomPatches;

            // Define variable number of "randomPatch" lists based on "numFreqBand"
            Dictionary <string, List <double[, ]> > randomPatchLists = new Dictionary <string, List <double[, ]> >();

            for (int i = 0; i < numFreqBand; i++)
            {
                randomPatchLists.Add($"randomPatch{i.ToString()}", new List <double[, ]>());
            }

            List <double[, ]> randomPatches = new List <double[, ]>();

            double[,] inputMatrix;
            List <AudioRecording> recordings = new List <AudioRecording>();

            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    settings.SourceFileName = recording.BaseName;

                    if (config.DoSegmentation)
                    {
                        recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
                    }
                    else
                    {
                        recordings.Add(recording);
                    }

                    for (int i = 0; i < recordings.Count; i++)
                    {
                        var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[i].WavReader);
                        var decibelSpectrogram   = new DecibelSpectrogram(amplitudeSpectrogram);

                        // DO RMS NORMALIZATION
                        //sonogram.Data = SNR.RmsNormalization(sonogram.Data);

                        if (config.DoNoiseReduction)
                        {
                            decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
                        }

                        // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
                        if (minFreqBin != 1 || maxFreqBin != finalBinCount)
                        {
                            inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
                        }
                        else
                        {
                            inputMatrix = decibelSpectrogram.Data;
                        }

                        // creating matrices from different freq bands of the source spectrogram
                        List <double[, ]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);

                        // selecting random patches from each freq band matrix and add them to the corresponding patch list
                        int count = 0;

                        while (count < allSubmatrices.Count)
                        {
                            // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
                            double[,] downsampledMatrix = MaxPooling(allSubmatrices.ToArray()[count], config.MaxPoolingFactor);

                            randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
                                                                                   .GetPatches(downsampledMatrix, patchWidth, patchHeight, numRandomPatches,
                                                                                               PatchSampling.SamplingMethod.Random).ToMatrix());
                            count++;
                        }
                    }
                }
            }

            // convert the list of random patch matrices for each frequency band into a single matrix
            foreach (string key in randomPatchLists.Keys)
            {
                randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key]));
            }

            int numClusters = config.NumClusters;

            List <KmeansClustering.Output> allClusteringOutput = new List <KmeansClustering.Output>();

            for (int i = 0; i < randomPatches.Count; i++)
            {
                double[,] patchMatrix = randomPatches[i];

                // Apply PCA Whitening
                var whitenedSpectrogram = PcaWhitening.Whitening(config.DoWhitening, patchMatrix);

                // Do k-means clustering
                var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numClusters);
                allClusteringOutput.Add(clusteringOutput);
            }

            return allClusteringOutput;
        }
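For orientation, a hypothetical call site for the method above might look like the sketch below. Only the method signature comes from the code in this listing; the path, the parameter values, and the assumption that the FeatureLearningSettings properties are publicly settable via an object initializer are illustrative only.

        public static void RunFeatureLearningSketch()
        {
            // hypothetical settings; the property names are those read by the methods above,
            // but the values (and their settability) are assumptions
            var config = new FeatureLearningSettings
            {
                FrameSize = 1024,
                FinalBinCount = 256,
                FrequencyScaleType = FreqScaleType.Mel,
                NumFreqBand = 4,
                PatchHeight = 1,
                NumRandomPatches = 1000,
                NumClusters = 256,
                DoWhitening = true,

                // remaining settings (segmentation, noise reduction, pooling, etc.) omitted
            };

            var clusters = UnsupervisedFeatureLearning(config, @"path\to\training\recordings");

            // The learned centroids would be read out of `clusters` here before calling
            // UnsupervisedFeatureExtraction; the relevant properties of KmeansClustering.Output
            // are not shown in this listing, so that step is deliberately omitted.
        }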
Code Example #7
        /// <summary>
        /// This method is called semi-supervised feature learning because one of the clusters is formed using
        /// positive frames manually selected from 1-min recordings.
        /// The input to this method is a group of files that contain the call of interest, plus
        /// a 2D array containing the file name, the second number, and the corresponding frame numbers in each file.
        /// At the moment, this method only handles single frames as patches (PatchHeight = 1).
        /// </summary>
        public static List <KmeansClustering.Output> SemisupervisedFeatureLearning(FeatureLearningSettings config,
                                                                                   string inputPath, string[,] frameInfo)
        {
            // build a dictionary of frame info: the key is (file name, second number) and the value is the [start, end] frame numbers
            Dictionary <Tuple <string, int>, int[]> info = new Dictionary <Tuple <string, int>, int[]>();

            for (int i = 0; i < frameInfo.GetLength(0); i++)
            {
                Tuple <string, int> keys = new Tuple <string, int>(frameInfo[i, 0], Convert.ToInt32(frameInfo[i, 1]));
                int[] values = new[] { Convert.ToInt32(frameInfo[i, 2]), Convert.ToInt32(frameInfo[i, 3]) };
                info.Add(keys, values);
            }
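
            // Illustrative (hypothetical) example: a frameInfo row { "recording1.wav", "3", "10", "15" }
            // produces the entry ("recording1.wav", 3) -> [10, 15], i.e. frames 10..15 of second 3 are positive.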

            // processing the recordings within the input path
            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            int           frameSize     = config.FrameSize;
            int           finalBinCount = config.FinalBinCount;
            FreqScaleType scaleType     = config.FrequencyScaleType;
            var           settings      = new SpectrogramSettings()
            {
                WindowSize = frameSize,

                // the duration of each frame (with the default frame size of 1024) is 0.04644 seconds
                // The question is how many single frames (i.e., patches with a height of 1) should be selected to form one second
                // The "WindowOverlap" is calculated to answer this question:
                // with this overlap, 24 single frames cover a duration of 1 second
                // note that the "WindowOverlap" value must be recalculated if the frame size changes;
                // this has not yet been handled in the config file!
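                // Worked example (assuming a 22050 Hz sample rate, which gives the 0.04644 s frame duration above):
                // frame step = 1024 * (1 - 0.10725204) ≈ 914 samples ≈ 0.0415 s, so about 24 frame steps span one second.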
                WindowOverlap           = 0.10725204,
                DoMelScale              = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount             = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };
            double frameStep   = frameSize * (1 - settings.WindowOverlap);
            int    minFreqBin  = config.MinFreqBin;
            int    maxFreqBin  = config.MaxFreqBin;
            int    numFreqBand = config.NumFreqBand;
            int    patchWidth  = (maxFreqBin - minFreqBin + 1) / numFreqBand;
            int patchHeight      = config.PatchHeight;
            int numRandomPatches = config.NumRandomPatches;

            // Define variable number of "randomPatch" lists based on "numFreqBand"
            Dictionary <string, List <double[, ]> > randomPatchLists     = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > sequentialPatchLists = new Dictionary <string, List <double[, ]> >();

            for (int i = 0; i < numFreqBand; i++)
            {
                randomPatchLists.Add($"randomPatch{i.ToString()}", new List <double[, ]>());
                sequentialPatchLists.Add($"sequentialPatch{i.ToString()}", new List <double[, ]>());
            }
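
            // For example, with numFreqBand = 4 (hypothetical), the keys are
            // randomPatch0..randomPatch3 and sequentialPatch0..sequentialPatch3.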

            List <double[, ]> randomPatches   = new List <double[, ]>();
            List <double[, ]> positivePatches = new List <double[, ]>();

            double[,] inputMatrix;
            List <AudioRecording> recordings = new List <AudioRecording>();

            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    settings.SourceFileName = recording.BaseName;

                    if (config.DoSegmentation)
                    {
                        recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
                    }
                    else
                    {
                        recordings.Add(recording);
                    }

                    for (int i = 0; i < recordings.Count; i++)
                    {
                        var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[i].WavReader);

                        var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);

                        if (config.DoNoiseReduction)
                        {
                            decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
                        }

                        // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
                        if (minFreqBin != 1 || maxFreqBin != finalBinCount)
                        {
                            inputMatrix =
                                PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
                        }
                        else
                        {
                            inputMatrix = decibelSpectrogram.Data;
                        }

                        // creating matrices from different freq bands of the source spectrogram
                        List <double[, ]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);

                        // check whether the file has any positive frame
                        List <int> positiveFrameNumbers = new List <int>();
                        foreach (var entry in info)
                        {
                            // check whether the file and the current second (i) have any positive frames
                            if ((fileInfo.Name == entry.Key.Item1) && (i == entry.Key.Item2))
                            {
                                // make a list of frame numbers
                                for (int j = entry.Value[0]; j <= entry.Value[1]; j++)
                                {
                                    positiveFrameNumbers.Add(j);
                                }
                            }
                        }

                        // making two matrices, one from positive frames and one from negative frames.
                        List <double[, ]> allPositiveFramesSubmatrices = new List <double[, ]>();
                        List <double[, ]> allNegativeFramesSubmatrices = new List <double[, ]>();
                        List <int>        negativeFrameNumbers         = new List <int>();

                        // 24 = the number of single-frame patches per one-second segment implied by the WindowOverlap value above
                        for (int j = 1; j <= 24; j++)
                        {
                            bool flag = false;
                            foreach (var number in positiveFrameNumbers)
                            {
                                if (j == number)
                                {
                                    flag = true;
                                    break;
                                }
                            }

                            // if flag is false, the frame does not contain any part of a bird call and should be added
                            // to the negativeFrameNumbers list.
                            if (!flag)
                            {
                                negativeFrameNumbers.Add(j);
                            }
                        }

                        if (positiveFrameNumbers.ToArray().Length != 0)
                        {
                            foreach (var submatrix in allSubmatrices)
                            {
                                List <double[]> positiveFrames = new List <double[]>();
                                foreach (var number in positiveFrameNumbers)
                                {
                                    positiveFrames.Add(submatrix.ToJagged()[number - 1]);
                                }

                                allPositiveFramesSubmatrices.Add(positiveFrames.ToArray().ToMatrix());

                                List <double[]> negativeFrames = new List <double[]>();
                                foreach (var number in negativeFrameNumbers)
                                {
                                    negativeFrames.Add(submatrix.ToJagged()[number - 1]);
                                }

                                allNegativeFramesSubmatrices.Add(negativeFrames.ToArray().ToMatrix());
                            }
                        }
                        else
                        {
                            allNegativeFramesSubmatrices = allSubmatrices;
                        }

                        // selecting patches from each freq band matrix and adding them to the corresponding patch list
                        int count = 0;

                        while (count < allNegativeFramesSubmatrices.Count)
                        {
                            // if the segment contains positive frames, take sequential patches from them;
                            // otherwise, select random patches from the segment, which does not contain the call of interest
                            if (allPositiveFramesSubmatrices.Count != 0)
                            {
                                // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
                                double[,] downsampledPositiveMatrix = MaxPooling(allPositiveFramesSubmatrices.ToArray()[count], config.MaxPoolingFactor);
                                int rows    = downsampledPositiveMatrix.GetLength(0);
                                int columns = downsampledPositiveMatrix.GetLength(1);
                                sequentialPatchLists[$"sequentialPatch{count.ToString()}"].Add(
                                    PatchSampling.GetPatches(downsampledPositiveMatrix, patchWidth, patchHeight,
                                                             (rows / patchHeight) * (columns / patchWidth),
                                                             PatchSampling.SamplingMethod.Sequential).ToMatrix());
                            }
                            else
                            {
                                // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
                                double[,] downsampledNegativeMatrix = MaxPooling(allNegativeFramesSubmatrices.ToArray()[count], config.MaxPoolingFactor);
                                randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
                                                                                       .GetPatches(downsampledNegativeMatrix, patchWidth, patchHeight, numRandomPatches,
                                                                                                   PatchSampling.SamplingMethod.Random).ToMatrix());
                            }

                            /*
                             * We can use this block of code instead of the if/else block above, if we want to select random patches from the negative frames of segments that contain the call of interest
                             * // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
                             * double[,] downsampledNegativeMatrix = MaxPooling(allNegativeFramesSubmatrices.ToArray()[count], config.MaxPoolingFactor);
                             * if (downsampledNegativeMatrix.GetLength(0) < numRandomPatches)
                             * {
                             *  int numR = downsampledNegativeMatrix.GetLength(0);
                             *  int numC = downsampledNegativeMatrix.GetLength(1);
                             *  randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
                             *      .GetPatches(downsampledNegativeMatrix, patchWidth, patchHeight,
                             *         (numR / patchHeight) * (numC / patchWidth),
                             *         PatchSampling.SamplingMethod.Sequential).ToMatrix());
                             * }
                             * else
                             * {
                             *  randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
                             *  .GetPatches(downsampledNegativeMatrix, patchWidth, patchHeight, numRandomPatches,
                             *      PatchSampling.SamplingMethod.Random).ToMatrix());
                             * }
                             */

                            count++;
                        }
                    }
                }
            }

            // convert each list of sequential (positive-frame) patch matrices to one matrix per frequency band
            foreach (string key in sequentialPatchLists.Keys)
            {
                positivePatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(sequentialPatchLists[key]));
            }

            // convert each list of random patch matrices to one matrix per frequency band
            foreach (string key in randomPatchLists.Keys)
            {
                randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key]));
            }

            // one cluster is reserved for the positive frames, so the unsupervised step uses NumClusters - 1
            int numClusters = config.NumClusters - 1;

            List <KmeansClustering.Output> semisupervisedClusteringOutput = new List <KmeansClustering.Output>();
            List <KmeansClustering.Output> unsupervisedClusteringOutput   = new List <KmeansClustering.Output>();
            List <KmeansClustering.Output> supervisedClusteringOutput     = new List <KmeansClustering.Output>();

            // clustering of random patches
            for (int i = 0; i < randomPatches.Count; i++)
            {
                double[,] patchMatrix = randomPatches[i];

                // Apply PCA Whitening
                var whitenedSpectrogram = PcaWhitening.Whitening(config.DoWhitening, patchMatrix);

                // Do k-means clustering
                var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numClusters);
                unsupervisedClusteringOutput.Add(clusteringOutput);
            }

            // build one cluster out of positive frames
            for (int i = 0; i < positivePatches.Count; i++)
            {
                double[,] patchMatrix = positivePatches[i];

                // Apply PCA Whitening
                var whitenedSpectrogram = PcaWhitening.Whitening(config.DoWhitening, patchMatrix);

                // Do k-means clustering
                // build one cluster from positive patches
                var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, 1);
                supervisedClusteringOutput.Add(clusteringOutput);
            }

            // merge the outputs of the two clusterings obtained from the supervised and unsupervised approaches
            var positiveClusterId = config.NumClusters - 1;
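
            // For example, with config.NumClusters = 10 (hypothetical), the unsupervised step above produces
            // 9 clusters (presumably with ids 0..8) and the single positive cluster is appended with id 9.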
            List <double[][]> positiveCentroids   = new List <double[][]>();
            List <double[]>   positiveClusterSize = new List <double[]>();

            foreach (var output in supervisedClusteringOutput)
            {
                positiveCentroids.Add(output.ClusterIdCentroid.Values.ToArray());
                positiveClusterSize.Add(output.ClusterIdSize.Values.ToArray());
            }

            semisupervisedClusteringOutput = unsupervisedClusteringOutput;

            for (int i = 0; i < semisupervisedClusteringOutput.Count; i++)
            {
                semisupervisedClusteringOutput[i].ClusterIdCentroid.Add(positiveClusterId, positiveCentroids[i][0]);
                semisupervisedClusteringOutput[i].ClusterIdSize.Add(positiveClusterId, positiveClusterSize[i][0]);
            }

            return(semisupervisedClusteringOutput);
        }
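
A minimal usage sketch with hypothetical file names, paths, and frame numbers; the FeatureLearningSettings instance is assumed to be loaded elsewhere (e.g. from a config file), and the calling method itself is not part of the project.

        // Hypothetical caller: learn features with one extra cluster built from manually labelled positive frames.
        public static void RunSemisupervisedFeatureLearning(FeatureLearningSettings config)
        {
            // each row: file name, second index within the file, first positive frame, last positive frame
            string[,] frameInfo = new string[,]
            {
                { "recording1.wav", "3", "10", "15" },
                { "recording2.wav", "0", "2", "8" },
            };

            var clusteringOutputs = SemisupervisedFeatureLearning(config, @"C:\recordings", frameInfo);
            Console.WriteLine($"Learned clusters for {clusteringOutputs.Count} frequency bands");
        }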