Ejemplo n.º 1
0
        } // GetSpectralMaxima()

        /// <summary>
        /// Locates the dominant frequency bin of frames lying in the neighbourhood of an envelope peak.
        /// According to the original author's note, this method is called only from the Frogs.cs class.
        /// Frames that are not near a qualifying peak keep the default value 0 in the returned array.
        /// </summary>
        /// <param name="decibelsPerFrame">Per-frame values handed to <c>DataTools.GetPeakValues</c>; presumably one entry per spectrogram row (TODO confirm).</param>
        /// <param name="spectrogram">Matrix whose rows are read as frames and whose columns are read as frequency bins.</param>
        /// <param name="threshold">Level that both the peak value of a frame and its spectral maximum are compared against.</param>
        /// <param name="nhLimit">Neighbourhood half-width in frames. Frames r - nhLimit .. r + nhLimit - 1 are examined around frame r.</param>
        /// <returns>
        /// Item1: for each frame, the index of the bin holding the maximum value (written only where that value exceeded the threshold).
        /// Item2: a matrix of the same size as the spectrogram holding 1.0 at every recorded maximum.
        /// </returns>
        public static Tuple<int[], double[,]> GetSpectralMaxima(double[] decibelsPerFrame, double[,] spectrogram, double threshold, int nhLimit)
        {
            int frameCount = spectrogram.GetLength(0);
            int binCount = spectrogram.GetLength(1);

            var envelopePeaks = DataTools.GetPeakValues(decibelsPerFrame);

            // one element per frame, indicating which freq bin has max amplitude.
            var dominantBins = new int[frameCount];
            var hits = new double[frameCount, binCount];

            // a margin of nhLimit frames at either end is never used as a neighbourhood centre.
            int centreLimit = frameCount - nhLimit;
            for (int centre = nhLimit; centre < centreLimit; centre++)
            {
                if (envelopePeaks[centre] < threshold)
                {
                    continue;
                }

                // NOTE(review): the upper bound is exclusive, so the neighbourhood is not symmetric about the centre frame.
                int firstFrame = centre - nhLimit;
                int frameLimit = centre + nhLimit;
                for (int frameIndex = firstFrame; frameIndex < frameLimit; frameIndex++)
                {
                    double[] spectrum = MatrixTools.GetRow(spectrogram, frameIndex);

                    // zero the DC bin just in case it is the maximum.
                    spectrum[0] = 0.0;
                    int peakBin = DataTools.GetMaxIndex(spectrum);

                    // only record the spectral peak if it is above threshold.
                    if (spectrum[peakBin] > threshold)
                    {
                        dominantBins[frameIndex] = peakBin;
                        hits[frameIndex, peakBin] = 1.0;
                    }
                }
            }

            return Tuple.Create(dominantBins, hits);
        } // GetSpectralMaxima()
Ejemplo n.º 2
0
        /// <summary>
        /// Reduces the column count of a matrix by copying the first 100 columns unchanged and
        /// thereafter keeping only every second source column (source columns 100, 102, 104, ...).
        /// Despite the method name, no maximum is taken: the skipped columns are simply discarded.
        /// </summary>
        /// <param name="matrix">Source matrix. It must have enough columns to supply every requested output column.</param>
        /// <param name="reducedColCount">Number of columns in the returned matrix. Values below 100 yield a plain copy of the leading columns.</param>
        /// <returns>A new matrix with the same row count as <paramref name="matrix"/> and <paramref name="reducedColCount"/> columns.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="matrix"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="reducedColCount"/> is negative, or the matrix has rows but too few columns to fill the requested output.
        /// </exception>
        public static double[,] ExoticMaxPoolingMatrixColumns(double[,] matrix, int reducedColCount)
        {
            if (matrix == null)
            {
                throw new ArgumentNullException(nameof(matrix));
            }

            if (reducedColCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(reducedColCount), "Column count cannot be negative.");
            }

            // number of leading columns that are copied across without any reduction.
            const int verbatimColCount = 100;

            int rows = matrix.GetLength(0);
            int cols = matrix.GetLength(1);

            // never copy more verbatim columns than the output can hold.
            int copyCount = Math.Min(verbatimColCount, reducedColCount);

            // index of the last source column that will be read (long, so that the doubling cannot overflow).
            long lastSourceCol = reducedColCount <= verbatimColCount
                ? reducedColCount - 1L
                : verbatimColCount + (2L * (reducedColCount - 1L - verbatimColCount));

            // a matrix without rows is never indexed, so it is allowed through as before.
            if (rows > 0 && lastSourceCol >= cols)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(reducedColCount),
                    "Matrix has too few columns to produce the requested number of reduced columns.");
            }

            double[,] returnMatrix = new double[rows, reducedColCount];
            for (int r = 0; r < rows; r++)
            {
                for (int c = 0; c < copyCount; c++)
                {
                    returnMatrix[r, c] = matrix[r, c];
                }

                // beyond the verbatim section, step through the source two columns at a time.
                int sourceCol = verbatimColCount;
                for (int c = verbatimColCount; c < reducedColCount; c++)
                {
                    returnMatrix[r, c] = matrix[r, sourceCol];
                    sourceCol += 2;
                }
            }

            return returnMatrix;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Normalises, in place, every row of the species feature matrix and of the instance feature matrix.
        /// Each row is passed to <c>NormaliseVector</c> together with <c>output.Weights</c> and the result is written back.
        /// Original author's note: normalisation can be done in three ways, i.e. (i) unit length, (ii) unit area, (iii) unit bounds i.e. 0,1.
        /// Which of these is applied is decided inside <c>NormaliseVector</c>, which is not shown here.
        /// </summary>
        /// <param name="arguments">Supplies the number of species rows and instance rows to process.</param>
        /// <param name="output">Holds the two feature matrices (modified in place) and the weights.</param>
        public static void Normalise(Arguments arguments, Output output)
        {
            int speciesCount = arguments.SpeciesCount;
            int instanceCount = arguments.InstanceCount;

            NormaliseRowsInPlace(output.SpeciesFeatureMatrix, speciesCount, output);
            NormaliseRowsInPlace(output.InstanceFeatureMatrix, instanceCount, output);
        }

        /// <summary>
        /// Replaces each of the first <paramref name="rowCount"/> rows of <paramref name="matrix"/> with its normalised form.
        /// </summary>
        private static void NormaliseRowsInPlace(double[,] matrix, int rowCount, Output output)
        {
            for (int r = 0; r < rowCount; r++)
            {
                double[] ipVector = MatrixTools.GetRow(matrix, r);
                double[] normedVector = NormaliseVector(ipVector, output.Weights);

                // the normalised vector determines how many columns are written back.
                for (int c = 0; c < normedVector.Length; c++)
                {
                    matrix[r, c] = normedVector[c];
                }
            }
        }
        /// <summary>
        /// Detects harmonics in a sub-band of a spectrogram.
        /// The matrix is assumed to come from a spectrogram rotated so that each matrix row is one spectral column (one time frame).
        /// Developed for the GenericRecognizer of harmonics.
        /// WARNING: As of March 2020 each frame is replaced by the average of five adjacent frames in order to reduce noise.
        ///          This requires that the frequency of any potential formants is not changing rapidly,
        ///          so it may not be suitable for detecting human speech. However the frame step can be reduced.
        /// The first two and last two frames are not processed and keep zero values in all three returned arrays.
        /// </summary>
        /// <param name="m">spectrogram data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level; frames whose averaged maximum is below it get no intensity or index.</param>
        /// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
        public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
        {
            int frameCount = m.GetLength(0);
            int binCount = m.GetLength(1);

            // cosine coefficients used by the DCT below.
            double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

            // per-frame results: decibels, formant intensity and index of the max DCT coefficient.
            var decibels = new double[frameCount];
            var harmonicIntensity = new double[frameCount];
            var dctPeakIndex = new int[frameCount];

            const int lowerDctBound = 2;
            int centreLimit = frameCount - 2;

            for (int t = 2; t < centreLimit; t++)
            {
                // the five frames centred on t.
                var twoBefore = MatrixTools.GetRow(m, t - 2);
                var oneBefore = MatrixTools.GetRow(m, t - 1);
                var centre = MatrixTools.GetRow(m, t);
                var oneAfter = MatrixTools.GetRow(m, t + 1);
                var twoAfter = MatrixTools.GetRow(m, t + 2);

                var smoothed = new double[binCount];
                for (int bin = 0; bin < binCount; bin++)
                {
                    smoothed[bin] = (twoBefore[bin] + oneBefore[bin] + centre[bin] + oneAfter[bin] + twoAfter[bin]) / 5;
                }

                double loudest = smoothed.Max();
                decibels[t] = loudest;
                if (loudest < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(smoothed);

                // Per the original author, xr has twice the length of the frame and is symmetrical, so only the first half is kept.
                // The values are deliberately NOT normalised for overlap count: for harmonics that introduces too much noise,
                // and the less overlapped values should carry less weight.
                double[] firstHalf = new double[binCount];
                for (int lag = 0; lag < binCount; lag++)
                {
                    firstHalf[lag] = xr[lag];
                }

                // DCT across the auto-correlation; the largest coefficient gives intensity and index.
                var dct = Oscillations2012.DoDct(firstHalf, cosines, lowerDctBound);
                int strongest = DataTools.GetMaxIndex(dct);
                harmonicIntensity[t] = dct[strongest];
                dctPeakIndex[t] = strongest;
            }

            return Tuple.Create(decibels, harmonicIntensity, dctPeakIndex);
        }
        /// <summary>
        /// Generates a 20 second cosine test signal containing five harmonics, builds an amplitude sonogram with a linear
        /// frequency scale, saves the annotated image, and asserts the image size and the bins at which spectral peaks are found.
        /// </summary>
        public void TestFreqScaleOnArtificialSignal1()
        {
            int sampleRate = 22050;

            // signal duration in seconds
            double duration = 20;

            int[] harmonics = { 500, 1000, 2000, 4000, 8000 };
            int windowSize = 512;
            var scale = new FrequencyScale(sampleRate / 2, windowSize, 1000);
            var imagePath = Path.Combine(this.outputDirectory.FullName, "Signal1_LinearFreqScale.png");

            var testRecording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);
            var config = new SonogramConfig
            {
                WindowSize = scale.WindowSize,
                WindowOverlap = 0.0,
                SourceFName = "Signal1",
                NoiseReductionType = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.12,
            };

            var sonogram = new AmplitudeSonogram(config, testRecording.WavReader);

            // take one arbitrary frame, smooth it, and find its peaks
            var smoothedSpectrum = DataTools.filterMovingAverage(MatrixTools.GetRow(sonogram.Data, 40), 5);
            var peaks = DataTools.GetPeaks(smoothedSpectrum);

            // report every peak, ignoring five bins at either end
            int binLimit = peaks.Length - 5;
            for (int bin = 5; bin < binLimit; bin++)
            {
                if (!peaks[bin])
                {
                    continue;
                }

                LoggedConsole.WriteLine($"bin ={scale.BinBounds[bin, 0]},  Herz={scale.BinBounds[bin, 1]}-{scale.BinBounds[bin + 1, 1]}  ");
            }

            for (int k = 0; k < harmonics.Length; k++)
            {
                int herz = harmonics[k];
                LoggedConsole.WriteLine($"Harmonic {herz}Herz  should be in bin  {scale.GetBinIdForHerzValue(herz)}");
            }

            // spectrogram without framing, annotation etc
            var image = sonogram.GetImage();
            string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)}   SR={sampleRate}  Window={windowSize}";

            image = sonogram.GetImageFullyAnnotated(image, title, scale.GridLineLocations);
            image.Save(imagePath);

            // Check that image dimensions are correct
            Assert.AreEqual(861, image.Width);
            Assert.AreEqual(310, image.Height);

            // bins in which a spectral peak is expected
            int[] expectedPeakBins = { 11, 22, 45, 92, 185 };
            foreach (int expectedBin in expectedPeakBins)
            {
                Assert.IsTrue(peaks[expectedBin]);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Produce a CONFUSION MATRIX and a RANK ORDER MATRIX, store both on <paramref name="output"/>, and log accuracy figures.
        /// For each instance the species with the highest similarity score is taken as the prediction;
        /// the rank order matrix marks the rank (0 to 9) at which the correct species appears, if it appears at all.
        /// </summary>
        /// <param name="arguments">Supplies the species count and the instance count.</param>
        /// <param name="output">Supplies SpeciesID and SimilarityScores; receives ConfusionMatrix and RankOrderMatrix.</param>
        public static void CalculateAccuracy(Arguments arguments, Output output)
        {
            const int maxRank = 10;
            int speciesCount = arguments.SpeciesCount;
            int instanceCount = arguments.InstanceCount;

            output.ConfusionMatrix = new int[speciesCount, speciesCount];
            output.RankOrderMatrix = new int[instanceCount, maxRank];

            // loop through all instances
            for (int r = 0; r < instanceCount; r++)
            {
                // species IDs are one more than the corresponding matrix index.
                int correctID = output.SpeciesID[r] - 1;
                double[] instanceScores = MatrixTools.GetRow(output.SimilarityScores, r);
                int maxID = DataTools.GetMaxIndex(instanceScores);
                output.ConfusionMatrix[correctID, maxID]++;

                // calculate rank order matrix.
                if (maxID == correctID)
                {
                    output.RankOrderMatrix[r, 0] = 1;
                }

                // Knock out the current best score so that the next call finds the runner-up.
                // NOTE(review): zeroing only removes a candidate if the remaining scores are positive - confirm scores cannot be negative.
                instanceScores[maxID] = 0.0;
                for (int rank = 1; rank < maxRank; rank++)
                {
                    maxID = DataTools.GetMaxIndex(instanceScores);
                    if (maxID == correctID)
                    {
                        output.RankOrderMatrix[r, rank] = 1;
                        break;
                    }

                    instanceScores[maxID] = 0.0;
                }
            } // end for loop r over all instances

            int diagonalSum = 0;

            for (int r = 0; r < speciesCount; r++)
            {
                diagonalSum += output.ConfusionMatrix[r, r];
            }

            LoggedConsole.WriteLine("Diagonal Sum = " + diagonalSum);

            // Floating-point percentage, formatted like the rank accuracies below.
            // With no instances there is nothing to divide by, so report zero rather than throw.
            double accuracy = instanceCount > 0 ? 100.0 * diagonalSum / instanceCount : 0.0;
            LoggedConsole.WriteLine(string.Format("% Accuracy = {0:f2}", accuracy));

            LoggedConsole.WriteLine("% Rank");
            for (int rank = 0; rank < maxRank; rank++)
            {
                var colSum = MatrixTools.SumColumn(output.RankOrderMatrix, rank);
                double acc = instanceCount > 0 ? 100 * colSum / (double)instanceCount : 0.0;
                string str = string.Format("{0}   % Acc = {1:f2}", rank, acc);
                LoggedConsole.WriteLine(str);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Generates a 20 second cosine test signal containing five harmonics, builds an amplitude sonogram from it,
        /// logs whether spectral peaks occur in the expected bins, and saves the annotated spectrogram to a fixed local path.
        /// </summary>
        public static void TestMethod_GenerateSignal1()
        {
            int sampleRate = 22050;

            // signal duration in seconds
            double duration = 20;

            int[] harmonics = { 500, 1000, 2000, 4000, 8000 };
            int windowSize = 512;
            var scale = new FrequencyScale(sampleRate / 2, windowSize, 1000);
            string imagePath = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\SineSignal1.png";

            var testRecording = GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);
            var config = new SonogramConfig
            {
                WindowSize = scale.WindowSize,
                WindowOverlap = 0.0,
                SourceFName = "Signal1",
                NoiseReductionType = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.12,
            };
            var sonogram = new AmplitudeSonogram(config, testRecording.WavReader);

            // take one arbitrary frame, normalise it, and find its peaks
            var normalisedSpectrum = DataTools.normalise(MatrixTools.GetRow(sonogram.Data, 40));
            var peaks = DataTools.GetPeaks(normalisedSpectrum, 0.5);

            // report every peak, ignoring two bins at either end
            int binLimit = peaks.Length - 2;
            for (int bin = 2; bin < binLimit; bin++)
            {
                if (!peaks[bin])
                {
                    continue;
                }

                LoggedConsole.WriteLine($"bin ={scale.BinBounds[bin, 0]},  Herz={scale.BinBounds[bin, 1]}-{scale.BinBounds[bin + 1, 1]}  ");
            }

            // every one of these bins must hold a peak; stop checking at the first one that does not.
            int[] expectedPeakBins = { 11, 22, 45, 92, 185 };
            bool allPeaksFound = true;
            foreach (int expectedBin in expectedPeakBins)
            {
                if (!peaks[expectedBin])
                {
                    allPeaksFound = false;
                    break;
                }
            }

            if (allPeaksFound)
            {
                LoggedConsole.WriteSuccessLine("Spectral Peaks found at correct places");
            }
            else
            {
                LoggedConsole.WriteErrorLine("Spectral Peaks found at INCORRECT places");
            }

            for (int k = 0; k < harmonics.Length; k++)
            {
                int herz = harmonics[k];
                LoggedConsole.WriteLine($"Harmonic {herz}Herz  should be in bin  {scale.GetBinIdForHerzValue(herz)}");
            }

            // spectrogram without framing, annotation etc
            var image = sonogram.GetImage();
            string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)}   SR={sampleRate}  Window={windowSize}";

            image = sonogram.GetImageFullyAnnotated(image, title, scale.GridLineLocations);
            image.Save(imagePath);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reads one CSV matrix per name in <c>ContentSignatures.IndexNames</c> and combines them into a single matrix,
        /// each row of which is a combined feature vector for one minute.
        /// Row r of the result corresponds to minute <c>startTime + r</c>; the values of index i occupy
        /// columns <c>i * colCount</c> to <c>((i + 1) * colCount) - 1</c>.
        /// All index matrices are assumed to have the same dimensions as the first one.
        /// </summary>
        /// <param name="dir">Directory containing the index CSV files.</param>
        /// <param name="baseName">File name prefix; files are named <c>baseName__Towsey.Acoustic.KEY.csv</c>.</param>
        /// <param name="startTime">Offset of the first minute to read; truncated to whole minutes.</param>
        /// <param name="duration">Length of the span to read; truncated to whole minutes.</param>
        /// <returns>A matrix with one row per minute and (number of indices X number of frequency bins) columns.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The start or duration is negative, or the first matrix has fewer rows than the requested span needs.
        /// </exception>
        public static double[,] ReadSpectralIndicesFromIndexMatrices(DirectoryInfo dir, string baseName, TimeSpan startTime, TimeSpan duration)
        {
            //get start and end minutes
            int startMinute = (int)startTime.TotalMinutes;
            int minuteSpan = (int)duration.TotalMinutes;
            int endMinute = startMinute + minuteSpan;

            if (startMinute < 0 || minuteSpan < 0)
            {
                throw new ArgumentOutOfRangeException(string.Empty, "Start time and duration must not be negative.");
            }

            // read the first matrix to see what size data we are dealing with
            // assume all matrices have the same dimensions.
            var key = ContentSignatures.IndexNames[0];
            var path = Path.Combine(dir.FullName, baseName + "__Towsey.Acoustic." + key + ".csv");
            var firstMatrix = Csv.ReadMatrixFromCsv<double>(new FileInfo(path));
            var rowCount = firstMatrix.GetLength(0);
            var colCount = firstMatrix.GetLength(1);

            if (rowCount < endMinute)
            {
                throw new ArgumentOutOfRangeException(string.Empty, "Not enough rows in matrix to read the given timespan.");
            }

            // set up the return Matrix
            // indexCount will be number of indices X number of frequency bins
            var indexCount = ContentSignatures.IndexNames.Length * colCount;
            var opMatrix = new double[minuteSpan, indexCount];

            // Start at index 0 so that the first index is copied too (re-using the matrix already read above).
            for (int i = 0; i < ContentSignatures.IndexNames.Length; i++)
            {
                var indexMatrix = firstMatrix;
                if (i > 0)
                {
                    key = ContentSignatures.IndexNames[i];
                    path = Path.Combine(dir.FullName, baseName + "__Towsey.Acoustic." + key + ".csv");
                    indexMatrix = Csv.ReadMatrixFromCsv<double>(new FileInfo(path));
                }

                int startColumn = colCount * i;

                // Copy only the requested minutes: source row (startMinute + r) goes to output row r.
                for (int r = 0; r < minuteSpan; r++)
                {
                    var row = MatrixTools.GetRow(indexMatrix, startMinute + r);
                    for (int c = 0; c < colCount; c++)
                    {
                        opMatrix[r, startColumn + c] = row[c];
                    }
                }
            }

            return opMatrix;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Extracts the same row from every matrix in a dictionary of index matrices.
        /// </summary>
        /// <param name="allIndices">Index matrices by key.</param>
        /// <param name="rowId">The row to take from each matrix; presumably one row per minute, going by the method name.</param>
        /// <returns>A new dictionary with the same keys, each mapped to the requested row of its matrix.</returns>
        public static Dictionary<string, double[]> GetIndicesForOneMinute(Dictionary<string, double[,]> allIndices, int rowId)
        {
            var rowsByKey = new Dictionary<string, double[]>();

            foreach (KeyValuePair<string, double[,]> entry in allIndices)
            {
                double[] row = MatrixTools.GetRow(entry.Value, rowId);
                rowsByKey.Add(entry.Key, row);
            }

            return rowsByKey;
        }
Ejemplo n.º 10
0
        } // GetInstanceRepresentations()

        /// <summary>
        /// Builds one feature vector per species by summing the feature vectors of all instances labelled with that species,
        /// and sums the frame counts of those instances. The sums are not divided by the number of instances.
        /// Results are stored in <c>output.SpeciesFeatureMatrix</c> and <c>output.FrameNumbersPerSpecies</c>.
        /// </summary>
        /// <param name="arguments">Supplies the instance count and the species count.</param>
        /// <param name="output">Supplies InstanceFeatureMatrix, SpeciesID and FrameNumbersPerInstance; receives the two results.</param>
        public static void GetSpeciesRepresentations(Arguments arguments, Output output)
        {
            LoggedConsole.WriteLine("\n\n2a. Obtain feature representation of every species.");

            int instanceCount = arguments.InstanceCount;
            int speciesCount = arguments.SpeciesCount;
            int featureCount = output.InstanceFeatureMatrix.GetLength(1);

            // initialise species description matrix
            double[,] speciesFeatureMatrix = new double[speciesCount, featureCount];
            int[] frameNumbersPerSpecies = new int[speciesCount];

            // progress trace: one label per species
            for (int i = 0; i < speciesCount; i++)
            {
                LoggedConsole.Write(" " + (i + 1));
            }

            // A single pass over the instances. Instances are visited in index order,
            // so every species row accumulates its values in the same order as a per-species scan would.
            for (int j = 0; j < instanceCount; j++)
            {
                // species labels are one more than the corresponding row index.
                int speciesIndex = output.SpeciesID[j] - 1;
                if (speciesIndex < 0 || speciesIndex >= speciesCount)
                {
                    // a label outside 1..speciesCount matches no species and is ignored.
                    continue;
                }

                //aggregate the instance feature values
                double[] ipVector = MatrixTools.GetRow(output.InstanceFeatureMatrix, j);
                for (int c = 0; c < featureCount; c++)
                {
                    speciesFeatureMatrix[speciesIndex, c] += ipVector[c];
                }

                frameNumbersPerSpecies[speciesIndex] += output.FrameNumbersPerInstance[j];
            }

            LoggedConsole.WriteLine(" Done");

            output.SpeciesFeatureMatrix = speciesFeatureMatrix;
            output.FrameNumbersPerSpecies = frameNumbersPerSpecies;
        } // GetSpeciesRepresentations()
Ejemplo n.º 11
0
        /// <summary>
        /// Detects harmonics in a sub-band of a sonogram, one frame at a time.
        /// The matrix is assumed to come from a spectrogram rotated so that each matrix row is one spectral column of the sonogram.
        /// Developed for the GenericRecognizer of harmonics.
        /// </summary>
        /// <param name="m">data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level; frames whose maximum is below it get no intensity or index.</param>
        /// <returns>three arrays: the max value per frame, the formant intensity per frame, and the index of the max DCT coefficient per frame.</returns>
        public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
        {
            int frameCount = m.GetLength(0);
            int binCount = m.GetLength(1);

            var decibels = new double[frameCount];
            var formantIntensity = new double[frameCount];
            var dctPeakIndex = new int[frameCount];

            // cosine coefficients used by the DCT below.
            double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

            const int lowerDctBound = 2;

            // frames = rows of matrix
            for (int t = 0; t < frameCount; t++)
            {
                var spectrum = MatrixTools.GetRow(m, t);
                double loudest = spectrum.Max();
                decibels[t] = loudest;
                if (loudest < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(spectrum);

                // Per the original author, xr has twice the length of the frame and is symmetrical, so only the first half is kept.
                // Each value is divided by its overlap count.
                double[] overlapNormalised = new double[binCount];
                for (int lag = 0; lag < binCount; lag++)
                {
                    overlapNormalised[lag] = xr[lag] / (binCount - lag);
                }

                // DCT across the auto-correlation; the largest coefficient gives intensity and index.
                var dct = Oscillations2012.DoDct(overlapNormalised, cosines, lowerDctBound);
                int strongest = DataTools.GetMaxIndex(dct);
                formantIntensity[t] = dct[strongest];
                dctPeakIndex[t] = strongest;
            }

            return Tuple.Create(decibels, formantIntensity, dctPeakIndex);
        }
Ejemplo n.º 12
0
        } // GetSpeciesRepresentations()

        /// <summary>
        /// Draws, for every species, one graph per feature key and saves the graphs stacked vertically
        /// as <c>Species{N}.SpectralFeatures.png</c> in the output directory.
        /// Every image created here is disposed once the combined image has been saved.
        /// </summary>
        /// <param name="arguments">Supplies the species count and the output directory.</param>
        /// <param name="output">Supplies SpeciesFeatureMatrix, ReducedSpectralLength and InstanceNumbersPerSpecies.</param>
        public static void DrawSpeciesImages(Arguments arguments, Output output)
        {
            LoggedConsole.WriteLine("2b. Draw feature representation of every species.");
            int scalingFactor = 20;
            int imageHeight = 100;

            int speciesCount = arguments.SpeciesCount;
            var keyArray = FEATURE_KEYS.Split(',');

            // loop through all species
            for (int r = 0; r < speciesCount; r++)
            {
                double[] ipVector = MatrixTools.GetRow(output.SpeciesFeatureMatrix, r);

                // now make images
                var images = new List<Image>();
                try
                {
                    int featureID = 0;
                    foreach (string key in keyArray)
                    {
                        // each feature occupies a consecutive slice of the species vector.
                        double[] vector = new double[output.ReducedSpectralLength];
                        int featureOffset = featureID * output.ReducedSpectralLength;
                        for (int c = 0; c < output.ReducedSpectralLength; c++)
                        {
                            vector[c] = ipVector[featureOffset + c];
                        }

                        featureID++;

                        vector = DataTools.Normalise2Probabilites(vector);
                        vector = DataTools.filterMovingAverage(vector, 3);
                        string label = string.Format("{0} {1} ({2})", r + 1, key, output.InstanceNumbersPerSpecies[r]);
                        Image image = GraphsAndCharts.DrawGraph(label, vector, output.ReducedSpectralLength, imageHeight, scalingFactor);
                        images.Add(image);
                    }

                    string outputFileName = string.Format("Species{0}.SpectralFeatures.png", r + 1);
                    string path = Path.Combine(arguments.OutputDirectory.FullName, outputFileName);

                    using (Image combinedImage = ImageTools.CombineImagesVertically(images))
                    {
                        combinedImage.Save(path);
                    }
                }
                finally
                {
                    // release the per-feature images whether or not drawing and saving succeeded.
                    foreach (Image image in images)
                    {
                        image.Dispose();
                    }
                }
            } // loop through all species
        }
        } //DetectBarsInTheRowsOfaMatrix()

        /// <summary>
        /// Detects harmonics in the rows of the passed portion of a sonogram.
        /// The matrix is assumed to come from a spectrogram rotated so that each matrix row is one spectral column of the sonogram.
        /// Was first developed for crow calls.
        /// Each row whose smoothed average reaches the threshold is cross-correlated with itself; the position of the
        /// largest value of the resulting spectrum (after zeroing its first three bins) gives the harmonic period.
        /// NOTE(review): <paramref name="callSpan"/> is not used anywhere in this method - no call-duration matching is done here.
        /// </summary>
        /// <param name="m">data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level, compared with the smoothed row averages.</param>
        /// <param name="callSpan">Minimum length of call of interest. Currently unused.</param>
        /// <returns>a tuple of three arrays: smoothed row averages, period intensity, and periodicity.</returns>
        public static Tuple<double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
        {
            // number of leading bins to zero, to remove low freq content which dominates the spectrum.
            const int zeroBinCount = 3;

            int frameCount = m.GetLength(0);
            int binCount = m.GetLength(1);

            var periodIntensity = new double[frameCount];
            var periodValues = new double[frameCount];

            double[] smoothedDecibels = DataTools.filterMovingAverage(MatrixTools.GetRowAverages(m), 3);

            // for all time frames
            for (int t = 0; t < frameCount; t++)
            {
                if (smoothedDecibels[t] < dBThreshold)
                {
                    continue;
                }

                var frame = MatrixTools.GetRow(m, t);
                var spectrum = AutoAndCrossCorrelation.CrossCorr(frame, frame);

                // in real data these bins are dominant and hide other frequency content
                for (int bin = 0; bin < zeroBinCount; bin++)
                {
                    spectrum[bin] = 0.0;
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int peakBin = DataTools.GetMaxIndex(spectrum);
                periodIntensity[t] = spectrum[peakBin];

                // a peak in bin zero carries no period information.
                periodValues[t] = peakBin != 0 ? 2 * binCount / (double)peakBin : 0.0;
            }

            return Tuple.Create(smoothedDecibels, periodIntensity, periodValues);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Scores every instance against every species and stores the result in <c>output.SimilarityScores</c>
        /// (one row per instance, one column per species).
        /// The score is the dot product of the two feature vectors.
        /// NOTE(review): the original comment calls this cosine similarity; a dot product equals cosine similarity only
        /// when both vectors have unit length - confirm the vectors are normalised that way beforehand.
        /// Could also use Euclidian distance.
        /// </summary>
        /// <param name="arguments">Supplies the species count and the instance count.</param>
        /// <param name="output">Supplies the instance and species feature matrices; receives SimilarityScores.</param>
        public static void CalculateSimilarityScores(Arguments arguments, Output output)
        {
            int speciesCount = arguments.SpeciesCount;
            int instanceCount = arguments.InstanceCount;

            output.SimilarityScores = new double[instanceCount, speciesCount];

            // The species vectors are the same for every instance, so extract them once only.
            var speciesVectors = new double[speciesCount][];
            for (int s = 0; s < speciesCount; s++)
            {
                speciesVectors[s] = MatrixTools.GetRow(output.SpeciesFeatureMatrix, s);
            }

            // loop through all instances
            for (int r = 0; r < instanceCount; r++)
            {
                double[] instance = MatrixTools.GetRow(output.InstanceFeatureMatrix, r);

                for (int s = 0; s < speciesCount; s++)
                {
                    output.SimilarityScores[r, s] = DataTools.DotProduct(instance, speciesVectors[s]);
                }
            } // end for loop r over all instances
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Applies the vector overload of <c>MaxPoolingLimited</c> to every row of a matrix and
        /// collects the first <paramref name="reducedBinCount"/> values of each pooled row into a new matrix.
        /// </summary>
        /// <param name="M">Matrix whose rows are pooled one at a time.</param>
        /// <param name="startBin">Passed unchanged to the vector overload.</param>
        /// <param name="maxOf2Bin">Passed unchanged to the vector overload.</param>
        /// <param name="maxOf3Bin">Passed unchanged to the vector overload.</param>
        /// <param name="endBin">Passed unchanged to the vector overload.</param>
        /// <param name="reducedBinCount">Number of columns in the returned matrix.</param>
        /// <returns>A matrix with the same row count as <paramref name="M"/> and <paramref name="reducedBinCount"/> columns.</returns>
        public static double[,] MaxPoolingLimited(double[,] M, int startBin, int maxOf2Bin, int maxOf3Bin, int endBin, int reducedBinCount)
        {
            int rowCount = M.GetLength(0);
            var pooled = new double[rowCount, reducedBinCount];

            for (int row = 0; row < rowCount; row++)
            {
                double[] pooledRow = MaxPoolingLimited(MatrixTools.GetRow(M, row), startBin, maxOf2Bin, maxOf3Bin, endBin);

                for (int bin = 0; bin < reducedBinCount; bin++)
                {
                    pooled[row, bin] = pooledRow[bin];
                }
            }

            return pooled;
        }
        /// <summary>
        /// Converts a spectrogram having linear freq scale to one having an Octave freq scale.
        /// Each row (frame) of the input is converted by <c>OctaveSpectrum</c> using the bin bounds of <paramref name="freqScale"/>.
        /// Note that the sample rate (sr) and the frame size both need to be appropriate to the choice of FreqScaleType.
        /// TODO: SHOULD DEVELOP A SEPARATE UNIT TEST for this method
        /// </summary>
        /// <param name="inputSpgram">Linear-scale spectrogram; rows are frames.</param>
        /// <param name="freqScale">The target frequency scale. Its scale type must not be linear.</param>
        /// <returns>A new matrix with one row per input frame and one column per row of the scale's bin bounds.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="freqScale"/> or <paramref name="inputSpgram"/> is null, or <paramref name="freqScale"/> is a linear scale.
        /// </exception>
        public static double[,] ConvertLinearSpectrogramToOctaveFreqScale(double[,] inputSpgram, FrequencyScale freqScale)
        {
            if (freqScale == null)
            {
                throw new ArgumentNullException(nameof(freqScale));
            }

            if (freqScale.ScaleType == FreqScaleType.Linear)
            {
                const string message = "Linear Hz Scale is not valid for this Octave method.";
                LoggedConsole.WriteLine(message);

                // The exception type is kept so that existing catch blocks keep working,
                // but the exception now carries the real reason rather than the default "cannot be null" text.
                throw new ArgumentNullException(nameof(freqScale), message);
            }

            if (inputSpgram == null)
            {
                throw new ArgumentNullException(nameof(inputSpgram));
            }

            // get the octave bin bounds for this octave scale type
            var octaveBinBounds = freqScale.BinBounds;
            int newBinCount = octaveBinBounds.GetLength(0);

            // set up the new octave spectrogram
            int frameCount = inputSpgram.GetLength(0);
            double[,] octaveSpectrogram = new double[frameCount, newBinCount];

            for (int row = 0; row < frameCount; row++)
            {
                //get each frame or spectrum in turn
                var linearSpectrum = MatrixTools.GetRow(inputSpgram, row);

                // convert the spectrum to its octave form
                var octaveSpectrum = OctaveSpectrum(octaveBinBounds, linearSpectrum);

                //return the spectrum to output spectrogram.
                MatrixTools.SetRow(octaveSpectrogram, row, octaveSpectrum);
            }

            return octaveSpectrogram;
        }
        /// <summary>
        /// Cross-correlates each row with the row before it (both expressed as difference from their mean) and,
        /// from the largest value of the resulting spectrum, derives an intensity and a period for that row.
        /// The matrix is assumed to come from a spectrogram rotated so that each matrix row is one spectral column of the sonogram.
        /// Row 0 has no predecessor and keeps zero values in both returned arrays.
        /// </summary>
        /// <param name="m">data matrix; must contain at least one row.</param>
        /// <param name="threshold">NOTE(review): not used anywhere in this method.</param>
        /// <param name="zeroBinCount">Number of leading spectrum bins set to zero before the maximum is located.</param>
        /// <returns>two arrays: the period intensity per row and the periodicity per row.</returns>
        public static Tuple<double[], double[]> DetectBarsInTheRowsOfaMatrix(double[,] m, double threshold, int zeroBinCount)
        {
            int rowTotal = m.GetLength(0);
            int binCount = m.GetLength(1);

            var periodIntensity = new double[rowTotal];
            var periodValues = new double[rowTotal];

            double[] previous = DataTools.DiffFromMean(MatrixTools.GetRow(m, 0));

            for (int r = 1; r < rowTotal; r++)
            {
                double[] current = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));

                var spectrum = AutoAndCrossCorrelation.CrossCorr(previous, current);

                // in real data these bins are dominant and hide other frequency content
                for (int bin = 0; bin < zeroBinCount; bin++)
                {
                    spectrum[bin] = 0.0;
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int peakBin = DataTools.GetMaxIndex(spectrum);
                periodIntensity[r] = spectrum[peakBin];

                // a peak in bin zero carries no period information.
                periodValues[r] = peakBin != 0 ? 2 * binCount / (double)peakBin : 0.0;

                // this row becomes the reference for the next one.
                previous = current;
            }

            return Tuple.Create(periodIntensity, periodValues);
        } //DetectBarsInTheRowsOfaMatrix()
Ejemplo n.º 18
0
        /// <summary>
        /// Max-pools the columns of each matrix row over fixed, hard-coded column bands.
        /// Band c starts at column bounds[c] and has length bounds[c + 1] - bounds[c], where
        /// bounds = { 8, 23, 53, 113, 233 }; therefore at most four pooled values per row are available.
        /// </summary>
        /// <param name="matrix">The matrix whose rows are pooled.</param>
        /// <param name="reducedColCount">How many bands (output columns) to produce, from 0 to 4.</param>
        /// <returns>A matrix with the same row count as the input and <paramref name="reducedColCount"/> columns, each holding the maximum of its band.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="matrix"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="reducedColCount"/> is negative or exceeds the number of defined bands.</exception>
        public static double[,] MaxPoolMatrixColumns(double[,] matrix, int reducedColCount)
        {
            if (matrix == null)
            {
                throw new ArgumentNullException(nameof(matrix));
            }

            // The band edges do not depend on the row, so build them once.
            int[] bounds = { 8, 23, 53, 113, 233 };
            int bandCount = bounds.Length - 1;

            if (reducedColCount < 0 || reducedColCount > bandCount)
            {
                // Previously a too-large value ran off the end of the bounds table part way through the work.
                throw new ArgumentOutOfRangeException(
                    nameof(reducedColCount),
                    reducedColCount,
                    "Value must be between 0 and " + bandCount + " because only " + bandCount + " column bands are defined.");
            }

            int rows = matrix.GetLength(0);
            var returnMatrix = new double[rows, reducedColCount];

            for (int r = 0; r < rows; r++)
            {
                var rowVector = MatrixTools.GetRow(matrix, r);

                // reduce the row to one maximum per band
                for (int c = 0; c < reducedColCount; c++)
                {
                    int length = bounds[c + 1] - bounds[c];
                    double[] subvector = DataTools.Subarray(rowVector, bounds[c], length);
                    int max = DataTools.GetMaxIndex(subvector);
                    returnMatrix[r, c] = subvector[max];
                }
            }

            return returnMatrix;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Max-pools the columns of each matrix row in consecutive, non-overlapping groups of
        /// <paramref name="factor"/> columns.
        /// The output has cols / factor columns (integer division), so any trailing columns that do not
        /// fill a complete group are ignored.
        /// </summary>
        /// <param name="matrix">The matrix whose rows are pooled.</param>
        /// <param name="factor">The number of adjacent columns merged into one output column; must be positive.</param>
        /// <returns>A matrix with the same row count as the input, each element holding the maximum of its column group.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="matrix"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="factor"/> is zero or negative.</exception>
        public static double[,] MaxPoolMatrixColumnsByFactor(double[,] matrix, int factor)
        {
            if (matrix == null)
            {
                throw new ArgumentNullException(nameof(matrix));
            }

            if (factor <= 0)
            {
                // Zero used to surface as DivideByZeroException and negatives as an invalid array size.
                throw new ArgumentOutOfRangeException(nameof(factor), factor, "The pooling factor must be greater than zero.");
            }

            int rows = matrix.GetLength(0);
            int cols = matrix.GetLength(1);
            int reducedColCount = cols / factor;

            var returnMatrix = new double[rows, reducedColCount];
            for (int r = 0; r < rows; r++)
            {
                var rowVector = MatrixTools.GetRow(matrix, r);
                int lowerBound = 0;

                // take the maximum of each successive group of 'factor' columns
                for (int c = 0; c < reducedColCount; c++)
                {
                    double[] subvector = DataTools.Subarray(rowVector, lowerBound, factor);
                    int max = DataTools.GetMaxIndex(subvector);
                    returnMatrix[r, c] = subvector[max];
                    lowerBound += factor;
                }
            }

            return returnMatrix;
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Scans a recording for candidate call events and scores each one against a set of call templates.
        /// The method builds a noise-reduced spectrogram (window 256, zero overlap), finds the dominant
        /// frequency bin of every frame, averages the amplitude over the configured frequency band,
        /// subtracts a baseline from that average, segments the result into events of acceptable width,
        /// and keeps the events whose template score reaches the similarity threshold.
        /// </summary>
        /// <param name="audioRecording">The recording to analyse; its WavReader, SampleRate and BaseName are used.</param>
        /// <param name="configuration">Supplies NoiseBgThreshold, SpeciesName, MinHz, MaxHz and EventThreshold.</param>
        /// <param name="outputDirectory">Receives the debug spectrogram image when MainEntry.InDEBUG is true.</param>
        /// <param name="segmentStartOffset">Passed to every created AcousticEvent.</param>
        /// <returns>The accepted events, the hits matrix, a plot of the pruned scores and the spectrogram.</returns>
        internal RecognizerResults Gruntwork(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
        {
            double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;

            // make a spectrogram
            var config = new SonogramConfig
            {
                WindowSize = 256,
                NoiseReductionType = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
                WindowOverlap = 0.0,
            };

            // now construct the standard decibel spectrogram WITH noise removal
            // get frame parameters for the analysis
            var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

            // remove the DC column
            var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
            int sampleRate = audioRecording.SampleRate;
            int rowCount = spg.GetLength(0);
            int colCount = spg.GetLength(1);

            int frameSize = config.WindowSize;
            int frameStep = frameSize; // this default = zero overlap
            double frameStepInSeconds = frameStep / (double)sampleRate;
            double framesPerSec = 1 / frameStepInSeconds;

            // reading in variables from the config file
            string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // ## THREE THRESHOLDS ---- only one of these is given to user.
            // minimum dB to register a dominant freq peak. After noise removal
            double peakThresholdDb = 3.0;

            // The threshold dB amplitude in the dominant freq bin required to yield an event
            double eventThresholdDb = 6;

            // minimum score for an acceptable event - that is when processing the score array.
            double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

            // IMPORTANT: The following frame widths assume a sampling rate = 22050 and window size of 256.
            int minFrameWidth = 7;
            int maxFrameWidth = 14;

            // Calculate Max Amplitude
            int binMin = (int)Math.Round(minHz / sonogram.FBinWidth);
            int binMax = (int)Math.Round(maxHz / sonogram.FBinWidth);

            int[] dominantBins = new int[rowCount];    // predefinition of events max frequency
            double[] scores = new double[rowCount];    // predefinition of score array
            double[,] hits = new double[rowCount, colCount];

            // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
            for (int s = 0; s < rowCount; s++)
            {
                double[] spectrum = MatrixTools.GetRow(spg, s);
                double maxAmplitude = double.MinValue;
                int maxId = 0;

                // loop through the bins below binMax (starting at bin 5) and look for the dominant frequency
                for (int binID = 5; binID < binMax; binID++)
                {
                    if (spectrum[binID] > maxAmplitude)
                    {
                        maxAmplitude = spectrum[binID];
                        maxId = binID;
                    }
                }

                // a dominant bin below the band of interest does not count
                if (maxId < binMin)
                {
                    continue;
                }

                // peak should exceed threshold amplitude
                if (spectrum[maxId] < peakThresholdDb)
                {
                    continue;
                }

                scores[s] = maxAmplitude;
                dominantBins[s] = maxId;
            } // loop through all spectra

            // Find average amplitude in the band binMin..binMax for every frame
            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(
                sonogram.Data,
                0,
                binMin,
                rowCount - 1,
                binMax);

            var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

            // We now have a list of potential hits for C. tinnula. This needs to be filtered.
            Plot.FindStartsAndEndsOfScoreEvents(highPassFilteredSignal, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

            // minimum number of bins covering frequency bandwidth of C. tinnula call
            // The templates do not depend on the event, so they are built once rather than per event.
            int callBinWidth = 14;
            var templates = GetCtinnulaTemplates(callBinWidth);

            // loop through the start/end pairs and turn them into potential events
            var potentialEvents = new List<AcousticEvent>();

            foreach (Point point in startEnds)
            {
                // get average of the dominant bin over the frames that registered one
                int binSum = 0;
                int binCount = 0;
                int eventWidth = point.Y - point.X + 1;
                for (int s = point.X; s <= point.Y; s++)
                {
                    if (dominantBins[s] >= binMin)
                    {
                        binSum += dominantBins[s];
                        binCount++;
                    }
                }

                // Events come from the band-averaged amplitude, not from dominantBins, so an event can
                // contain no frame with a registered dominant bin. Averaging would then divide by zero
                // and the NaN result would be cast to int and used as a matrix index. Skip such events.
                if (binCount == 0)
                {
                    continue;
                }

                // find average dominant bin for the event
                double meanDominantBin = Math.Round(binSum / (double)binCount);
                int avDominantBin = (int)meanDominantBin;
                int avDominantFreq = (int)(meanDominantBin * sonogram.FBinWidth);

                // Get score for the event from the block of callBinWidth bins ending one bin above the dominant bin.
                var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
                double eventScore = GetEventScore(eventMatrix, templates);

                // put hits into hits matrix
                // put the event score into the score array
                for (int s = point.X; s <= point.Y; s++)
                {
                    hits[s, avDominantBin] = 10;
                    prunedScores[s] = eventScore;
                }

                if (eventScore < similarityThreshold)
                {
                    continue;
                }

                double startTime = point.X * frameStepInSeconds;
                double durationTime = eventWidth * frameStepInSeconds;
                var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, minHz, maxHz);
                newEvent.DominantFreq = avDominantFreq;
                newEvent.Score = eventScore;
                newEvent.SetTimeAndFreqScales(framesPerSec, sonogram.FBinWidth);
                newEvent.Name = string.Empty; // remove name because it hides spectral content of the event.

                potentialEvents.Add(newEvent);
            }

            // display the original score array
            // NOTE(review): debugPlot is not referenced below; kept because the side effects of these calls are not visible here.
            scores = DataTools.normalise(scores);
            var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold);

            // DEBUG IMAGE this recognizer only. MUST set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeArray, eventThresholdDb, out var normalisedScores, out var normalisedThreshold);
                var ampltdPlot = new Plot("Average amplitude", normalisedScores, normalisedThreshold);

                DataTools.Normalise(highPassFilteredSignal, eventThresholdDb, out normalisedScores, out normalisedThreshold);
                var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

                var debugPlots = new List<Plot> { ampltdPlot, demeanedPlot };
                Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, null);
                var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            // display the similarity scores
            var plot = new Plot(this.DisplayName, prunedScores, similarityThreshold);
            var plots = new List<Plot> { plot };

            // add names into the returned events (after the debug image, which is drawn with empty names)
            foreach (AcousticEvent ae in potentialEvents)
            {
                // Use the configured species name; the previous code assigned the literal text "speciesName".
                ae.Name = speciesName;
            }

            return new RecognizerResults()
            {
                Events = potentialEvents,
                Hits = hits,
                Plots = plots,
                Sonogram = sonogram,
            };
        }
        /// <summary>
        /// New and alternative version of the Lconvex recogniser, written because the call proved more variable than first realised.
        /// A block of callFrameWidth (5) consecutive frames is slid along a band of the spectrogram; at each
        /// position the summed spectrum of the block is matched against the call templates. Frames that are
        /// peaks of the smoothed score array and pass both the similarity and decibel thresholds become events.
        /// </summary>
        /// <param name="audioRecording">The recording to analyse; its WavReader, SampleRate and BaseName are used.</param>
        /// <param name="configuration">Supplies NoiseBgThreshold, AbbreviatedSpeciesName, SpeciesName, PeakThresholdDecibels, EventThreshold and TopFrequency.</param>
        /// <param name="outputDirectory">Receives the debug spectrogram image when this.displayDebugImage is set.</param>
        /// <param name="segmentStartOffset">Passed to every created AcousticEvent.</param>
        /// <returns>The accepted events, the (unpopulated) hits matrix, a plot of the normalised scores and the spectrogram.</returns>
        /// <exception cref="InvalidOperationException">Thrown when the template plus search band does not fit below the configured top frequency.</exception>
        internal RecognizerResults Gruntwork2(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
        {
            // make a spectrogram
            double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;
            int frameStep = 512;
            int sampleRate = audioRecording.SampleRate;
            double frameStepInSeconds = frameStep / (double)sampleRate;
            double framesPerSec = 1 / frameStepInSeconds;
            var config = new SonogramConfig
            {
                WindowSize = frameStep, // this default = zero overlap
                WindowOverlap = 0.0,
                NoiseReductionType = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
            // get frame parameters for the analysis
            var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

            // NB: unlike the other versions of this recognizer, the DC column is NOT removed here.
            var spg = sonogram.Data;
            int rowCount = spg.GetLength(0);
            int colCount = spg.GetLength(1);
            double herzPerBin = sampleRate / 2.0 / colCount;

            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // ## TWO THRESHOLDS
            // The threshold dB amplitude in the dominant freq bin required to yield an event
            double eventThresholdDb = configuration.GetDoubleOrNull("PeakThresholdDecibels") ?? 3.0;

            // minimum score for an acceptable event - that is when processing the score array.
            double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.5;

            // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
            int callFrameWidth = 5;
            int callHalfWidth = callFrameWidth / 2;

            // minimum number of bins covering frequency bandwidth of L.convex call
            // call has binWidth=25 but we want zero buffer of four bins either side.
            int callBinWidth = 25;
            int binSilenceBuffer = 4;
            int topFrequency = configuration.GetInt("TopFrequency");

            // # The Limnodynastes call has a duration of 3-5 frames given the above settings.
            // # But we will assume 5-7 because sometimes the three harmonics are not exactly aligned!!
            // # The call has three major peaks. The top peak, typically the dominant peak, is at approx 1850, a value which is set in the config.
            // # The second and third peak are at equal gaps below. TopFreq-gap and TopFreq-(2*gap);
            // # The gap could be set in the Config file, but this is not implemented yet.
            // Instead the algorithm uses three pre-fixed templates that determine the different kinds of gap. Gap is typically close to 500Hz
            // In the D.Stewart CD, there are peaks close to:
            //1. 1950 Hz
            //2. 1460 hz
            //3.  970 hz    These are 490 Hz apart.
            // In the Kiyomi's JCU recording, there are peaks close to:
            //1. 1780 Hz
            //2. 1330 hz
            //3.  880 hz    These are 450 Hz apart.

            // So the strategy is to look for three peaks separated by same amount and in the vicinity of the above,
            // To this end we produce three templates each of length 36, but having 2nd and 3rd peaks at different intervals.
            var templates = GetLconvexTemplates(callBinWidth, binSilenceBuffer);
            int templateHeight = templates[0].Length;

            // NOTE: could give user control over other call features
            //  Such as frequency gap between peaks. But not in this first iteration of the recognizer.

            int searchBand = 8;
            int topBin = (int)Math.Round(topFrequency / herzPerBin);
            int bottomBin = topBin - templateHeight - searchBand + 1;

            if (bottomBin < 0)
            {
                // Log as before, then stop: continuing would ask Submatrix for a negative column index.
                const string message = "Template bandwidth exceeds availble bandwidth given your value for top frequency.";
                Log.Fatal(message);
                throw new InvalidOperationException(message);
            }

            // restrict the spectrogram to the band bottomBin..topBin
            spg = MatrixTools.Submatrix(spg, 0, bottomBin, sonogram.Data.GetLength(0) - 1, topBin);

            // summed spectrum of the first block of callFrameWidth frames
            double[,] frames = MatrixTools.Submatrix(spg, 0, 0, callFrameWidth - 1, spg.GetLength(1) - 1);
            double[] spectrum = MatrixTools.GetColumnSums(frames);

            // set up arrays for monitoring important event parameters
            double[] decibels = new double[rowCount];
            int[] bottomBins = new int[rowCount];
            double[] scores = new double[rowCount]; // predefinition of score array
            int[] templateIds = new int[rowCount];
            double[,] hits = new double[rowCount, colCount];

            // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
            for (int s = callFrameWidth; s < rowCount; s++)
            {
                double[] rowToRemove = MatrixTools.GetRow(spg, s - callFrameWidth);
                double[] rowToAdd = MatrixTools.GetRow(spg, s);

                // shift frame block to the right: drop the oldest frame, add the newest.
                for (int b = 0; b < spectrum.Length; b++)
                {
                    spectrum[b] = spectrum[b] - rowToRemove[b] + rowToAdd[b];
                }

                // now check if frame block matches a template.
                ScanEventScores(spectrum, templates, out double eventScore, out int eventBottomBin, out int templateId);

                // results are stored at this frame offset within the block
                int frameIndex = s - callHalfWidth - 1;
                decibels[frameIndex] = spectrum.Max() / callFrameWidth;
                bottomBins[frameIndex] = eventBottomBin + bottomBin; // convert back to a bin of the full spectrogram
                scores[frameIndex] = eventScore;
                templateIds[frameIndex] = templateId;
            } // loop through all spectra

            // we now have a score array and decibel array and bottom bin array for the entire spectrogram.
            // smooth them to find events
            scores = DataTools.filterMovingAverageOdd(scores, 5);
            decibels = DataTools.filterMovingAverageOdd(decibels, 3);

            var peaks = DataTools.GetPeaks(scores);

            // loop through the score array and find potential events
            var potentialEvents = new List<AcousticEvent>();

            for (int s = callHalfWidth; s < scores.Length - callHalfWidth - 1; s++)
            {
                // an event must be a score peak that passes both thresholds
                if (!peaks[s])
                {
                    continue;
                }

                if (scores[s] < similarityThreshold)
                {
                    continue;
                }

                if (decibels[s] < eventThresholdDb)
                {
                    continue;
                }

                int bottomBinForEvent = bottomBins[s];
                int topBinForEvent = bottomBinForEvent + templateHeight;
                int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
                int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

                double startTime = (s - callHalfWidth) * frameStepInSeconds;
                double durationTime = callFrameWidth * frameStepInSeconds;
                var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
                {
                    // temporary name identifying the matched template; replaced after the debug image is drawn
                    Name = "Lc" + templateIds[s],
                    Score = scores[s],
                };
                newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
                potentialEvents.Add(newEvent);
            }

            // display the original score array
            scores = DataTools.normalise(scores);
            var scorePlot = new Plot(this.DisplayName + " scores", scores, similarityThreshold);

            DataTools.Normalise(decibels, eventThresholdDb, out double[] normalisedDb, out double normalisedThreshold);
            var decibelPlot = new Plot("Decibels", normalisedDb, normalisedThreshold);
            var debugPlots = new List<Plot> { scorePlot, decibelPlot };

            if (this.displayDebugImage)
            {
                var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
                var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            // display the similarity scores
            var plot = new Plot(this.DisplayName, scores, similarityThreshold);
            var plots = new List<Plot> { plot };

            // add names into the returned events
            string speciesName = configuration[AnalysisKeys.SpeciesName] ?? this.SpeciesName;

            foreach (var ae in potentialEvents)
            {
                ae.Name = abbreviatedSpeciesName;
                ae.SpeciesName = speciesName;
            }

            return new RecognizerResults()
            {
                Events = potentialEvents,
                Hits = hits,
                Plots = plots,
                Sonogram = sonogram,
            };
        }
        internal RecognizerResults Gruntwork1(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
        {
            // make a spectrogram
            double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;
            var    config = new SonogramConfig
            {
                WindowSize              = 512,
                WindowOverlap           = 0.0,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
            // get frame parameters for the analysis
            var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

            // remove the DC column
            var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

            sonogram.Data = spg;
            int sampleRate = audioRecording.SampleRate;
            int rowCount   = spg.GetLength(0);
            int colCount   = spg.GetLength(1);

            //double epsilon = Math.Pow(0.5, audioRecording.BitsPerSample - 1);
            int frameSize = colCount * 2;
            int frameStep = frameSize; // this default = zero overlap

            //double frameDurationInSeconds = frameSize / (double)sampleRate;
            double frameStepInSeconds = frameStep / (double)sampleRate;
            double framesPerSec       = 1 / frameStepInSeconds;
            double herzPerBin         = sampleRate / 2.0 / colCount;

            //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // ## THREE THRESHOLDS ---- only one of these is given to user.
            // minimum dB to register a dominant freq peak. After noise removal
            double peakThresholdDb = 3.0;

            // The threshold dB amplitude in the dominant freq bin required to yield an event
            double eventThresholdDb = 10.0;

            // minimum score for an acceptable event - that is when processing the score array.
            double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

            // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
            int minFrameWidth = 3;
            int maxFrameWidth = 5;

            //double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
            //double maxDuration = maxFrameWidth * frameStepInSeconds;

            // minimum number of bins covering frequency bandwidth of L.convex call
            int callBinWidth     = 25;
            int silenceBinBuffer = 4;

            // # The Limnodynastes call has a duration of 3-5 frames given the above settings.
            // # The call has three major peaks. The dominant peak is at approx 1850, a value which is set in the convig.
            // # The second and third peak are at equal gaps below. DominantFreq-gap and DominantFreq-(2*gap);
            // # Set the gap in the Config file. Should typically be in range 880 to 970
            // for Limnodynastes convex, in the D.Stewart CD, there are peaks close to:
            //1. 1950 Hz
            //2. 1460 hz
            //3.  970 hz    These are 490 Hz apart.
            // for Limnodynastes convex, in the Kiyomi's JCU recording, there are peaks close to:
            //1. 1780 Hz
            //2. 1330 hz
            //3.  880 hz    These are 450 Hz apart.

            // So the strategy is to look for three peaks separated by same amount and in the vicinity of the above,
            //  starting with highest power (the top peak) and working down to lowest power (bottom peak).
            // To this end we produce two templates each of length 25, but having 2nd and 3rd peaks at different intervals.
            var templates = GetLconvexTemplates(callBinWidth, silenceBinBuffer);

            int dominantFrequency = (int)configuration.GetIntOrNull("DominantFrequency");

            // NOTE: could give user control over other call features
            //  Such as frequency gap between peaks. But not in this first iteration of the recognizer.
            //int peakGapInHerz = (int)configuration["PeakGap"];
            //int minHz = (int)configuration[AnalysisKeys.MinHz];
            //int F1AndF2BinGap = (int)Math.Round(peakGapInHerz / herzPerBin);
            //int F1AndF3BinGap = 2 * F1AndF2BinGap;

            int hzBuffer       = 250;
            int dominantBin    = (int)Math.Round(dominantFrequency / herzPerBin);
            int binBuffer      = (int)Math.Round(hzBuffer / herzPerBin);
            int dominantBinMin = dominantBin - binBuffer;
            int dominantBinMax = dominantBin + binBuffer;

            //int bandwidth = dominantBinMax - dominantBinMin + 1;

            int[]    dominantBins = new int[rowCount];    // predefinition of events max frequency
            double[] scores       = new double[rowCount]; // predefinition of score array
            double[,] hits = new double[rowCount, colCount];

            // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
            // mark the hits in hitMatrix
            for (int s = 0; s < rowCount; s++)
            {
                double[] spectrum     = MatrixTools.GetRow(spg, s);
                double   maxAmplitude = -double.MaxValue;
                int      maxId        = 0;

                // loop through bandwidth of L.onvex call and look for dominant frequency
                for (int binId = 5; binId < dominantBinMax; binId++)
                {
                    if (spectrum[binId] > maxAmplitude)
                    {
                        maxAmplitude = spectrum[binId];
                        maxId        = binId;
                    }
                }

                if (maxId < dominantBinMin)
                {
                    continue;
                }

                // peak should exceed thresold amplitude
                if (spectrum[maxId] < peakThresholdDb)
                {
                    continue;
                }

                scores[s]       = maxAmplitude;
                dominantBins[s] = maxId;

                // Console.WriteLine("Col {0}, Bin {1}  ", c, freqBinID);
            } // loop through all spectra

            // We now have a list of potential hits for LimCon. This needs to be filtered.
            Plot.FindStartsAndEndsOfScoreEvents(scores, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

            // loop through the score array and find beginning and end of potential events
            var potentialEvents = new List <AcousticEvent>();

            foreach (Point point in startEnds)
            {
                // get average of the dominant bin
                int binSum     = 0;
                int binCount   = 0;
                int eventWidth = point.Y - point.X + 1;
                for (int s = point.X; s <= point.Y; s++)
                {
                    if (dominantBins[s] >= dominantBinMin)
                    {
                        binSum += dominantBins[s];
                        binCount++;
                    }
                }

                // find average dominant bin for the event
                int avDominantBin  = (int)Math.Round(binSum / (double)binCount);
                int avDominantFreq = (int)(Math.Round(binSum / (double)binCount) * herzPerBin);

                // Get score for the event.
                // Use a simple template for the honk and calculate cosine similarity to the template.
                // Template has three dominant frequenices.
                var      eventMatrix   = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
                double[] eventAsVector = MatrixTools.SumColumns(eventMatrix);
                GetEventScore(eventAsVector, templates, out double eventScore, out int templateId);

                // put hits into hits matrix
                // put cosine score into the score array
                for (int s = point.X; s <= point.Y; s++)
                {
                    hits[s, avDominantBin] = 10;
                    prunedScores[s]        = eventScore;
                }

                if (eventScore < similarityThreshold)
                {
                    continue;
                }

                int topBinForEvent     = avDominantBin + 2;
                int bottomBinForEvent  = topBinForEvent - callBinWidth;
                int topFreqForEvent    = (int)Math.Round(topBinForEvent * herzPerBin);
                int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

                double startTime    = point.X * frameStepInSeconds;
                double durationTime = eventWidth * frameStepInSeconds;
                var    newEvent     = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
                {
                    //Name = string.Empty, // remove name because it hides spectral content of the event.
                    Name         = "L.c" + templateId,
                    DominantFreq = avDominantFreq,
                    Score        = eventScore,
                };
                newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
                potentialEvents.Add(newEvent);
            }

            // display the original score array
            scores = DataTools.normalise(scores);
            var debugPlot  = new Plot(this.DisplayName, scores, similarityThreshold);
            var debugPlots = new List <Plot> {
                debugPlot
            };

            if (this.displayDebugImage)
            {
                Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
                var   debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            // display the cosine similarity scores
            var plot  = new Plot(this.DisplayName, prunedScores, similarityThreshold);
            var plots = new List <Plot> {
                plot
            };

            // add names into the returned events
            string speciesName = configuration[AnalysisKeys.SpeciesName] ?? this.SpeciesName;

            foreach (var ae in potentialEvents)
            {
                ae.Name        = abbreviatedSpeciesName;
                ae.SpeciesName = speciesName;
            }

            return(new RecognizerResults()
            {
                Events = potentialEvents,
                Hits = hits,
                Plots = plots,
                Sonogram = sonogram,
            });
        }
        /// <summary>
        /// Runs the recognizer over one audio segment. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <remarks>
        /// Processing steps:
        /// 1. Build a spectrogram with a fixed 2048-sample window, 50% overlap and standard noise reduction.
        /// 2. For every frame, locate the maximum inside the configured MinHz-MaxHz band. Its decibel value becomes the
        ///    frame's croak score when it is at least 9 dB and its bin lies strictly inside DominantFreq +/- 150 Hz.
        /// 3. Convert the croak scores to candidate croak events, then rebuild the croak score array from those events.
        /// 4. Look for oscillations in the rebuilt score array (periods between MinPeriod and MaxPeriod).
        /// 5. Convert the oscillation scores to the events that are returned.
        /// A debug spectrogram image is always written to <paramref name="outputDirectory"/>.
        /// </remarks>
        /// <param name="recording">The audio segment to analyse.</param>
        /// <param name="configuration">Recognizer settings; read into a <c>LitoriaCaeruleaConfig</c> and queried for the species name.</param>
        /// <param name="segmentStartOffset">Start offset of this segment; passed on to every event that is created.</param>
        /// <param name="getSpectralIndexes">Not used by this recognizer.</param>
        /// <param name="outputDirectory">Directory that receives the debug spectrogram image.</param>
        /// <param name="imageWidth">Not used by this recognizer.</param>
        /// <returns>The spectrogram, the oscillation score plot and the detected events. <c>Hits</c> is always null.</returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            var recognizerConfig = new LitoriaCaeruleaConfig();
            recognizerConfig.ReadConfigFile(configuration);

            // Species name stamped onto every event; a placeholder is used when the config has none.
            string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";

            // BETTER TO SET THESE. IGNORE USER!
            // This framesize is large because the oscillation we wish to detect is due to repeated croaks
            // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
            const int    frameSize     = 2048;
            const double windowOverlap = 0.5;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // The default HAMMING window is used.
                // Without noise reduction one can get a more sensitive recogniser.
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = sr / (sonoConfig.WindowSize * (1 - windowOverlap));

            // bin indices of the band edges within the full spectrogram
            int minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var decibelThreshold = 9.0;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            // get the freq band as set by min and max Herz
            var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
            // Skip over spectra where maximum is not in correct location.
            // The two bin limits below are relative to the bottom of frogBand, hence the subtraction of MinHz.
            int buffer            = 150;
            var croakScoreArray   = new double[rowCount];
            var hzAtTopOfTopBand  = recognizerConfig.DominantFreq + buffer;
            var hzAtBotOfTopBand  = recognizerConfig.DominantFreq - buffer;
            var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
            var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

            // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
            for (int x = 0; x < rowCount; x++)
            {
                var spectrum = MatrixTools.GetRow(frogBand, x);
                int maxIndex = DataTools.GetMaxIndex(spectrum);
                if (spectrum[maxIndex] < decibelThreshold)
                {
                    continue;
                }

                if (maxIndex < binAtTopOfTopBand && maxIndex > binAtBotOfTopBand)
                {
                    croakScoreArray[x] = spectrum[maxIndex];
                }
            }

            // Prepare a normalised plot for later display with spectrogram
            double[] normalisedScores;
            double   normalisedThreshold;

            DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var text1      = $"Croak scores (threshold={decibelThreshold})";
            var croakPlot1 = new Plot(text1, normalisedScores, normalisedThreshold);

            // extract potential croak events from the array of croak candidates
            var croakEvents = AcousticEvent.ConvertScoreArray2Events(
                croakScoreArray,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinCroakDuration,
                recognizerConfig.MaxCroakDuration,
                segmentStartOffset);

            // add necessary info into the candidate events
            var prunedEvents = AddSegmentInfo(croakEvents);

            // With those events that survive the above Array2Events process, we now extract a new array croak scores
            croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
            DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var text2      = $"Croak events (threshold={decibelThreshold})";
            var croakPlot2 = new Plot(text2, normalisedScores, normalisedThreshold);

            // Look for oscillations in the croak score array.
            // duration of DCT in seconds
            double dctDuration = recognizerConfig.DctDuration;

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = recognizerConfig.DctThreshold;
            double minOscRate   = 1 / recognizerConfig.MaxPeriod;
            double maxOscRate   = 1 / recognizerConfig.MinPeriod;
            var    dctScores    = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);

            // iii: CONVERT THE OSCILLATION SCORES TO THE EVENTS THAT ARE RETURNED
            var events = AcousticEvent.ConvertScoreArray2Events(
                dctScores,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            // this recognizer does not produce a hits matrix
            double[,] hits = null;
            prunedEvents   = AddSegmentInfo(events);

            var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);

            // display a variety of debug score arrays
            // calculate amplitude at location
            double[] amplitudeArray = MatrixTools.SumRows(frogBand);
            DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

            var debugPlots = new List<Plot> {
                scoresPlot, croakPlot2, croakPlot1, amplPlot
            };

            // NOTE: This DrawDebugImage() method can be over-written in this class.
            var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
            var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
            debugImage.Save(debugPath);

            return new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = scoresPlot.AsList(),
                Events = prunedEvents,
            };

            // Stamps the species and segment details onto every candidate and returns the candidates as a new list.
            List<AcousticEvent> AddSegmentInfo(IEnumerable<AcousticEvent> candidates)
            {
                var annotated = new List<AcousticEvent>();
                foreach (var ae in candidates)
                {
                    ae.SpeciesName            = speciesName;
                    ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                    ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                    ae.Name = recognizerConfig.AbbreviatedSpeciesName;
                    annotated.Add(ae);
                }

                return annotated;
            }
        }
Ejemplo n.º 24
0
        } //Execute()

        /// <summary>
        /// Reads the spectral-index csv files of every instance and condenses each instance into one feature vector.
        /// </summary>
        /// <remarks>
        /// For each instance and each key in FEATURE_KEYS the file "{fileID}_Species{label:d2}.{key}.csv" is read from
        /// <c>arguments.InputDataDirectory</c>. Every row of that file is passed through
        /// <c>DataTools.SubtractValueAndTruncateToZero</c> (with <c>arguments.BgnThreshold</c>) and reduced with
        /// <c>MaxPoolingLimited</c>. The reduced rows are summed and divided by the row count, and the vectors of all
        /// keys are concatenated into one row of the instance feature matrix.
        /// A key whose file does not exist is reported and its slice of the feature row stays at zero.
        /// As a side effect the per-species instance counts are written to "BirdClef50_training_Counts.txt" in
        /// <c>arguments.OutputDirectory</c>.
        /// </remarks>
        /// <param name="arguments">Supplies instance and species counts, the labels file, the input and output directories and the background threshold.</param>
        /// <returns>File ids, species ids, per-species and per-instance counts, the feature matrix and the feature weights.</returns>
        public static Output GetInstanceRepresentations(Arguments arguments)
        {
            LoggedConsole.WriteLine("1. Read in all Instances and do feature extraction");

            //################################### FEATURE WEIGHTS
            //TRY DIFFERENT WEIGHTINGS assuming following "SPT,RHZ,RVT,RPS,RNG";
            bool doDeltaFeatures = false;

            double[] weights      = { 1.0, 1.0, 0.8, 0.7, 0.7 };
            double[] deltaWeights = { 1.0, 1.0, 0.8, 0.7, 0.7, 0.5, 0.4, 0.4, 0.2, 0.2 };
            if (doDeltaFeatures)
            {
                weights = deltaWeights;
            }

            //MAX-POOLING for SPECTRAL REDUCTION
            // frequency bins used to reduce dimensionality of the 256 spectral values.
            int startBin  = 8;
            int maxOf2Bin = 117;
            int maxOf3Bin = 160;
            int endBin    = 200;

            // Pool a dummy 256-value spectrum just to learn the length of a reduced spectrum.
            double[] testArray = new double[256];
            for (int i = 0; i < testArray.Length; i++)
            {
                testArray[i] = i;
            }

            int reducedSpectralLength = MaxPoolingLimited(testArray, startBin, maxOf2Bin, maxOf3Bin, endBin).Length;

            LoggedConsole.WriteLine("     Reduced spectral length = " + reducedSpectralLength);
            int instanceCount = arguments.InstanceCount;
            int speciesCount  = arguments.SpeciesCount;

            // READ IN THE SPECIES LABELS FILE AND SET UP THE DATA
            // Both arrays are produced by the reader, so nothing is allocated here.
            string[] fileID;
            int[]    speciesID;
            ReadGlotinsSpeciesLabelFile(arguments.SpeciesLabelsFile, instanceCount, out fileID, out speciesID);

            // INIT array of species counts
            int[] instanceNumbersPerSpecies = new int[speciesCount];

            // INIT array of frame counts
            int[] frameNumbersPerInstance = new int[instanceCount];

            // initialise species description matrix
            var keyArray = FEATURE_KEYS.Split(',');

            int totalFeatureCount = keyArray.Length * reducedSpectralLength;

            LoggedConsole.WriteLine("    Total Feature Count = " + totalFeatureCount);

            if (doDeltaFeatures)
            {
                totalFeatureCount *= 2;
                LoggedConsole.WriteLine("    Total Delta Feature Count = " + totalFeatureCount);
            }

            // one matrix row per instance
            double[,] instanceFeatureMatrix = new double[instanceCount, totalFeatureCount];

            // loop through all instances
            for (int j = 0; j < instanceCount; j++)
            {
                LoggedConsole.Write(".");
                int frameCount = 0;

                // species label is part of the name of the spectral index files
                int speciesLabel = speciesID[j];

                // dictionary to store feature spectra for instance.
                var aggreDictionary = new Dictionary<string, double[]>();

                // dictionary to store delta spectra for instance.
                var deltaDictionary = new Dictionary<string, double[]>();

                foreach (string key in keyArray)
                {
                    string   name = string.Format("{0}_Species{1:d2}.{2}.csv", fileID[j], speciesLabel, key);
                    FileInfo file = new FileInfo(Path.Combine(arguments.InputDataDirectory.FullName, name));

                    if (!file.Exists)
                    {
                        // Tolerated: the features of this key stay at zero for this instance.
                        LoggedConsole.WriteLine("\nWARNING: Feature file not found: " + file.FullName);
                        continue;
                    }

                    int binCount;
                    double[,] matrix = IndexMatrices.ReadSpectrogram(file, out binCount);

                    // running sums of the reduced spectra and of their frame-to-frame changes
                    double[] aggregateArray = new double[reducedSpectralLength];
                    double[] deltaArray     = new double[reducedSpectralLength];

                    // Row 0 only seeds the delta calculation.
                    // NOTE(review): row 0 is never added to aggregateArray although the average below divides by
                    // the full row count - confirm whether that omission is intended.
                    double[] ipVector = MatrixTools.GetRow(matrix, 0);
                    ipVector = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
                    double[] previousArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);

                    // transfer spectral values to array.
                    int rowCount = matrix.GetLength(0);

                    for (int r = 1; r < rowCount; r++)
                    {
                        ipVector = MatrixTools.GetRow(matrix, r);
                        ipVector = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
                        double[] reducedArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);

                        for (int c = 0; c < reducedSpectralLength; c++)
                        {
                            aggregateArray[c] += reducedArray[c];

                            // DELTA value = absolute change relative to the previous frame
                            deltaArray[c] += Math.Abs(reducedArray[c] - previousArray[c]);
                        }

                        previousArray = reducedArray;
                    }

                    aggreDictionary[key] = aggregateArray;
                    deltaDictionary[key] = deltaArray;

                    // NOTE(review): the row count of the last file read is used as the divisor for every key.
                    frameCount = rowCount;
                }

                instanceNumbersPerSpecies[speciesLabel - 1]++;
                frameNumbersPerInstance[j] += frameCount;

                // create the matrix of instance descriptions which consists of concatenated vectors
                // j = index of instance ID = row number
                int featureID = 0;
                foreach (string key in keyArray)
                {
                    int featureOffset = featureID * reducedSpectralLength;
                    featureID++;

                    double[] aggregate;
                    if (!aggreDictionary.TryGetValue(key, out aggregate))
                    {
                        // no file was read for this key: its slice keeps the default value of zero
                        continue;
                    }

                    for (int c = 0; c < reducedSpectralLength; c++)
                    {
                        // the AVERAGE (not the sum) over frames is stored
                        instanceFeatureMatrix[j, featureOffset + c] = aggregate[c] / frameCount;
                    }
                }

                if (doDeltaFeatures)
                {
                    // delta features follow the aggregate features, so featureID keeps counting
                    foreach (string key in keyArray)
                    {
                        int featureOffset = featureID * reducedSpectralLength;
                        featureID++;

                        double[] delta;
                        if (!deltaDictionary.TryGetValue(key, out delta))
                        {
                            continue;
                        }

                        for (int c = 0; c < reducedSpectralLength; c++)
                        {
                            instanceFeatureMatrix[j, featureOffset + c] = delta[c] / frameCount;
                        }
                    }
                }
            } // end for loop j over all instances

            LoggedConsole.WriteLine("Done!");

            LoggedConsole.WriteLine("\nSum of species number array = " + instanceNumbersPerSpecies.Sum());
            LoggedConsole.WriteLine("Sum of  frame  number array = " + frameNumbersPerInstance.Sum());
            bool   addLineNumbers            = true;
            string countsArrayOutputFilePath = Path.Combine(arguments.OutputDirectory.FullName, "BirdClef50_training_Counts.txt");

            FileTools.WriteArray2File(instanceNumbersPerSpecies, addLineNumbers, countsArrayOutputFilePath);

            // Initialise output data arrays
            Output output = new Output();

            output.FileID    = fileID;
            output.SpeciesID = speciesID;
            output.InstanceNumbersPerSpecies = instanceNumbersPerSpecies;
            output.ReducedSpectralLength     = reducedSpectralLength;
            output.FrameNumbersPerInstance   = frameNumbersPerInstance;

            // matrix: each row= one instance;  each column = one feature
            output.InstanceFeatureMatrix = instanceFeatureMatrix;

            output.Weights = weights;

            return output;
        } // GetInstanceRepresentations()
        /// <summary>
        /// Merges the spectral indices recovered from two false-colour spectrogram ribbons into a single matrix.
        /// The ribbons are assumed to have been drawn with the following RGB index assignments:
        /// string[] colourKeys1 = { "ACI", "ENT", "EVN" };.
        /// string[] colourKeys2 = { "BGN", "PMN", "EVN" };.
        /// Every output row holds five index vectors side by side, in the order ACI, ENT, EVN, BGN, PMN.
        /// </summary>
        public static double[,] ReadSpectralIndicesFromTwoFalseColourSpectrogramRibbons(Image image1, Image image2, TimeSpan startTime, TimeSpan duration)
        {
            // express the requested time span as whole minutes
            int firstMinute = (int)startTime.TotalMinutes;
            int lastMinute  = firstMinute + (int)duration.TotalMinutes;

            // recover the index matrices of each ribbon
            var ribbon1Indices = ReadSpectralIndicesFromFalseColourSpectrogram((Image <Rgb24>)image1, firstMinute, lastMinute);
            var ribbon2Indices = ReadSpectralIndicesFromFalseColourSpectrogram((Image <Rgb24>)image2, firstMinute, lastMinute);

            // five indices are combined: the first three come from ribbon 1, the remaining two from ribbon 2
            const int bandCount            = 5;
            const int bandsFromFirstRibbon = 3;

            // the first matrix of ribbon 1 fixes the dimensions of every band
            int rows      = ribbon1Indices[0].GetLength(0);
            int bandWidth = ribbon1Indices[0].GetLength(1);
            var combined  = new double[rows, bandWidth * bandCount];

            for (int r = 0; r < rows; r++)
            {
                for (int band = 0; band < bandCount; band++)
                {
                    var source = band < bandsFromFirstRibbon
                        ? ribbon1Indices[band]
                        : ribbon2Indices[band - bandsFromFirstRibbon];

                    // each band occupies its own block of bandWidth columns in the output row
                    var values = MatrixTools.GetRow(source, r);
                    int offset = band * bandWidth;
                    for (int c = 0; c < bandWidth; c++)
                    {
                        combined[r, offset + c] = values[c];
                    }
                }
            }

            return combined;
        }
        internal RecognizerResults Algorithm1(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
        {
            double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

            // make a spectrogram
            var config = new SonogramConfig
            {
                WindowSize              = 256,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
                WindowOverlap           = 0.0,
            };

            // now construct the standard decibel spectrogram WITH noise removal
            // get frame parameters for the analysis
            var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

            // remove the DC column
            var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

            sonogram.Data = spg;
            int sampleRate = audioRecording.SampleRate;
            int rowCount   = spg.GetLength(0);
            int colCount   = spg.GetLength(1);

            // double epsilon = Math.Pow(0.5, audioRecording.BitsPerSample - 1);
            int frameSize = colCount * 2;
            int frameStep = frameSize; // this default = zero overlap

            // double frameDurationInSeconds = frameSize / (double)sampleRate;
            double frameStepInSeconds = frameStep / (double)sampleRate;
            double framesPerSec       = 1 / frameStepInSeconds;
            double herzPerBin         = sampleRate / 2 / (double)colCount;

            // string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            // string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // ## THREE THRESHOLDS ---- only one of these is given to user.
            // minimum dB to register a dominant freq peak. After noise removal
            double peakThresholdDb = 3.0;

            // The threshold dB amplitude in the dominant freq bin required to yield an event
            double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

            // minimum score for an acceptable event - that is when processing the score array.
            double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

            // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
            int minFrameWidth = 2;
            int maxFrameWidth = 5;  // this is larger than actual to accomodate an echo.

            // double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
            // double maxDuration = maxFrameWidth * frameStepInSeconds;

            // minimum number of bins covering frequency bandwidth of call
            int callBinWidth = 19;

            // # The PlatyplectrumOrnatum call has a duration of 3-5 frames given the above settings.
            // To this end we produce two templates.
            var templates = GetTemplatesForAlgorithm1(callBinWidth);

            int dominantFrequency = configuration.GetInt("DominantFrequency");

            // NOTE: could give user control over other call features
            //  Such as frequency gap between peaks. But not in this first iteration of the recognizer.
            //int peakGapInHerz = (int)configuration["PeakGap"];
            //int minHz = (int)configuration[AnalysisKeys.MinHz];
            //int F1AndF2BinGap = (int)Math.Round(peakGapInHerz / herzPerBin);
            //int F1AndF3BinGap = 2 * F1AndF2BinGap;

            int hzBuffer       = 100;
            int dominantBin    = (int)Math.Round(dominantFrequency / herzPerBin);
            int binBuffer      = (int)Math.Round(hzBuffer / herzPerBin);
            int dominantBinMin = dominantBin - binBuffer;
            int dominantBinMax = dominantBin + binBuffer;

            // int bandwidth = dominantBinMax - dominantBinMin + 1;

            int[]    dominantBins    = new int[rowCount];    // predefinition of events max frequency
            double[] amplitudeScores = new double[rowCount]; // predefinition of amplitude score array
            double[,] hits = new double[rowCount, colCount];

            // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
            // mark the hits in hitMatrix
            for (int s = 0; s < rowCount; s++)
            {
                double[] spectrum     = MatrixTools.GetRow(spg, s);
                double   maxAmplitude = -double.MaxValue;
                int      maxId        = 0;

                // loop through bandwidth of call and look for dominant frequency
                for (int binId = 5; binId < dominantBinMax; binId++)
                {
                    if (spectrum[binId] > maxAmplitude)
                    {
                        maxAmplitude = spectrum[binId];
                        maxId        = binId;
                    }
                }

                if (maxId < dominantBinMin)
                {
                    continue;
                }

                // peak should exceed thresold amplitude
                if (spectrum[maxId] < peakThresholdDb)
                {
                    continue;
                }

                amplitudeScores[s] = maxAmplitude;
                dominantBins[s]    = maxId;

                // Console.WriteLine("Col {0}, Bin {1}  ", c, freqBinID);
            } // loop through all spectra

            // We now have a list of potential hits. This needs to be filtered.
            Plot.FindStartsAndEndsOfScoreEvents(amplitudeScores, eventDecibelThreshold, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

            // loop through the score array and find beginning and end of potential events
            var potentialEvents = new List <AcousticEvent>();

            foreach (Point point in startEnds)
            {
                // get average of the dominant bin
                int binSum     = 0;
                int binCount   = 0;
                int eventWidth = point.Y - point.X + 1;
                for (int s = point.X; s <= point.Y; s++)
                {
                    if (dominantBins[s] >= dominantBinMin)
                    {
                        binSum += dominantBins[s];
                        binCount++;
                    }
                }

                // find average dominant bin for the event
                int avDominantBin  = (int)Math.Round(binSum / (double)binCount);
                int avDominantFreq = (int)(Math.Round(binSum / (double)binCount) * herzPerBin);

                // Get score for the event.
                // Use a simple template for the honk and calculate cosine similarity to the template.
                // Template has three dominant frequenices.
                var    eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
                double eventScore  = GetEventScore(eventMatrix, templates);

                // put hits into hits matrix
                // put cosine score into the score array
                for (int s = point.X; s <= point.Y; s++)
                {
                    hits[s, avDominantBin] = 10;
                    prunedScores[s]        = eventScore;
                }

                if (eventScore < eventSimilarityThreshold)
                {
                    continue;
                }

                int topBinForEvent     = avDominantBin + 2;
                int bottomBinForEvent  = topBinForEvent - callBinWidth;
                int topFreqForEvent    = (int)Math.Round(topBinForEvent * herzPerBin);
                int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

                double startTime    = point.X * frameStepInSeconds;
                double durationTime = eventWidth * frameStepInSeconds;
                var    newEvent     = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
                {
                    DominantFreq = avDominantFreq,
                    Score        = eventScore,

                    // remove name because it hides spectral content in display of the event.
                    Name = string.Empty,
                };
                newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);

                potentialEvents.Add(newEvent);
            }

            // calculate the cosine similarity scores
            var plot  = new Plot(this.DisplayName, prunedScores, eventSimilarityThreshold);
            var plots = new List <Plot> {
                plot
            };

            //DEBUG IMAGE this recognizer only. MUST set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display the original decibel score array
                DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var debugPlot  = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
                var debugPlots = new List <Plot> {
                    debugPlot, plot
                };
                var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
                var debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(
                                                             Path.GetFileNameWithoutExtension(audioRecording.BaseName),
                                                             this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            // add names into the returned events
            foreach (var ae in potentialEvents)
            {
                ae.Name = "P.o"; // abbreviatedSpeciesName;
            }

            return(new RecognizerResults()
            {
                Events = potentialEvents,
                Hits = hits,
                Plots = plots,
                Sonogram = sonogram,
            });
        }
        /// <summary>
        /// Builds a synthetic recording containing five cosine harmonics, converts its amplitude spectrogram
        /// to the Linear125Octaves7Tones28Nyquist32000 frequency scale and then checks
        /// (1) the bins in which spectral peaks are found in one smoothed spectrum, and
        /// (2) the dimensions of the annotated spectrogram image that is written to the output directory.
        /// </summary>
        public void TestFreqScaleOnArtificialSignal2()
        {
            // signal parameters: 30 seconds sampled at 64 kHz
            int sampleRate = 64000;
            double duration = 30;
            int[] harmonics = { 500, 1000, 2000, 4000, 8000 };

            var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
            var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png");
            var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);

            // sonogram configuration: window size comes from the frequency scale, no noise reduction
            var sonoConfig = new SonogramConfig
            {
                WindowSize = freqScale.WindowSize,
                WindowOverlap = 0.2,
                SourceFName = "Signal2",
                NoiseReductionType = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
            sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

            // any row will do because the signal is stationary; smooth it before looking for peaks
            var smoothedSpectrum = DataTools.filterMovingAverage(MatrixTools.GetRow(sonogram.Data, 40), 5);
            var peaks = DataTools.GetPeaks(smoothedSpectrum);

            // collect the bin ids of the peaks, ignoring the five bins at either end of the spectrum
            var peakIds = new List<int>();
            int lastIndexExclusive = peaks.Length - 5;
            for (int i = 5; i < lastIndexExclusive; i++)
            {
                if (!peaks[i])
                {
                    continue;
                }

                int peakId = freqScale.BinBounds[i, 0];
                peakIds.Add(peakId);
                LoggedConsole.WriteLine($"Spectral peak located in bin {peakId},  Herz={freqScale.BinBounds[i, 1]}");
            }

            foreach (int h in harmonics)
            {
                LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}");
            }

            // one peak is expected per harmonic, in these bins
            int[] expectedPeakIds = { 129, 257, 513, 1025, 2049 };
            Assert.AreEqual(5, peakIds.Count);
            for (int k = 0; k < expectedPeakIds.Length; k++)
            {
                Assert.AreEqual(expectedPeakIds[k], peakIds[k]);
            }

            // draw and save the annotated spectrogram
            string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)}   SR={sampleRate}  Window={freqScale.WindowSize}";
            var image = sonogram.GetImage();
            image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations);
            image.Save(outputImagePath);

            // Check that image dimensions are correct
            Assert.AreEqual(146, image.Width);
            Assert.AreEqual(310, image.Height);
        }
        } //DetectBarsInTheRowsOfaMatrix()

        /// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE ROWS of the passed portion of a sonogram.
        /// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
        /// Was first developed for crow calls.
        /// First looks for a decibel profile that matches the passed call duration and decibel loudness
        /// Then samples the centre portion for the correct harmonic period.
        /// </summary>
        /// <param name="m">The matrix to search; one row is examined per candidate position.</param>
        /// <param name="dBThreshold">
        /// Minimum value of the smoothed, noise-reduced row average at a candidate row. The candidate must also
        /// exceed the rows located half a call span before and after it by at least this amount.
        /// </param>
        /// <param name="callSpan">Expected length of a call, expressed as a number of matrix rows.</param>
        /// <returns>
        /// Three arrays, each with one element per matrix row:
        /// Item1 = the smoothed and noise-reduced row averages,
        /// Item2 = the harmonic intensity (zero for rows that failed the filter),
        /// Item3 = the harmonic period (zero for rows that failed the filter or that have no period).
        /// </returns>
        public static Tuple <double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
        {
            int zeroBinCount = 3;  //to remove low freq content which dominates the spectrum
            int halfspan     = callSpan / 2;

            double[] dBArray = MatrixTools.GetRowAverages(m);
            dBArray = DataTools.filterMovingAverage(dBArray, 3);

            // noise removal: the number of noise SDs used to calculate the noise threshold determines the severity of noise reduction
            double standardDeviationCount = 0.1;
            SNR.BackgroundNoise bgn       = SNR.SubtractBackgroundNoiseFromSignal(dBArray, standardDeviationCount);
            dBArray = bgn.NoiseReducedSignal;

            bool[] peaks = DataTools.GetPeaks(dBArray);

            int rowCount    = m.GetLength(0);
            int colCount    = m.GetLength(1);
            var intensity   = new double[rowCount];    //an array of period intensity
            var periodicity = new double[rowCount];    //an array of the periodicity values

            // Each candidate row is cross-correlated with the row before it, so row zero can never be a candidate.
            // Without this guard a call span of less than two rows would request row -1.
            int firstRow = Math.Max(1, halfspan);

            for (int r = firstRow; r < rowCount - halfspan; r++)
            {
                //APPLY A FILTER: must satisfy the following conditions for a call.
                // 1: the row must be a local peak in the decibel profile
                if (!peaks[r])
                {
                    continue;
                }

                // 2: the peak must be loud enough
                if (dBArray[r] < dBThreshold)
                {
                    continue;
                }

                // 3: the peak must stand clear of the profile half a call span to either side
                double lowerDiff = dBArray[r] - dBArray[r - halfspan];
                double upperDiff = dBArray[r] - dBArray[r + halfspan];
                if (lowerDiff < dBThreshold || upperDiff < dBThreshold)
                {
                    continue;
                }

                double[] prevRow  = DataTools.DiffFromMean(MatrixTools.GetRow(m, r - 1));
                double[] thisRow  = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));
                var      spectrum = AutoAndCrossCorrelation.CrossCorr(prevRow, thisRow);

                for (int s = 0; s < zeroBinCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int maxId = DataTools.GetMaxIndex(spectrum);
                intensity[r] = spectrum[maxId];

                // a maximum at index zero means no period was found; leave the period at zero
                double period = 0.0;
                if (maxId != 0)
                {
                    period = 2 * colCount / (double)maxId;
                }

                periodicity[r] = period;
            } // rows

            return(Tuple.Create(dBArray, intensity, periodicity));
        } //DetectHarmonicsInSonogramMatrix()
Ejemplo n.º 29
0
        /// <summary>
        /// The CORE ANALYSIS METHOD.
        /// Makes a spectrogram of the audio segment, scores every frame for harmonic content in a band of 64
        /// frequency bins starting just above MIN_HZ, and turns each frame that keeps a score into an acoustic event.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">The audio file to analyse.</param>
        /// <param name="configDict">
        /// Analysis settings. The keys MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, DECIBEL_THRESHOLD,
        /// INTENSITY_THRESHOLD and CALL_DURATION must be present; the frame length is optional and defaults to 1024.
        /// </param>
        /// <param name="segmentStartOffset">Handed unchanged to every acoustic event that is created.</param>
        /// <returns>
        /// The spectrogram, the hits matrix, a plot of the harmonic intensity, the predicted events
        /// and the duration of the recording.
        /// </returns>
        public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            // the frame length is the only optional setting
            int frameLength = 1024;
            string frameLengthText;
            if (configDict.TryGetValue(AnalysisKeys.FrameLength, out frameLengthText))
            {
                frameLength = int.Parse(frameLengthText);
            }

            double windowOverlap = 0.0;
            int minHz = int.Parse(configDict["MIN_HZ"]);
            int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"]);
            int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"]);
            double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"]); // dB
            double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); // in 0-1
            double callDuration = double.Parse(configDict["CALL_DURATION"]); // seconds

            var recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,
                WindowSize = frameLength,
                WindowOverlap = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };

            TimeSpan recordingDuration = recording.Duration;
            int sampleRate = recording.SampleRate;
            double freqBinWidth = sampleRate / (double)sonoConfig.WindowSize;

            // frames do not overlap, so the frame rate has the same value as the width of a frequency bin
            double framesPerSecond = freqBinWidth;

            // The Xcorrelation-FFT technique requires the number of bins to scan to be a power of 2.
            // Assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            // Assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            const int numberOfBins = 64;
            int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin = minBin + numberOfBins - 1;
            int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int rowCount = sonogram.Data.GetLength(0);
            int colCount = sonogram.Data.GetLength(1);

            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);
            int callSpan = (int)Math.Round(callDuration * framesPerSecond);

            // ii: DETECT HARMONICS
            var harmonics = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);
            double[] intensity = harmonics.Item2; // an array of periodicity scores
            double[] periodicity = harmonics.Item3;

            // ignore 0-100 hz - too much noise
            int noiseBound = (int)(100 / freqBinWidth);

            var scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                double rowIntensity = intensity[r];
                if (rowIntensity < harmonicIntensityThreshold)
                {
                    continue;
                }

                // ignore locations with incorrect formant gap
                double formantGapHz = periodicity[r] * freqBinWidth;
                if (formantGapHz < minFormantgap || formantGapHz > maxFormantgap)
                {
                    continue;
                }

                // find the frequency having max power (after blanking the noisy low bins) and use it to adjust the score
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int bin = 0; bin < noiseBound; bin++)
                {
                    spectrum[bin] = 0.0;
                }

                int freqWithMaxPower = (int)Math.Round(DataTools.GetMaxIndex(spectrum) * freqBinWidth);

                // a frame whose strongest frequency lies below 1200 Hz has its score cancelled
                double discount = freqWithMaxPower < 1200 ? 0.0 : 1.0;

                if (rowIntensity > harmonicIntensityThreshold)
                {
                    scoreArray[r] = rowIntensity * discount;
                }
            }

            // transfer info to a hits matrix, using a reduced threshold for display of hits
            var hits = new double[rowCount, colCount];
            double displayThreshold = harmonicIntensityThreshold * 0.75;

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < displayThreshold)
                {
                    continue;
                }

                // every bin of the analysed band gets the same relative period for this frame
                double relativePeriod = ((periodicity[r] * freqBinWidth) - minFormantgap) / maxFormantgap;
                for (int c = minBin; c < maxbin; c++)
                {
                    hits[r, c] = relativePeriod;
                }
            }

            // iii: CONVERT TO ACOUSTIC EVENTS
            double maxPossibleScore = 0.5;
            int halfCallSpan = callSpan / 2;
            var predictedEvents = new List<AcousticEvent>();

            for (int frame = 0; frame < rowCount; frame++)
            {
                // assume one score position per call
                double frameScore = scoreArray[frame];
                if (frameScore < 0.001)
                {
                    continue;
                }

                // the scored frame is taken as the centre of the call
                double startTime = (frame - halfCallSpan) / framesPerSecond;
                var ev = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
                ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
                ev.Score = frameScore;
                ev.ScoreNormalised = ev.Score / maxPossibleScore;
                predictedEvents.Add(ev);
            }

            Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);

            return(Tuple.Create(sonogram, hits, plot, predictedEvents, recordingDuration));
        } //Analysis()
        /// <summary>
        /// Algorithm2:
        /// 1: Loop through spgm and find dominant freq bin and its amplitude in each frame
        /// 2: If frame passes amplitude test, then calculate a similarity cosine score for that frame. Similarity score is wrt a template matrix.
        /// 3: If similarity score exceeds threshold, then assign event score based on the amplitude.
        /// </summary>
        /// <param name="recording">The audio to analyse. Its WavReader, SampleRate and BaseName are read.</param>
        /// <param name="configuration">
        /// Source of the settings "BgNoiseThreshold" (default 0.1), "EventDecibelThreshold" (default 6.0),
        /// "EventSimilarityThreshold" (default 0.2) and "DominantFrequency" (read with GetInt, no default supplied here).
        /// </param>
        /// <param name="outputDirectory">Directory that receives the debug spectrogram image; only used when MainEntry.InDEBUG is true.</param>
        /// <param name="segmentStartOffset">Passed unchanged to every created acoustic event.</param>
        /// <returns>The events found, the hits matrix, a plot of the normalised amplitude scores and the spectrogram (with its DC column removed).</returns>
        internal RecognizerResults Algorithm2(AudioRecording recording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
        {
            double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

            // make a spectrogram
            var config = new SonogramConfig
            {
                WindowSize              = 256,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
                WindowOverlap           = 0.0,
            };

            // now construct the standard decibel spectrogram WITH noise removal
            // get frame parameters for the analysis
            var sonogram = (BaseSonogram) new SpectrogramStandard(config, recording.WavReader);

            // remove the DC column
            var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

            // NB: the sonogram that is returned and drawn also loses its DC column
            sonogram.Data = spg;
            int sampleRate = recording.SampleRate;
            int rowCount   = spg.GetLength(0);
            int colCount   = spg.GetLength(1);

            //double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
            // frame size is recovered from the number of remaining frequency bins
            int frameSize = colCount * 2;
            int frameStep = frameSize; // this default = zero overlap

            //double frameDurationInSeconds = frameSize / (double)sampleRate;
            double frameStepInSeconds = frameStep / (double)sampleRate;
            double framesPerSec       = 1 / frameStepInSeconds;
            double herzPerBin         = sampleRate / 2.0 / colCount;

            //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            //string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // ## THREE THRESHOLDS ---- only one of these is given to user.
            // minimum dB to register a dominant freq peak. After noise removal
            double peakThresholdDb = 3.0;

            // The threshold dB amplitude in the dominant freq bin required to yield an event
            double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

            // minimum score for an acceptable event - that is when processing the score array.
            double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

            // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
            // NOTE(review): the spectrogram above is built with WindowSize = 256, not 512 - confirm which setting this comment refers to.
            //int minFrameWidth = 2;
            //int maxFrameWidth = 5;  // this is larger than actual to accommodate an echo.
            //double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
            //double maxDuration = maxFrameWidth * frameStepInSeconds;

            // minimum number of frames and bins covering the call
            // The PlatyplectrumOrnatum call has a duration of 3-5 frames GIVEN THE ABOVE SAMPLING and WINDOW SETTINGS!

            // Get the call templates and their dimensions
            var templates = GetTemplatesForAlgorithm2(out var callFrameDuration, out var callBinWidth);

            int dominantFrequency = configuration.GetInt("DominantFrequency");

            // the dominant bin of a frame must lie within 100 Hz of the configured dominant frequency
            const int hzBuffer       = 100;
            int       dominantBin    = (int)Math.Round(dominantFrequency / herzPerBin);
            int       binBuffer      = (int)Math.Round(hzBuffer / herzPerBin);
            int       dominantBinMin = dominantBin - binBuffer;
            int       dominantBinMax = dominantBin + binBuffer;

            // frequency band that is compared with the template: callBinWidth bins starting at bin 1
            int       bottomBin      = 1;
            int       topBin         = bottomBin + callBinWidth - 1;

            int[]    dominantBins     = new int[rowCount];    // dominant freq bin of each frame that matched the template
            double[] similarityScores = new double[rowCount]; // cosine similarity of each frame that matched the template
            double[] amplitudeScores  = new double[rowCount]; // call amplitude of each frame that matched the template
            double[,] hits = new double[rowCount, colCount];

            // loop through all spectra/rows of the spectrogram
            // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns= freq bins mark the hits in hitMatrix
            for (int s = 1; s < rowCount - callFrameDuration; s++)
            {
                double[] spectrum     = MatrixTools.GetRow(spg, s);
                double   maxAmplitude = -double.MaxValue;
                int      maxId        = 0;

                // loop through bandwidth of call and look for dominant frequency
                // the search starts at bin 8, so lower bins can never be the dominant bin
                for (int binId = 8; binId <= dominantBinMax; binId++)
                {
                    if (spectrum[binId] > maxAmplitude)
                    {
                        maxAmplitude = spectrum[binId];
                        maxId        = binId;
                    }
                }

                // dominant bin must not lie below the permitted band
                if (maxId < dominantBinMin)
                {
                    continue;
                }

                // peak should exceed threshold amplitude
                if (spectrum[maxId] < peakThresholdDb)
                {
                    continue;
                }

                //now calculate similarity with template
                var    locality      = MatrixTools.Submatrix(spg, s - 1, bottomBin, s + callFrameDuration - 2, topBin); // s-1 because first row of template is zeros.
                int    localMaxBin   = maxId - bottomBin;

                // call amplitude = average over rows 1 to 3 of the locality (frames s, s+1 and s+2) in the dominant bin
                double callAmplitude = (locality[1, localMaxBin] + locality[2, localMaxBin] + locality[3, localMaxBin]) / 3.0;

                // use the following lines to write out call templates for use as recognizer
                //double[] columnSums = MatrixTools.SumColumns(locality);
                //if (columnSums[maxId - bottomBin] < 80) continue;
                //FileTools.WriteMatrix2File(locality, "E:\\SensorNetworks\\Output\\Frogs\\TestOfRecognizers-2016October\\Towsey.PlatyplectrumOrnatum\\Locality_S"+s+".csv");

                // only the first template is used for scoring
                double score = DataTools.CosineSimilarity(locality, templates[0]);
                if (score > eventSimilarityThreshold)
                {
                    similarityScores[s] = score;
                    dominantBins[s]     = maxId;
                    amplitudeScores[s]  = callAmplitude;
                }
            } // loop through all spectra

            // loop through all spectra/rows of the spectrogram for a second time
            // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns= freq bins
            // We now have a list of potential hits. This needs to be filtered. Mark the hits in hitMatrix
            var events = new List <AcousticEvent>();

            for (int s = 1; s < rowCount - callFrameDuration; s++)
            {
                // find peaks in the array of similarity scores. First step, only look for peaks
                if (similarityScores[s] < similarityScores[s - 1] || similarityScores[s] < similarityScores[s + 1])
                {
                    continue;
                }

                // require three consecutive similarity scores to be above the threshold
                // (frame s itself is not tested here, only the two frames that follow it)
                if (similarityScores[s + 1] < eventSimilarityThreshold || similarityScores[s + 2] < eventSimilarityThreshold)
                {
                    continue;
                }

                // now check the amplitude
                if (amplitudeScores[s] < eventDecibelThreshold)
                {
                    continue;
                }

                // have an event
                // find average dominant bin for the event
                // NOTE(review): this adds dominantBins[s] three times, so the "average" is just dominantBins[s].
                // An average over frames s, s+1 and s+2 may have been intended - confirm before changing.
                int avDominantBin      = (dominantBins[s] + dominantBins[s] + dominantBins[s]) / 3;
                int avDominantFreq     = (int)Math.Round(avDominantBin * herzPerBin);
                int topBinForEvent     = avDominantBin + 3;
                int bottomBinForEvent  = topBinForEvent - callBinWidth;
                int topFreqForEvent    = (int)Math.Round(topBinForEvent * herzPerBin);
                int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

                hits[s, avDominantBin] = 10;

                // every event is given a fixed duration of four frames
                double startTime    = s * frameStepInSeconds;
                double durationTime = 4 * frameStepInSeconds;
                var    newEvent     = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
                {
                    DominantFreq = avDominantFreq,
                    Score        = amplitudeScores[s],

                    // remove name because it hides spectral content in display of the event.
                    Name = string.Empty,
                };
                newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);

                events.Add(newEvent);
            } // loop through all spectra

            // display the amplitude scores
            DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
            var plot  = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
            var plots = new List <Plot> {
                plot
            };

            //DEBUG IMAGE this recognizer only. MUST set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display the similarity score array beneath the amplitude plot
                var debugPlot  = new Plot("Similarity Score", similarityScores, eventSimilarityThreshold);
                var debugPlots = new List <Plot> {
                    plot, debugPlot
                };
                var debugImage = DisplayDebugImage(sonogram, events, debugPlots, hits);
                var debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            // add names into the returned events
            // (done after the debug image is drawn, where the empty name keeps the spectral content visible)
            foreach (var ae in events)
            {
                ae.Name = "P.o"; // abbreviatedSpeciesName;
            }

            return(new RecognizerResults()
            {
                Events = events,
                Hits = hits,
                Plots = plots,
                Sonogram = sonogram,
            });
        }