/// <summary>
/// Handles a change of the transform centre.
/// The client-bounds origin is first mapped through the current
/// rotate / skew / scale transform (built around the old centre), then
/// through a second transform built around <paramref name="center"/> from
/// the reciprocal scale and negated skew / rotation. Every polygon vertex
/// is offset by the resulting displacement and the outcome is forwarded
/// to <c>SetBoundsChanged</c>.
/// </summary>
/// <param name="center">The new centre point.</param>
protected internal override void SetCenterChanged(PointF center)
{
    float previousX = this.x;
    float previousY = this.y;
    PointF origin = this.ClientBounds.Location;

    // Pivot of the current transform: client origin offset by the current centre.
    var pivotX = this.ClientBounds.X + this.CenterX;
    var pivotY = this.ClientBounds.Y + this.CenterY;

    DUIMatrix transform = new DUIMatrix();
    transform.Translate(pivotX, pivotY);
    transform.Rotate(this.Rotate);
    transform.Skew(this.SkewX, this.SkewY);
    transform.Scale(this.ScaleX, this.ScaleY);
    transform.Translate(-pivotX, -pivotY);
    origin = MatrixTools.PointAfterMatrix(origin, transform);

    // Second pass about the new centre using reciprocal / negated parameters.
    transform.Reset();
    transform.Translate(center.X, center.Y);
    transform.Scale(1 / this.ScaleX, 1 / this.ScaleY);
    transform.Skew(-this.SkewX, -this.SkewY);

    // NOTE(review): looks like a correction for the residual uniform scale left by
    // applying a skew followed by its negation - confirm against DUIMatrix.Skew.
    var skewCompensation = (float)(Math.Tan(-this.SkewX) * Math.Tan(this.SkewY) + 1);
    transform.Scale(1 / skewCompensation, 1 / skewCompensation);
    transform.Rotate(-this.Rotate);
    transform.Translate(-center.X, -center.Y);
    origin = MatrixTools.PointAfterMatrix(origin, transform);

    float newX = origin.X - this.BorderWidth;
    float newY = origin.Y - this.BorderWidth;

    // Shift each vertex by the displacement between the new and previous position.
    var shiftedPolygon = this.Polygon
        .Select(vertex => new PointF(vertex.X + newX - previousX, vertex.Y + newY - previousY))
        .ToArray();

    var specified = DUIBoundsPolygonSpecified.Center
                    | DUIBoundsPolygonSpecified.Polygon
                    | DUIBoundsPolygonSpecified.RotateAngle;

    this.SetBoundsChanged(
        center.X - origin.X,
        center.Y - origin.Y,
        this.Rotate,
        0,
        0,
        0,
        0,
        shiftedPolygon,
        specified);
}
/// <summary>
/// For every key, finds the matching spectral-index csv files in <paramref name="dirs"/>
/// (pattern <c>*__{analysisType}.{key}.csv</c>), concatenates them via
/// <c>ConcatenateSpectralIndexFilesWithTimeCheck</c> and <c>MatrixTools.ConcatenateMatrixRows</c>,
/// rotates the result 90 degrees anticlockwise and stores it under that key.
/// Keys for which no file is found are reported with a warning and skipped.
/// </summary>
/// <param name="dirs">Directories to search.</param>
/// <param name="analysisType">Analysis type name used in the file-name pattern.</param>
/// <param name="keys">Spectral index keys to load.</param>
/// <param name="indexGenerationData">Supplies the index calculation duration used for the time check.</param>
/// <param name="verbose">Currently unused by this method.</param>
/// <returns>A dictionary mapping each key that had files to its concatenated, rotated matrix.</returns>
public static Dictionary<string, double[,]> GetSpectralIndexFilesAndConcatenate(
    DirectoryInfo[] dirs,
    string analysisType,
    string[] keys,
    IndexGenerationData indexGenerationData,
    bool verbose = false)
{
    TimeSpan calculationDuration = indexGenerationData.IndexCalculationDuration;
    var result = new Dictionary<string, double[,]>();

    foreach (string key in keys)
    {
        string pattern = "*__" + analysisType + "." + key + ".csv";
        var files = GetFilesInDirectories(dirs, pattern);

        if (files.Length > 0)
        {
            List<double[,]> parts = ConcatenateSpectralIndexFilesWithTimeCheck(files, calculationDuration, key);
            double[,] joined = MatrixTools.ConcatenateMatrixRows(parts);
            result.Add(key, MatrixTools.MatrixRotate90Anticlockwise(joined));
        }
        else
        {
            LoggedConsole.WriteWarnLine($"{key} WARNING: No csv files found for KEY=" + key);
        }
    }

    return result;
}
/// <summary>
/// Converts a spectrogram having a linear frequency scale to one having an octave frequency scale.
/// Note that the sample rate (sr) and the frame size both need to be appropriate to the choice of FreqScaleType.
/// TODO: SHOULD DEVELOP A SEPARATE UNIT TEST for this method.
/// </summary>
/// <param name="inputSpgram">The linear-scale spectrogram; each row (first dimension) is treated as one frame.</param>
/// <param name="freqScale">The target frequency scale; its <c>BinBounds</c> define the output bins.</param>
/// <returns>A matrix with one row per input frame and one column per entry of <c>freqScale.BinBounds</c>.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="inputSpgram"/> or <paramref name="freqScale"/> is null, and also
/// (for backward compatibility) when <paramref name="freqScale"/> is a linear scale.
/// </exception>
public static double[,] ConvertLinearSpectrogramToOctaveFreqScale(double[,] inputSpgram, FrequencyScale freqScale)
{
    if (inputSpgram == null)
    {
        // Fail with a descriptive argument exception rather than a NullReferenceException further down.
        throw new ArgumentNullException(nameof(inputSpgram));
    }

    if (freqScale == null)
    {
        throw new ArgumentNullException(nameof(freqScale));
    }

    if (freqScale.ScaleType == FreqScaleType.Linear)
    {
        const string message = "Linear Hz Scale is not valid for this Octave method.";
        LoggedConsole.WriteLine(message);

        // The exception type is kept as ArgumentNullException so existing callers that catch it
        // keep working; the message now explains the real cause.
        throw new ArgumentNullException(nameof(freqScale), message);
    }

    // the octave bin bounds for this octave scale type
    var octaveBinBounds = freqScale.BinBounds;
    int newBinCount = octaveBinBounds.GetLength(0);

    // set up the new octave spectrogram
    int frameCount = inputSpgram.GetLength(0);
    double[,] octaveSpectrogram = new double[frameCount, newBinCount];

    for (int row = 0; row < frameCount; row++)
    {
        // take each frame (spectrum) in turn, convert it to octave form and store it in the output
        var linearSpectrum = MatrixTools.GetRow(inputSpgram, row);
        var octaveSpectrum = OctaveSpectrum(octaveBinBounds, linearSpectrum);
        MatrixTools.SetRow(octaveSpectrogram, row, octaveSpectrum);
    }

    return octaveSpectrogram;
}
// #########################################################################################################################################################

/// <summary>
/// Produces a mask from a spectrogram matrix by chaining: noise reduction by division,
/// Wiener filtering, subtract-and-truncate, thresholding, a 5-cell Gaussian blur and a
/// second thresholding at 80% of <paramref name="binaryThreshold"/>.
/// </summary>
/// <param name="matrix">The input matrix.</param>
/// <param name="percentileThreshold">Passed to <c>NoiseReduction_byDivision</c>.</param>
/// <param name="binaryThreshold">Threshold for the first binarisation; the second uses 0.8 times this value.</param>
/// <returns>The matrix returned by the final thresholding step.</returns>
public static double[,] BriggsNoiseFilterAndGetMask(double[,] matrix, int percentileThreshold, double binaryThreshold)
{
    // Wiener neighbourhood size used here; the original author notes that Briggs uses 17.
    const int wienerNeighbourhood = 7;

    double[,] denoised = NoiseReduction_byDivision(matrix, percentileThreshold);

    // smooth and truncate
    double[,] smoothed = ImageTools.WienerFilter(denoised, wienerNeighbourhood);
    double[,] truncated = MatrixTools.SubtractAndTruncate2Zero(smoothed, 1.0);

    // first binarisation
    double[,] firstMask = MatrixTools.ThresholdMatrix2RealBinary(truncated, binaryThreshold);

    // smooth again, then threshold at a slightly lower level
    double[,] blurred = ImageTools.GaussianBlur_5cell(firstMask);
    double binaryThreshold2 = binaryThreshold * 0.8;

    return MatrixTools.ThresholdMatrix2RealBinary(blurred, binaryThreshold2);
}
/// <summary>
/// Tiles <paramref name="matrix"/> with non-overlapping patches, row band by row band,
/// and returns each patch flattened by <c>MatrixTools.Matrix2Array</c>.
/// Rows and columns left over after integer division by the patch size are ignored.
/// </summary>
/// <param name="matrix">The source matrix.</param>
/// <param name="patchWidth">Number of columns in a patch.</param>
/// <param name="patchHeight">Number of rows in a patch.</param>
/// <returns>The flattened patches in row-major tiling order.</returns>
private static List<double[]> GetSequentialPatches(double[,] matrix, int patchWidth, int patchHeight)
{
    var result = new List<double[]>();
    int rowCount = matrix.GetLength(0);
    int columnCount = matrix.GetLength(1);

    int patchRows = rowCount / patchHeight;
    for (int patchRow = 0; patchRow < patchRows; patchRow++)
    {
        int top = patchRow * patchHeight;
        int bottom = top + patchHeight - 1;

        for (int patchColumn = 0; patchColumn < columnCount / patchWidth; patchColumn++)
        {
            int left = patchColumn * patchWidth;
            int right = left + patchWidth - 1;

            // extract the tile and store it as a vector
            double[,] tile = MatrixTools.Submatrix(matrix, top, left, bottom, right);
            result.Add(MatrixTools.Matrix2Array(tile));
        }
    }

    return result;
}
/// <summary>
/// Extracts up to <paramref name="numberOfPatches"/> randomly positioned patches from a matrix
/// and returns each one flattened by <c>MatrixTools.Matrix2Array</c>.
/// </summary>
/// <remarks>
/// Start rows are drawn without repetition: the candidate rows <c>0 .. rows - patchHeight - 1</c>
/// are shuffled and the first <paramref name="numberOfPatches"/> are taken. This avoids the
/// duplicates a plain random draw would give when many patches are requested from a small range
/// (e.g. 1000 out of 1440), which matters for full-band patches. Start columns are drawn
/// independently for each patch. The generator uses a fixed seed, so results are repeatable.
/// </remarks>
/// <param name="matrix">The source matrix.</param>
/// <param name="patchWidth">Number of columns in a patch.</param>
/// <param name="patchHeight">Number of rows in a patch.</param>
/// <param name="numberOfPatches">Maximum number of patches to extract.</param>
/// <returns>The flattened patches.</returns>
private static List<double[]> GetRandomPatches(double[,] matrix, int patchWidth, int patchHeight, int numberOfPatches)
{
    const int seed = 100;

    int rowCount = matrix.GetLength(0);
    int columnCount = matrix.GetLength(1);
    Random generator = new Random(seed);

    // shuffle the candidate start rows and keep the first n
    // (original author was unsure whether new Guid() would be a better sort key than Next())
    var startRows = Enumerable.Range(0, rowCount - patchHeight)
        .OrderBy(x => generator.Next())
        .Take(numberOfPatches)
        .ToList();

    var result = new List<double[]>();
    foreach (int startRow in startRows)
    {
        // random start column for this patch
        int startColumn = generator.Next(0, columnCount - patchWidth);

        double[,] patch = MatrixTools.Submatrix(
            matrix,
            startRow,
            startColumn,
            startRow + patchHeight - 1,
            startColumn + patchWidth - 1);

        result.Add(MatrixTools.Matrix2Array(patch));
    }

    return result;
}
/// <summary>
/// Splits the signal into consecutive, non-overlapping windows of length 2^L (L = <paramref name="levelNumber"/>),
/// derives the WPD energy vector for each window and stores it, reversed, as one column of the result.
/// Samples left over after the last complete window are ignored.
/// </summary>
/// <param name="signal">The input signal.</param>
/// <param name="levelNumber">The level L; the window length is 2^L.</param>
/// <returns>A matrix with 2^(L+1) - 1 rows and one column per complete window.</returns>
public static double[,] GetWPDEnergySequence(double[] signal, int levelNumber)
{
    int windowLength = (int)Math.Pow(2, levelNumber);
    int windowCount = signal.Length / windowLength;
    int energyVectorLength = (int)Math.Pow(2, levelNumber + 1) - 1;

    var energiesByTime = new double[energyVectorLength, windowCount];

    for (int window = 0; window < windowCount; window++)
    {
        double[] segment = DataTools.Subarray(signal, window * windowLength, windowLength);
        double[] energies = new WaveletPacketDecomposition(segment).GetWPDEnergyVector();

        // reverse the energy vector so that low resolution coefficients are at the bottom
        double[] reversed = DataTools.reverseArray(energies);
        MatrixTools.SetColumn(energiesByTime, window, reversed);
    }

    return energiesByTime;
}
/// <summary>
/// Draws a false-colour spectrogram from frame data and, unless
/// <paramref name="chromeOption"/> is <c>ImageChrome.Without</c>, frames it with a title bar and axes.
/// </summary>
/// <param name="config">Supplies the X-axis tic interval.</param>
/// <param name="zoomingConfig">Supplies the decibel normalisation bounds (read but not used further here).</param>
/// <param name="startTimeOfData">Start time passed to the framing routine.</param>
/// <param name="frameScale">Time scale; its milliseconds value appears in the title.</param>
/// <param name="frameData">The frame data; rotated 90 degrees clockwise before drawing.</param>
/// <param name="indexGeneration">Supplies the resampled sample rate used to derive the Nyquist value.</param>
/// <param name="chromeOption">Whether to return the bare image or the framed image.</param>
/// <returns>The spectrogram image, with or without chrome.</returns>
public static Image<Rgb24> DrawFrameSpectrogramAtScale(
    LdSpectrogramConfig config,
    SpectrogramZoomingConfig zoomingConfig,
    TimeSpan startTimeOfData,
    TimeSpan frameScale,
    double[,] frameData,
    IndexGenerationData indexGeneration,
    ImageChrome chromeOption)
{
    // TODO: the following normalisation bounds could be passed instead of using hard coded.
    // They are read here but not yet consumed by the drawing call below.
    double min = zoomingConfig.LowerNormalizationBoundForDecibelSpectrograms;
    double max = zoomingConfig.UpperNormalizationBoundForDecibelSpectrograms;

    // the drawing method needs the matrix rotated
    double[,] orientedData = MatrixTools.MatrixRotate90Clockwise(frameData);

    // the unchromed image
    var bareImage = ZoomFocusedSpectrograms.DrawStandardSpectrogramInFalseColour(orientedData);

    if (chromeOption != ImageChrome.Without)
    {
        int nyquist = indexGeneration.SampleRateResampled / 2;
        int herzInterval = 1000;
        string title = $"ZOOM SCALE={frameScale.TotalMilliseconds}ms/pixel ";
        var titleBar = ZoomFocusedSpectrograms.DrawTitleBarOfZoomSpectrogram(title, bareImage.Width);

        return ZoomFocusedSpectrograms.FrameZoomSpectrogram(
            bareImage,
            titleBar,
            startTimeOfData,
            frameScale,
            config.XAxisTicInterval,
            nyquist,
            herzInterval);
    }

    return bareImage;
}
/// <summary>
/// Compresses a 256 x <paramref name="dataSize"/> "BGN" matrix filled with -100 and checks that
/// both the last column and the whole compressed matrix still average to -100.
/// </summary>
/// <param name="renderScale">Scale, in seconds, passed as the compressed scale.</param>
/// <param name="dataSize">Number of columns in the input matrix.</param>
public void CompressIndexSpectrogramsFillsAllValuesTest(double renderScale, int dataSize)
{
    var input = new Dictionary<string, double[,]>
    {
        { "BGN", new double[256, dataSize].Fill(-100) },
    };

    var output = IndexMatrices.CompressIndexSpectrograms(
        input,
        renderScale.Seconds(),
        0.1.Seconds(),
        value => Math.Round(value, MidpointRounding.AwayFromZero));

    var compressedBgn = output["BGN"];
    var overallAverage = compressedBgn.Average();

    // this test is specifically about whether the last column has the correct value
    var finalColumn = MatrixTools.GetColumn(compressedBgn, compressedBgn.LastColumnIndex());

    Assert.AreEqual(-100, finalColumn.Average());
    Assert.AreEqual(-100, overallAverage);
}
/// <summary>
/// Fills the triangle given by the first three points of <paramref name="destTriangle"/> with a
/// bitmap brush made from <paramref name="image"/>. The brush transform is obtained from
/// <c>MatrixTools.ThreePointsAffine(srcTriangle, destTriangle)</c>.
/// </summary>
/// <param name="image">The image used as the brush source; its render target is set to this object's target.</param>
/// <param name="destTriangle">Destination triangle; elements 0..2 are used as its corners.</param>
/// <param name="srcTriangle">Source triangle passed to the affine computation.</param>
/// <param name="srcUnit">Not used by this method.</param>
/// <param name="opacity">Opacity passed to the bitmap brush.</param>
public void DrawImage(DUIImage image, PointF[] destTriangle, PointF[] srcTriangle, GraphicsUnit srcUnit, float opacity)
{
    PointF cornerA = destTriangle[0];
    PointF cornerB = destTriangle[1];
    PointF cornerC = destTriangle[2];

    image.RenderTarget = this.target;

    using (var brush = new DirectUI.Common.DUIBitmapBrush(image, DUIExtendMode.Clamp, opacity))
    using (var geometry = new SharpDX.Direct2D1.PathGeometry(this.target.RenderTarget.Factory))
    using (SharpDX.Direct2D1.GeometrySink sink = geometry.Open())
    {
        brush.RenderTarget = this.target;

        // describe the closed, filled triangle A -> B -> C
        sink.SetFillMode(SharpDX.Direct2D1.FillMode.Alternate);
        sink.BeginFigure(DxConvert.ToVector2(cornerA), SharpDX.Direct2D1.FigureBegin.Filled);
        sink.AddLine(DxConvert.ToVector2(cornerB));
        sink.AddLine(DxConvert.ToVector2(cornerC));
        sink.EndFigure(SharpDX.Direct2D1.FigureEnd.Closed);
        sink.Close();

        brush.Transform = MatrixTools.ThreePointsAffine(srcTriangle, destTriangle);
        this.target.RenderTarget.FillGeometry(geometry, brush);
    }
}
/// <summary>
/// Exercises the <c>MatrixTools.Scale</c> overloads: default scaling, scaling by a factor and
/// scaling to explicit dimensions, first on an image loaded from <c>simplePageUrl</c> and then
/// on an all-true 4x4 matrix. Each scaling step is applied to the result of the previous one.
/// </summary>
public void TestScaling()
{
    testName = "Scaling";

    // Image.FromFile keeps the file locked and holds a GDI+ handle until the Image is disposed.
    // The Bitmap(Image) constructor makes its own copy, so the source can be released right away.
    ImageMatrix matrix;
    using (var source = Image.FromFile(simplePageUrl))
    {
        matrix = new ImageMatrix(new Bitmap(source));
    }

    // default scaling -> standard letter dimensions
    {
        matrix = MatrixTools.Scale(matrix);
        TEST(matrix.Width == DATA.letterWidth, $"Actual {matrix.Width}");
        TEST(matrix.Height == DATA.letterHeight, $"Actual {matrix.Height}");
    }

    // scaling by a factor, applied to the already-scaled matrix
    {
        matrix = MatrixTools.Scale(matrix, 1.5f);
        TEST(matrix.Width == 60, $"Actual {matrix.Width}");
        TEST(matrix.Height == 75, $"Actual {matrix.Height}");
    }

    // scaling to explicit dimensions
    {
        matrix = MatrixTools.Scale(matrix, 100, 100);
        TEST(matrix.Width == 100, $"Actual {matrix.Width}");
        TEST(matrix.Height == 100, $"Actual {matrix.Height}");
    }

    {
        matrix = MatrixTools.Scale(matrix, 60, 30);
        TEST(matrix.Width == 60, $"Actual {matrix.Width}");
        TEST(matrix.Height == 30, $"Actual {matrix.Height}");
    }

    // an all-true matrix must stay all-true after scaling
    matrix = new ImageMatrix(new bool[][]
    {
        new bool[] { true, true, true, true },
        new bool[] { true, true, true, true },
        new bool[] { true, true, true, true },
        new bool[] { true, true, true, true },
    });

    {
        matrix = MatrixTools.Scale(matrix);
        TEST(matrix.Matrix.SelectMany(a => a).All(b => b), "Not all pixels are black");

        matrix = MatrixTools.Scale(matrix, 200, 40);
        TEST(matrix.Matrix.SelectMany(a => a).All(b => b), "Not all pixels are black");
    }
}
/// <summary>
/// Max-pools the columns of each row of <paramref name="matrix"/> over fixed bands.
/// The band boundaries are the column indices 8, 23, 53, 113 and 233: output column c holds the
/// maximum of input columns <c>bounds[c] .. bounds[c + 1] - 1</c>, so at most four output
/// columns are available.
/// </summary>
/// <param name="matrix">The input matrix; each row must be long enough to cover the requested bands.</param>
/// <param name="reducedColCount">Number of pooled columns to produce (0 to 4).</param>
/// <returns>A matrix with the same number of rows and <paramref name="reducedColCount"/> columns.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="matrix"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="reducedColCount"/> is negative or exceeds the number of available bands.
/// </exception>
public static double[,] MaxPoolMatrixColumns(double[,] matrix, int reducedColCount)
{
    if (matrix == null)
    {
        throw new ArgumentNullException(nameof(matrix));
    }

    // Band boundaries are loop-invariant, so they are built once rather than once per row.
    int[] bounds = { 8, 23, 53, 113, 233 };
    int maxBandCount = bounds.Length - 1;

    // Fail fast instead of running into an IndexOutOfRangeException part-way through the pooling.
    if (reducedColCount < 0 || reducedColCount > maxBandCount)
    {
        throw new ArgumentOutOfRangeException(
            nameof(reducedColCount),
            reducedColCount,
            "Value must be between 0 and " + maxBandCount + ", the number of fixed pooling bands.");
    }

    int rows = matrix.GetLength(0);
    double[,] returnMatrix = new double[rows, reducedColCount];

    for (int r = 0; r < rows; r++)
    {
        var rowVector = MatrixTools.GetRow(matrix, r);

        for (int c = 0; c < reducedColCount; c++)
        {
            int length = bounds[c + 1] - bounds[c];
            double[] subvector = DataTools.Subarray(rowVector, bounds[c], length);
            int max = DataTools.GetMaxIndex(subvector);
            returnMatrix[r, c] = subvector[max];
        }
    }

    return returnMatrix;
}
/// <summary>
/// Picks the character of the matcher that best resembles <paramref name="letter"/>.
/// Only matchers accepted by <c>EgligableForRecognition</c> are scored; the score is
/// <c>MatrixTools.EqualPixelRatioVaryingSize</c>. If no matcher scores above zero a comma is returned.
/// </summary>
/// <param name="letter">The letter image to recognise.</param>
/// <param name="matchers">Candidate reference letters.</param>
/// <returns>The best matching character, or ',' when nothing scored above zero.</returns>
private char MatchLetter(ImageMatrix letter, IEnumerable<ImageMatrix> matchers)
{
    char bestCharacter = '_';
    float bestRatio = 0;

    foreach (var candidate in matchers)
    {
        if (!EgligableForRecognition(letter, candidate))
        {
            continue;
        }

        float candidateRatio = MatrixTools.EqualPixelRatioVaryingSize(candidate, letter);
        if (candidateRatio > bestRatio)
        {
            bestRatio = candidateRatio;
            bestCharacter = candidate.Character;
        }
    }

    // no candidate scored: fall back to a comma (a CommaOrDot(letter) check was considered here)
    return bestRatio == 0 ? ',' : bestCharacter;
}
/// <summary>
/// Builds a multi-track image for a sonogram: the sonogram image itself, a time track and a
/// segmentation track, plus an optional score track, optional event markers and an optional
/// overlay thresholded at 0.5.
/// </summary>
/// <param name="sonogram">The sonogram to draw.</param>
/// <param name="scores">Optional score plot; skipped when null.</param>
/// <param name="poi">Optional events to draw; skipped when null or empty.</param>
/// <param name="eventThreshold">Not used by this method.</param>
/// <param name="overlay">Optional overlay matrix; skipped when null.</param>
/// <returns>The composed image.</returns>
public static Image DrawSonogram(BaseSonogram sonogram, Plot scores, List<AcousticEvent> poi, double eventThreshold, double[,] overlay)
{
    var baseImage = sonogram.GetImage(doHighlightSubband: false, add1KHzLines: false, doMelScale: false);
    var composite = new Image_MultiTrack(baseImage);

    composite.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));
    composite.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));

    if (scores != null)
    {
        var scoreTrack = ImageTrack.GetNamedScoreTrack(scores.data, 0.0, 1.0, scores.threshold, scores.title);
        composite.AddTrack(scoreTrack);
    }

    bool hasEvents = poi != null && poi.Count > 0;
    if (hasEvents)
    {
        composite.AddEvents(poi, sonogram.NyquistFrequency, sonogram.Configuration.FreqBinCount, sonogram.FramesPerSecond);
    }

    if (overlay != null)
    {
        composite.OverlayDiscreteColorMatrix(MatrixTools.ThresholdMatrix2Binary(overlay, 0.5));
    }

    return composite.GetImage();
}
/// <summary>
/// Converts the signal to z-scores, cuts it into consecutive non-overlapping patches of
/// <paramref name="sampleLength"/> values and stores the autocorrelation of each patch as one
/// column of the result. Values left over after the last complete patch are ignored.
/// </summary>
/// <param name="signal">An array corresponding to one frequency bin.</param>
/// <param name="sampleLength">The length of each non-overlapping patch for which the autocorrelation is obtained.</param>
/// <returns>A matrix with <paramref name="sampleLength"/> rows and one column per complete patch.</returns>
public static double[,] GetXcorrByTimeMatrix(double[] signal, int sampleLength)
{
    // z-score normalisation first; the original author notes that results are spurious without it
    double[] zScores = DataTools.Vector2Zscores(signal);

    // number of complete patches
    int patchCount = zScores.Length / sampleLength;
    var correlationsByTime = new double[sampleLength, patchCount];

    for (int patch = 0; patch < patchCount; patch++)
    {
        var segment = DataTools.Subarray(zScores, patch * sampleLength, sampleLength);

        // the autocorrelation is stored as one column, so it is expected to have sampleLength values
        var autocorrelation = AutoAndCrossCorrelation.AutoCorrelationOldJavaVersion(segment);
        MatrixTools.SetColumn(correlationsByTime, patch, autocorrelation);
    }

    // shape: [sampleLength, patchCount]
    return correlationsByTime;
}
/// <summary>
/// Manual display test for the defense-mechanism splitter: loads the combined "qwe" image and
/// the single-letter images from the "Test Defense" folder, trims and scales them, splits the
/// combined image into single letters and shows the first resulting picture.
/// </summary>
private void TestDisplay()
{
    var folder = Path.Combine(DATA.testFolder, "Test Defense");

    // Image.FromFile keeps the file locked and holds a GDI+ handle until the Image is disposed.
    // The Bitmap(Image) constructor makes its own copy, so the source is released immediately.
    System.Func<string, ImageMatrix> load = fileName =>
    {
        using (var source = Image.FromFile(Path.Combine(folder, fileName)))
        {
            return new ImageMatrix(new Bitmap(source));
        }
    };

    var qwe = load("qwe.png");
    var q = load("q.png");
    var w = load("w.png");
    var e = load("e.png");

    // crop away the surrounding white area
    qwe = qwe.CopyMatrix(new MatrixTools(qwe).TrimWhiteAreaAround());
    q = q.CopyMatrix(new MatrixTools(q).TrimWhiteAreaAround());
    w = w.CopyMatrix(new MatrixTools(w).TrimWhiteAreaAround());
    e = e.CopyMatrix(new MatrixTools(e).TrimWhiteAreaAround());

    // the combined image is scaled to 150x50; single letters use the default scaling
    qwe = MatrixTools.Scale(qwe, 150, 50);
    q = MatrixTools.Scale(q);
    w = MatrixTools.Scale(w);
    e = MatrixTools.Scale(e);

    var crt = new ImageCreator();
    var result = Splitters.DefenseMechanism.SplitToSingleLetters(qwe, new List<ImageMatrix> { q, w, e });
    pictures.AddRange(result.Select(a => crt.CreateImageOutOfMatrix(a)));
    pictureBoxLine.Image = pictures.First();
}
/// <summary>
/// Max-pools the columns of each row of <paramref name="matrix"/> in consecutive groups of
/// <paramref name="factor"/> columns. Columns left over after the last complete group are ignored.
/// </summary>
/// <param name="matrix">The input matrix.</param>
/// <param name="factor">Number of adjacent columns pooled into one output column.</param>
/// <returns>A matrix with the same number of rows and <c>cols / factor</c> columns.</returns>
public static double[,] MaxPoolMatrixColumnsByFactor(double[,] matrix, int factor)
{
    int rowCount = matrix.GetLength(0);
    int columnCount = matrix.GetLength(1);
    int pooledColumnCount = columnCount / factor;

    var pooled = new double[rowCount, pooledColumnCount];

    for (int row = 0; row < rowCount; row++)
    {
        var rowValues = MatrixTools.GetRow(matrix, row);

        for (int group = 0; group < pooledColumnCount; group++)
        {
            // each group starts where the previous one ended
            double[] window = DataTools.Subarray(rowValues, group * factor, factor);
            int indexOfMax = DataTools.GetMaxIndex(window);
            pooled[row, group] = window[indexOfMax];
        }
    }

    return pooled;
}
/// <summary>
/// Normalises a spectrogram into the range 0..1 and then filters its background values.
/// </summary>
/// <param name="matrix">The spectrogram data.</param>
/// <param name="truncateMin">Lower truncation bound passed to <c>MatrixTools.NormaliseInZeroOne</c> (presumably values at or below it map to zero - confirm against that method).</param>
/// <param name="truncateMax">Upper truncation bound passed to <c>MatrixTools.NormaliseInZeroOne</c> (presumably values at or above it map to 1.0 - confirm against that method).</param>
/// <param name="backgroundFilterCoeff">Coefficient passed to <c>MatrixTools.FilterBackgroundValues</c> to de-emphasise the background.</param>
/// <returns>A normalised matrix of spectrogram data.</returns>
public static double[,] NormaliseSpectrogramMatrix(double[,] matrix, double truncateMin, double truncateMax, double backgroundFilterCoeff)
{
    double[,] normalised = MatrixTools.NormaliseInZeroOne(matrix, truncateMin, truncateMax);

    // de-emphasise the small background values
    return MatrixTools.FilterBackgroundValues(normalised, backgroundFilterCoeff);
}
/// <summary>
/// Analyses one audio segment: calculates indices for each subsegment, collects the summary and
/// spectral index values, optionally writes them to file, and optionally produces a sonogram
/// image and/or a csv dump of the sonogram data.
/// </summary>
/// <param name="analysisSettings">Analysis-wide settings, including the analyzer-specific configuration and save behaviours.</param>
/// <param name="segmentSettings">Settings for the segment being analysed (audio file, output locations, offsets).</param>
/// <returns>The populated analysis result for this segment.</returns>
public AnalysisResult2 Analyze<T>(AnalysisSettings analysisSettings, SegmentSettings<T> segmentSettings)
{
    var indicesConfig = (AcousticIndicesConfig)analysisSettings.AnalysisAnalyzerSpecificConfiguration;
    var calculationDuration = indicesConfig.IndexCalculationDuration.Seconds();

    var segmentAudio = segmentSettings.SegmentAudioFile;
    var recording = new AudioRecording(segmentAudio.FullName);
    var outputDirectory = segmentSettings.SegmentOutputDirectory;

    var results = new AnalysisResult2(analysisSettings, segmentSettings, recording.Duration)
    {
        AnalysisIdentifier = this.Identifier,
    };

    // calculate indices for each subsegment
    IndexCalculateResult[] subsegments = CalculateIndicesInSubsegments(
        recording,
        segmentSettings.SegmentStartOffset,
        segmentSettings.AnalysisIdealSegmentDuration,
        calculationDuration,
        indicesConfig.IndexProperties,
        segmentSettings.Segment.SourceMetadata.SampleRate,
        indicesConfig);

    int subsegmentCount = subsegments.Length;
    var trackScores = new List<Plot>(subsegmentCount);
    var tracks = new List<Track>(subsegmentCount);
    results.SummaryIndices = new SummaryIndexBase[subsegmentCount];
    results.SpectralIndices = new SpectralIndexBase[subsegmentCount];

    for (int index = 0; index < subsegmentCount; index++)
    {
        var subsegment = subsegments[index];

        // tag both sets of index values with the identifier of the source recording
        var sourceIdentifier = segmentSettings.Segment.SourceMetadata.Identifier;
        subsegment.SummaryIndexValues.FileName = sourceIdentifier;
        subsegment.SpectralIndexValues.FileName = sourceIdentifier;

        results.SummaryIndices[index] = subsegment.SummaryIndexValues;
        results.SpectralIndices[index] = subsegment.SpectralIndexValues;

        trackScores.AddRange(subsegment.TrackScores);
        if (subsegment.Tracks != null)
        {
            tracks.AddRange(subsegment.Tracks);
        }
    }

    if (analysisSettings.AnalysisDataSaveBehavior)
    {
        // summary indices first, then the spectral indices
        this.WriteSummaryIndicesFile(segmentSettings.SegmentSummaryIndicesFile, results.SummaryIndices);
        results.SummaryIndicesFile = segmentSettings.SegmentSummaryIndicesFile;

        results.SpectraIndicesFiles = WriteSpectrumIndicesFilesCustom(
            segmentSettings.SegmentSpectrumIndicesDirectory,
            Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name),
            results.SpectralIndices);
    }

    // write the segment spectrogram (typically of one minute duration) to CSV
    // this is required if you want to produce zoomed spectrograms at a resolution greater than 0.2 seconds/pixel
    bool saveSonogramData = analysisSettings.Configuration.GetBoolOrNull(AnalysisKeys.SaveSonogramData) ?? false;
    bool sonogramNeeded = saveSonogramData
                          || analysisSettings.AnalysisImageSaveBehavior.ShouldSave(results.Events.Length);

    if (!sonogramNeeded)
    {
        return results;
    }

    // start from the default config values
    var sonoConfig = new SonogramConfig();
    sonoConfig.SourceFName = recording.FilePath;
    sonoConfig.WindowSize = indicesConfig.FrameLength;

    // default step = window size, i.e. no overlap
    sonoConfig.WindowStep = analysisSettings.Configuration.GetIntOrNull(AnalysisKeys.FrameStep) ?? sonoConfig.WindowSize;
    sonoConfig.WindowOverlap = (sonoConfig.WindowSize - sonoConfig.WindowStep) / (double)sonoConfig.WindowSize;

    // Linear or Octave frequency scale? The octave scale forces step = window size.
    bool octaveScale = analysisSettings.Configuration.GetBoolOrNull(AnalysisKeys.KeyOctaveFreqScale) ?? false;
    if (octaveScale)
    {
        sonoConfig.WindowStep = sonoConfig.WindowSize;
        sonoConfig.WindowOverlap = (sonoConfig.WindowSize - sonoConfig.WindowStep) / (double)sonoConfig.WindowSize;
    }

    // the noise reduction type is left at the config default
    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // remove the DC values of the spectrogram: keep every row, drop column 0
    var fullData = sonogram.Data;
    sonogram.Data = MatrixTools.Submatrix(fullData, 0, 1, fullData.GetLength(0) - 1, fullData.GetLength(1) - 1);

    if (analysisSettings.AnalysisImageSaveBehavior.ShouldSave())
    {
        string imagePath = Path.Combine(outputDirectory.FullName, segmentSettings.SegmentImageFile.Name);

        // NOTE: hits (SPT in this case) is intentionally not supported
        var image = DrawSonogram(sonogram, null, trackScores, tracks);
        image.Save(imagePath);
        results.ImageFile = new FileInfo(imagePath);
    }

    if (saveSonogramData)
    {
        string csvPath = Path.Combine(outputDirectory.FullName, recording.BaseName + ".csv");
        Csv.WriteMatrixToCsv(csvPath.ToFileInfo(), sonogram.Data);
    }

    return results;
}
/// <summary> /// THE KEY ANALYSIS METHOD /// </summary> /// <param name="recording"> /// The segment Of Source File. /// </param> /// <param name="configDict"> /// The config Dict. /// </param> /// <param name="value"></param> /// <returns> /// The <see cref="LimnodynastesConvexResults"/>. /// </returns> internal static LimnodynastesConvexResults Analysis( Dictionary <string, double[, ]> dictionaryOfHiResSpectralIndices, AudioRecording recording, Dictionary <string, string> configDict, AnalysisSettings analysisSettings, SegmentSettingsBase segmentSettings) { // for Limnodynastes convex, in the D.Stewart CD, there are peaks close to: //1. 1950 Hz //2. 1460 hz //3. 970 hz These are 490 Hz apart. // for Limnodynastes convex, in the JCU recording, there are peaks close to: //1. 1780 Hz //2. 1330 hz //3. 880 hz These are 450 Hz apart. // So strategy is to look for three peaks separated by same amount and in the vicinity of the above, // starting with highest power (the top peak) and working down to lowest power (bottom peak). var outputDir = segmentSettings.SegmentOutputDirectory; TimeSpan segmentStartOffset = segmentSettings.SegmentStartOffset; //KeyValuePair<string, double[,]> kvp = dictionaryOfHiResSpectralIndices.First(); var spg = dictionaryOfHiResSpectralIndices["RHZ"]; int rhzRowCount = spg.GetLength(0); int rhzColCount = spg.GetLength(1); int sampleRate = recording.SampleRate; double herzPerBin = sampleRate / 2 / (double)rhzRowCount; double scoreThreshold = (double?)double.Parse(configDict["EventThreshold"]) ?? 3.0; int minimumFrequency = (int?)int.Parse(configDict["MinHz"]) ?? 850; int dominantFrequency = (int?)int.Parse(configDict["DominantFrequency"]) ?? 1850; // # The Limnodynastes call has three major peaks. The dominant peak is at 1850 or as set above. // # The second and third peak are at equal gaps below. DominantFreq-gap and DominantFreq-(2*gap); // # Set the gap in the Config file. 
Should typically be in range 880 to 970 int peakGapInHerz = (int?)int.Parse(configDict["PeakGap"]) ?? 470; int F1AndF2Gap = (int)Math.Round(peakGapInHerz / herzPerBin); //int F1AndF2Gap = 10; // 10 = number of freq bins int F1AndF3Gap = 2 * F1AndF2Gap; //int F1AndF3Gap = 20; int hzBuffer = 250; int bottomBin = 5; int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin); int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);; int dominantBinMin = dominantBin - binBuffer; int dominantBinMax = dominantBin + binBuffer; // freqBin + rowID = binCount - 1; // therefore: rowID = binCount - freqBin - 1; int minRowID = rhzRowCount - dominantBinMax - 1; int maxRowID = rhzRowCount - dominantBinMin - 1; int bottomRow = rhzRowCount - bottomBin - 1; var list = new List <Point>(); // loop through all spectra/columns of the hi-res spectrogram. for (int c = 1; c < rhzColCount - 1; c++) { double maxAmplitude = -double.MaxValue; int idOfRowWithMaxAmplitude = 0; for (int r = minRowID; r <= bottomRow; r++) { if (spg[r, c] > maxAmplitude) { maxAmplitude = spg[r, c]; idOfRowWithMaxAmplitude = r; } } if (idOfRowWithMaxAmplitude < minRowID) { continue; } if (idOfRowWithMaxAmplitude > maxRowID) { continue; } // want a spectral peak. if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c - 1]) { continue; } if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c + 1]) { continue; } // peak should exceed thresold amplitude if (spg[idOfRowWithMaxAmplitude, c] < 3.0) { continue; } // convert row ID to freq bin ID int freqBinID = rhzRowCount - idOfRowWithMaxAmplitude - 1; list.Add(new Point(c, freqBinID)); // we now have a list of potential hits for LimCon. This needs to be filtered. 
// Console.WriteLine("Col {0}, Bin {1} ", c, freqBinID); } // DEBUG ONLY // ################################ TEMPORARY ################################ // superimpose point on RHZ HiRes spectrogram for debug purposes bool drawOnHiResSpectrogram = true; //string filePath = @"G:\SensorNetworks\Output\Frogs\TestOfHiResIndices-2016July\Test\Towsey.HiResIndices\SpectrogramImages\3mile_creek_dam_-_Herveys_Range_1076_248366_20130305_001700_30_0min.CombinedGreyScale.png"; var fileName = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name); string filePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScale.png"; var debugImage = new FileInfo(filePath); if (!debugImage.Exists) { drawOnHiResSpectrogram = false; } if (drawOnHiResSpectrogram) { // put red dot where max is Bitmap bmp = new Bitmap(filePath); foreach (Point point in list) { bmp.SetPixel(point.X + 70, 1911 - point.Y, Color.Red); } // mark off every tenth frequency bin for (int r = 0; r < 26; r++) { bmp.SetPixel(68, 1911 - (r * 10), Color.Blue); bmp.SetPixel(69, 1911 - (r * 10), Color.Blue); } // mark off upper bound and lower frequency bound bmp.SetPixel(69, 1911 - dominantBinMin, Color.Lime); bmp.SetPixel(69, 1911 - dominantBinMax, Color.Lime); //bmp.SetPixel(69, 1911 - maxRowID, Color.Lime); string opFilePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScaleAnnotated.png"; bmp.Save(opFilePath); } // END DEBUG ################################ TEMPORARY ################################ // now construct the standard decibel spectrogram WITHOUT noise removal, and look for LimConvex // get frame parameters for the analysis double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1); int frameSize = rhzRowCount * 2; int frameStep = frameSize; // this default = zero overlap double frameDurationInSeconds = frameSize / (double)sampleRate; double frameStepInSeconds = frameStep / (double)sampleRate; double framesPerSec = 1 / frameStepInSeconds; 
//var dspOutput = DSP_Frames.ExtractEnvelopeAndFFTs(recording, frameSize, frameStep); //// Generate deciBel spectrogram //double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.amplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon); // i: Init SONOGRAM config var sonoConfig = new SonogramConfig { SourceFName = recording.BaseName, WindowSize = frameSize, WindowOverlap = 0.0, NoiseReductionType = NoiseReductionType.None, }; // init sonogram BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); // remove the DC row of the spectrogram sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1); //scores.Add(new Plot("Decibels", DataTools.NormaliseMatrixValues(dBArray), ActivityAndCover.DefaultActivityThresholdDb)); //scores.Add(new Plot("Active Frames", DataTools.Bool2Binary(activity.activeFrames), 0.0)); // convert spectral peaks to frequency //var tuple_DecibelPeaks = SpectrogramTools.HistogramOfSpectralPeaks(deciBelSpectrogram); //int[] peaksBins = tuple_DecibelPeaks.Item2; //double[] freqPeaks = new double[peaksBins.Length]; //int binCount = sonogram.Data.GetLength(1); //for (int i = 1; i < peaksBins.Length; i++) freqPeaks[i] = (lowerBinBound + peaksBins[i]) / (double)nyquistBin; //scores.Add(new Plot("Max Frequency", freqPeaks, 0.0)); // location of peaks for spectral images // create new list of LimCon hits in the standard spectrogram. double timeSpanOfFrameInSeconds = frameSize / (double)sampleRate; var newList = new List <int[]>(); int lastFrameID = sonogram.Data.GetLength(0) - 1; int lastBinID = sonogram.Data.GetLength(1) - 1; foreach (Point point in list) { double secondsFromStartOfSegment = (point.X * 0.1) + 0.05; // convert point.Y to center of time-block. int framesFromStartOfSegment = (int)Math.Round(secondsFromStartOfSegment / timeSpanOfFrameInSeconds); // location of max point is uncertain, so search in neighbourhood. 
// NOTE: sonogram.data matrix is time*freqBin double maxValue = -double.MaxValue; int idOfTMax = framesFromStartOfSegment; int idOfFMax = point.Y; for (int deltaT = -4; deltaT <= 4; deltaT++) { for (int deltaF = -1; deltaF <= 1; deltaF++) { int newT = framesFromStartOfSegment + deltaT; if (newT < 0) { newT = 0; } else if (newT > lastFrameID) { newT = lastFrameID; } double value = sonogram.Data[newT, point.Y + deltaF]; if (value > maxValue) { maxValue = value; idOfTMax = framesFromStartOfSegment + deltaT; idOfFMax = point.Y + deltaF; } } } // newList.Add(new Point(frameSpan, point.Y)); int[] array = new int[2]; array[0] = idOfTMax; array[1] = idOfFMax; newList.Add(array); } // Now obtain more of spectrogram to see if have peaks at two other places characteristic of Limnodynastes convex. // In the D.Stewart CD, there are peaks close to: //1. 1950 Hz //2. 1460 hz //3. 970 hz These are 490 Hz apart. // For Limnodynastes convex, in the JCU recording, there are peaks close to: //1. 1780 Hz //2. 1330 hz //3. 880 hz These are 450 Hz apart. // So strategy is to look for three peaks separated by same amount and in the vicinity of the above, // starting with highest power (the top peak) and working down to lowest power (bottom peak). //We have found top/highest peak - now find the other two. int secondDominantFrequency = 1380; int secondDominantBin = (int)Math.Round(secondDominantFrequency / herzPerBin); int thirdDominantFrequency = 900; int thirdDominantBin = (int)Math.Round(thirdDominantFrequency / herzPerBin); var acousticEvents = new List <AcousticEvent>(); int Tbuffer = 2; // First extract a sub-matrix. 
foreach (int[] array in newList) { // NOTE: sonogram.data matrix is time*freqBin int Tframe = array[0]; int F1bin = array[1]; double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - Tbuffer, 0, Tframe + Tbuffer, F1bin); double F1power = subMatrix[Tbuffer, F1bin]; // convert to vector var spectrum = MatrixTools.GetColumnAverages(subMatrix); // use the following code to get estimate of background noise double[,] powerMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - 3, 10, Tframe + 3, F1bin); double averagePower = (MatrixTools.GetRowAverages(powerMatrix)).Average(); double score = F1power - averagePower; // debug - checking what the spectrum looks like. //for (int i = 0; i < 18; i++) // spectrum[i] = -100.0; //DataTools.writeBarGraph(spectrum); // locate the peaks in lower frequency bands, F2 and F3 bool[] peaks = DataTools.GetPeaks(spectrum); int F2bin = 0; double F2power = -200.0; // dB for (int i = -3; i <= 2; i++) { int bin = F1bin - F1AndF2Gap + i; if ((peaks[bin]) && (F2power < subMatrix[1, bin])) { F2bin = bin; F2power = subMatrix[1, bin]; } } if (F2bin == 0) { continue; } if (F2power == -200.0) { continue; } score += (F2power - averagePower); int F3bin = 0; double F3power = -200.0; for (int i = -5; i <= 2; i++) { int bin = F1bin - F1AndF3Gap + i; if ((peaks[bin]) && (F3power < subMatrix[1, bin])) { F3bin = bin; F3power = subMatrix[1, bin]; } } if (F3bin == 0) { continue; } if (F3power == -200.0) { continue; } score += (F3power - averagePower); score /= 3; // ignore events where SNR < decibel threshold if (score < scoreThreshold) { continue; } // ignore events with wrong power distribution. 
A good LimnoConvex call has strongest F1 power if ((F3power > F1power) || (F2power > F1power)) { continue; } //freq Bin ID must be converted back to Matrix row ID // freqBin + rowID = binCount - 1; // therefore: rowID = binCount - freqBin - 1; minRowID = rhzRowCount - F1bin - 2; maxRowID = rhzRowCount - F3bin - 1; int F1RowID = rhzRowCount - F1bin - 1; int F2RowID = rhzRowCount - F2bin - 1; int F3RowID = rhzRowCount - F3bin - 1; int maxfreq = dominantFrequency + hzBuffer; int topBin = (int)Math.Round(maxfreq / herzPerBin); int frameCount = 4; double duration = frameCount * frameStepInSeconds; double startTimeWrtSegment = (Tframe - 2) * frameStepInSeconds; // Got to here so start initialising an acoustic event var ae = new AcousticEvent(segmentStartOffset, startTimeWrtSegment, duration, minimumFrequency, maxfreq); ae.SetTimeAndFreqScales(framesPerSec, herzPerBin); //var ae = new AcousticEvent(oblong, recording.Nyquist, binCount, frameDurationInSeconds, frameStepInSeconds, frameCount); //ae.StartOffset = TimeSpan.FromSeconds(Tframe * frameStepInSeconds); var pointF1 = new Point(2, topBin - F1bin); var pointF2 = new Point(2, topBin - F2bin); var pointF3 = new Point(2, topBin - F3bin); ae.Points = new List <Point>(); ae.Points.Add(pointF1); ae.Points.Add(pointF2); ae.Points.Add(pointF3); //tried using HitElements but did not do what I wanted later on. 
//ae.HitElements = new HashSet<Point>(); //ae.HitElements = new SortedSet<Point>(); //ae.HitElements.Add(pointF1); //ae.HitElements.Add(pointF2); //ae.HitElements.Add(pointF3); ae.Score = score; //ae.MinFreq = Math.Round((topBin - F3bin - 5) * herzPerBin); //ae.MaxFreq = Math.Round(topBin * herzPerBin); acousticEvents.Add(ae); } // now add in extra common info to the acoustic events acousticEvents.ForEach(ae => { ae.SpeciesName = configDict[AnalysisKeys.SpeciesName]; ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds; ae.SegmentDurationSeconds = recording.Duration.TotalSeconds; ae.Name = abbreviatedName; ae.BorderColour = Color.Red; ae.FileName = recording.BaseName; }); double[] scores = new double[rhzColCount]; // predefinition of score array double nomalisationConstant = scoreThreshold * 4; // four times the score threshold double compressionFactor = rhzColCount / (double)sonogram.Data.GetLength(0); foreach (AcousticEvent ae in acousticEvents) { ae.ScoreNormalised = ae.Score / nomalisationConstant; if (ae.ScoreNormalised > 1.0) { ae.ScoreNormalised = 1.0; } int frameID = (int)Math.Round(ae.EventStartSeconds / frameDurationInSeconds); int hiresFrameID = (int)Math.Floor(frameID * compressionFactor); scores[hiresFrameID] = ae.ScoreNormalised; } var plot = new Plot(AnalysisName, scores, scoreThreshold); // DEBUG ONLY ################################ TEMPORARY ################################ // Draw a standard spectrogram and mark of hites etc. 
bool createStandardDebugSpectrogram = true; var imageDir = new DirectoryInfo(outputDir.FullName + @"\SpectrogramImages"); if (!imageDir.Exists) { imageDir.Create(); } if (createStandardDebugSpectrogram) { var fileName2 = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name); string filePath2 = Path.Combine(imageDir.FullName, fileName + ".Spectrogram.png"); Bitmap sonoBmp = (Bitmap)sonogram.GetImage(); int height = sonoBmp.Height; foreach (AcousticEvent ae in acousticEvents) { ae.DrawEvent(sonoBmp); //g.DrawRectangle(pen, ob.ColumnLeft, ob.RowTop, ob.ColWidth-1, ob.RowWidth); //ae.DrawPoint(sonoBmp, ae.HitElements.[0], Color.OrangeRed); //ae.DrawPoint(sonoBmp, ae.HitElements[1], Color.Yellow); //ae.DrawPoint(sonoBmp, ae.HitElements[2], Color.Green); ae.DrawPoint(sonoBmp, ae.Points[0], Color.OrangeRed); ae.DrawPoint(sonoBmp, ae.Points[1], Color.Yellow); ae.DrawPoint(sonoBmp, ae.Points[2], Color.LimeGreen); } // draw the original hits on the standard sonogram foreach (int[] array in newList) { sonoBmp.SetPixel(array[0], height - array[1], Color.Cyan); } // mark off every tenth frequency bin on the standard sonogram for (int r = 0; r < 20; r++) { sonoBmp.SetPixel(0, height - (r * 10) - 1, Color.Blue); sonoBmp.SetPixel(1, height - (r * 10) - 1, Color.Blue); } // mark off upper bound and lower frequency bound sonoBmp.SetPixel(0, height - dominantBinMin, Color.Lime); sonoBmp.SetPixel(0, height - dominantBinMax, Color.Lime); sonoBmp.Save(filePath2); } // END DEBUG ################################ TEMPORARY ################################ return(new LimnodynastesConvexResults { Sonogram = sonogram, Hits = null, Plot = plot, Events = acousticEvents, RecordingDuration = recording.Duration, }); } // Analysis()
/// <summary>
/// Runs the feature-learning workflow against a fixed set of folders:
/// learns cluster centroids per frequency band from the one-minute recordings,
/// writes the centroids, cluster sizes and a centroid image for every band into the
/// result folder, and finally extracts features for the training set.
/// </summary>
/// <param name="arguments">Command arguments; not read by this method.</param>
/// <exception cref="FileNotFoundException">The config path could not be turned into a file reference.</exception>
/// <exception cref="ArgumentException">The config file does not exist on disk.</exception>
public void Execute(Arguments arguments)
{
    LoggedConsole.WriteLine("feature learning process...");

    // NOTE: all locations are hard-coded.
    var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\";
    var inputPath = Path.Combine(inputDir, "TrainSet\\one_min_recordings");
    var trainSetPath = Path.Combine(inputDir, "TrainSet\\train_data");
    var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml";
    var resultDir = Path.Combine(inputDir, "FeatureLearning");
    Directory.CreateDirectory(resultDir);

    // load and validate the configuration
    var configFile = configPath.ToFileInfo();
    if (configFile == null)
    {
        throw new FileNotFoundException("No config file argument provided");
    }

    if (!configFile.Exists)
    {
        throw new ArgumentException($"Config file {configFile.FullName} not found");
    }

    var configuration = ConfigFile.Deserialize<FeatureLearningSettings>(configFile);

    // width of a patch = number of frequency bins that fall into one band
    int patchWidth = (configuration.MaxFreqBin - configuration.MinFreqBin + 1) / configuration.NumFreqBand;

    // patch sampling + clustering on the one-minute recordings, one output per frequency band
    var clusteringOutputList = FeatureLearning.UnsupervisedFeatureLearning(configuration, inputPath);

    var allBandsCentroids = new List<double[][]>();
    for (int band = 0; band < clusteringOutputList.Count; band++)
    {
        var bandOutput = clusteringOutputList[band];
        string bandSuffix = band.ToString();

        // The centroid dictionary cannot be written directly by Csv.WriteToCsv (array values),
        // so its values are converted to a matrix first.
        string centroidCsvPath = Path.Combine(resultDir, "ClusterCentroids" + bandSuffix + ".csv");
        var centroidRows = bandOutput.ClusterIdCentroid.Values.ToArray();
        Csv.WriteMatrixToCsv(centroidCsvPath.ToFileInfo(), centroidRows.ToMatrix());

        // cluster sizes: obtain the size-based ordering, then dump id/size pairs to csv
        Dictionary<int, double> sizeByClusterId = bandOutput.ClusterIdSize;
        int[] sizeOrder = KmeansClustering.SortClustersBasedOnSize(sizeByClusterId);
        string sizeCsvPath = Path.Combine(resultDir, "ClusterSize" + bandSuffix + ".csv");
        Csv.WriteToCsv(sizeCsvPath.ToFileInfo(), sizeByClusterId);

        // centroids in the enumeration order of the clustering output
        double[][] centroids = bandOutput.ClusterIdCentroid.Select(entry => entry.Value).ToArray();
        allBandsCentroids.Add(centroids);

        // build one image tile per centroid, visiting the centroids in order of cluster size
        List<double[,]> tiles = new List<double[,]>();
        for (int rank = 0; rank < centroids.Length; rank++)
        {
            double[,] patch = MatrixTools.ArrayToMatrixByColumn(centroids[sizeOrder[rank]], patchWidth, configuration.PatchHeight);
            double[,] normalisedPatch = DataTools.normalise(patch);

            // AddRow appends a separator row so that the tiles are visually delimited
            double[,] tile = PatchSampling.AddRow(normalisedPatch);
            tiles.Add(tile);
        }

        // stitch the tiles together and save the picture for this band
        double[,] mosaic = PatchSampling.ListOf2DArrayToOne2DArray(tiles);
        var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mosaic);
        clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
        var imagePath = Path.Combine(resultDir, "ClustersWithGrid" + bandSuffix + ".bmp");
        clusterImage.Save(imagePath);
    }

    // use the learned centroids of all bands to extract features from the training set
    FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, trainSetPath, resultDir);
    LoggedConsole.WriteLine("Done...");
}
/// <summary>
/// Reads the spectral indices encoded in two false-colour ribbon spectrograms and lays them
/// side by side in a single matrix.
/// The method assumes the ribbons were composed from these colour keys:
/// ribbon 1 = { "ACI", "ENT", "EVN" }, ribbon 2 = { "BGN", "PMN", "EVN" }.
/// All three channels of ribbon 1 and the first two channels of ribbon 2 are copied,
/// giving five index bands per row in the order ACI, ENT, EVN, BGN, PMN.
/// </summary>
/// <param name="image1">First ribbon image; cast to <c>Image&lt;Rgb24&gt;</c>.</param>
/// <param name="image2">Second ribbon image; cast to <c>Image&lt;Rgb24&gt;</c>.</param>
/// <param name="startTime">Start of the wanted interval; truncated to whole minutes.</param>
/// <param name="duration">Length of the wanted interval; truncated to whole minutes.</param>
/// <returns>A matrix with one row per row of the decoded index matrices and five times their column count.</returns>
public static double[,] ReadSpectralIndicesFromTwoFalseColourSpectrogramRibbons(Image image1, Image image2, TimeSpan startTime, TimeSpan duration)
{
    // interval expressed in whole minutes
    int firstMinute = (int)startTime.TotalMinutes;
    int lastMinute = firstMinute + (int)duration.TotalMinutes;

    // decode the index matrices carried by each ribbon
    var ribbon1 = ReadSpectralIndicesFromFalseColourSpectrogram((Image<Rgb24>)image1, firstMinute, lastMinute);
    var ribbon2 = ReadSpectralIndicesFromFalseColourSpectrogram((Image<Rgb24>)image2, firstMinute, lastMinute);

    // the first matrix dictates the dimensions; the result holds five bands of equal width
    int rowCount = ribbon1[0].GetLength(0);
    int bandWidth = ribbon1[0].GetLength(1);
    var combined = new double[rowCount, bandWidth * 5];

    for (int r = 0; r < rowCount; r++)
    {
        CopyIntoBand(MatrixTools.GetRow(ribbon1[0], r), r, 0); // ACI
        CopyIntoBand(MatrixTools.GetRow(ribbon1[1], r), r, 1); // ENT
        CopyIntoBand(MatrixTools.GetRow(ribbon1[2], r), r, 2); // EVN
        CopyIntoBand(MatrixTools.GetRow(ribbon2[0], r), r, 3); // BGN
        CopyIntoBand(MatrixTools.GetRow(ribbon2[1], r), r, 4); // PMN
    }

    return combined;

    // Writes the first bandWidth values into the given band of one row of the result.
    void CopyIntoBand(double[] values, int rowIndex, int bandNumber)
    {
        int offset = bandWidth * bandNumber;
        for (int c = 0; c < bandWidth; c++)
        {
            combined[rowIndex, offset + c] = values[c];
        }
    }
}
/// <summary>
/// The core analysis method: builds a noise-reduced spectrogram of the segment, looks for
/// harmonic structure in a 64-bin band starting just above MIN_HZ, scores each frame and
/// converts the scored frames into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">Audio file holding the segment to analyse.</param>
/// <param name="configDict">
/// Settings. MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, DECIBEL_THRESHOLD, INTENSITY_THRESHOLD and
/// CALL_DURATION are required; the frame length is optional and defaults to 1024.
/// </param>
/// <param name="segmentStartOffset">Offset of the segment, handed on to every created event.</param>
/// <returns>Sonogram, hits matrix, intensity plot, detected events and the recording duration.</returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // frame length: use the configured value when present, otherwise 1024
    int frameLength = configDict.TryGetValue(AnalysisKeys.FrameLength, out string frameLengthText)
        ? int.Parse(frameLengthText)
        : 1024;

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"]);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"]);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"]);
    double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"]); // dB
    double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); // in 0-1
    double callDuration = double.Parse(configDict["CALL_DURATION"]); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // with zero window overlap the frame rate has the same numeric value as the bin width
    double framesPerSecond = freqBinWidth;

    // The cross-correlation/FFT technique needs a power-of-two number of bins to scan.
    // e.g. sr=17640, window=1024: 64 bins span about 1100 Hz above the min Hz level.
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);
    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);
    int callSpan = (int)Math.Round(callDuration * framesPerSecond);

    // ii: DETECT HARMONICS
    var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);
    double[] dBArray = results.Item1;
    double[] intensity = results.Item2; // per-frame harmonic intensity
    double[] periodicity = results.Item3; // per-frame periodicity, in bins

    int noiseBound = (int)(100 / freqBinWidth); // bins covering 0-100 Hz are ignored - too much noise
    double[] scoreArray = new double[intensity.Length];
    for (int frame = 0; frame < rowCount; frame++)
    {
        double frameIntensity = intensity[frame];
        if (frameIntensity < harmonicIntensityThreshold)
        {
            continue;
        }

        // reject frames whose formant gap lies outside the configured range
        double formantGapHz = periodicity[frame] * freqBinWidth;
        if (formantGapHz < minFormantgap || formantGapHz > maxFormantgap)
        {
            continue;
        }

        // locate the frequency of maximum power once the low-frequency bins are blanked
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, frame);
        for (int bin = 0; bin < noiseBound; bin++)
        {
            spectrum[bin] = 0.0;
        }

        int peakBin = DataTools.GetMaxIndex(spectrum);
        int peakHz = (int)Math.Round(peakBin * freqBinWidth);

        // a frame whose strongest frequency is below 1200 Hz scores zero
        double discount = peakHz < 1200 ? 0.0 : 1.0;
        if (frameIntensity > harmonicIntensityThreshold)
        {
            scoreArray[frame] = frameIntensity * discount;
        }
    }

    // transfer info to a hits matrix, using a reduced threshold for display purposes
    var hits = new double[rowCount, colCount];
    double displayThreshold = harmonicIntensityThreshold * 0.75;
    for (int frame = 0; frame < rowCount; frame++)
    {
        if (scoreArray[frame] < displayThreshold)
        {
            continue;
        }

        // period expressed relative to the configured formant-gap range
        double formantGapHz = periodicity[frame] * freqBinWidth;
        double relativePeriod = (formantGapHz - minFormantgap) / maxFormantgap;
        for (int bin = minBin; bin < maxbin; bin++)
        {
            hits[frame, bin] = relativePeriod;
        }
    }

    // iii: CONVERT TO ACOUSTIC EVENTS - one event per scored frame, centred on that frame
    double maxPossibleScore = 0.5;
    int halfCallSpan = callSpan / 2;
    var predictedEvents = new List<AcousticEvent>();
    for (int frame = 0; frame < rowCount; frame++)
    {
        if (scoreArray[frame] < 0.001)
        {
            continue;
        }

        double startTime = (frame - halfCallSpan) / framesPerSecond;
        var callEvent = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
        callEvent.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
        callEvent.Score = scoreArray[frame];
        callEvent.ScoreNormalised = callEvent.Score / maxPossibleScore;
        predictedEvents.Add(callEvent);
    }

    Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);
    return Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration);
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Frames whose average decibel value inside the configured frequency band exceeds a fixed
/// 3 dB threshold are converted to acoustic events; a debug spectrogram is always written to
/// the output directory.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="configuration">Recognizer settings; read into a <c>LitoriaNasutaConfig</c>.</param>
/// <param name="segmentStartOffset">Offset of this segment; passed on to the created events.</param>
/// <param name="getSpectralIndexes">Not used by this recognizer.</param>
/// <param name="outputDirectory">Directory that receives the debug spectrogram image.</param>
/// <param name="imageWidth">Not used by this recognizer.</param>
/// <returns>The sonogram, a single score plot and the detected events (<c>Hits</c> is null).</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaNasutaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // BETTER TO SET THESE. IGNORE USER!
    // this default framesize seems to work
    const int frameSize = 1024;
    const double windowOverlap = 0.0;

    // i: MAKE SONOGRAM (the default HAMMING window is used)
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // Without noise reduction the recognizer would be more sensitive.
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 3.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ii: DO THE ANALYSIS AND RECOVER SCORES
    // average decibel value per frame inside the frequency band of interest
    int rowCount = sonogram.Data.GetLength(0);
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        decibelThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);

    // this recognizer does not produce a hits matrix
    double[,] hits = null;

    // add the common species/segment information to every event
    foreach (var ae in acousticEvents)
    {
        ae.SpeciesName = recognizerConfig.SpeciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
    }

    // score track: band amplitude where it exceeds the threshold, zero elsewhere
    var thresholdedPlot = new double[amplitudeArray.Length];
    for (int x = 0; x < amplitudeArray.Length; x++)
    {
        if (amplitudeArray[x] > decibelThreshold)
        {
            thresholdedPlot[x] = amplitudeArray[x];
        }
    }

    var maxDb = amplitudeArray.MaxOrDefault();
    DataTools.Normalise(thresholdedPlot, decibelThreshold, out double[] normalisedScores, out double normalisedThreshold);

    // The interpolated string is used directly. Passing it through string.Format would parse
    // it a second time and throw a FormatException if the display name contained '{' or '}'.
    var text = $"{this.DisplayName} (Fullscale={maxDb:f1}dB)";
    var plot = new Plot(text, normalisedScores, normalisedThreshold);

    // Debug output - currently always produced: the score plot plus the raw band amplitude.
    DataTools.Normalise(amplitudeArray, decibelThreshold, out double[] normalisedAmplitude, out double normalisedAmplitudeThreshold);
    var amplPlot = new Plot("Band amplitude", normalisedAmplitude, normalisedAmplitudeThreshold);
    var debugPlots = new List<Plot> { plot, amplPlot };

    // NOTE: This DrawDebugImage() method can be over-written in this class.
    var debugImage = DrawDebugImage(sonogram, acousticEvents, debugPlots, hits);
    var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
    debugImage.Save(debugPath);

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = acousticEvents,
    };
}
/// <summary>
/// Returns a Long Duration spectrogram "ribbon": an image with the same length as the full-scale
/// LD spectrogram but with the frequency scale reduced to <c>RibbonPlotHeight</c> pixel rows.
/// The ribbon can be used in circumstances where the full image is not appropriate.
/// Each pixel row is the AVERAGE of a band of consecutive frequency bins. The band width is the
/// integer quotient (number of frequency bins / height), so when the number of bins is an exact
/// multiple of the height (e.g. both are powers of 2) the frequency scale is reduced exactly;
/// otherwise the remaining highest-index bins are not used.
/// A height of 32 is quite good - small but still discriminates frequency bands.
/// </summary>
/// <param name="indices1">Index matrix (frequency bins x time) mapped to the red channel.</param>
/// <param name="indices2">Index matrix mapped to the green channel.</param>
/// <param name="indices3">Index matrix mapped to the blue channel.</param>
/// <returns>An image whose width is the column count of <paramref name="indices1"/> and whose height is <c>RibbonPlotHeight</c>.</returns>
public static Image<Rgb24> GetSpectrogramRibbon(double[,] indices1, double[,] indices2, double[,] indices3)
{
    int height = RibbonPlotHeight;
    int width = indices1.GetLength(1);
    var image = new Image<Rgb24>(width, height);

    // the reduction factor i.e. freq bins per pixel row
    int bandWidth = indices1.GetLength(0) / height;

    for (int i = 0; i < width; i++)
    {
        // the full spectrum of each index for this time step
        var spectrum1 = MatrixTools.GetColumn(indices1, i);
        var spectrum2 = MatrixTools.GetColumn(indices2, i);
        var spectrum3 = MatrixTools.GetColumn(indices3, i);

        for (int h = 0; h < height; h++)
        {
            int start = h * bandWidth;
            byte red = ToChannel(DataTools.Subarray(spectrum1, start, bandWidth).Average());
            byte grn = ToChannel(DataTools.Subarray(spectrum2, start, bandWidth).Average());
            byte blu = ToChannel(DataTools.Subarray(spectrum3, start, bandWidth).Average());
            image[i, h] = Color.FromRgb(red, grn, blu);
        }
    }

    return image;

    // Converts a band average to a colour channel value.
    // NaN becomes mid-grey (0.5). The value is clamped to [0, 1] BEFORE scaling, so that
    // out-of-range averages saturate at 0 or 255 instead of wrapping around in the byte cast
    // (previously only the upper bound was enforced and negative values wrapped).
    byte ToChannel(double bandAverage)
    {
        if (double.IsNaN(bandAverage))
        {
            bandAverage = 0.5;
        }

        double clamped = Math.Max(0.0, Math.Min(1.0, bandAverage));
        return (byte)(255 * clamped);
    }
}
/// <summary>
/// Generates a 30 second artificial signal containing five cosine harmonics, converts its
/// amplitude spectrogram to a decibel octave-scale spectrogram and checks that
/// (a) the spectral peaks of one frame map to the expected bins and
/// (b) the annotated spectrogram image has the expected dimensions.
/// </summary>
public void TestFreqScaleOnArtificialSignal2()
{
    int sampleRate = 64000;
    double duration = 30; // signal duration in seconds
    int[] harmonics = { 500, 1000, 2000, 4000, 8000 };
    var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
    var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png");
    var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);

    // sonogram configuration: window size dictated by the frequency scale, no noise reduction
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.2,
        SourceFName = "Signal2",
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

    // take an arbitrary frame (row 40), smooth it and find its peaks
    var smoothedSpectrum = DataTools.filterMovingAverage(MatrixTools.GetRow(sonogram.Data, 40), 5);
    var isPeak = DataTools.GetPeaks(smoothedSpectrum);

    // collect the bin ids of the peaks, ignoring five bins at either end of the spectrum
    var peakIds = new List<int>();
    for (int index = 5; index < isPeak.Length - 5; index++)
    {
        if (!isPeak[index])
        {
            continue;
        }

        int binId = freqScale.BinBounds[index, 0];
        peakIds.Add(binId);
        LoggedConsole.WriteLine($"Spectral peak located in bin {binId}, Herz={freqScale.BinBounds[index, 1]}");
    }

    foreach (int harmonic in harmonics)
    {
        LoggedConsole.WriteLine($"Harmonic {harmonic}Herz should be in bin {freqScale.GetBinIdForHerzValue(harmonic)}");
    }

    // one peak per harmonic, each in its expected bin
    int[] expectedPeakBins = { 129, 257, 513, 1025, 2049 };
    Assert.AreEqual(expectedPeakBins.Length, peakIds.Count);
    for (int k = 0; k < expectedPeakBins.Length; k++)
    {
        Assert.AreEqual(expectedPeakBins[k], peakIds[k]);
    }

    // draw and save the annotated spectrogram
    var image = sonogram.GetImage();
    string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={freqScale.WindowSize}";
    image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // Check that image dimensions are correct
    Assert.AreEqual(146, image.Width);
    Assert.AreEqual(310, image.Height);
}
/// <summary>
/// Does the detection work for one audio segment:
/// builds a noise-reduced decibel spectrogram, finds the dominant frequency bin of every frame,
/// locates candidate events in the high-pass filtered band amplitude, scores each candidate
/// against the call templates and returns the candidates that reach the similarity threshold.
/// </summary>
/// <param name="audioRecording">The audio segment to analyse.</param>
/// <param name="configuration">Recognizer settings (species names, frequency band, thresholds).</param>
/// <param name="outputDirectory">Directory that receives the debug image when debugging is enabled.</param>
/// <param name="segmentStartOffset">Offset of this segment; passed on to the created events.</param>
/// <returns>Accepted events, the hits matrix, the similarity-score plot and the sonogram.</returns>
internal RecognizerResults Gruntwork(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;

    // make a standard decibel spectrogram WITH noise removal and zero frame overlap
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
        WindowOverlap = 0.0,
    };

    var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

    // frame parameters for the analysis
    int sampleRate = audioRecording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);
    int frameSize = config.WindowSize;
    int frameStep = frameSize; // this default = zero overlap
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    // reading in variables from the config file
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = 6;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

    // IMPORTANT: The following frame widths assume a sampling rate = 22050 and window size of 256.
    int minFrameWidth = 7;
    int maxFrameWidth = 14;

    // frequency band of interest expressed in bins
    int binMin = (int)Math.Round(minHz / sonogram.FBinWidth);
    int binMax = (int)Math.Round(maxHz / sonogram.FBinWidth);

    int[] dominantBins = new int[rowCount]; // dominant frequency bin per frame (0 = none found)
    double[] scores = new double[rowCount]; // amplitude of the dominant bin per frame
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    for (int s = 0; s < rowCount; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = double.MinValue;
        int maxId = 0;

        // look for the dominant frequency from bin 5 up to the top of the band
        for (int binID = 5; binID < binMax; binID++)
        {
            if (spectrum[binID] > maxAmplitude)
            {
                maxAmplitude = spectrum[binID];
                maxId = binID;
            }
        }

        // the dominant bin must lie inside the band of interest ...
        if (maxId < binMin)
        {
            continue;
        }

        // ... and the peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        scores[s] = maxAmplitude;
        dominantBins[s] = maxId;
    }

    // Average amplitude in the band for every frame, then remove the slowly varying baseline.
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, binMin, rowCount - 1, binMax);
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // Find start and end frames of potential events in the filtered signal.
    Plot.FindStartsAndEndsOfScoreEvents(highPassFilteredSignal, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

    // The list of potential hits needs to be filtered: score each one against the call templates.
    var potentialEvents = new List<AcousticEvent>();
    foreach (Point point in startEnds)
    {
        // get average of the dominant bin over the frames that have one
        int binSum = 0;
        int binCount = 0;
        int eventWidth = point.Y - point.X + 1;
        for (int s = point.X; s <= point.Y; s++)
        {
            if (dominantBins[s] >= binMin)
            {
                binSum += dominantBins[s];
                binCount++;
            }
        }

        // No frame of this candidate has a dominant bin inside the band, so there is nothing to
        // score. Without this guard the average below is 0/0 = NaN and casting NaN to int yields
        // an unspecified bin index.
        if (binCount == 0)
        {
            continue;
        }

        // find average dominant bin for the event
        double meanDominantBin = Math.Round(binSum / (double)binCount);
        int avDominantBin = (int)meanDominantBin;
        int avDominantFreq = (int)(meanDominantBin * sonogram.FBinWidth);

        // Get score for the event.
        // Use a simple template for the call and calculate cosine similarity to the template.
        // callBinWidth = number of bins covering the frequency bandwidth of the call.
        int callBinWidth = 14;
        var templates = GetCtinnulaTemplates(callBinWidth);
        var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
        double eventScore = GetEventScore(eventMatrix, templates);

        // put hits into hits matrix and the cosine score into the score array
        for (int s = point.X; s <= point.Y; s++)
        {
            hits[s, avDominantBin] = 10;
            prunedScores[s] = eventScore;
        }

        if (eventScore < similarityThreshold)
        {
            continue;
        }

        double startTime = point.X * frameStepInSeconds;
        double durationTime = eventWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, minHz, maxHz);
        newEvent.DominantFreq = avDominantFreq;
        newEvent.Score = eventScore;
        newEvent.SetTimeAndFreqScales(framesPerSec, sonogram.FBinWidth);
        newEvent.Name = string.Empty; // remove name because it hides spectral content of the event.
        potentialEvents.Add(newEvent);
    }

    // the original score array (kept for debugging; it is not added to the returned plots)
    scores = DataTools.normalise(scores);
    var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold);

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, eventThresholdDb, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("Average amplitude", normalisedScores, normalisedThreshold);

        DataTools.Normalise(highPassFilteredSignal, eventThresholdDb, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { ampltdPlot, demeanedPlot };
        Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, null);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // display the cosine similarity scores
    var plot = new Plot(this.DisplayName, prunedScores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // Add names into the returned events. This is done after the debug image has been drawn
    // because a name hides the spectral content of the event. The configured names are used;
    // previously every event received the literal text "speciesName".
    foreach (AcousticEvent ae in potentialEvents)
    {
        ae.SpeciesName = speciesName;
        ae.Name = abbreviatedSpeciesName;
    }

    return new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// ################ THE KEY ANALYSIS METHOD for TRILLS
///
/// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
///
/// Steps performed here:
///  1. build a spectrogram of the recording and average its lower and upper frequency bands per frame;
///  2. combine the two band arrays into a "sum minus difference" score array and a baseline-subtracted copy;
///  3. convert the score array into candidate trill events and confirm those in which an oscillation
///     with period strictly between MinPeriod and MaxPeriod reaches IntensityThreshold;
///  4. convert the same score array into tink events using DecibelThreshold + 3.0 and accept them unfiltered.
/// </summary>
/// <param name="recording">The audio to analyse. When null a message is logged and null is returned.</param>
/// <param name="sonoConfig">Configuration of the spectrogram used for the analysis; its WindowSize fixes the frequency bin width.</param>
/// <param name="lwConfig">Thresholds, band limits, durations, periods and profile names for this recognizer.</param>
/// <param name="returnDebugImage">When true a debug image is drawn from the score arrays; otherwise the image item is null.</param>
/// <param name="segmentStartOffset">Offset of this segment, passed through to the event constructor.</param>
/// <returns>
/// A tuple of: a freshly computed spectrogram (window 512, no overlap) intended for display, the hits matrix
/// (always null here), the oscillation score array, the confirmed trill and tink events, and the debug image (or null).
/// Returns null when <paramref name="recording"/> is null.
/// </returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaWatjulumConfig lwConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    double intensityThreshold = lwConfig.IntensityThreshold;
    double minDuration = lwConfig.MinDurationOfTrill; // seconds
    double maxDuration = lwConfig.MaxDurationOfTrill; // seconds
    double minPeriod = lwConfig.MinPeriod; // seconds
    double maxPeriod = lwConfig.MaxPeriod; // seconds

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    //i: MAKE SONOGRAM
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // NOTE(review): sr / WindowSize equals the frame rate only when frames do not overlap.
    // Confirm that sonoConfig.WindowOverlap is zero for this recognizer, or use sonogram.FramesPerSecond.
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 4 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    // the +1 shifts every band limit up by one bin
    int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    // one average per frame (row) for each of the two frequency bands
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
    var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        lwConfig.DecibelThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // zero every baseline-subtracted score below 1.0 so only clear excursions feed the oscillation detector
    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // LOOK FOR TRILL EVENTS
    // init the score array
    double[] scores = new double[rowCount];

    // no hits matrix is produced by this recognizer
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    foreach (var ae in predictedTrillEvents)
    {
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity;
        // the scan window starts half a DCT length before the event start.
        int scanStart = eventStart - (dctLength / 2);
        int scanEnd = eventStart + eventWidth - (dctLength / 2);
        for (int i = scanStart; i < scanEnd; i += step)
        {
            // An event close to the start of the recording places the first windows before frame 0.
            // Skip them instead of indexing the arrays with a negative offset.
            if (i < 0)
            {
                continue;
            }

            // An event close to the end of the recording would run the window past the arrays.
            // Later windows start even further right, so stop scanning this event.
            if (i + dctLength > differenceScores.Length || i + dctLength > scores.Length)
            {
                break;
            }

            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            if (differenceArray == null)
            {
                // defensive: presumably Subarray signals "window does not fit" with null - confirm
                break;
            }

            Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);

            bool periodWithinBounds = period > minPeriod && period < maxPeriod;
            if (!periodWithinBounds)
            {
                continue;
            }

            // lay down score for sample length, keeping the largest intensity seen at each frame
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
            ae.Score_MaxInEvent = maximumIntensity;
            ae.Profile = lwConfig.ProfileNames[0];
            confirmedEvents.Add(ae);
        }
    }

    //######################################################################
    // LOOK FOR TINK EVENTS
    // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds
    double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds

    // want stronger threshold for tink because brief.
    double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;

    var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        tinkDecibelThreshold,
        minDurationOfTink,
        maxDurationOfTink,
        segmentStartOffset);

    // Every predicted tink is accepted as-is; no further pruning (e.g. cosine similarity) is applied yet.
    foreach (var ae2 in predictedTinkEvents)
    {
        ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";
        ae2.Profile = lwConfig.ProfileNames[1];
        confirmedEvents.Add(ae2);
    }

    //######################################################################
    var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
        debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for more easy interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        // if do not use noise reduction can get a more sensitive recogniser.
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
} //Analysis()
/// <summary>
/// ################ THE KEY ANALYSIS METHOD.
///
/// Loads the audio segment, builds a spectrogram with a fixed frame size (128) and overlap (0.5),
/// cross-correlates two frequency bins located just above 1500 Hz over short windows and converts
/// the resulting intensity scores into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment file to analyse.</param>
/// <param name="configDict">
/// Configuration values as text. The keys INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION, MIN_PERIOD and
/// MAX_PERIOD are read and parsed as culture-invariant numbers ('.' as decimal separator).
/// </param>
/// <param name="segmentStartOffset">Offset of this segment, passed through to the event constructor and to CropEvents.</param>
/// <returns>
/// A tuple of: the spectrogram, a zero-filled hits matrix with the spectrogram's dimensions, a list holding
/// the single "intensity" plot, the predicted events, and the duration of the recording.
/// </returns>
/// <exception cref="KeyNotFoundException">A required key is missing from <paramref name="configDict"/>.</exception>
/// <exception cref="FormatException">A configuration value is not a valid number.</exception>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values - ignore those set by user
    int frameSize = 128;
    double windowOverlap = 0.5;

    // Config text is machine-readable, so parse it independently of the culture of the machine running the analysis.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"], invariant); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"], invariant); // seconds
    double minPeriod = double.Parse(configDict["MIN_PERIOD"], invariant); // seconds
    double maxPeriod = double.Parse(configDict["MAX_PERIOD"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    }; //default values config

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 256      17640       14.5ms          68.9        68.9    ms              hz              hz
    // 512      17640       29.0ms          34.4        34.4    ms              hz              hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms         8.6         8.6     7430ms          551hz           1100hz
    //The Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    // Assuming sr=17640 and window=256, then binWidth = 68.9Hz and 1500Hz = bin 21.7..
    // Therefore do a Xcorrelation between bins 21 and 22.
    // Number of frames to span must power of 2.
    // NOTE(review): the figures above were written for other window sizes; this method uses frameSize = 128.
    int midHz = 1500;
    int lowerBin = (int)(midHz / freqBinWidth) + 1; //because bin[0] = DC
    int upperBin = lowerBin + 4;
    int lowerHz = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
    int upperHz = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

    // one value per frame for each of the two frequency bins
    double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
    double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);
    lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB
    upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB

    // Advance 1/40 of a second per step. At frame rates below 40 per second the integer truncation
    // would give 0 and make the division below throw, so never step by less than one frame.
    int step = Math.Max(1, (int)(framesPerSecond / 40));
    int stepCount = rowCount / step;

    // number of frames in each cross-correlation window
    int sampleLength = 32;
    double[] intensity = new double[rowCount];

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // stop as soon as a full window can no longer be extracted
        if (lowerSubarray == null || upperSubarray == null)
        {
            break;
        }

        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);

        // in real data these bins are dominant and hide other frequency content
        int zeroCount = 2;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0;
        }

        int maxId = DataTools.GetMaxIndex(spectrum);

        // convert maxID to period in seconds; maxId == 0 yields infinity, which the range check below rejects
        double period = 2 * sampleLength / (double)maxId / framesPerSecond;
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down score for sample length, keeping the largest value seen at each frame
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }
        }
    }

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 3);
    intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5); //## ABSOLUTE NORMALISATION 0-0.5
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerHz,
        upperHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray, segmentStartOffset);

    // the hits matrix is allocated but left zero-filled
    var hits = new double[rowCount, colCount];

    var plots = new List<Plot>();
    plots.Add(new Plot("intensity", intensity, intensityThreshold));
    return Tuple.Create(sonogram, hits, plots, predictedEvents, tsRecordingtDuration);
} //Analysis()
} //Analysis()

/// <summary>
/// Calculates a set of indices relevant to rain and cicadas from a short (10 seconds) chunk of audio.
/// </summary>
/// <param name="signal">signal envelope of a 10s chunk of audio</param>
/// <param name="spectrogram">spectrogram of a 10s chunk of audio</param>
/// <param name="lowFreqBound">boundary used to find the first bin of the mid-band sub-spectrogram; also passed to the spectral cover calculation</param>
/// <param name="midFreqBound">passed to the spectral cover calculation</param>
/// <param name="frameDuration">passed to the spectral cover calculation</param>
/// <param name="binWidth">divides <paramref name="lowFreqBound"/> to give a bin index; also passed to the spectral cover calculation</param>
/// <returns>a <c>RainStruct</c> with every index field filled in</returns>
public static RainStruct Get10SecondIndices(double[] signal, double[,] spectrogram, int lowFreqBound, int midFreqBound, TimeSpan frameDuration, double binWidth)
{
    // i: FRAME ENERGIES - convert to decibels, remove background noise (use Lamel et al.), clip negatives to zero
    double sdCount = 0.1;
    var noiseRemoval = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signal), sdCount);
    var frameDecibels = SNR.TruncateNegativeValues2Zero(noiseRemoval.NoiseReducedSignal);

    // flag the frames whose activity is >= threshold dB above background, then count the flags
    bool[] isActive = frameDecibels
        .Select(db => db >= ActivityAndCover.DefaultActivityThresholdDb)
        .ToArray();
    int activeCount = DataTools.CountTrues(isActive);

    double spikeThreshold = 0.05;
    double spikes = CalculateSpikeIndex(signal, spikeThreshold);

    // struct in which to store all indices
    RainStruct rainIndices;

    // fraction of frames having acoustic activity
    rainIndices.activity = (double)activeCount / frameDecibels.Length;

    // background noise and signal-to-noise ratio as reported by the noise removal step
    rainIndices.bgNoise = noiseRemoval.NoiseMode;
    rainIndices.snr = noiseRemoval.Snr;

    // 20 * log10 of the mean of the signal envelope
    rainIndices.avSig_dB = 20 * Math.Log10(signal.Average());

    // ENTROPY of ENERGY ENVELOPE
    rainIndices.temporalEntropy = DataTools.EntropyNormalised(DataTools.SquareValues(signal));
    rainIndices.spikes = spikes;

    // ii: calculate the bin id of boundary between mid and low frequency spectrum,
    //     then take every frame from that bin up to the last bin
    int firstMidBin = (int)Math.Ceiling(lowFreqBound / binWidth);
    int lastFrame = spectrogram.GetLength(0) - 1;
    int lastBin = spectrogram.GetLength(1) - 1;
    var midband = MatrixTools.Submatrix(spectrogram, 0, firstMidBin, lastFrame, lastBin);

    // iii: ENTROPY OF AVERAGE SPECTRUM - at this point the spectrogram is still an amplitude spectrogram
    var spectra = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(midband);
    var entropyOfAverages = DataTools.EntropyNormalised(spectra.Item1);

    // an undefined (NaN) entropy is reported as 1.0
    rainIndices.spectralEntropy = double.IsNaN(entropyOfAverages) ? 1.0 : entropyOfAverages;

    // iv: CALCULATE Acoustic Complexity Index on the AMPLITUDE SPECTRUM and average it over the bins
    rainIndices.ACI = AcousticComplexityIndex.CalculateAci(midband).Average();

    // v: the background-noise removal steps are currently disabled, so the spectrogram is used as received.
    //    Only the amplitude threshold is still needed below.
    double spectralBgThreshold = 0.015;

    // vi: SPECTROGRAM ANALYSIS - SPECTRAL COVER in the low, mid and high frequency bands
    SpectralActivity cover = ActivityAndCover.CalculateSpectralEvents(spectrogram, spectralBgThreshold, frameDuration, lowFreqBound, midFreqBound, binWidth);
    rainIndices.lowFreqCover = cover.LowFreqBandCover;
    rainIndices.midFreqCover = cover.MidFreqBandCover;
    rainIndices.hiFreqCover = cover.HighFreqBandCover;

    return rainIndices;
}