/// <summary>
/// Paints the training data points onto a visualization bitmap: unlabelled
/// points first (small squares), then labelled points on top (larger squares
/// coloured by class). Data-space coordinates are mapped to pixel coordinates
/// via <paramref name="plotCanvas"/>.
/// </summary>
/// <param name="trainingData">Training points; may contain unlabelled points.</param>
/// <param name="plotCanvas">Mapping between data space and pixel space.</param>
/// <param name="result">Bitmap to paint onto (modified in place).</param>
static void PaintTrainingData(DataPointCollection trainingData, PlotCanvas plotCanvas, Bitmap result)
{
    // First few colours are same as those in the book, remainder are random.
    Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];
    colors[0] = Color.FromArgb(183, 170, 8);
    colors[1] = Color.FromArgb(194, 32, 14);
    colors[2] = Color.FromArgb(4, 154, 10);
    colors[3] = Color.FromArgb(13, 26, 188);

    System.Random r = new Random(0); // same seed every time so colours will be consistent
    for (int c = 4; c < colors.Length; c++)
    {
        colors[c] = Color.FromArgb(255, r.Next(0, 255), r.Next(0, 255), r.Next(0, 255));
    }

    // Also plot the original training data (a little bigger for clarity)
    using (Graphics g = Graphics.FromImage(result))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        // BUG FIX: brushes and pens wrap GDI+ handles and were previously
        // allocated per data point without ever being disposed. Hoist the
        // loop-invariant ones into using blocks so they are released
        // deterministically.
        using (Brush unlabelledBrush = new SolidBrush(UnlabelledDataPointColor))
        using (Pen blackPen = new Pen(Color.Black))
        using (Pen whitePen = new Pen(Color.White, 2))
        {
            // Paint unlabelled data
            for (int s = 0; s < trainingData.Count(); s++)
            {
                if (trainingData.GetIntegerLabel(s) == DataPointCollection.UnknownClassLabel) // unlabelled
                {
                    // Map data point to a pixel coordinate in the visualization image.
                    PointF x = new PointF(
                        (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                        (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                    RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                    g.FillRectangle(unlabelledBrush, rectangle);
                    g.DrawRectangle(blackPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                }
            }

            // Paint labelled data on top
            for (int s = 0; s < trainingData.Count(); s++)
            {
                if (trainingData.GetIntegerLabel(s) != DataPointCollection.UnknownClassLabel)
                {
                    PointF x = new PointF(
                        (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                        (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                    RectangleF rectangle = new RectangleF(x.X - 5.0f, x.Y - 5.0f, 10.0f, 10.0f);

                    // The fill colour varies per class, so this brush is created
                    // (and disposed) per point.
                    using (Brush classBrush = new SolidBrush(colors[trainingData.GetIntegerLabel(s)]))
                    {
                        g.FillRectangle(classBrush, rectangle);
                    }
                    g.DrawRectangle(whitePen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                }
            }
        }
    }
}
/// <summary>
/// Apply a trained forest to some test data.
/// </summary>
/// <typeparam name="F">Type of split function</typeparam>
/// <param name="forest">Trained forest</param>
/// <param name="testData">Test data</param>
/// <returns>An array of class distributions, one per test data point</returns>
public static HistogramAggregator[] Test<F>(Forest<F, HistogramAggregator> forest, DataPointCollection testData) where F : IFeatureResponse
{
    // The number of classes is recorded as the bin count of any node's
    // histogram; read it off the root node of the first tree.
    int classCount = forest.GetTree(0).GetNode(0).TrainingDataStatistics.BinCount;

    // One array of leaf indices per tree, each indexed by data point.
    int[][] leafIndices = forest.Apply(testData);

    int sampleCount = testData.Count();
    HistogramAggregator[] distributions = new HistogramAggregator[sampleCount];

    for (int s = 0; s < sampleCount; s++)
    {
        // Pool the training statistics from the leaf reached in every tree.
        HistogramAggregator pooled = new HistogramAggregator(classCount);
        for (int t = 0; t < forest.TreeCount; t++)
        {
            pooled.Aggregate(forest.GetTree(t).GetNode(leafIndices[t][s]).TrainingDataStatistics);
        }
        distributions[s] = pooled;
    }

    return distributions;
}
/// <summary>
/// Opens and parses a training data file, first at <paramref name="path"/>
/// and then, on failure, relative to the executing assembly under
/// <paramref name="alternativePath"/>. Prints a message and exits the
/// process on any failure.
/// </summary>
/// <param name="path">Primary path to the training data file.</param>
/// <param name="alternativePath">Fallback directory, relative to the assembly location.</param>
/// <param name="dimension">Dimensionality of the data points.</param>
/// <param name="dataDescriptor">Describes how the file contents should be interpreted.</param>
/// <returns>The loaded, non-empty training data collection.</returns>
static DataPointCollection LoadTrainingData(
    string path,
    string alternativePath,
    int dimension,
    DataDescriptor dataDescriptor)
{
    System.IO.FileStream stream = null;
    try
    {
        stream = new FileStream(path, FileMode.Open, FileAccess.Read);
    }
    catch (Exception)
    {
        // Primary path failed; retry relative to the executing assembly.
        string a = System.IO.Path.Combine(
            Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "/",
            alternativePath);
        a = System.IO.Path.Combine(a, path);

        try
        {
            stream = new FileStream(a, FileMode.Open, FileAccess.Read);
        }
        catch (Exception)
        {
            Console.WriteLine("Failed to open training data file at \"{0}\" or \"{1}\".", path, a);
            Environment.Exit(-1);
        }
    }

    DataPointCollection trainingData = null;
    try
    {
        trainingData = DataPointCollection.Load(
            stream,
            dimension,
            dataDescriptor);
    }
    catch (Exception e)
    {
        Console.WriteLine("Failed to read training data. " + e.Message);
        Environment.Exit(-1);
    }
    finally
    {
        // BUG FIX: the stream was previously never closed, leaking the file
        // handle for the life of the process. NOTE(review): this assumes
        // DataPointCollection.Load reads the stream eagerly — confirm it does
        // not retain the stream for lazy reads.
        if (stream != null)
        {
            stream.Dispose();
        }
    }

    if (trainingData.Count() < 1)
    {
        Console.WriteLine("Insufficient training data.");
        Environment.Exit(-1);
    }

    return trainingData;
}
/// <summary>
/// Visualizes a trained 2D density forest as a bitmap: each pixel's red
/// channel encodes the (gamma-corrected) probability density at the
/// corresponding point in input space, and the training points are drawn
/// on top as small squares.
/// </summary>
/// <param name="forest">Trained density forest.</param>
/// <param name="trainingData">Training data, used to set the plot range and to overlay points.</param>
/// <param name="PlotSize">Output bitmap size in pixels.</param>
/// <param name="PlotDilation">Fractional padding added around the data range.</param>
/// <returns>The rendered visualization bitmap.</returns>
public static Bitmap Visualize(
    Forest<AxisAlignedFeatureResponse, GaussianAggregator2d> forest,
    DataPointCollection trainingData,
    Size PlotSize,
    PointF PlotDilation)
{
    // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

    // Apply the trained forest to the test data
    Console.WriteLine("\nApplying the forest to test data...");
    DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);
    int[][] leafNodeIndices = forest.Apply(testData);

    // Compute normalization factors per node
    int nTrainingPoints = (int)(trainingData.Count()); // could also count over tree nodes if training data no longer accessible
    double[][] normalizationFactors = new double[forest.TreeCount][];
    for (int t = 0; t < forest.TreeCount; t++)
    {
        normalizationFactors[t] = new double[forest.GetTree(t).NodeCount];
        ComputeNormalizationFactorsRecurse(forest.GetTree(t), 0, nTrainingPoints, new Bounds(2), normalizationFactors[t]);
    }

    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    // Paint the test data
    int index = 0;
    for (int j = 0; j < PlotSize.Height; j++)
    {
        for (int i = 0; i < PlotSize.Width; i++)
        {
            // Map pixel coordinate (i,j) in visualization image back to point in input space
            float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
            float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

            // Aggregate statistics for this sample over all trees
            double probability = 0.0;
            for (int t = 0; t < forest.TreeCount; t++)
            {
                int leafIndex = leafNodeIndices[t][index];
                probability += normalizationFactors[t][leafIndex] * forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics.GetPdf().GetProbability(x, y);
            }
            probability /= forest.TreeCount;

            // 'Gamma correct' probability density for better display
            float l = (float)(LuminanceScaleFactor * Math.Pow(probability, Gamma));

            // Clamp to the valid byte range before the cast below.
            if (l < 0)
            {
                l = 0;
            }
            else if (l > 255)
            {
                l = 255;
            }

            Color c = Color.FromArgb(255, (byte)(l), 0, 0);
            result.SetPixel(i, j, c);
            index++;
        }
    }

    // Also plot the original training data
    using (Graphics g = Graphics.FromImage(result))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        // BUG FIX: a SolidBrush and a Pen were previously allocated per data
        // point and never disposed, leaking GDI+ handles. Both are
        // loop-invariant, so create them once and dispose deterministically.
        using (Brush pointBrush = new SolidBrush(DataPointColor))
        using (Pen borderPen = new Pen(Color.Black))
        {
            for (int s = 0; s < trainingData.Count(); s++)
            {
                // Map data point to a pixel coordinate in the visualization image.
                PointF x = new PointF(
                    (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                    (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                g.FillRectangle(pointBrush, rectangle);
                g.DrawRectangle(borderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
            }
        }
    }

    return result;
}
/// <summary>
/// Visualizes a trained classification forest as a bitmap: each pixel is
/// coloured by the class posterior at the corresponding point in input
/// space (muddied towards grey where the entropy is high), with the
/// training points drawn on top.
/// </summary>
/// <typeparam name="F">Type of split function</typeparam>
/// <param name="forest">Trained classification forest.</param>
/// <param name="trainingData">Labelled training data, used for plot range, palette size, and overlay.</param>
/// <param name="PlotSize">Output bitmap size in pixels.</param>
/// <param name="PlotDilation">Fractional padding added around the data range.</param>
/// <returns>The rendered visualization bitmap.</returns>
public static Bitmap Visualize<F>(
    Forest<F, HistogramAggregator> forest,
    DataPointCollection trainingData,
    Size PlotSize,
    PointF PlotDilation) where F : IFeatureResponse
{
    // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

    DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

    Console.WriteLine("\nApplying the forest to test data...");
    int[][] leafNodeIndices = forest.Apply(testData);

    // Form a palette of random colors, one per class
    Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];

    // First few colours are same as those in the book, remainder are random.
    colors[0] = Color.FromArgb(183, 170, 8);
    colors[1] = Color.FromArgb(194, 32, 14);
    colors[2] = Color.FromArgb(4, 154, 10);
    colors[3] = Color.FromArgb(13, 26, 188);

    Color grey = Color.FromArgb(255, 127, 127, 127);

    System.Random r = new Random(0); // same seed every time so colours will be consistent
    for (int c = 4; c < colors.Length; c++)
    {
        colors[c] = Color.FromArgb(255, r.Next(0, 255), r.Next(0, 255), r.Next(0, 255));
    }

    // Create a visualization image
    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    // For each pixel...
    int index = 0;
    for (int j = 0; j < PlotSize.Height; j++)
    {
        for (int i = 0; i < PlotSize.Width; i++)
        {
            // Aggregate statistics for this sample over all leaf nodes reached
            HistogramAggregator h = new HistogramAggregator(trainingData.CountClasses());
            for (int t = 0; t < forest.TreeCount; t++)
            {
                int leafIndex = leafNodeIndices[t][index];
                h.Aggregate(forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics);
            }

            // Let's muddy the colors with grey where the entropy is high.
            float mudiness = 0.5f * (float)(h.Entropy());

            float R = 0.0f, G = 0.0f, B = 0.0f;

            for (int b = 0; b < trainingData.CountClasses(); b++)
            {
                float p = (1.0f - mudiness) * h.GetProbability(b); // NB probabilities sum to 1.0 over the classes

                R += colors[b].R * p;
                G += colors[b].G * p;
                B += colors[b].B * p;
            }

            R += grey.R * mudiness;
            G += grey.G * mudiness;
            B += grey.B * mudiness;

            Color c = Color.FromArgb(255, (byte)(R), (byte)(G), (byte)(B));

            result.SetPixel(i, j, c); // painfully slow but safe

            index++;
        }
    }

    // Also draw the original training data
    using (Graphics g = Graphics.FromImage(result))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        // BUG FIX: a SolidBrush and a Pen were previously allocated per data
        // point and never disposed, leaking GDI+ handles. The pen is
        // loop-invariant; the brush varies per class so it is disposed per
        // iteration.
        using (Pen borderPen = new Pen(Color.Black))
        {
            for (int s = 0; s < trainingData.Count(); s++)
            {
                // Map data point to a pixel coordinate in the visualization image.
                PointF x = new PointF(
                    (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                    (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                RectangleF rectangle = new RectangleF(x.X - 3.0f, x.Y - 3.0f, 6.0f, 6.0f);
                using (Brush classBrush = new SolidBrush(colors[trainingData.GetIntegerLabel(s)]))
                {
                    g.FillRectangle(classBrush, rectangle);
                }
                g.DrawRectangle(borderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
            }
        }
    }

    return result;
}
/// <summary>
/// Trains a 1D regression forest on the supplied data and renders a
/// visualization: the learned conditional density p(y|x) as a shaded
/// background, the conditional mean curve, and the training points.
/// </summary>
/// <param name="trainingData">Training data with one input dimension and a target value per point.</param>
/// <returns>The rendered visualization bitmap.</returns>
public Bitmap Run(DataPointCollection trainingData)
{
    // Train the forest
    Console.WriteLine("Training the forest...");

    Random random = new Random();
    ITrainingContext<AxisAlignedFeatureResponse, LinearFitAggregator1d> regressionTrainingContext = new RegressionTrainingContext();

    var forest = ForestTrainer<AxisAlignedFeatureResponse, LinearFitAggregator1d>.TrainForest(
        random, TrainingParameters, regressionTrainingContext, trainingData);

    // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetTargetRange(), PlotSize, PlotDilation);

    DataPointCollection testData = DataPointCollection.Generate1dGrid(plotCanvas.plotRangeX, PlotSize.Width);

    // Apply the trained forest to the test data
    Console.WriteLine("\nApplying the forest to test data...");
    int[][] leafNodeIndices = forest.Apply(testData);

    #region Generate Visualization Image
    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    // Plot the learned density
    Color inverseDensityColor = Color.FromArgb(255, 255 - DensityColor.R, 255 - DensityColor.G, 255 - DensityColor.B);

    double[] mean_y_given_x = new double[PlotSize.Width];

    for (int i = 0; i < PlotSize.Width; i++)
    {
        double totalProbability = 0.0;
        for (int j = 0; j < PlotSize.Height; j++)
        {
            // Map pixel coordinate (i,j) in visualization image back to point in input space
            float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
            float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

            double probability = 0.0;

            // Aggregate statistics for this sample over all trees
            for (int t = 0; t < forest.TreeCount; t++)
            {
                Node<AxisAlignedFeatureResponse, LinearFitAggregator1d> leafNodeCopy = forest.GetTree(t).GetNode(leafNodeIndices[t][i]);

                LinearFitAggregator1d leafStatistics = leafNodeCopy.TrainingDataStatistics;

                probability += leafStatistics.GetProbability(x, y);
            }

            probability /= forest.TreeCount;

            mean_y_given_x[i] += probability * y;
            totalProbability += probability;

            float scale = 10.0f * (float)probability;

            Color weightedColor = Color.FromArgb(
                255,
                (byte)(Math.Min(scale * inverseDensityColor.R + 0.5f, 255.0f)),
                (byte)(Math.Min(scale * inverseDensityColor.G + 0.5f, 255.0f)),
                (byte)(Math.Min(scale * inverseDensityColor.B + 0.5f, 255.0f)));

            // BUG FIX: the blue channel was previously computed from
            // weightedColor.G (copy-paste error), so the plotted density
            // colour ignored the blue component. Use weightedColor.B.
            Color c = Color.FromArgb(255, 255 - weightedColor.R, 255 - weightedColor.G, 255 - weightedColor.B);

            result.SetPixel(i, j, c);
        }

        // NB We don't really compute the mean over y, just over the region of y that is plotted
        mean_y_given_x[i] /= totalProbability;
    }

    // Also plot the mean curve and the original training data
    using (Graphics g = Graphics.FromImage(result))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        using (Pen meanPen = new Pen(MeanColor, 2))
        {
            // Draw the conditional mean as a polyline across the plot width.
            for (int i = 0; i < PlotSize.Width - 1; i++)
            {
                g.DrawLine(
                    meanPen,
                    (float)(i),
                    (float)((mean_y_given_x[i] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY),
                    (float)(i + 1),
                    (float)((mean_y_given_x[i + 1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY));
            }
        }

        using (Brush dataPointBrush = new SolidBrush(DataPointColor))
        using (Pen dataPointBorderPen = new Pen(DataPointBorderColor))
        {
            for (int s = 0; s < trainingData.Count(); s++)
            {
                // Map sample coordinate back to a pixel coordinate in the visualization image
                PointF x = new PointF(
                    (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                    (trainingData.GetTarget(s) - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                g.FillRectangle(dataPointBrush, rectangle);
                g.DrawRectangle(dataPointBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
            }
        }
    }

    return result;
    #endregion
}