/// <summary>
/// Add one labelled data point to this histogram: bump the bin for the
/// point's integer class label and the total sample count.
/// </summary>
/// <param name="data">Collection containing the point (a DataPointCollection).</param>
/// <param name="index">Index of the data point within the collection.</param>
public void Aggregate(IDataPointCollection data, int index)
{
    DataPointCollection points = (DataPointCollection)data;
    int label = points.GetIntegerLabel(index);

    bins_[label]++;
    sampleCount_ += 1;
}
/// <summary>
/// Generate a 2D dataset with data points distributed in a grid pattern.
/// Intended for generating visualization images.
/// </summary>
/// <param name="rangeX">x-axis range</param>
/// <param name="nStepsX">Number of grid points in x direction</param>
/// <param name="rangeY">y-axis range</param>
/// <param name="nStepsY">Number of grid points in y direction</param>
/// <returns>A new DataPointCollection</returns>
public static DataPointCollection Generate2dGrid(
    Tuple<float, float> rangeX, int nStepsX,
    Tuple<float, float> rangeY, int nStepsY)
{
    if (rangeX.Item1 >= rangeX.Item2)
        throw new ArgumentException("Invalid x-axis range.");
    if (rangeY.Item1 >= rangeY.Item2)
        throw new ArgumentException("Invalid y-axis range.");

    DataPointCollection grid = new DataPointCollection();
    grid.dimension_ = 2;
    grid.data_ = new List<float[]>();

    float dx = (rangeX.Item2 - rangeX.Item1) / nStepsX;
    float dy = (rangeY.Item2 - rangeY.Item1) / nStepsY;

    // Row-major order (y varies slowest) so a flat sample index maps
    // straightforwardly back to an (i, j) pixel coordinate.
    for (int j = 0; j < nStepsY; j++)
    {
        for (int i = 0; i < nStepsX; i++)
            grid.data_.Add(new float[] { rangeX.Item1 + i * dx, rangeY.Item1 + j * dy });
    }

    return grid;
}
/// <summary>
/// Train a classification forest over fully labelled 2D training data.
/// </summary>
/// <typeparam name="F">Type of split function</typeparam>
/// <param name="trainingData">Labelled 2D training data.</param>
/// <param name="featureFactory">Factory producing candidate feature responses.</param>
/// <param name="TrainingParameters">Forest training parameters.</param>
/// <returns>The trained forest.</returns>
static public Forest<F, HistogramAggregator> Train<F>(
    DataPointCollection trainingData,
    IFeatureFactory<F> featureFactory,
    TrainingParameters TrainingParameters) where F : IFeatureResponse
{
    // Guard clauses: this demo requires 2D, labelled, target-free data.
    if (trainingData.Dimensions != 2)
        throw new Exception("Training data points must be 2D.");
    if (!trainingData.HasLabels)
        throw new Exception("Training data points must be labelled.");
    if (trainingData.HasTargetValues)
        throw new Exception("Training data points should not have target values.");

    Console.WriteLine("Running training...");

    Random random = new Random();

    ITrainingContext<F, HistogramAggregator> classificationContext =
        new ClassificationTrainingContext<F>(trainingData.CountClasses(), featureFactory, random);

    var forest = ForestTrainer<F, HistogramAggregator>.TrainForest(
        random, TrainingParameters, classificationContext, trainingData);

    return forest;
}
/// <summary>
/// Train a density estimation forest over unlabelled 2D data.
/// </summary>
/// <param name="trainingData">Unadorned 2D training data.</param>
/// <param name="parameters">Forest training parameters.</param>
/// <param name="a">Number of 'effective' prior observations.</param>
/// <param name="b">Variance of the effective observations.</param>
/// <returns>The trained forest.</returns>
static public Forest<AxisAlignedFeatureResponse, GaussianAggregator2d> Train(
    DataPointCollection trainingData,
    TrainingParameters parameters,
    double a,
    double b)
{
    // Guard clauses: density estimation needs plain 2D points with no
    // labels or regression targets.
    if (trainingData.Dimensions != 2)
        throw new Exception("Training data points for density estimation were not 2D.");
    if (trainingData.HasLabels)
        throw new Exception("Density estimation training data should not be labelled.");
    if (trainingData.HasTargetValues)
        throw new Exception("Training data should not have target values.");

    // Train the forest
    Console.WriteLine("Training the forest...");

    Random random = new Random();

    ITrainingContext<AxisAlignedFeatureResponse, GaussianAggregator2d> densityEstimationTrainingContext =
        new DensityEstimationTrainingContext(a, b);

    var forest = ForestTrainer<AxisAlignedFeatureResponse, GaussianAggregator2d>.TrainForest(
        random, parameters, densityEstimationTrainingContext, trainingData);

    return forest;
}
/// <summary>
/// Render the forest's recovered density estimate over a 2D region
/// spanning the training data, with the training points overlaid.
/// Brighter red indicates higher estimated density.
/// </summary>
/// <param name="forest">Trained semi-supervised forest.</param>
/// <param name="trainingData">The data the forest was trained on.</param>
/// <param name="PlotSize">Output image size in pixels.</param>
/// <param name="PlotDilation">Fractional padding around the data's bounding box.</param>
/// <returns>A new Bitmap (caller disposes).</returns>
public static Bitmap VisualizeDensity(Forest<LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> forest, DataPointCollection trainingData, Size PlotSize, PointF PlotDilation)
{
    // Map the training data's (dilated) bounding box onto the output image.
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

    // Apply the trained forest to a grid of test points, one per pixel.
    Console.WriteLine("\nApplying the forest to test data...");
    DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

    int[][] leafNodeIndices = forest.Apply(testData);

    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    int sampleIndex = 0;
    for (int row = 0; row < PlotSize.Height; row++)
    {
        for (int col = 0; col < PlotSize.Width; col++)
        {
            // Map pixel coordinate (col,row) in visualization image back to a
            // point in input space.
            float x = plotCanvas.plotRangeX.Item1 + col * plotCanvas.stepX;
            float y = plotCanvas.plotRangeY.Item1 + row * plotCanvas.stepY;

            // Average the density estimates of the leaves reached in each tree.
            double probability = 0.0;
            for (int t = 0; t < forest.TreeCount; t++)
            {
                int leafIndex = leafNodeIndices[t][sampleIndex];
                probability += forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics.GaussianAggregator2d.GetPdf().GetProbability(x, y);
            }
            probability /= forest.TreeCount;

            // Scale to a displayable luminance, clamped to [0, 255].
            float luminance = (float)(LuminanceScaleFactor * probability);
            if (luminance < 0)
                luminance = 0;
            else if (luminance > 255)
                luminance = 255;

            result.SetPixel(col, row, Color.FromArgb(255, (byte)(luminance), 0, 0));
            sampleIndex++;
        }
    }

    // Overlay the original training data.
    PaintTrainingData(trainingData, plotCanvas, result);

    return result;
}
/// <summary>
/// Add one labelled data point to an externally supplied histogram
/// (identified by this aggregator's data handle) and bump the local
/// sample count.
/// </summary>
/// <param name="data">Collection containing the point (a DataPointCollection).</param>
/// <param name="index">Index of the data point within the collection.</param>
/// <param name="userData">The shared HistogramData to increment.</param>
public void Aggregate(IDataPointCollection data, int index, Object userData)
{
    DataPointCollection points = (DataPointCollection)data;
    HistogramData histogram = (HistogramData)userData;

    histogram.Increment(dataHandle_, points.GetIntegerLabel(index));
    sampleCount_ += 1;
}
/// <summary>
/// Overlay the training data points on a visualization image. Unlabelled
/// points are drawn first as small squares; labelled points are drawn on
/// top as larger squares coloured by class.
/// </summary>
/// <param name="trainingData">The training data to paint.</param>
/// <param name="plotCanvas">Mapping from input space to pixel coordinates.</param>
/// <param name="result">The image to draw onto.</param>
static void PaintTrainingData(DataPointCollection trainingData, PlotCanvas plotCanvas, Bitmap result)
{
    // First few colours are same as those in the book, remainder are random.
    Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];
    colors[0] = Color.FromArgb(183, 170, 8);
    colors[1] = Color.FromArgb(194, 32, 14);
    colors[2] = Color.FromArgb(4, 154, 10);
    colors[3] = Color.FromArgb(13, 26, 188);

    System.Random r = new Random(0); // same seed every time so colours will be consistent
    for (int c = 4; c < colors.Length; c++)
    {
        colors[c] = Color.FromArgb(255, r.Next(0, 255), r.Next(0, 255), r.Next(0, 255));
    }

    // Also plot the original training data (a little bigger for clarity).
    // FIX: the original allocated a new SolidBrush/Pen per point and never
    // disposed any of them, leaking GDI handles; reusable pens/brushes are
    // now hoisted and everything is disposed via 'using'.
    using (Graphics g = Graphics.FromImage(result))
    using (Brush unlabelledBrush = new SolidBrush(UnlabelledDataPointColor))
    using (Pen blackPen = new Pen(Color.Black))
    using (Pen whitePen = new Pen(Color.White, 2))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        // Paint unlabelled data
        for (int s = 0; s < trainingData.Count(); s++)
        {
            if (trainingData.GetIntegerLabel(s) == DataPointCollection.UnknownClassLabel) // unlabelled
            {
                PointF x = new PointF(
                    (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                    (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                g.FillRectangle(unlabelledBrush, rectangle);
                g.DrawRectangle(blackPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
            }
        }

        // Paint labelled data on top
        for (int s = 0; s < trainingData.Count(); s++)
        {
            if (trainingData.GetIntegerLabel(s) != DataPointCollection.UnknownClassLabel)
            {
                PointF x = new PointF(
                    (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                    (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                RectangleF rectangle = new RectangleF(x.X - 5.0f, x.Y - 5.0f, 10.0f, 10.0f);
                using (Brush labelBrush = new SolidBrush(colors[trainingData.GetIntegerLabel(s)]))
                {
                    g.FillRectangle(labelBrush, rectangle);
                }
                g.DrawRectangle(whitePen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
            }
        }
    }
}
/// <summary>
/// Aggregate one (possibly unlabelled) data point into the combined
/// semi-supervised statistics: density statistics are always updated,
/// histogram statistics only for labelled points.
/// </summary>
/// <param name="data">Collection containing the point (a DataPointCollection).</param>
/// <param name="index">Index of the data point within the collection.</param>
public void Aggregate(IDataPointCollection data, int index)
{
    DataPointCollection points = (DataPointCollection)data;

    // Always aggregate density statistics
    GaussianAggregator2d.Aggregate(data, index);

    // Only aggregate histogram statistics for those data points that have class labels
    bool isLabelled = points.GetIntegerLabel(index) != DataPointCollection.UnknownClassLabel;
    if (isLabelled)
        HistogramAggregator.Aggregate(data, index);
}
/// <summary>
/// Open and parse a training data file. If the primary path cannot be
/// opened, retry with a path relative to the executing assembly. On any
/// failure a message is printed and the process exits.
/// </summary>
/// <param name="path">Primary path of the training data file.</param>
/// <param name="alternativePath">Fallback directory, relative to the executable.</param>
/// <param name="dimension">Expected dimension of each data point.</param>
/// <param name="dataDescriptor">Flags describing labels/target values in the file.</param>
/// <returns>The loaded, non-empty training data.</returns>
static DataPointCollection LoadTrainingData(
    string path,
    string alternativePath,
    int dimension,
    DataDescriptor dataDescriptor)
{
    System.IO.FileStream stream = null;
    try
    {
        stream = new FileStream(path, FileMode.Open, FileAccess.Read);
    }
    catch (Exception)
    {
        // Fall back to a path relative to the executing assembly.
        string a = System.IO.Path.Combine(
            Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "/",
            alternativePath);
        a = System.IO.Path.Combine(a, path);

        try
        {
            stream = new FileStream(a, FileMode.Open, FileAccess.Read);
        }
        catch (Exception)
        {
            Console.WriteLine("Failed to open training data file at \"{0}\" or \"{1}\".", path, a);
            Environment.Exit(-1);
        }
    }

    DataPointCollection trainingData = null;
    try
    {
        trainingData = DataPointCollection.Load(stream, dimension, dataDescriptor);
    }
    catch (Exception e)
    {
        Console.WriteLine("Failed to read training data. " + e.Message);
        Environment.Exit(-1);
    }

    if (trainingData.Count() < 1)
    {
        Console.WriteLine("Insufficient training data.");
        Environment.Exit(-1);
    }

    return trainingData;
}
/// <summary>
/// Add one 2D data point to the aggregated Gaussian sufficient statistics:
/// coordinate sums, sums of squares, cross product, and sample count.
/// </summary>
/// <param name="data">Collection containing the point (a DataPointCollection).</param>
/// <param name="index">Index of the data point within the collection.</param>
public void Aggregate(IDataPointCollection data, int index)
{
    DataPointCollection concreteData = (DataPointCollection)(data);

    // FIX: fetch the data point once instead of five separate
    // GetDataPoint() calls for the same index. Math.Pow is retained so
    // the accumulated values stay bit-identical to the original.
    float[] datum = concreteData.GetDataPoint(index);

    sx_ += datum[0];
    sy_ += datum[1];

    sxx_ += Math.Pow(datum[0], 2.0);
    syy_ += Math.Pow(datum[1], 2.0);

    sxy_ += datum[0] * datum[1];

    sampleCount_ += 1;
}
/// <summary>
/// Add one 1D data point (with regression target) to the sufficient
/// statistics for a linear least-squares fit: the symmetric 2x2 matrix
/// X^T X, the vector X^T Y, and the sum of squared targets.
/// </summary>
/// <param name="data">Collection containing the point (a DataPointCollection).</param>
/// <param name="index">Index of the data point within the collection.</param>
public void Aggregate(IDataPointCollection data, int index)
{
    DataPointCollection points = (DataPointCollection)data;

    float[] datum = points.GetDataPoint(index);
    float target = points.GetTarget(index);

    // X^T X accumulators (note XT_X_12_ == XT_X_21_ by symmetry).
    XT_X_11_ += datum[0] * datum[0];
    XT_X_12_ += datum[0];
    XT_X_21_ += datum[0];
    XT_X_22_ += 1.0;

    // X^T Y accumulators.
    XT_Y_1_ += datum[0] * target;
    XT_Y_2_ += target;

    // Sum of squared targets (for residual variance).
    Y2_ += target * target;

    sampleCount_ += 1;
}
/// <summary>
/// Generate a 1D dataset containing a given number of data points
/// distributed at regular intervals within a given range. Intended for
/// generating visualization images.
/// </summary>
/// <param name="range">Range</param>
/// <param name="nSteps">Number of grid points</param>
/// <returns>A new DataPointCollection</returns>
public static DataPointCollection Generate1dGrid(Tuple<float, float> range, int nSteps)
{
    if (range.Item1 >= range.Item2)
        throw new ArgumentException("Invalid range.");

    DataPointCollection grid = new DataPointCollection();
    grid.dimension_ = 1;
    grid.data_ = new List<float[]>();

    float stride = (range.Item2 - range.Item1) / nSteps;
    for (int i = 0; i < nSteps; i++)
        grid.data_.Add(new float[] { range.Item1 + i * stride });

    return grid;
}
/// <summary>
/// Command line entry point. The first argument selects a demo mode
/// (supervised classification, density estimation, semi-supervised
/// classification, or regression); remaining arguments configure it.
/// </summary>
/// <param name="args">Command line arguments.</param>
static void Main(string[] args)
{
    if (args.Length == 0 || args[0] == "/?" || args[0].ToLower() == "help")
    {
        DisplayHelp();
        return;
    }

    // These command line parameters are reused over several command line modes...
    StringParameter trainingDataPath = new StringParameter("path", "Path of file containing training data.");
    NaturalParameter T = new NaturalParameter("t", "No. of trees in the forest (default = {0}).", 10);
    NaturalParameter D = new NaturalParameter("d", "Maximum tree levels (default = {0}).", 10, 20);
    NaturalParameter F = new NaturalParameter("f", "No. of candidate feature responses per decision node (default = {0}).", 10);
    NaturalParameter L = new NaturalParameter("l", "No. of candidate thresholds per feature response (default = {0}).", 1);
    SingleParameter a = new SingleParameter("a", "The number of 'effective' prior observations (default = {0}).", true, false, 10.0f);
    SingleParameter b = new SingleParameter("b", "The variance of the effective observations (default = {0}).", true, true, 400.0f);
    SimpleSwitchParameter verboseSwitch = new SimpleSwitchParameter("Enables verbose progress indication.");
    SingleParameter plotPaddingX = new SingleParameter("padx", "Pad plot horizontally (default = {0}).", true, false, 0.1f);
    SingleParameter plotPaddingY = new SingleParameter("pady", "Pad plot vertically (default = {0}).", true, false, 0.1f);
    EnumParameter split = new EnumParameter(
        "s",
        "Specify what kind of split function to use (default = {0}).",
        new string[] { "axis", "linear" },
        new string[] { "axis-aligned split", "linear split" },
        "axis");

    // Behaviour depends on command line mode...
    string mode = args[0].ToLower(); // first argument defines the command line mode

    if (mode == "clas" || mode == "class")
    {
        #region Supervised classification
        CommandLineParser parser = new CommandLineParser();
        parser.Command = "SW " + mode.ToUpper();
        parser.AddArgument(trainingDataPath);
        parser.AddSwitch("T", T);
        parser.AddSwitch("D", D);
        parser.AddSwitch("F", F);
        parser.AddSwitch("L", L);
        parser.AddSwitch("SPLIT", split);
        parser.AddSwitch("PADX", plotPaddingX);
        parser.AddSwitch("PADY", plotPaddingY);
        parser.AddSwitch("VERBOSE", verboseSwitch);

        // Default values up above should be fine here.

        if (args.Length == 1)
        {
            parser.PrintHelp();
            DisplayTextFiles(CLAS_DATA_PATH);
            return;
        }

        if (parser.Parse(args, 1) == false)
        {
            return;
        }

        TrainingParameters trainingParameters = new TrainingParameters()
        {
            MaxDecisionLevels = D.Value - 1,
            NumberOfCandidateFeatures = F.Value,
            NumberOfCandidateThresholdsPerFeature = L.Value,
            NumberOfTrees = T.Value,
            Verbose = verboseSwitch.Used
        };

        PointF plotDilation = new PointF(plotPaddingX.Value, plotPaddingY.Value);

        DataPointCollection trainingData = LoadTrainingData(
            trainingDataPath.Value,
            CLAS_DATA_PATH,
            2,
            DataDescriptor.HasClassLabels);

        if (split.Value == "linear")
        {
            Forest<LinearFeatureResponse2d, HistogramAggregator> forest = ClassificationExample.Train(
                trainingData, new LinearFeatureFactory(), trainingParameters);

            using (Bitmap result = ClassificationExample.Visualize(forest, trainingData, new Size(300, 300), plotDilation))
            {
                ShowVisualizationImage(result);
            }
        }
        else if (split.Value == "axis")
        {
            Forest<AxisAlignedFeatureResponse, HistogramAggregator> forest = ClassificationExample.Train(
                trainingData, new AxisAlignedFeatureFactory(), trainingParameters);

            using (Bitmap result = ClassificationExample.Visualize(forest, trainingData, new Size(300, 300), plotDilation))
            {
                ShowVisualizationImage(result);
            }
        }
        #endregion
    }
    else if (mode == "density")
    {
        #region Density Estimation
        CommandLineParser parser = new CommandLineParser();
        parser.Command = "SW " + mode.ToUpper();
        parser.AddArgument(trainingDataPath);
        parser.AddSwitch("T", T);
        parser.AddSwitch("D", D);
        parser.AddSwitch("F", F);
        parser.AddSwitch("L", L);

        // For density estimation (and semi-supervised learning) we add
        // a command line option to set the hyperparameters of the prior.
        parser.AddSwitch("a", a);
        parser.AddSwitch("b", b);
        parser.AddSwitch("PADX", plotPaddingX);
        parser.AddSwitch("PADY", plotPaddingY);
        parser.AddSwitch("VERBOSE", verboseSwitch);

        // Override default values for command line options.
        T.Value = 1;
        D.Value = 3;
        F.Value = 5;
        L.Value = 1;
        a.Value = 0;
        b.Value = 900;

        if (args.Length == 1)
        {
            parser.PrintHelp();
            DisplayTextFiles(DENSITY_DATA_PATH);
            return;
        }

        if (parser.Parse(args, 1) == false)
        {
            return;
        }

        TrainingParameters parameters = new TrainingParameters()
        {
            MaxDecisionLevels = D.Value - 1,
            NumberOfCandidateFeatures = F.Value,
            NumberOfCandidateThresholdsPerFeature = L.Value,
            NumberOfTrees = T.Value,
            Verbose = verboseSwitch.Used
        };

        DataPointCollection trainingData = LoadTrainingData(
            trainingDataPath.Value,
            DENSITY_DATA_PATH,
            2,
            DataDescriptor.Unadorned);

        Forest<AxisAlignedFeatureResponse, GaussianAggregator2d> forest =
            DensityEstimationExample.Train(trainingData, parameters, a.Value, b.Value);

        PointF plotDilation = new PointF(plotPaddingX.Value, plotPaddingY.Value);

        using (Bitmap result = DensityEstimationExample.Visualize(forest, trainingData, new Size(300, 300), plotDilation))
        {
            ShowVisualizationImage(result);
        }
        #endregion
    }
    else if (mode == "ssclas" || mode == "ssclass") // FIX: second operand was a duplicate "ssclas"
    {
        #region Semi-supervised classification
        CommandLineParser parser = new CommandLineParser();
        parser.Command = "SW " + mode.ToUpper();
        parser.AddArgument(trainingDataPath);
        parser.AddSwitch("T", T);
        parser.AddSwitch("D", D);
        parser.AddSwitch("F", F);
        parser.AddSwitch("L", L);
        parser.AddSwitch("split", split);
        parser.AddSwitch("a", a);
        parser.AddSwitch("b", b);

        EnumParameter plotMode = new EnumParameter(
            "plot",
            "Determines what to plot",
            new string[] { "density", "labels" },
            new string[] { "plot recovered density estimate", "plot class likelihood" },
            "labels");
        parser.AddSwitch("plot", plotMode);

        parser.AddSwitch("PADX", plotPaddingX);
        parser.AddSwitch("PADY", plotPaddingY);
        parser.AddSwitch("VERBOSE", verboseSwitch);

        // Override default values for command line options.
        T.Value = 10;
        D.Value = 12 - 1;
        F.Value = 30;
        L.Value = 1;

        if (args.Length == 1)
        {
            parser.PrintHelp();
            DisplayTextFiles(SSCLAS_DATA_PATH);
            return;
        }

        if (parser.Parse(args, 1) == false)
        {
            return;
        }

        DataPointCollection trainingData = LoadTrainingData(
            trainingDataPath.Value,
            SSCLAS_DATA_PATH,
            2,
            DataDescriptor.HasClassLabels);

        TrainingParameters parameters = new TrainingParameters()
        {
            MaxDecisionLevels = D.Value - 1,
            NumberOfCandidateFeatures = F.Value,
            NumberOfCandidateThresholdsPerFeature = L.Value,
            NumberOfTrees = T.Value,
            Verbose = verboseSwitch.Used
        };

        Forest<LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> forest =
            SemiSupervisedClassificationExample.Train(trainingData, parameters, a.Value, b.Value);

        PointF plotPadding = new PointF(plotPaddingX.Value, plotPaddingY.Value);

        if (plotMode.Value == "labels")
        {
            using (Bitmap result = SemiSupervisedClassificationExample.VisualizeLabels(forest, trainingData, new Size(300, 300), plotPadding))
            {
                ShowVisualizationImage(result);
            }
        }
        else if (plotMode.Value == "density")
        {
            using (Bitmap result = SemiSupervisedClassificationExample.VisualizeDensity(forest, trainingData, new Size(300, 300), plotPadding))
            {
                ShowVisualizationImage(result);
            }
        }
        #endregion
    }
    else if (mode == "regression")
    {
        #region Regression
        CommandLineParser parser = new CommandLineParser();
        parser.Command = "SW " + mode.ToUpper();
        parser.AddArgument(trainingDataPath);
        parser.AddSwitch("T", T);
        parser.AddSwitch("D", D);
        parser.AddSwitch("F", F);
        parser.AddSwitch("L", L);
        parser.AddSwitch("PADX", plotPaddingX);
        parser.AddSwitch("PADY", plotPaddingY);
        parser.AddSwitch("VERBOSE", verboseSwitch);

        // Override default values for command line options
        T.Value = 10;
        D.Value = 2;
        a.Value = 0; // prior turned off by default
        b.Value = 900;

        if (args.Length == 1)
        {
            parser.PrintHelp();
            DisplayTextFiles(REGRESSION_DATA_PATH);
            return;
        }

        if (parser.Parse(args, 1) == false)
        {
            return;
        }

        RegressionExample regressionDemo = new RegressionExample();

        regressionDemo.PlotDilation.X = plotPaddingX.Value;
        regressionDemo.PlotDilation.Y = plotPaddingY.Value;

        regressionDemo.TrainingParameters = new TrainingParameters()
        {
            MaxDecisionLevels = D.Value - 1,
            NumberOfCandidateFeatures = F.Value,
            NumberOfCandidateThresholdsPerFeature = L.Value,
            NumberOfTrees = T.Value,
            Verbose = verboseSwitch.Used
        };

        DataPointCollection trainingData = LoadTrainingData(
            trainingDataPath.Value,
            REGRESSION_DATA_PATH,
            1,
            DataDescriptor.HasTargetValues);

        using (Bitmap result = regressionDemo.Run(trainingData))
        {
            ShowVisualizationImage(result);
        }
        #endregion
    }
    else
    {
        Console.WriteLine("Unrecognized command line argument, try SW HELP.");
        return;
    }
}
/// <summary>
/// Generate a 1D dataset containing a given number of data points
/// distributed at regular intervals within a given range. Intended for
/// generating visualization images.
/// </summary>
/// <param name="range">Range</param>
/// <param name="nSteps">Number of grid points</param>
/// <returns>A new DataPointCollection</returns>
public static DataPointCollection Generate1dGrid(Tuple<float, float> range, int nSteps)
{
    if (range.Item1 >= range.Item2)
    {
        throw new ArgumentException("Invalid range.");
    }

    DataPointCollection result = new DataPointCollection();
    result.dimension_ = 1;
    result.data_ = new List<float[]>();

    float step = (range.Item2 - range.Item1) / nSteps;
    for (int i = 0; i < nSteps; i++)
    {
        result.data_.Add(new float[] { range.Item1 + i * step });
    }

    return result;
}
/// <summary>
/// Load a collection of data points from a tab-delimited stream with one
/// data point per line. Each line may optionally start with a class label
/// (an arbitrary string; empty marks the point as unlabelled) and/or end
/// with a target value, as indicated by the descriptor flags.
/// </summary>
/// <param name="stream">Stream to read from (consumed to the end).</param>
/// <param name="dataDimension">Dimension of the data (excluding class labels and target values).</param>
/// <param name="descriptor">Flags indicating whether class labels and/or target values are present.</param>
public static DataPointCollection Load(System.IO.Stream stream, int dataDimension, DataDescriptor descriptor)
{
    bool bHasTargetValues = (descriptor & DataDescriptor.HasTargetValues) == DataDescriptor.HasTargetValues;
    bool bHasClassLabels = (descriptor & DataDescriptor.HasClassLabels) == DataDescriptor.HasClassLabels;

    DataPointCollection result = new DataPointCollection();
    result.data_ = new List<float[]>();
    result.labels_ = bHasClassLabels ? new List<int>() : null;
    result.targets_ = bHasTargetValues ? new List<float>() : null;
    result.dimension_ = dataDimension;

    char[] seperators = new char[] { '\t' };

    // Expected number of tab-separated fields per line:
    // [label] coord_1 ... coord_d [target]
    int elementsPerLine = (bHasClassLabels ? 1 : 0) + dataDimension + (bHasTargetValues ? 1 : 0);

    using (System.IO.StreamReader r = new System.IO.StreamReader(stream))
    {
        string line;
        while ((line = r.ReadLine()) != null)
        {
            string[] elements = line.Split(seperators);

            if (elements.Length != elementsPerLine)
                throw new Exception("Encountered line with unexpected number of elements.");

            // 'index' walks the fields on this line: label, coords, target.
            int index = 0;

            if (bHasClassLabels)
            {
                if (!String.IsNullOrEmpty(elements[index]))
                {
                    // Map each distinct label string to a small integer,
                    // assigned in order of first appearance.
                    if (!result.labelIndices_.ContainsKey(elements[index]))
                        result.labelIndices_.Add(elements[index], result.labelIndices_.Count);

                    result.labels_.Add(result.labelIndices_[elements[index++]]);
                }
                else
                {
                    // An empty label field marks the point as unlabelled
                    // (used for semi-supervised learning).
                    result.labels_.Add(UnknownClassLabel);
                    index++;
                }
            }

            float[] datum = new float[dataDimension];
            for (int i = 0; i < dataDimension; i++)
                datum[i] = Convert.ToSingle(elements[index++]);

            result.data_.Add(datum);

            if (bHasTargetValues)
                result.targets_.Add(Convert.ToSingle(elements[index++]));
        }
    }

    return result;
}
/// <summary>
/// Generate a 2D dataset with data points distributed in a grid pattern.
/// Intended for generating visualization images.
/// </summary>
/// <param name="rangeX">x-axis range</param>
/// <param name="nStepsX">Number of grid points in x direction</param>
/// <param name="rangeY">y-axis range</param>
/// <param name="nStepsY">Number of grid points in y direction</param>
/// <returns>A new DataPointCollection</returns>
public static DataPointCollection Generate2dGrid(
    Tuple<float, float> rangeX, int nStepsX,
    Tuple<float, float> rangeY, int nStepsY)
{
    if (rangeX.Item1 >= rangeX.Item2)
    {
        throw new ArgumentException("Invalid x-axis range.");
    }
    if (rangeY.Item1 >= rangeY.Item2)
    {
        throw new ArgumentException("Invalid y-axis range.");
    }

    DataPointCollection result = new DataPointCollection();
    result.dimension_ = 2;
    result.data_ = new List<float[]>();

    float stepX = (rangeX.Item2 - rangeX.Item1) / nStepsX;
    float stepY = (rangeY.Item2 - rangeY.Item1) / nStepsY;

    // Row-major: y varies slowest so a flat index maps back to (i, j).
    for (int j = 0; j < nStepsY; j++)
    {
        for (int i = 0; i < nStepsX; i++)
        {
            result.data_.Add(new float[] { rangeX.Item1 + i * stepX, rangeY.Item1 + j * stepY });
        }
    }

    return result;
}
/// <summary>
/// Train a semi-supervised classification forest, then propagate class
/// distributions from labelled leaves to unlabelled leaves ("label
/// transduction") via shortest paths in a graph of leaf-to-leaf distances.
/// </summary>
/// <param name="trainingData">Partially labelled 2D training data.</param>
/// <param name="parameters">Forest training parameters.</param>
/// <param name="a_">Number of 'effective' prior observations.</param>
/// <param name="b_">Variance of the effective observations.</param>
/// <returns>The trained forest, with every leaf holding class statistics.</returns>
public static Forest <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> Train(
    DataPointCollection trainingData,
    TrainingParameters parameters,
    double a_,
    double b_)
{
    // Train the forest
    Console.WriteLine("Training the forest...");

    Random random = new Random();

    ITrainingContext <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> classificationContext =
        new SemiSupervisedClassificationTrainingContext(trainingData.CountClasses(), random, a_, b_);
    var forest = ForestTrainer <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> .TrainForest(
        random, parameters, classificationContext, trainingData);

    // Label transduction to unlabelled leaves from nearest labelled leaf
    List <int> unlabelledLeafIndices = null;
    List <int> labelledLeafIndices = null;
    int[] closestLabelledLeafIndices = null;
    List <int> leafIndices = null;

    for (int t = 0; t < forest.TreeCount; t++)
    {
        var tree = forest.GetTree(t);

        // Partition this tree's leaves into labelled and unlabelled sets.
        // NB the values stored in (un)labelledLeafIndices are positions
        // within leafIndices, not node indices.
        leafIndices = new List <int>();
        unlabelledLeafIndices = new List <int>();
        labelledLeafIndices = new List <int>();

        for (int n = 0; n < tree.NodeCount; n++)
        {
            if (tree.GetNode(n).IsLeaf)
            {
                // A leaf whose histogram saw no samples received only
                // unlabelled training points.
                if (tree.GetNode(n).TrainingDataStatistics.HistogramAggregator.SampleCount == 0)
                {
                    unlabelledLeafIndices.Add(leafIndices.Count);
                }
                else
                {
                    labelledLeafIndices.Add(leafIndices.Count);
                }

                leafIndices.Add(n);
            }
        }

        // Build an upper triangular matrix of inter-leaf distances
        float[,] interLeafDistances = new float[leafIndices.Count, leafIndices.Count];
        for (int i = 0; i < leafIndices.Count; i++)
        {
            for (int j = i + 1; j < leafIndices.Count; j++)
            {
                SemiSupervisedClassificationStatisticsAggregator a = tree.GetNode(leafIndices[i]).TrainingDataStatistics;
                SemiSupervisedClassificationStatisticsAggregator b = tree.GetNode(leafIndices[j]).TrainingDataStatistics;
                GaussianPdf2d x = a.GaussianAggregator2d.GetPdf();
                GaussianPdf2d y = b.GaussianAggregator2d.GetPdf();

                // Symmetric distance: the larger of the two negative log
                // probabilities of each leaf's mean under the other
                // leaf's density estimate.
                interLeafDistances[i, j] = (float)(Math.Max(
                    x.GetNegativeLogProbability((float)(y.MeanX), (float)(y.MeanY)),
                    +y.GetNegativeLogProbability((float)(x.MeanX), (float)(x.MeanY))));
            }
        }

        // Find shortest paths between all pairs of nodes in the graph of leaf nodes
        FloydWarshall pathFinder = new FloydWarshall(interLeafDistances);

        // Find the closest labelled leaf to each unlabelled leaf
        float[] minDistances = new float[unlabelledLeafIndices.Count];
        closestLabelledLeafIndices = new int[unlabelledLeafIndices.Count];
        for (int i = 0; i < minDistances.Length; i++)
        {
            minDistances[i] = float.PositiveInfinity;
            closestLabelledLeafIndices[i] = -1; // unused so deliberately invalid
        }

        for (int l = 0; l < labelledLeafIndices.Count; l++)
        {
            for (int u = 0; u < unlabelledLeafIndices.Count; u++)
            {
                if (pathFinder.GetMinimumDistance(unlabelledLeafIndices[u], labelledLeafIndices[l]) < minDistances[u])
                {
                    minDistances[u] = pathFinder.GetMinimumDistance(unlabelledLeafIndices[u], labelledLeafIndices[l]);
                    closestLabelledLeafIndices[u] = leafIndices[labelledLeafIndices[l]];
                }
            }
        }

        // Propagate class probability distributions to each unlabelled
        // leaf from its nearest labelled leaf.
        for (int u = 0; u < unlabelledLeafIndices.Count; u++)
        {
            // Unhelpfully, C# only allows us to pass value types by value
            // so Tree.GetNode() returns only a COPY of the Node. We update
            // this copy and then copy it back over the top of the
            // original via Tree.SetNode().
            // The C++ version is a lot better!
            var unlabelledLeafCopy = tree.GetNode(leafIndices[unlabelledLeafIndices[u]]);
            var labelledLeafCopy = tree.GetNode(closestLabelledLeafIndices[u]);

            unlabelledLeafCopy.TrainingDataStatistics.HistogramAggregator
                = (HistogramAggregator)(labelledLeafCopy.TrainingDataStatistics.HistogramAggregator.DeepClone());

            tree.SetNode(leafIndices[unlabelledLeafIndices[u]], unlabelledLeafCopy);
        }
    }

    return(forest);
}
/// <summary>
/// Apply a trained forest to some test data.
/// </summary>
/// <typeparam name="F">Type of split function</typeparam>
/// <param name="forest">Trained forest</param>
/// <param name="testData">Test data</param>
/// <returns>An array of class distributions, one per test data point</returns>
public static HistogramAggregator[] Test<F>(Forest<F, HistogramAggregator> forest, DataPointCollection testData) where F : IFeatureResponse
{
    // Infer the class count from any trained node's statistics.
    int nClasses = forest.GetTree(0).GetNode(0).TrainingDataStatistics.BinCount;

    int[][] leafIndicesPerTree = forest.Apply(testData);

    HistogramAggregator[] distributions = new HistogramAggregator[testData.Count()];
    for (int i = 0; i < testData.Count(); i++)
    {
        // Aggregate statistics for this sample over all leaf nodes reached
        HistogramAggregator combined = new HistogramAggregator(nClasses);
        for (int t = 0; t < forest.TreeCount; t++)
        {
            int leafIndex = leafIndicesPerTree[t][i];
            combined.Aggregate(forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics);
        }
        distributions[i] = combined;
    }

    return distributions;
}
/// <summary>
/// Create a visualization image showing the forest's class posterior over
/// a 2D region spanning the training data, with the training points
/// overlaid. High-entropy (uncertain) regions are muddied towards grey.
/// </summary>
/// <typeparam name="F">Type of split function</typeparam>
/// <param name="forest">Trained classification forest.</param>
/// <param name="trainingData">The data the forest was trained on.</param>
/// <param name="PlotSize">Output image size in pixels.</param>
/// <param name="PlotDilation">Fractional padding around the data's bounding box.</param>
/// <returns>A new Bitmap (caller disposes).</returns>
public static Bitmap Visualize<F>(
    Forest<F, HistogramAggregator> forest,
    DataPointCollection trainingData,
    Size PlotSize,
    PointF PlotDilation) where F : IFeatureResponse
{
    // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

    DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

    Console.WriteLine("\nApplying the forest to test data...");
    int[][] leafNodeIndices = forest.Apply(testData);

    // Form a palette of random colors, one per class
    Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];

    // First few colours are same as those in the book, remainder are random.
    colors[0] = Color.FromArgb(183, 170, 8);
    colors[1] = Color.FromArgb(194, 32, 14);
    colors[2] = Color.FromArgb(4, 154, 10);
    colors[3] = Color.FromArgb(13, 26, 188);

    Color grey = Color.FromArgb(255, 127, 127, 127);

    System.Random r = new Random(0); // same seed every time so colours will be consistent
    for (int c = 4; c < colors.Length; c++)
    {
        colors[c] = Color.FromArgb(255, r.Next(0, 255), r.Next(0, 255), r.Next(0, 255));
    }

    // Create a visualization image
    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    // For each pixel...
    int index = 0;
    for (int j = 0; j < PlotSize.Height; j++)
    {
        for (int i = 0; i < PlotSize.Width; i++)
        {
            // Aggregate statistics for this sample over all leaf nodes reached
            HistogramAggregator h = new HistogramAggregator(trainingData.CountClasses());
            for (int t = 0; t < forest.TreeCount; t++)
            {
                int leafIndex = leafNodeIndices[t][index];
                h.Aggregate(forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics);
            }

            // Let's muddy the colors with grey where the entropy is high.
            float mudiness = 0.5f * (float)(h.Entropy());

            float R = 0.0f, G = 0.0f, B = 0.0f;

            for (int b = 0; b < trainingData.CountClasses(); b++)
            {
                float p = (1.0f - mudiness) * h.GetProbability(b); // NB probabilities sum to 1.0 over the classes

                R += colors[b].R * p;
                G += colors[b].G * p;
                B += colors[b].B * p;
            }

            R += grey.R * mudiness;
            G += grey.G * mudiness;
            B += grey.B * mudiness;

            Color c = Color.FromArgb(255, (byte)(R), (byte)(G), (byte)(B));

            result.SetPixel(i, j, c); // painfully slow but safe

            index++;
        }
    }

    // Also draw the original training data.
    // FIX: the original created a new SolidBrush and Pen per data point and
    // never disposed them, leaking GDI handles; the pen is now hoisted and
    // all GDI objects are disposed via 'using'.
    using (Graphics g = Graphics.FromImage(result))
    using (Pen outlinePen = new Pen(Color.Black))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        for (int s = 0; s < trainingData.Count(); s++)
        {
            PointF x = new PointF(
                (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

            RectangleF rectangle = new RectangleF(x.X - 3.0f, x.Y - 3.0f, 6.0f, 6.0f);
            using (Brush fill = new SolidBrush(colors[trainingData.GetIntegerLabel(s)]))
            {
                g.FillRectangle(fill, rectangle);
            }
            g.DrawRectangle(outlinePen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
        }
    }

    return result;
}
/// <summary>
/// Visualize the class posterior of a trained semi-supervised classification
/// forest over the 2D input domain, and overlay the training points.
/// </summary>
/// <param name="forest">A trained semi-supervised classification forest.</param>
/// <param name="trainingData">The (partially labelled) 2D training data.</param>
/// <param name="PlotSize">Size of the output bitmap in pixels (one test sample per pixel).</param>
/// <param name="PlotDilation">Fractional padding added around the data extent.</param>
/// <returns>A bitmap of per-pixel class posteriors with training points overlaid.</returns>
public static Bitmap VisualizeLabels(Forest <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> forest, DataPointCollection trainingData, Size PlotSize, PointF PlotDilation)
{
    // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

    // Apply the trained forest to the test data
    Console.WriteLine("\nApplying the forest to test data...");
    DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);
    int[][] leafNodeIndices = forest.Apply(testData);

    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    // NOTE(review): the original code also built a full per-leaf table of
    // GaussianPdf2d objects (leafDistributions) here, but that table was never
    // read anywhere in this method; the dead computation has been removed.

    // Form a palette of random colors, one per class
    Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];

    // First few colours are same as those in the book, remainder are random.
    colors[0] = Color.FromArgb(183, 170, 8);
    colors[1] = Color.FromArgb(194, 32, 14);
    colors[2] = Color.FromArgb(4, 154, 10);
    colors[3] = Color.FromArgb(13, 26, 188);

    Color grey = Color.FromArgb(255, 127, 127, 127);

    System.Random r = new Random(0); // same seed every time so colours will be consistent
    for (int c = 4; c < colors.Length; c++)
    {
        colors[c] = Color.FromArgb(255, r.Next(0, 255), r.Next(0, 255), r.Next(0, 255));
    }

    // Paint the test data: one aggregated class posterior per pixel.
    int index = 0;
    for (int j = 0; j < PlotSize.Height; j++)
    {
        for (int i = 0; i < PlotSize.Width; i++)
        {
            // Aggregate statistics for this sample over all leaf nodes reached
            HistogramAggregator h = new HistogramAggregator(trainingData.CountClasses());
            for (int t = 0; t < forest.TreeCount; t++)
            {
                int leafIndex = leafNodeIndices[t][index];
                SemiSupervisedClassificationStatisticsAggregator a = forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics;
                h.Aggregate(a.HistogramAggregator);
            }

            // Let's muddy the colors with a little grey where entropy is high.
            float muddiness = 0.5f * (float)(h.Entropy());

            float R = 0.0f, G = 0.0f, B = 0.0f;
            for (int b = 0; b < trainingData.CountClasses(); b++)
            {
                float p = (1.0f - muddiness) * h.GetProbability(b); // NB probabilities sum to 1.0 over the classes

                R += colors[b].R * p;
                G += colors[b].G * p;
                B += colors[b].B * p;
            }
            R += grey.R * muddiness;
            G += grey.G * muddiness;
            B += grey.B * muddiness;

            Color c = Color.FromArgb(255, (byte)(R), (byte)(G), (byte)(B));
            result.SetPixel(i, j, c);

            index++;
        }
    }

    PaintTrainingData(trainingData, plotCanvas, result);

    return(result);
}
public float GetResponse(IDataPointCollection data, int sampleIndex) { DataPointCollection concreteData = (DataPointCollection)(data); return(concreteData.GetDataPoint((int)sampleIndex)[axis_]); }
public float GetResponse(IDataPointCollection data, int index) { DataPointCollection concreteData = (DataPointCollection)(data); return(dx_ * concreteData.GetDataPoint((int)index)[0] + dy_ * concreteData.GetDataPoint((int)index)[1]); }
/// <summary>
/// Load a collection of data points from a tab-delimited stream with one data
/// point per line. The data may optionally have associated class labels (first
/// element on line) and/or target values (last element on line), as indicated
/// by the descriptor flags. An empty label field marks an unlabelled point
/// (UnknownClassLabel).
/// </summary>
/// <param name="stream">Stream to be read.</param>
/// <param name="dataDimension">Dimension of the data (excluding class labels and target values).</param>
/// <param name="descriptor">Flags indicating whether lines carry class labels and/or target values.</param>
/// <returns>A new DataPointCollection.</returns>
static public DataPointCollection Load(System.IO.Stream stream, int dataDimension, DataDescriptor descriptor)
{
    bool bHasTargetValues = (descriptor & DataDescriptor.HasTargetValues) == DataDescriptor.HasTargetValues;
    bool bHasClassLabels = (descriptor & DataDescriptor.HasClassLabels) == DataDescriptor.HasClassLabels;

    DataPointCollection result = new DataPointCollection();
    result.data_ = new List <float[]>();
    result.labels_ = bHasClassLabels ? new List <int>() : null;
    result.targets_ = bHasTargetValues ? new List <float>() : null;
    result.dimension_ = dataDimension;

    char[] separators = new char[] { '\t' };

    int elementsPerLine = (bHasClassLabels ? 1 : 0) + dataDimension + (bHasTargetValues ? 1 : 0);

    using (System.IO.StreamReader r = new System.IO.StreamReader(stream))
    {
        string line;
        while ((line = r.ReadLine()) != null)
        {
            string[] elements = line.Split(separators);

            if (elements.Length != elementsPerLine)
            {
                throw new Exception("Encountered line with unexpected number of elements.");
            }

            int index = 0;

            if (bHasClassLabels)
            {
                if (!String.IsNullOrEmpty(elements[index]))
                {
                    // Map each distinct label string to a dense integer index.
                    if (!result.labelIndices_.ContainsKey(elements[index]))
                    {
                        result.labelIndices_.Add(elements[index], result.labelIndices_.Count);
                    }

                    result.labels_.Add(result.labelIndices_[elements[index++]]);
                }
                else
                {
                    // An empty label field denotes an unlabelled (e.g. semi-supervised) point.
                    result.labels_.Add(UnknownClassLabel);
                    index++;
                }
            }

            float[] datum = new float[dataDimension];
            for (int i = 0; i < dataDimension; i++)
            {
                // FIX: parse with the invariant culture so '.'-formatted data files
                // load correctly regardless of the current thread locale (CA1305).
                datum[i] = Convert.ToSingle(elements[index++], System.Globalization.CultureInfo.InvariantCulture);
            }

            result.data_.Add(datum);

            if (bHasTargetValues)
            {
                result.targets_.Add(Convert.ToSingle(elements[index++], System.Globalization.CultureInfo.InvariantCulture));
            }
        }
    }

    return(result);
}
/// <summary>
/// Visualize the probability density estimated by a trained density-estimation
/// forest over the 2D input domain, and overlay the training points.
/// </summary>
/// <param name="forest">A trained density-estimation forest.</param>
/// <param name="trainingData">The unlabelled 2D training data.</param>
/// <param name="PlotSize">Size of the output bitmap in pixels (one test sample per pixel).</param>
/// <param name="PlotDilation">Fractional padding added around the data extent.</param>
/// <returns>A bitmap of the estimated density with training points overlaid.</returns>
public static Bitmap Visualize(
    Forest <AxisAlignedFeatureResponse, GaussianAggregator2d> forest,
    DataPointCollection trainingData,
    Size PlotSize,
    PointF PlotDilation)
{
    // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

    // Apply the trained forest to the test data
    Console.WriteLine("\nApplying the forest to test data...");
    DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);
    int[][] leafNodeIndices = forest.Apply(testData);

    // Compute normalization factors per node
    int nTrainingPoints = (int)(trainingData.Count()); // could also count over tree nodes if training data no longer accessible
    double[][] normalizationFactors = new double[forest.TreeCount][];
    for (int t = 0; t < forest.TreeCount; t++)
    {
        normalizationFactors[t] = new double[forest.GetTree(t).NodeCount];
        ComputeNormalizationFactorsRecurse(forest.GetTree(t), 0, nTrainingPoints, new Bounds(2), normalizationFactors[t]);
    }

    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    // Paint the test data
    int index = 0;
    for (int j = 0; j < PlotSize.Height; j++)
    {
        for (int i = 0; i < PlotSize.Width; i++)
        {
            // Map pixel coordinate (i,j) in visualization image back to point in input space
            float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
            float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

            // Aggregate statistics for this sample over all trees
            double probability = 0.0;
            for (int t = 0; t < forest.TreeCount; t++)
            {
                int leafIndex = leafNodeIndices[t][index];

                probability += normalizationFactors[t][leafIndex] * forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics.GetPdf().GetProbability(x, y);
            }

            probability /= forest.TreeCount;

            // 'Gamma correct' probability density for better display
            float l = (float)(LuminanceScaleFactor * Math.Pow(probability, Gamma));

            if (l < 0)
            {
                l = 0;
            }
            else if (l > 255)
            {
                l = 255;
            }

            Color c = Color.FromArgb(255, (byte)(l), 0, 0);
            result.SetPixel(i, j, c);

            index++;
        }
    }

    // Also plot the original training data
    // FIX: the original leaked one SolidBrush and one Pen per training point;
    // both are loop-invariant, so create them once and dispose them.
    using (Graphics g = Graphics.FromImage(result))
    using (SolidBrush pointBrush = new SolidBrush(DataPointColor))
    using (Pen outlinePen = new Pen(Color.Black))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        for (int s = 0; s < trainingData.Count(); s++)
        {
            // Map the data point back to a pixel coordinate in the visualization image
            PointF x = new PointF(
                (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

            RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
            g.FillRectangle(pointBrush, rectangle);
            g.DrawRectangle(outlinePen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
        }
    }

    return(result);
}
/// <summary>
/// Train a regression forest on 1D data with scalar targets, then render a
/// visualization showing the learned conditional density p(y|x), the mean
/// prediction curve, and the original training points.
/// </summary>
/// <param name="trainingData">1D training data with associated target values.</param>
/// <returns>The visualization bitmap.</returns>
public Bitmap Run(DataPointCollection trainingData)
{
    // Train the forest
    Console.WriteLine("Training the forest...");

    Random random = new Random();
    ITrainingContext <AxisAlignedFeatureResponse, LinearFitAggregator1d> regressionTrainingContext = new RegressionTrainingContext();

    var forest = ForestTrainer <AxisAlignedFeatureResponse, LinearFitAggregator1d> .TrainForest(
        random, TrainingParameters, regressionTrainingContext, trainingData);

    // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
    PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetTargetRange(), PlotSize, PlotDilation);

    DataPointCollection testData = DataPointCollection.Generate1dGrid(plotCanvas.plotRangeX, PlotSize.Width);

    // Apply the trained forest to the test data
    Console.WriteLine("\nApplying the forest to test data...");
    int[][] leafNodeIndices = forest.Apply(testData);

    #region Generate Visualization Image
    Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

    // Plot the learned density
    Color inverseDensityColor = Color.FromArgb(255, 255 - DensityColor.R, 255 - DensityColor.G, 255 - DensityColor.B);

    double[] mean_y_given_x = new double[PlotSize.Width];

    // NB: test samples form a 1D grid (one per image column), so the leaf
    // index for a pixel depends only on its column i. (An unused per-pixel
    // counter in the original has been removed.)
    for (int i = 0; i < PlotSize.Width; i++)
    {
        double totalProbability = 0.0;
        for (int j = 0; j < PlotSize.Height; j++)
        {
            // Map pixel coordinate (i,j) in visualization image back to point in input space
            float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
            float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

            double probability = 0.0;

            // Aggregate statistics for this sample over all trees
            for (int t = 0; t < forest.TreeCount; t++)
            {
                Node <AxisAlignedFeatureResponse, LinearFitAggregator1d> leafNodeCopy = forest.GetTree(t).GetNode(leafNodeIndices[t][i]);

                LinearFitAggregator1d leafStatistics = leafNodeCopy.TrainingDataStatistics;

                probability += leafStatistics.GetProbability(x, y);
            }

            probability /= forest.TreeCount;

            mean_y_given_x[i] += probability * y;
            totalProbability += probability;

            float scale = 10.0f * (float)probability;

            Color weightedColor = Color.FromArgb(
                255,
                (byte)(Math.Min(scale * inverseDensityColor.R + 0.5f, 255.0f)),
                (byte)(Math.Min(scale * inverseDensityColor.G + 0.5f, 255.0f)),
                (byte)(Math.Min(scale * inverseDensityColor.B + 0.5f, 255.0f)));

            // FIX: the blue channel previously re-used weightedColor.G (copy-paste
            // bug); it must invert the blue component to match R and G.
            Color c = Color.FromArgb(255, 255 - weightedColor.R, 255 - weightedColor.G, 255 - weightedColor.B);

            result.SetPixel(i, j, c);
        }

        // NB We don't really compute the mean over y, just over the region of y that is plotted
        mean_y_given_x[i] /= totalProbability;
    }

    // Also plot the mean curve and the original training data
    using (Graphics g = Graphics.FromImage(result))
    {
        g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

        using (Pen meanPen = new Pen(MeanColor, 2))
        {
            for (int i = 0; i < PlotSize.Width - 1; i++)
            {
                g.DrawLine(
                    meanPen,
                    (float)(i),
                    (float)((mean_y_given_x[i] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY),
                    (float)(i + 1),
                    (float)((mean_y_given_x[i + 1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY));
            }
        }

        using (Brush dataPointBrush = new SolidBrush(DataPointColor))
        using (Pen dataPointBorderPen = new Pen(DataPointBorderColor))
        {
            for (int s = 0; s < trainingData.Count(); s++)
            {
                // Map sample coordinate back to a pixel coordinate in the visualization image
                PointF x = new PointF(
                    (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                    (trainingData.GetTarget(s) - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                g.FillRectangle(dataPointBrush, rectangle);
                g.DrawRectangle(dataPointBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
            }
        }
    }

    return(result);
    #endregion
}