public void Aggregate(IDataPointCollection data, int index)
        {
            DataPointCollection concreteData = (DataPointCollection)(data);

            bins_[concreteData.GetIntegerLabel((int)index)]++;
            sampleCount_ += 1;
        }
        /// <summary>
        /// Generate a 2D dataset with data points distributed in a grid pattern.
        /// Intended for generating visualization images.
        /// </summary>
        /// <param name="rangeX">x-axis range</param>
        /// <param name="nStepsX">Number of grid points in x direction</param>
        /// <param name="rangeY">y-axis range</param>
        /// <param name="nStepsY">Number of grid points in y direction</param>
        /// <returns>A new DataPointCollection</returns>
        static public DataPointCollection Generate2dGrid(
            Tuple <float, float> rangeX, int nStepsX,
            Tuple <float, float> rangeY, int nStepsY)
        {
            if (rangeX.Item1 >= rangeX.Item2)
            {
                throw new ArgumentException("Invalid x-axis range.");
            }
            if (rangeY.Item1 >= rangeY.Item2)
            {
                throw new ArgumentException("Invalid y-axis range.");
            }

            DataPointCollection result = new DataPointCollection();

            result.dimension_ = 2;
            result.data_      = new List <float[]>();

            float stepX = (rangeX.Item2 - rangeX.Item1) / nStepsX;
            float stepY = (rangeY.Item2 - rangeY.Item1) / nStepsY;

            for (int j = 0; j < nStepsY; j++)
            {
                for (int i = 0; i < nStepsX; i++)
                {
                    result.data_.Add(new float[] { rangeX.Item1 + i * stepX, rangeY.Item1 + j * stepY });
                }
            }

            return(result);
        }
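        // Usage sketch (not part of the original source): generating a 10x10
        // grid of test points over [0,1] x [0,1] with the Generate2dGrid method
        // above. The helper name is hypothetical.
        static void Generate2dGridExample()
        {
            DataPointCollection grid = DataPointCollection.Generate2dGrid(
                Tuple.Create(0.0f, 1.0f), 10,
                Tuple.Create(0.0f, 1.0f), 10);

            // 100 points laid out row by row; point (i, j) sits at
            // (rangeX.Item1 + i * stepX, rangeY.Item1 + j * stepY), so the
            // upper range bounds are excluded.
            Console.WriteLine("Generated {0} grid points.", grid.Count());
        }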
        static public Forest <F, HistogramAggregator> Train <F>(
            DataPointCollection trainingData,
            IFeatureFactory <F> featureFactory,
            TrainingParameters trainingParameters) where F : IFeatureResponse
        {
            if (trainingData.Dimensions != 2)
            {
                throw new Exception("Training data points must be 2D.");
            }
            if (trainingData.HasLabels == false)
            {
                throw new Exception("Training data points must be labelled.");
            }
            if (trainingData.HasTargetValues == true)
            {
                throw new Exception("Training data points should not have target values.");
            }

            Console.WriteLine("Running training...");

            Random random = new Random();
            ITrainingContext <F, HistogramAggregator> classificationContext =
                new ClassificationTrainingContext <F>(trainingData.CountClasses(), featureFactory, random);

            var forest = ForestTrainer <F, HistogramAggregator> .TrainForest(
                random,
                trainingParameters,
                classificationContext,
                trainingData);

            return(forest);
        }
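        // Usage sketch (not part of the original source): training a
        // classification forest over 2D labelled data with axis-aligned split
        // functions. AxisAlignedFeatureFactory and the TrainingParameters
        // property names are taken from their uses later in this file; the
        // helper name is hypothetical.
        static void TrainExample(DataPointCollection labelledData2d)
        {
            TrainingParameters parameters = new TrainingParameters()
            {
                MaxDecisionLevels                     = 5,
                NumberOfCandidateFeatures             = 10,
                NumberOfCandidateThresholdsPerFeature = 1,
                NumberOfTrees = 10,
                Verbose       = false
            };

            Forest <AxisAlignedFeatureResponse, HistogramAggregator> forest =
                Train(labelledData2d, new AxisAlignedFeatureFactory(), parameters);
        }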
        static public Forest <AxisAlignedFeatureResponse, GaussianAggregator2d> Train(
            DataPointCollection trainingData,
            TrainingParameters parameters,
            double a,
            double b)
        {
            if (trainingData.Dimensions != 2)
            {
                throw new Exception("Training data points for density estimation were not 2D.");
            }
            if (trainingData.HasLabels == true)
            {
                throw new Exception("Density estimation training data should not be labelled.");
            }
            if (trainingData.HasTargetValues == true)
            {
                throw new Exception("Training data should not have target values.");
            }

            // Train the forest
            Console.WriteLine("Training the forest...");

            Random random = new Random();

            ITrainingContext <AxisAlignedFeatureResponse, GaussianAggregator2d> densityEstimationTrainingContext =
                new DensityEstimationTrainingContext(a, b);
            var forest = ForestTrainer <AxisAlignedFeatureResponse, GaussianAggregator2d> .TrainForest(
                random,
                parameters,
                densityEstimationTrainingContext,
                trainingData);

            return(forest);
        }
        public static Bitmap VisualizeDensity(Forest <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> forest, DataPointCollection trainingData, Size PlotSize, PointF PlotDilation)
        {
            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

            // Apply the trained forest to the test data
            Console.WriteLine("\nApplying the forest to test data...");

            DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

            int[][] leafNodeIndices = forest.Apply(testData);

            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // Paint the test data
            int index = 0;

            for (int j = 0; j < PlotSize.Height; j++)
            {
                for (int i = 0; i < PlotSize.Width; i++)
                {
                    // Map pixel coordinate (i,j) in visualization image back to point in input space
                    float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
                    float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

                    // Aggregate statistics for this sample over all trees
                    double probability = 0.0;
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        int leafIndex = leafNodeIndices[t][index];
                        probability += forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics.GaussianAggregator2d.GetPdf().GetProbability(x, y);
                    }

                    probability /= forest.TreeCount;

                    float l = (float)(LuminanceScaleFactor * probability);

                    if (l < 0)
                    {
                        l = 0;
                    }
                    else if (l > 255)
                    {
                        l = 255;
                    }

                    Color c = Color.FromArgb(255, (byte)(l), 0, 0);
                    result.SetPixel(i, j, c);

                    index++;
                }
            }

            PaintTrainingData(trainingData, plotCanvas, result);

            return(result);
        }
        public void Aggregate(IDataPointCollection data, int index, Object userData)
        {
            DataPointCollection concreteData  = (DataPointCollection)(data);
            HistogramData       histogramData = (HistogramData)(userData);

            histogramData.Increment(dataHandle_, concreteData.GetIntegerLabel((int)index));
            // bins_[concreteData.GetIntegerLabel((int)index)]++;
            sampleCount_ += 1;
        }
        static void PaintTrainingData(DataPointCollection trainingData, PlotCanvas plotCanvas, Bitmap result)
        {
            // First few colours are same as those in the book, remainder are random.
            Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];
            colors[0] = Color.FromArgb(183, 170, 8);
            colors[1] = Color.FromArgb(194, 32, 14);
            colors[2] = Color.FromArgb(4, 154, 10);
            colors[3] = Color.FromArgb(13, 26, 188);

            System.Random r = new Random(0); // same seed every time so colours will be consistent
            for (int c = 4; c < colors.Length; c++)
            {
                colors[c] = Color.FromArgb(255, r.Next(0, 256), r.Next(0, 256), r.Next(0, 256)); // NB Next's upper bound is exclusive
            }

            // Also plot the original training data (a little bigger for clarity)
            using (Graphics g = Graphics.FromImage(result))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                // Paint unlabelled data
                for (int s = 0; s < trainingData.Count(); s++)
                {
                    if (trainingData.GetIntegerLabel(s) == DataPointCollection.UnknownClassLabel) // unlabelled
                    {
                        PointF x = new PointF(
                            (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                            (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                        RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                        g.FillRectangle(new SolidBrush(UnlabelledDataPointColor), rectangle);
                        g.DrawRectangle(new Pen(Color.Black), rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                    }
                }

                // Paint labelled data on top
                for (int s = 0; s < trainingData.Count(); s++)
                {
                    if (trainingData.GetIntegerLabel(s) != DataPointCollection.UnknownClassLabel)
                    {
                        PointF x = new PointF(
                            (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                            (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                        RectangleF rectangle = new RectangleF(x.X - 5.0f, x.Y - 5.0f, 10.0f, 10.0f);
                        g.FillRectangle(new SolidBrush(colors[trainingData.GetIntegerLabel(s)]), rectangle);
                        g.DrawRectangle(new Pen(Color.White, 2), rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                    }
                }
            }
        }
        public void Aggregate(IDataPointCollection data, int index)
        {
            DataPointCollection concreteData = (DataPointCollection)(data);

            // Always aggregate density statistics
            GaussianAggregator2d.Aggregate(data, index);

            // Only aggregate histogram statistics for those data points that have class labels
            if (concreteData.GetIntegerLabel((int)(index)) != DataPointCollection.UnknownClassLabel)
            {
                HistogramAggregator.Aggregate(data, index);
            }
        }
        static DataPointCollection LoadTrainingData(
            string path,
            string alternativePath,
            int dimension,
            DataDescriptor dataDescriptor)
        {
            System.IO.FileStream stream = null;
            try
            {
                stream = new FileStream(path, FileMode.Open, FileAccess.Read);
            }
            catch (Exception)
            {
                string a = System.IO.Path.Combine(
                    Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "/",
                    alternativePath);
                a = System.IO.Path.Combine(a, path);
                try
                {
                    stream = new FileStream(a, FileMode.Open, FileAccess.Read);
                }
                catch (Exception)
                {
                    Console.WriteLine("Failed to open training data file at \"{0}\" or \"{1}\".", path, a);
                    Environment.Exit(-1);
                }
            }

            DataPointCollection trainingData = null;

            try
            {
                trainingData = DataPointCollection.Load(
                    stream,
                    dimension,
                    dataDescriptor);
            }
            catch (Exception e)
            {
                Console.WriteLine("Failed to read training data. " + e.Message);
                Environment.Exit(-1);
            }

            if (trainingData.Count() < 1)
            {
                Console.WriteLine("Insufficient training data.");
                Environment.Exit(-1);
            }

            return(trainingData);
        }
        public void Aggregate(IDataPointCollection data, int index)
        {
            DataPointCollection concreteData = (DataPointCollection)(data);

            sx_ += concreteData.GetDataPoint((int)index)[0];
            sy_ += concreteData.GetDataPoint((int)index)[1];

            sxx_ += Math.Pow(concreteData.GetDataPoint((int)index)[0], 2.0);
            syy_ += Math.Pow(concreteData.GetDataPoint((int)index)[1], 2.0);

            sxy_ += concreteData.GetDataPoint((int)index)[0] * concreteData.GetDataPoint((int)index)[1];

            sampleCount_ += 1;
        }
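        // Sketch (not part of the original source): how a 2D Gaussian could be
        // recovered from the moments accumulated above. The library's
        // GaussianAggregator2d.GetPdf() presumably also folds in the (a, b)
        // prior hyperparameters; this ignores them. The helper name is
        // hypothetical.
        static void MomentsToGaussianSketch(
            double sx, double sy, double sxx, double syy, double sxy, int sampleCount)
        {
            double meanX = sx / sampleCount;
            double meanY = sy / sampleCount;

            // Covariance entries via E[uv] - E[u]E[v]
            double varX  = sxx / sampleCount - meanX * meanX;
            double varY  = syy / sampleCount - meanY * meanY;
            double covXY = sxy / sampleCount - meanX * meanY;

            Console.WriteLine(
                "mean = ({0}, {1}), cov = [[{2}, {3}], [{3}, {4}]]",
                meanX, meanY, varX, covXY, varY);
        }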
        public void Aggregate(IDataPointCollection data, int index)
        {
            DataPointCollection concreteData = (DataPointCollection)(data);

            float[] datum  = concreteData.GetDataPoint((int)index);
            float   target = concreteData.GetTarget((int)index);

            XT_X_11_ += datum[0] * datum[0];
            XT_X_12_ += datum[0];
            XT_X_21_ += datum[0];
            XT_X_22_ += 1.0;

            XT_Y_1_ += datum[0] * target;
            XT_Y_2_ += target;

            Y2_ += target * target;

            sampleCount_ += 1;
        }
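        // Sketch (not part of the original source): the sums above are the
        // entries of the 2x2 normal equations (X^T X) w = X^T y for the model
        // y = w0 * x + w1, so the least-squares fit can be recovered by
        // Cramer's rule. The helper name is hypothetical.
        static void SolveLinearFitSketch(
            double xtx11, double xtx12, double xtx21, double xtx22,
            double xty1, double xty2)
        {
            double det = xtx11 * xtx22 - xtx12 * xtx21;

            double slope     = (xty1 * xtx22 - xtx12 * xty2) / det;
            double intercept = (xtx11 * xty2 - xtx21 * xty1) / det;

            Console.WriteLine("y ~= {0} * x + {1}", slope, intercept);
        }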
        /// <summary>
        /// Generate a 1D dataset containing a given number of data points
        /// distributed at regular intervals within a given range. Intended for
        /// generating visualization images.
        /// </summary>
        /// <param name="range">Range</param>
        /// <param name="nStepsX">Number of grid points</param>
        /// <returns>A new DataPointCollection</returns>
        static public DataPointCollection Generate1dGrid(Tuple <float, float> range, int nSteps)
        {
            if (range.Item1 >= range.Item2)
            {
                throw new ArgumentException("Invalid range.");
            }

            DataPointCollection result = new DataPointCollection();

            result.dimension_ = 1;
            result.data_      = new List <float[]>();

            float step = (range.Item2 - range.Item1) / nSteps;

            for (int i = 0; i < nSteps; i++)
            {
                result.data_.Add(new float[] { range.Item1 + i * step });
            }

            return(result);
        }
        static void Main(string[] args)
        {
            if (args.Length == 0 || args[0] == "/?" || args[0].ToLower() == "help")
            {
                DisplayHelp();
                return;
            }

            // These command line parameters are reused over several command line modes...
            StringParameter       trainingDataPath = new StringParameter("path", "Path of file containing training data.");
            NaturalParameter      T             = new NaturalParameter("t", "No. of trees in the forest (default = {0}).", 10);
            NaturalParameter      D             = new NaturalParameter("d", "Maximum tree levels (default = {0}).", 10, 20);
            NaturalParameter      F             = new NaturalParameter("f", "No. of candidate feature responses per decision node (default = {0}).", 10);
            NaturalParameter      L             = new NaturalParameter("l", "No. of candidate thresholds per feature response (default = {0}).", 1);
            SingleParameter       a             = new SingleParameter("a", "The number of 'effective' prior observations (default = {0}).", true, false, 10.0f);
            SingleParameter       b             = new SingleParameter("b", "The variance of the effective observations (default = {0}).", true, true, 400.0f);
            SimpleSwitchParameter verboseSwitch = new SimpleSwitchParameter("Enables verbose progress indication.");
            SingleParameter       plotPaddingX  = new SingleParameter("padx", "Pad plot horizontally (default = {0}).", true, false, 0.1f);
            SingleParameter       plotPaddingY  = new SingleParameter("pady", "Pad plot vertically (default = {0}).", true, false, 0.1f);
            EnumParameter         split         = new EnumParameter(
                "s",
                "Specify what kind of split function to use (default = {0}).",
                new string[] { "axis", "linear" },
                new string[] { "axis-aligned split", "linear split" },
                "axis");

            // Behaviour depends on command line mode...
            string mode = args[0].ToLower(); // first argument defines the command line mode

            if (mode == "clas" || mode == "class")
            {
                #region Supervised classification
                CommandLineParser parser = new CommandLineParser();

                parser.Command = "SW " + mode.ToUpper();

                parser.AddArgument(trainingDataPath);
                parser.AddSwitch("T", T);
                parser.AddSwitch("D", D);
                parser.AddSwitch("F", F);
                parser.AddSwitch("L", L);
                parser.AddSwitch("SPLIT", split);

                parser.AddSwitch("PADX", plotPaddingX);
                parser.AddSwitch("PADY", plotPaddingY);
                parser.AddSwitch("VERBOSE", verboseSwitch);

                // Default values up above should be fine here.

                if (args.Length == 1)
                {
                    parser.PrintHelp();
                    DisplayTextFiles(CLAS_DATA_PATH);
                    return;
                }

                if (parser.Parse(args, 1) == false)
                {
                    return;
                }

                TrainingParameters trainingParameters = new TrainingParameters()
                {
                    MaxDecisionLevels                     = D.Value - 1,
                    NumberOfCandidateFeatures             = F.Value,
                    NumberOfCandidateThresholdsPerFeature = L.Value,
                    NumberOfTrees = T.Value,
                    Verbose       = verboseSwitch.Used
                };

                PointF plotDilation = new PointF(plotPaddingX.Value, plotPaddingY.Value);

                DataPointCollection trainingData = LoadTrainingData(
                    trainingDataPath.Value,
                    CLAS_DATA_PATH,
                    2,
                    DataDescriptor.HasClassLabels);

                if (split.Value == "linear")
                {
                    Forest <LinearFeatureResponse2d, HistogramAggregator> forest = ClassificationExample.Train(
                        trainingData,
                        new LinearFeatureFactory(),
                        trainingParameters);

                    using (Bitmap result = ClassificationExample.Visualize(forest, trainingData, new Size(300, 300), plotDilation))
                    {
                        ShowVisualizationImage(result);
                    }
                }
                else if (split.Value == "axis")
                {
                    Forest <AxisAlignedFeatureResponse, HistogramAggregator> forest = ClassificationExample.Train(
                        trainingData,
                        new AxisAlignedFeatureFactory(),
                        trainingParameters);

                    using (Bitmap result = ClassificationExample.Visualize(forest, trainingData, new Size(300, 300), plotDilation))
                    {
                        ShowVisualizationImage(result);
                    }
                }
                #endregion
            }
            else if (mode == "density")
            {
                #region Density Estimation
                CommandLineParser parser = new CommandLineParser();

                parser.Command = "SW " + mode.ToUpper();

                parser.AddArgument(trainingDataPath);
                parser.AddSwitch("T", T);
                parser.AddSwitch("D", D);
                parser.AddSwitch("F", F);
                parser.AddSwitch("L", L);

                // For density estimation (and semi-supervised learning) we add
                // a command line option to set the hyperparameters of the prior.
                parser.AddSwitch("a", a);
                parser.AddSwitch("b", b);

                parser.AddSwitch("PADX", plotPaddingX);
                parser.AddSwitch("PADY", plotPaddingY);
                parser.AddSwitch("VERBOSE", verboseSwitch);

                // Override default values for command line options.
                T.Value = 1;
                D.Value = 3;
                F.Value = 5;
                L.Value = 1;
                a.Value = 0;
                b.Value = 900;

                if (args.Length == 1)
                {
                    parser.PrintHelp();
                    DisplayTextFiles(DENSITY_DATA_PATH);
                    return;
                }

                if (parser.Parse(args, 1) == false)
                {
                    return;
                }

                TrainingParameters parameters = new TrainingParameters()
                {
                    MaxDecisionLevels                     = D.Value - 1,
                    NumberOfCandidateFeatures             = F.Value,
                    NumberOfCandidateThresholdsPerFeature = L.Value,
                    NumberOfTrees = T.Value,
                    Verbose       = verboseSwitch.Used
                };

                DataPointCollection trainingData = LoadTrainingData(
                    trainingDataPath.Value,
                    DENSITY_DATA_PATH,
                    2,
                    DataDescriptor.Unadorned);

                Forest <AxisAlignedFeatureResponse, GaussianAggregator2d> forest = DensityEstimationExample.Train(trainingData, parameters, a.Value, b.Value);

                PointF plotDilation = new PointF(plotPaddingX.Value, plotPaddingY.Value);

                using (Bitmap result = DensityEstimationExample.Visualize(forest, trainingData, new Size(300, 300), plotDilation))
                {
                    ShowVisualizationImage(result);
                }
                #endregion
            }
            else if (mode == "ssclas" || mode == "ssclas")
            {
                #region Semi-supervised classification

                CommandLineParser parser = new CommandLineParser();

                parser.Command = "SW " + mode.ToUpper();

                parser.AddArgument(trainingDataPath);
                parser.AddSwitch("T", T);
                parser.AddSwitch("D", D);
                parser.AddSwitch("F", F);
                parser.AddSwitch("L", L);

                parser.AddSwitch("split", split);

                parser.AddSwitch("a", a);
                parser.AddSwitch("b", b);

                EnumParameter plotMode = new EnumParameter(
                    "plot",
                    "Determines what to plot",
                    new string[] { "density", "labels" },
                    new string[] { "plot recovered density estimate", "plot class likelihood" },
                    "labels");
                parser.AddSwitch("plot", plotMode);

                parser.AddSwitch("PADX", plotPaddingX);
                parser.AddSwitch("PADY", plotPaddingY);

                parser.AddSwitch("VERBOSE", verboseSwitch);

                // Override default values for command line options.
                T.Value = 10;
                D.Value = 12 - 1;
                F.Value = 30;
                L.Value = 1;

                if (args.Length == 1)
                {
                    parser.PrintHelp();
                    DisplayTextFiles(SSCLAS_DATA_PATH);
                    return;
                }

                if (parser.Parse(args, 1) == false)
                {
                    return;
                }

                DataPointCollection trainingData = LoadTrainingData(
                    trainingDataPath.Value,
                    SSCLAS_DATA_PATH,
                    2,
                    DataDescriptor.HasClassLabels);

                TrainingParameters parameters = new TrainingParameters()
                {
                    MaxDecisionLevels                     = D.Value - 1,
                    NumberOfCandidateFeatures             = F.Value,
                    NumberOfCandidateThresholdsPerFeature = L.Value,
                    NumberOfTrees = T.Value,
                    Verbose       = verboseSwitch.Used
                };

                Forest <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> forest = SemiSupervisedClassificationExample.Train(
                    trainingData, parameters, a.Value, b.Value);

                PointF plotPadding = new PointF(plotPaddingX.Value, plotPaddingY.Value);

                if (plotMode.Value == "labels")
                {
                    using (Bitmap result = SemiSupervisedClassificationExample.VisualizeLabels(forest, trainingData, new Size(300, 300), plotPadding))
                    {
                        ShowVisualizationImage(result);
                    }
                }
                else if (plotMode.Value == "density")
                {
                    using (Bitmap result = SemiSupervisedClassificationExample.VisualizeDensity(forest, trainingData, new Size(300, 300), plotPadding))
                    {
                        ShowVisualizationImage(result);
                    }
                }
                #endregion
            }
            else if (mode == "regression")
            {
                #region Regression
                CommandLineParser parser = new CommandLineParser();
                parser.Command = "SW " + mode.ToUpper();

                parser.AddArgument(trainingDataPath);
                parser.AddSwitch("T", T);
                parser.AddSwitch("D", D);
                parser.AddSwitch("F", F);
                parser.AddSwitch("L", L);

                parser.AddSwitch("PADX", plotPaddingX);
                parser.AddSwitch("PADY", plotPaddingY);
                parser.AddSwitch("VERBOSE", verboseSwitch);

                // Override default values for command line options
                T.Value = 10;
                D.Value = 2;
                a.Value = 0; // prior turned off by default
                b.Value = 900;

                if (args.Length == 1)
                {
                    parser.PrintHelp();
                    DisplayTextFiles(REGRESSION_DATA_PATH);
                    return;
                }

                if (parser.Parse(args, 1) == false)
                {
                    return;
                }

                RegressionExample regressionDemo = new RegressionExample();

                regressionDemo.PlotDilation.X = plotPaddingX.Value;
                regressionDemo.PlotDilation.Y = plotPaddingY.Value;

                regressionDemo.TrainingParameters = new TrainingParameters()
                {
                    MaxDecisionLevels                     = D.Value - 1,
                    NumberOfCandidateFeatures             = F.Value,
                    NumberOfCandidateThresholdsPerFeature = L.Value,
                    NumberOfTrees = T.Value,
                    Verbose       = verboseSwitch.Used
                };

                DataPointCollection trainingData = LoadTrainingData(
                    trainingDataPath.Value,
                    REGRESSION_DATA_PATH,
                    1,
                    DataDescriptor.HasTargetValues);

                using (Bitmap result = regressionDemo.Run(trainingData))
                {
                    ShowVisualizationImage(result);
                }
                #endregion
            }
            else
            {
                Console.WriteLine("Unrecognized command line argument, try SW HELP.");
                return;
            }
        }
        public static Forest <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> Train(
            DataPointCollection trainingData,
            TrainingParameters parameters,
            double a_,
            double b_)
        {
            // Train the forest
            Console.WriteLine("Training the forest...");

            Random random = new Random();

            ITrainingContext <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> classificationContext
                = new SemiSupervisedClassificationTrainingContext(trainingData.CountClasses(), random, a_, b_);
            var forest = ForestTrainer <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> .TrainForest(
                random,
                parameters,
                classificationContext,
                trainingData);

            // Label transduction to unlabelled leaves from nearest labelled leaf
            List <int> unlabelledLeafIndices = null;
            List <int> labelledLeafIndices   = null;

            int[]      closestLabelledLeafIndices = null;
            List <int> leafIndices = null;

            for (int t = 0; t < forest.TreeCount; t++)
            {
                var tree = forest.GetTree(t);
                leafIndices = new List <int>();

                unlabelledLeafIndices = new List <int>();
                labelledLeafIndices   = new List <int>();

                for (int n = 0; n < tree.NodeCount; n++)
                {
                    if (tree.GetNode(n).IsLeaf)
                    {
                        if (tree.GetNode(n).TrainingDataStatistics.HistogramAggregator.SampleCount == 0)
                        {
                            unlabelledLeafIndices.Add(leafIndices.Count);
                        }
                        else
                        {
                            labelledLeafIndices.Add(leafIndices.Count);
                        }

                        leafIndices.Add(n);
                    }
                }

                // Build an upper triangular matrix of inter-leaf distances
                float[,] interLeafDistances = new float[leafIndices.Count, leafIndices.Count];
                for (int i = 0; i < leafIndices.Count; i++)
                {
                    for (int j = i + 1; j < leafIndices.Count; j++)
                    {
                        SemiSupervisedClassificationStatisticsAggregator a = tree.GetNode(leafIndices[i]).TrainingDataStatistics;
                        SemiSupervisedClassificationStatisticsAggregator b = tree.GetNode(leafIndices[j]).TrainingDataStatistics;
                        GaussianPdf2d x = a.GaussianAggregator2d.GetPdf();
                        GaussianPdf2d y = b.GaussianAggregator2d.GetPdf();

                        interLeafDistances[i, j] = (float)(Math.Max(
                                                               x.GetNegativeLogProbability((float)(y.MeanX), (float)(y.MeanY)),
                                                               y.GetNegativeLogProbability((float)(x.MeanX), (float)(x.MeanY))));
                    }
                }

                // Find shortest paths between all pairs of nodes in the graph of leaf nodes
                FloydWarshall pathFinder = new FloydWarshall(interLeafDistances);

                // Find the closest labelled leaf to each unlabelled leaf
                float[] minDistances = new float[unlabelledLeafIndices.Count];
                closestLabelledLeafIndices = new int[unlabelledLeafIndices.Count];
                for (int i = 0; i < minDistances.Length; i++)
                {
                    minDistances[i] = float.PositiveInfinity;
                    closestLabelledLeafIndices[i] = -1; // unused so deliberately invalid
                }

                for (int l = 0; l < labelledLeafIndices.Count; l++)
                {
                    for (int u = 0; u < unlabelledLeafIndices.Count; u++)
                    {
                        if (pathFinder.GetMinimumDistance(unlabelledLeafIndices[u], labelledLeafIndices[l]) < minDistances[u])
                        {
                            minDistances[u] = pathFinder.GetMinimumDistance(unlabelledLeafIndices[u], labelledLeafIndices[l]);
                            closestLabelledLeafIndices[u] = leafIndices[labelledLeafIndices[l]];
                        }
                    }
                }

                // Propagate class probability distributions to each unlabelled
                // leaf from its nearest labelled leaf.
                for (int u = 0; u < unlabelledLeafIndices.Count; u++)
                {
                    // Node is a value type, so Tree.GetNode() returns only a
                    // COPY of the Node. We update this copy and then copy it
                    // back over the top of the original via Tree.SetNode().

                    // The C++ version is a lot better!

                    var unlabelledLeafCopy = tree.GetNode(leafIndices[unlabelledLeafIndices[u]]);
                    var labelledLeafCopy   = tree.GetNode(closestLabelledLeafIndices[u]);

                    unlabelledLeafCopy.TrainingDataStatistics.HistogramAggregator
                        = (HistogramAggregator)(labelledLeafCopy.TrainingDataStatistics.HistogramAggregator.DeepClone());

                    tree.SetNode(leafIndices[unlabelledLeafIndices[u]], unlabelledLeafCopy);
                }
            }

            return(forest);
        }
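        // Sketch (not part of the original source) of the all-pairs
        // shortest-path computation the FloydWarshall helper used in Train
        // presumably performs. The constructor argument and GetMinimumDistance
        // signature are taken from their uses above; the body is an assumption.
        class FloydWarshallSketch
        {
            private readonly float[,] d_;

            public FloydWarshallSketch(float[,] upperTriangularDistances)
            {
                int n = upperTriangularDistances.GetLength(0);
                d_ = new float[n, n];

                // Symmetrize the upper-triangular input.
                for (int i = 0; i < n; i++)
                {
                    for (int j = 0; j < n; j++)
                    {
                        d_[i, j] = i == j
                            ? 0.0f
                            : (i < j ? upperTriangularDistances[i, j] : upperTriangularDistances[j, i]);
                    }
                }

                // Standard Floyd-Warshall relaxation over intermediate nodes k.
                for (int k = 0; k < n; k++)
                    for (int i = 0; i < n; i++)
                        for (int j = 0; j < n; j++)
                            if (d_[i, k] + d_[k, j] < d_[i, j])
                                d_[i, j] = d_[i, k] + d_[k, j];
            }

            public float GetMinimumDistance(int i, int j)
            {
                return d_[i, j];
            }
        }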
        /// <summary>
        /// Apply a trained forest to some test data.
        /// </summary>
        /// <typeparam name="F">Type of split function</typeparam>
        /// <param name="forest">Trained forest</param>
        /// <param name="testData">Test data</param>
        /// <returns>An array of class distributions, one per test data point</returns>
        public static HistogramAggregator[] Test <F>(Forest <F, HistogramAggregator> forest, DataPointCollection testData) where F : IFeatureResponse
        {
            int nClasses = forest.GetTree(0).GetNode(0).TrainingDataStatistics.BinCount;

            int[][] leafIndicesPerTree = forest.Apply(testData);

            HistogramAggregator[] result = new HistogramAggregator[testData.Count()];

            for (int i = 0; i < testData.Count(); i++)
            {
                // Aggregate statistics for this sample over all leaf nodes reached
                result[i] = new HistogramAggregator(nClasses);
                for (int t = 0; t < forest.TreeCount; t++)
                {
                    int leafIndex = leafIndicesPerTree[t][i];
                    result[i].Aggregate(forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics);
                }
            }

            return(result);
        }
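        // Usage sketch (not part of the original source): turning the per-point
        // class distributions returned by Test into hard predictions by taking
        // the most probable class. The helper name is hypothetical.
        static int[] PredictClasses(HistogramAggregator[] distributions, int nClasses)
        {
            int[] predictions = new int[distributions.Length];

            for (int i = 0; i < distributions.Length; i++)
            {
                int best = 0;
                for (int b = 1; b < nClasses; b++)
                {
                    if (distributions[i].GetProbability(b) > distributions[i].GetProbability(best))
                    {
                        best = b;
                    }
                }
                predictions[i] = best;
            }

            return predictions;
        }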
        public static Bitmap Visualize <F>(
            Forest <F, HistogramAggregator> forest,
            DataPointCollection trainingData,
            Size PlotSize,
            PointF PlotDilation) where F : IFeatureResponse
        {
            // Size PlotSize = new Size(300, 300), PointF PlotDilation = new PointF(0.1f, 0.1f)
            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

            DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

            Console.WriteLine("\nApplying the forest to test data...");
            int[][] leafNodeIndices = forest.Apply(testData);

            // Form a palette of random colors, one per class
            Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];

            // First few colours are same as those in the book, remainder are random.
            colors[0] = Color.FromArgb(183, 170, 8);
            colors[1] = Color.FromArgb(194, 32, 14);
            colors[2] = Color.FromArgb(4, 154, 10);
            colors[3] = Color.FromArgb(13, 26, 188);

            Color grey = Color.FromArgb(255, 127, 127, 127);

            System.Random r = new Random(0); // same seed every time so colours will be consistent
            for (int c = 4; c < colors.Length; c++)
            {
                colors[c] = Color.FromArgb(255, r.Next(0, 256), r.Next(0, 256), r.Next(0, 256)); // NB Next's upper bound is exclusive
            }

            // Create a visualization image
            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // For each pixel...
            int index = 0;

            for (int j = 0; j < PlotSize.Height; j++)
            {
                for (int i = 0; i < PlotSize.Width; i++)
                {
                    // Aggregate statistics for this sample over all leaf nodes reached
                    HistogramAggregator h = new HistogramAggregator(trainingData.CountClasses());
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        int leafIndex = leafNodeIndices[t][index];
                        h.Aggregate(forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics);
                    }

                    // Let's muddy the colors with grey where the entropy is high.
                    float muddiness = 0.5f * (float)(h.Entropy());

                    float R = 0.0f, G = 0.0f, B = 0.0f;

                    for (int b = 0; b < trainingData.CountClasses(); b++)
                    {
                        float p = (1.0f - muddiness) * h.GetProbability(b); // NB probabilities sum to 1.0 over the classes

                        R += colors[b].R * p;
                        G += colors[b].G * p;
                        B += colors[b].B * p;
                    }

                    R += grey.R * muddiness;
                    G += grey.G * muddiness;
                    B += grey.B * muddiness;

                    Color c = Color.FromArgb(255, (byte)(R), (byte)(G), (byte)(B));

                    result.SetPixel(i, j, c); // painfully slow but safe

                    index++;
                }
            }

            // Also draw the original training data
            using (Graphics g = Graphics.FromImage(result))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                for (int s = 0; s < trainingData.Count(); s++)
                {
                    PointF x = new PointF(
                        (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                        (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                    RectangleF rectangle = new RectangleF(x.X - 3.0f, x.Y - 3.0f, 6.0f, 6.0f);
                    g.FillRectangle(new SolidBrush(colors[trainingData.GetIntegerLabel(s)]), rectangle);
                    g.DrawRectangle(new Pen(Color.Black), rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                }
            }

            return(result);
        }
        public static Bitmap VisualizeLabels(Forest <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> forest, DataPointCollection trainingData, Size PlotSize, PointF PlotDilation)
        {
            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

            // Apply the trained forest to the test data
            Console.WriteLine("\nApplying the forest to test data...");

            DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

            int[][] leafNodeIndices = forest.Apply(testData);

            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // Paint the test data
            GaussianPdf2d[][] leafDistributions = new GaussianPdf2d[forest.TreeCount][];
            for (int t = 0; t < forest.TreeCount; t++)
            {
                leafDistributions[t] = new GaussianPdf2d[forest.GetTree(t).NodeCount];
                for (int i = 0; i < forest.GetTree(t).NodeCount; i++)
                {
                    Node <LinearFeatureResponse2d, SemiSupervisedClassificationStatisticsAggregator> nodeCopy = forest.GetTree(t).GetNode(i);

                    if (nodeCopy.IsLeaf)
                    {
                        leafDistributions[t][i] = nodeCopy.TrainingDataStatistics.GaussianAggregator2d.GetPdf();
                    }
                }
            }

            // Form a palette of random colors, one per class
            Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];

            // First few colours are same as those in the book, remainder are random.
            colors[0] = Color.FromArgb(183, 170, 8);
            colors[1] = Color.FromArgb(194, 32, 14);
            colors[2] = Color.FromArgb(4, 154, 10);
            colors[3] = Color.FromArgb(13, 26, 188);

            Color grey = Color.FromArgb(255, 127, 127, 127);

            System.Random r = new Random(0); // same seed every time so colours will be consistent
            for (int c = 4; c < colors.Length; c++)
            {
                colors[c] = Color.FromArgb(255, r.Next(0, 256), r.Next(0, 256), r.Next(0, 256)); // NB Next's upper bound is exclusive
            }

            int index = 0;

            for (int j = 0; j < PlotSize.Height; j++)
            {
                for (int i = 0; i < PlotSize.Width; i++)
                {
                    // Aggregate statistics for this sample over all leaf nodes reached
                    HistogramAggregator h = new HistogramAggregator(trainingData.CountClasses());
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        int leafIndex = leafNodeIndices[t][index];

                        SemiSupervisedClassificationStatisticsAggregator a = forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics;

                        h.Aggregate(a.HistogramAggregator);
                    }

                    // Let's muddy the colors with a little grey where entropy is high.
                    float muddiness = 0.5f * (float)(h.Entropy());

                    float R = 0.0f, G = 0.0f, B = 0.0f;

                    for (int b = 0; b < trainingData.CountClasses(); b++)
                    {
                        float p = (1.0f - muddiness) * h.GetProbability(b); // NB probabilities sum to 1.0 over the classes

                        R += colors[b].R * p;
                        G += colors[b].G * p;
                        B += colors[b].B * p;
                    }

                    R += grey.R * muddiness;
                    G += grey.G * muddiness;
                    B += grey.B * muddiness;

                    Color c = Color.FromArgb(255, (byte)(R), (byte)(G), (byte)(B));

                    result.SetPixel(i, j, c);

                    index++;
                }
            }

            PaintTrainingData(trainingData, plotCanvas, result);

            return(result);
        }
        public float GetResponse(IDataPointCollection data, int sampleIndex)
        {
            DataPointCollection concreteData = (DataPointCollection)(data);

            return(concreteData.GetDataPoint((int)sampleIndex)[axis_]);
        }
        public float GetResponse(IDataPointCollection data, int index)
        {
            DataPointCollection concreteData = (DataPointCollection)(data);

            return(dx_ * concreteData.GetDataPoint((int)index)[0] + dy_ * concreteData.GetDataPoint((int)index)[1]);
        }
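        // Note (not part of the original source): the two GetResponse overloads
        // above are the split functions evaluated at decision nodes. The
        // axis-aligned version thresholds a single coordinate; the linear
        // version thresholds the dot product (dx_, dy_) . (x[0], x[1]), i.e.
        // the signed distance to an oblique line in the input plane. A node
        // would route a data point by comparing the response to a learned
        // threshold, sketched below with a hypothetical helper.
        static bool GoesLeft(float featureResponse, float threshold)
        {
            return featureResponse < threshold;
        }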
        /// <summary>
        /// Load a collection of data from a tab-delimited stream with one data
        /// point per line. The data may optionally have associated class labels
        /// (first element on line) and/or target values (last element on line).
        /// </summary>
        /// <param name="stream">Stream to be read.</param>
        /// <param name="dataDimension">Dimension of the data (excluding class labels and target values).</param>
        /// <param name="descriptor">Indicates whether the data have class labels and/or target values.</param>
        static public DataPointCollection Load(System.IO.Stream stream, int dataDimension, DataDescriptor descriptor)
        {
            bool bHasTargetValues = (descriptor & DataDescriptor.HasTargetValues) == DataDescriptor.HasTargetValues;
            bool bHasClassLabels  = (descriptor & DataDescriptor.HasClassLabels) == DataDescriptor.HasClassLabels;

            DataPointCollection result = new DataPointCollection();

            result.data_      = new List <float[]>();
            result.labels_    = bHasClassLabels ? new List <int>() : null;
            result.targets_   = bHasTargetValues ? new List <float>() : null;
            result.dimension_ = dataDimension;

            char[] separators = new char[] { '\t' };

            int elementsPerLine = (bHasClassLabels ? 1 : 0) + dataDimension + (bHasTargetValues ? 1 : 0);

            using (System.IO.StreamReader r = new System.IO.StreamReader(stream))
            {
                string line;
                while ((line = r.ReadLine()) != null)
                {
                    string[] elements = line.Split(separators);

                    if (elements.Length != elementsPerLine)
                    {
                        throw new Exception("Encountered line with unexpected number of elements.");
                    }

                    int index = 0;

                    if (bHasClassLabels)
                    {
                        if (!String.IsNullOrEmpty(elements[index]))
                        {
                            if (!result.labelIndices_.ContainsKey(elements[index]))
                            {
                                result.labelIndices_.Add(elements[index], result.labelIndices_.Count);
                            }
                            result.labels_.Add(result.labelIndices_[elements[index++]]);
                        }
                        else
                        {
                            result.labels_.Add(UnknownClassLabel);
                            index++;
                        }
                    }

                    float[] datum = new float[dataDimension];
                    for (int i = 0; i < dataDimension; i++)
                    {
                        datum[i] = Convert.ToSingle(elements[index++]);
                    }

                    result.data_.Add(datum);

                    if (bHasTargetValues)
                    {
                        result.targets_.Add(Convert.ToSingle(elements[index++]));
                    }
                }
            }

            return(result);
        }
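        // Sketch (not part of the original source): the tab-delimited layout
        // that Load expects for 2D points with class labels, and a matching
        // call. An empty label field marks an unlabelled point
        // (UnknownClassLabel). The helper name is hypothetical.
        //
        //     A<TAB>1.0<TAB>2.5
        //     B<TAB>0.3<TAB>4.1
        //      <TAB>0.7<TAB>3.9    (unlabelled)
        //
        static DataPointCollection LoadExample(System.IO.Stream stream)
        {
            return DataPointCollection.Load(stream, 2, DataDescriptor.HasClassLabels);
        }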
        public static Bitmap Visualize(
            Forest <AxisAlignedFeatureResponse, GaussianAggregator2d> forest,
            DataPointCollection trainingData,
            Size PlotSize,
            PointF PlotDilation)
        {
            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

            // Apply the trained forest to the test data
            Console.WriteLine("\nApplying the forest to test data...");

            DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

            int[][] leafNodeIndices = forest.Apply(testData);

            // Compute normalization factors per node
            int nTrainingPoints = (int)(trainingData.Count()); // could also count over tree nodes if training data no longer accessible

            double[][] normalizationFactors = new double[forest.TreeCount][];
            for (int t = 0; t < forest.TreeCount; t++)
            {
                normalizationFactors[t] = new double[forest.GetTree(t).NodeCount];
                ComputeNormalizationFactorsRecurse(forest.GetTree(t), 0, nTrainingPoints, new Bounds(2), normalizationFactors[t]);
            }

            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // Paint the test data
            int index = 0;

            for (int j = 0; j < PlotSize.Height; j++)
            {
                for (int i = 0; i < PlotSize.Width; i++)
                {
                    // Map pixel coordinate (i,j) in visualization image back to point in input space
                    float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
                    float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

                    // Aggregate statistics for this sample over all trees
                    double probability = 0.0;
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        int leafIndex = leafNodeIndices[t][index];

                        probability += normalizationFactors[t][leafIndex] * forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics.GetPdf().GetProbability(x, y);
                    }

                    probability /= forest.TreeCount;

                    // 'Gamma correct' probability density for better display
                    float l = (float)(LuminanceScaleFactor * Math.Pow(probability, Gamma));

                    if (l < 0)
                    {
                        l = 0;
                    }
                    else if (l > 255)
                    {
                        l = 255;
                    }

                    Color c = Color.FromArgb(255, (byte)(l), 0, 0);
                    result.SetPixel(i, j, c);

                    index++;
                }
            }

            // Also plot the original training data
            using (Graphics g = Graphics.FromImage(result))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                for (int s = 0; s < trainingData.Count(); s++)
                {
                    PointF x = new PointF(
                        (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                        (trainingData.GetDataPoint(s)[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                    RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                    g.FillRectangle(new SolidBrush(DataPointColor), rectangle);
                    g.DrawRectangle(new Pen(Color.Black), rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                }
            }

            return(result);
        }
        public Bitmap Run(DataPointCollection trainingData)
        {
            // Train the forest
            Console.WriteLine("Training the forest...");

            Random random = new Random();
            ITrainingContext <AxisAlignedFeatureResponse, LinearFitAggregator1d> regressionTrainingContext = new RegressionTrainingContext();

            var forest = ForestTrainer <AxisAlignedFeatureResponse, LinearFitAggregator1d> .TrainForest(
                random,
                TrainingParameters,
                regressionTrainingContext,
                trainingData);

            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetTargetRange(), PlotSize, PlotDilation);

            DataPointCollection testData = DataPointCollection.Generate1dGrid(plotCanvas.plotRangeX, PlotSize.Width);

            // Apply the trained forest to the test data
            Console.WriteLine("\nApplying the forest to test data...");

            int[][] leafNodeIndices = forest.Apply(testData);

            #region Generate Visualization Image
            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // Plot the learned density
            Color inverseDensityColor = Color.FromArgb(255, 255 - DensityColor.R, 255 - DensityColor.G, 255 - DensityColor.B);

            double[] mean_y_given_x = new double[PlotSize.Width];

            int index = 0;
            for (int i = 0; i < PlotSize.Width; i++)
            {
                double totalProbability = 0.0;
                for (int j = 0; j < PlotSize.Height; j++)
                {
                    // Map pixel coordinate (i,j) in visualization image back to point in input space
                    float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
                    float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

                    double probability = 0.0;

                    // Aggregate statistics for this sample over all trees
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        Node <AxisAlignedFeatureResponse, LinearFitAggregator1d> leafNodeCopy = forest.GetTree(t).GetNode(leafNodeIndices[t][i]);

                        LinearFitAggregator1d leafStatistics = leafNodeCopy.TrainingDataStatistics;

                        probability += leafStatistics.GetProbability(x, y);
                    }

                    probability /= forest.TreeCount;

                    mean_y_given_x[i] += probability * y;
                    totalProbability  += probability;

                    float scale = 10.0f * (float)probability;

                    Color weightedColor = Color.FromArgb(
                        255,
                        (byte)(Math.Min(scale * inverseDensityColor.R + 0.5f, 255.0f)),
                        (byte)(Math.Min(scale * inverseDensityColor.G + 0.5f, 255.0f)),
                        (byte)(Math.Min(scale * inverseDensityColor.B + 0.5f, 255.0f)));

                    Color c = Color.FromArgb(255, 255 - weightedColor.R, 255 - weightedColor.G, 255 - weightedColor.B);

                    result.SetPixel(i, j, c);

                    index++;
                }

                // NB We don't really compute the mean over y, just over the region of y that is plotted
                mean_y_given_x[i] /= totalProbability;
            }

            // Also plot the mean curve and the original training data
            using (Graphics g = Graphics.FromImage(result))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                using (Pen meanPen = new Pen(MeanColor, 2))
                {
                    for (int i = 0; i < PlotSize.Width - 1; i++)
                    {
                        g.DrawLine(
                            meanPen,
                            (float)(i),
                            (float)((mean_y_given_x[i] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY),
                            (float)(i + 1),
                            (float)((mean_y_given_x[i + 1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY));
                    }
                }

                using (Brush dataPointBrush = new SolidBrush(DataPointColor))
                    using (Pen dataPointBorderPen = new Pen(DataPointBorderColor))
                    {
                        for (int s = 0; s < trainingData.Count(); s++)
                        {
                            // Map sample coordinate back to a pixel coordinate in the visualization image
                            PointF x = new PointF(
                                (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                                (trainingData.GetTarget(s) - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                            RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                            g.FillRectangle(dataPointBrush, rectangle);
                            g.DrawRectangle(dataPointBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                        }
                    }
            }

            #endregion

            return(result);
        }