/// <summary>
        /// Overlays the training data points on an existing visualization bitmap.
        /// </summary>
        /// <remarks>
        /// Unlabelled points are drawn first as small squares filled with <c>UnlabelledDataPointColor</c>;
        /// labelled points are then drawn on top as larger squares filled with a per-class colour.
        /// </remarks>
        /// <param name="trainingData">Data points to draw; elements 0 and 1 of each point are used as x and y.</param>
        /// <param name="plotCanvas">Supplies the plot range origin and step used to map data coordinates to pixels.</param>
        /// <param name="result">Bitmap that is drawn onto in place.</param>
        static void PaintTrainingData(DataPointCollection trainingData, PlotCanvas plotCanvas, Bitmap result)
        {
            // First few colours are same as those in the book, remainder are random.
            Color[] colors = new Color[Math.Max(trainingData.CountClasses(), 4)];
            colors[0] = Color.FromArgb(183, 170, 8);
            colors[1] = Color.FromArgb(194, 32, 14);
            colors[2] = Color.FromArgb(4, 154, 10);
            colors[3] = Color.FromArgb(13, 26, 188);

            System.Random r = new Random(0); // same seed every time so colours will be consistent
            for (int c = 4; c < colors.Length; c++)
            {
                colors[c] = Color.FromArgb(255, r.Next(0, 255), r.Next(0, 255), r.Next(0, 255));
            }

            // Also plot the original training data (a little bigger for clarity).
            // Pens and brushes wrap GDI+ handles, so they are created once and disposed deterministically
            // instead of being allocated (and abandoned) for every data point.
            using (Graphics g = Graphics.FromImage(result))
            using (SolidBrush unlabelledBrush = new SolidBrush(UnlabelledDataPointColor))
            using (Pen unlabelledBorderPen = new Pen(Color.Black))
            using (Pen labelledBorderPen = new Pen(Color.White, 2))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                int sampleCount = trainingData.Count();

                // Paint unlabelled data
                for (int s = 0; s < sampleCount; s++)
                {
                    if (trainingData.GetIntegerLabel(s) == DataPointCollection.UnknownClassLabel) // unlabelled
                    {
                        // Map sample coordinate back to a pixel coordinate in the image
                        var dataPoint = trainingData.GetDataPoint(s);
                        PointF x = new PointF(
                            (dataPoint[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                            (dataPoint[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                        RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                        g.FillRectangle(unlabelledBrush, rectangle);
                        g.DrawRectangle(unlabelledBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                    }
                }

                // Paint labelled data on top
                for (int s = 0; s < sampleCount; s++)
                {
                    int label = trainingData.GetIntegerLabel(s);
                    if (label != DataPointCollection.UnknownClassLabel)
                    {
                        var dataPoint = trainingData.GetDataPoint(s);
                        PointF x = new PointF(
                            (dataPoint[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                            (dataPoint[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                        RectangleF rectangle = new RectangleF(x.X - 5.0f, x.Y - 5.0f, 10.0f, 10.0f);

                        // The fill colour depends on the class, so this brush is scoped to the single point.
                        using (SolidBrush classBrush = new SolidBrush(colors[label]))
                        {
                            g.FillRectangle(classBrush, rectangle);
                        }

                        g.DrawRectangle(labelledBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                    }
                }
            }
        }
        /// <summary>
        /// Pushes every test sample through a trained forest and pools the statistics
        /// of the leaves it lands in, one pooled histogram per sample.
        /// </summary>
        /// <typeparam name="F">Type of split function</typeparam>
        /// <param name="forest">Trained forest</param>
        /// <param name="testData">Test data</param>
        /// <returns>An array of class distributions, one per test data point</returns>
        public static HistogramAggregator[] Test <F>(Forest <F, HistogramAggregator> forest, DataPointCollection testData) where F : IFeatureResponse
        {
            // The histogram size is read from the root node of the first tree.
            int binCount = forest.GetTree(0).GetNode(0).TrainingDataStatistics.BinCount;

            // Indexed as [tree][sample]: the leaf reached by each sample in each tree.
            int[][] reachedLeaves = forest.Apply(testData);

            int sampleCount = testData.Count();
            HistogramAggregator[] distributions = new HistogramAggregator[sampleCount];

            int sample = 0;
            while (sample < sampleCount)
            {
                // Accumulate into a local, then store the finished histogram for this sample.
                HistogramAggregator pooled = new HistogramAggregator(binCount);

                for (int tree = 0; tree < forest.TreeCount; ++tree)
                {
                    int leaf = reachedLeaves[tree][sample];
                    pooled.Aggregate(forest.GetTree(tree).GetNode(leaf).TrainingDataStatistics);
                }

                distributions[sample] = pooled;
                sample++;
            }

            return distributions;
        }
示例#3
0
        /// <summary>
        /// Opens a training data file and loads its contents into a <see cref="DataPointCollection"/>.
        /// </summary>
        /// <remarks>
        /// The file is first opened at <paramref name="path"/>. If that fails, a fallback location is tried,
        /// built from the executing assembly's directory, <paramref name="alternativePath"/> and
        /// <paramref name="path"/>. If the file cannot be opened, cannot be read, or yields no data points,
        /// a message is written to the console and the process exits with code -1.
        /// </remarks>
        /// <param name="path">Path of the training data file.</param>
        /// <param name="alternativePath">Directory, relative to the executing assembly, used for the fallback location.</param>
        /// <param name="dimension">Data dimension passed through to <c>DataPointCollection.Load</c>.</param>
        /// <param name="dataDescriptor">Descriptor passed through to <c>DataPointCollection.Load</c>.</param>
        /// <returns>The loaded collection, containing at least one data point.</returns>
        static DataPointCollection LoadTrainingData(
            string path,
            string alternativePath,
            int dimension,
            DataDescriptor dataDescriptor)
        {
            System.IO.FileStream stream = null;
            try
            {
                stream = new FileStream(path, FileMode.Open, FileAccess.Read);
            }
            catch (Exception)
            {
                // Fall back to a location relative to the executing assembly.
                string a = System.IO.Path.Combine(
                    Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "/",
                    alternativePath);
                a = System.IO.Path.Combine(a, path);
                try
                {
                    stream = new FileStream(a, FileMode.Open, FileAccess.Read);
                }
                catch (Exception)
                {
                    Console.WriteLine("Failed to open training data file at \"{0}\" or \"{1}\".", path, a);
                    Environment.Exit(-1);
                }
            }

            DataPointCollection trainingData = null;

            try
            {
                // Release the file handle as soon as loading finishes (or fails); previously the
                // stream was never disposed by this method. Disposing an already-closed stream is harmless.
                // NOTE(review): assumes Load reads everything it needs before returning - confirm.
                using (stream)
                {
                    trainingData = DataPointCollection.Load(
                        stream,
                        dimension,
                        dataDescriptor);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Failed to read training data. " + e.Message);
                Environment.Exit(-1);
            }

            if (trainingData.Count() < 1)
            {
                Console.WriteLine("Insufficient training data.");
                Environment.Exit(-1);
            }

            return(trainingData);
        }
        /// <summary>
        /// Renders the density estimated by a trained forest as a bitmap and overlays the training data.
        /// </summary>
        /// <remarks>
        /// Each pixel is mapped back to a point in input space; the per-tree leaf densities at that point are
        /// weighted by a per-node normalization factor, averaged over the trees, gamma corrected and written
        /// to the red channel.
        /// </remarks>
        /// <param name="forest">Trained forest whose leaves hold the density statistics.</param>
        /// <param name="trainingData">Training data; its ranges define the plotted region and its points are drawn on top.</param>
        /// <param name="PlotSize">Size of the output bitmap in pixels.</param>
        /// <param name="PlotDilation">Dilation passed to the <c>PlotCanvas</c> constructor together with the data ranges.</param>
        /// <returns>A new bitmap of size <paramref name="PlotSize"/>.</returns>
        public static Bitmap Visualize(
            Forest <AxisAlignedFeatureResponse, GaussianAggregator2d> forest,
            DataPointCollection trainingData,
            Size PlotSize,
            PointF PlotDilation)
        {
            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

            // Apply the trained forest to the test data
            Console.WriteLine("\nApplying the forest to test data...");

            DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

            int[][] leafNodeIndices = forest.Apply(testData);

            // Compute normalization factors per node
            int nTrainingPoints = (int)(trainingData.Count()); // could also count over tree nodes if training data no longer accessible

            double[][] normalizationFactors = new double[forest.TreeCount][];
            for (int t = 0; t < forest.TreeCount; t++)
            {
                normalizationFactors[t] = new double[forest.GetTree(t).NodeCount];
                ComputeNormalizationFactorsRecurse(forest.GetTree(t), 0, nTrainingPoints, new Bounds(2), normalizationFactors[t]);
            }

            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // Paint the test data; 'index' walks the grid samples in the same row-major order as the pixels
            int index = 0;

            for (int j = 0; j < PlotSize.Height; j++)
            {
                for (int i = 0; i < PlotSize.Width; i++)
                {
                    // Map pixel coordinate (i,j) in visualization image back to point in input space
                    float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;
                    float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

                    // Aggregate statistics for this sample over all trees
                    double probability = 0.0;
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        int leafIndex = leafNodeIndices[t][index];

                        probability += normalizationFactors[t][leafIndex] * forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics.GetPdf().GetProbability(x, y);
                    }

                    probability /= forest.TreeCount;

                    // 'Gamma correct' probability density for better display
                    float l = (float)(LuminanceScaleFactor * Math.Pow(probability, Gamma));

                    // Clamp to the valid channel range before narrowing to a byte
                    if (l < 0)
                    {
                        l = 0;
                    }
                    else if (l > 255)
                    {
                        l = 255;
                    }

                    Color c = Color.FromArgb(255, (byte)(l), 0, 0);
                    result.SetPixel(i, j, c);

                    index++;
                }
            }

            // Also plot the original training data.
            // The brush and pen wrap GDI+ handles, so they are created once and disposed deterministically
            // instead of being allocated (and abandoned) for every data point.
            using (Graphics g = Graphics.FromImage(result))
            using (SolidBrush dataPointBrush = new SolidBrush(DataPointColor))
            using (Pen dataPointBorderPen = new Pen(Color.Black))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                int sampleCount = trainingData.Count();
                for (int s = 0; s < sampleCount; s++)
                {
                    // Map sample coordinate back to a pixel coordinate in the visualization image
                    var dataPoint = trainingData.GetDataPoint(s);
                    PointF x = new PointF(
                        (dataPoint[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                        (dataPoint[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                    RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                    g.FillRectangle(dataPointBrush, rectangle);
                    g.DrawRectangle(dataPointBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                }
            }

            return(result);
        }
        /// <summary>
        /// Renders the class predictions of a trained classification forest as a bitmap and overlays the training data.
        /// </summary>
        /// <remarks>
        /// For each pixel the histograms of the leaves reached in every tree are aggregated. The pixel colour is the
        /// probability-weighted blend of the class colours, mixed towards grey in proportion to the histogram entropy.
        /// </remarks>
        /// <typeparam name="F">Type of split function</typeparam>
        /// <param name="forest">Trained forest whose leaves hold class histograms.</param>
        /// <param name="trainingData">Training data; its ranges define the plotted region and its points are drawn on top.</param>
        /// <param name="PlotSize">Size of the output bitmap in pixels.</param>
        /// <param name="PlotDilation">Dilation passed to the <c>PlotCanvas</c> constructor together with the data ranges.</param>
        /// <returns>A new bitmap of size <paramref name="PlotSize"/>.</returns>
        public static Bitmap Visualize <F>(
            Forest <F, HistogramAggregator> forest,
            DataPointCollection trainingData,
            Size PlotSize,
            PointF PlotDilation) where F : IFeatureResponse
        {
            // Size PlotSize = new Size(300, 300), PointF PlotDilation = new PointF(0.1f, 0.1f)
            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetRange(1), PlotSize, PlotDilation);

            DataPointCollection testData = DataPointCollection.Generate2dGrid(plotCanvas.plotRangeX, PlotSize.Width, plotCanvas.plotRangeY, PlotSize.Height);

            Console.WriteLine("\nApplying the forest to test data...");
            int[][] leafNodeIndices = forest.Apply(testData);

            // Read the class count once: it was previously re-evaluated for every pixel and on every
            // iteration of the per-class loop, although the training data does not change here.
            int classCount = trainingData.CountClasses();

            // Form a palette of colors, one per class
            Color[] colors = new Color[Math.Max(classCount, 4)];

            // First few colours are same as those in the book, remainder are random.
            colors[0] = Color.FromArgb(183, 170, 8);
            colors[1] = Color.FromArgb(194, 32, 14);
            colors[2] = Color.FromArgb(4, 154, 10);
            colors[3] = Color.FromArgb(13, 26, 188);

            Color grey = Color.FromArgb(255, 127, 127, 127);

            System.Random r = new Random(0); // same seed every time so colours will be consistent
            for (int c = 4; c < colors.Length; c++)
            {
                colors[c] = Color.FromArgb(255, r.Next(0, 255), r.Next(0, 255), r.Next(0, 255));
            }

            // Create a visualization image
            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // For each pixel... ('index' walks the grid samples in the same row-major order as the pixels)
            int index = 0;

            for (int j = 0; j < PlotSize.Height; j++)
            {
                for (int i = 0; i < PlotSize.Width; i++)
                {
                    // Aggregate statistics for this sample over all leaf nodes reached
                    HistogramAggregator h = new HistogramAggregator(classCount);
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        int leafIndex = leafNodeIndices[t][index];
                        h.Aggregate(forest.GetTree(t).GetNode(leafIndex).TrainingDataStatistics);
                    }

                    // Let's muddy the colors with grey where the entropy is high.
                    float mudiness = 0.5f * (float)(h.Entropy());

                    float R = 0.0f, G = 0.0f, B = 0.0f;

                    for (int b = 0; b < classCount; b++)
                    {
                        float p = (1.0f - mudiness) * h.GetProbability(b); // NB probabilities sum to 1.0 over the classes

                        R += colors[b].R * p;
                        G += colors[b].G * p;
                        B += colors[b].B * p;
                    }

                    R += grey.R * mudiness;
                    G += grey.G * mudiness;
                    B += grey.B * mudiness;

                    // Clamp before narrowing: casting a float outside [0, 255] to byte yields an unspecified
                    // value in an unchecked context. In-range values are unaffected.
                    // NOTE(review): out-of-range values would need mudiness > 1, i.e. Entropy() > 2 - confirm whether that can occur.
                    Color c = Color.FromArgb(
                        255,
                        (byte)(Math.Max(0.0f, Math.Min(255.0f, R))),
                        (byte)(Math.Max(0.0f, Math.Min(255.0f, G))),
                        (byte)(Math.Max(0.0f, Math.Min(255.0f, B))));

                    result.SetPixel(i, j, c); // painfully slow but safe

                    index++;
                }
            }

            // Also draw the original training data.
            // Pens and brushes wrap GDI+ handles, so they are disposed deterministically instead of
            // being allocated (and abandoned) for every data point.
            using (Graphics g = Graphics.FromImage(result))
            using (Pen dataPointBorderPen = new Pen(Color.Black))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                int sampleCount = trainingData.Count();
                for (int s = 0; s < sampleCount; s++)
                {
                    // Map sample coordinate back to a pixel coordinate in the visualization image
                    var dataPoint = trainingData.GetDataPoint(s);
                    PointF x = new PointF(
                        (dataPoint[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                        (dataPoint[1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                    RectangleF rectangle = new RectangleF(x.X - 3.0f, x.Y - 3.0f, 6.0f, 6.0f);

                    // The fill colour depends on the class, so this brush is scoped to the single point.
                    using (SolidBrush classBrush = new SolidBrush(colors[trainingData.GetIntegerLabel(s)]))
                    {
                        g.FillRectangle(classBrush, rectangle);
                    }

                    g.DrawRectangle(dataPointBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                }
            }

            return(result);
        }
示例#6
0
        /// <summary>
        /// Trains a regression forest on the supplied data and renders the result as a bitmap.
        /// </summary>
        /// <remarks>
        /// The image shows the averaged per-tree probability of y given x for every pixel, the mean of y
        /// over the plotted range for each pixel column, and the original training points.
        /// </remarks>
        /// <param name="trainingData">Training data; element 0 of each point is used as x and the target as y.</param>
        /// <returns>A new bitmap of size <c>PlotSize</c>.</returns>
        public Bitmap Run(DataPointCollection trainingData)
        {
            // Train the forest
            Console.WriteLine("Training the forest...");

            Random random = new Random();
            ITrainingContext <AxisAlignedFeatureResponse, LinearFitAggregator1d> regressionTrainingContext = new RegressionTrainingContext();

            var forest = ForestTrainer <AxisAlignedFeatureResponse, LinearFitAggregator1d> .TrainForest(
                random,
                TrainingParameters,
                regressionTrainingContext,
                trainingData);

            // Generate some test samples in a grid pattern (a useful basis for creating visualization images)
            PlotCanvas plotCanvas = new PlotCanvas(trainingData.GetRange(0), trainingData.GetTargetRange(), PlotSize, PlotDilation);

            DataPointCollection testData = DataPointCollection.Generate1dGrid(plotCanvas.plotRangeX, PlotSize.Width);

            // Apply the trained forest to the test data
            Console.WriteLine("\nApplying the forest to test data...");

            int[][] leafNodeIndices = forest.Apply(testData);

            #region Generate Visualization Image
            Bitmap result = new Bitmap(PlotSize.Width, PlotSize.Height);

            // Plot the learned density
            Color inverseDensityColor = Color.FromArgb(255, 255 - DensityColor.R, 255 - DensityColor.G, 255 - DensityColor.B);

            double[] mean_y_given_x = new double[PlotSize.Width];

            for (int i = 0; i < PlotSize.Width; i++)
            {
                // Pixel column i corresponds to test sample i of the 1d grid; x does not depend on j
                float x = plotCanvas.plotRangeX.Item1 + i * plotCanvas.stepX;

                double totalProbability = 0.0;
                for (int j = 0; j < PlotSize.Height; j++)
                {
                    // Map pixel row j in visualization image back to a y value in input space
                    float y = plotCanvas.plotRangeY.Item1 + j * plotCanvas.stepY;

                    double probability = 0.0;

                    // Aggregate statistics for this sample over all trees
                    for (int t = 0; t < forest.TreeCount; t++)
                    {
                        Node <AxisAlignedFeatureResponse, LinearFitAggregator1d> leafNodeCopy = forest.GetTree(t).GetNode(leafNodeIndices[t][i]);

                        LinearFitAggregator1d leafStatistics = leafNodeCopy.TrainingDataStatistics;

                        probability += leafStatistics.GetProbability(x, y);
                    }

                    probability /= forest.TreeCount;

                    mean_y_given_x[i] += probability * y;
                    totalProbability  += probability;

                    float scale = 10.0f * (float)probability;

                    Color weightedColor = Color.FromArgb(
                        255,
                        (byte)(Math.Min(scale * inverseDensityColor.R + 0.5f, 255.0f)),
                        (byte)(Math.Min(scale * inverseDensityColor.G + 0.5f, 255.0f)),
                        (byte)(Math.Min(scale * inverseDensityColor.B + 0.5f, 255.0f)));

                    // Invert each channel back. The blue channel previously reused weightedColor.G
                    // (copy-paste slip), which tinted the density whenever G and B differed.
                    Color c = Color.FromArgb(255, 255 - weightedColor.R, 255 - weightedColor.G, 255 - weightedColor.B);

                    result.SetPixel(i, j, c);
                }

                // NB We don't really compute the mean over y, just over the region of y that is plotted
                mean_y_given_x[i] /= totalProbability;
            }

            // Also plot the mean curve and the original training data
            using (Graphics g = Graphics.FromImage(result))
            {
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.SmoothingMode     = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                using (Pen meanPen = new Pen(MeanColor, 2))
                {
                    // Join the mean of each pixel column to the mean of the next one
                    for (int i = 0; i < PlotSize.Width - 1; i++)
                    {
                        g.DrawLine(
                            meanPen,
                            (float)(i),
                            (float)((mean_y_given_x[i] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY),
                            (float)(i + 1),
                            (float)((mean_y_given_x[i + 1] - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY));
                    }
                }

                using (Brush dataPointBrush = new SolidBrush(DataPointColor))
                using (Pen dataPointBorderPen = new Pen(DataPointBorderColor))
                {
                    for (int s = 0; s < trainingData.Count(); s++)
                    {
                        // Map sample coordinate back to a pixel coordinate in the visualization image
                        PointF x = new PointF(
                            (trainingData.GetDataPoint(s)[0] - plotCanvas.plotRangeX.Item1) / plotCanvas.stepX,
                            (trainingData.GetTarget(s) - plotCanvas.plotRangeY.Item1) / plotCanvas.stepY);

                        RectangleF rectangle = new RectangleF(x.X - 2.0f, x.Y - 2.0f, 4.0f, 4.0f);
                        g.FillRectangle(dataPointBrush, rectangle);
                        g.DrawRectangle(dataPointBorderPen, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
                    }
                }
            }

            return(result);

            #endregion
        }