示例#1
0
        /// <summary>
        ///   Samples a function over an interval and displays the resulting
        ///   (x, f(x)) points in a scatter plot.
        /// </summary>
        ///
        /// <param name="title">The plot title; "Scatter plot" is used when null.</param>
        /// <param name="function">The function to be sampled.</param>
        /// <param name="min">The lower bound of the interval, or null to have it determined.</param>
        /// <param name="max">The upper bound of the interval, or null to have it determined.</param>
        /// <param name="step">The sampling step, or null to use 1/1000 of the interval width.</param>
        ///
        /// <returns>The result of the <c>show</c> overload that takes a scatter plot.</returns>
        ///
        private static ScatterplotBox show(String title, Func <double, double> function, double?min, double?max, double?step)
        {
            // When either bound is missing, BOTH bounds are replaced: by the range
            // reported by GetRange if it succeeds, or by [0, 1] otherwise.
            if (!min.HasValue || !max.HasValue)
            {
                DoubleRange detected;
                bool hasRange = GetRange(function, out detected);

                min = hasRange ? detected.Min : 0;
                max = hasRange ? detected.Max : 1;
            }

            // Both bounds are known at this point, so .Value cannot fail.
            double stride = step ?? (max.Value - min.Value) / 1000;

            double[] xs = Matrix.Interval(min.Value, max.Value, stride);
            double[] ys = Matrix.Apply(xs, function);

            Scatterplot plot = new Scatterplot(title ?? "Scatter plot");
            plot.Compute(xs, ys);

            return show(plot);
        }
示例#2
0
        /// <summary>
        ///   Shows the training and validation error curves together in one
        ///   scatter plot, each curve as its own class.
        /// </summary>
        ///
        /// <param name="errors">The training errors; plotted as class 1.</param>
        /// <param name="validationErrors">The validation errors; plotted as class 2.</param>
        /// <param name="epochCount">The number of epochs; epochs are numbered from 1.</param>
        ///
        public void PlotTrainingAndValidationCurves(
            IEnumerable <double> errors,
            IEnumerable <double> validationErrors,
            int epochCount)
        {
            // epoch numbers 1..epochCount, used once for each of the two curves
            double[] epochs = (from epoch in Enumerable.Range(1, epochCount)
                               select (double)epoch).ToArray();

            double[] x = epochs.Concat(epochs).ToArray();
            double[] y = errors.Concat(validationErrors).ToArray();

            // class labels: the first epochCount points are 1, the remaining ones 2
            int[] z = new int[x.Length];
            for (int i = 0; i < z.Length; i++)
            {
                z[i] = i < epochCount ? 1 : 2;
            }

            var plot = new Scatterplot(
                TrainingAndValidationPlotResources.Title,
                TrainingAndValidationPlotResources.XAxisTitle,
                TrainingAndValidationPlotResources.YAxisTitle);

            plot.Compute(x, y, z);
            ScatterplotBox.Show(plot);
        }
示例#3
0
        /// <summary>
        /// The main application entry point. Fits a single-feature linear
        /// regression on the California housing data, prints the fit and
        /// its RMSE, and plots predictions against the actual labels.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            Console.WriteLine("Loading data....");

            // the csv file is looked up two directories above the application base directory
            var csvPath = Path.Combine(
                Path.Combine(
                    Path.Combine(AppDomain.CurrentDomain.BaseDirectory, ".."),
                    ".."),
                "california_housing.csv");
            var housing = Frame.ReadCsv(csvPath, separators: ",");

            // pull out the series of interest
            var rooms  = housing["total_rooms"];
            var prices = housing["median_house_value"];
            var income = housing["median_income"]; // not used below; an alternative feature to total_rooms

            // express house values in thousands
            prices = prices / 1000;

            // single feature and its label
            var feature = rooms.Values.ToArray();
            var labels  = prices.Values.ToArray();

            Console.WriteLine("Training model....");
            var model = new OrdinaryLeastSquares().Learn(feature, labels);

            Console.WriteLine($"Slope:       {model.Slope}");
            Console.WriteLine($"Intercept:   {model.Intercept}");

            // evaluate the model on the data it was trained on
            var predictions = model.Transform(feature);
            var loss        = new SquareLoss(labels);
            var rmse        = Math.Sqrt(loss.Loss(predictions));

            // the RMSE is also reported as a percentage of the label range
            var range = Math.Abs(labels.Max() - labels.Min());

            Console.WriteLine($"Label range: {range}");
            Console.WriteLine($"RMSE:        {rmse} {rmse / range * 100:0.00}%");

            // every feature value appears twice: once with its prediction (class 1),
            // once with its actual label (class 2)
            var x = feature.Concat(feature).ToArray();
            var y = predictions.Concat(labels).ToArray();
            var c = Enumerable.Repeat(1, labels.Length)
                    .Concat(Enumerable.Repeat(2, labels.Length))
                    .ToArray();

            var plot = new Scatterplot("Training", "feature", "label");

            plot.Compute(x, y, c);
            ScatterplotBox.Show(plot);

            Console.ReadLine();
        }
        /// <summary>
        ///   Shows a labelled scatter plot of the given points.
        /// </summary>
        ///
        /// <param name="title">The title of the plot.</param>
        /// <param name="x">The points to plot, one (x,y) pair per row.</param>
        /// <param name="z">The class label of each (x,y) pair.</param>
        ///
        /// <returns>The result of the <c>show</c> helper for the computed plot.</returns>
        ///
        public static ScatterplotBox Show(string title, double[][] x, int[] z)
        {
            var plot = new Scatterplot(title);
            plot.Compute(x, z);

            return show(plot);
        }
示例#5
0
        /// <summary>
        ///   Shows a scatter plot of the given points, optionally labelled and
        ///   optionally without blocking the caller.
        /// </summary>
        ///
        /// <param name="title">The title of the plot.</param>
        /// <param name="x">The points to plot, one (x,y) pair per row.</param>
        /// <param name="z">The class label of each (x,y) pair. Default is <c>null</c>.</param>
        /// <param name="nonBlocking">When <c>true</c>, the caller keeps running while
        /// the form is on screen; when <c>false</c>, the caller is blocked until the
        /// user closes the form. Default is <c>false</c>.</param>
        ///
        /// <returns>The result of the <c>show</c> helper for the computed plot.</returns>
        ///
        public static ScatterplotBox Show(string title, double[][] x, int[] z = null, bool nonBlocking = false)
        {
            var plot = new Scatterplot(title);
            plot.Compute(x, z);

            return show(plot, nonBlocking);
        }
        /// <summary>
        ///   Shows a scatter plot of the given points.
        /// </summary>
        ///
        /// <param name="title">The title of the plot.</param>
        /// <param name="x">The points to plot, one (x,y) pair per row.</param>
        ///
        /// <returns>The result of the <c>show</c> helper for the computed plot.</returns>
        ///
        public static ScatterplotBox Show(string title, double[,] x)
        {
            var plot = new Scatterplot(title);
            plot.Compute(x);

            return show(plot);
        }
示例#7
0
        /// <summary>
        ///   Builds a <see cref="Scatterplot"/> of the ROC curve
        ///   (sensitivity against 1 - specificity).
        /// </summary>
        ///
        /// <param name="includeRandom">
        ///   True to add a second class, "Random", made of the two points
        ///   (0,0) and (1,1); false to plot the curve only.</param>
        ///
        /// <returns>The computed scatter plot.</returns>
        ///
        public Scatterplot GetScatterplot(bool includeRandom = false)
        {
            Scatterplot plot = new Scatterplot("Area under the ROC curve")
            {
                XAxisTitle = "1 - Specificity",
                YAxisTitle = "Sensitivity"
            };

            double[] x = Points.GetOneMinusSpecificity();
            double[] y = Points.GetSensitivity();

            if (!includeRandom)
            {
                plot.Compute(x, y);
                return plot;
            }

            // Make room for the two extra end points of the diagonal.
            int curveLength = x.Length;
            double[] xs      = new double[curveLength + 2];
            double[] ys      = new double[curveLength + 2];
            int[]    classes = new int[curveLength + 2];

            Array.Copy(x, xs, x.Length);
            Array.Copy(y, ys, y.Length);

            // Curve points keep the default class 0; the appended
            // points (0,0) and (1,1) are put in class 1.
            for (int k = 0; k < 2; k++)
            {
                xs[curveLength + k]      = k;
                ys[curveLength + k]      = k;
                classes[curveLength + k] = 1;
            }

            plot.Compute(xs, ys, classes);
            plot.Classes[0].Text = "Curve";
            plot.Classes[1].Text = "Random";

            return plot;
        }
        /// <summary>
        /// Plots the training errors against their epoch index (starting at 0).
        /// </summary>
        /// <param name="trainingErrors">The training errors to plot, one per epoch</param>
        /// <param name="title">The chart title</param>
        /// <param name="xAxisLabel">The chart x-axis label</param>
        /// <param name="yAxisLabel">The chart y-axis label</param>
        private static void Plot(
            List <double> trainingErrors,
            string title,
            string xAxisLabel,
            string yAxisLabel)
        {
            int epochCount = trainingErrors.Count();

            // x holds the epoch indices 0..epochCount-1
            double[] x = new double[epochCount];
            for (int epoch = 0; epoch < epochCount; epoch++)
            {
                x[epoch] = epoch;
            }

            double[] y = trainingErrors.ToArray();

            Scatterplot plot = new Scatterplot(title, xAxisLabel, yAxisLabel);
            plot.Compute(x, y);

            ScatterplotBox.Show(plot);
        }
示例#9
0
        /// <summary>
        /// Handles a selection change in the feature list box by plotting the
        /// selected feature's test-set column against the test-set targets.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">The event data (not used).</param>
        private void featuresListBox_SelectedIndexChanged(object sender, EventArgs e)
        {
            if (!isDataLoaded)
            {
                Utilities.InfoMessageBox("Please load data or wait until data is loaded first.");
                return;
            }

            // SelectedItem is null when the selection has been cleared, and the
            // `as` cast yields null for a non-string item: nothing to plot then.
            var selectedFeature = featuresListBox.SelectedItem as string;
            if (selectedFeature == null)
            {
                return;
            }

            // A negative index means the name is not one of the data set's columns;
            // passing it on to GetColumn would fail or pick the wrong column.
            var featureIndex = dataSet.dataFull.Columns.IndexOf(selectedFeature);
            if (featureIndex < 0)
            {
                return;
            }

            double[] targets = dataSet.Y_Test.Select(x => (double)x).ToArray();

            Scatterplot plot = new Scatterplot(selectedFeature);

            plot.Compute(dataSet.X_Test.GetColumn(featureIndex), targets);
            scatterplotView.Scatterplot = plot;
        }
示例#10
0
        /// <summary>
        /// Plots a histogram as stacked points: a bin holding n values becomes
        /// n points at the bin's lower bound, at heights 0 to n-1.
        /// </summary>
        /// <param name="histogram">The histogram to plot</param>
        /// <param name="title">The plot title</param>
        /// <param name="xAxisLabel">The x-axis label</param>
        /// <param name="yAxisLabel">The y-axis label</param>
        private static void Plot(Histogram histogram, string title, string xAxisLabel, string yAxisLabel)
        {
            var xs = new List <double>();
            var ys = new List <double>();

            for (int bin = 0; bin < histogram.Values.Length; bin++)
            {
                var lowerBound = histogram.Bins[bin].Range.Min;
                var count      = histogram.Values[bin];

                // one point per value counted in this bin
                foreach (int level in Enumerable.Range(0, count))
                {
                    xs.Add(lowerBound);
                    ys.Add(level * 1.0);
                }
            }

            Scatterplot plot = new Scatterplot(title, xAxisLabel, yAxisLabel);
            plot.Compute(xs.ToArray(), ys.ToArray());

            ScatterplotBox.Show(plot);
        }
示例#11
0
        /// <summary>
        ///   Assigns a scatter plot computed from three identical values to a
        ///   <c>ScatterplotView</c>, clearing the data source before and after.
        ///   The method contains no assertions; presumably it only checks that
        ///   none of these steps throws.
        /// </summary>
        public void ComputeTest2()
        {
            var view = new ScatterplotView();

            // degenerate input: every value is the same
            var plot = new Scatterplot();
            plot.Compute(new[] { 200.0, 200.0, 200.0 });

            // clear -> set -> clear
            view.DataSource = null;
            view.DataSource = plot;
            view.DataSource = null;
        }
        /// <summary>
        /// Shows a scatter plot of one series against another.
        /// </summary>
        /// <param name="xSeries">The series providing the x values</param>
        /// <param name="ySeries">The series providing the y values</param>
        /// <param name="title">The plot title</param>
        /// <param name="xAxisLabel">The x-axis label</param>
        /// <param name="yAxisLabel">The y-axis label</param>
        public static void Plot(
            Series <int, double> xSeries,
            Series <int, double> ySeries,
            string title,
            string xAxisLabel,
            string yAxisLabel)
        {
            // materialize the series values into arrays
            var xs = xSeries.Values.ToArray();
            var ys = ySeries.Values.ToArray();

            // build the plot and display it
            Scatterplot plot = new Scatterplot(title, xAxisLabel, yAxisLabel);
            plot.Compute(xs, ys);

            ScatterplotBox.Show(plot);
        }
示例#13
0
        /// <summary>
        ///   Shows the validation (testing) errors against the epoch number.
        /// </summary>
        ///
        /// <param name="errors">The errors to plot, one per epoch.</param>
        /// <param name="epochCount">The number of epochs; epochs are numbered from 1.</param>
        ///
        public void PlotValidationCurve(IEnumerable <double> errors, int epochCount)
        {
            double[] epochs = (from epoch in Enumerable.Range(1, epochCount)
                               select (double)epoch).ToArray();

            double[] values = errors.ToArray();

            // NOTE(review): TrainingPlotResources.YAxisTitle is passed as the second
            // argument, where other call sites pass an x-axis title - confirm the
            // resource naming is intended.
            var plot = new Scatterplot(
                "График изменения квадратичной ошибки тестирования",
                TrainingPlotResources.YAxisTitle,
                "Ошибки тестирования");

            plot.Compute(epochs, values);
            ScatterplotBox.Show(plot);
        }
示例#14
0
        /// <summary>
        ///   Shows the training errors against the epoch number.
        /// </summary>
        ///
        /// <param name="errors">The errors to plot, one per epoch.</param>
        /// <param name="epochCount">The number of epochs; epochs are numbered from 1.</param>
        ///
        public void PlotTrainingCurve(IEnumerable <double> errors, int epochCount)
        {
            double[] epochs = (from epoch in Enumerable.Range(1, epochCount)
                               select (double)epoch).ToArray();

            double[] values = errors.ToArray();

            // NOTE(review): YAxisTitle is passed before XAxisTitle here, the reverse
            // of the order used for TrainingAndValidationPlotResources elsewhere -
            // confirm the resource naming is intended.
            var plot = new Scatterplot(
                TrainingPlotResources.Title,
                TrainingPlotResources.YAxisTitle,
                TrainingPlotResources.XAxisTitle);

            plot.Compute(epochs, values);
            ScatterplotBox.Show(plot);
        }
示例#15
0
        /// <summary>
        ///   Picks a random row of the training frame and shows its 784 pixel
        ///   columns ("Column2" to "Column785") as a 28 x 28 two-class scatter plot.
        /// </summary>
        ///
        /// <param name="training">The frame holding the value shown in the title
        ///   in "Column1", followed by the pixel columns.</param>
        ///
        private void PrintRandomDigit(Frame <int, string> training)
        {
            Console.WriteLine(@"Вывод случайного тренировочного образца...");

            // Next(1, RowCount) returns a value in [1, RowCount), so key 0 is never picked
            Random rnd         = new Random();
            int    row         = rnd.Next(1, training.RowCount);
            string randomDigit = training.Rows[row]["Column1"].ToString();

            // pixel p sits in column p % 28; rows grow downwards, hence the negated row index
            double[] x = new double[784];
            double[] y = new double[784];
            for (int p = 0; p < 784; p++)
            {
                x[p] = p % 28;
                y[p] = -p / 28;
            }

            // class 1 when the pixel value is above 0.5, class 0 otherwise
            // (also 0 when the cell does not hold a double)
            int[] z = Enumerable.Range(2, 784)
                      .Select(i => (training.Rows[row][$"Column{i}"] as double?) > 0.5 ? 1 : 0)
                      .ToArray();

            var plot = new Scatterplot($"Цифра {randomDigit}", "x", "y");

            plot.Compute(x, y, z);
            ScatterplotBox.Show(plot);
        }
示例#16
0
        /// <summary>
        /// Shows predictions and actual labels against the same feature values,
        /// each as its own class in a single scatter plot.
        /// </summary>
        /// <param name="feature">The feature values (x-axis)</param>
        /// <param name="labels">The actual labels; plotted as class 2</param>
        /// <param name="predictions">The predicted values; plotted as class 1</param>
        /// <param name="title">The plot title</param>
        /// <param name="xAxisLabel">The x-axis label</param>
        /// <param name="yAxisLabel">The y-axis label</param>
        public static void Plot(
            double[] feature,
            double[] labels,
            double[] predictions,
            string title,
            string xAxisLabel,
            string yAxisLabel)
        {
            // the feature values are repeated: first for the predictions, then for the labels
            double[] xs = feature.Concat(feature).ToArray();
            double[] ys = predictions.Concat(labels).ToArray();

            // class per point: labels.Length ones followed by labels.Length twos
            int[] classes = Enumerable.Repeat(1, labels.Length)
                            .Concat(Enumerable.Repeat(2, labels.Length))
                            .ToArray();

            Scatterplot plot = new Scatterplot(title, xAxisLabel, yAxisLabel);
            plot.Compute(xs, ys, classes);

            ScatterplotBox.Show(plot);
        }
示例#17
0
        /// <summary>
        /// Run the lesson: trains a logistic regression that classifies California
        /// housing rows as high-value or not, then shows a histogram of the predicted
        /// probabilities, prints confusion-matrix scores and displays the ROC curve,
        /// all computed on the validation rows.
        /// </summary>
        public static void Run()
        {
            // get data; the csv file is looked up two directories above the application base directory
            Console.WriteLine("Loading data....");
            var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");

            path = Path.Combine(path, "..");
            path = Path.Combine(path, "california_housing.csv");
            var housing = Frame.ReadCsv(path, separators: ",");

            // keep only the rows with a median_house_value below 500000
            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // create the median_high_house_value feature:
            // 1.0 when median_house_value is at least 265000, 0.0 otherwise
            housing.AddColumn("median_high_house_value",
                              housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

            // shuffle the frame: re-key the rows with a random ordering of
            // 0..KeyCount-1, then sort the rows by the new key
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            housing = housing.IndexRowsWith(indices).SortByRowKey();

            // create training, validation, and test frames
            // NOTE(review): the fixed key ranges presumably require at least 17000
            // rows to remain after the filter above - confirm against the data set.
            // The test frame is not used in the rest of this method.
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var test       = housing.Rows[Enumerable.Range(14500, 2500)];

            // build the list of features we're going to use
            var columns = new string[] {
                "latitude",
                "longitude",
                "housing_median_age",
                "total_rooms",
                "total_bedrooms",
                "population",
                "households",
                "median_income"
            };

            // train the model using a logistic regressor
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                MaxIterations = 100
            };
            var regression = learner.Learn(
                training.Columns[columns].ToArray2D <double>().ToJagged(),
                training["median_high_house_value"].Values.ToArray());

            // get probabilities for the validation rows
            var features_validation = validation.Columns[columns].ToArray2D <double>().ToJagged();
            var label_validation    = validation["median_high_house_value"].Values.ToArray();
            var probabilities       = regression.Probability(features_validation);

            // calculate the histogram of probabilities
            var histogram = new Histogram();

            histogram.Compute(probabilities, 0.05);

            // draw the histogram as stacked points: a bin holding n values becomes
            // n points at the bin's lower bound, at heights 0..n-1
            var x = new List <double>();
            var y = new List <double>();

            for (int i = 0; i < histogram.Values.Length; i++)
            {
                var xcor = histogram.Bins[i].Range.Min;
                x.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select xcor);
                y.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select n * 1.0);
            }
            var plot = new Scatterplot("", "prediction", "count");

            plot.Compute(x.ToArray(), y.ToArray());
            ScatterplotBox.Show(plot);

            // get predictions and actuals (a label of exactly 1.0 counts as true)
            var predictions = regression.Decide(features_validation);
            var actuals     = label_validation.Select(v => v == 1.0 ? true : false).ToArray();

            // create confusion matrix
            var confusion = new ConfusionMatrix(predictions, actuals);

            // display classification scores
            Console.WriteLine($"True Positives:  {confusion.TruePositives}");
            Console.WriteLine($"True Negatives:  {confusion.TrueNegatives}");
            Console.WriteLine($"False Positives: {confusion.FalsePositives}");
            Console.WriteLine($"False Negatives: {confusion.FalseNegatives}");
            Console.WriteLine();

            // display accuracy, precision, and recall
            Console.WriteLine($"Accuracy:        {confusion.Accuracy}");
            Console.WriteLine($"Precision:       {confusion.Precision}");
            Console.WriteLine($"Recall:          {confusion.Recall}");
            Console.WriteLine();

            // display TPR and FPR
            Console.WriteLine($"TPR:             {confusion.Sensitivity}");
            Console.WriteLine($"FPR:             {confusion.FalsePositiveRate}");
            Console.WriteLine();

            // calculate roc curve
            // NOTE(review): the curve is built from the hard 0/1 decisions rather
            // than from `probabilities` - confirm this is intended.
            var roc = new ReceiverOperatingCharacteristic(
                actuals,
                predictions.Select(v => v ? 1 : 0).ToArray());

            roc.Compute(100);

            // generate the scatter plot, including the random (diagonal) curve
            plot = roc.GetScatterplot(true);

            // show roc curve, then restyle it through the box: lines on, symbols hidden, tight scale
            var box      = ScatterplotBox.Show(plot);
            var callback = new Action(() =>
            {
                box.SetLinesVisible(true);
                box.SetSymbolSize(0);
                box.SetScaleTight(true);
            });

            // NOTE(review): the short sleep presumably gives the plot window time to
            // be created before Invoke is called - confirm.
            System.Threading.Thread.Sleep(100);
            box.Invoke(callback);

            // show the auc
            Console.WriteLine($"AUC:             {roc.Area}");
        }
示例#18
0
        /// <summary>
        /// The main application entry point. Splits the California housing data into
        /// training, validation and test rows, fits a linear regression of
        /// median_house_value on median_income using the training rows, and reports
        /// and plots the results for each of the three partitions.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // get data; the csv file is looked up two directories above the application base directory
            Console.WriteLine("Loading data....");
            var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");

            path = Path.Combine(path, "..");
            path = Path.Combine(path, "california_housing.csv");
            var housing = Frame.ReadCsv(path, separators: ",");

            // keep only the rows with a median_house_value below 500000
            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // convert the house value range to thousands
            housing["median_house_value"] /= 1000;

            // shuffle row indices: a random ordering of 0..KeyCount-1
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            // shuffle the frame using the indices: re-key the rows, then sort by the new key
            housing = housing.IndexRowsWith(indices).SortByRowKey();

            // create training, validation, and test frames
            // NOTE(review): the fixed key ranges presumably require at least 17000
            // rows to remain after the filter above - confirm against the data set.
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var test       = housing.Rows[Enumerable.Range(14500, 2500)];

            // plot the training data
            var x    = training["longitude"].Values.ToArray();
            var y    = training["latitude"].Values.ToArray();
            var plot = new Scatterplot("Training", "longitude", "latitude");

            plot.Compute(x, y);
            ScatterplotBox.Show(plot);

            // plot the test data
            var x2    = test["longitude"].Values.ToArray();
            var y2    = test["latitude"].Values.ToArray();
            var plot2 = new Scatterplot("Test", "longitude", "latitude");

            plot2.Compute(x2, y2);
            ScatterplotBox.Show(plot2);

            // set up training features and labels
            var training_features = training["median_income"].Values.ToArray();
            var training_labels   = training["median_house_value"].Values.ToArray();

            // train the model
            Console.WriteLine("Training model....");
            var learner = new OrdinaryLeastSquares();
            var model   = learner.Learn(training_features, training_labels);

            // show results
            Console.WriteLine("TRAINING RESULTS");
            Console.WriteLine($"Slope:       {model.Slope}");
            Console.WriteLine($"Intercept:   {model.Intercept}");
            Console.WriteLine();

            // get training predictions
            var training_predictions = model.Transform(training_features);

            // set up validation features and labels
            var validation_features = validation["median_income"].Values.ToArray();
            var validation_labels   = validation["median_house_value"].Values.ToArray();

            // validate the model
            var validation_predictions = model.Transform(validation_features);
            var validation_rmse        = Math.Sqrt(new SquareLoss(validation_labels).Loss(validation_predictions));

            // show validation results; the RMSE is also given as a percentage of the label range
            var validation_range = Math.Abs(validation_labels.Max() - validation_labels.Min());

            Console.WriteLine("VALIDATION RESULTS");
            Console.WriteLine($"Label range: {validation_range}");
            Console.WriteLine($"RMSE:        {validation_rmse} {validation_rmse / validation_range * 100:0.00}%");
            Console.WriteLine();

            // set up test features and labels
            var test_features = test["median_income"].Values.ToArray();
            var test_labels   = test["median_house_value"].Values.ToArray();

            // evaluate the model on the test data
            var test_predictions = model.Transform(test_features);
            var test_rmse        = Math.Sqrt(new SquareLoss(test_labels).Loss(test_predictions));

            // show test results; the RMSE is also given as a percentage of the label range
            var test_range = Math.Abs(test_labels.Max() - test_labels.Min());

            Console.WriteLine("TEST RESULTS");
            Console.WriteLine($"Label range: {test_range}");
            Console.WriteLine($"RMSE:        {test_rmse} {test_rmse / test_range * 100:0.00}%");
            Console.WriteLine();

            // show training plot: every feature value appears twice, once with its
            // prediction (class 1) and once with its actual label (class 2)
            x = training_features.Concat(training_features).ToArray();
            y = training_predictions.Concat(training_labels).ToArray();
            var colors1 = Enumerable.Repeat(1, training_labels.Length).ToArray();
            var colors2 = Enumerable.Repeat(2, training_labels.Length).ToArray();
            var c       = colors1.Concat(colors2).ToArray();

            plot = new Scatterplot("Training", "feature", "label");
            plot.Compute(x, y, c);
            ScatterplotBox.Show(plot);

            // show validation plot (same layout as the training plot)
            x       = validation_features.Concat(validation_features).ToArray();
            y       = validation_predictions.Concat(validation_labels).ToArray();
            colors1 = Enumerable.Repeat(1, validation_labels.Length).ToArray();
            colors2 = Enumerable.Repeat(2, validation_labels.Length).ToArray();
            c       = colors1.Concat(colors2).ToArray();
            plot    = new Scatterplot("Validation", "feature", "label");
            plot.Compute(x, y, c);
            ScatterplotBox.Show(plot);

            // show test plot (same layout as the training plot)
            x       = test_features.Concat(test_features).ToArray();
            y       = test_predictions.Concat(test_labels).ToArray();
            colors1 = Enumerable.Repeat(1, test_labels.Length).ToArray();
            colors2 = Enumerable.Repeat(2, test_labels.Length).ToArray();
            c       = colors1.Concat(colors2).ToArray();
            plot    = new Scatterplot("Test", "feature", "label");
            plot.Compute(x, y, c);
            ScatterplotBox.Show(plot);

            // keep the console open until the user presses enter
            Console.ReadLine();
        }
示例#19
0
        /// <summary>
        /// The main application entry point. Demonstrates feature engineering on the
        /// California housing data: a clipped rooms_per_person feature, a correlation
        /// matrix, one-hot encoded latitude bins, and a histogram of rooms_per_person.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // get data; the csv file is looked up two directories above the application base directory
            Console.WriteLine("Loading data....");
            var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");

            path = Path.Combine(path, "..");
            path = Path.Combine(path, "california_housing.csv");
            var housing = Frame.ReadCsv(path, separators: ",");

            // keep only the rows with a median_house_value below 500000
            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // convert the house value range to thousands
            housing["median_house_value"] /= 1000;

            // shuffle row indices: a random ordering of 0..KeyCount-1
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            // shuffle the frame using the indices: re-key the rows, then sort by the new key
            housing = housing.IndexRowsWith(indices).SortByRowKey();

            // create the rooms_per_person feature, clipped to a maximum of 4.0
            // (the commented-out line below is the unclipped variant)
            //housing.AddColumn("rooms_per_person", housing["total_rooms"] / housing["population"]);
            housing.AddColumn("rooms_per_person",
                              (housing["total_rooms"] / housing["population"]).Select(v => v.Value <= 4.0 ? v.Value : 4.0));

            // calculate the correlation matrix over all columns of the frame
            var correlation = Measures.Correlation(housing.ToArray2D <double>());

            // show the column names followed by the correlation matrix
            Console.WriteLine(housing.ColumnKeys.ToArray().ToString <string>());
            Console.WriteLine(correlation.ToString <double>("0.0"));

            // calculate binned latitudes: ten bins [32,33) .. [41,42), each latitude
            // mapped to the lower bound of its bin. First() throws for a latitude
            // that falls outside every bin.
            var bins            = from b in Enumerable.Range(32, 10) select(Min: b, Max: b + 1);
            var binned_latitude =
                from l in housing["latitude"].Values
                let bin = (from b in bins where l >= b.Min && l < b.Max select b)
                          select bin.First().Min;

            // add one-hot encoding columns: one column per bin, holding 1 for the
            // rows whose latitude falls in that bin and 0 otherwise
            foreach (var i in Enumerable.Range(32, 10))
            {
                housing.AddColumn($"latitude {i}-{i + 1}",
                                  from l in binned_latitude
                                  select l == i ? 1 : 0);
            }

            // drop the latitude column
            housing.DropColumn("latitude");

            // show the data frame on the console
            housing.Print();

            // calculate rooms_per_person histogram
            var histogram = new Histogram();

            histogram.Compute(housing["rooms_per_person"].Values.ToArray(), 0.1); // use 1.0 without clipping

            // draw the histogram as stacked points: a bin holding n values becomes
            // n points at the bin's lower bound, at heights 0..n-1
            var x = new List <double>();
            var y = new List <double>();

            for (int i = 0; i < histogram.Values.Length; i++)
            {
                var xcor = histogram.Bins[i].Range.Min;
                x.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select xcor);
                y.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select n * 1.0);
            }
            var plot = new Scatterplot("", "rooms per person", "count");

            plot.Compute(x.ToArray(), y.ToArray());
            ScatterplotBox.Show(plot);

            // keep the console open until the user presses enter
            Console.ReadLine();
        }
示例#20
0
        /// <summary>
        /// The main application entry point. Loads the California housing data set,
        /// trains a small neural network to predict the median house value, and plots
        /// the training and validation RMSE recorded after every epoch.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // Epoch counts are declared once so the training loop and the plot arrays
            // derived from them can never disagree in length.
            const int warmupEpochs = 15;
            const int epochCount   = 100;

            // get data: the csv file is looked up two directories above the binaries
            Console.WriteLine("Loading data....");
            var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");

            path = Path.Combine(path, "..");
            path = Path.Combine(path, "california_housing.csv");
            var housing = Frame.ReadCsv(path, separators: ",");

            // keep only the rows with a median house value below 500000
            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // shuffle the frame: give every row a randomly ordered key, then sort by key
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            housing = housing.IndexRowsWith(indices).SortByRowKey();

            // convert the house value range to thousands
            housing["median_house_value"] /= 1000;

            // create training, validation, and test frames
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var test       = housing.Rows[Enumerable.Range(14500, 2500)];

            // set up model columns
            var columns = new string[] {
                "latitude",
                "longitude",
                "housing_median_age",
                "total_rooms",
                "total_bedrooms",
                "population",
                "households",
                "median_income"
            };

            // build a neural network
            var network = new ActivationNetwork(
                new RectifiedLinearFunction(),  // the activation function
                columns.Length,                 // number of input features (one per model column)
                8,                              // hidden layer with 8 nodes
                1);                             // output layer with 1 node

            // set up a backpropagation learner
            var learner = new ParallelResilientBackpropagationLearning(network);

            // prep training feature and label arrays (each label becomes a 1-element vector)
            var features = training.Columns[columns].ToArray2D <double>().ToJagged();
            var labels   = training["median_house_value"].Values
                           .Select(l => new double[] { l })
                           .ToArray();

            // prep validation feature and label arrays
            var features_v = validation.Columns[columns].ToArray2D <double>().ToJagged();
            var labels_v   = validation["median_house_value"].Values
                             .Select(l => new double[] { l })
                             .ToArray();

            // warm up the network; these passes are not recorded in the error curves
            network.Randomize();
            for (var i = 0; i < warmupEpochs; i++)
            {
                learner.RunEpoch(features, labels);
            }

            // train the neural network, recording both errors after every epoch
            var errors   = new List <double>(epochCount);
            var errors_v = new List <double>(epochCount);

            for (var epoch = 0; epoch < epochCount; epoch++)
            {
                learner.RunEpoch(features, labels);

                // NOTE(review): the factor 2 presumably undoes a 1/2 in the error
                // reported by ComputeError -- confirm against the Accord documentation.
                var rmse   = Math.Sqrt(2 * learner.ComputeError(features, labels) / labels.Length);
                var rmse_v = Math.Sqrt(2 * learner.ComputeError(features_v, labels_v) / labels_v.Length);
                errors.Add(rmse);
                errors_v.Add(rmse_v);
                Console.WriteLine($"Epoch: {epoch}, Training RMSE: {rmse}, Validation RMSE: {rmse_v}");
            }

            // plot the training curve: both series share the same epoch axis,
            // series 1 holds the training errors and series 2 the validation errors
            var epochs = Enumerable.Range(0, epochCount).Select(v => (double)v).ToArray();
            var x      = epochs.Concat(epochs).ToArray();
            var y      = errors.Concat(errors_v).ToArray();
            var sets   = Enumerable.Repeat(1, epochCount).Concat(Enumerable.Repeat(2, epochCount)).ToArray();
            var plot   = new Scatterplot("", "Epoch", "RMSE");

            plot.Compute(x, y, sets);
            ScatterplotBox.Show(plot);

            Console.ReadLine();
        }
示例#21
0
        /// <summary>
        /// The main application entry point. Builds one-hot longitude/latitude cross
        /// features for the California housing data, trains a logistic regression
        /// first without and then with regularization, and shows a histogram of the
        /// learned weights for each run.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // get data
            Console.WriteLine("Loading data....");
            var path    = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\california_housing.csv"));
            var housing = Frame.ReadCsv(path, separators: ",");

            // keep only the rows with a median house value below 500000
            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // shuffle the frame: give every row a randomly ordered key, then sort by key
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            housing = housing.IndexRowsWith(indices).SortRowsByKey();

            // create the median_high_house_value feature (1.0 when the value is 265000 or more)
            housing.AddColumn("median_high_house_value",
                              housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

            // create one-hot vectors for longitude and latitude:
            // one slot per bin, set where the coordinate falls inside [bin.Min, bin.Max)
            var vectors_long =
                from l in housing["longitude"].Values
                select Vector.Create <double>(
                    1,
                    (from b in RangeFromTo(-125, -114, 1)
                     select l >= b.Min && l < b.Max).ToArray());

            var vectors_lat =
                from l in housing["latitude"].Values
                select Vector.Create <double>(
                    1,
                    (from b in RangeFromTo(32, 43, 1)
                     select l >= b.Min && l < b.Max).ToArray());

            // multiply vectors into one cross matrix per row. The result is materialized
            // once: the column loop below enumerates it 144 times, and a deferred query
            // would redo the binning and the outer product for every row on each pass.
            var vectors_cross =
                vectors_long.Zip(vectors_lat, (lng, lat) => lng.Outer(lat)).ToArray();

            // add one column per cell of the cross matrix
            for (var i = 0; i < 12; i++)
            {
                for (var j = 0; j < 12; j++)
                {
                    housing.AddColumn($"location {i},{j}", from v in vectors_cross select v[i, j]);
                }
            }

            // set up model columns
            var columns = (from i in Enumerable.Range(0, 12)
                           from j in Enumerable.Range(0, 12)
                           select $"location {i},{j}").ToList();

            columns.Add("housing_median_age");
            columns.Add("total_rooms");
            columns.Add("total_bedrooms");
            columns.Add("population");
            columns.Add("households");
            columns.Add("median_income");

            // create training, validation, and test partitions
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var test       = housing.Rows[Enumerable.Range(14500, 2500)];

            // both runs train on the same data, so build the arrays only once
            var features = training.Columns[columns].ToArray2D <double>().ToJagged();
            var labels   = training["median_high_house_value"].Values.ToArray();

            ////////////////////////////////////////////////////////////////////////
            // Without regularization
            ////////////////////////////////////////////////////////////////////////

            // train the model
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                MaxIterations  = 50,
                Regularization = 0
            };
            var regression = learner.Learn(features, labels);

            // display training results
            Console.WriteLine("TRAINING WITHOUT REGULARIZATION");
            Console.WriteLine($"Weights:     {regression.Weights.ToString<double>("0.00")}");
            Console.WriteLine($"Intercept:   {regression.Intercept}");
            Console.WriteLine();

            // plot a histogram of the weights.
            // NOTE(review): the original comment suggested a bin width of 1.0 when
            // regularization is disabled, but the code used 0.1 here; 0.1 is kept.
            ShowWeightHistogram("Without Regularization", regression.Weights, 0.1);

            ////////////////////////////////////////////////////////////////////////
            // With regularization
            ////////////////////////////////////////////////////////////////////////

            // train the model
            learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                MaxIterations  = 50,
                Regularization = 50
            };
            regression = learner.Learn(features, labels);

            // display training results
            Console.WriteLine("TRAINING WITH REGULARIZATION");
            Console.WriteLine($"Weights:     {regression.Weights.ToString<double>("0.00")}");
            Console.WriteLine($"Intercept:   {regression.Intercept}");
            Console.WriteLine();

            // plot a histogram of the weights
            ShowWeightHistogram("With Regularization", regression.Weights, 0.1);

            Console.ReadLine();
        }

        /// <summary>
        /// Bins the given weights and shows the histogram as a scatterplot: every bin
        /// becomes a vertical stack of points at the bin's lower bound, one point per
        /// counted value.
        /// </summary>
        /// <param name="title">Title of the plot window.</param>
        /// <param name="weights">The values to bin.</param>
        /// <param name="binWidth">Width of each histogram bin.</param>
        private static void ShowWeightHistogram(string title, double[] weights, double binWidth)
        {
            var histogram = new Histogram();

            histogram.Compute(weights, binWidth);

            var x = new List <double>();
            var y = new List <double>();

            for (int i = 0; i < histogram.Values.Length; i++)
            {
                var xcor  = histogram.Bins[i].Range.Min;
                var count = histogram.Values[i];

                // stack points at heights 0..count-1 above the bin's lower bound
                for (int n = 0; n < count; n++)
                {
                    x.Add(xcor);
                    y.Add(n * 1.0);
                }
            }

            var plot = new Scatterplot(title, "prediction", "count");

            plot.Compute(x.ToArray(), y.ToArray());
            ScatterplotBox.Show(plot);
        }
        /// <summary>
        /// Tests the hypothesis that the median house value of a housing block depends
        /// linearly on the block's total number of rooms.
        /// A single-feature linear regression is fitted with the ordinary least squares
        /// learner from Accord.NET. The slope, the intercept, the label range and the
        /// RMSE are written to the console, and a scatterplot holding both the model
        /// predictions and the actual labels is prepared.
        /// </summary>
        /// <param name="total_rooms">Series with the input feature: the total number of rooms per housing block.</param>
        /// <param name="median_house_value">Series with the label: the median house value per housing block.</param>
        private static void RunningSimpleLinearRegression(Series <int, double> total_rooms, Series <int, double> median_house_value)
        {
            // The Accord regression classes work on plain double arrays, so the values
            // of both Deedle series are copied into arrays first.
            var inputs  = total_rooms.Values.ToArray();
            var targets = median_house_value.Values.ToArray();

            // --- Training ---------------------------------------------------------
            // With a single input feature there is no need for gradient descent:
            // ordinary least squares finds the fit in one pass.
            var ols   = new OrdinaryLeastSquares();
            var model = ols.Learn(inputs, targets);

            // report the parameters the learner found
            Console.WriteLine($"Slope:      {model.Slope}");
            Console.WriteLine($"Intercept:  {model.Intercept}");

            // --- Validation -------------------------------------------------------
            // Run every feature value through the model and compare the resulting
            // predictions with the actual labels to obtain the RMSE.
            var predicted = model.Transform(inputs);
            var loss      = new SquareLoss(targets);
            var rmse      = Math.Sqrt(loss.Loss(predicted));

            // The RMSE is easier to judge relative to the spread of the labels.
            var spread = Math.Abs(targets.Max() - targets.Min());

            Console.WriteLine($"Label range:    {spread}");
            Console.WriteLine($"RMSE:           {rmse} {rmse / spread * 100:0.00}%");

            // Output recorded by the original author for one run:
            //   Slope:       0.006969381760507163
            //   Intercept:   188.8762058206879
            //   Label range: 485.00199999999995
            //   RMSE:        114.98100785209695 23.71%
            // An RMSE of about 114 is more than 23% of the label range, which is not
            // a very good fit.

            // --- Plot -------------------------------------------------------------
            // Accord's scatterplot (Accord.Controls package) takes one x array and one
            // y array, but two series have to be drawn: the predictions and the labels.
            // Both series are therefore concatenated (predictions first, labels second)
            // over the same feature values repeated twice.
            var xs = inputs.Concat(inputs).ToArray();
            var ys = predicted.Concat(targets).ToArray();

            // A third array tells Accord which series each point belongs to:
            // 1 for every prediction, 2 for every label.
            var seriesIds = Enumerable.Repeat(1, targets.Length)
                            .Concat(Enumerable.Repeat(2, targets.Length))
                            .ToArray();

            var plot = new Scatterplot("Training", "feature", "label");

            plot.Compute(xs, ys, seriesIds);

            // The plot is computed but not displayed: the original author disabled the
            // ScatterplotBox.Show(plot) call because ScatterplotBox did not work under
            // .NET Core.
        }
示例#23
0
        /// <summary>
        /// Run the lesson: load the California housing data, derive a 0/1
        /// "median_high_house_value" label, fit an ordinary least squares model on the
        /// training partition, and show a histogram of the model's outputs for the
        /// validation partition.
        /// </summary>
        public static void Run()
        {
            // load the csv file
            Console.WriteLine("Loading data....");
            var csvPath = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\california_housing.csv"));
            var housing = Frame.ReadCsv(csvPath, separators: ",");

            // keep only the rows with a median house value below 500000
            housing = housing.Where(row => ((decimal)row.Value["median_house_value"]) < 500000);

            // label column: 1.0 when the median house value is 265000 or more, else 0.0
            var highValueFlags = housing["median_house_value"].Select(entry => entry.Value >= 265000 ? 1.0 : 0.0);
            housing.AddColumn("median_high_house_value", highValueFlags);

            // shuffle: assign randomly ordered keys to the rows, then sort on those keys
            var random       = new Random();
            var shuffledKeys = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(key => random.NextDouble());

            housing = housing.IndexRowsWith(shuffledKeys).SortRowsByKey();

            // split into training (12000 rows), validation (2500) and test (2500) frames
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var testing    = housing.Rows[Enumerable.Range(14500, 2500)];

            // the feature columns fed to the model
            var featureNames = new string[] {
                "latitude",
                "longitude",
                "housing_median_age",
                "total_rooms",
                "total_bedrooms",
                "population",
                "households",
                "median_income"
            };

            // fit a linear regressor on the training partition
            var learner = new OrdinaryLeastSquares();
            learner.IsRobust = true;

            var trainingInputs = training.Columns[featureNames].ToArray2D <double>().ToJagged();
            var trainingLabels = training["median_high_house_value"].Values.ToArray();
            var regression     = learner.Learn(trainingInputs, trainingLabels);

            // run the validation partition through the model
            var validationInputs = validation.Columns[featureNames].ToArray2D <double>().ToJagged();
            var validationLabels = validation["median_high_house_value"].Values.ToArray();
            var probabilities    = regression.Transform(validationInputs);

            // bin the model outputs in steps of 0.05
            var histogram = new Histogram();

            histogram.Compute(probabilities, 0.05);

            // turn the histogram into scatter points: each bin becomes a vertical stack
            // at the bin's lower bound, with one point per counted value
            var xs = new List <double>();
            var ys = new List <double>();

            for (int bin = 0; bin < histogram.Values.Length; bin++)
            {
                var binStart = histogram.Bins[bin].Range.Min;
                var count    = histogram.Values[bin];

                for (int level = 0; level < count; level++)
                {
                    xs.Add(binStart);
                    ys.Add(level * 1.0);
                }
            }

            var plot = new Scatterplot("", "prediction", "count");

            plot.Compute(xs.ToArray(), ys.ToArray());
            ScatterplotBox.Show(plot);
        }
示例#24
0
        /// <summary>
        /// The main application entry point. Loads the handwritten digits data set,
        /// plots one random digit, trains a neural network to classify the digits,
        /// lists the mistakes made on the validation partition, and plots the
        /// training and validation error curves.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // Sizes are declared once so that loops, the network layout and the plot
            // arrays derived from them always agree.
            const int imageWidth = 28;                       // pixels per image row
            const int pixelCount = imageWidth * imageWidth;  // 784 pixels, stored in Column2..Column785
            const int digitCount = 10;                       // classes 0..9, one Label column each
            const int epochCount = 50;

            // read data
            Console.WriteLine("Loading data....");
            var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");

            path = Path.Combine(path, "..");
            path = Path.Combine(path, "handwritten_digits_medium.csv");
            var digits = Frame.ReadCsv(path, separators: ",", hasHeaders: false);

            Console.WriteLine($"    {digits.RowCount} rows loaded");

            // set up feature and label column names
            var featureColumns = Enumerable.Range(2, pixelCount).Select(v => $"Column{v}").ToArray();
            var labelColumns   = Enumerable.Range(0, digitCount).Select(v => $"Label{v}").ToArray();

            // normalize pixel values to 0..1
            foreach (var column in featureColumns)
            {
                digits[column] /= 255.0;
            }

            // grab a random digit; the file has no header row, so row 0 is a valid pick.
            // The row is fetched once instead of once per pixel.
            var rnd      = new Random();
            var row      = rnd.Next(digits.RowCount);
            var digitRow = digits.Rows[row];
            var label    = digitRow["Column1"].ToString();

            // plot the digit: x is the pixel column, y the negated pixel row,
            // and pixels brighter than 0.5 are drawn as series 1, the rest as series 0
            var digitX    = Enumerable.Range(0, pixelCount).Select(v => (double)(v % imageWidth)).ToArray();
            var digitY    = Enumerable.Range(0, pixelCount).Select(v => (double)(-v / imageWidth)).ToArray();
            var digitZ    = featureColumns.Select(c => (double)digitRow[c] > 0.5 ? 1 : 0).ToArray();
            var digitPlot = new Scatterplot($"Digit {label}", "x", "y");

            digitPlot.Compute(digitX, digitY, digitZ);
            ScatterplotBox.Show(digitPlot);

            // create one-hot label columns
            for (var i = 0; i < digitCount; i++)
            {
                digits.AddColumn(labelColumns[i], from v in digits["Column1"].Values select (int)v == i ? 1.0 : 0.0);
            }

            // print label columns
            digits.Columns[new[] { "Column1" }.Concat(labelColumns).ToArray()].Print();

            // create training and validation partitions (80% / 20%)
            var numRows    = digits.RowKeys.Count();
            var pivot      = (int)(numRows * 0.8);
            var training   = digits.Rows[Enumerable.Range(0, pivot)];
            var validation = digits.Rows[Enumerable.Range(pivot, numRows - pivot)];

            // set up feature and label arrays
            var features = training.Columns[featureColumns].ToArray2D <double>().ToJagged();
            var labels   = training.Columns[labelColumns].ToArray2D <double>().ToJagged();

            // build a neural network: one input per pixel, two hidden layers of 100
            // nodes, and one output per digit
            var network = new ActivationNetwork(
                new SigmoidFunction(),
                pixelCount,
                100,
                100,
                digitCount);

            // randomize network weights
            new GaussianWeights(network, 0.1).Randomize();

            // set up a backpropagation learner
            var learner = new BackPropagationLearning(network)
            {
                LearningRate = 0.05
            };

            // train the network and validate it in each epoch
            Console.WriteLine("Training neural network....");
            var errors           = new List <double>(epochCount);
            var validationErrors = new List <double>(epochCount);

            for (var epoch = 0; epoch < epochCount; epoch++)
            {
                var error           = learner.RunEpoch(features, labels) / labels.Length;
                var validationError = Validate(validation, network);
                errors.Add(error);
                validationErrors.Add(validationError);
                Console.WriteLine($"Epoch: {epoch}, Training error: {error}, Validation error: {validationError}");
            }

            // test the network on the validation data
            Console.WriteLine($"Validating neural network on {validation.RowCount} records....");
            int mistakes = 0;

            foreach (var key in validation.RowKeys)
            {
                // the first value of a record is the digit, the next pixelCount values are its pixels
                var record      = validation.Rows[key];
                var digit       = (int)record.Values.First();
                var input       = record.Values.Skip(1).Take(pixelCount).Cast <double>();
                var predictions = network.Compute(input.ToArray());

                // calculate best prediction: the output node with the highest value
                var best = Enumerable.Range(0, digitCount)
                           .Select(v => new { Digit = v, Prediction = predictions[v] })
                           .OrderByDescending(v => v.Prediction)
                           .First();

                // report and count incorrect predictions
                if (best.Digit != digit)
                {
                    Console.Write($"    {digit}: {predictions.ToString("0.00")} ");
                    Console.WriteLine($" -> {digit} = {best.Digit} ({100 * best.Prediction:0.00}%) WRONG");
                    mistakes++;
                }
            }

            // report total mistakes
            var accuracy = 100.0 * (validation.Rows.KeyCount - mistakes) / validation.Rows.KeyCount;

            Console.WriteLine($"Total mistakes: {mistakes}, Accuracy: {accuracy:0.00}%");

            // plot the training and validation curves: both series share the same
            // epoch axis, series 1 is the training error and series 2 the validation error
            var epochs     = Enumerable.Range(1, epochCount).Select(v => (double)v).ToArray();
            var curveX     = epochs.Concat(epochs).ToArray();
            var curveY     = errors.Concat(validationErrors).ToArray();
            var curveZ     = Enumerable.Repeat(1, epochCount).Concat(Enumerable.Repeat(2, epochCount)).ToArray();
            var curvesPlot = new Scatterplot("Training & validation curves", "epochs", "training error");

            curvesPlot.Compute(curveX, curveY, curveZ);
            ScatterplotBox.Show(curvesPlot);

            Console.ReadLine();
        }