/// <summary>
/// Samples <paramref name="function"/> over an interval and displays the samples as a scatter plot.
/// </summary>
/// <param name="title">The plot title; "Scatter plot" is used when null.</param>
/// <param name="function">The function to evaluate at every sample point.</param>
/// <param name="min">Lower bound of the interval. When this or <paramref name="max"/> is null,
/// both bounds are replaced by the result of GetRange, or by [0, 1] if GetRange fails.</param>
/// <param name="max">Upper bound of the interval (see <paramref name="min"/>).</param>
/// <param name="step">Distance between samples; defaults to 1/1000 of the interval width.</param>
/// <returns>The box returned by the scatter plot overload of <c>show</c>.</returns>
private static ScatterplotBox show(String title, Func<double, double> function, double? min, double? max, double? step)
{
    // A missing bound on either side discards BOTH bounds and re-derives them.
    if (!min.HasValue || !max.HasValue)
    {
        DoubleRange detected;
        bool found = GetRange(function, out detected);
        min = found ? detected.Min : 0;
        max = found ? detected.Max : 1;
    }

    double lower = min.Value;
    double upper = max.Value;
    double increment = step.HasValue ? step.Value : (upper - lower) / 1000;

    double[] xs = Matrix.Interval(lower, upper, increment);
    double[] ys = Matrix.Apply(xs, function);

    string caption = title ?? "Scatter plot";
    Scatterplot plot = new Scatterplot(caption);
    plot.Compute(xs, ys);

    return show(plot);
}
/// <summary>
/// Shows the training and validation error curves together in one scatter plot.
/// </summary>
/// <param name="errors">Training errors; plotted as series 1.</param>
/// <param name="validationErrors">Validation errors; plotted as series 2.</param>
/// <param name="epochCount">Number of epochs; the x values 1..epochCount are generated for each series.</param>
public void PlotTrainingAndValidationCurves(
    IEnumerable<double> errors,
    IEnumerable<double> validationErrors,
    int epochCount)
{
    // Epoch numbers 1..epochCount, materialized once and reused for both series.
    List<double> epochs = new List<double>();
    foreach (int epoch in Enumerable.Range(1, epochCount))
    {
        epochs.Add(epoch);
    }

    double[] x = epochs.Concat(epochs).ToArray();
    double[] y = errors.Concat(validationErrors).ToArray();

    // Series label per point: the first epochCount points are 1, the rest are 2.
    List<int> seriesIds = new List<int>();
    seriesIds.AddRange(Enumerable.Repeat(1, epochCount));
    seriesIds.AddRange(Enumerable.Repeat(2, epochCount));
    int[] z = seriesIds.ToArray();

    Scatterplot chart = new Scatterplot(
        TrainingAndValidationPlotResources.Title,
        TrainingAndValidationPlotResources.XAxisTitle,
        TrainingAndValidationPlotResources.YAxisTitle);
    chart.Compute(x, y, z);

    ScatterplotBox.Show(chart);
}
/// <summary>
/// Application entry point. Fits a one-feature linear regression of the median house value
/// on the total number of rooms, prints the model and its RMSE, and plots predictions
/// against the actual labels.
/// </summary>
/// <param name="args">Command line arguments (not read by this method).</param>
public static void Main(string[] args)
{
    // load the data set, expected two directories above the application base directory
    Console.WriteLine("Loading data....");
    var dataPath = Path.Combine(
        Path.Combine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, ".."), ".."),
        "california_housing.csv");
    var housing = Frame.ReadCsv(dataPath, separators: ",");

    // pull out the series of interest (median_income is fetched but not used below;
    // it is the alternative feature the original author experimented with)
    var total_rooms = housing["total_rooms"];
    var median_house_value = housing["median_house_value"];
    var median_income = housing["median_income"];

    // express the house values in thousands
    median_house_value = median_house_value / 1000;

    // feature and label arrays for the learner
    var feature = total_rooms.Values.ToArray();
    var labels = median_house_value.Values.ToArray();

    // fit the regression
    Console.WriteLine("Training model....");
    var model = new OrdinaryLeastSquares().Learn(feature, labels);

    // report the fitted line
    Console.WriteLine($"Slope: {model.Slope}");
    Console.WriteLine($"Intercept: {model.Intercept}");

    // score the model on the data it was trained on
    var predictions = model.Transform(feature);
    var squareLoss = new SquareLoss(labels);
    var rmse = Math.Sqrt(squareLoss.Loss(predictions));
    var range = Math.Abs(labels.Max() - labels.Min());
    Console.WriteLine($"Label range: {range}");
    Console.WriteLine($"RMSE: {rmse} {rmse / range * 100:0.00}%");

    // two series share one plot: predictions first (class 1), labels second (class 2)
    var xs = feature.Concat(feature).ToArray();
    var ys = predictions.Concat(labels).ToArray();
    var classes = Enumerable.Repeat(1, labels.Length)
        .Concat(Enumerable.Repeat(2, labels.Length))
        .ToArray();

    // draw the chart and keep the console open
    var chart = new Scatterplot("Training", "feature", "label");
    chart.Compute(xs, ys, classes);
    ScatterplotBox.Show(chart);

    Console.ReadLine();
}
/// <summary>
///   Builds a labelled scatter plot from the given points and displays it.
/// </summary>
///
/// <param name="title">The title for the plot window.</param>
/// <param name="x">A two column matrix containing the (x,y) data pairs as rows.</param>
/// <param name="z">The corresponding labels for the (x,y) pairs.</param>
///
/// <returns>The box returned by the internal <c>show</c> helper.</returns>
///
public static ScatterplotBox Show(string title, double[][] x, int[] z)
{
    var plot = new Scatterplot(title);
    plot.Compute(x, z);

    ScatterplotBox box = show(plot);
    return box;
}
/// <summary>
///   Builds a (optionally labelled) scatter plot from the given points and displays it.
/// </summary>
///
/// <param name="title">The title for the plot window.</param>
/// <param name="x">A two column matrix containing the (x,y) data pairs as rows.</param>
/// <param name="z">The corresponding labels for the (x,y) pairs. Default is <c>null</c>.</param>
/// <param name="nonBlocking">If set to <c>true</c>, the caller will continue
///   executing while the form is shown on screen. If set to <c>false</c>,
///   the caller will be blocked until the user closes the form. Default
///   is <c>false</c>.</param>
///
/// <returns>The box returned by the internal <c>show</c> helper.</returns>
///
public static ScatterplotBox Show(string title, double[][] x, int[] z = null, bool nonBlocking = false)
{
    var plot = new Scatterplot(title);
    plot.Compute(x, z);

    ScatterplotBox box = show(plot, nonBlocking);
    return box;
}
/// <summary>
///   Builds a scatter plot from a rectangular matrix of points and displays it.
/// </summary>
///
/// <param name="title">The title for the plot window.</param>
/// <param name="x">A two column matrix containing the (x,y) data pairs as rows.</param>
///
/// <returns>The box returned by the internal <c>show</c> helper.</returns>
///
public static ScatterplotBox Show(string title, double[,] x)
{
    var plot = new Scatterplot(title);
    plot.Compute(x);

    ScatterplotBox box = show(plot);
    return box;
}
/// <summary>
///   Builds a <see cref="Scatterplot"/> of the ROC curve (sensitivity against 1 - specificity).
/// </summary>
///
/// <param name="includeRandom">
///   True to add a second class, "Random", made of the two points (0,0) and (1,1),
///   i.e. the diagonal from lower left to upper right; false to plot the curve only.</param>
///
/// <returns>The computed scatter plot.</returns>
///
public Scatterplot GetScatterplot(bool includeRandom = false)
{
    Scatterplot plot = new Scatterplot("Area under the ROC curve")
    {
        XAxisTitle = "1 - Specificity",
        YAxisTitle = "Sensitivity"
    };

    double[] x = Points.GetOneMinusSpecificity();
    double[] y = Points.GetSensitivity();

    // Curve only: no class labels needed.
    if (!includeRandom)
    {
        plot.Compute(x, y);
        return plot;
    }

    // Curve plus two extra points for the diagonal.
    int count = x.Length;
    double[] allX = new double[count + 2];
    double[] allY = new double[count + 2];
    int[] classes = new int[count + 2];   // curve points keep the default class 0

    x.CopyTo(allX, 0);
    y.CopyTo(allY, 0);

    // Lower-left end of the diagonal.
    int first = count;
    allX[first] = 0;
    allY[first] = 0;
    classes[first] = 1;

    // Upper-right end of the diagonal.
    int second = count + 1;
    allX[second] = 1;
    allY[second] = 1;
    classes[second] = 1;

    plot.Compute(allX, allY, classes);
    plot.Classes[0].Text = "Curve";
    plot.Classes[1].Text = "Random";

    return plot;
}
/// <summary>
/// Plot the training errors against their epoch index (0, 1, 2, ...).
/// </summary>
/// <param name="trainingErrors">The training errors to plot, one per epoch</param>
/// <param name="title">The chart title</param>
/// <param name="xAxisLabel">The chart x-axis label</param>
/// <param name="yAxisLabel">The chart y-axis label</param>
private static void Plot(
    List<double> trainingErrors,
    string title,
    string xAxisLabel,
    string yAxisLabel)
{
    // List<T> exposes its size directly; no need to go through LINQ's Count().
    var epochs = trainingErrors.Count;

    // x is the zero-based epoch index, y the error recorded for that epoch
    var x = Enumerable.Range(0, epochs).Select(v => (double)v).ToArray();
    var y = trainingErrors.ToArray();

    var plot = new Scatterplot(title, xAxisLabel, yAxisLabel);
    plot.Compute(x, y);
    ScatterplotBox.Show(plot);
}
/// <summary>
/// Handles a selection change in the features list: plots the selected feature's
/// test-set column against the test labels in the scatter plot view.
/// </summary>
/// <param name="sender">The event source (not used).</param>
/// <param name="e">The event data (not used).</param>
private void featuresListBox_SelectedIndexChanged(object sender, EventArgs e)
{
    if (!isDataLoaded)
    {
        Utilities.InfoMessageBox("Please load data or wait until data is loaded first.");
        return;
    }

    // SelectedItem is null when the selection is cleared (or is not a string);
    // there is nothing to plot in that case.
    var selectedFeature = featuresListBox.SelectedItem as string;
    if (selectedFeature == null)
    {
        return;
    }

    // An unknown column name yields a negative index, which must not reach GetColumn.
    var featureIndex = dataSet.dataFull.Columns.IndexOf(selectedFeature);
    if (featureIndex < 0)
    {
        return;
    }

    // x: the selected feature column of the test set; y: the test labels converted to double
    double[] featureValues = dataSet.X_Test.GetColumn(featureIndex);
    double[] labelValues = dataSet.Y_Test.Select(x => (double)x).ToArray();

    Scatterplot plot = new Scatterplot(selectedFeature);
    plot.Compute(featureValues, labelValues);
    scatterplotView.Scatterplot = plot;
}
/// <summary>
/// Draw a histogram as a scatter plot: every bin becomes a vertical stack of points.
/// </summary>
/// <param name="histogram">The histogram to plot</param>
/// <param name="title">The plot title</param>
/// <param name="xAxisLabel">The x-axis label</param>
/// <param name="yAxisLabel">The y-axis label</param>
private static void Plot(Histogram histogram, string title, string xAxisLabel, string yAxisLabel)
{
    var xs = new List<double>();
    var ys = new List<double>();

    for (int bin = 0; bin < histogram.Values.Length; bin++)
    {
        // all points of a bin sit at the bin's lower bound...
        double left = histogram.Bins[bin].Range.Min;

        // ...at heights 0, 1, ..., count - 1
        foreach (int level in Enumerable.Range(0, histogram.Values[bin]))
        {
            xs.Add(left);
            ys.Add(level * 1.0);
        }
    }

    var chart = new Scatterplot(title, xAxisLabel, yAxisLabel);
    chart.Compute(xs.ToArray(), ys.ToArray());
    ScatterplotBox.Show(chart);
}
/// <summary>
/// Checks that a computed scatter plot can be attached to and detached from a
/// <c>ScatterplotView</c> through its DataSource property without throwing.
/// </summary>
public void ComputeTest2()
{
    var view = new ScatterplotView();

    // three identical values
    var plot = new Scatterplot();
    plot.Compute(new[] { 200.0, 200.0, 200.0 });

    // clear, attach, clear again
    view.DataSource = null;
    view.DataSource = plot;
    view.DataSource = null;

    // To inspect the plot manually, show it with ScatterplotBox.Show(plot).
}
/// <summary>
/// Show two series as a scatter plot, one providing the x values and the other the y values.
/// </summary>
/// <param name="xSeries">The series holding the x values</param>
/// <param name="ySeries">The series holding the y values</param>
/// <param name="title">The plot title</param>
/// <param name="xAxisLabel">The x-axis label</param>
/// <param name="yAxisLabel">The y-axis label</param>
public static void Plot(
    Series<int, double> xSeries,
    Series<int, double> ySeries,
    string title,
    string xAxisLabel,
    string yAxisLabel)
{
    // flatten both series into plain arrays
    double[] xValues = xSeries.Values.ToArray();
    double[] yValues = ySeries.Values.ToArray();

    // build and display the chart
    Scatterplot chart = new Scatterplot(title, xAxisLabel, yAxisLabel);
    chart.Compute(xValues, yValues);
    ScatterplotBox.Show(chart);
}
/// <summary>
/// Shows the validation (testing) error per epoch as a scatter plot.
/// </summary>
/// <param name="errors">The errors to plot, one per epoch.</param>
/// <param name="epochCount">Number of epochs; x values run from 1 to this number.</param>
public void PlotValidationCurve(IEnumerable<double> errors, int epochCount)
{
    double[] epochs = (from epoch in Enumerable.Range(1, epochCount)
                       select (double)epoch).ToArray();
    double[] values = errors.ToArray();

    // NOTE(review): the second constructor argument is used as the x-axis title by other
    // callers in this code base, yet a resource named YAxisTitle is passed here - confirm
    // whether the resource is misnamed or the titles are swapped.
    Scatterplot chart = new Scatterplot(
        "График изменения квадратичной ошибки тестирования",
        TrainingPlotResources.YAxisTitle,
        "Ошибки тестирования");
    chart.Compute(epochs, values);

    ScatterplotBox.Show(chart);
}
/// <summary>
/// Shows the training error per epoch as a scatter plot.
/// </summary>
/// <param name="errors">The errors to plot, one per epoch.</param>
/// <param name="epochCount">Number of epochs; x values run from 1 to this number.</param>
public void PlotTrainingCurve(IEnumerable<double> errors, int epochCount)
{
    double[] epochs = (from epoch in Enumerable.Range(1, epochCount)
                       select (double)epoch).ToArray();
    double[] values = errors.ToArray();

    // NOTE(review): YAxisTitle is passed before XAxisTitle, whereas other callers in this
    // code base pass the x-axis title first - confirm whether the resources are misnamed
    // or the two titles are swapped.
    Scatterplot chart = new Scatterplot(
        TrainingPlotResources.Title,
        TrainingPlotResources.YAxisTitle,
        TrainingPlotResources.XAxisTitle);
    chart.Compute(epochs, values);

    ScatterplotBox.Show(chart);
}
/// <summary>
/// Picks a random row of the training frame and shows it as a 28x28 two-class scatter
/// plot: "Column1" provides the digit shown in the title, "Column2".."Column785" the pixels.
/// </summary>
/// <param name="training">The training frame to sample from.</param>
private void PrintRandomDigit(Frame<int, string> training)
{
    Console.WriteLine(@"Вывод случайного тренировочного образца...");

    // NOTE(review): the lower bound is 1, so the row with key 0 is never picked - confirm intent.
    Random rnd = new Random();
    int row = rnd.Next(1, training.RowCount);

    // Look the row up once instead of once per pixel; it does not change inside this method.
    var sample = training.Rows[row];
    string randomDigit = sample["Column1"].ToString();

    // Pixel k sits at column k % 28 and row k / 28; y is negated so the first row is on top.
    double[] x = Enumerable.Range(0, 784).Select(v => (double)(v % 28)).ToArray();
    double[] y = Enumerable.Range(0, 784).Select(v => (double)(-v / 28)).ToArray();

    // Class 1 for pixels above 0.5, class 0 otherwise. A value that is not a boxed double
    // becomes null under "as double?" and therefore also falls into class 0.
    int[] z = Enumerable.Range(2, 784)
        .Select(i => sample[$"Column{i}"] as double?)
        .Select(v => v > 0.5 ? 1 : 0)
        .ToArray();

    Scatterplot plot = new Scatterplot($"Цифра {randomDigit}", "x", "y");
    plot.Compute(x, y, z);
    ScatterplotBox.Show(plot);
}
/// <summary>
/// Show predictions and actual labels over the same feature values in one scatter plot.
/// </summary>
/// <param name="feature">The feature values (x coordinates of both series)</param>
/// <param name="labels">The actual labels, drawn as class 2</param>
/// <param name="predictions">The model predictions, drawn as class 1</param>
/// <param name="title">The plot title</param>
/// <param name="xAxisLabel">The x-axis label</param>
/// <param name="yAxisLabel">The y-axis label</param>
public static void Plot(
    double[] feature,
    double[] labels,
    double[] predictions,
    string title,
    string xAxisLabel,
    string yAxisLabel)
{
    // both series use the same x values, so the feature array is laid out twice
    double[] xs = feature.Concat(feature).ToArray();
    double[] ys = predictions.Concat(labels).ToArray();

    // class per point: labels.Length ones followed by labels.Length twos
    int count = labels.Length;
    int[] classes = Enumerable.Repeat(1, count)
        .Concat(Enumerable.Repeat(2, count))
        .ToArray();

    // build and display the chart
    Scatterplot chart = new Scatterplot(title, xAxisLabel, yAxisLabel);
    chart.Compute(xs, ys, classes);
    ScatterplotBox.Show(chart);
}
/// <summary>
/// Run the lesson: train a logistic regression that classifies housing blocks as
/// "high value" (median house value of at least 265,000), then show a histogram of the
/// predicted probabilities, the confusion-matrix scores and the ROC curve, all computed
/// on the validation partition.
/// </summary>
public static void Run()
{
    // get data: the CSV is looked up two directories above the application base directory
    Console.WriteLine("Loading data....");
    var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");
    path = Path.Combine(path, "..");
    path = Path.Combine(path, "california_housing.csv");
    var housing = Frame.ReadCsv(path, separators: ",");

    // keep only the rows whose median house value is below 500,000
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // create the median_high_house_value feature: 1.0 at or above 265,000, otherwise 0.0
    housing.AddColumn("median_high_house_value", housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

    // shuffle the frame: re-key the rows with a random ordering of 0..n-1, then sort by key
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortByRowKey();

    // create training, validation, and test frames (row keys 0-11999, 12000-14499, 14500-16999)
    // NOTE(review): assumes at least 17,000 rows remain after filtering - confirm.
    // The test frame is created but not used in this method.
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)];

    // build the list of features we're going to use
    var columns = new string[] {
        "latitude",
        "longitude",
        "housing_median_age",
        "total_rooms",
        "total_bedrooms",
        "population",
        "households",
        "median_income"
    };

    // train the model using a logistic regressor (at most 100 iterations)
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        MaxIterations = 100
    };
    var regression = learner.Learn(
        training.Columns[columns].ToArray2D<double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // get probabilities for the validation partition
    var features_validation = validation.Columns[columns].ToArray2D<double>().ToJagged();
    var label_validation = validation["median_high_house_value"].Values.ToArray();
    var probabilities = regression.Probability(features_validation);

    // calculate the histogram of probabilities (0.05 is presumably the bin width - confirm)
    var histogram = new Histogram();
    histogram.Compute(probabilities, 0.05);

    // draw the histogram as a scatter plot: each bin contributes one point per counted
    // value, placed at the bin's lower bound and stacked at heights 0..count-1
    var x = new List<double>();
    var y = new List<double>();
    for (int i = 0; i < histogram.Values.Length; i++)
    {
        var xcor = histogram.Bins[i].Range.Min;
        x.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select xcor);
        y.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select n * 1.0);
    }
    var plot = new Scatterplot("", "prediction", "count");
    plot.Compute(x.ToArray(), y.ToArray());
    ScatterplotBox.Show(plot);

    // get predictions and actuals as booleans
    var predictions = regression.Decide(features_validation);
    var actuals = label_validation.Select(v => v == 1.0 ? true : false).ToArray();

    // create confusion matrix
    var confusion = new ConfusionMatrix(predictions, actuals);

    // display classification scores
    Console.WriteLine($"True Positives: {confusion.TruePositives}");
    Console.WriteLine($"True Negatives: {confusion.TrueNegatives}");
    Console.WriteLine($"False Positives: {confusion.FalsePositives}");
    Console.WriteLine($"False Negatives: {confusion.FalseNegatives}");
    Console.WriteLine();

    // display accuracy, precision, and recall
    Console.WriteLine($"Accuracy: {confusion.Accuracy}");
    Console.WriteLine($"Precision: {confusion.Precision}");
    Console.WriteLine($"Recall: {confusion.Recall}");
    Console.WriteLine();

    // display TPR and FPR
    Console.WriteLine($"TPR: {confusion.Sensitivity}");
    Console.WriteLine($"FPR: {confusion.FalsePositiveRate}");
    Console.WriteLine();

    // calculate roc curve
    // NOTE(review): the curve is built from the hard 0/1 decisions, not from the
    // probabilities computed above - confirm this is intended.
    var roc = new ReceiverOperatingCharacteristic(
        actuals,
        predictions.Select(v => v ? 1 : 0).ToArray());
    roc.Compute(100);

    // generate the scatter plot, including the random (diagonal) reference curve
    plot = roc.GetScatterplot(true);

    // show roc curve, then restyle it (lines on, symbols hidden, tight scale) through
    // box.Invoke; the 100 ms sleep presumably gives the window time to be created - confirm
    var box = ScatterplotBox.Show(plot);
    var callback = new Action(() =>
    {
        box.SetLinesVisible(true);
        box.SetSymbolSize(0);
        box.SetScaleTight(true);
    });
    System.Threading.Thread.Sleep(100);
    box.Invoke(callback);

    // show the auc
    Console.WriteLine($"AUC: {roc.Area}");
}
/// <summary>
/// The main application entry point. Splits the housing data into training, validation
/// and test partitions, fits a linear regression of median house value on median income
/// using the training partition, reports the RMSE on the validation and test partitions,
/// and plots predictions against labels for all three partitions.
/// </summary>
/// <param name="args">Command line arguments (not read by this method).</param>
public static void Main(string[] args)
{
    // get data: the CSV is looked up two directories above the application base directory
    Console.WriteLine("Loading data....");
    var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");
    path = Path.Combine(path, "..");
    path = Path.Combine(path, "california_housing.csv");
    var housing = Frame.ReadCsv(path, separators: ",");

    // keep only the rows whose median house value is below 500,000
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // convert the house value range to thousands
    housing["median_house_value"] /= 1000;

    // shuffle row indices: a random ordering of 0..n-1
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

    // shuffle the frame using the indices: re-key the rows, then sort by the new keys
    housing = housing.IndexRowsWith(indices).SortByRowKey();

    // create training, validation, and test frames (row keys 0-11999, 12000-14499, 14500-16999)
    // NOTE(review): assumes at least 17,000 rows remain after filtering - confirm.
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)];

    // plot the training data by geographic position
    var x = training["longitude"].Values.ToArray();
    var y = training["latitude"].Values.ToArray();
    var plot = new Scatterplot("Training", "longitude", "latitude");
    plot.Compute(x, y);
    ScatterplotBox.Show(plot);

    // plot the test data by geographic position
    var x2 = test["longitude"].Values.ToArray();
    var y2 = test["latitude"].Values.ToArray();
    var plot2 = new Scatterplot("Test", "longitude", "latitude");
    plot2.Compute(x2, y2);
    ScatterplotBox.Show(plot2);

    // set up training features and labels
    var training_features = training["median_income"].Values.ToArray();
    var training_labels = training["median_house_value"].Values.ToArray();

    // train the model
    Console.WriteLine("Training model....");
    var learner = new OrdinaryLeastSquares();
    var model = learner.Learn(training_features, training_labels);

    // show results
    Console.WriteLine("TRAINING RESULTS");
    Console.WriteLine($"Slope: {model.Slope}");
    Console.WriteLine($"Intercept: {model.Intercept}");
    Console.WriteLine();

    // get training predictions (only used for the training plot further down)
    var training_predictions = model.Transform(training_features);

    // set up validation features and labels
    var validation_features = validation["median_income"].Values.ToArray();
    var validation_labels = validation["median_house_value"].Values.ToArray();

    // validate the model: RMSE is the square root of the square loss of the predictions
    var validation_predictions = model.Transform(validation_features);
    var validation_rmse = Math.Sqrt(new SquareLoss(validation_labels).Loss(validation_predictions));

    // show validation results; the RMSE is also printed as a percentage of the label range
    var validation_range = Math.Abs(validation_labels.Max() - validation_labels.Min());
    Console.WriteLine("VALIDATION RESULTS");
    Console.WriteLine($"Label range: {validation_range}");
    Console.WriteLine($"RMSE: {validation_rmse} {validation_rmse / validation_range * 100:0.00}%");
    Console.WriteLine();

    // set up test features and labels
    var test_features = test["median_income"].Values.ToArray();
    var test_labels = test["median_house_value"].Values.ToArray();

    // evaluate the model on the test partition
    var test_predictions = model.Transform(test_features);
    var test_rmse = Math.Sqrt(new SquareLoss(test_labels).Loss(test_predictions));

    // show test results
    var test_range = Math.Abs(test_labels.Max() - test_labels.Min());
    Console.WriteLine("TEST RESULTS");
    Console.WriteLine($"Label range: {test_range}");
    Console.WriteLine($"RMSE: {test_rmse} {test_rmse / test_range * 100:0.00}%");
    Console.WriteLine();

    // show training plot: predictions (class 1) and labels (class 2) over the same features
    x = training_features.Concat(training_features).ToArray();
    y = training_predictions.Concat(training_labels).ToArray();
    var colors1 = Enumerable.Repeat(1, training_labels.Length).ToArray();
    var colors2 = Enumerable.Repeat(2, training_labels.Length).ToArray();
    var c = colors1.Concat(colors2).ToArray();
    plot = new Scatterplot("Training", "feature", "label");
    plot.Compute(x, y, c);
    ScatterplotBox.Show(plot);

    // show validation plot, built the same way
    x = validation_features.Concat(validation_features).ToArray();
    y = validation_predictions.Concat(validation_labels).ToArray();
    colors1 = Enumerable.Repeat(1, validation_labels.Length).ToArray();
    colors2 = Enumerable.Repeat(2, validation_labels.Length).ToArray();
    c = colors1.Concat(colors2).ToArray();
    plot = new Scatterplot("Validation", "feature", "label");
    plot.Compute(x, y, c);
    ScatterplotBox.Show(plot);

    // show test plot, built the same way
    x = test_features.Concat(test_features).ToArray();
    y = test_predictions.Concat(test_labels).ToArray();
    colors1 = Enumerable.Repeat(1, test_labels.Length).ToArray();
    colors2 = Enumerable.Repeat(2, test_labels.Length).ToArray();
    c = colors1.Concat(colors2).ToArray();
    plot = new Scatterplot("Test", "feature", "label");
    plot.Compute(x, y, c);
    ScatterplotBox.Show(plot);

    // keep the console window open until the user presses Enter
    Console.ReadLine();
}
/// <summary>
/// The main application entry point. Demonstrates feature engineering on the housing
/// data: a clipped rooms_per_person feature, the correlation matrix of all columns,
/// one-hot encoded latitude bins, and a histogram of rooms_per_person.
/// </summary>
/// <param name="args">Command line arguments (not read by this method).</param>
public static void Main(string[] args)
{
    // get data: the CSV is looked up two directories above the application base directory
    Console.WriteLine("Loading data....");
    var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");
    path = Path.Combine(path, "..");
    path = Path.Combine(path, "california_housing.csv");
    var housing = Frame.ReadCsv(path, separators: ",");

    // keep only the rows whose median house value is below 500,000
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // convert the house value range to thousands
    housing["median_house_value"] /= 1000;

    // shuffle row indices: a random ordering of 0..n-1
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

    // shuffle the frame using the indices: re-key the rows, then sort by the new keys
    housing = housing.IndexRowsWith(indices).SortByRowKey();

    // create the rooms_per_person feature, clipped to a maximum of 4.0.
    // Unclipped variant kept for reference:
    //housing.AddColumn("rooms_per_person", housing["total_rooms"] / housing["population"]);
    housing.AddColumn("rooms_per_person",
        (housing["total_rooms"] / housing["population"]).Select(v => v.Value <= 4.0 ? v.Value : 4.0));

    // calculate the correlation matrix over all columns of the frame
    var correlation = Measures.Correlation(housing.ToArray2D<double>());

    // show the column names followed by the correlation matrix (one decimal place)
    Console.WriteLine(housing.ColumnKeys.ToArray().ToString<string>());
    Console.WriteLine(correlation.ToString<double>("0.0"));

    // calculate binned latitudes: ten one-degree bins [32,33), [33,34), ..., [41,42);
    // each latitude maps to the lower bound of its bin. First() throws for a latitude
    // outside [32, 42).
    // Both queries are lazy, so binned_latitude is re-evaluated on every enumeration.
    var bins = from b in Enumerable.Range(32, 10) select (Min: b, Max: b + 1);
    var binned_latitude =
        from l in housing["latitude"].Values
        let bin = (from b in bins where l >= b.Min && l < b.Max select b)
        select bin.First().Min;

    // add one-hot encoding columns: one 0/1 column per latitude bin
    foreach (var i in Enumerable.Range(32, 10))
    {
        housing.AddColumn($"latitude {i}-{i + 1}",
            from l in binned_latitude
            select l == i ? 1 : 0);
    }

    // drop the latitude column now that it is encoded
    housing.DropColumn("latitude");

    // show the data frame on the console
    housing.Print();

    // calculate rooms_per_person histogram
    var histogram = new Histogram();
    histogram.Compute(housing["rooms_per_person"].Values.ToArray(), 0.1); // use 1.0 without clipping

    // draw the histogram as a scatter plot: each bin contributes one point per counted
    // value, placed at the bin's lower bound and stacked at heights 0..count-1
    var x = new List<double>();
    var y = new List<double>();
    for (int i = 0; i < histogram.Values.Length; i++)
    {
        var xcor = histogram.Bins[i].Range.Min;
        x.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select xcor);
        y.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select n * 1.0);
    }
    var plot = new Scatterplot("", "rooms per person", "count");
    plot.Compute(x.ToArray(), y.ToArray());
    ScatterplotBox.Show(plot);

    // keep the console window open until the user presses Enter
    Console.ReadLine();
}
/// <summary>
/// The main application entry point. Trains a small neural network (8 inputs, one hidden
/// layer of 8 nodes, 1 output) to predict the median house value, prints the training and
/// validation RMSE for each of 100 epochs, and plots both error curves.
/// </summary>
/// <param name="args">Command line arguments (not read by this method).</param>
public static void Main(string[] args)
{
    // get data: the CSV is looked up two directories above the application base directory
    Console.WriteLine("Loading data....");
    var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..");
    path = Path.Combine(path, "..");
    path = Path.Combine(path, "california_housing.csv");
    var housing = Frame.ReadCsv(path, separators: ",");

    // keep only the rows whose median house value is below 500,000
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // shuffle the frame: re-key the rows with a random ordering of 0..n-1, then sort by key
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortByRowKey();

    // convert the house value range to thousands
    housing["median_house_value"] /= 1000;

    // create training, validation, and test frames (row keys 0-11999, 12000-14499, 14500-16999)
    // NOTE(review): assumes at least 17,000 rows remain after filtering - confirm.
    // The test frame is created but not used in this method.
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)];

    // set up model columns
    var columns = new string[] {
        "latitude",
        "longitude",
        "housing_median_age",
        "total_rooms",
        "total_bedrooms",
        "population",
        "households",
        "median_income"
    };

    // build a neural network
    var network = new ActivationNetwork(
        new RectifiedLinearFunction(),  // the activation function
        8,                              // number of input features
        8,                              // hidden layer with 8 nodes
        1);                             // output layer with 1 node

    // set up a backpropagation learner
    var learner = new ParallelResilientBackpropagationLearning(network);

    // prep training feature and label arrays; every label is wrapped in a one-element array
    var features = training.Columns[columns].ToArray2D<double>().ToJagged();
    var labels = (from l in training["median_house_value"].Values
                  select new double[] { l }).ToArray();

    // prep validation feature and label arrays
    var features_v = validation.Columns[columns].ToArray2D<double>().ToJagged();
    var labels_v = (from l in validation["median_house_value"].Values
                    select new double[] { l }).ToArray();

    // warm up the network: randomize it, then run 15 epochs whose errors are not recorded
    network.Randomize();
    for (var i = 0; i < 15; i++)
    {
        learner.RunEpoch(features, labels);
    }

    // train the neural network for 100 epochs, recording both errors after each epoch
    var errors = new List<double>();
    var errors_v = new List<double>();
    for (var epoch = 0; epoch < 100; epoch++)
    {
        learner.RunEpoch(features, labels);

        // NOTE(review): the factor 2 presumably undoes a 1/2 in the error that
        // ComputeError returns - confirm against the learner's documentation.
        var rmse = Math.Sqrt(2 * learner.ComputeError(features, labels) / labels.GetLength(0));
        var rmse_v = Math.Sqrt(2 * learner.ComputeError(features_v, labels_v) / labels_v.GetLength(0));
        errors.Add(rmse);
        errors_v.Add(rmse_v);
        Console.WriteLine($"Epoch: {epoch}, Training RMSE: {rmse}, Validation RMSE: {rmse_v}");
    }

    // plot the training curve: epochs 0..99 twice on x, training errors (set 1) followed by
    // validation errors (set 2) on y; the literal 100 must match the epoch count above
    var x = Enumerable.Range(0, 100).Concat(Enumerable.Range(0, 100)).Select(v => (double)v).ToArray();
    var y = errors.Concat(errors_v).ToArray();
    var sets = Enumerable.Repeat(1, 100).Concat(Enumerable.Repeat(2, 100)).ToArray();
    var plot = new Scatterplot("", "Epoch", "RMSE");
    plot.Compute(x, y, sets);
    ScatterplotBox.Show(plot);

    // keep the console window open until the user presses Enter
    Console.ReadLine();
}
/// <summary>
/// The main application entry point. Adds a 12x12 grid of longitude/latitude cross
/// features to the housing data, trains a logistic regression on them twice (with
/// Regularization set to 0 and to 50) and shows a histogram of the learned weights
/// for each run.
/// </summary>
/// <param name="args">Command line arguments (not read by this method).</param>
public static void Main(string[] args)
{
    // get data: the CSV is looked up three directories above the application base directory
    Console.WriteLine("Loading data....");
    var path = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\california_housing.csv"));
    var housing = Frame.ReadCsv(path, separators: ",");

    // keep only the rows whose median house value is below 500,000
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // shuffle the frame: re-key the rows with a random ordering of 0..n-1, then sort by key
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortRowsByKey();

    // create the median_high_house_value feature: 1.0 at or above 265,000, otherwise 0.0
    housing.AddColumn("median_high_house_value", housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

    // create one-hot vectors for longitude and latitude: one flag per bin produced by
    // RangeFromTo, set when the value lies in [bin.Min, bin.Max).
    // NOTE(review): the loops below index 12 entries per vector; this presumably matches
    // the number of bins RangeFromTo returns - confirm against that helper.
    var vectors_long =
        from l in housing["longitude"].Values
        select Vector.Create<double>(
            1,
            (from b in RangeFromTo(-125, -114, 1)
             select l >= b.Min && l < b.Max).ToArray());
    var vectors_lat =
        from l in housing["latitude"].Values
        select Vector.Create<double>(
            1,
            (from b in RangeFromTo(32, 43, 1)
             select l >= b.Min && l < b.Max).ToArray());

    // multiply vectors and create columns: the outer product of the two one-hot vectors
    // gives one grid cell per (longitude bin, latitude bin) pair.
    // NOTE(review): vectors_cross is a lazy query, so it is re-evaluated for each of the
    // 144 columns added below.
    var vectors_cross =
        vectors_long.Zip(vectors_lat, (lng, lat) => lng.Outer(lat));
    for (var i = 0; i < 12; i++)
    {
        for (var j = 0; j < 12; j++)
        {
            housing.AddColumn($"location {i},{j}", from v in vectors_cross select v[i, j]);
        }
    }

    // set up model columns: the 144 location columns plus six of the original features
    var columns = (from i in Enumerable.Range(0, 12)
                   from j in Enumerable.Range(0, 12)
                   select $"location {i},{j}").ToList();
    columns.Add("housing_median_age");
    columns.Add("total_rooms");
    columns.Add("total_bedrooms");
    columns.Add("population");
    columns.Add("households");
    columns.Add("median_income");

    // create training, validation, and test partitions (row keys 0-11999, 12000-14499, 14500-16999)
    // NOTE(review): assumes at least 17,000 rows remain after filtering - confirm.
    // Only the training partition is used below.
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)];

    ////////////////////////////////////////////////////////////////////////
    // Without regularization
    ////////////////////////////////////////////////////////////////////////

    // train the model
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        MaxIterations = 50,
        Regularization = 0
    };
    var regression = learner.Learn(
        training.Columns[columns].ToArray2D<double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // display training results
    Console.WriteLine("TRAINING WITHOUT REGULARIZATION");
    Console.WriteLine($"Weights: {regression.Weights.ToString<double>("0.00")}");
    Console.WriteLine($"Intercept: {regression.Intercept}");
    Console.WriteLine();

    // plot a histogram of the learned weights (all of them; zero weights are not filtered out)
    var histogram = new Histogram();
    histogram.Compute(regression.Weights, 0.1); // set to 1.0 when regularization is disabled

    // draw the histogram as a scatter plot: each bin contributes one point per counted
    // value, placed at the bin's lower bound and stacked at heights 0..count-1.
    // The x-axis is labelled "prediction" although the values plotted are weights.
    var x = new List<double>();
    var y = new List<double>();
    for (int i = 0; i < histogram.Values.Length; i++)
    {
        var xcor = histogram.Bins[i].Range.Min;
        x.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select xcor);
        y.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select n * 1.0);
    }
    var plot = new Scatterplot("Without Regularization", "prediction", "count");
    plot.Compute(x.ToArray(), y.ToArray());
    ScatterplotBox.Show(plot);

    ////////////////////////////////////////////////////////////////////////
    // With regularization
    ////////////////////////////////////////////////////////////////////////

    // train the model again on the same data, this time with Regularization = 50
    learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        MaxIterations = 50,
        Regularization = 50
    };
    regression = learner.Learn(
        training.Columns[columns].ToArray2D<double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // display training results
    Console.WriteLine("TRAINING WITH REGULARIZATION");
    Console.WriteLine($"Weights: {regression.Weights.ToString<double>("0.00")}");
    Console.WriteLine($"Intercept: {regression.Intercept}");
    Console.WriteLine();

    // plot a histogram of the learned weights (all of them; zero weights are not filtered out)
    histogram = new Histogram();
    histogram.Compute(regression.Weights, 0.1); // set to 1.0 when regularization is disabled

    // draw the histogram the same way as above
    x = new List<double>();
    y = new List<double>();
    for (int i = 0; i < histogram.Values.Length; i++)
    {
        var xcor = histogram.Bins[i].Range.Min;
        x.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select xcor);
        y.AddRange(from n in Enumerable.Range(0, histogram.Values[i]) select n * 1.0);
    }
    plot = new Scatterplot("With Regularization", "prediction", "count");
    plot.Compute(x.ToArray(), y.ToArray());
    ScatterplotBox.Show(plot);

    // keep the console window open until the user presses Enter
    Console.ReadLine();
}
/// <summary>
/// Fits a simple linear regression of the median house value on the total number of rooms
/// of a housing block, prints the fitted line and its RMSE, and prepares (but does not
/// show) a scatter plot of predictions against labels.
/// We assume that there is a linear relationship between the two quantities; to test that
/// hypothesis we run a linear regression on the data and check how good the fit is.
/// The regression classes come from the Accord.NET library (Accord.MachineLearning package)
/// and expect arrays of double, so the Deedle series are converted to double arrays
/// through their Values property.
/// </summary>
/// <param name="total_rooms">The feature series: total rooms per housing block.</param>
/// <param name="median_house_value">The label series: median house value per housing block.</param>
private static void RunningSimpleLinearRegression(Series<int, double> total_rooms, Series<int, double> median_house_value)
{
    // set up feature and label
    // This gets us both the input features (total_rooms) and the output labels
    // (median_house_value) as arrays of double.
    var feature = total_rooms.Values.ToArray();
    var labels = median_house_value.Values.ToArray();

    // The next step is to pick the learning algorithm.
    // We could use gradient descent, but since we're doing linear regression with only a
    // single input feature, the OrdinaryLeastSquares class finds the best fit in one pass.

    // train the model
    // This runs a linear regression on the data using the ordinary least squares algorithm.
    var learner = new OrdinaryLeastSquares();
    var model = learner.Learn(feature, labels);

    // The discovered model parameters are available as the Slope and Intercept properties:
    Console.WriteLine($"Slope: {model.Slope}");
    Console.WriteLine($"Intercept: {model.Intercept}");

    // #####################
    // Validating The Result
    // #####################

    // So is this a good fit? To find out, we validate the model by running every feature
    // value through it, which yields a set of predictions. Comparing each prediction with
    // the actual label gives the Root Mean Squared Error (RMSE):

    // validate the model
    var predictions = model.Transform(feature);
    var rmse = Math.Sqrt(new SquareLoss(labels).Loss(predictions));

    // The RMSE indicates the uncertainty in each prediction.
    // Comparing it to the range of the labels gives a feel for the accuracy of the model:
    var range = Math.Abs(labels.Max() - labels.Min());
    Console.WriteLine($"Label range: {range}");
    Console.WriteLine($"RMSE: {rmse} {rmse / range * 100:0.00}%");

    // RESULTS (sample output recorded by the original author)
    // Slope: 0.006969381760507163
    // Intercept: 188.8762058206879
    // Label range: 485.00199999999995
    // RMSE: 114.98100785209695 23.71%

    // That is an RMSE of about 115, more than 23% of the label range - not very good.
    // Let's plot the data and the regression line to get a better feel for the data.

    // Accord.NET has a built-in graph library for quickly creating scatterplots and
    // histograms; it requires the Accord.Controls NuGet package.

    // A scatter plot takes separate x and y arrays (matching our feature and labels
    // variables), but we need two data series: the labels and the model predictions.
    // To get this to work, the predictions and labels arrays are concatenated, and the
    // feature array is repeated so that both series share the same x values:

    // generate plot arrays
    var x = feature.Concat(feature).ToArray();
    var y = predictions.Concat(labels).ToArray();

    // Finally, a third array tells the plot which series each point belongs to:
    // the value 1 for all predictions, and 2 for all labels.

    // set up color array
    var colors1 = Enumerable.Repeat(1, labels.Length).ToArray();
    var colors2 = Enumerable.Repeat(2, labels.Length).ToArray();
    var c = colors1.Concat(colors2).ToArray();

    // And now we can generate the scatterplot:

    // plot the data
    var plot = new Scatterplot("Training", "feature", "label");
    plot.Compute(x, y, c);

    // The plot is computed but never displayed: according to the original author,
    // ScatterplotBox is not usable on .NET Core, so the call below stays disabled.
    //ScatterplotBox.Show(plot);
}
/// <summary>
/// Run the lesson: train a linear regressor on a binary "high house value" label and
/// plot the distribution of its predictions for the validation partition.
/// </summary>
/// <remarks>
/// Steps performed:
/// 1. Load <c>california_housing.csv</c> (three directories above the application base
///    directory) and keep only rows whose <c>median_house_value</c> is below 500000.
/// 2. Add the column <c>median_high_house_value</c>: 1.0 when <c>median_house_value</c>
///    is at least 265000, otherwise 0.0.
/// 3. Shuffle the rows and split them into training, validation and test partitions.
/// 4. Fit an <c>OrdinaryLeastSquares</c> regression on the training partition.
/// 5. Transform the validation features and show a histogram of the outputs in a
///    <c>ScatterplotBox</c> window.
/// </remarks>
public static void Run()
{
    // Sizes of the three partitions taken from the shuffled frame.
    // NOTE(review): these fixed sizes assume at least 17000 rows remain after the
    // filter below; confirm against the actual data file.
    const int TrainingCount = 12000;
    const int ValidationCount = 2500;
    const int TestCount = 2500;

    // Second argument passed to Histogram.Compute for the prediction histogram.
    const double HistogramBinWidth = 0.05;

    // get data
    Console.WriteLine("Loading data....");

    // Combine the path from individual segments so the platform's own directory
    // separator is used; a literal "..\..\.." only resolves on Windows.
    var path = Path.GetFullPath(
        Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "california_housing.csv"));
    var housing = Frame.ReadCsv(path, separators: ",");
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // create the median_high_house_value feature
    housing.AddColumn(
        "median_high_house_value",
        housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

    // shuffle the frame: assign a random permutation as row keys, then sort by key
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortRowsByKey();

    // create training, validation, and test frames
    var training = housing.Rows[Enumerable.Range(0, TrainingCount)];
    var validation = housing.Rows[Enumerable.Range(TrainingCount, ValidationCount)];
    // The test partition is set aside here but not used further in this method.
    var test = housing.Rows[Enumerable.Range(TrainingCount + ValidationCount, TestCount)];

    // build the list of features we're going to use
    var columns = new string[]
    {
        "latitude",
        "longitude",
        "housing_median_age",
        "total_rooms",
        "total_bedrooms",
        "population",
        "households",
        "median_income"
    };

    // train the model using a linear regressor
    var learner = new OrdinaryLeastSquares() { IsRobust = true };
    var regression = learner.Learn(
        training.Columns[columns].ToArray2D<double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // get probabilities: the regression output for each validation row
    var validationFeatures = validation.Columns[columns].ToArray2D<double>().ToJagged();
    var probabilities = regression.Transform(validationFeatures);

    // calculate the histogram of probabilities
    var histogram = new Histogram();
    histogram.Compute(probabilities, HistogramBinWidth);

    // draw the histogram as stacked points: a bin with k entries contributes k points
    // at x = lower edge of the bin and y = 0, 1, ..., k - 1
    var x = new List<double>();
    var y = new List<double>();
    for (int i = 0; i < histogram.Values.Length; i++)
    {
        int count = histogram.Values[i];
        double binStart = histogram.Bins[i].Range.Min;

        x.AddRange(Enumerable.Repeat(binStart, count));
        y.AddRange(Enumerable.Range(0, count).Select(n => (double)n));
    }

    var plot = new Scatterplot("", "prediction", "count");
    plot.Compute(x.ToArray(), y.ToArray());
    ScatterplotBox.Show(plot);
}
/// <summary>
/// The main application entry point.
/// Loads the handwritten-digits data set, shows one random digit, trains a neural
/// network to classify the digits, reports its mistakes on the validation partition
/// and plots the training and validation error curves.
/// </summary>
/// <remarks>
/// The CSV file is read without headers, so Deedle names the columns
/// <c>Column1</c> (the digit label) and <c>Column2</c>..<c>Column785</c> (the pixels).
/// The method blocks on <c>Console.ReadLine</c> before returning.
/// </remarks>
/// <param name="args">Command line arguments.</param>
public static void Main(string[] args)
{
    // Layout of the data set and the training schedule, named once so the loops,
    // the network shape and the plots cannot drift apart.
    const int ImageWidth = 28;
    const int PixelCount = 784;        // ImageWidth * ImageWidth
    const int FirstPixelColumn = 2;    // "Column1" holds the label
    const int DigitCount = 10;
    const int EpochCount = 50;

    // read data
    Console.WriteLine("Loading data....");
    var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "handwritten_digits_medium.csv");
    var digits = Frame.ReadCsv(path, separators: ",", hasHeaders: false);
    Console.WriteLine($" {digits.RowCount} rows loaded");

    // normalize pixel values to 0..1
    for (var column = FirstPixelColumn; column < FirstPixelColumn + PixelCount; column++)
    {
        digits[$"Column{column}"] /= 255.0;
    }

    // grab a random digit; the file has no header row, so row 0 is a valid record
    // and must be part of the candidate range
    var rnd = new Random();
    var row = rnd.Next(0, digits.RowCount);

    // fetch the row once instead of once per pixel
    var sample = digits.Rows[row];
    var label = sample["Column1"].ToString();

    // plot the digit: pixel v sits at column v % width and row v / width
    // (y is negated so the first image row is drawn at the top)
    var digitX = Enumerable.Range(0, PixelCount).Select(v => (double)(v % ImageWidth)).ToArray();
    var digitY = Enumerable.Range(0, PixelCount).Select(v => (double)(-v / ImageWidth)).ToArray();
    var digitZ = Enumerable.Range(FirstPixelColumn, PixelCount)
        .Select(column => (double)sample[$"Column{column}"] > 0.5 ? 1 : 0)
        .ToArray();
    var digitPlot = new Scatterplot($"Digit {label}", "x", "y");
    digitPlot.Compute(digitX, digitY, digitZ);
    ScatterplotBox.Show(digitPlot);

    // create one-hot label columns
    for (var i = 0; i < DigitCount; i++)
    {
        // copy the loop variable so the query captures a per-iteration value and does
        // not depend on AddColumn enumerating it immediately
        var target = i;
        digits.AddColumn(
            $"Label{target}",
            digits["Column1"].Values.Select(v => (int)v == target ? 1.0 : 0.0));
    }

    // set up feature and label column names
    var featureColumns = Enumerable.Range(FirstPixelColumn, PixelCount).Select(v => $"Column{v}").ToArray();
    var labelColumns = Enumerable.Range(0, DigitCount).Select(v => $"Label{v}").ToArray();

    // print label columns
    digits.Columns[new[] { "Column1" }.Concat(labelColumns).ToArray()].Print();

    // create training and validation partitions (first 80% / remaining 20% of the rows)
    var numRows = digits.RowKeys.Count();
    var pivot = (int)(numRows * 0.8);
    var training = digits.Rows[Enumerable.Range(0, pivot)];
    var validation = digits.Rows[Enumerable.Range(pivot, numRows - pivot)];

    // set up feature and label arrays
    var features = training.Columns[featureColumns].ToArray2D<double>().ToJagged();
    var labels = training.Columns[labelColumns].ToArray2D<double>().ToJagged();

    // build a neural network: one input per pixel, two hidden layers of 100 nodes,
    // one output per digit
    var network = new ActivationNetwork(
        new SigmoidFunction(),
        PixelCount,
        100,
        100,
        DigitCount);

    // randomize network weights
    new GaussianWeights(network, 0.1).Randomize();

    // set up a backpropagation learner
    var learner = new BackPropagationLearning(network)
    {
        LearningRate = 0.05
    };

    // train the network and validate it in each epoch
    Console.WriteLine("Training neural network....");
    var errors = new List<double>();
    var validationErrors = new List<double>();
    for (var epoch = 0; epoch < EpochCount; epoch++)
    {
        // RunEpoch's result is divided by the number of training rows
        var error = learner.RunEpoch(features, labels) / labels.Length;
        var validationError = Validate(validation, network);
        errors.Add(error);
        validationErrors.Add(validationError);
        Console.WriteLine($"Epoch: {epoch}, Training error: {error}, Validation error: {validationError}");
    }

    // test the network on the validation data
    Console.WriteLine($"Validating neural network on {validation.RowCount} records....");
    int mistakes = 0;
    foreach (var key in validation.RowKeys)
    {
        var record = validation.Rows[key];

        // the first value of a row is the label, the next PixelCount values are the pixels
        var digit = (int)record.Values.First();
        var input = record.Values.Skip(1).Take(PixelCount).Cast<double>();
        var predictions = network.Compute(input.ToArray());

        // calculate best prediction: the output node with the highest activation
        var best = Enumerable.Range(0, DigitCount)
            .Select(v => new { Digit = v, Prediction = predictions[v] })
            .OrderByDescending(v => v.Prediction)
            .First();

        // count incorrect predictions; only the mistakes are printed
        if (best.Digit != digit)
        {
            Console.Write($" {digit}: {predictions.ToString("0.00")} ");
            Console.WriteLine($" -> {digit} = {best.Digit} ({100 * best.Prediction:0.00}%) WRONG");
            mistakes++;
        }
    }

    // report total mistakes
    var accuracy = 100.0 * (validation.RowCount - mistakes) / validation.RowCount;
    Console.WriteLine($"Total mistakes: {mistakes}, Accuracy: {accuracy:0.00}%");

    // plot the training and validation curves: series 1 = training error,
    // series 2 = validation error, both against the epoch number
    var epochs = Enumerable.Range(1, EpochCount).Select(v => (double)v).ToArray();
    var curveX = epochs.Concat(epochs).ToArray();
    var curveY = errors.Concat(validationErrors).ToArray();
    var curveZ = Enumerable.Repeat(1, EpochCount).Concat(Enumerable.Repeat(2, EpochCount)).ToArray();
    var curvePlot = new Scatterplot("Training & validation curves", "epochs", "training error");
    curvePlot.Compute(curveX, curveY, curveZ);
    ScatterplotBox.Show(curvePlot);

    Console.ReadLine();
}