Exemplo n.º 1
0
        private void ComputeInference()
        {
            var codebook = new Codification();

            codebook.Learn(tradeTable);

            DataTable symbols = codebook.Apply(tradeTable);

            string[]   inputNames = new[] { "Strike", "MarketPrice", "Notional" };
            double[][] inputs     = tradeTable.ToJagged(inputNames);
            int[]      outputs    = tradeTable.ToArray <int>("Result");


            var teacher = new C45Learning()
            {
                Attributes = DecisionVariable.FromCodebook(codebook, inputNames)
            };


            DecisionTree tree = teacher.Learn(inputs, outputs);

            int[]       predicted = tree.Decide(inputs);
            double      error     = new ZeroOneLoss(outputs).Loss(predicted);
            DecisionSet rules     = tree.ToRules();

            var str = rules.ToString();

            textBoxInferredRules.Text = str;
        }
        public TrainerHelper Train(System.Data.DataTable table, string columnName)
        {
            var container            = new TrainerHelper();
            var trainingCodification = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            trainingCodification.Learn(table);
            DataTable symbols = trainingCodification.Apply(table);

            container.columnNamesArray =
                table.Columns.Cast <DataColumn>().Select(x => x.ColumnName).Where(s => s != columnName).ToArray();

            var columnOrdinal = table.Columns[columnName].Ordinal;

            double[][] tempInputs = symbols.ToJagged(container.columnNamesArray);
            double[][] inputs     = new double[tempInputs.Length][];
            for (var i = 0; i < tempInputs.Length; i++)
            {
                var flattened = this.ExpandRow(trainingCodification, tempInputs[i], columnOrdinal);
                inputs[i] = flattened;
            }


            int[] outputs = symbols.ToArray <int>(columnName);

            var teacher = new NaiveBayesLearning <NormalDistribution>();

            // Set options for the component distributions
            teacher.Options.InnerOption = new NormalOptions
            {
                Regularization = 1e-5 // to avoid zero variances
            };

            if (inputs.Length > 0)
            {
                NaiveBayes <NormalDistribution> learner = teacher.Learn(inputs, outputs);
                container.trainer = learner;
            }

            //var lbnr = new LowerBoundNewtonRaphson() { MaxIterations = 100, Tolerance = 1e-6 };
            //var mlr = lbnr.Learn(inputs, outputs);
            container.codification = trainingCodification;
            container.symbols      = symbols;
            return(container);
        }
        public void remapping_test_new_method()
        {
            // https://web.archive.org/web/20170210050820/http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm

            // Let's download an example dataset from the web to learn a multinomial logistic regression:
            CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

            // Let's read the CSV into a DataTable. As mentioned above, this step
            // can help, but is not necessarily required for learning a the model:
            DataTable table = reader.ToTable();

            // We will learn a MLR regression between the following input and output fields of this table:
            string[] inputNames  = new[] { "write", "ses" };
            string[] outputNames = new[] { "prog" };

            // Now let's create a codification codebook to convert the string fields in the data
            // into integer symbols. This is required because the MLR model can only learn from
            // numeric data, so strings have to be transformed first. We can force a particular
            // interpretation for those columns if needed, as shown in the initializer below:
            var codification = new Codification()
            {
                new Codification.Options("write", CodificationVariable.Continuous),
                new Codification.Options("ses", CodificationVariable.CategoricalWithBaseline, order: new[] { "low", "middle", "high" }),
                new Codification.Options("prog", CodificationVariable.Categorical, order: new[] { "academic", "general" })
            };

            // Learn the codification
            codification.Learn(table);

            // Now, transform symbols into a vector representation, growing the number of inputs:
            double[][] inputsData = codification.Transform(table, inputNames, out inputNames).ToDouble();
            double[][] outputData = codification.Transform(table, outputNames, out outputNames).ToDouble();

            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

            Assert.AreEqual(new double[] { 35, 0, 0 }, inputsData[0]);
            Assert.AreEqual(new double[] { 33, 1, 0 }, inputsData[1]);
            Assert.AreEqual(new double[] { 39, 0, 1 }, inputsData[2]);

            Assert.AreEqual(new double[] { 0, 0, 1 }, outputData[0]);
            Assert.AreEqual(new double[] { 0, 1, 0 }, outputData[1]);
            Assert.AreEqual(new double[] { 0, 0, 1 }, outputData[2]);
            Assert.AreEqual(new double[] { 1, 0, 0 }, outputData[11]);
        }
        public TrainerHelper Train(System.Data.DataTable table, string columnName)
        {
            var container            = new TrainerHelper();
            var trainingCodification = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            trainingCodification.Learn(table);
            DataTable symbols = trainingCodification.Apply(table);

            container.columnNamesArray =
                table.Columns.Cast <DataColumn>().Select(x => x.ColumnName).Where(s => s != columnName).ToArray();

            var columnOrdinal = table.Columns[columnName].Ordinal;

            int[][]    tempInputs = symbols.ToJagged <int>(container.columnNamesArray);
            double[][] inputs     = new double[tempInputs.Length][];
            for (var i = 0; i < tempInputs.Length; i++)
            {
                // var flattened = this.ExpandRow(trainingCodification, tempInputs[i], columnOrdinal);
                // inputs[i] = flattened;
            }


            int[] outputs = symbols.ToArray <int>(columnName);

            var id3learning = new ID3Learning();

            id3learning.Attributes = DecisionVariable.FromCodebook(trainingCodification);
            // Learn the training instances!
            DecisionTree tree = id3learning.Learn(tempInputs, outputs);

            container.decisionTree = tree;


            //var lbnr = new LowerBoundNewtonRaphson() { MaxIterations = 100, Tolerance = 1e-6 };
            //var mlr = lbnr.Learn(inputs, outputs);
            container.codification = trainingCodification;
            container.symbols      = symbols;
            return(container);
        }
Exemplo n.º 5
0
        public void learn_test_mixed()
        {
            #region doc_learn_mixed
            Accord.Math.Random.Generator.Seed = 0;

            // Declare some mixed discrete and continuous observations
            double[][] observations =
            {
                //             (categorical) (discrete) (continuous)
                new double[] { 1, -1, -2.2 },
                new double[] { 1, -6, -5.5 },
                new double[] { 2,  1,  1.1 },
                new double[] { 2,  2,  1.2 },
                new double[] { 2,  2,  2.6 },
                new double[] { 3,  2,  1.4 },
                new double[] { 3,  4,  5.2 },
                new double[] { 1,  6,  5.1 },
                new double[] { 1,  6,  5.9 },
            };

            // Create a new codification algorithm to convert
            // the mixed variables above into all continuous:
            var codification = new Codification <double>()
            {
                CodificationVariable.Categorical,
                CodificationVariable.Discrete,
                CodificationVariable.Continuous
            };

            // Learn the codification from observations
            var model = codification.Learn(observations);

            // Transform the mixed observations into only continuous:
            double[][] newObservations = model.ToDouble().Transform(observations);

            // (newObservations will be equivalent to)
            double[][] expected =
            {
                //               (one hot)    (discrete)    (continuous)
                new double[] { 1, 0, 0, -1, -2.2 },
                new double[] { 1, 0, 0, -6, -5.5 },
                new double[] { 0, 1, 0,  1,  1.1 },
                new double[] { 0, 1, 0,  2,  1.2 },
                new double[] { 0, 1, 0,  2,  2.6 },
                new double[] { 0, 0, 1,  2,  1.4 },
                new double[] { 0, 0, 1,  4,  5.2 },
                new double[] { 1, 0, 0,  6,  5.1 },
                new double[] { 1, 0, 0,  6,  5.9 },
            };

            // Create a new K-Means algorithm
            KMeans kmeans = new KMeans(k: 3);

            // Compute and retrieve the data centroids
            var clusters = kmeans.Learn(observations);

            // Use the centroids to parition all the data
            int[] labels = clusters.Decide(observations);
            #endregion


            Assert.IsTrue(expected.IsEqual(newObservations, 1e-8));

            Assert.AreEqual(3, codification.NumberOfInputs);
            Assert.AreEqual(5, codification.NumberOfOutputs);
            Assert.AreEqual(3, codification.Columns.Count);
            Assert.AreEqual("0", codification.Columns[0].ColumnName);
            Assert.AreEqual(3, codification.Columns[0].NumberOfSymbols);
            Assert.AreEqual(1, codification.Columns[0].NumberOfInputs);
            Assert.AreEqual(1, codification.Columns[0].NumberOfOutputs);
            Assert.AreEqual(3, codification.Columns[0].NumberOfClasses);
            Assert.AreEqual(CodificationVariable.Categorical, codification.Columns[0].VariableType);
            Assert.AreEqual("1", codification.Columns[1].ColumnName);
            Assert.AreEqual(1, codification.Columns[1].NumberOfSymbols);
            Assert.AreEqual(1, codification.Columns[1].NumberOfInputs);
            Assert.AreEqual(1, codification.Columns[1].NumberOfOutputs);
            Assert.AreEqual(1, codification.Columns[1].NumberOfClasses);
            Assert.AreEqual(CodificationVariable.Discrete, codification.Columns[1].VariableType);
            Assert.AreEqual("2", codification.Columns[2].ColumnName);
            Assert.AreEqual(1, codification.Columns[2].NumberOfSymbols);
            Assert.AreEqual(1, codification.Columns[2].NumberOfInputs);
            Assert.AreEqual(1, codification.Columns[2].NumberOfOutputs);
            Assert.AreEqual(1, codification.Columns[2].NumberOfClasses);
            Assert.AreEqual(CodificationVariable.Continuous, codification.Columns[2].VariableType);

            Assert.AreEqual(labels[0], labels[2]);
            Assert.AreEqual(labels[0], labels[3]);
            Assert.AreEqual(labels[0], labels[4]);
            Assert.AreEqual(labels[0], labels[5]);

            Assert.AreEqual(labels[6], labels[7]);
            Assert.AreEqual(labels[6], labels[8]);

            Assert.AreNotEqual(labels[0], labels[1]);
            Assert.AreNotEqual(labels[0], labels[6]);

            int[] labels2 = kmeans.Clusters.Decide(observations);
            Assert.IsTrue(labels.IsEqual(labels2));

            var c = new KMeansClusterCollection.KMeansCluster[clusters.Count];
            int i = 0;
            foreach (var cluster in clusters)
            {
                c[i++] = cluster;
            }

            for (i = 0; i < c.Length; i++)
            {
                Assert.AreSame(c[i], clusters[i]);
            }
        }
        public void learn_test()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            #region doc_learn_1
            // This example downloads an example dataset from the web and learns a multinomial logistic
            // regression on it. However, please keep in mind that the Multinomial Logistic Regression
            // can also work without many of the elements that will be shown below, like the codebook,
            // DataTables, and a CsvReader.

            // Let's download an example dataset from the web to learn a multinomial logistic regression:
            CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

            // Let's read the CSV into a DataTable. As mentioned above, this step
            // can help, but is not necessarily required for learning a the model:
            DataTable table = reader.ToTable();

            // We will learn a MLR regression between the following input and output fields of this table:
            string[] inputNames  = new[] { "write", "ses" };
            string[] outputNames = new[] { "prog" };

            // Now let's create a codification codebook to convert the string fields in the data
            // into integer symbols. This is required because the MLR model can only learn from
            // numeric data, so strings have to be transformed first. We can force a particular
            // interpretation for those columns if needed, as shown in the initializer below:
            var codification = new Codification()
            {
                { "write", CodificationVariable.Continuous },
                { "ses", CodificationVariable.CategoricalWithBaseline, new[] { "low", "middle", "high" } },
                { "prog", CodificationVariable.Categorical, new[] { "academic", "general" } },
            };

            // Learn the codification
            codification.Learn(table);

            // Now, transform symbols into a vector representation, growing the number of inputs:
            double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
            double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();

            // Create a new Multinomial Logistic Regression Analysis:
            var analysis = new MultinomialLogisticRegressionAnalysis()
            {
                InputNames  = inputNames,
                OutputNames = outputNames,
            };

            // Learn the regression from the input and output pairs:
            MultinomialLogisticRegression regression = analysis.Learn(x, y);

            // Let's retrieve some information about what we just learned:
            int coefficients    = analysis.Coefficients.Count; // should be 9
            int numberOfInputs  = analysis.NumberOfInputs;     // should be 3
            int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3

            inputNames  = analysis.InputNames;                 // should be "write", "ses: middle", "ses: high"
            outputNames = analysis.OutputNames;                // should be "prog: academic", "prog: general", "prog: vocation"

            // The regression is best visualized when it is data-bound to a
            // Windows.Forms DataGridView or WPF DataGrid. You can get the
            // values for all different coefficients and discrete values:

            // DataGridBox.Show(regression.Coefficients); // uncomment this line

            // You can get the matrix of coefficients:
            double[][] coef = analysis.CoefficientValues;

            // Should be equal to:
            double[][] expectedCoef = new double[][]
            {
                new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
                new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
            };

            // And their associated standard errors:
            double[][] stdErr = analysis.StandardErrors;

            // Should be equal to:
            double[][] expectedErr = new double[][]
            {
                new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
                new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
            };

            // We can also get statistics and hypothesis tests:
            WaldTest[][]  wald          = analysis.WaldTests;     // should all have p < 0.05
            ChiSquareTest chiSquare     = analysis.ChiSquare;     // should be p=1.06300120956871E-08
            double        logLikelihood = analysis.LogLikelihood; // should be -179.98173272217591

            // You can use the regression to predict the values:
            int[] pred = regression.Transform(x);

            // And get the accuracy of the prediction if needed:
            var cm = GeneralConfusionMatrix.Estimate(regression, x, y.ArgMax(dimension: 1));

            double acc   = cm.Accuracy; // should be 0.61
            double kappa = cm.Kappa;    // should be 0.2993487536492252
            #endregion


            Assert.AreEqual(9, coefficients);
            Assert.AreEqual(3, numberOfInputs);
            Assert.AreEqual(3, numberOfOutputs);

            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

            Assert.AreEqual(0.61, acc, 1e-10);
            Assert.AreEqual(0.2993487536492252, kappa, 1e-10);
            Assert.AreEqual(1.06300120956871E-08, chiSquare.PValue, 1e-8);
            Assert.AreEqual(-179.98172637136295, logLikelihood, 1e-8);

            testmlr(analysis);
        }
Exemplo n.º 7
0
        public Learn()
        {
            try
            {
                //http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_Learning_C45Learning.htm
                using (var db = new DatabaseEntities())
                {
                    var allItems = db.Records.ToList();

                    DataTable data = new DataTable("e-Tracker Values");

                    data.Columns.Add("Id", typeof(int));
                    data.Columns.Add("Age", typeof(string));
                    data.Columns.Add("L1", typeof(string));
                    data.Columns.Add("Word", typeof(string));
                    data.Columns.Add("Synonym", typeof(string));

                    allItems.ForEach(r =>
                    {
                        r.DetailRecords.ToList().ForEach(dr =>
                        {
                            data.Rows.Add(dr.Id, r.Age, r.L1, dr.UnknownWord, dr.SelectedSynonism);
                        });
                    });

                    // Create a new codification codebook to convert
                    // the strings above into numeric, integer labels:
                    CodeBook = new Codification()
                    {
                        DefaultMissingValueReplacement = Double.NaN
                    };

                    // Learn the codebook
                    CodeBook.Learn(data);

                    // Use the codebook to convert all the data
                    DataTable symbols = CodeBook.Apply(data);

                    // Grab the training input and output instances:
                    int[][] inputs  = symbols.ToJagged <int>(InputNames);
                    int[]   outputs = symbols.ToArray <int>("Synonym");

                    // Create a new learning algorithm
                    var teacher = new C45Learning()
                    {
                        Attributes = DecisionVariable.FromCodebook(CodeBook, InputNames),
                    };

                    // Use the learning algorithm to induce a new tree:
                    Tree = teacher.Learn(inputs, outputs);

                    // To get the estimated class labels, we can use
                    int[] predicted = Tree.Decide(inputs);

                    // The classification error (~0.214) can be computed as
                    double error = new ZeroOneLoss(outputs).Loss(predicted);

                    // Moreover, we may decide to convert our tree to a set of rules:
                    DecisionSet rules = Tree.ToRules();

                    // And using the codebook, we can inspect the tree reasoning:
                    string ruleText = rules.ToString(CodeBook, "Synonym",
                                                     System.Globalization.CultureInfo.InvariantCulture);

                    Rules = ruleText;

                    Code = Tree.ToCode("Rules");
                }
            }
            catch (Exception e)
            {
                MessageBox.Show(e.Message);
            }
        }
Exemplo n.º 8
0
        public JsonResult PredictPossibleProducts()
        {
            var userId       = 0;
            int knnNum       = 5;
            int clusterNum   = 4;
            var userIdString = "";

            if (HttpContext.Session["userid"] == null)
            {
                return(Json(new { errorCode = 1, errorMessage = "יוזר לא חוקי" }));
            }

            userIdString = HttpContext.Session["userid"].ToString();
            var didParsed = Int32.TryParse(userIdString, out userId);

            if (!didParsed)
            {
                return(Json(new { errorCode = 1, errorMessage = "יוזר לא חוקי" }));
            }

            var userGender = _context.Users
                             .Where(x => x.Id == userId)
                             .Select(x => x.Gender)
                             .SingleOrDefault();

            var trainData = _context.Purchases
                            .OrderBy(x => x.UserId)
                            .Where(x => x.Product != null)
                            .Select(x => new
            {
                userId     = x.UserId.Value,
                size       = x.Product.Size,
                type       = x.Product.ProductTypeId,
                gender     = x.Product.ProductType.Gender,
                genderUser = x.User.Gender
            })
                            .ToList();

            if (trainData.Count < knnNum || trainData.Count < clusterNum)
            {
                return(Json(new { errorCode = 2, errorMessage = "אין מספיק מידע" }));
            }
            var inputs = trainData.Select(x =>
            {
                double[] res = new double[]
                {
                    Convert.ToInt32(x.gender),
                    Convert.ToInt32(x.genderUser),
                    x.type.Value,
                    x.size
                };

                return(res);
            })
                         .ToArray();

            var codification = new Codification <double>()
            {
                CodificationVariable.Categorical,
                CodificationVariable.Categorical,
                CodificationVariable.Categorical,
                CodificationVariable.Discrete
            };

            // Learn the codification from observations
            var model = codification.Learn(inputs);

            // Transform the mixed observations into only continuous:
            double[][] newInputs = model.ToDouble().Transform(inputs);

            KMedoids kmeans   = new KMedoids(k: clusterNum);
            var      clusters = kmeans.Learn(newInputs);

            int[] labels = clusters.Decide(newInputs);

            var knn5 = new KNearestNeighbors(k: knnNum);

            knn5.Learn(newInputs, labels);

            var purchasesById = _context.Purchases
                                .Where(x => x.Product != null)
                                .Select(x => new
            {
                userId     = x.UserId.Value,
                size       = x.Product.Size,
                type       = x.Product.ProductTypeId,
                gender     = x.Product.ProductType.Gender,
                genderUser = x.User.Gender
            })
                                .GroupBy(x => x.userId)
                                .ToList();

            IList <Tuple <int, int[]> > labelsForUsers = new List <Tuple <int, int[]> >();

            for (int i = 0; i < purchasesById.Count; i++)
            {
                var userInputs = purchasesById[i].
                                 Select(x =>
                {
                    double[] res = new double[]
                    {
                        Convert.ToInt32(x.gender),
                        Convert.ToInt32(x.genderUser),
                        x.type.Value,
                        x.size
                    };

                    return(res);
                })
                                 .ToArray();

                double[][] newUserInputs = model.ToDouble().Transform(userInputs);
                labelsForUsers.Add(new Tuple <int, int[]>(purchasesById[i].Key, clusters.Decide(newUserInputs).Distinct().ToArray()));
            }

            var productIdsUserBought = _context.Purchases
                                       .Where(x => x.UserId == userId)
                                       .Select(x => x.ProductId)
                                       .Distinct()
                                       .ToList();

            var validProductTypeIds = _context.Purchases
                                      .Where(x => x.UserId == userId)
                                      .Select(x => x.Product.ProductTypeId)
                                      .Distinct()
                                      .ToList();

            var productsToPredict = _context.Products
                                    .Where(x => !productIdsUserBought.Contains(x.Id))
                                    .Where(x => validProductTypeIds.Contains(x.ProductTypeId))
                                    .Select(x => new
            {
                id         = x.Id,
                size       = x.Size,
                type       = x.ProductTypeId,
                gender     = x.ProductType.Gender,
                genderUser = userGender
            })
                                    .ToList();

            var predInputs = productsToPredict.Select(x =>
            {
                double[] res = new double[]
                {
                    Convert.ToInt32(x.gender),
                    Convert.ToInt32(x.genderUser),
                    x.type.Value,
                    x.size
                };

                return(res);
            })
                             .ToArray();

            double[][] newPredInputs = model.ToDouble().Transform(predInputs);

            int[] newLabels = knn5.Decide(newPredInputs);

            IList <int> productIdsPrediction = new List <int>();
            var         userLabels           = labelsForUsers.Where(x => x.Item1 == userId).FirstOrDefault() != null?
                                               labelsForUsers.Where(x => x.Item1 == userId).FirstOrDefault().Item2 : new int[0];

            for (int i = 0; i < newLabels.Length; i++)
            {
                if (userLabels.Contains(newLabels[i]))
                {
                    productIdsPrediction.Add(productsToPredict[i].id);
                }
            }

            var predictedProduct = _context.Products
                                   .Where(x => productIdsPrediction.Contains(x.Id))
                                   .Select(x => new
            {
                Id          = x.Id,
                Name        = x.Name,
                Price       = x.Price,
                Size        = x.Size,
                PictureName = x.PictureName
            })
                                   .ToList();

            return(Json(new { products = predictedProduct }, JsonRequestBehavior.AllowGet));
        }
Exemplo n.º 9
0
        public void learn_test_2()
        {
            #region doc_learn_2
            // Let's say we would like predict a continuous number from a set
            // of discrete and continuous input variables. For this, we will
            // be using the Servo dataset from UCI's Machine Learning repository
            // as an example: http://archive.ics.uci.edu/ml/datasets/Servo

            // Create a Servo dataset
            Servo      servo     = new Servo();
            object[][] instances = servo.Instances; // 167 x 4
            double[]   outputs   = servo.Output;    // 167 x 1

            // This dataset contains 4 columns, where the first two are
            // symbolic (having possible values A, B, C, D, E), and the
            // last two are continuous.

            // We will use a codification filter to transform the symbolic
            // variables into one-hot vectors, while keeping the other two
            // continuous variables intact:
            var codebook = new Codification <object>()
            {
                { "motor", CodificationVariable.Categorical },
                { "screw", CodificationVariable.Categorical },
                { "pgain", CodificationVariable.Continuous },
                { "vgain", CodificationVariable.Continuous },
            };

            // Learn the codebook
            codebook.Learn(instances);

            // We can gather some info about the problem:
            int numberOfInputs  = codebook.NumberOfInputs;  // should be 4 (since there are 4 variables)
            int numberOfOutputs = codebook.NumberOfOutputs; // should be 12 (due their one-hot encodings)

            // Now we can use it to obtain double[] vectors:
            double[][] inputs = codebook.ToDouble().Transform(instances);

            // We will use Ordinary Least Squares to create a
            // linear regression model with an intercept term
            var ols = new OrdinaryLeastSquares()
            {
                UseIntercept = true
            };

            // Use Ordinary Least Squares to estimate a regression model:
            MultipleLinearRegression regression = ols.Learn(inputs, outputs);

            // We can compute the predicted points using:
            double[] predicted = regression.Transform(inputs);

            // And the squared error using the SquareLoss class:
            double error = new SquareLoss(outputs).Loss(predicted);

            // We can also compute other measures, such as the coefficient of determination r² using:
            double r2 = new RSquaredLoss(numberOfOutputs, outputs).Loss(predicted); // should be 0.55086630162967354

            // Or the adjusted or weighted versions of r² using:
            var r2loss = new RSquaredLoss(numberOfOutputs, outputs)
            {
                Adjust = true,
                // Weights = weights; // (uncomment if you have a weighted problem)
            };

            double ar2 = r2loss.Loss(predicted); // should be 0.51586887058782993

            // Alternatively, we can also use the less generic, but maybe more user-friendly method directly:
            double ur2 = regression.CoefficientOfDetermination(inputs, outputs, adjust: true); // should be 0.51586887058782993
            #endregion

            Assert.AreEqual(4, numberOfInputs);
            Assert.AreEqual(12, numberOfOutputs);
            Assert.AreEqual(12, regression.NumberOfInputs);
            Assert.AreEqual(1, regression.NumberOfOutputs);

            Assert.AreEqual(1.0859586717266123, error, 1e-6);

            double[] expected = regression.Compute(inputs);
            double[] actual   = regression.Transform(inputs);
            Assert.IsTrue(expected.IsEqual(actual, 1e-10));

            Assert.AreEqual(0.55086630162967354, r2);
            Assert.AreEqual(0.51586887058782993, ar2);
            Assert.AreEqual(0.51586887058782993, ur2);
        }
        public void gh_937()
        {
            #region doc_learn_database
            // Note: this example uses a System.Data.DataTable to represent input data,
            // but note that this is not required. The data could have been represented
            // as jagged double matrices (double[][]) directly.

            // If you have to handle heterogeneus data in your application, such as user records
            // in a database, this data is best represented within the framework using a .NET's
            // DataTable object. In order to try to learn a classification or regression model
            // using this datatable, first we will need to convert the table into a representation
            // that the machine learning model can understand. Such representation is quite often,
            // a matrix of doubles (double[][]).
            var data = new DataTable("Customer Revenue Example");

            data.Columns.Add("Day", "CustomerId", "Time (hour)", "Weather", "Buy");
            data.Rows.Add("D1", 0, 8, "Sunny", true);
            data.Rows.Add("D2", 1, 10, "Sunny", true);
            data.Rows.Add("D3", 2, 10, "Rain", false);
            data.Rows.Add("D4", 3, 16, "Rain", true);
            data.Rows.Add("D5", 4, 15, "Rain", true);
            data.Rows.Add("D6", 5, 20, "Rain", false);
            data.Rows.Add("D7", 6, 12, "Cloudy", true);
            data.Rows.Add("D8", 7, 12, "Sunny", false);

            // One way to perform this conversion is by using a Codification filter. The Codification
            // filter can take care of converting variables that actually denote symbols (i.e. the
            // weather in the example above) into representations that make more sense given the assumption
            // of a real vector-based classifier.

            // Create a codification codebook
            var codebook = new Codification()
            {
                { "Weather", CodificationVariable.Categorical },
                { "Time (hour)", CodificationVariable.Continuous },
                { "Revenue", CodificationVariable.Continuous },
            };

            // Learn from the data
            codebook.Learn(data);

            // Now, we will use the codebook to transform the DataTable into double[][] vectors. Due
            // the way the conversion works, we can end up with more columns in your output vectors
            // than the ones started with. If you would like more details about what those columns
            // represent, you can pass then as 'out' parameters in the methods that follow below.
            string[] inputNames;  // (note: if you do not want to run this example yourself, you
            string   outputName;  // can see below the new variable names that will be generated)

            // Now, we can translate our training data into integer symbols using our codebook:
            double[][] inputs  = codebook.Apply(data, "Weather", "Time (hour)").ToJagged(out inputNames);
            double[]   outputs = codebook.Apply(data, "Buy").ToVector(out outputName);
            // (note: the Apply method transform a DataTable into another DataTable containing the codified
            //  variables. The ToJagged and ToVector methods are then used to transform those tables into
            //  double[][] matrices and double[] vectors, respectively.

            // If we would like to learn a logistic regression model for this data, there are two possible
            // ways depending on which aspect of the logistic regression we are interested the most. If we
            // are interested in interpreting the logistic regression, performing hypothesis tests with the
            // coefficients and performing an actual _logistic regression analysis_, then we can use the
            // LogisticRegressionAnalysis class for this. If however we are only interested in using
            // the learned model directly to predict new values for the dataset, then we could be using the
            // LogisticRegression and IterativeReweightedLeastSquares classes directly instead.

            // This example deals with the former case. For the later, please see the documentation page
            // for the LogisticRegression class.

            // We can create a new multiple linear analysis for the variables
            var lra = new LogisticRegressionAnalysis()
            {
                // We can also inform the names of the new variables that have been created by the
                // codification filter. Those can help in the visualizing the analysis once it is
                // data-bound to a visual control such a Windows.Forms.DataGridView or WPF DataGrid:

                Inputs = inputNames, // will be { "Weather: Sunny", "Weather: Rain, "Weather: Cloudy", "Time (hours)" }
                Output = outputName  // will be "Revenue"
            };

            // Compute the analysis and obtain the estimated regression
            LogisticRegression regression = lra.Learn(inputs, outputs);

            // And then predict the label using
            double predicted = lra.Transform(inputs[0]); // result will be ~0.287

            // Because we opted for doing a MultipleLinearRegressionAnalysis instead of a simple
            // linear regression, we will have further information about the regression available:
            int           inputCount        = lra.NumberOfInputs;  // should be 4
            int           outputCount       = lra.NumberOfOutputs; // should be 1
            double        logl              = lra.LogLikelihood;   // should be -4.6035570737785525
            ChiSquareTest x2                = lra.ChiSquare;       // should be 1.37789 (p=0.8480, non-significant)
            double[]      stdErr            = lra.StandardErrors;  // should be high except for the last value of 0.27122079214927985 (due small data)
            double[]      or                = lra.OddsRatios;      // should be 1.1116659950687609 for the last coefficient (related to time of day)
            LogisticCoefficientCollection c = lra.Coefficients;    // coefficient table (bind to a visual control for quick inspection)
            double[][] h = lra.InformationMatrix;                  // should contain Fisher's information matrix for the problem
            #endregion

            Assert.AreEqual(0.28703150858677107, predicted, 1e-8);
            Assert.AreEqual(4, inputCount, 1e-8);
            Assert.AreEqual(1, outputCount, 1e-8);
            Assert.AreEqual(-4.6035570737785525, logl, 1e-8);
            Assert.IsTrue(new[] { 0.0019604927838235376, 88.043929817973222, 101.42211648160144, 2.1954970044905113E-07, 1.1116659950687609 }.IsEqual(or, 1e-4));

            Assert.AreEqual(1.377897662970609, x2.Statistic, 1e-8);
            Assert.AreEqual(0.84802726696077046, x2.PValue, 1e-8);
        }
        public void gh_937()
        {
            #region doc_learn_database
            // Note: this example uses a System.Data.DataTable to represent input data,
            // but note that this is not required. The data could have been represented
            // as jagged double matrices (double[][]) directly.

            // If you have to handle heterogeneus data in your application, such as user records
            // in a database, this data is best represented within the framework using a .NET's
            // DataTable object. In order to try to learn a classification or regression model
            // using this datatable, first we will need to convert the table into a representation
            // that the machine learning model can understand. Such representation is quite often,
            // a matrix of doubles (double[][]).
            var data = new DataTable("Customer Revenue Example");

            data.Columns.Add("Day", "CustomerId", "Time (hour)", "Weather", "Revenue");
            data.Rows.Add("D1", 0, 8, "Sunny", 101.2);
            data.Rows.Add("D2", 1, 10, "Sunny", 24.1);
            data.Rows.Add("D3", 2, 10, "Rain", 107);
            data.Rows.Add("D4", 3, 16, "Rain", 223);
            data.Rows.Add("D5", 4, 15, "Rain", 1);
            data.Rows.Add("D6", 5, 20, "Rain", 42);
            data.Rows.Add("D7", 6, 12, "Cloudy", 123);
            data.Rows.Add("D8", 7, 12, "Sunny", 64);

            // One way to perform this conversion is by using a Codification filter. The Codification
            // filter can take care of converting variables that actually denote symbols (i.e. the
            // weather in the example above) into representations that make more sense given the assumption
            // of a real vector-based classifier.

            // Create a codification codebook
            var codebook = new Codification()
            {
                { "Weather", CodificationVariable.Categorical },
                { "Time (hour)", CodificationVariable.Continuous },
                { "Revenue", CodificationVariable.Continuous },
            };

            // Learn from the data
            codebook.Learn(data);

            // Now, we will use the codebook to transform the DataTable into double[][] vectors. Due
            // the way the conversion works, we can end up with more columns in your output vectors
            // than the ones started with. If you would like more details about what those columns
            // represent, you can pass then as 'out' parameters in the methods that follow below.
            string[] inputNames;  // (note: if you do not want to run this example yourself, you
            string   outputName;  // can see below the new variable names that will be generated)

            // Now, we can translate our training data into integer symbols using our codebook:
            double[][] inputs  = codebook.Apply(data, "Weather", "Time (hour)").ToJagged(out inputNames);
            double[]   outputs = codebook.Apply(data, "Revenue").ToVector(out outputName);
            // (note: the Apply method transform a DataTable into another DataTable containing the codified
            //  variables. The ToJagged and ToVector methods are then used to transform those tables into
            //  double[][] matrices and double[] vectors, respectively.

            // If we would like to learn a linear regression model for this data, there are two possible
            // ways depending on which aspect of the linear regression we are interested the most. If we
            // are interested in interpreting the linear regression, performing hypothesis tests with the
            // coefficients and performing an actual _linear regression analysis_, then we can use the
            // MultipleLinearRegressionAnalysis class for this. If however we are only interested in using
            // the learned model directly to predict new values for the dataset, then we could be using the
            // MultipleLinearRegression and OrdinaryLeastSquares classes directly instead.

            // This example deals with the former case. For the later, please see the documentation page
            // for the MultipleLinearRegression class.

            // We can create a new multiple linear analysis for the variables
            var mlra = new MultipleLinearRegressionAnalysis(intercept: true)
            {
                // We can also inform the names of the new variables that have been created by the
                // codification filter. Those can help in the visualizing the analysis once it is
                // data-bound to a visual control such a Windows.Forms.DataGridView or WPF DataGrid:

                Inputs = inputNames, // will be { "Weather: Sunny", "Weather: Rain, "Weather: Cloudy", "Time (hours)" }
                Output = outputName  // will be "Revenue"
            };

            // To overcome linear dependency errors
            mlra.OrdinaryLeastSquares.IsRobust = true;

            // Compute the analysis and obtain the estimated regression
            MultipleLinearRegression regression = mlra.Learn(inputs, outputs);

            // And then predict the label using
            double predicted = mlra.Transform(inputs[0]); // result will be ~72.3

            // Because we opted for doing a MultipleLinearRegressionAnalysis instead of a simple
            // linear regression, we will have further information about the regression available:
            int    inputCount       = mlra.NumberOfInputs;    // should be 4
            int    outputCount      = mlra.NumberOfOutputs;   // should be 1
            double r2               = mlra.RSquared;          // should be 0.12801838425195311
            AnovaSourceCollection a = mlra.Table;             // ANOVA table (bind to a visual control for quick inspection)
            double[][]            h = mlra.InformationMatrix; // should contain Fisher's information matrix for the problem
            ZTest z = mlra.ZTest;                             // should be 0 (p=0.999, non-significant)
            #endregion

            Assert.AreEqual(72.279574468085144d, predicted, 1e-8);
            Assert.AreEqual(4, inputCount, 1e-8);
            Assert.AreEqual(1, outputCount, 1e-8);
            Assert.AreEqual(0.12801838425195311, r2, 1e-8);
            Assert.AreEqual(0.11010987669344097, a[0].Statistic, 1e-8);

            string     str       = h.ToCSharp();
            double[][] expectedH = new double[][]
            {
                new double[] { 0.442293243337911, -0.069833718526197, -0.228692384542512, -0.0141758263063635, 0.143767140269202 },
                new double[] { -0.0698337185261971, 0.717811616891116, -0.112258662892007, -0.0655549422852099, 0.535719235472913 },
                new double[] { -0.228692384542512, -0.112258662892007, 0.717434922237013, -0.0232803210243207, 0.376483874802496 },
                new double[] { -0.0141758263063635, -0.0655549422852099, -0.0232803210243207, 0.0370082984668314, -0.103011089615894 },
                new double[] { 0.143767140269202, 0.535719235472913, 0.376483874802496, -0.103011089615894, 1.05597025054461 }
            };

            Assert.IsTrue(expectedH.IsEqual(h, 1e-8));
            Assert.AreEqual(0, z.Statistic, 1e-8);
            Assert.AreEqual(1, z.PValue, 1e-8);
        }
Exemplo n.º 12
0
        public void missing_values_test()
        {
            #region doc_missing
            // In this example, we will be using a modified version of the famous Play Tennis
            // example by Tom Mitchell (1998), where some values have been replaced by missing
            // values. We will use NaN double values to represent values missing from the data.

            // Note: this example uses DataTables to represent the input data,
            // but this is not required. The same could be performed using plain
            // double[][] matrices and vectors instead.
            DataTable data = new DataTable("Tennis Example with Missing Values");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(string));
            data.Columns.Add("Humidity", typeof(string));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", null, "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", null, null, "High", null, "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", null, "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", null, "Mild", "High", null, "No");
            data.Rows.Add("D9", null, "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", null, null, "Normal", null, "Yes");
            data.Rows.Add("D11", null, "Mild", "Normal", null, "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", null, "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", null, "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Create a new codification codebook to convert
            // the strings above into numeric, integer labels:
            var codebook = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            // Learn the codebook
            codebook.Learn(data);

            // Use the codebook to convert all the data
            DataTable symbols = codebook.Apply(data);

            // Grab the training input and output instances:
            string[]   inputNames = new[] { "Outlook", "Temperature", "Humidity", "Wind" };
            double[][] inputs     = symbols.ToJagged(inputNames);
            int[]      outputs    = symbols.ToArray <int>("PlayTennis");

            // Create a new learning algorithm
            var teacher = new C45Learning()
            {
                Attributes = DecisionVariable.FromCodebook(codebook, inputNames)
            };

            // Use the learning algorithm to induce a new tree:
            DecisionTree tree = teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // The classification error (~0.214) can be computed as
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "PlayTennis",
                                             System.Globalization.CultureInfo.InvariantCulture);

            // The output should be:
            string expected = @"No =: (Outlook == Sunny)
No =: (Outlook == Rain) && (Wind == Strong)
Yes =: (Outlook == Overcast)
Yes =: (Outlook == Rain) && (Wind == Weak)
";
            #endregion

            expected = expected.Replace("\r\n", Environment.NewLine);
            Assert.AreEqual(expected, ruleText);

            Assert.AreEqual(14, codebook["Day"].NumberOfSymbols);
            Assert.AreEqual(3, codebook["Outlook"].NumberOfSymbols);
            Assert.AreEqual(3, codebook["Temperature"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["Humidity"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["Wind"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["PlayTennis"].NumberOfSymbols);

            foreach (var col in codebook)
            {
                Assert.AreEqual(Double.NaN, col.MissingValueReplacement);
                Assert.AreEqual(CodificationVariable.Ordinal, col.VariableType);
            }

            Assert.AreEqual(0.21428571428571427, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(2, tree.NumberOfOutputs);

            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.21428571428571427, newError, 1e-10);
        }