C# (CSharp) Codification.Learn примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: Codification

Метод/Функция: Learn

Примеров на hotexamples.com: 12

C# (CSharp) Codification.Learn - 12 примеров найдено. Это лучшие примеры C# (CSharp) кода для Codification.Learn, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Revert(30)

Apply(30)

Translate(30)

Transform(23)

Learn(12)

EncodePasswordToBase64(5)

AddRange(1)

Detect(1)

ParallelTransform(1)

Save(1)

ToArray(1)

ToDouble(1)

Пример #1

Показать файл

Файл: Form1.cs Проект: arunIITB/InferenceRule

        /// <summary>
        /// Learns a C4.5 decision tree that predicts the "Result" column from the
        /// "Strike", "MarketPrice" and "Notional" columns of <c>tradeTable</c>, then
        /// writes the rules induced from that tree into <c>textBoxInferredRules</c>.
        /// </summary>
        private void ComputeInference()
        {
            // Learn a codebook for the table and produce its codified copy.
            var codebook = new Codification();

            codebook.Learn(tradeTable);

            DataTable symbols = codebook.Apply(tradeTable);

            // Read the training data from the codified table (not the raw one) so that the
            // values handed to the learner agree with the attributes built from the codebook.
            string[]   inputNames = new[] { "Strike", "MarketPrice", "Notional" };
            double[][] inputs     = symbols.ToJagged(inputNames);
            int[]      outputs    = symbols.ToArray <int>("Result");

            var teacher = new C45Learning()
            {
                Attributes = DecisionVariable.FromCodebook(codebook, inputNames)
            };

            DecisionTree tree  = teacher.Learn(inputs, outputs);
            DecisionSet  rules = tree.ToRules();

            textBoxInferredRules.Text = rules.ToString();
        }

Пример #2

Показать файл

Файл: MultinomialLogisticTrainer.cs Проект: ericintapp/hackathon-17-rotai

        /// <summary>
        /// Codifies <paramref name="table"/>, expands each input row through <c>ExpandRow</c>
        /// and fits a naive Bayes classifier with normal component distributions that
        /// predicts <paramref name="columnName"/>.
        /// </summary>
        /// <param name="table">Table holding both the input columns and the target column.</param>
        /// <param name="columnName">Name of the target column; every other column is used as input.</param>
        /// <returns>
        /// A <c>TrainerHelper</c> carrying the input column names, the codebook, the codified
        /// table and, when the table has at least one row, the trained classifier.
        /// </returns>
        public TrainerHelper Train(System.Data.DataTable table, string columnName)
        {
            var result   = new TrainerHelper();
            var codebook = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            codebook.Learn(table);
            DataTable encoded = codebook.Apply(table);

            // Every column except the target is an input.
            result.columnNamesArray =
                (from DataColumn column in table.Columns
                 where column.ColumnName != columnName
                 select column.ColumnName).ToArray();

            var targetOrdinal = table.Columns[columnName].Ordinal;

            // Expand each codified row into the feature vector used for training.
            double[][] rawRows = encoded.ToJagged(result.columnNamesArray);
            double[][] inputs  = rawRows
                                 .Select(row => this.ExpandRow(codebook, row, targetOrdinal))
                                 .ToArray();

            int[] outputs = encoded.ToArray <int>(columnName);

            var teacher = new NaiveBayesLearning <NormalDistribution>();

            // A small regularization keeps the component variances away from zero.
            teacher.Options.InnerOption = new NormalOptions
            {
                Regularization = 1e-5
            };

            // With no rows there is nothing to learn from; the trainer is left unset.
            if (inputs.Length != 0)
            {
                result.trainer = teacher.Learn(inputs, outputs);
            }

            result.codification = codebook;
            result.symbols      = encoded;
            return result;
        }

Пример #3

Показать файл

Файл: CodificationFilterTest.cs Проект: jthornca/accord-framework

        /// <summary>
        /// Checks that a codebook configured through <c>Codification.Options</c> entries expands
        /// the "write"/"ses" inputs and the "prog" output of the hsbdemo dataset into the
        /// expected column names and vectors.
        /// </summary>
        public void remapping_test_new_method()
        {
            // Background on the dataset:
            // https://web.archive.org/web/20170210050820/http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            const string datasetUrl = "https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv";

            // Download the CSV and load it into a DataTable.
            CsvReader csv = CsvReader.FromUrl(datasetUrl, hasHeaders: true);
            DataTable hsb = csv.ToTable();

            // Columns taking part in the transformation.
            string[] inputColumns  = new[] { "write", "ses" };
            string[] outputColumns = new[] { "prog" };

            // Force a specific interpretation (and symbol order) for each column.
            var codebook = new Codification()
            {
                new Codification.Options("write", CodificationVariable.Continuous),
                new Codification.Options("ses", CodificationVariable.CategoricalWithBaseline, order: new[] { "low", "middle", "high" }),
                new Codification.Options("prog", CodificationVariable.Categorical, order: new[] { "academic", "general" })
            };

            codebook.Learn(hsb);

            // Transform the symbols into vectors; the expanded column names come back through the out arguments.
            string[] expandedInputs;
            string[] expandedOutputs;
            double[][] inputVectors  = codebook.Transform(hsb, inputColumns, out expandedInputs).ToDouble();
            double[][] outputVectors = codebook.Transform(hsb, outputColumns, out expandedOutputs).ToDouble();

            // Expanded column names.
            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, expandedInputs);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, expandedOutputs);

            // Input vectors of the first rows.
            Assert.AreEqual(new double[] { 35, 0, 0 }, inputVectors[0]);
            Assert.AreEqual(new double[] { 33, 1, 0 }, inputVectors[1]);
            Assert.AreEqual(new double[] { 39, 0, 1 }, inputVectors[2]);

            // Output vectors of selected rows.
            Assert.AreEqual(new double[] { 0, 0, 1 }, outputVectors[0]);
            Assert.AreEqual(new double[] { 0, 1, 0 }, outputVectors[1]);
            Assert.AreEqual(new double[] { 0, 0, 1 }, outputVectors[2]);
            Assert.AreEqual(new double[] { 1, 0, 0 }, outputVectors[11]);
        }

Пример #4

Показать файл

Файл: DecisionTreeLogisticTrainer.cs Проект: ericintapp/hackathon-17-rotai

        /// <summary>
        /// Codifies <paramref name="table"/> and learns an ID3 decision tree that predicts
        /// <paramref name="columnName"/> from every other column.
        /// </summary>
        /// <param name="table">Table holding both the input columns and the target column.</param>
        /// <param name="columnName">Name of the target column.</param>
        /// <returns>
        /// A <c>TrainerHelper</c> carrying the input column names, the learned tree,
        /// the codebook and the codified table.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="table"/> or <paramref name="columnName"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="table"/> has no column named <paramref name="columnName"/>.
        /// </exception>
        public TrainerHelper Train(System.Data.DataTable table, string columnName)
        {
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }

            if (columnName == null)
            {
                throw new ArgumentNullException("columnName");
            }

            if (!table.Columns.Contains(columnName))
            {
                throw new ArgumentException("The table has no column named '" + columnName + "'.", "columnName");
            }

            var container            = new TrainerHelper();
            var trainingCodification = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            trainingCodification.Learn(table);
            DataTable symbols = trainingCodification.Apply(table);

            // Every column except the target is an input.
            container.columnNamesArray =
                table.Columns.Cast <DataColumn>().Select(x => x.ColumnName).Where(s => s != columnName).ToArray();

            int[][] inputs  = symbols.ToJagged <int>(container.columnNamesArray);
            int[]   outputs = symbols.ToArray <int>(columnName);

            // Build the attributes from the same column list (and order) as the input matrix,
            // so that each decision variable describes the matching input column.
            var id3learning = new ID3Learning()
            {
                Attributes = DecisionVariable.FromCodebook(trainingCodification, container.columnNamesArray)
            };

            // Learn the training instances.
            container.decisionTree = id3learning.Learn(inputs, outputs);

            container.codification = trainingCodification;
            container.symbols      = symbols;
            return container;
        }

Пример #5

Показать файл

Файл: KMeansTest.cs Проект: xiubjarne/framework

        /// <summary>
        /// Documentation sample and test: codifies mixed categorical/discrete/continuous
        /// observations, runs K-Means with k = 3, and then checks the codified output,
        /// the codebook's column metadata and the resulting cluster labels.
        /// </summary>
        public void learn_test_mixed()
        {
            #region doc_learn_mixed
            // Fix the random seed before anything else runs
            Accord.Math.Random.Generator.Seed = 0;

            // Declare some mixed discrete and continuous observations
            double[][] observations =
            {
                //             (categorical) (discrete) (continuous)
                new double[] { 1, -1, -2.2 },
                new double[] { 1, -6, -5.5 },
                new double[] { 2,  1,  1.1 },
                new double[] { 2,  2,  1.2 },
                new double[] { 2,  2,  2.6 },
                new double[] { 3,  2,  1.4 },
                new double[] { 3,  4,  5.2 },
                new double[] { 1,  6,  5.1 },
                new double[] { 1,  6,  5.9 },
            };

            // Create a new codification algorithm to convert
            // the mixed variables above into all continuous:
            var codification = new Codification <double>()
            {
                CodificationVariable.Categorical,
                CodificationVariable.Discrete,
                CodificationVariable.Continuous
            };

            // Learn the codification from observations
            var model = codification.Learn(observations);

            // Transform the mixed observations into only continuous:
            double[][] newObservations = model.ToDouble().Transform(observations);

            // (newObservations will be equivalent to)
            double[][] expected =
            {
                //               (one hot)    (discrete)    (continuous)
                new double[] { 1, 0, 0, -1, -2.2 },
                new double[] { 1, 0, 0, -6, -5.5 },
                new double[] { 0, 1, 0,  1,  1.1 },
                new double[] { 0, 1, 0,  2,  1.2 },
                new double[] { 0, 1, 0,  2,  2.6 },
                new double[] { 0, 0, 1,  2,  1.4 },
                new double[] { 0, 0, 1,  4,  5.2 },
                new double[] { 1, 0, 0,  6,  5.1 },
                new double[] { 1, 0, 0,  6,  5.9 },
            };

            // Create a new K-Means algorithm
            KMeans kmeans = new KMeans(k: 3);

            // Compute and retrieve the data centroids
            // NOTE(review): K-Means is trained on the raw observations, not on the codified
            // newObservations computed above; the label assertions below rely on that - confirm it is intended.
            var clusters = kmeans.Learn(observations);

            // Use the centroids to partition all the data
            int[] labels = clusters.Decide(observations);
            #endregion


            // The codified observations must match the expected one-hot layout
            Assert.IsTrue(expected.IsEqual(newObservations, 1e-8));

            // Codebook metadata: 3 input columns expand to 5 output columns
            Assert.AreEqual(3, codification.NumberOfInputs);
            Assert.AreEqual(5, codification.NumberOfOutputs);
            Assert.AreEqual(3, codification.Columns.Count);
            Assert.AreEqual("0", codification.Columns[0].ColumnName);
            Assert.AreEqual(3, codification.Columns[0].NumberOfSymbols);
            Assert.AreEqual(1, codification.Columns[0].NumberOfInputs);
            Assert.AreEqual(1, codification.Columns[0].NumberOfOutputs);
            Assert.AreEqual(3, codification.Columns[0].NumberOfClasses);
            Assert.AreEqual(CodificationVariable.Categorical, codification.Columns[0].VariableType);
            Assert.AreEqual("1", codification.Columns[1].ColumnName);
            Assert.AreEqual(1, codification.Columns[1].NumberOfSymbols);
            Assert.AreEqual(1, codification.Columns[1].NumberOfInputs);
            Assert.AreEqual(1, codification.Columns[1].NumberOfOutputs);
            Assert.AreEqual(1, codification.Columns[1].NumberOfClasses);
            Assert.AreEqual(CodificationVariable.Discrete, codification.Columns[1].VariableType);
            Assert.AreEqual("2", codification.Columns[2].ColumnName);
            Assert.AreEqual(1, codification.Columns[2].NumberOfSymbols);
            Assert.AreEqual(1, codification.Columns[2].NumberOfInputs);
            Assert.AreEqual(1, codification.Columns[2].NumberOfOutputs);
            Assert.AreEqual(1, codification.Columns[2].NumberOfClasses);
            Assert.AreEqual(CodificationVariable.Continuous, codification.Columns[2].VariableType);

            // Observations 0, 2, 3, 4 and 5 must share one cluster
            Assert.AreEqual(labels[0], labels[2]);
            Assert.AreEqual(labels[0], labels[3]);
            Assert.AreEqual(labels[0], labels[4]);
            Assert.AreEqual(labels[0], labels[5]);

            // Observations 6, 7 and 8 must share another cluster
            Assert.AreEqual(labels[6], labels[7]);
            Assert.AreEqual(labels[6], labels[8]);

            // Observation 1 and the 6-8 group must differ from observation 0's cluster
            Assert.AreNotEqual(labels[0], labels[1]);
            Assert.AreNotEqual(labels[0], labels[6]);

            // The clusters stored on the algorithm must give the same labels as the returned ones
            int[] labels2 = kmeans.Clusters.Decide(observations);
            Assert.IsTrue(labels.IsEqual(labels2));

            // Enumerating the collection must yield the same instances as indexing it
            var c = new KMeansClusterCollection.KMeansCluster[clusters.Count];
            int i = 0;
            foreach (var cluster in clusters)
            {
                c[i++] = cluster;
            }

            for (i = 0; i < c.Length; i++)
            {
                Assert.AreSame(c[i], clusters[i]);
            }
        }

Пример #6

Показать файл

Файл: MultinomialLogisticRegressionAnalysisTest.cs Проект: RitterRBC/framework

        /// <summary>
        /// Documentation sample and test: downloads the hsbdemo dataset, codifies the
        /// "write"/"ses" inputs and the "prog" output, learns a multinomial logistic
        /// regression analysis on them and asserts the resulting names and statistics.
        /// </summary>
        public void learn_test()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            #region doc_learn_1
            // This example downloads an example dataset from the web and learns a multinomial logistic
            // regression on it. However, please keep in mind that the Multinomial Logistic Regression
            // can also work without many of the elements that will be shown below, like the codebook,
            // DataTables, and a CsvReader.

            // Let's download an example dataset from the web to learn a multinomial logistic regression:
            CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

            // Let's read the CSV into a DataTable. As mentioned above, this step
            // can help, but is not necessarily required for learning the model:
            DataTable table = reader.ToTable();

            // We will learn a MLR regression between the following input and output fields of this table:
            string[] inputNames  = new[] { "write", "ses" };
            string[] outputNames = new[] { "prog" };

            // Now let's create a codification codebook to convert the string fields in the data
            // into integer symbols. This is required because the MLR model can only learn from
            // numeric data, so strings have to be transformed first. We can force a particular
            // interpretation for those columns if needed, as shown in the initializer below:
            var codification = new Codification()
            {
                { "write", CodificationVariable.Continuous },
                { "ses", CodificationVariable.CategoricalWithBaseline, new[] { "low", "middle", "high" } },
                { "prog", CodificationVariable.Categorical, new[] { "academic", "general" } },
            };

            // Learn the codification
            codification.Learn(table);

            // Now, transform symbols into a vector representation, growing the number of inputs.
            // The out arguments overwrite inputNames/outputNames with the expanded column names:
            double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
            double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();

            // Create a new Multinomial Logistic Regression Analysis:
            var analysis = new MultinomialLogisticRegressionAnalysis()
            {
                InputNames  = inputNames,
                OutputNames = outputNames,
            };

            // Learn the regression from the input and output pairs:
            MultinomialLogisticRegression regression = analysis.Learn(x, y);

            // Let's retrieve some information about what we just learned:
            int coefficients    = analysis.Coefficients.Count; // should be 9
            int numberOfInputs  = analysis.NumberOfInputs;     // should be 3
            int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3

            inputNames  = analysis.InputNames;                 // should be "write", "ses: middle", "ses: high"
            outputNames = analysis.OutputNames;                // should be "prog: academic", "prog: general", "prog: vocation"

            // The regression is best visualized when it is data-bound to a
            // Windows.Forms DataGridView or WPF DataGrid. You can get the
            // values for all different coefficients and discrete values:

            // DataGridBox.Show(regression.Coefficients); // uncomment this line

            // You can get the matrix of coefficients:
            double[][] coef = analysis.CoefficientValues;

            // Should be equal to:
            // NOTE(review): expectedCoef and expectedErr are never asserted in this method, and their
            // rows have different lengths (4 vs 5 entries) - verify these figures before relying on them.
            double[][] expectedCoef = new double[][]
            {
                new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
                new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
            };

            // And their associated standard errors:
            double[][] stdErr = analysis.StandardErrors;

            // Should be equal to:
            double[][] expectedErr = new double[][]
            {
                new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
                new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
            };

            // We can also get statistics and hypothesis tests:
            // NOTE(review): the log-likelihood quoted on the line below differs from the value
            // asserted at the end of this method (-179.98172637136295) - confirm which is current.
            WaldTest[][]  wald          = analysis.WaldTests;     // should all have p < 0.05
            ChiSquareTest chiSquare     = analysis.ChiSquare;     // should be p=1.06300120956871E-08
            double        logLikelihood = analysis.LogLikelihood; // should be -179.98173272217591

            // You can use the regression to predict the values:
            int[] pred = regression.Transform(x);

            // And get the accuracy of the prediction if needed:
            var cm = GeneralConfusionMatrix.Estimate(regression, x, y.ArgMax(dimension: 1));

            double acc   = cm.Accuracy; // should be 0.61
            double kappa = cm.Kappa;    // should be 0.2993487536492252
            #endregion


            // Model dimensions
            Assert.AreEqual(9, coefficients);
            Assert.AreEqual(3, numberOfInputs);
            Assert.AreEqual(3, numberOfOutputs);

            // Expanded column names reported back by the analysis
            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

            // Prediction quality and fit statistics
            Assert.AreEqual(0.61, acc, 1e-10);
            Assert.AreEqual(0.2993487536492252, kappa, 1e-10);
            Assert.AreEqual(1.06300120956871E-08, chiSquare.PValue, 1e-8);
            Assert.AreEqual(-179.98172637136295, logLikelihood, 1e-8);

            // Further checks live in a helper defined elsewhere in this test class
            testmlr(analysis);
        }

Пример #7

Показать файл

        /// <summary>
        /// Loads all records from the database, builds a training table from their detail
        /// records, learns a codebook and a C4.5 decision tree that predicts the "Synonym"
        /// column, and stores the tree, its textual rules and its generated code on this
        /// instance. Any exception is reported to the user through a message box.
        /// </summary>
        public Learn()
        {
            try
            {
                // http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_Learning_C45Learning.htm
                using (var db = new DatabaseEntities())
                {
                    var records = db.Records.ToList();

                    // Training table: one row per detail record.
                    var trainingTable = new DataTable("e-Tracker Values");

                    trainingTable.Columns.Add("Id", typeof(int));
                    foreach (string textColumn in new[] { "Age", "L1", "Word", "Synonym" })
                    {
                        trainingTable.Columns.Add(textColumn, typeof(string));
                    }

                    foreach (var record in records)
                    {
                        foreach (var detail in record.DetailRecords.ToList())
                        {
                            trainingTable.Rows.Add(detail.Id, record.Age, record.L1, detail.UnknownWord, detail.SelectedSynonism);
                        }
                    }

                    // Codebook that turns the string values above into integer labels.
                    CodeBook = new Codification()
                    {
                        DefaultMissingValueReplacement = Double.NaN
                    };
                    CodeBook.Learn(trainingTable);

                    // Codified copy of the whole table.
                    DataTable encoded = CodeBook.Apply(trainingTable);

                    // Training inputs and expected outputs.
                    int[][] inputs  = encoded.ToJagged <int>(InputNames);
                    int[]   outputs = encoded.ToArray <int>("Synonym");

                    // Induce the tree with C4.5.
                    var teacher = new C45Learning()
                    {
                        Attributes = DecisionVariable.FromCodebook(CodeBook, InputNames),
                    };
                    Tree = teacher.Learn(inputs, outputs);

                    // Training-set classification error of the induced tree.
                    int[]  predictedLabels = Tree.Decide(inputs);
                    double trainingError   = new ZeroOneLoss(outputs).Loss(predictedLabels);

                    // Rule-set view of the tree, rendered through the codebook.
                    DecisionSet ruleSet = Tree.ToRules();
                    Rules = ruleSet.ToString(CodeBook, "Synonym",
                                             System.Globalization.CultureInfo.InvariantCulture);

                    // Source-code view of the tree.
                    Code = Tree.ToCode("Rules");
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }

Пример #8

Показать файл

Файл: HomeController.cs Проект: NoamDor/finalProject

        /// <summary>
        /// Suggests products for the user stored in the session. Purchase feature vectors
        /// (product-type gender, user gender, product type, size) are codified and clustered
        /// with k-medoids; a k-nearest-neighbours classifier trained on those cluster labels
        /// then labels the candidate products. A candidate is suggested when its predicted
        /// cluster is one that the user's own purchases fall into.
        /// </summary>
        /// <remarks>
        /// Candidates are products the user has not bought whose product type the user has
        /// bought before. Every response, including the error ones, allows GET requests so
        /// that the failure paths behave like the success path.
        /// </remarks>
        /// <returns>
        /// JSON <c>{ products }</c> on success; <c>{ errorCode, errorMessage }</c> with code 1
        /// when the session holds no valid user id, or code 2 when there are fewer purchases
        /// than the neighbour or cluster count.
        /// </returns>
        public JsonResult PredictPossibleProducts()
        {
            var userId     = 0;
            int knnNum     = 5;
            int clusterNum = 4;

            // The session value must exist and parse as an integer user id.
            var sessionUserId = HttpContext.Session["userid"];

            if (sessionUserId == null || !Int32.TryParse(sessionUserId.ToString(), out userId))
            {
                return Json(new { errorCode = 1, errorMessage = "יוזר לא חוקי" }, JsonRequestBehavior.AllowGet);
            }

            var userGender = _context.Users
                             .Where(x => x.Id == userId)
                             .Select(x => x.Gender)
                             .SingleOrDefault();

            var trainData = _context.Purchases
                            .OrderBy(x => x.UserId)
                            .Where(x => x.Product != null)
                            .Select(x => new
                            {
                                userId     = x.UserId.Value,
                                size       = x.Product.Size,
                                type       = x.Product.ProductTypeId,
                                gender     = x.Product.ProductType.Gender,
                                genderUser = x.User.Gender
                            })
                            .ToList();

            // Both learners need at least as many samples as their k.
            if (trainData.Count < knnNum || trainData.Count < clusterNum)
            {
                return Json(new { errorCode = 2, errorMessage = "אין מספיק מידע" }, JsonRequestBehavior.AllowGet);
            }

            var inputs = trainData
                         .Select(x => new double[]
                         {
                             Convert.ToInt32(x.gender),
                             Convert.ToInt32(x.genderUser),
                             x.type.Value,
                             x.size
                         })
                         .ToArray();

            var codification = new Codification <double>()
            {
                CodificationVariable.Categorical,
                CodificationVariable.Categorical,
                CodificationVariable.Categorical,
                CodificationVariable.Discrete
            };

            // Learn the codification from observations
            var model = codification.Learn(inputs);

            // Transform the mixed observations into only continuous:
            double[][] newInputs = model.ToDouble().Transform(inputs);

            // Cluster the purchases, then train k-NN to reproduce the cluster labels.
            KMedoids kmeans   = new KMedoids(k: clusterNum);
            var      clusters = kmeans.Learn(newInputs);

            int[] labels = clusters.Decide(newInputs);

            var knn5 = new KNearestNeighbors(k: knnNum);

            knn5.Learn(newInputs, labels);

            var purchasesById = _context.Purchases
                                .Where(x => x.Product != null)
                                .Select(x => new
                                {
                                    userId     = x.UserId.Value,
                                    size       = x.Product.Size,
                                    type       = x.Product.ProductTypeId,
                                    gender     = x.Product.ProductType.Gender,
                                    genderUser = x.User.Gender
                                })
                                .GroupBy(x => x.userId)
                                .ToList();

            // For each user, the distinct clusters their purchases fall into.
            IList <Tuple <int, int[]> > labelsForUsers = new List <Tuple <int, int[]> >();

            for (int i = 0; i < purchasesById.Count; i++)
            {
                var userInputs = purchasesById[i]
                                 .Select(x => new double[]
                                 {
                                     Convert.ToInt32(x.gender),
                                     Convert.ToInt32(x.genderUser),
                                     x.type.Value,
                                     x.size
                                 })
                                 .ToArray();

                double[][] newUserInputs = model.ToDouble().Transform(userInputs);
                labelsForUsers.Add(new Tuple <int, int[]>(purchasesById[i].Key, clusters.Decide(newUserInputs).Distinct().ToArray()));
            }

            var productIdsUserBought = _context.Purchases
                                       .Where(x => x.UserId == userId)
                                       .Select(x => x.ProductId)
                                       .Distinct()
                                       .ToList();

            var validProductTypeIds = _context.Purchases
                                      .Where(x => x.UserId == userId)
                                      .Select(x => x.Product.ProductTypeId)
                                      .Distinct()
                                      .ToList();

            // Candidates: not yet bought, but of a product type the user has bought before.
            var productsToPredict = _context.Products
                                    .Where(x => !productIdsUserBought.Contains(x.Id))
                                    .Where(x => validProductTypeIds.Contains(x.ProductTypeId))
                                    .Select(x => new
                                    {
                                        id         = x.Id,
                                        size       = x.Size,
                                        type       = x.ProductTypeId,
                                        gender     = x.ProductType.Gender,
                                        genderUser = userGender
                                    })
                                    .ToList();

            var predInputs = productsToPredict
                             .Select(x => new double[]
                             {
                                 Convert.ToInt32(x.gender),
                                 Convert.ToInt32(x.genderUser),
                                 x.type.Value,
                                 x.size
                             })
                             .ToArray();

            double[][] newPredInputs = model.ToDouble().Transform(predInputs);

            int[] newLabels = knn5.Decide(newPredInputs);

            // Look the current user up once; a user without purchases has no clusters.
            var currentUserEntry = labelsForUsers.FirstOrDefault(x => x.Item1 == userId);
            var userLabels       = currentUserEntry != null ? currentUserEntry.Item2 : new int[0];

            IList <int> productIdsPrediction = new List <int>();

            for (int i = 0; i < newLabels.Length; i++)
            {
                if (userLabels.Contains(newLabels[i]))
                {
                    productIdsPrediction.Add(productsToPredict[i].id);
                }
            }

            var predictedProduct = _context.Products
                                   .Where(x => productIdsPrediction.Contains(x.Id))
                                   .Select(x => new
                                   {
                                       Id          = x.Id,
                                       Name        = x.Name,
                                       Price       = x.Price,
                                       Size        = x.Size,
                                       PictureName = x.PictureName
                                   })
                                   .ToList();

            return Json(new { products = predictedProduct }, JsonRequestBehavior.AllowGet);
        }

Пример #9

Показать файл

        /// <summary>
        /// Documentation sample and test: codifies the Servo dataset (two symbolic and two
        /// continuous columns), fits an ordinary-least-squares regression with intercept on
        /// the codified inputs, and checks the model dimensions, the squared error and the
        /// plain and adjusted coefficients of determination.
        /// </summary>
        public void learn_test_2()
        {
            #region doc_learn_2
            // Let's say we would like predict a continuous number from a set
            // of discrete and continuous input variables. For this, we will
            // be using the Servo dataset from UCI's Machine Learning repository
            // as an example: http://archive.ics.uci.edu/ml/datasets/Servo

            // Create a Servo dataset
            Servo      servo     = new Servo();
            object[][] instances = servo.Instances; // 167 x 4
            double[]   outputs   = servo.Output;    // 167 x 1

            // This dataset contains 4 columns, where the first two are
            // symbolic (having possible values A, B, C, D, E), and the
            // last two are continuous.

            // We will use a codification filter to transform the symbolic
            // variables into one-hot vectors, while keeping the other two
            // continuous variables intact:
            var codebook = new Codification <object>()
            {
                { "motor", CodificationVariable.Categorical },
                { "screw", CodificationVariable.Categorical },
                { "pgain", CodificationVariable.Continuous },
                { "vgain", CodificationVariable.Continuous },
            };

            // Learn the codebook
            codebook.Learn(instances);

            // We can gather some info about the problem:
            int numberOfInputs  = codebook.NumberOfInputs;  // should be 4 (since there are 4 variables)
            int numberOfOutputs = codebook.NumberOfOutputs; // should be 12 (due their one-hot encodings)

            // Now we can use it to obtain double[] vectors:
            double[][] inputs = codebook.ToDouble().Transform(instances);

            // We will use Ordinary Least Squares to create a
            // linear regression model with an intercept term
            var ols = new OrdinaryLeastSquares()
            {
                UseIntercept = true
            };

            // Use Ordinary Least Squares to estimate a regression model:
            MultipleLinearRegression regression = ols.Learn(inputs, outputs);

            // We can compute the predicted points using:
            double[] predicted = regression.Transform(inputs);

            // And the squared error using the SquareLoss class:
            double error = new SquareLoss(outputs).Loss(predicted);

            // We can also compute other measures, such as the coefficient of determination r² using:
            double r2 = new RSquaredLoss(numberOfOutputs, outputs).Loss(predicted); // should be 0.55086630162967354

            // Or the adjusted or weighted versions of r² using:
            var r2loss = new RSquaredLoss(numberOfOutputs, outputs)
            {
                Adjust = true,
                // Weights = weights; // (uncomment if you have a weighted problem)
            };

            double ar2 = r2loss.Loss(predicted); // should be 0.51586887058782993

            // Alternatively, we can also use the less generic, but maybe more user-friendly method directly:
            double ur2 = regression.CoefficientOfDetermination(inputs, outputs, adjust: true); // should be 0.51586887058782993
            #endregion

            // The codebook turns 4 variables into 12 regression inputs.
            Assert.AreEqual(4, numberOfInputs);
            Assert.AreEqual(12, numberOfOutputs);
            Assert.AreEqual(12, regression.NumberOfInputs);
            Assert.AreEqual(1, regression.NumberOfOutputs);

            Assert.AreEqual(1.0859586717266123, error, 1e-6);

            // Compute and Transform must agree on the predictions.
            double[] expected = regression.Compute(inputs);
            double[] actual   = regression.Transform(inputs);
            Assert.IsTrue(expected.IsEqual(actual, 1e-10));

            // Floating-point results are compared with a tolerance instead of exact
            // equality, so the last bits of the computation cannot fail the test.
            Assert.AreEqual(0.55086630162967354, r2, 1e-10);
            Assert.AreEqual(0.51586887058782993, ar2, 1e-10);
            Assert.AreEqual(0.51586887058782993, ur2, 1e-10);
        }

Пример #10

Показать файл

Файл: LogisticRegressionAnalysisTest.cs Проект: jthornca/accord-framework

        // Documentation sample (region doc_learn_database) that doubles as a regression test:
        // builds a small DataTable, codifies it into numeric vectors, fits a
        // LogisticRegressionAnalysis and checks the reported statistics against pinned values.
        // NOTE(review): the name suggests this covers GitHub issue #937 — confirm.
        public void gh_937()
        {
            #region doc_learn_database
            // Note: this example uses a System.Data.DataTable to represent input data,
            // but note that this is not required. The data could have been represented
            // as jagged double matrices (double[][]) directly.

            // If you have to handle heterogeneous data in your application, such as user records
            // in a database, this data is best represented within the framework using a .NET
            // DataTable object. In order to learn a classification or regression model from
            // this DataTable, we first need to convert the table into a representation
            // that the machine learning model can understand. Such a representation is quite
            // often a matrix of doubles (double[][]).
            var data = new DataTable("Customer Revenue Example");

            data.Columns.Add("Day", "CustomerId", "Time (hour)", "Weather", "Buy");
            data.Rows.Add("D1", 0, 8, "Sunny", true);
            data.Rows.Add("D2", 1, 10, "Sunny", true);
            data.Rows.Add("D3", 2, 10, "Rain", false);
            data.Rows.Add("D4", 3, 16, "Rain", true);
            data.Rows.Add("D5", 4, 15, "Rain", true);
            data.Rows.Add("D6", 5, 20, "Rain", false);
            data.Rows.Add("D7", 6, 12, "Cloudy", true);
            data.Rows.Add("D8", 7, 12, "Sunny", false);

            // One way to perform this conversion is by using a Codification filter. The Codification
            // filter can take care of converting variables that actually denote symbols (e.g. the
            // weather in the example above) into representations that make more sense given the
            // assumption of a real vector-based classifier.

            // Create a codification codebook
            var codebook = new Codification()
            {
                { "Weather", CodificationVariable.Categorical },
                { "Time (hour)", CodificationVariable.Continuous },
                // NOTE(review): the table built above has no "Revenue" column (the output column
                // here is "Buy"); this entry looks carried over from the linear-regression
                // version of this example — confirm before changing it.
                { "Revenue", CodificationVariable.Continuous },
            };

            // Learn from the data
            codebook.Learn(data);

            // Now, we will use the codebook to transform the DataTable into double[][] vectors. Due to
            // the way the conversion works, we can end up with more columns in the output vectors
            // than the ones we started with. If you would like more details about what those columns
            // represent, you can pass them as 'out' parameters in the methods that follow below.
            string[] inputNames;  // (note: if you do not want to run this example yourself, you
            string   outputName;  // can see below the new variable names that will be generated)

            // Now, we can translate our training data into numeric vectors using our codebook:
            double[][] inputs  = codebook.Apply(data, "Weather", "Time (hour)").ToJagged(out inputNames);
            double[]   outputs = codebook.Apply(data, "Buy").ToVector(out outputName);
            // (note: the Apply method transforms a DataTable into another DataTable containing the
            //  codified variables. The ToJagged and ToVector methods are then used to transform those
            //  tables into double[][] matrices and double[] vectors, respectively.)

            // If we would like to learn a logistic regression model for this data, there are two possible
            // ways depending on which aspect of the logistic regression we are interested in the most. If
            // we are interested in interpreting the logistic regression, performing hypothesis tests with
            // the coefficients and performing an actual _logistic regression analysis_, then we can use the
            // LogisticRegressionAnalysis class for this. If however we are only interested in using
            // the learned model directly to predict new values for the dataset, then we could be using the
            // LogisticRegression and IterativeReweightedLeastSquares classes directly instead.

            // This example deals with the former case. For the latter, please see the documentation page
            // for the LogisticRegression class.

            // We can create a new logistic regression analysis for the variables
            var lra = new LogisticRegressionAnalysis()
            {
                // We can also inform the names of the new variables that have been created by the
                // codification filter. Those can help in visualizing the analysis once it is
                // data-bound to a visual control such as a Windows.Forms.DataGridView or WPF DataGrid:

                Inputs = inputNames, // will be { "Weather: Sunny", "Weather: Rain", "Weather: Cloudy", "Time (hour)" }
                Output = outputName  // will be "Buy" (the column selected for the outputs above)
            };

            // Compute the analysis and obtain the estimated regression
            LogisticRegression regression = lra.Learn(inputs, outputs);

            // And then predict the output for the first sample using
            double predicted = lra.Transform(inputs[0]); // result will be ~0.287

            // Because we opted for doing a LogisticRegressionAnalysis instead of using a plain
            // LogisticRegression, we will have further information about the regression available:
            int           inputCount        = lra.NumberOfInputs;  // should be 4
            int           outputCount       = lra.NumberOfOutputs; // should be 1
            double        logl              = lra.LogLikelihood;   // should be -4.6035570737785525
            ChiSquareTest x2                = lra.ChiSquare;       // should be 1.37789 (p=0.8480, non-significant)
            double[]      stdErr            = lra.StandardErrors;  // should be high except for the last value of 0.27122079214927985 (due to the small data)
            double[]      or                = lra.OddsRatios;      // should be 1.1116659950687609 for the last coefficient (related to time of day)
            LogisticCoefficientCollection c = lra.Coefficients;    // coefficient table (bind to a visual control for quick inspection)
            double[][] h = lra.InformationMatrix;                  // should contain Fisher's information matrix for the problem
            #endregion

            // Everything below is outside the documentation region: it pins the values quoted
            // in the comments above so that the sample cannot silently drift.
            Assert.AreEqual(0.28703150858677107, predicted, 1e-8);
            Assert.AreEqual(4, inputCount, 1e-8);
            Assert.AreEqual(1, outputCount, 1e-8);
            Assert.AreEqual(-4.6035570737785525, logl, 1e-8);
            // Five odds ratios are expected for four inputs.
            // NOTE(review): presumably the first entry belongs to the intercept — confirm.
            Assert.IsTrue(new[] { 0.0019604927838235376, 88.043929817973222, 101.42211648160144, 2.1954970044905113E-07, 1.1116659950687609 }.IsEqual(or, 1e-4));

            Assert.AreEqual(1.377897662970609, x2.Statistic, 1e-8);
            Assert.AreEqual(0.84802726696077046, x2.PValue, 1e-8);
        }

Пример #11

Показать файл

Файл: MultipleLinearRegressionAnalysisTest.cs Проект: jthornca/accord-framework

        // Documentation sample (region doc_learn_database) that doubles as a regression test:
        // builds a small DataTable, codifies it into numeric vectors, fits a
        // MultipleLinearRegressionAnalysis and checks the reported statistics against pinned values.
        // NOTE(review): the name suggests this covers GitHub issue #937 — confirm.
        public void gh_937()
        {
            #region doc_learn_database
            // Note: this example uses a System.Data.DataTable to represent input data,
            // but note that this is not required. The data could have been represented
            // as jagged double matrices (double[][]) directly.

            // If you have to handle heterogeneous data in your application, such as user records
            // in a database, this data is best represented within the framework using a .NET
            // DataTable object. In order to learn a classification or regression model from
            // this DataTable, we first need to convert the table into a representation
            // that the machine learning model can understand. Such a representation is quite
            // often a matrix of doubles (double[][]).
            var data = new DataTable("Customer Revenue Example");

            data.Columns.Add("Day", "CustomerId", "Time (hour)", "Weather", "Revenue");
            data.Rows.Add("D1", 0, 8, "Sunny", 101.2);
            data.Rows.Add("D2", 1, 10, "Sunny", 24.1);
            data.Rows.Add("D3", 2, 10, "Rain", 107);
            data.Rows.Add("D4", 3, 16, "Rain", 223);
            data.Rows.Add("D5", 4, 15, "Rain", 1);
            data.Rows.Add("D6", 5, 20, "Rain", 42);
            data.Rows.Add("D7", 6, 12, "Cloudy", 123);
            data.Rows.Add("D8", 7, 12, "Sunny", 64);

            // One way to perform this conversion is by using a Codification filter. The Codification
            // filter can take care of converting variables that actually denote symbols (e.g. the
            // weather in the example above) into representations that make more sense given the
            // assumption of a real vector-based classifier.

            // Create a codification codebook
            var codebook = new Codification()
            {
                { "Weather", CodificationVariable.Categorical },
                { "Time (hour)", CodificationVariable.Continuous },
                { "Revenue", CodificationVariable.Continuous },
            };

            // Learn from the data
            codebook.Learn(data);

            // Now, we will use the codebook to transform the DataTable into double[][] vectors. Due to
            // the way the conversion works, we can end up with more columns in the output vectors
            // than the ones we started with. If you would like more details about what those columns
            // represent, you can pass them as 'out' parameters in the methods that follow below.
            string[] inputNames;  // (note: if you do not want to run this example yourself, you
            string   outputName;  // can see below the new variable names that will be generated)

            // Now, we can translate our training data into numeric vectors using our codebook:
            double[][] inputs  = codebook.Apply(data, "Weather", "Time (hour)").ToJagged(out inputNames);
            double[]   outputs = codebook.Apply(data, "Revenue").ToVector(out outputName);
            // (note: the Apply method transforms a DataTable into another DataTable containing the
            //  codified variables. The ToJagged and ToVector methods are then used to transform those
            //  tables into double[][] matrices and double[] vectors, respectively.)

            // If we would like to learn a linear regression model for this data, there are two possible
            // ways depending on which aspect of the linear regression we are interested in the most. If
            // we are interested in interpreting the linear regression, performing hypothesis tests with
            // the coefficients and performing an actual _linear regression analysis_, then we can use the
            // MultipleLinearRegressionAnalysis class for this. If however we are only interested in using
            // the learned model directly to predict new values for the dataset, then we could be using the
            // MultipleLinearRegression and OrdinaryLeastSquares classes directly instead.

            // This example deals with the former case. For the latter, please see the documentation page
            // for the MultipleLinearRegression class.

            // We can create a new multiple linear analysis for the variables
            var mlra = new MultipleLinearRegressionAnalysis(intercept: true)
            {
                // We can also inform the names of the new variables that have been created by the
                // codification filter. Those can help in visualizing the analysis once it is
                // data-bound to a visual control such as a Windows.Forms.DataGridView or WPF DataGrid:

                Inputs = inputNames, // will be { "Weather: Sunny", "Weather: Rain", "Weather: Cloudy", "Time (hour)" }
                Output = outputName  // will be "Revenue"
            };

            // To overcome linear dependency errors
            mlra.OrdinaryLeastSquares.IsRobust = true;

            // Compute the analysis and obtain the estimated regression
            MultipleLinearRegression regression = mlra.Learn(inputs, outputs);

            // And then predict the output for the first sample using
            double predicted = mlra.Transform(inputs[0]); // result will be ~72.3

            // Because we opted for doing a MultipleLinearRegressionAnalysis instead of a simple
            // linear regression, we will have further information about the regression available:
            int    inputCount       = mlra.NumberOfInputs;    // should be 4
            int    outputCount      = mlra.NumberOfOutputs;   // should be 1
            double r2               = mlra.RSquared;          // should be 0.12801838425195311
            AnovaSourceCollection a = mlra.Table;             // ANOVA table (bind to a visual control for quick inspection)
            double[][]            h = mlra.InformationMatrix; // should contain Fisher's information matrix for the problem
            ZTest z = mlra.ZTest;                             // should be 0 (p=1, non-significant)
            #endregion

            // Everything below is outside the documentation region: it pins the values quoted
            // in the comments above so that the sample cannot silently drift.
            Assert.AreEqual(72.279574468085144d, predicted, 1e-8);
            Assert.AreEqual(4, inputCount, 1e-8);
            Assert.AreEqual(1, outputCount, 1e-8);
            Assert.AreEqual(0.12801838425195311, r2, 1e-8);
            Assert.AreEqual(0.11010987669344097, a[0].Statistic, 1e-8);

            // Expected information matrix: 5x5 for the four inputs.
            // NOTE(review): presumably the extra row/column is the intercept term — confirm.
            double[][] expectedH = new double[][]
            {
                new double[] { 0.442293243337911, -0.069833718526197, -0.228692384542512, -0.0141758263063635, 0.143767140269202 },
                new double[] { -0.0698337185261971, 0.717811616891116, -0.112258662892007, -0.0655549422852099, 0.535719235472913 },
                new double[] { -0.228692384542512, -0.112258662892007, 0.717434922237013, -0.0232803210243207, 0.376483874802496 },
                new double[] { -0.0141758263063635, -0.0655549422852099, -0.0232803210243207, 0.0370082984668314, -0.103011089615894 },
                new double[] { 0.143767140269202, 0.535719235472913, 0.376483874802496, -0.103011089615894, 1.05597025054461 }
            };

            Assert.IsTrue(expectedH.IsEqual(h, 1e-8));
            Assert.AreEqual(0, z.Statistic, 1e-8);
            Assert.AreEqual(1, z.PValue, 1e-8);
        }

Пример #12

Показать файл

        // Documentation sample (region doc_missing) that doubles as a regression test:
        // learns a C4.5 decision tree from a "Play Tennis" table in which some cells are
        // missing (null), converts the tree to rules and checks the rules, the codebook
        // contents and the classification error against pinned values.
        public void missing_values_test()
        {
            #region doc_missing
            // In this example, we will be using a modified version of the famous Play Tennis
            // example by Tom Mitchell (1998), where some values have been replaced by missing
            // values. We will use NaN double values to represent values missing from the data.

            // Note: this example uses DataTables to represent the input data,
            // but this is not required. The same could be performed using plain
            // double[][] matrices and vectors instead.
            DataTable data = new DataTable("Tennis Example with Missing Values");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(string));
            data.Columns.Add("Humidity", typeof(string));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", null, "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", null, null, "High", null, "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", null, "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", null, "Mild", "High", null, "No");
            data.Rows.Add("D9", null, "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", null, null, "Normal", null, "Yes");
            data.Rows.Add("D11", null, "Mild", "Normal", null, "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", null, "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", null, "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Create a new codification codebook to convert the strings above
            // into numeric labels, using NaN as the replacement for missing values:
            var codebook = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            // Learn the codebook
            codebook.Learn(data);

            // Use the codebook to convert all the data
            DataTable symbols = codebook.Apply(data);

            // Grab the training input and output instances:
            string[]   inputNames = new[] { "Outlook", "Temperature", "Humidity", "Wind" };
            double[][] inputs     = symbols.ToJagged(inputNames);
            int[]      outputs    = symbols.ToArray<int>("PlayTennis");

            // Create a new learning algorithm
            var teacher = new C45Learning()
            {
                Attributes = DecisionVariable.FromCodebook(codebook, inputNames)
            };

            // Use the learning algorithm to induce a new tree:
            DecisionTree tree = teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // The classification error (~0.214) can be computed as
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "PlayTennis",
                                             System.Globalization.CultureInfo.InvariantCulture);

            // The output should be:
            string expected = @"No =: (Outlook == Sunny)
No =: (Outlook == Rain) && (Wind == Strong)
Yes =: (Outlook == Overcast)
Yes =: (Outlook == Rain) && (Wind == Weak)
";
            #endregion

            // Normalize line endings on BOTH sides before comparing. The verbatim literal above
            // carries whatever line endings this source file was checked out with, which need not
            // match the platform's Environment.NewLine; rewriting only the literal's "\r\n" would
            // leave an LF-only literal untouched and could fail the comparison spuriously.
            expected = expected.Replace("\r\n", "\n");
            ruleText = ruleText.Replace("\r\n", "\n");
            Assert.AreEqual(expected, ruleText);

            // Distinct symbols learned per column; missing (null) cells are not counted as symbols.
            Assert.AreEqual(14, codebook["Day"].NumberOfSymbols);
            Assert.AreEqual(3, codebook["Outlook"].NumberOfSymbols);
            Assert.AreEqual(3, codebook["Temperature"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["Humidity"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["Wind"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["PlayTennis"].NumberOfSymbols);

            // Every learned column should have picked up the default replacement set on the
            // codebook and be treated as an ordinal variable.
            foreach (var col in codebook)
            {
                Assert.AreEqual(Double.NaN, col.MissingValueReplacement);
                Assert.AreEqual(CodificationVariable.Ordinal, col.VariableType);
            }

            Assert.AreEqual(0.21428571428571427, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(2, tree.NumberOfOutputs);

            // The rule set derived from the tree must yield the same error as the tree itself.
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.21428571428571427, newError, 1e-10);
        }