Esempio n. 1
0
        /// <summary>
        /// Round-trips a random forest through a model file: the configured classifier is
        /// flushed to disk, read back as an untyped classifier, and then asked to classify
        /// a single hand-built <c>TitanicDataRow</c>.
        /// </summary>
        public void testing_saving_and_loading_saved_model()
        {
            // The save and the load step must agree on this path, so it is named once.
            const string modelFile = "titanic_randor_forest.model";

            // Save
            Runtime.LoadFromFile<TitanicDataRow>(0, TestingHelpers.GetResourceFileName("titanic_train.csv"))
                .Classifiers.Trees.RandomForest
                .NumExecutionSlots(4)
                .NumFeatures(5)
                .NumTrees(50)
                .FlushToFile(modelFile);

            // Load
            IUntypedBaseClassifier<weka.classifiers.Classifier> loaded = BaseClassifier.Read(modelFile);

            TitanicDataRow passenger = new TitanicDataRow();
            passenger.age      = 10;
            passenger.pclass   = "1";
            passenger.sex      = "male";
            passenger.embarked = "C";

            // Classify (a fresh instance is built for each call, as the two calls are independent)
            double predictedClass = loaded.ClassifyInstance(Runtime.BuildInstance(0, passenger));
            double probability    = loaded.ClassifyInstanceProba(Runtime.BuildInstance(0, passenger));

            Assert.AreEqual(0.0, predictedClass);
            Assert.IsTrue(probability < 0.5);
        }
Esempio n. 2
0
        /// <summary>
        /// Checks that the unsupervised RemoveByName attribute filter, given the expression
        /// "sex", produces a runtime with exactly one attribute fewer than the loaded one.
        /// </summary>
        public void test_basic_unsupervised_attribute_filtering()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");

            Runtime original = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);
            Runtime filtered = original.Filters.UnsupervisedAttribute.RemoveByName
                .Expression("sex")
                .RunFilter();

            Assert.AreEqual(original.NumAttributes, filtered.NumAttributes + 1);
        }
Esempio n. 3
0
        /// <summary>
        /// Checks that the unsupervised RemoveRange instance filter, given the index range
        /// "1-100", produces a runtime with exactly 100 instances fewer than the loaded one.
        /// </summary>
        public void test_basic_unsupervised_instance_filtering()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");

            Runtime original = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);
            Runtime filtered = original.Filters.UnsupervisedInstance.RemoveRange
                .InstancesIndices("1-100")
                .RunFilter();

            Assert.AreEqual(original.NumInstances, filtered.NumInstances + 100);
        }
Esempio n. 4
0
        /// <summary>
        /// Checks the attribute count after running the supervised NominalToBinary filter
        /// on attribute index "2".
        /// </summary>
        public void test_basic_supervised_attribute_filtering()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");

            Runtime original = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);
            Runtime filtered = original.Filters.SupervisedAttribute.NominalToBinary
                .AttributeIndices("2")     // pclass (1,2,3)
                .RunFilter();

            // Three new binary attributes are added and the original nominal one is removed.
            Assert.AreEqual(original.NumAttributes + 3 - 1, filtered.NumAttributes);
        }
Esempio n. 5
0
        /// <summary>
        /// Smoke test: configures a random forest on the Titanic training data and runs
        /// <c>EvaluateWithCrossValidation</c>. The result is discarded and nothing is
        /// asserted, so the test only fails if one of the calls throws.
        /// </summary>
        public void test_simple_evaluation()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");
            Runtime trainingSet = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);

            trainingSet.Classifiers.Trees.RandomForest
                .NumExecutionSlots(4)
                .NumFeatures(5)
                .NumTrees(50)
                .EvaluateWithCrossValidation();
        }
Esempio n. 6
0
        /// <summary>
        /// Checks the supervised SpreadSubsample instance filter with a distribution spread
        /// of 1.0: the loaded set is expected to hold 891 instances, and the filtered set
        /// 684 instances split evenly (342 each) between class values 0.0 and 1.0.
        /// </summary>
        public void test_basic_supervised_instance_filtering()
        {
            Runtime rt    = Runtime.LoadFromFile <TitanicDataRow>(0, TestingHelpers.GetResourceFileName("titanic_train.csv"));
            Runtime newrt = rt.Filters.SupervisedInstance.SpreadSubsample.
                            DistributionSpread(1.0).
                            RunFilter();

            // Assert.AreEqual takes (expected, actual); the literal goes first so a failure
            // message reports the hard-coded count as "expected" and the measured one as "was".
            Assert.AreEqual(891, rt.NumInstances);
            Assert.AreEqual(684, newrt.NumInstances);
            Assert.AreEqual(342, newrt.Count(i => i.ClassValue == 0.0));
            Assert.AreEqual(342, newrt.Count(i => i.ClassValue == 1.0));
        }
Esempio n. 7
0
        /// <summary>
        /// Runs a bi-directional BestFirst search (lookup cache size 10) with a CfsSubset
        /// evaluator and checks that the selected attribute indexes are { 2, 0 }.
        /// </summary>
        public void simple_attribute_selection_tests_with_indexes()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");
            Runtime trainingSet = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);

            // Search strategy.
            BestFirst search = trainingSet.AttributeSelections.Algorithms.BestFirst
                .Direction(BestFirst.EDirection.Bi_directional)
                .LookupCacheSize(10);

            // Subset evaluator handed to the search.
            PicNetML.AttrSel.Evals.CfsSubset evaluator = trainingSet.AttributeSelections.Evaluators.CfsSubset
                .LocallyPredictive(true)
                .MissingSeparate(true);

            int[] selected = search.SearchIndexes(evaluator);

            Assert.AreEqual(new[] { 2, 0 }, selected);
        }
Esempio n. 8
0
        /// <summary>
        /// Configures a random forest on the training file, loads the test file through the
        /// <c>TestLinePreproc</c> preprocessor, and checks that <c>GeneratePredictions</c>
        /// returns one line per instance of the test set.
        /// </summary>
        public void test_building_predictions_lines()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");
            Runtime trainingSet = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);
            RandomForest forest = trainingSet.Classifiers.Trees.RandomForest
                .NumExecutionSlots(4)
                .NumFeatures(5)
                .NumTrees(50);

            var testFile = TestingHelpers.GetResourceFileName("titanic_test.csv");
            Runtime unlabelled = Runtime.LoadFromFile<TitanicDataRow>(0, testFile, preprocessor: TestLinePreproc);

            // Capture the instance count before predictions are generated.
            int expectedLines = unlabelled.NumInstances;

            System.Collections.Generic.List<string> predictionLines =
                unlabelled.GeneratePredictions(GeneratePredictionLine, forest);

            Assert.AreEqual(expectedLines, predictionLines.Count);
        }
        /// <summary>
        /// Runs a bi-directional BestFirst search (lookup cache size 10) with a CfsSubset
        /// evaluator and checks that the runtime returned by <c>Search</c> exposes exactly
        /// the attributes "sex" and "survived", in that order.
        /// </summary>
        [Test]
        public void simple_attribute_selection_tests_with_new_runtime()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");
            var trainingSet  = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);

            // Search strategy.
            var search = trainingSet.AttributeSelections.Algorithms.BestFirst
                .Direction(BestFirst.EDirection.Bi_directional)
                .LookupCacheSize(10);

            // Subset evaluator handed to the search.
            var evaluator = trainingSet.AttributeSelections.Evaluators.CfsSubset
                .LocallyPredictive(true)
                .MissingSeparate(true);

            var reduced        = search.Search(evaluator);
            var attributeNames = reduced.EnumerateAttributes.Select(attribute => attribute.Name).ToArray();

            Assert.AreEqual(new[] { "sex", "survived" }, attributeNames);
        }
Esempio n. 10
0
        /// <summary>
        /// Builds a SimpleKMeans clusterer with 10 clusters on the Titanic training data
        /// (after removing attribute index "1") and checks the cluster assigned to the
        /// instances at positions 0, 2, 3 and 4.
        /// </summary>
        public void test_basic_clustering()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");
            Runtime loaded = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);

            // Remove attribute "1" before clustering; presumably this is the class
            // attribute, which upsets clusterers.
            Runtime unlabelled = loaded.Filters.UnsupervisedAttribute.Remove
                .AttributeIndices("1")
                .RunFilter();

            PicNetML.Clstr.IBaseClusterer<weka.clusterers.SimpleKMeans> kmeans =
                unlabelled.Clusterers.SimpleKMeans
                    .NumClusters(10)
                    .Build();

            Assert.AreEqual(1, kmeans.ClusterInstance(unlabelled[0]));
            Assert.AreEqual(0, kmeans.ClusterInstance(unlabelled[2]));
            Assert.AreEqual(0, kmeans.ClusterInstance(unlabelled[3]));
            Assert.AreEqual(1, kmeans.ClusterInstance(unlabelled[4]));
        }
Esempio n. 11
0
        /// <summary>
        /// Configures a random forest on the Titanic training data and classifies a single
        /// hand-built <c>TitanicDataRow</c>, expecting class 0 with a probability below 0.5.
        /// </summary>
        [Test]
        public void test_making_single_predictions_from_trained_model()
        {
            var trainingFile = TestingHelpers.GetResourceFileName("titanic_train.csv");
            var trainingSet  = Runtime.LoadFromFile<TitanicDataRow>(0, trainingFile);
            var forest       = trainingSet.Classifiers.Trees.RandomForest
                .NumExecutionSlots(4)
                .NumFeatures(5)
                .NumTrees(50);

            var passenger = new TitanicDataRow();
            passenger.age      = 10;
            passenger.pclass   = "1";
            passenger.sex      = "male";
            passenger.embarked = "C";

            var predictedClass = forest.ClassifyRow(passenger);
            var probability    = forest.ClassifyRowProba(passenger);

            Assert.AreEqual(0, predictedClass);
            Assert.IsTrue(probability < 0.5);
        }