Beispiel #1
0
        /// <summary>
        /// Trains a model on a dataset holding a single instance, saves the resulting predictor to an
        /// in-memory repository, loads it back, and passes the predictions of the original and the
        /// reloaded predictor for that instance to <paramref name="checker"/>.
        /// </summary>
        /// <typeparam name="TOutput">Output type of the predictor produced by the trainer.</typeparam>
        /// <param name="trainerMaker">Creates the trainer to exercise from the host built by this helper.</param>
        /// <param name="checker">Invoked with the original predictor's result first and the reloaded predictor's result second.</param>
        private static void TrivialHelper <TOutput>(Func <ITrainerHost, ITrainer <Instances, IPredictorProducing <TOutput> > > trainerMaker, Action <TOutput, TOutput> checker)
        {
            // One zero-valued feature with a zero label; this is expected to yield a "trivial"
            // predictor for binary classification, regression, and multiclass alike.
            var singleExample = new ListInstances();
            singleExample.AddInst(new Float[] { (Float)0.0 }, (Float)0);
            singleExample.CopyMetadata(null);

            ITrainerHost trainHost = new TrainHost(new Random(1), 0);
            var trainer = trainerMaker(trainHost);
            trainer.Train(singleExample);

            var original = (IPredictor <Instance, TOutput>)trainer.CreatePredictor();
            var reloaded = default(IPredictor <Instance, TOutput>);

            // Round-trip the predictor through a repository backed by memory only.
            using (Stream buffer = new MemoryStream())
            {
                using (RepositoryWriter repoWriter = RepositoryWriter.CreateNew(buffer, false))
                {
                    ModelSaveContext.SaveModel(repoWriter, original, "foo");
                    repoWriter.Commit();
                }

                // Rewind so the reader starts at the beginning of what was just written.
                buffer.Position = 0;
                using (RepositoryReader repoReader = RepositoryReader.Open(buffer, false))
                {
                    ModelLoadContext.LoadModel(out reloaded, repoReader, "foo");
                }
                Assert.AreNotEqual(default(IPredictor <Instance, TOutput>), reloaded, "did not load expected model");
            }

            // Original result first, reloaded result second.
            checker(original.Predict(singleExample[0]), reloaded.Predict(singleExample[0]));
        }
Beispiel #2
0
        /// <summary>
        /// End-to-end example for a multiclass OVA predictor: trains on the msm training data, saves
        /// the model in binary, text and INI form, reloads the binary model, and checks that
        /// per-instance predictions agree with bulk predictions and that tester metrics match between
        /// the reloaded and the original predictor.
        /// </summary>
        [Ignore] // REVIEW: OVA no longer implements BulkPredict.
        public void MulticlassExampleTest()
        {
            string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

            //*********  Training a model *******//
            // assume data is in memory in matrix/vector form. Sparse format also supported.
            Float[][] data;
            Float[]   labels;
            // below just reads some actual data into these arrays
            PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

            // Create an Instances dataset.
            ListInstances instances = new ListInstances();

            for (int i = 0; i < data.Length; i++)
            {
                instances.AddInst(data[i], labels[i]);
            }
            instances.CopyMetadata(null);

            // Create a predictor and specify some non-default settings
            var args = new OVA.OldArguments();

            PredictionUtil.ParseArguments(args, "p=FR ps=iter:20");
            ITrainer <Instances, IPredictor <Instance, Float[]> > trainer = new OVA(args, new TrainHost(new Random(1), 0));

            // Train a predictor
            trainer.Train(instances);
            var predictor = trainer.CreatePredictor();

            // Path.GetTempFileName creates the file on disk, so each name is tracked from the moment
            // it exists and removed in the finally block even when an assertion below fails.
            string modelFilename     = null;
            string modelFilenameText = null;
            string modelFilenameIni  = null;

            try
            {
                //*********  Several ways to save models. Only binary can be used to-reload in TLC. *******//

                // Save the model in internal binary format that can be used for loading it.
                modelFilename = Path.GetTempFileName();
                PredictorUtils.Save(modelFilename, predictor, instances, null);

                // Save the model as a plain-text description
                modelFilenameText = Path.GetTempFileName();
                PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

                // Save the model in Bing's INI format
                modelFilenameIni = Path.GetTempFileName();
                PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

                //*********  Loading and making predictions with a previously saved model *******//
                // Note:   there are several alternative ways to construct instances
                // For example, see FactoryExampleTest  below that demonstrates named-feature : value pairs.

                // Load saved model
                IDataModel dataModel;
                IDataStats dataStats;
                var        pred = PredictorUtils.LoadPredictor <Float[]>(out dataModel, out dataStats, modelFilename);

                // Get predictions for instances
                Float[][] predictions = new Float[instances.Count][];

                for (int i = 0; i < instances.Count; i++)
                {
                    predictions[i] = pred.Predict(instances[i]);
                }

                // REVIEW: This looks like it wasn't doing what was expected - OVA didn't override
                // BulkPredict, so this wasn't using FastRank's BulkPredict.
                Float[][] bulkPredictions = ((IBulkPredictor <Instance, Instances, Float[], Float[][]>)pred).BulkPredict(instances);

                // Per-instance and bulk prediction must agree element for element.
                Assert.Equal(predictions.Length, bulkPredictions.Length);
                for (int i = 0; i < predictions.Length; i++)
                {
                    Assert.Equal(predictions[i].Length, bulkPredictions[i].Length);
                    for (int j = 0; j < predictions[i].Length; j++)
                    {
                        Assert.Equal(predictions[i][j], bulkPredictions[i][j]);
                    }
                }

                //test new testers
                {
                    var results = new MulticlassTester(new MulticlassTesterArguments()).Test(pred, instances);

                    // Get metric names and print them alongside numbers
                    for (int i = 0; i < results.Length; i++)
                    {
                        Log("{0,-30} {1}", results[i].Name, results[i].Value);
                    }

                    // sanity check vs. original predictor
                    var results2 = new MulticlassTester(new MulticlassTesterArguments()).Test(predictor, instances);
                    Assert.Equal(results.Length, results2.Length);
                    for (int i = 0; i < results.Length; i++)
                    {
                        Assert.Equal(results[i].Name, results2[i].Name);
                        Assert.Equal(results[i].Value, results2[i].Value);
                    }
                }
            }
            finally
            {
                // Only delete the files that were actually created; File.Delete rejects null paths.
                foreach (string tempPath in new[] { modelFilename, modelFilenameText, modelFilenameIni })
                {
                    if (tempPath != null)
                    {
                        File.Delete(tempPath);
                    }
                }
            }

            Done();
        }
Beispiel #3
0
        /// <summary>
        /// End-to-end example for a FastRank binary classifier, run via <c>RunMTAThread</c>: trains on
        /// the msm training data, saves the model in binary, text and INI form, reloads the binary
        /// model, and checks that raw per-instance predictions, predictions on freshly constructed
        /// instances, and bulk predictions all agree, and that tester metrics match between the
        /// reloaded and the original predictor.
        /// </summary>
        public void SimpleExampleTest()
        {
            RunMTAThread(() =>
            {
                string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

                //*********  Training a model *******//
                // assume data is in memory in matrix/vector form. Sparse format also supported.
                Float[][] data;
                Float[] labels;
                // below just reads some actual data into these arrays
                PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

                // Create an Instances dataset.
                ListInstances instances = new ListInstances();
                for (int i = 0; i < data.Length; i++)
                {
                    instances.AddInst(data[i], labels[i]);
                }
                instances.CopyMetadata(null);

                // Create a predictor and specify some non-default settings
                var sub = new SubComponent <ITrainer <Instances, IPredictor <Instance, Float> >, SignatureOldBinaryClassifierTrainer>(
                    "FastRank", "nl=5 lr =0.25 iter= 20");
                var trainer = sub.CreateInstance(new TrainHost(new Random(1), 0));

                // Train a predictor
                trainer.Train(instances);
                var predictor = trainer.CreatePredictor();

                // Path.GetTempFileName creates the file on disk, so each name is tracked from the
                // moment it exists and removed in the finally block even when an assertion fails.
                string modelFilename     = null;
                string modelFilenameText = null;
                string modelFilenameIni  = null;

                try
                {
                    //*********  Several ways to save models. Only binary can be used to-reload in TLC. *******//

                    // Save the model in internal binary format that can be used for loading it.
                    modelFilename = Path.GetTempFileName();
                    PredictorUtils.Save(modelFilename, predictor, instances, null);

                    // Save the model as a plain-text description
                    modelFilenameText = Path.GetTempFileName();
                    PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

                    // Save the model in Bing's INI format
                    modelFilenameIni = Path.GetTempFileName();
                    PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

                    //*********  Loading and making predictions with a previously saved model *******//
                    // Note:   there are several alternative ways to construct instances
                    // For example, see FactoryExampleTest  below that demonstrates named-feature : value pairs.

                    // Load saved model
                    IDataModel dataModel;
                    IDataStats dataStats;
                    var pred = PredictorUtils.LoadPredictor <Float>(out dataModel, out dataStats, modelFilename);

                    // Direct cast rather than 'as': a predictor of the wrong kind fails here with a
                    // descriptive InvalidCastException instead of a NullReferenceException in the loop.
                    var dp = (IDistributionPredictor <Instance, Float, Float>)pred;

                    // Get predictions for instances
                    Float[] probPredictions = new Float[instances.Count];
                    Float[] rawPredictions  = new Float[instances.Count];
                    Float[] rawPredictions1 = new Float[instances.Count];
                    for (int i = 0; i < instances.Count; i++)
                    {
                        probPredictions[i] = dp.PredictDistribution(instances[i], out rawPredictions[i]);
                        // Same features wrapped in a fresh Instance; compared against the raw output below.
                        rawPredictions1[i] = dp.Predict(new Instance(data[i]));
                    }

                    Float[] bulkPredictions = ((IBulkPredictor <Instance, Instances, Float, Float[]>)pred).BulkPredict(instances);

                    Assert.Equal(rawPredictions.Length, bulkPredictions.Length);
                    Assert.Equal(rawPredictions.Length, rawPredictions1.Length);
                    for (int i = 0; i < rawPredictions.Length; i++)
                    {
                        Assert.Equal(rawPredictions[i], bulkPredictions[i]);
                    }
                    for (int i = 0; i < rawPredictions.Length; i++)
                    {
                        Assert.Equal(rawPredictions[i], rawPredictions1[i]);
                    }

                    //test new testers
                    {
                        var results = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(pred, instances);

                        // Get metric names and print them alongside numbers
                        for (int i = 0; i < results.Length; i++)
                        {
                            Log("{0,-30} {1}", results[i].Name, results[i].Value);
                        }

                        // sanity check vs. original predictor
                        var results2 = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(predictor, instances);
                        Assert.Equal(results.Length, results2.Length);
                        for (int i = 0; i < results.Length; i++)
                        {
                            Assert.Equal(results[i].Name, results2[i].Name);
                            Assert.Equal(results[i].Value, results2[i].Value);
                        }
                    }
                }
                finally
                {
                    // Only delete the files that were actually created; File.Delete rejects null paths.
                    foreach (string tempPath in new[] { modelFilename, modelFilenameText, modelFilenameIni })
                    {
                        if (tempPath != null)
                        {
                            File.Delete(tempPath);
                        }
                    }
                }
            });
            Done();
        }