/// <summary>
/// Trains a model on a single example, round-trips the resulting predictor
/// through an in-memory model repository, and passes the prediction of the
/// original predictor and of the reloaded predictor to <paramref name="checker"/>.
/// </summary>
/// <typeparam name="TOutput">The type of value produced by the predictor.</typeparam>
/// <param name="trainerMaker">Creates the trainer under test from the supplied host.</param>
/// <param name="checker">
/// Receives (original prediction, reloaded prediction) for the single training instance.
/// </param>
private static void TrivialHelper<TOutput>(
    Func<ITrainerHost, ITrainer<Instances, IPredictorProducing<TOutput>>> trainerMaker,
    Action<TOutput, TOutput> checker)
{
    // The following simple instance should result in a "trivial" predictor for
    // binary classification, regression, and multiclass, I think.
    ListInstances instances = new ListInstances();
    instances.AddInst(new Float[] { (Float)0.0 }, (Float)0);
    instances.CopyMetadata(null);

    ITrainerHost trainHost = new TrainHost(new Random(1), 0);
    var trainerUnderTest = trainerMaker(trainHost);
    trainerUnderTest.Train(instances);

    IPredictor<Instance, TOutput> original = (IPredictor<Instance, TOutput>)trainerUnderTest.CreatePredictor();
    IPredictor<Instance, TOutput> reloaded = default(IPredictor<Instance, TOutput>);

    // Save the predictor to a memory stream and read it straight back.
    using (Stream buffer = new MemoryStream())
    {
        using (RepositoryWriter repoWriter = RepositoryWriter.CreateNew(buffer, false))
        {
            ModelSaveContext.SaveModel(repoWriter, original, "foo");
            repoWriter.Commit();
        }

        // Rewind so the reader starts from the beginning of what was just written.
        buffer.Position = 0;

        using (RepositoryReader repoReader = RepositoryReader.Open(buffer, false))
        {
            ModelLoadContext.LoadModel(out reloaded, repoReader, "foo");
        }

        Assert.AreNotEqual(default(IPredictor<Instance, TOutput>), reloaded, "did not load expected model");
    }

    // Arguments are evaluated left to right: original predictor first, reloaded second.
    checker(original.Predict(instances[0]), reloaded.Predict(instances[0]));
}
/// <summary>
/// Trains an OVA multiclass predictor on the msm training data, saves it in
/// binary, plain-text and INI form, reloads the binary model, and checks that
/// per-instance predictions agree with bulk predictions and that the tester
/// metrics of the reloaded predictor match those of the original predictor.
/// </summary>
[Ignore] // REVIEW: OVA no longer implements BulkPredict.
public void MulticlassExampleTest()
{
    string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

    // ********* Training a model *********

    // Assume data is in memory in matrix/vector form. Sparse format also supported.
    Float[][] data;
    Float[] labels;
    // Below just reads some actual data into these arrays.
    PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

    // Create an Instances dataset.
    ListInstances instances = new ListInstances();
    for (int i = 0; i < data.Length; i++)
    {
        instances.AddInst(data[i], labels[i]);
    }
    instances.CopyMetadata(null);

    // Create a predictor and specify some non-default settings.
    var args = new OVA.OldArguments();
    PredictionUtil.ParseArguments(args, "p=FR ps=iter:20");
    ITrainer<Instances, IPredictor<Instance, Float[]>> trainer = new OVA(args, new TrainHost(new Random(1), 0));

    // Train a predictor.
    trainer.Train(instances);
    var predictor = trainer.CreatePredictor();

    // Temp files are tracked here so the finally block can remove whichever ones
    // were actually created, even when a later step or assertion throws.
    string modelFilename = null;
    string modelFilenameText = null;
    string modelFilenameIni = null;
    try
    {
        // ********* Several ways to save models. Only binary can be used to-reload in TLC. *********

        // Save the model in internal binary format that can be used for loading it.
        modelFilename = Path.GetTempFileName();
        PredictorUtils.Save(modelFilename, predictor, instances, null);

        // Save the model as a plain-text description.
        modelFilenameText = Path.GetTempFileName();
        PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

        // Save the model in Bing's INI format.
        modelFilenameIni = Path.GetTempFileName();
        PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

        // ********* Loading and making predictions with a previously saved model *********
        // Note: there are several alternative ways to construct instances.
        // For example, see FactoryExampleTest below that demonstrates named-feature : value pairs.

        // Load saved model.
        IDataModel dataModel;
        IDataStats dataStats;
        var pred = PredictorUtils.LoadPredictor<Float[]>(out dataModel, out dataStats, modelFilename);

        // Get predictions for instances.
        Float[][] predictions = new Float[instances.Count][];
        for (int i = 0; i < instances.Count; i++)
        {
            predictions[i] = pred.Predict(instances[i]);
        }

        // REVIEW: This looks like it wasn't doing what was expected - OVA didn't override
        // BulkPredict, so this wasn't using FastRank's BulkPredict.
        Float[][] bulkPredictions = ((IBulkPredictor<Instance, Instances, Float[], Float[][]>)pred).BulkPredict(instances);

        Assert.Equal(predictions.Length, bulkPredictions.Length);
        for (int i = 0; i < predictions.Length; i++)
        {
            Assert.Equal(predictions[i].Length, bulkPredictions[i].Length);
            for (int j = 0; j < predictions[i].Length; j++)
            {
                Assert.Equal(predictions[i][j], bulkPredictions[i][j]);
            }
        }

        // Test new testers.
        var results = new MulticlassTester(new MulticlassTesterArguments()).Test(pred, instances);

        // Get metric names and print them alongside numbers.
        for (int i = 0; i < results.Length; i++)
        {
            Log("{0,-30} {1}", results[i].Name, results[i].Value);
        }

        // Sanity check vs. original predictor.
        var results2 = new MulticlassTester(new MulticlassTesterArguments()).Test(predictor, instances);
        Assert.Equal(results.Length, results2.Length);
        for (int i = 0; i < results.Length; i++)
        {
            Assert.Equal(results[i].Name, results2[i].Name);
            Assert.Equal(results[i].Value, results2[i].Value);
        }
    }
    finally
    {
        // File.Delete rejects null, so only delete the files that were created.
        if (modelFilename != null)
        {
            File.Delete(modelFilename);
        }
        if (modelFilenameText != null)
        {
            File.Delete(modelFilenameText);
        }
        if (modelFilenameIni != null)
        {
            File.Delete(modelFilenameIni);
        }
    }

    Done();
}
/// <summary>
/// Trains a FastRank binary classifier on the msm training data, saves it in
/// binary, plain-text and INI form, reloads the binary model, and checks that
/// raw scores from <c>PredictDistribution</c>, <c>Predict</c> and
/// <c>BulkPredict</c> agree and that the tester metrics of the reloaded
/// predictor match those of the original predictor. The whole body runs via
/// <c>RunMTAThread</c>.
/// </summary>
public void SimpleExampleTest()
{
    RunMTAThread(() =>
    {
        string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

        // ********* Training a model *********

        // Assume data is in memory in matrix/vector form. Sparse format also supported.
        Float[][] data;
        Float[] labels;
        // Below just reads some actual data into these arrays.
        PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

        // Create an Instances dataset.
        ListInstances instances = new ListInstances();
        for (int i = 0; i < data.Length; i++)
        {
            instances.AddInst(data[i], labels[i]);
        }
        instances.CopyMetadata(null);

        // Create a predictor and specify some non-default settings.
        var sub = new SubComponent<ITrainer<Instances, IPredictor<Instance, Float>>, SignatureOldBinaryClassifierTrainer>(
            "FastRank", "nl=5 lr =0.25 iter= 20");
        var trainer = sub.CreateInstance(new TrainHost(new Random(1), 0));

        // Train a predictor.
        trainer.Train(instances);
        var predictor = trainer.CreatePredictor();

        // Temp files are tracked here so the finally block can remove whichever ones
        // were actually created, even when a later step or assertion throws.
        string modelFilename = null;
        string modelFilenameText = null;
        string modelFilenameIni = null;
        try
        {
            // ********* Several ways to save models. Only binary can be used to-reload in TLC. *********

            // Save the model in internal binary format that can be used for loading it.
            modelFilename = Path.GetTempFileName();
            PredictorUtils.Save(modelFilename, predictor, instances, null);

            // Save the model as a plain-text description.
            modelFilenameText = Path.GetTempFileName();
            PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

            // Save the model in Bing's INI format.
            modelFilenameIni = Path.GetTempFileName();
            PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

            // ********* Loading and making predictions with a previously saved model *********
            // Note: there are several alternative ways to construct instances.
            // For example, see FactoryExampleTest below that demonstrates named-feature : value pairs.

            // Load saved model.
            IDataModel dataModel;
            IDataStats dataStats;
            var pred = PredictorUtils.LoadPredictor<Float>(out dataModel, out dataStats, modelFilename);

            // Direct cast rather than 'as': if the loaded predictor is not a distribution
            // predictor the test fails here with an InvalidCastException naming the types,
            // instead of a NullReferenceException on first use.
            var dp = (IDistributionPredictor<Instance, Float, Float>)pred;

            // Get predictions for instances.
            Float[] probPredictions = new Float[instances.Count];
            Float[] rawPredictions = new Float[instances.Count];
            Float[] rawPredictions1 = new Float[instances.Count];
            for (int i = 0; i < instances.Count; i++)
            {
                probPredictions[i] = dp.PredictDistribution(instances[i], out rawPredictions[i]);
                // Score a fresh Instance built from the raw feature array as well.
                rawPredictions1[i] = dp.Predict(new Instance(data[i]));
            }

            Float[] bulkPredictions = ((IBulkPredictor<Instance, Instances, Float, Float[]>)pred).BulkPredict(instances);

            Assert.Equal(rawPredictions.Length, bulkPredictions.Length);
            Assert.Equal(rawPredictions.Length, rawPredictions1.Length);
            for (int i = 0; i < rawPredictions.Length; i++)
            {
                Assert.Equal(rawPredictions[i], bulkPredictions[i]);
            }
            for (int i = 0; i < rawPredictions.Length; i++)
            {
                Assert.Equal(rawPredictions[i], rawPredictions1[i]);
            }

            // Test new testers.
            var results = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(pred, instances);

            // Get metric names and print them alongside numbers.
            for (int i = 0; i < results.Length; i++)
            {
                Log("{0,-30} {1}", results[i].Name, results[i].Value);
            }

            // Sanity check vs. original predictor.
            var results2 = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(predictor, instances);
            Assert.Equal(results.Length, results2.Length);
            for (int i = 0; i < results.Length; i++)
            {
                Assert.Equal(results[i].Name, results2[i].Name);
                Assert.Equal(results[i].Value, results2[i].Value);
            }
        }
        finally
        {
            // File.Delete rejects null, so only delete the files that were created.
            if (modelFilename != null)
            {
                File.Delete(modelFilename);
            }
            if (modelFilenameText != null)
            {
                File.Delete(modelFilenameText);
            }
            if (modelFilenameIni != null)
            {
                File.Delete(modelFilenameIni);
            }
        }
    });

    Done();
}