/// <summary>
/// Writes every partition model, in index order, to <paramref name="writer"/> in text format.
/// Each model is preceded by two ";;" header lines: its index and its weight
/// (1 is written when no weights array is present).
/// </summary>
/// <param name="writer">Destination for the header lines and the per-model text.</param>
/// <param name="schema">Schema passed through to <c>PredictorUtils.SaveText</c> for each model.</param>
void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema)
{
    using (var ch = Host.Start("SaveAsText"))
    {
        for (int partition = 0; partition < Models.Length; partition++)
        {
            writer.WriteLine(";; Partition model {0}", partition);

            // Fall back to a weight of 1 when the ensemble carries no weights.
            var weight = Weights != null ? Weights[partition] : 1;
            writer.WriteLine(";; Weight={0}", weight);

            var model = Models[partition];
            PredictorUtils.SaveText(ch, model.Predictor, schema, writer);
        }
    }
}
/// <summary>
/// End-to-end example for a binary classifier: builds an in-memory dataset from the msm
/// training file, trains a "FastRank" trainer, saves the model in binary, text and INI form,
/// reloads the binary model, and verifies that
/// (a) per-instance, fresh-instance and bulk raw predictions are identical, and
/// (b) tester metrics match between the reloaded and the original predictor.
/// The temporary model files are removed whether or not the checks succeed.
/// </summary>
public void SimpleExampleTest()
{
    RunMTAThread(() =>
    {
        string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

        // ********* Training a model *********
        // Assume data is in memory in matrix/vector form. Sparse format also supported.
        Float[][] data;
        Float[] labels;
        // Below just reads some actual data into these arrays.
        PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

        // Create an Instances dataset.
        ListInstances instances = new ListInstances();
        for (int i = 0; i < data.Length; i++)
        {
            instances.AddInst(data[i], labels[i]);
        }
        instances.CopyMetadata(null);

        // Create a predictor and specify some non-default settings.
        var sub = new SubComponent<ITrainer<Instances, IPredictor<Instance, Float>>, SignatureOldBinaryClassifierTrainer>(
            "FastRank", "nl=5 lr =0.25 iter= 20");
        var trainer = sub.CreateInstance(new TrainHost(new Random(1), 0));

        // Train a predictor.
        trainer.Train(instances);
        var predictor = trainer.CreatePredictor();

        // Declared before the try so the finally block can delete exactly the temp files
        // that were created, even if a save, load or assertion fails part-way through.
        string modelFilename = null;
        string modelFilenameText = null;
        string modelFilenameIni = null;
        try
        {
            // ********* Several ways to save models. Only binary can be used to re-load in TLC. *********

            // Save the model in internal binary format that can be used for loading it.
            modelFilename = Path.GetTempFileName();
            PredictorUtils.Save(modelFilename, predictor, instances, null);

            // Save the model as a plain-text description.
            modelFilenameText = Path.GetTempFileName();
            PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

            // Save the model in Bing's INI format.
            modelFilenameIni = Path.GetTempFileName();
            PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

            // ********* Loading and making predictions with a previously saved model *********
            // Note: there are several alternative ways to construct instances.
            // For example, see FactoryExampleTest below that demonstrates named-feature : value pairs.

            // Load saved model.
            IDataModel dataModel;
            IDataStats dataStats;
            var pred = PredictorUtils.LoadPredictor<Float>(out dataModel, out dataStats, modelFilename);
            // Direct cast: a predictor of the wrong type fails here with a descriptive
            // InvalidCastException rather than a NullReferenceException inside the loop.
            var dp = (IDistributionPredictor<Instance, Float, Float>)pred;

            // Get predictions for instances.
            Float[] probPredictions = new Float[instances.Count];
            Float[] rawPredictions = new Float[instances.Count];
            Float[] rawPredictions1 = new Float[instances.Count];
            for (int i = 0; i < instances.Count; i++)
            {
                probPredictions[i] = dp.PredictDistribution(instances[i], out rawPredictions[i]);
                // Same features wrapped in a fresh Instance must score identically.
                rawPredictions1[i] = dp.Predict(new Instance(data[i]));
            }

            Float[] bulkPredictions = ((IBulkPredictor<Instance, Instances, Float, Float[]>)pred).BulkPredict(instances);

            Assert.Equal(rawPredictions.Length, bulkPredictions.Length);
            Assert.Equal(rawPredictions.Length, rawPredictions1.Length);
            for (int i = 0; i < rawPredictions.Length; i++)
            {
                Assert.Equal(rawPredictions[i], bulkPredictions[i]);
            }
            for (int i = 0; i < rawPredictions.Length; i++)
            {
                Assert.Equal(rawPredictions[i], rawPredictions1[i]);
            }

            // Test new testers.
            var results = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(pred, instances);

            // Get metric names and print them alongside numbers.
            for (int i = 0; i < results.Length; i++)
            {
                Log("{0,-30} {1}", results[i].Name, results[i].Value);
            }

            // Sanity check vs. original predictor.
            var results2 = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(predictor, instances);
            Assert.Equal(results.Length, results2.Length);
            for (int i = 0; i < results.Length; i++)
            {
                Assert.Equal(results[i].Name, results2[i].Name);
                Assert.Equal(results[i].Value, results2[i].Value);
            }
        }
        finally
        {
            // Path.GetTempFileName creates the file on disk, so each assigned path must be removed.
            foreach (string path in new[] { modelFilename, modelFilenameText, modelFilenameIni })
            {
                if (path != null)
                {
                    File.Delete(path);
                }
            }
        }
    });
    Done();
}
/// <summary>
/// End-to-end example for a multiclass (OVA) predictor: builds an in-memory dataset from the
/// msm training file, trains OVA with the arguments "p=FR ps=iter:20", saves the model in
/// binary, text and INI form, reloads the binary model, and verifies that
/// (a) per-instance and bulk predictions are identical element by element, and
/// (b) tester metrics match between the reloaded and the original predictor.
/// The temporary model files are removed whether or not the checks succeed.
/// </summary>
[Ignore] // REVIEW: OVA no longer implements BulkPredict.
public void MulticlassExampleTest()
{
    string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

    // ********* Training a model *********
    // Assume data is in memory in matrix/vector form. Sparse format also supported.
    Float[][] data;
    Float[] labels;
    // Below just reads some actual data into these arrays.
    PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

    // Create an Instances dataset.
    ListInstances instances = new ListInstances();
    for (int i = 0; i < data.Length; i++)
    {
        instances.AddInst(data[i], labels[i]);
    }
    instances.CopyMetadata(null);

    // Create a predictor and specify some non-default settings.
    var args = new OVA.OldArguments();
    PredictionUtil.ParseArguments(args, "p=FR ps=iter:20");
    ITrainer<Instances, IPredictor<Instance, Float[]>> trainer = new OVA(args, new TrainHost(new Random(1), 0));

    // Train a predictor.
    trainer.Train(instances);
    var predictor = trainer.CreatePredictor();

    // Declared before the try so the finally block can delete exactly the temp files
    // that were created, even if a save, load or assertion fails part-way through.
    string modelFilename = null;
    string modelFilenameText = null;
    string modelFilenameIni = null;
    try
    {
        // ********* Several ways to save models. Only binary can be used to re-load in TLC. *********

        // Save the model in internal binary format that can be used for loading it.
        modelFilename = Path.GetTempFileName();
        PredictorUtils.Save(modelFilename, predictor, instances, null);

        // Save the model as a plain-text description.
        modelFilenameText = Path.GetTempFileName();
        PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

        // Save the model in Bing's INI format.
        modelFilenameIni = Path.GetTempFileName();
        PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

        // ********* Loading and making predictions with a previously saved model *********
        // Note: there are several alternative ways to construct instances.
        // For example, see FactoryExampleTest below that demonstrates named-feature : value pairs.

        // Load saved model.
        IDataModel dataModel;
        IDataStats dataStats;
        var pred = PredictorUtils.LoadPredictor<Float[]>(out dataModel, out dataStats, modelFilename);

        // Get predictions for instances.
        Float[][] predictions = new Float[instances.Count][];
        for (int i = 0; i < instances.Count; i++)
        {
            predictions[i] = pred.Predict(instances[i]);
        }

        // REVIEW: This looks like it wasn't doing what was expected - OVA didn't override
        // BulkPredict, so this wasn't using FastRank's BulkPredict.
        Float[][] bulkPredictions = ((IBulkPredictor<Instance, Instances, Float[], Float[][]>)pred).BulkPredict(instances);

        Assert.Equal(predictions.Length, bulkPredictions.Length);
        for (int i = 0; i < predictions.Length; i++)
        {
            Assert.Equal(predictions[i].Length, bulkPredictions[i].Length);
            for (int j = 0; j < predictions[i].Length; j++)
            {
                Assert.Equal(predictions[i][j], bulkPredictions[i][j]);
            }
        }

        // Test new testers.
        var results = new MulticlassTester(new MulticlassTesterArguments()).Test(pred, instances);

        // Get metric names and print them alongside numbers.
        for (int i = 0; i < results.Length; i++)
        {
            Log("{0,-30} {1}", results[i].Name, results[i].Value);
        }

        // Sanity check vs. original predictor.
        var results2 = new MulticlassTester(new MulticlassTesterArguments()).Test(predictor, instances);
        Assert.Equal(results.Length, results2.Length);
        for (int i = 0; i < results.Length; i++)
        {
            Assert.Equal(results[i].Name, results2[i].Name);
            Assert.Equal(results[i].Value, results2[i].Value);
        }
    }
    finally
    {
        // Path.GetTempFileName creates the file on disk, so each assigned path must be removed.
        foreach (string path in new[] { modelFilename, modelFilenameText, modelFilenameIni })
        {
            if (path != null)
            {
                File.Delete(path);
            }
        }
    }

    Done();
}
/// <summary>
/// Saves <paramref name="predictor"/> to every output stream that is non-null, in this order:
/// binary, summary, text, INI, code. If no stream is supplied, nothing is written and an
/// informational message is logged instead.
/// </summary>
/// <param name="ch">Channel used for argument validation and progress messages; checked for null.</param>
/// <param name="predictor">Predictor to save; checked for null.</param>
/// <param name="schema">Schema passed to the summary, text, INI and code savers; checked for null.</param>
/// <param name="binaryModelStream">Optional destination for the binary model. The stream is left open.</param>
/// <param name="summaryModelStream">Optional destination for the model summary.</param>
/// <param name="textModelStream">Optional destination for the text model.</param>
/// <param name="iniModelStream">Optional destination for the INI model.</param>
/// <param name="codeModelStream">Optional destination for the code model.</param>
public static void SavePredictor(IChannel ch, IPredictor predictor, RoleMappedSchema schema,
    Stream binaryModelStream = null, Stream summaryModelStream = null, Stream textModelStream = null,
    Stream iniModelStream = null, Stream codeModelStream = null)
{
    Contracts.CheckValue(ch, nameof(ch));
    ch.CheckValue(predictor, nameof(predictor));
    ch.CheckValue(schema, nameof(schema));

    int count = 0;

    if (binaryModelStream != null)
    {
        ch.Info("Saving predictor as binary");
        // leaveOpen is true so disposing the writer does not close the caller's stream.
        using (var writer = new BinaryWriter(binaryModelStream, Encoding.UTF8, leaveOpen: true))
        {
            PredictorUtils.SaveBinary(ch, predictor, writer);
        }
        count++;
    }

    if (summaryModelStream != null)
    {
        ch.Info("Saving predictor summary");
        using (StreamWriter writer = Utils.OpenWriter(summaryModelStream))
        {
            PredictorUtils.SaveSummary(ch, predictor, schema, writer);
        }
        count++;
    }

    if (textModelStream != null)
    {
        ch.Info("Saving predictor as text");
        using (StreamWriter writer = Utils.OpenWriter(textModelStream))
        {
            PredictorUtils.SaveText(ch, predictor, schema, writer);
        }
        count++;
    }

    if (iniModelStream != null)
    {
        ch.Info("Saving predictor as ini");
        using (StreamWriter writer = Utils.OpenWriter(iniModelStream))
        {
            // Prefer the more modern INI export interface when the predictor implements it;
            // otherwise fall back to the old utility method.
            ICanSaveInIniFormat saver = predictor as ICanSaveInIniFormat;
            if (saver != null)
            {
                saver.SaveAsIni(writer, schema);
            }
            else
            {
                PredictorUtils.SaveIni(ch, predictor, schema, writer);
            }
        }
        count++;
    }

    if (codeModelStream != null)
    {
        ch.Info("Saving predictor as code");
        using (StreamWriter writer = Utils.OpenWriter(codeModelStream))
        {
            PredictorUtils.SaveCode(ch, predictor, schema, writer);
        }
        count++;
    }

    // Note that we don't check for this case up front so this command can be used to simply
    // check that the predictor is loadable.
    if (count == 0)
    {
        ch.Info("No files saved. Must specify at least one output file.");
    }
}