/// <summary>
/// Loads the predictor (and its data model/stats) from <c>modelFile</c> into this instance.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the loaded predictor is null, i.e. the model file does not contain a binary classifier.
/// </exception>
private void LoadModel()
{
    this.predictor = PredictorUtils.LoadPredictor<float[]>(out dataModel, out dataStats, modelFile);
    if (predictor == null)
    {
        // Use a specific exception type rather than the base Exception so callers
        // can distinguish this failure mode.
        throw new InvalidOperationException("Predictor is not a binary classifier");
    }
}
/// <summary>
/// Writes each partition model to the given writer in text form, preceded by a
/// header line with the partition index and its weight (1 when no weights are set).
/// </summary>
void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema)
{
    using (var ch = Host.Start("SaveAsText"))
    {
        for (int index = 0; index < Models.Length; index++)
        {
            // Default the weight to 1 when no per-partition weights were provided.
            var weight = Weights != null ? Weights[index] : 1;
            writer.WriteLine(";; Partition model {0}", index);
            writer.WriteLine(";; Weight={0}", weight);
            PredictorUtils.SaveText(ch, Models[index].Predictor, schema, writer);
        }
    }
}
/// <summary>
/// Trains a linear SVM on the adult dataset via the command-line driver, then verifies that
/// predictions produced from manually-constructed factory instances (named feature:value pairs
/// parsed out of the raw CSV) match predictions produced from the standard test-data pipeline.
/// </summary>
public void FactoryExampleTest()
{
    var dataset = TestDatasets.adultText;
    string dataFilename = GetDataPath(dataset.trainFilename);
    string testDataFilename = GetDataPath(dataset.testFilename);

    // *** Train a model via the command-line experiment runner. ***
    string modelFilename = Path.GetTempFileName();
    TLCArguments cmd = new TLCArguments();
    Assert.True(CmdParser.ParseArguments(dataset.extraSettings, cmd));
    cmd.command = Command.Train;
    cmd.modelfile = modelFilename;
    cmd.datafile = dataFilename;
    cmd.instancesSettings = dataset.settings;
    cmd.classifierName = TestLearners.linearSVM.Trainer;
    RunExperiments.Run(cmd);

    // Load and make predictions with a previously saved model.
    IDataModel dataModel;
    IDataStats dataStats;
    var predictor = (IDistributionPredictor<Instance, Float, Float>)PredictorUtils.LoadPredictor(
        out dataModel, out dataStats, modelFilename);
    var instanceFactory = ReflectionUtilsOld.CreateInstanceOld<IInstanceFactory, SignatureInstances>(
        cmd.instancesClass, cmd.instancesSettings, null, dataModel);

    bool headerSkip = true;
    List<Float> outputs = new List<Float>();
    List<Float> probabilities = new List<Float>();
    using (StreamReader reader = new StreamReader(testDataFilename))
    {
        List<string> features = new List<string>();
        string text;
        long line = 0; // 1-based line counter, passed to the factory as the instance id
        while ((text = reader.ReadLine()) != null)
        {
            ++line;
            if (string.IsNullOrWhiteSpace(text))
            {
                continue;
            }
            string[] cols = text.Split(',');
            // The adult dataset has 14 features plus the label.
            Assert.True(cols.Length == 15);
            if (headerSkip)
            {
                // skip header line
                headerSkip = false;
                continue;
            }
            features.Clear();
            // Add in the "max dimensionality"
            features.Add("15");
            for (int col = 0; col < cols.Length; col++)
            {
                string s = cols[col].Trim();
                switch (col)
                {
                    case 0:
                    case 2:
                    case 4:
                    case 10:
                    case 11:
                    case 12:
                    case 14:
                        // Numeric feature (or label) column -- only add it if non-zero.
                        // NOTE: `continue` here skips to the next column of the outer for loop.
                        Float val = InstancesUtils.FloatParse(s);
                        if (val == 0) // Beware of NaNs - they should be recorded!
                        {
                            continue;
                        }
                        break;
                }
                // Categorical columns (no matching case) fall through to here unconditionally.
                features.Add(col + ":" + s);
            }
            Instance instance = instanceFactory.ProduceInstance(line, features.ToArray());
            Float rawOutput, probability;
            probability = predictor.PredictDistribution(instance, out rawOutput);
            outputs.Add(rawOutput);
            probabilities.Add(probability);
        }
    }

    // Re-predict through the standard test-data pipeline and compare with the factory path.
    List<Float> originalOutputs = new List<Float>();
    List<Float> originalProbabilities = new List<Float>();
    var env = new LocalEnvironment(SysRandom.Wrap(RunExperiments.GetRandom(cmd)));
    Instances instances = RunExperiments.CreateTestData(cmd, testDataFilename, dataModel, null, env);
    foreach (Instance instance in instances)
    {
        Float rawOutput, probability;
        probability = predictor.PredictDistribution(instance, out rawOutput);
        originalOutputs.Add(rawOutput);
        originalProbabilities.Add(probability);
    }
    CollectionAssert.Equal(outputs, originalOutputs);
    CollectionAssert.Equal(probabilities, originalProbabilities);
    File.Delete(modelFilename);
    Done();
}
/// <summary>
/// Trains a one-vs-all (OVA) multiclass model on in-memory arrays, saves it in binary,
/// text and INI formats, reloads the binary model, and checks that per-instance predictions,
/// bulk predictions, and tester metrics all agree between the original and reloaded predictors.
/// </summary>
[Ignore] // REVIEW: OVA no longer implements BulkPredict.
public void MulticlassExampleTest()
{
    string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

    // *** Training a model ***
    // Assume data is in memory in matrix/vector form. Sparse format also supported.
    Float[][] data;
    Float[] labels;
    // Below just reads some actual data into these arrays.
    PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

    // Create an Instances dataset.
    ListInstances instances = new ListInstances();
    for (int i = 0; i < data.Length; i++)
    {
        instances.AddInst(data[i], labels[i]);
    }
    instances.CopyMetadata(null);

    // Create a predictor and specify some non-default settings.
    var args = new OVA.OldArguments();
    PredictionUtil.ParseArguments(args, "p=FR ps=iter:20");
    ITrainer<Instances, IPredictor<Instance, Float[]>> trainer =
        new OVA(args, new TrainHost(new Random(1), 0));

    // Train a predictor.
    trainer.Train(instances);
    var predictor = trainer.CreatePredictor();

    // *** Several ways to save models. Only binary can be used to re-load in TLC. ***
    // Save the model in internal binary format that can be used for loading it.
    string modelFilename = Path.GetTempFileName();
    PredictorUtils.Save(modelFilename, predictor, instances, null);
    // Save the model as a plain-text description.
    string modelFilenameText = Path.GetTempFileName();
    PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);
    // Save the model in Bing's INI format.
    string modelFilenameIni = Path.GetTempFileName();
    PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

    // *** Loading and making predictions with a previously saved model ***
    // Note: there are several alternative ways to construct instances.
    // For example, see FactoryExampleTest that demonstrates named-feature : value pairs.
    // Load saved model.
    IDataModel dataModel;
    IDataStats dataStats;
    var pred = PredictorUtils.LoadPredictor<Float[]>(out dataModel, out dataStats, modelFilename);

    // Get per-instance predictions from the reloaded model.
    Float[][] predictions = new Float[instances.Count][];
    for (int i = 0; i < instances.Count; i++)
    {
        predictions[i] = pred.Predict(instances[i]);
    }

    // REVIEW: This looks like it wasn't doing what was expected - OVA didn't override
    // BulkPredict, so this wasn't using FastRank's BulkPredict.
    Float[][] bulkPredictions =
        ((IBulkPredictor<Instance, Instances, Float[], Float[][]>)pred).BulkPredict(instances);

    // Per-instance and bulk predictions must match element-wise.
    Assert.Equal(predictions.Length, bulkPredictions.Length);
    for (int i = 0; i < predictions.Length; i++)
    {
        Assert.Equal(predictions[i].Length, bulkPredictions[i].Length);
        for (int j = 0; j < predictions[i].Length; j++)
        {
            Assert.Equal(predictions[i][j], bulkPredictions[i][j]);
        }
    }

    // Test new testers.
    {
        var results = new MulticlassTester(new MulticlassTesterArguments()).Test(pred, instances);
        // Get metric names and print them alongside numbers.
        for (int i = 0; i < results.Length; i++)
        {
            Log("{0,-30} {1}", results[i].Name, results[i].Value);
        }
        // Sanity check vs. original predictor: metrics must be identical.
        var results2 = new MulticlassTester(new MulticlassTesterArguments()).Test(predictor, instances);
        Assert.Equal(results.Length, results2.Length);
        for (int i = 0; i < results.Length; i++)
        {
            Assert.Equal(results[i].Name, results2[i].Name);
            Assert.Equal(results[i].Value, results2[i].Value);
        }
    }
    File.Delete(modelFilename);
    File.Delete(modelFilenameText);
    File.Delete(modelFilenameIni);
    Done();
}
/// <summary>
/// Trains a FastRank binary classifier on in-memory arrays (on an MTA thread), saves the model
/// in binary/text/INI formats, reloads the binary model, and checks that distribution, raw,
/// and bulk predictions agree with each other and that tester metrics match the original predictor.
/// </summary>
public void SimpleExampleTest()
{
    RunMTAThread(() =>
    {
        string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

        // *** Training a model ***
        // Assume data is in memory in matrix/vector form. Sparse format also supported.
        Float[][] data;
        Float[] labels;
        // Below just reads some actual data into these arrays.
        PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

        // Create an Instances dataset.
        ListInstances instances = new ListInstances();
        for (int i = 0; i < data.Length; i++)
        {
            instances.AddInst(data[i], labels[i]);
        }
        instances.CopyMetadata(null);

        // Create a predictor and specify some non-default settings.
        var sub = new SubComponent<ITrainer<Instances, IPredictor<Instance, Float>>, SignatureOldBinaryClassifierTrainer>(
            "FastRank", "nl=5 lr =0.25 iter= 20");
        var trainer = sub.CreateInstance(new TrainHost(new Random(1), 0));

        // Train a predictor.
        trainer.Train(instances);
        var predictor = trainer.CreatePredictor();

        // *** Several ways to save models. Only binary can be used to re-load in TLC. ***
        // Save the model in internal binary format that can be used for loading it.
        string modelFilename = Path.GetTempFileName();
        PredictorUtils.Save(modelFilename, predictor, instances, null);
        // Save the model as a plain-text description.
        string modelFilenameText = Path.GetTempFileName();
        PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);
        // Save the model in Bing's INI format.
        string modelFilenameIni = Path.GetTempFileName();
        PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

        // *** Loading and making predictions with a previously saved model ***
        // Note: there are several alternative ways to construct instances.
        // For example, see FactoryExampleTest that demonstrates named-feature : value pairs.
        // Load saved model.
        IDataModel dataModel;
        IDataStats dataStats;
        var pred = PredictorUtils.LoadPredictor<Float>(out dataModel, out dataStats, modelFilename);
        var dp = pred as IDistributionPredictor<Instance, Float, Float>;

        // Get predictions for instances through three paths: distribution, raw, and bulk.
        Float[] probPredictions = new Float[instances.Count];
        Float[] rawPredictions = new Float[instances.Count];
        Float[] rawPredictions1 = new Float[instances.Count];
        for (int i = 0; i < instances.Count; i++)
        {
            probPredictions[i] = dp.PredictDistribution(instances[i], out rawPredictions[i]);
            rawPredictions1[i] = dp.Predict(new Instance(data[i]));
        }

        Float[] bulkPredictions =
            ((IBulkPredictor<Instance, Instances, Float, Float[]>)pred).BulkPredict(instances);

        // All three prediction paths must agree element-wise.
        Assert.Equal(rawPredictions.Length, bulkPredictions.Length);
        Assert.Equal(rawPredictions.Length, rawPredictions1.Length);
        for (int i = 0; i < rawPredictions.Length; i++)
        {
            Assert.Equal(rawPredictions[i], bulkPredictions[i]);
        }
        for (int i = 0; i < rawPredictions.Length; i++)
        {
            Assert.Equal(rawPredictions[i], rawPredictions1[i]);
        }

        // Test new testers.
        {
            var results = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(pred, instances);
            // Get metric names and print them alongside numbers.
            for (int i = 0; i < results.Length; i++)
            {
                Log("{0,-30} {1}", results[i].Name, results[i].Value);
            }
            // Sanity check vs. original predictor: metrics must be identical.
            var results2 = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(predictor, instances);
            Assert.Equal(results.Length, results2.Length);
            for (int i = 0; i < results.Length; i++)
            {
                Assert.Equal(results[i].Name, results2[i].Name);
                Assert.Equal(results[i].Value, results2[i].Value);
            }
        }
        File.Delete(modelFilename);
        File.Delete(modelFilenameText);
        File.Delete(modelFilenameIni);
    });
    Done();
}
/// <summary>
/// Saves <paramref name="predictor"/> to each non-null output stream in the requested
/// format(s): internal binary, human-readable summary, text, INI, and code.
/// If no stream is supplied, nothing is written (this can be used purely to verify
/// that the predictor is loadable) and an informational message is logged.
/// </summary>
/// <param name="ch">Channel for logging progress; must not be null.</param>
/// <param name="predictor">The predictor to save; must not be null.</param>
/// <param name="schema">Role-mapped schema describing the data; must not be null.</param>
/// <param name="binaryModelStream">Optional destination for the binary model.</param>
/// <param name="summaryModelStream">Optional destination for the model summary.</param>
/// <param name="textModelStream">Optional destination for the text model.</param>
/// <param name="iniModelStream">Optional destination for the INI model.</param>
/// <param name="codeModelStream">Optional destination for the code model.</param>
public static void SavePredictor(IChannel ch, IPredictor predictor, RoleMappedSchema schema,
    Stream binaryModelStream = null, Stream summaryModelStream = null, Stream textModelStream = null,
    Stream iniModelStream = null, Stream codeModelStream = null)
{
    Contracts.CheckValue(ch, nameof(ch));
    ch.CheckValue(predictor, nameof(predictor));
    ch.CheckValue(schema, nameof(schema));

    // Count how many formats were actually written so we can warn when none were.
    int count = 0;
    if (binaryModelStream != null)
    {
        ch.Info("Saving predictor as binary");
        using (var writer = new BinaryWriter(binaryModelStream, Encoding.UTF8, true))
            PredictorUtils.SaveBinary(ch, predictor, writer);
        count++;
    }
    if (summaryModelStream != null)
    {
        ch.Info("Saving predictor summary");
        using (StreamWriter writer = Utils.OpenWriter(summaryModelStream))
            PredictorUtils.SaveSummary(ch, predictor, schema, writer);
        count++;
    }
    if (textModelStream != null)
    {
        ch.Info("Saving predictor as text");
        using (StreamWriter writer = Utils.OpenWriter(textModelStream))
            PredictorUtils.SaveText(ch, predictor, schema, writer);
        count++;
    }
    if (iniModelStream != null)
    {
        ch.Info("Saving predictor as ini");
        using (StreamWriter writer = Utils.OpenWriter(iniModelStream))
        {
            // Test if our predictor implements the more modern INI export interface.
            // If it does not, use the old utility method.
            ICanSaveInIniFormat saver = predictor as ICanSaveInIniFormat;
            if (saver == null)
            {
                PredictorUtils.SaveIni(ch, predictor, schema, writer);
            }
            else
            {
                saver.SaveAsIni(writer, schema);
            }
        }
        count++;
    }
    if (codeModelStream != null)
    {
        ch.Info("Saving predictor as code");
        using (StreamWriter writer = Utils.OpenWriter(codeModelStream))
            PredictorUtils.SaveCode(ch, predictor, schema, writer);
        count++;
    }
    // Note that we don't check for this case up front so this command can be used to simply
    // check that the predictor is loadable.
    if (count == 0)
    {
        ch.Info("No files saved. Must specify at least one output file.");
    }
}