Ejemplo n.º 1
0
 /// <summary>
 /// Loads the predictor from <c>modelFile</c> into <c>predictor</c>. The loader also fills
 /// <c>dataModel</c> and <c>dataStats</c> through its <c>out</c> parameters.
 /// </summary>
 /// <exception cref="InvalidOperationException">
 /// Thrown when <c>PredictorUtils.LoadPredictor</c> returns <c>null</c>.
 /// </exception>
 private void LoadModel()
 {
     predictor = PredictorUtils.LoadPredictor<float[]>(out dataModel, out dataStats, modelFile);
     if (predictor == null)
     {
         // Throw a specific exception type rather than the bare System.Exception;
         // callers that catch Exception still observe it.
         throw new InvalidOperationException("Predictor is not a binary classifier");
     }
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Writes every entry of <c>Models</c> to <paramref name="writer"/> in text form. Each
 /// entry is preceded by two ";;" header lines: its index, and its weight (1 when
 /// <c>Weights</c> is null).
 /// </summary>
 /// <param name="writer">Destination of the text output.</param>
 /// <param name="schema">Schema handed through to <c>PredictorUtils.SaveText</c>.</param>
 void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema)
 {
     using (var channel = Host.Start("SaveAsText"))
     {
         int index = 0;
         while (index < Models.Length)
         {
             writer.WriteLine(";; Partition model {0}", index);

             // Fall back to a weight of 1 when no weights were supplied.
             var weight = Weights == null ? 1 : Weights[index];
             writer.WriteLine(";; Weight={0}", weight);

             PredictorUtils.SaveText(channel, Models[index].Predictor, schema, writer);
             index++;
         }
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains a model on the <c>TestDatasets.adultText</c> training file using the
        /// <c>TestLearners.linearSVM</c> trainer, reloads the saved model, and checks that
        /// predictions made on hand-built "index:value" feature rows (via the instance factory)
        /// equal the predictions made on the instances from <c>RunExperiments.CreateTestData</c>.
        /// </summary>
        public void FactoryExampleTest()
        {
            var    dataset          = TestDatasets.adultText;
            string dataFilename     = GetDataPath(dataset.trainFilename);
            string testDataFilename = GetDataPath(dataset.testFilename);

            // ********* Training a model *********
            string modelFilename = Path.GetTempFileName();
            try
            {
                TLCArguments cmd = new TLCArguments();

                Assert.True(CmdParser.ParseArguments(dataset.extraSettings, cmd));
                cmd.command           = Command.Train;
                cmd.modelfile         = modelFilename;
                cmd.datafile          = dataFilename;
                cmd.instancesSettings = dataset.settings;
                cmd.classifierName    = TestLearners.linearSVM.Trainer;
                RunExperiments.Run(cmd);

                // Load and make predictions with a previously saved model.
                IDataModel dataModel;
                IDataStats dataStats;
                var predictor = (IDistributionPredictor<Instance, Float, Float>)PredictorUtils.LoadPredictor(
                    out dataModel, out dataStats, modelFilename);
                var instanceFactory = ReflectionUtilsOld.CreateInstanceOld<IInstanceFactory, SignatureInstances>(
                    cmd.instancesClass, cmd.instancesSettings, null, dataModel);

                bool        headerSkip    = true;
                List<Float> outputs       = new List<Float>();
                List<Float> probabilities = new List<Float>();

                using (StreamReader reader = new StreamReader(testDataFilename))
                {
                    List<string> features = new List<string>();
                    string       text;
                    long         line = 0;
                    while ((text = reader.ReadLine()) != null)
                    {
                        // The line counter includes blank lines and the header, so it matches
                        // the physical line number in the file.
                        ++line;
                        if (string.IsNullOrWhiteSpace(text))
                        {
                            continue;
                        }

                        string[] cols = text.Split(',');
                        Assert.True(cols.Length == 15);

                        if (headerSkip)
                        {
                            // skip header line
                            headerSkip = false;
                            continue;
                        }

                        features.Clear();
                        // Add in the "max dimensionality"
                        features.Add("15");
                        for (int col = 0; col < cols.Length; col++)
                        {
                            string s = cols[col].Trim();
                            switch (col)
                            {
                            case 0:
                            case 2:
                            case 4:
                            case 10:
                            case 11:
                            case 12:
                            case 14:
                                // numeric feature or label -- add if non-zero
                                Float val = InstancesUtils.FloatParse(s);
                                // NaN == 0 is false, so NaNs are not skipped here and
                                // still get recorded below.
                                if (val == 0)
                                {
                                    continue;
                                }
                                break;
                            }
                            features.Add(col + ":" + s);
                        }

                        Instance instance = instanceFactory.ProduceInstance(line, features.ToArray());
                        Float    rawOutput, probability;
                        probability = predictor.PredictDistribution(instance, out rawOutput);
                        outputs.Add(rawOutput);
                        probabilities.Add(probability);
                    }
                }

                // Reference predictions: same model, instances produced by CreateTestData.
                List<Float> originalOutputs       = new List<Float>();
                List<Float> originalProbabilities = new List<Float>();
                var         env       = new LocalEnvironment(SysRandom.Wrap(RunExperiments.GetRandom(cmd)));
                Instances   instances = RunExperiments.CreateTestData(cmd, testDataFilename, dataModel, null, env);

                foreach (Instance instance in instances)
                {
                    Float rawOutput, probability;
                    probability = predictor.PredictDistribution(instance, out rawOutput);
                    originalOutputs.Add(rawOutput);
                    originalProbabilities.Add(probability);
                }

                CollectionAssert.Equal(outputs, originalOutputs);
                CollectionAssert.Equal(probabilities, originalProbabilities);
            }
            finally
            {
                // Path.GetTempFileName creates the file on disk, so remove it even when
                // training, prediction or an assertion fails.
                File.Delete(modelFilename);
            }

            Done();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains an <c>OVA</c> predictor on the <c>TestDatasets.msm</c> training file, saves it in
        /// binary, text and INI form, reloads the binary model, and checks that per-instance
        /// predictions equal bulk predictions and that the reloaded and original predictors yield
        /// the same <c>MulticlassTester</c> results.
        /// </summary>
        [Ignore] // REVIEW: OVA no longer implements BulkPredict.
        public void MulticlassExampleTest()
        {
            string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

            // Temp files are created further down; tracked here so the finally block can
            // remove whichever ones exist if the test fails part-way through.
            string modelFilename     = null;
            string modelFilenameText = null;
            string modelFilenameIni  = null;

            try
            {
                // ********* Training a model *********
                // assume data is in memory in matrix/vector form. Sparse format also supported.
                Float[][] data;
                Float[]   labels;
                // below just reads some actual data into these arrays
                PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

                // Create an Instances dataset.
                ListInstances instances = new ListInstances();

                for (int i = 0; i < data.Length; i++)
                {
                    instances.AddInst(data[i], labels[i]);
                }
                instances.CopyMetadata(null);

                // Create a predictor and specify some non-default settings
                var args = new OVA.OldArguments();

                PredictionUtil.ParseArguments(args, "p=FR ps=iter:20");
                ITrainer<Instances, IPredictor<Instance, Float[]>> trainer = new OVA(args, new TrainHost(new Random(1), 0));

                // Train a predictor
                trainer.Train(instances);
                var predictor = trainer.CreatePredictor();

                // ********* Several ways to save models. Only binary can be used to reload in TLC. *********

                // Save the model in internal binary format that can be used for loading it.
                modelFilename = Path.GetTempFileName();
                PredictorUtils.Save(modelFilename, predictor, instances, null);

                // Save the model as a plain-text description
                modelFilenameText = Path.GetTempFileName();
                PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

                // Save the model in Bing's INI format
                modelFilenameIni = Path.GetTempFileName();
                PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

                // ********* Loading and making predictions with a previously saved model *********
                // Note: there are several alternative ways to construct instances.
                // For example, see FactoryExampleTest, which demonstrates named-feature : value pairs.

                // Load saved model
                IDataModel dataModel;
                IDataStats dataStats;
                var        pred = PredictorUtils.LoadPredictor<Float[]>(out dataModel, out dataStats, modelFilename);

                // Get predictions for instances
                Float[][] predictions = new Float[instances.Count][];

                for (int i = 0; i < instances.Count; i++)
                {
                    predictions[i] = pred.Predict(instances[i]);
                }

                // REVIEW: This looks like it wasn't doing what was expected - OVA didn't override
                // BulkPredict, so this wasn't using FastRank's BulkPredict.
                Float[][] bulkPredictions = ((IBulkPredictor<Instance, Instances, Float[], Float[][]>)pred).BulkPredict(instances);

                Assert.Equal(predictions.Length, bulkPredictions.Length);
                for (int i = 0; i < predictions.Length; i++)
                {
                    Assert.Equal(predictions[i].Length, bulkPredictions[i].Length);
                    for (int j = 0; j < predictions[i].Length; j++)
                    {
                        Assert.Equal(predictions[i][j], bulkPredictions[i][j]);
                    }
                }

                // test new testers
                {
                    var results = new MulticlassTester(new MulticlassTesterArguments()).Test(pred, instances);

                    // Get metric names and print them alongside numbers
                    for (int i = 0; i < results.Length; i++)
                    {
                        Log("{0,-30} {1}", results[i].Name, results[i].Value);
                    }

                    // sanity check vs. original predictor
                    var results2 = new MulticlassTester(new MulticlassTesterArguments()).Test(predictor, instances);
                    Assert.Equal(results.Length, results2.Length);
                    for (int i = 0; i < results.Length; i++)
                    {
                        Assert.Equal(results[i].Name, results2[i].Name);
                        Assert.Equal(results[i].Value, results2[i].Value);
                    }
                }
            }
            finally
            {
                // Path.GetTempFileName creates each file on disk; delete the ones that were
                // actually created, whether or not the test succeeded.
                foreach (string tempFile in new[] { modelFilename, modelFilenameText, modelFilenameIni })
                {
                    if (tempFile != null)
                    {
                        File.Delete(tempFile);
                    }
                }
            }

            Done();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs, inside <c>RunMTAThread</c>, an end-to-end example: trains a "FastRank" binary
        /// classifier on the <c>TestDatasets.msm</c> training file, saves it in binary, text and INI
        /// form, reloads the binary model, and checks that single-instance, distribution and bulk
        /// predictions agree and that the reloaded and original predictors yield the same
        /// <c>ClassifierTester</c> results.
        /// </summary>
        public void SimpleExampleTest()
        {
            RunMTAThread(() =>
            {
                string dataFilename = GetDataPath(TestDatasets.msm.trainFilename);

                // Temp files are created further down; tracked here so the finally block can
                // remove whichever ones exist if the test fails part-way through.
                string modelFilename     = null;
                string modelFilenameText = null;
                string modelFilenameIni  = null;

                try
                {
                    // ********* Training a model *********
                    // assume data is in memory in matrix/vector form. Sparse format also supported.
                    Float[][] data;
                    Float[] labels;
                    // below just reads some actual data into these arrays
                    PredictionUtil.ReadInstancesAsArrays(dataFilename, out data, out labels);

                    // Create an Instances dataset.
                    ListInstances instances = new ListInstances();
                    for (int i = 0; i < data.Length; i++)
                    {
                        instances.AddInst(data[i], labels[i]);
                    }
                    instances.CopyMetadata(null);

                    // Create a predictor and specify some non-default settings
                    var sub = new SubComponent<ITrainer<Instances, IPredictor<Instance, Float>>, SignatureOldBinaryClassifierTrainer>(
                        "FastRank", "nl=5 lr =0.25 iter= 20");
                    var trainer = sub.CreateInstance(new TrainHost(new Random(1), 0));

                    // Train a predictor
                    trainer.Train(instances);
                    var predictor = trainer.CreatePredictor();

                    // ********* Several ways to save models. Only binary can be used to reload in TLC. *********

                    // Save the model in internal binary format that can be used for loading it.
                    modelFilename = Path.GetTempFileName();
                    PredictorUtils.Save(modelFilename, predictor, instances, null);

                    // Save the model as a plain-text description
                    modelFilenameText = Path.GetTempFileName();
                    PredictorUtils.SaveText(predictor, instances.Schema.FeatureNames, modelFilenameText);

                    // Save the model in Bing's INI format
                    modelFilenameIni = Path.GetTempFileName();
                    PredictorUtils.SaveIni(predictor, instances.Schema.FeatureNames, modelFilenameIni);

                    // ********* Loading and making predictions with a previously saved model *********
                    // Note: there are several alternative ways to construct instances.
                    // For example, see FactoryExampleTest, which demonstrates named-feature : value pairs.

                    // Load saved model
                    IDataModel dataModel;
                    IDataStats dataStats;
                    var pred = PredictorUtils.LoadPredictor<Float>(out dataModel, out dataStats, modelFilename);

                    // Direct cast (as FactoryExampleTest does): a predictor of the wrong kind fails
                    // here with an InvalidCastException instead of a NullReferenceException later.
                    var dp = (IDistributionPredictor<Instance, Float, Float>)pred;

                    // Get predictions for instances
                    Float[] probPredictions = new Float[instances.Count];
                    Float[] rawPredictions  = new Float[instances.Count];
                    Float[] rawPredictions1 = new Float[instances.Count];
                    for (int i = 0; i < instances.Count; i++)
                    {
                        probPredictions[i] = dp.PredictDistribution(instances[i], out rawPredictions[i]);
                        // Same row, but wrapped in a freshly constructed Instance.
                        rawPredictions1[i] = dp.Predict(new Instance(data[i]));
                    }

                    Float[] bulkPredictions = ((IBulkPredictor<Instance, Instances, Float, Float[]>)pred).BulkPredict(instances);

                    Assert.Equal(rawPredictions.Length, bulkPredictions.Length);
                    Assert.Equal(rawPredictions.Length, rawPredictions1.Length);
                    for (int i = 0; i < rawPredictions.Length; i++)
                    {
                        Assert.Equal(rawPredictions[i], bulkPredictions[i]);
                    }
                    for (int i = 0; i < rawPredictions.Length; i++)
                    {
                        Assert.Equal(rawPredictions[i], rawPredictions1[i]);
                    }

                    // test new testers
                    {
                        var results = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(pred, instances);

                        // Get metric names and print them alongside numbers
                        for (int i = 0; i < results.Length; i++)
                        {
                            Log("{0,-30} {1}", results[i].Name, results[i].Value);
                        }

                        // sanity check vs. original predictor
                        var results2 = new ClassifierTester(new ProbabilityPredictorTesterArguments()).Test(predictor, instances);
                        Assert.Equal(results.Length, results2.Length);
                        for (int i = 0; i < results.Length; i++)
                        {
                            Assert.Equal(results[i].Name, results2[i].Name);
                            Assert.Equal(results[i].Value, results2[i].Value);
                        }
                    }
                }
                finally
                {
                    // Path.GetTempFileName creates each file on disk; delete the ones that were
                    // actually created, whether or not the test succeeded.
                    foreach (string tempFile in new[] { modelFilename, modelFilenameText, modelFilenameIni })
                    {
                        if (tempFile != null)
                        {
                            File.Delete(tempFile);
                        }
                    }
                }
            });
            Done();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Saves <paramref name="predictor"/> in each format for which a non-null stream is
        /// supplied. When no stream is supplied, nothing is written and an informational message
        /// is logged on <paramref name="ch"/>.
        /// </summary>
        /// <param name="ch">Channel used for argument checks and progress messages; must not be null.</param>
        /// <param name="predictor">Predictor to save; must not be null.</param>
        /// <param name="schema">Schema passed to the summary, text, INI and code savers; must not be null.</param>
        /// <param name="binaryModelStream">
        /// Receives the binary form. The <c>BinaryWriter</c> is created with <c>leaveOpen</c> set,
        /// so this stream is not closed here.
        /// </param>
        /// <param name="summaryModelStream">Receives the summary form.</param>
        /// <param name="textModelStream">Receives the text form.</param>
        /// <param name="iniModelStream">
        /// Receives the INI form, written via <c>ICanSaveInIniFormat</c> when the predictor
        /// implements it and via <c>PredictorUtils.SaveIni</c> otherwise.
        /// </param>
        /// <param name="codeModelStream">Receives the code form.</param>
        /// <remarks>
        /// NOTE(review): the non-binary streams are wrapped with <c>Utils.OpenWriter</c>; whether
        /// disposing that writer closes the underlying stream depends on that helper - confirm.
        /// </remarks>
        public static void SavePredictor(IChannel ch, IPredictor predictor, RoleMappedSchema schema,
                                         Stream binaryModelStream = null, Stream summaryModelStream = null, Stream textModelStream = null,
                                         Stream iniModelStream    = null, Stream codeModelStream    = null)
        {
            Contracts.CheckValue(ch, nameof(ch));
            ch.CheckValue(predictor, nameof(predictor));
            ch.CheckValue(schema, nameof(schema));

            // Number of formats actually written; used only for the "nothing saved" message.
            int count = 0;

            if (binaryModelStream != null)
            {
                ch.Info("Saving predictor as binary");
                using (var writer = new BinaryWriter(binaryModelStream, Encoding.UTF8, true))
                {
                    PredictorUtils.SaveBinary(ch, predictor, writer);
                }
                count++;
            }

            if (summaryModelStream != null)
            {
                ch.Info("Saving predictor summary");
                using (StreamWriter writer = Utils.OpenWriter(summaryModelStream))
                {
                    PredictorUtils.SaveSummary(ch, predictor, schema, writer);
                }
                count++;
            }

            if (textModelStream != null)
            {
                ch.Info("Saving predictor as text");
                using (StreamWriter writer = Utils.OpenWriter(textModelStream))
                {
                    PredictorUtils.SaveText(ch, predictor, schema, writer);
                }
                count++;
            }

            if (iniModelStream != null)
            {
                ch.Info("Saving predictor as ini");
                using (StreamWriter writer = Utils.OpenWriter(iniModelStream))
                {
                    // Test if our predictor implements the more modern INI export interface.
                    // If it does not, use the old utility method.
                    ICanSaveInIniFormat saver = predictor as ICanSaveInIniFormat;
                    if (saver == null)
                    {
                        PredictorUtils.SaveIni(ch, predictor, schema, writer);
                    }
                    else
                    {
                        saver.SaveAsIni(writer, schema);
                    }
                }
                count++;
            }

            if (codeModelStream != null)
            {
                ch.Info("Saving predictor as code");
                using (StreamWriter writer = Utils.OpenWriter(codeModelStream))
                {
                    PredictorUtils.SaveCode(ch, predictor, schema, writer);
                }
                count++;
            }

            // Note that we don't check for this case up front so this command can be used to simply
            // check that the predictor is loadable.
            if (count == 0)
            {
                ch.Info("No files saved. Must specify at least one output file.");
            }
        }