public void TestCreateTextInstancesConstant()
        {
            // Runs the CreateInstances command on the breastCancerConst training data with the
            // TextInstanceWriter (stats enabled) and hands the instances file and the stats file
            // to CheckEquality.
            TestDataset dataset   = TestDatasets.breastCancerConst;
            string      trainPath = GetDataPath(dataset.trainFilename);

            // Every output file name carries the test name as a prefix.
            string namePrefix    = TestContext.TestName + "-";
            string instancesName = namePrefix + "BreastCancer.txt";
            string summaryName   = namePrefix + "BreastCancer.stats.txt";

            // DeleteOutputPath yields the path each output is written to
            // (presumably after removing a stale copy - confirm against the helper).
            string instancesPath = DeleteOutputPath(Dir, instancesName);
            string summaryPath   = DeleteOutputPath(Dir, summaryName);

            // Command line: data file, dataset-specific settings, output locations, then writer options.
            string commandLine =
                string.Format(
                    "c=CreateInstances {0} {1} cifile={2} cistatsfile={3}",
                    trainPath, dataset.extraSettings, instancesPath, summaryPath)
                + " writer=TextInstanceWriter{stats+} disableTracking+";

            var  parsedArgs = new TLCArguments();
            bool parsedOk   = CmdParser.ParseArguments(commandLine, parsedArgs);
            Assert.IsTrue(parsedOk);

            RunExperiments.Run(parsedArgs);

            // Check both produced files, instances first, then stats.
            foreach (string producedName in new[] { instancesName, summaryName })
            {
                CheckEquality(Dir, producedName);
            }

            Done();
        }
        /// <summary>
        /// Runs CreateInstances twice on the mnistTiny28 data: once with MinMaxNormalizer passed via
        /// /norm, and once with MinMaxNormalizer named inside the Trans instance settings. The train
        /// and test instance files written by the two runs are then passed pairwise to CompareInstances.
        /// </summary>
        public void TestCreateTextInstancesWithNormalization()
        {
            TestDataset mnist     = TestDatasets.mnistTiny28;
            string      trainPath = GetDataPath(mnist.trainFilename);
            string      testPath  = GetDataPath(mnist.testFilename);

            // Output paths for both runs, all prefixed with the test name.
            string namePrefix        = TestContext.TestName + "-";
            string separateTrainPath = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Train.txt");
            string separateTestPath  = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Test.txt");
            string transTrainPath    = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Train.txt");
            string transTestPath     = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Test.txt");

            // Transform settings shared by both command lines.
            string transSettings = "inst=Trans{trans=RFF {rng=1}}";

            // Run 1: normalizer supplied through the /norm option.
            string separateCommand = string.Format(
                "/c=CreateInstances {0} /test={1} /norm=MinMaxNormalizer /{2} /cifile={3} /citestfile={4}",
                trainPath, testPath, transSettings, separateTrainPath, separateTestPath);
            var  separateArgs   = new TLCArguments();
            bool separateParsed = CmdParser.ParseArguments(separateCommand, separateArgs);
            Assert.IsTrue(separateParsed);

            RunExperiments.Run(separateArgs);

            // Run 2: normalizer listed inside the Trans instance settings.
            string transCommand = string.Format(
                "/c=CreateInstances {0} /test={1} /inst Trans{{trans=MinMaxNormalizer {2}}} /cifile={3} /citestfile={4}",
                trainPath, testPath, transSettings, transTrainPath, transTestPath);
            var  transArgs   = new TLCArguments();
            bool transParsed = CmdParser.ParseArguments(transCommand, transArgs);
            Assert.IsTrue(transParsed);

            RunExperiments.Run(transArgs);

            // Compare the training outputs of the two runs.
            var separateTrain = new TlcTextInstances(new TlcTextInstances.Arguments(), separateTrainPath);
            var transTrain    = new TlcTextInstances(new TlcTextInstances.Arguments(), transTrainPath);
            CompareInstances(separateTrain, transTrain);

            // Compare the test outputs of the two runs.
            var separateTest = new TlcTextInstances(new TlcTextInstances.Arguments(), separateTestPath);
            var transTest    = new TlcTextInstances(new TlcTextInstances.Arguments(), transTestPath);
            CompareInstances(separateTest, transTest);

            Done();
        }
        /// <summary>
        /// Runs CreateInstances on the adult dataset, using the test file for both /test and /valid,
        /// with the TextInstanceWriter (stats enabled). Each of the six produced files (instances and
        /// stats for train, test and validation) is then handed to CheckEquality.
        /// </summary>
        public void TestCreateTextInstances()
        {
            TestDataset dataset   = TestDatasets.adult;
            string      trainPath = GetDataPath(dataset.trainFilename);
            string      testPath  = GetDataPath(dataset.testFilename);

            string namePrefix = TestContext.TestName + "-";

            // Output file names, in the order they are passed on the command line:
            // train, train stats, test, test stats, validation, validation stats.
            string[] outputNames =
            {
                namePrefix + "Adult-Train.txt",
                namePrefix + "Adult-Train.stats.txt",
                namePrefix + "Adult-Test.txt",
                namePrefix + "Adult-Test.stats.txt",
                namePrefix + "Adult-Valid.txt",
                namePrefix + "Adult-Valid.stats.txt",
            };

            // Resolve the path for each output, keeping the same order as the names.
            string[] outputPaths = new string[outputNames.Length];
            for (int index = 0; index < outputNames.Length; index++)
            {
                outputPaths[index] = DeleteOutputPath(Dir, outputNames[index]);
            }

            string commandLine =
                string.Format(
                    "/c=CreateInstances {0} /test={1} /valid={1} /cacheinst=- {2} " +
                    "/cifile={3} /cistatsfile={4} /citestfile={5} /citeststatsfile={6} /civalidfile={7} /civalidstatsfile={8}",
                    trainPath, testPath, dataset.extraSettings,
                    outputPaths[0], outputPaths[1], outputPaths[2], outputPaths[3], outputPaths[4], outputPaths[5])
                + " /writer TextInstanceWriter{/stats=+} /disableTracking=+";

            var  parsedArgs = new TLCArguments();
            bool parsedOk   = CmdParser.ParseArguments(commandLine, parsedArgs);
            Assert.IsTrue(parsedOk);

            RunExperiments.Run(parsedArgs);

            // Check every produced file in the order listed above.
            foreach (string producedName in outputNames)
            {
                CheckEquality(Dir, producedName);
            }

            Done();
        }
        /// <summary>
        /// Runs CreateInstances on the mnistTiny28 training data with a rank-3 PCA transform, passes
        /// the output file to CheckEquality, and then verifies that every instance has exactly
        /// <c>rank</c> features and that the per-feature L1 norms (summed over all instances) are
        /// strictly decreasing with the feature index.
        /// </summary>
        public void TestPcaTransform()
        {
            // Force Microsoft.ML.Runtime.PCA assembly to be loaded into the AppDomain so
            // ReflectionUtils.FindClassCore does not return null when called by ReflectionUtils.CreateInstance
            // (expected value first, so a failure message reads correctly).
            Assert.AreEqual("PCAPredictor", typeof(PCAPredictor).Name);

            string trainData = GetDataPath(TestDatasets.mnistTiny28.trainFilename);
            string fileName  = TestContext.TestName + "-Train.txt";
            string outFile   = DeleteOutputPath(Dir, fileName);

            const int rank             = 3;
            string    pcaTransformArgs = string.Format("/inst Trans{{trans=pca {{k={0} seed=1}}}}", rank);
            var       argsStr1         = string.Format(
                "/c CreateInstances {0} {1} /rs=1 /cifile={2}",
                trainData,
                pcaTransformArgs,
                outFile);
            var args1 = new TLCArguments();

            Assert.IsTrue(CmdParser.ParseArguments(argsStr1, args1));

            RunExperiments.Run(args1);
            CheckEquality(Dir, fileName);

            // Verify the scales of the transformed features decrease with respect to the feature index
            TlcTextInstances outputInstances = new TlcTextInstances(new TlcTextInstances.Arguments(), outFile);

            // l1norms[i] accumulates the absolute value of feature i over all instances.
            Double[] l1norms = new Double[rank];
            foreach (Instance instance in outputInstances)
            {
                Assert.IsTrue(instance.Features.Count == rank);
                for (int i = 0; i < instance.Features.Values.Length; i++)
                {
                    l1norms[i] += Math.Abs(instance.Features.Values[i]);
                }
            }

            for (int i = 0; i < l1norms.Length - 1; i++)
            {
                Assert.IsTrue(
                    l1norms[i] > l1norms[i + 1],
                    "L1 norm of feature {0} ({1}) is not greater than that of feature {2} ({3})",
                    i, l1norms[i], i + 1, l1norms[i + 1]);
            }

            Done();
        }
        /// <summary>
        /// Runs an experiment on the breastCancer training data with /ev=MulticlassTester and expects
        /// RunExperiments.Run to throw an <see cref="AggregateException"/>. The test fails if no such
        /// exception is thrown.
        /// </summary>
        public void TestCrossValidationWithInvalidTester()
        {
            var argsStr = GetDataPath(TestDatasets.breastCancer.trainFilename)
                          + " /ev=MulticlassTester /o z.txt /threads=+ /disableTracking=+";

            var args = new TLCArguments();

            // The command line itself must be valid: the exception expected below has to come from
            // running the experiment, not from half-populated arguments after a failed parse.
            Assert.IsTrue(CmdParser.ParseArguments(argsStr, args));

            try
            {
                RunExperiments.Run(args);
            }
            catch (AggregateException ex)
            {
                Log("Caught expected exception: {0}", ex);
                Done();
                return;
            }

            // Reaching this point means Run completed without throwing.
            Fail("Expected exception!");
            Done();
        }
        /// <summary>
        /// Runs CreateInstances on the breastCancer training data with Text{text=1,2,3}, saving a data
        /// model via /m, then runs it again with Text{text=1,2} while loading that model via /im.
        /// The second run is expected to throw an exception whose base exception is an
        /// <see cref="InvalidOperationException"/>.
        /// </summary>
        public void TestFeatureHandlerIncorrectMapping()
        {
            string trainData     = GetDataPath(TestDatasets.breastCancer.trainFilename);
            string dataModelFile = DeleteOutputPath(Dir, TestContext.TestName + "-data-model.zip");
            string ciFile        = DeleteOutputPath(Dir, TestContext.TestName + "-ci.tsv");
            string argsString    = string.Format(
                "/c CreateInstances {0} /inst Text{{text=1,2,3}} /m {1} /cifile {2}",
                trainData,
                dataModelFile,
                ciFile);
            var args = new TLCArguments();

            // First run: writes the data model file that the second run loads.
            Assert.IsTrue(CmdParser.ParseArguments(argsString, args));
            RunExperiments.Run(args);

            string ciFailFile = DeleteOutputPath(Dir, TestContext.TestName + "-ci-fail.tsv");

            // Second run: different text columns combined with the previously saved model.
            argsString = string.Format(
                "/c CreateInstances {0} /inst Text{{text=1,2}} /im {1} /cifile {2}",
                trainData,
                dataModelFile,
                ciFailFile);
            args = new TLCArguments();
            Assert.IsTrue(CmdParser.ParseArguments(argsString, args));

            // Capture the exception instead of asserting inside the try block: an assertion failure
            // raised inside the try would itself be caught by catch (Exception) and the
            // "expected to throw" message would be lost.
            Exception caught = null;
            try
            {
                RunExperiments.Run(args);
            }
            catch (Exception ex)
            {
                caught = ex;
            }

            Assert.IsNotNull(caught, "Expected to throw with different input model format");
            Assert.IsTrue(caught.GetBaseException() is InvalidOperationException);

            Done();
        }
Example #7
0
        /// <summary>
        /// Trains a linear SVM on the adultText dataset into a temporary model file, reloads the
        /// predictor, and checks that predictions made on instances built by hand (test file lines
        /// parsed here and fed to the instance factory) equal the predictions made on the instances
        /// returned by RunExperiments.CreateTestData for the same file.
        /// </summary>
        public void FactoryExampleTest()
        {
            var    dataset          = TestDatasets.adultText;
            string dataFilename     = GetDataPath(dataset.trainFilename);
            string testDataFilename = GetDataPath(dataset.testFilename);

            // Path.GetTempFileName creates the file on disk, so it is removed in the finally block
            // below even when training, prediction, or an assertion throws.
            string modelFilename = Path.GetTempFileName();
            try
            {
                // Train a model and save it to modelFilename.
                TLCArguments cmd = new TLCArguments();

                Assert.True(CmdParser.ParseArguments(dataset.extraSettings, cmd));
                cmd.command           = Command.Train;
                cmd.modelfile         = modelFilename;
                cmd.datafile          = dataFilename;
                cmd.instancesSettings = dataset.settings;
                cmd.classifierName    = TestLearners.linearSVM.Trainer;
                RunExperiments.Run(cmd);

                // Load and make predictions with a previously saved model.
                IDataModel dataModel;
                IDataStats dataStats;
                var        predictor = (IDistributionPredictor<Instance, Float, Float>)PredictorUtils.LoadPredictor(
                    out dataModel, out dataStats, modelFilename);
                var instanceFactory = ReflectionUtilsOld.CreateInstanceOld<IInstanceFactory, SignatureInstances>(
                    cmd.instancesClass, cmd.instancesSettings, null, dataModel);

                bool        headerSkip    = true;
                List<Float> outputs       = new List<Float>();
                List<Float> probabilities = new List<Float>();

                using (StreamReader reader = new StreamReader(testDataFilename))
                {
                    List<string> features = new List<string>();
                    string       text;
                    long         line = 0;
                    while ((text = reader.ReadLine()) != null)
                    {
                        // line counts every physical line, including blank lines and the header.
                        ++line;
                        if (string.IsNullOrWhiteSpace(text))
                        {
                            continue;
                        }

                        string[] cols = text.Split(',');
                        Assert.True(cols.Length == 15);

                        if (headerSkip)
                        {
                            // skip header line
                            headerSkip = false;
                            continue;
                        }

                        features.Clear();
                        // Add in the "max dimensionality"
                        features.Add("15");
                        for (int col = 0; col < cols.Length; col++)
                        {
                            string s = cols[col].Trim();
                            switch (col)
                            {
                            case 0:
                            case 2:
                            case 4:
                            case 10:
                            case 11:
                            case 12:
                            case 14:
                                // numeric feature or label -- add if non-zero
                                Float val = InstancesUtils.FloatParse(s);
                                if (val == 0) // Beware of NaNs - they should be recorded!
                                {
                                    // Zero value: omit this column (continues the enclosing for loop).
                                    continue;
                                }
                                break;
                            }
                            // Emit the column as "index:value".
                            features.Add(col + ":" + s);
                        }

                        Instance instance = instanceFactory.ProduceInstance(line, features.ToArray());
                        Float    rawOutput, probability;
                        probability = predictor.PredictDistribution(instance, out rawOutput);
                        outputs.Add(rawOutput);
                        probabilities.Add(probability);
                    }
                }

                // Reference predictions: let RunExperiments build the test instances from the same file.
                List<Float> originalOutputs       = new List<Float>();
                List<Float> originalProbabilities = new List<Float>();
                var         env       = new LocalEnvironment(SysRandom.Wrap(RunExperiments.GetRandom(cmd)));
                Instances   instances = RunExperiments.CreateTestData(cmd, testDataFilename, dataModel, null, env);

                foreach (Instance instance in instances)
                {
                    Float rawOutput, probability;
                    probability = predictor.PredictDistribution(instance, out rawOutput);
                    originalOutputs.Add(rawOutput);
                    originalProbabilities.Add(probability);
                }

                CollectionAssert.Equal(outputs, originalOutputs);
                CollectionAssert.Equal(probabilities, originalProbabilities);
            }
            finally
            {
                // File.Delete does not throw when the file is already gone.
                File.Delete(modelFilename);
            }

            Done();
        }