/// <summary>
/// Runs the CreateInstances command over the <c>breastCancerConst</c> training file using the
/// TextInstanceWriter with stats enabled, then hands the instances file and the stats file
/// to <c>CheckEquality</c>.
/// </summary>
public void TestCreateTextInstancesConstant()
{
    TestDataset dataset = TestDatasets.breastCancerConst;
    string trainPath = GetDataPath(dataset.trainFilename);

    // Output file names are derived from the test name; DeleteOutputPath returns the path
    // that is handed to the command line.
    string namePrefix = TestContext.TestName + "-";
    string instancesName = namePrefix + "BreastCancer.txt";
    string instancesStatsName = namePrefix + "BreastCancer.stats.txt";
    string instancesPath = DeleteOutputPath(Dir, instancesName);
    string instancesStatsPath = DeleteOutputPath(Dir, instancesStatsName);

    // The writer settings contain literal braces, so they are appended after formatting
    // instead of being part of the format string.
    string commandLine =
        string.Format(
            "c=CreateInstances {0} {1} cifile={2} cistatsfile={3}",
            trainPath, dataset.extraSettings, instancesPath, instancesStatsPath)
        + " writer=TextInstanceWriter{stats+} disableTracking+";

    var parsed = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsed));
    RunExperiments.Run(parsed);

    CheckEquality(Dir, instancesName);
    CheckEquality(Dir, instancesStatsName);
    Done();
}
/// <summary>
/// Runs CreateInstances twice over the <c>mnistTiny28</c> train/test files: once with
/// <c>/norm=MinMaxNormalizer</c> plus a separate transform setting, and once with
/// MinMaxNormalizer placed inside a <c>Trans</c> instances setting. The train outputs and the
/// test outputs of both runs are then passed pairwise to <c>CompareInstances</c>.
/// </summary>
public void TestCreateTextInstancesWithNormalization()
{
    TestDataset mnist = TestDatasets.mnistTiny28;
    string trainPath = GetDataPath(mnist.trainFilename);
    string testPath = GetDataPath(mnist.testFilename);

    string namePrefix = TestContext.TestName + "-";
    string separateTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Train.txt");
    string separateTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Test.txt");
    string nestedTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Train.txt");
    string nestedTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Test.txt");

    string transformSetting = "inst=Trans{trans=RFF {rng=1}}";

    // First entry: normalizer given via /norm. Second entry: normalizer given inside Trans{...}.
    string[] commandLines =
    {
        string.Format(
            "/c=CreateInstances {0} /test={1} /norm=MinMaxNormalizer /{2} /cifile={3} /citestfile={4}",
            trainPath, testPath, transformSetting, separateTrainOut, separateTestOut),
        string.Format(
            "/c=CreateInstances {0} /test={1} /inst Trans{{trans=MinMaxNormalizer {2}}} /cifile={3} /citestfile={4}",
            trainPath, testPath, transformSetting, nestedTrainOut, nestedTestOut),
    };

    foreach (string commandLine in commandLines)
    {
        var parsed = new TLCArguments();
        Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsed));
        RunExperiments.Run(parsed);
    }

    // Compare train outputs first, then test outputs.
    string[][] outputPairs =
    {
        new[] { separateTrainOut, nestedTrainOut },
        new[] { separateTestOut, nestedTestOut },
    };

    foreach (string[] pair in outputPairs)
    {
        var fromSeparate = new TlcTextInstances(new TlcTextInstances.Arguments(), pair[0]);
        var fromNested = new TlcTextInstances(new TlcTextInstances.Arguments(), pair[1]);
        CompareInstances(fromSeparate, fromNested);
    }

    Done();
}
/// <summary>
/// Runs CreateInstances over the <c>adult</c> dataset (the test file is also passed as the
/// validation file) with the TextInstanceWriter and stats enabled, then hands each of the six
/// written files (train/test/valid instances and their stats) to <c>CheckEquality</c>.
/// </summary>
public void TestCreateTextInstances()
{
    TestDataset dataset = TestDatasets.adult;
    string trainPath = GetDataPath(dataset.trainFilename);
    string testPath = GetDataPath(dataset.testFilename);

    string namePrefix = TestContext.TestName + "-";

    // Order matters: it matches the {3}..{8} placeholders of the command line below.
    string[] fileNames =
    {
        namePrefix + "Adult-Train.txt",
        namePrefix + "Adult-Train.stats.txt",
        namePrefix + "Adult-Test.txt",
        namePrefix + "Adult-Test.stats.txt",
        namePrefix + "Adult-Valid.txt",
        namePrefix + "Adult-Valid.stats.txt",
    };

    string[] filePaths = new string[fileNames.Length];
    for (int index = 0; index < fileNames.Length; index++)
    {
        filePaths[index] = DeleteOutputPath(Dir, fileNames[index]);
    }

    string commandLine = string.Format(
        "/c=CreateInstances {0} /test={1} /valid={1} /cacheinst=- {2} " +
        "/cifile={3} /cistatsfile={4} /citestfile={5} /citeststatsfile={6} /civalidfile={7} /civalidstatsfile={8}",
        trainPath, testPath, dataset.extraSettings,
        filePaths[0], filePaths[1], filePaths[2], filePaths[3], filePaths[4], filePaths[5]);

    // Writer settings contain literal braces, so they stay outside the format string.
    commandLine += " /writer TextInstanceWriter{/stats=+} /disableTracking=+";

    var parsed = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsed));
    RunExperiments.Run(parsed);

    foreach (string fileName in fileNames)
    {
        CheckEquality(Dir, fileName);
    }

    Done();
}
/// <summary>
/// Runs CreateInstances over the <c>mnistTiny28</c> training file with a rank-3 PCA transform,
/// passes the written file to <c>CheckEquality</c>, and then verifies that the accumulated
/// L1 norm of each output feature is strictly larger than that of the next feature.
/// </summary>
public void TestPcaTransform()
{
    // Force Microsoft.ML.Runtime.PCA assembly to be loaded into the AppDomain so
    // ReflectionUtils.FindClassCore does not return null when called by ReflectionUtils.CreateInstance.
    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("PCAPredictor", typeof(PCAPredictor).Name);

    string trainData = GetDataPath(TestDatasets.mnistTiny28.trainFilename);
    string fileName = TestContext.TestName + "-Train.txt";
    string outFile = DeleteOutputPath(Dir, fileName);

    const int rank = 3;
    string pcaTransformArgs = string.Format("/inst Trans{{trans=pca {{k={0} seed=1}}}}", rank);
    var argsStr1 = string.Format(
        "/c CreateInstances {0} {1} /rs=1 /cifile={2}",
        trainData, pcaTransformArgs, outFile);

    var args1 = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsStr1, args1));
    RunExperiments.Run(args1);
    CheckEquality(Dir, fileName);

    // Verify the scales of the transformed features decrease with respect to the feature index.
    TlcTextInstances outputInstances = new TlcTextInstances(new TlcTextInstances.Arguments(), outFile);
    Double[] l1norms = new Double[rank];
    foreach (Instance instance in outputInstances)
    {
        Assert.IsTrue(instance.Features.Count == rank);

        var values = instance.Features.Values;
        for (int i = 0; i < values.Length; i++)
        {
            // Accumulate the absolute value of feature i across all instances.
            l1norms[i] += Math.Abs(values[i]);
        }
    }

    for (int i = 0; i < l1norms.Length - 1; i++)
    {
        Assert.IsTrue(l1norms[i] > l1norms[i + 1]);
    }

    Done();
}
/// <summary>
/// Runs an experiment over the <c>breastCancer</c> training file with <c>/ev=MulticlassTester</c>
/// and expects <c>RunExperiments.Run</c> to throw an <see cref="AggregateException"/>.
/// The test is reported as failed through <c>Fail</c> when no such exception is thrown.
/// </summary>
public void TestCrossValidationWithInvalidTester()
{
    var argsStr = GetDataPath(TestDatasets.breastCancer.trainFilename)
        + " /ev=MulticlassTester /o z.txt /threads=+ /disableTracking=+";

    var args = new TLCArguments();

    // The command line itself must be valid; otherwise an exception from Run would not be
    // attributable to the tester setting. Sibling tests assert the parse result the same way.
    Assert.IsTrue(CmdParser.ParseArguments(argsStr, args));

    try
    {
        RunExperiments.Run(args);
    }
    catch (AggregateException ex)
    {
        Log("Caught expected exception: {0}", ex);
        Done();
        return;
    }

    // Reached only when Run completed without throwing an AggregateException.
    Fail("Expected exception!");
    Done();
}
/// <summary>
/// Runs CreateInstances twice over the <c>breastCancer</c> training file. The first run uses
/// <c>Text{text=1,2,3}</c> and passes the data-model file via <c>/m</c>; the second run uses
/// <c>Text{text=1,2}</c> and passes the same file via <c>/im</c>. The second run must throw an
/// exception whose base exception is an <see cref="InvalidOperationException"/>.
/// </summary>
public void TestFeatureHandlerIncorrectMapping()
{
    string trainData = GetDataPath(TestDatasets.breastCancer.trainFilename);
    string dataModelFile = DeleteOutputPath(Dir, TestContext.TestName + "-data-model.zip");
    string ciFile = DeleteOutputPath(Dir, TestContext.TestName + "-ci.tsv");

    string argsString = string.Format(
        "/c CreateInstances {0} /inst Text{{text=1,2,3}} /m {1} /cifile {2}",
        trainData, dataModelFile, ciFile);
    var args = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsString, args));
    RunExperiments.Run(args);

    string ciFailFile = DeleteOutputPath(Dir, TestContext.TestName + "-ci-fail.tsv");
    argsString = string.Format(
        "/c CreateInstances {0} /inst Text{{text=1,2}} /im {1} /cifile {2}",
        trainData, dataModelFile, ciFailFile);
    args = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsString, args));

    // Capture the exception instead of asserting inside the try block: an Assert.Fail placed
    // inside the try would itself be caught by catch (Exception) and its message would be lost.
    Exception caught = null;
    try
    {
        RunExperiments.Run(args);
    }
    catch (Exception ex)
    {
        caught = ex;
    }

    if (caught == null)
    {
        Assert.Fail("Expected to throw with different input model format");
    }

    Assert.IsTrue(caught.GetBaseException() is InvalidOperationException);
    Done();
}
/// <summary>
/// Trains a linear SVM on the <c>adultText</c> dataset into a temporary model file, reloads the
/// predictor, and checks that predictions on instances built line by line through an
/// <c>IInstanceFactory</c> equal the predictions on instances produced by
/// <c>RunExperiments.CreateTestData</c> for the same test file.
/// </summary>
public void FactoryExampleTest()
{
    var dataset = TestDatasets.adultText;
    string dataFilename = GetDataPath(dataset.trainFilename);
    string testDataFilename = GetDataPath(dataset.testFilename);

    // ********* Training a model *********
    string modelFilename = Path.GetTempFileName();
    try
    {
        TLCArguments cmd = new TLCArguments();
        Assert.True(CmdParser.ParseArguments(dataset.extraSettings, cmd));
        cmd.command = Command.Train;
        cmd.modelfile = modelFilename;
        cmd.datafile = dataFilename;
        cmd.instancesSettings = dataset.settings;
        cmd.classifierName = TestLearners.linearSVM.Trainer;
        RunExperiments.Run(cmd);

        // Load and make predictions with a previously saved model.
        IDataModel dataModel;
        IDataStats dataStats;
        var predictor = (IDistributionPredictor<Instance, Float, Float>)PredictorUtils.LoadPredictor(
            out dataModel, out dataStats, modelFilename);
        var instanceFactory = ReflectionUtilsOld.CreateInstanceOld<IInstanceFactory, SignatureInstances>(
            cmd.instancesClass, cmd.instancesSettings, null, dataModel);

        bool headerSkip = true;
        List<Float> outputs = new List<Float>();
        List<Float> probabilities = new List<Float>();
        using (StreamReader reader = new StreamReader(testDataFilename))
        {
            List<string> features = new List<string>();
            string text;
            long line = 0;
            while ((text = reader.ReadLine()) != null)
            {
                // The line counter includes blank lines and the header line.
                ++line;
                if (string.IsNullOrWhiteSpace(text))
                {
                    continue;
                }

                string[] cols = text.Split(',');
                Assert.True(cols.Length == 15);

                if (headerSkip)
                {
                    // skip header line
                    headerSkip = false;
                    continue;
                }

                features.Clear();
                // Add in the "max dimensionality"
                features.Add("15");
                for (int col = 0; col < cols.Length; col++)
                {
                    string s = cols[col].Trim();
                    switch (col)
                    {
                        case 0:
                        case 2:
                        case 4:
                        case 10:
                        case 11:
                        case 12:
                        case 14:
                            // numeric feature or label -- add if non-zero
                            Float val = InstancesUtils.FloatParse(s);
                            if (val == 0) // Beware of NaNs - they should be recorded!
                            {
                                // Moves on to the next column without adding this one.
                                continue;
                            }
                            break;
                    }

                    features.Add(col + ":" + s);
                }

                Instance instance = instanceFactory.ProduceInstance(line, features.ToArray());
                Float rawOutput, probability;
                probability = predictor.PredictDistribution(instance, out rawOutput);
                outputs.Add(rawOutput);
                probabilities.Add(probability);
            }
        }

        // Predict again on instances created by the regular test-data path.
        List<Float> originalOutputs = new List<Float>();
        List<Float> originalProbabilities = new List<Float>();
        var env = new LocalEnvironment(SysRandom.Wrap(RunExperiments.GetRandom(cmd)));
        Instances instances = RunExperiments.CreateTestData(cmd, testDataFilename, dataModel, null, env);
        foreach (Instance instance in instances)
        {
            Float rawOutput, probability;
            probability = predictor.PredictDistribution(instance, out rawOutput);
            originalOutputs.Add(rawOutput);
            originalProbabilities.Add(probability);
        }

        CollectionAssert.Equal(outputs, originalOutputs);
        CollectionAssert.Equal(probabilities, originalProbabilities);
    }
    finally
    {
        // Remove the temporary model file even when training or an assertion above fails.
        File.Delete(modelFilename);
    }

    Done();
}