/// <summary>
/// Verifies the fold indices returned by <c>CVFoldCreation.CreateFoldIndicesStratified</c> for
/// instance lists built by <c>CreateInstancesWithNKeys</c> with 5, 7 and 10 keys, using 5 folds
/// and a <see cref="Random"/> seeded with 1 so the assignment is reproducible.
/// </summary>
public void TestRandomBalancedFoldCreation()
{
    ListInstances li = CreateInstancesWithNKeys(5);
    var foldCreator = new CVFoldCreation();
    var cmd = new TLCArguments();
    cmd.numFolds = 5;
    cmd.command = Command.CrossValidation;
    cmd.stratifyInstances = true;

    int[] folds = foldCreator.CreateFoldIndicesStratified(li, cmd, new Random(1));
    int[] expectedIndices = { 1, 0, 3, 4, 2 };
    // Check the length first: iterating over folds alone would let an empty or truncated
    // result pass, and a longer one would surface as an index error rather than a test failure.
    Assert.Equal<int>(expectedIndices.Length, folds.Length);
    for (int i = 0; i < expectedIndices.Length; i++)
    {
        // Expected value goes first so failure messages label the two values correctly.
        Assert.Equal<int>(expectedIndices[i], folds[i]);
    }

    li = CreateInstancesWithNKeys(7);
    folds = foldCreator.CreateFoldIndicesStratified(li, cmd, new Random(1));
    expectedIndices = new int[] { 1, 0, 4, 1, 0, 2, 3 };
    Assert.Equal<int>(expectedIndices.Length, folds.Length);
    for (int i = 0; i < expectedIndices.Length; i++)
    {
        Assert.Equal<int>(expectedIndices[i], folds[i]);
    }

    li = CreateInstancesWithNKeys(10);
    folds = foldCreator.CreateFoldIndicesStratified(li, cmd, new Random(1));
    expectedIndices = new int[] { 2, 1, 0, 3, 2, 4, 0, 4, 3, 1 };
    Assert.Equal<int>(expectedIndices.Length, folds.Length);
    for (int i = 0; i < expectedIndices.Length; i++)
    {
        Assert.Equal<int>(expectedIndices[i], folds[i]);
    }

    Done();
}
/// <summary>
/// Runs the <c>CreateInstances</c> command on the adult dataset, passing the test file as both
/// the <c>/test</c> and <c>/valid</c> input, and compares the six output files (instances and
/// stats for train, test and valid) through <c>CheckEquality</c>.
/// </summary>
public void TestCreateTextInstances()
{
    TestDataset dataset = TestDatasets.adult;
    string trainPath = GetDataPath(dataset.trainFilename);
    string testPath = GetDataPath(dataset.testFilename);

    string namePrefix = TestContext.TestName + "-";

    // Order matters: it matches the /cifile ... /civalidstatsfile placeholders {3}..{8} below.
    string[] outputNames =
    {
        namePrefix + "Adult-Train.txt",
        namePrefix + "Adult-Train.stats.txt",
        namePrefix + "Adult-Test.txt",
        namePrefix + "Adult-Test.stats.txt",
        namePrefix + "Adult-Valid.txt",
        namePrefix + "Adult-Valid.stats.txt",
    };

    // Resolve an output path for each name via DeleteOutputPath, in the same order.
    string[] outputPaths = new string[outputNames.Length];
    for (int i = 0; i < outputNames.Length; i++)
    {
        outputPaths[i] = DeleteOutputPath(Dir, outputNames[i]);
    }

    string commandLine = string.Format(
        "/c=CreateInstances {0} /test={1} /valid={1} /cacheinst=- {2} " +
        "/cifile={3} /cistatsfile={4} /citestfile={5} /citeststatsfile={6} /civalidfile={7} /civalidstatsfile={8}",
        trainPath, testPath, dataset.extraSettings,
        outputPaths[0], outputPaths[1], outputPaths[2], outputPaths[3], outputPaths[4], outputPaths[5])
        + " /writer TextInstanceWriter{/stats=+} /disableTracking=+";

    var parsedArgs = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsedArgs));
    RunExperiments.Run(parsedArgs);

    // Check every produced file, in the order the outputs were declared.
    for (int i = 0; i < outputNames.Length; i++)
    {
        CheckEquality(Dir, outputNames[i]);
    }

    Done();
}
/// <summary>
/// Runs <c>CreateInstances</c> with a rank-3 PCA transform on the mnistTiny28 training data,
/// compares the written file through <c>CheckEquality</c>, then reads the output back and checks
/// that the summed absolute value of each feature strictly decreases as the feature index grows.
/// </summary>
public void TestPcaTransform()
{
    // Force Microsoft.ML.Runtime.PCA assembly to be loaded into the AppDomain so
    // ReflectionUtils.FindClassCore does not return null when called by ReflectionUtils.CreateInstance
    Assert.AreEqual(typeof(PCAPredictor).Name, "PCAPredictor");

    string trainPath = GetDataPath(TestDatasets.mnistTiny28.trainFilename);
    string outputName = TestContext.TestName + "-Train.txt";
    string outputPath = DeleteOutputPath(Dir, outputName);

    const int rank = 3;
    string transformSpec = string.Format("/inst Trans{{trans=pca {{k={0} seed=1}}}}", rank);
    string commandLine = string.Format(
        "/c CreateInstances {0} {1} /rs=1 /cifile={2}",
        trainPath, transformSpec, outputPath);

    var parsedArgs = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsedArgs));
    RunExperiments.Run(parsedArgs);
    CheckEquality(Dir, outputName);

    // Verify the scales of the transformed features decrease with respect to the feature index
    var written = new TlcTextInstances(new TlcTextInstances.Arguments(), outputPath);
    var absSums = new Double[rank];
    foreach (Instance instance in written)
    {
        Assert.IsTrue(instance.Features.Count == rank);

        var values = instance.Features.Values;
        for (int slot = 0; slot < values.Length; slot++)
        {
            // Accumulate the absolute value of each feature (per-index L1 norm across instances).
            var value = values[slot];
            absSums[slot] += value < 0 ? -value : value;
        }
    }

    // Each per-index sum must be strictly larger than the one that follows it.
    for (int next = 1; next < absSums.Length; next++)
    {
        Assert.IsTrue(absSums[next - 1] > absSums[next]);
    }

    Done();
}
/// <summary>
/// Runs an experiment on the breast-cancer training data with <c>/ev=MulticlassTester</c> and
/// expects <c>RunExperiments.Run</c> to throw an <see cref="AggregateException"/>.
/// No <c>/c</c> option is passed; presumably the default command is cross-validation, as the
/// test name suggests (confirm against <c>TLCArguments</c>).
/// </summary>
public void TestCrossValidationWithInvalidTester()
{
    var argsStr = GetDataPath(TestDatasets.breastCancer.trainFilename)
        + " /ev=MulticlassTester /o z.txt /threads=+ /disableTracking=+";
    var args = new TLCArguments();

    // Assert the parse result, as the other tests in this file do. Otherwise a failed parse would
    // leave args partially populated and the run below could throw for an unrelated reason.
    Assert.IsTrue(CmdParser.ParseArguments(argsStr, args));

    try
    {
        RunExperiments.Run(args);
    }
    catch (AggregateException ex)
    {
        // The exception is the expected outcome: log it and finish the test normally.
        Log("Caught expected exception: {0}", ex);
        Done();
        return;
    }

    // Reaching this point means Run completed without throwing.
    Fail("Expected exception!");
    Done();
}
/// <summary>
/// Runs <c>CreateInstances</c> twice on the breast-cancer training data. The first run uses
/// <c>/inst Text{text=1,2,3}</c> and passes the data-model file via <c>/m</c>; the second uses
/// <c>/inst Text{text=1,2}</c> and passes the same file via <c>/im</c>. The second run must throw
/// an exception whose base exception is an <see cref="InvalidOperationException"/>.
/// </summary>
public void TestFeatureHandlerIncorrectMapping()
{
    string trainData = GetDataPath(TestDatasets.breastCancer.trainFilename);
    string dataModelFile = DeleteOutputPath(Dir, TestContext.TestName + "-data-model.zip");
    string ciFile = DeleteOutputPath(Dir, TestContext.TestName + "-ci.tsv");

    // First run: expected to succeed.
    string argsString = string.Format(
        "/c CreateInstances {0} /inst Text{{text=1,2,3}} /m {1} /cifile {2}",
        trainData, dataModelFile, ciFile);
    var args = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsString, args));
    RunExperiments.Run(args);

    // Second run: same data-model file, different text column mapping.
    string ciFailFile = DeleteOutputPath(Dir, TestContext.TestName + "-ci-fail.tsv");
    argsString = string.Format(
        "/c CreateInstances {0} /inst Text{{text=1,2}} /im {1} /cifile {2}",
        trainData, dataModelFile, ciFailFile);
    args = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsString, args));

    // Capture the exception instead of calling Assert.Fail inside the try block: a catch-all
    // handler would swallow the assertion exception and hide the "expected to throw" message.
    Exception caught = null;
    try
    {
        RunExperiments.Run(args);
    }
    catch (Exception ex)
    {
        caught = ex;
    }

    Assert.IsNotNull(caught, "Expected to throw with different input model format");
    Exception baseException = caught.GetBaseException();
    Assert.IsTrue(
        baseException is InvalidOperationException,
        "Expected an InvalidOperationException but got: " + baseException.GetType().FullName);

    Done();
}