/// <summary>
/// Asserts that two instance collections are equivalent: same schema feature count,
/// same number of instances, and, pairwise, identical labels, feature counts,
/// feature values and feature indices.
/// </summary>
/// <param name="instances1">First collection; its schema is the reference for per-instance feature counts.</param>
/// <param name="instances2">Second collection, walked in lock-step with the first.</param>
private void CompareInstances(TlcTextInstances instances1, TlcTextInstances instances2)
{
    Assert.IsTrue(instances1.Schema.NumFeatures == instances2.Schema.NumFeatures, "mismatch on schema features");

    using (var left = instances1.GetEnumerator())
    using (var right = instances2.GetEnumerator())
    {
        while (true)
        {
            // Advance both enumerators on every pass so a length mismatch is
            // detected regardless of which side runs out first.
            bool leftHasMore = left.MoveNext();
            bool rightHasMore = right.MoveNext();
            Assert.IsTrue(leftHasMore == rightHasMore, "different number of instances");

            if (!leftHasMore)
            {
                return;
            }

            var leftInstance = left.Current;
            var rightInstance = right.Current;

            Assert.IsTrue(leftInstance.Label == rightInstance.Label, "mismatch on instance label");
            Assert.IsTrue(leftInstance.NumFeatures == rightInstance.NumFeatures, "mismatch on number of features");
            Assert.IsTrue(leftInstance.NumFeatures == instances1.Schema.NumFeatures, "mismatch on number of instance vs. schema features");

            bool sameValues = Utils.AreEqual(leftInstance.Features.Values, rightInstance.Features.Values);
            Assert.IsTrue(sameValues, "mismatch on feature values");

            bool sameIndices = Utils.AreEqual(leftInstance.Features.Indices, rightInstance.Features.Indices);
            Assert.IsTrue(sameIndices, "mismatch on feature indices");
        }
    }
}
/// <summary>
/// Runs the CreateInstances command twice over the mnistTiny28 train/test files:
/// once passing MinMaxNormalizer through /norm alongside the transform arguments,
/// and once passing MinMaxNormalizer inside the /inst Trans specification. The
/// train and test outputs of the two runs are then required to match instance by instance.
/// </summary>
public void TestCreateTextInstancesWithNormalization()
{
    TestDataset data = TestDatasets.mnistTiny28;
    string trainPath = GetDataPath(data.trainFilename);
    string testPath = GetDataPath(data.testFilename);

    // Output files are named after the running test; any stale copies are cleared first.
    string namePrefix = TestContext.TestName + "-";
    string separateTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Train.txt");
    string separateTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Test.txt");
    string combinedTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Train.txt");
    string combinedTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Test.txt");

    string transArgs = "inst=Trans{trans=RFF {rng=1}}";

    // Run 1: normalizer given via /norm.
    string separateCommand = string.Format(
        "/c=CreateInstances {0} /test={1} /norm=MinMaxNormalizer /{2} /cifile={3} /citestfile={4}",
        trainPath, testPath, transArgs, separateTrainOut, separateTestOut);
    TLCArguments separateArgs = new TLCArguments();
    bool separateParsed = CmdParser.ParseArguments(separateCommand, separateArgs);
    Assert.IsTrue(separateParsed);
    RunExperiments.Run(separateArgs);

    // Run 2: normalizer given inside the /inst Trans specification.
    string combinedCommand = string.Format(
        "/c=CreateInstances {0} /test={1} /inst Trans{{trans=MinMaxNormalizer {2}}} /cifile={3} /citestfile={4}",
        trainPath, testPath, transArgs, combinedTrainOut, combinedTestOut);
    TLCArguments combinedArgs = new TLCArguments();
    bool combinedParsed = CmdParser.ParseArguments(combinedCommand, combinedArgs);
    Assert.IsTrue(combinedParsed);
    RunExperiments.Run(combinedArgs);

    // Training outputs of both runs must agree.
    TlcTextInstances separateTrain = new TlcTextInstances(new TlcTextInstances.Arguments(), separateTrainOut);
    TlcTextInstances combinedTrain = new TlcTextInstances(new TlcTextInstances.Arguments(), combinedTrainOut);
    CompareInstances(separateTrain, combinedTrain);

    // Test outputs of both runs must agree.
    TlcTextInstances separateTest = new TlcTextInstances(new TlcTextInstances.Arguments(), separateTestOut);
    TlcTextInstances combinedTest = new TlcTextInstances(new TlcTextInstances.Arguments(), combinedTestOut);
    CompareInstances(separateTest, combinedTest);

    Done();
}
/// <summary>
/// Runs the CreateInstances command with a rank-3 PCA transform over the mnistTiny28
/// training file, passes the output file to <c>CheckEquality</c>, and then verifies that
/// the accumulated L1 norm of each output feature strictly decreases as the feature
/// index increases.
/// </summary>
public void TestPcaTransform()
{
    // Force Microsoft.ML.Runtime.PCA assembly to be loaded into the AppDomain so
    // ReflectionUtils.FindClassCore does not return null when called by ReflectionUtils.CreateInstance
    // (expected value first, actual second, so a failure report reads correctly).
    Assert.AreEqual("PCAPredictor", typeof(PCAPredictor).Name);

    string trainData = GetDataPath(TestDatasets.mnistTiny28.trainFilename);
    string fileName = TestContext.TestName + "-Train.txt";
    string outFile = DeleteOutputPath(Dir, fileName);

    const int rank = 3;
    string pcaTransformArgs = string.Format("/inst Trans{{trans=pca {{k={0} seed=1}}}}", rank);
    var argsStr1 = string.Format(
        "/c CreateInstances {0} {1} /rs=1 /cifile={2}",
        trainData, pcaTransformArgs, outFile);
    var args1 = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsStr1, args1), "failed to parse command line arguments");
    RunExperiments.Run(args1);
    CheckEquality(Dir, fileName);

    // Verify the scales of the transformed features decrease with respect to the feature index
    TlcTextInstances outputInstances = new TlcTextInstances(new TlcTextInstances.Arguments(), outFile);
    Double[] l1norms = new Double[rank];
    foreach (Instance instance in outputInstances)
    {
        Assert.IsTrue(instance.Features.Count == rank, "unexpected number of PCA output features");

        var values = instance.Features.Values;
        for (int i = 0; i < values.Length; i++)
        {
            l1norms[i] += Math.Abs(values[i]);
        }
    }

    for (int i = 0; i < l1norms.Length - 1; i++)
    {
        Assert.IsTrue(
            l1norms[i] > l1norms[i + 1],
            "L1 norm of feature " + i + " is not greater than that of feature " + (i + 1));
    }

    Done();
}