// Asserts that two instance sets match: both schemas report the same number of features,
        // both enumerations yield the same number of instances, and every pair of instances at the
        // same position agrees on label, feature count, feature values and feature indices.
        private void CompareInstances(TlcTextInstances instances1, TlcTextInstances instances2)
        {
            bool sameSchemaWidth = instances1.Schema.NumFeatures == instances2.Schema.NumFeatures;
            Assert.IsTrue(sameSchemaWidth, "mismatch on schema features");

            using (var left = instances1.GetEnumerator())
            using (var right = instances2.GetEnumerator())
            {
                while (true)
                {
                    // Both enumerators are advanced before the check so that a difference
                    // in length is reported instead of silently ending the comparison.
                    bool leftHasItem = left.MoveNext();
                    bool rightHasItem = right.MoveNext();
                    Assert.IsTrue(leftHasItem == rightHasItem, "different number of instances");
                    if (!leftHasItem)
                    {
                        return;
                    }

                    var leftInstance = left.Current;
                    var rightInstance = right.Current;

                    bool sameLabel = leftInstance.Label == rightInstance.Label;
                    Assert.IsTrue(sameLabel, "mismatch on instance label");

                    bool sameFeatureCount = leftInstance.NumFeatures == rightInstance.NumFeatures;
                    Assert.IsTrue(sameFeatureCount, "mismatch on number of features");

                    // The instance width is also checked against the schema of the first set.
                    bool matchesSchema = leftInstance.NumFeatures == instances1.Schema.NumFeatures;
                    Assert.IsTrue(matchesSchema, "mismatch on number of instance vs. schema features");

                    bool sameValues = Utils.AreEqual(leftInstance.Features.Values, rightInstance.Features.Values);
                    Assert.IsTrue(sameValues, "mismatch on feature values");

                    bool sameIndices = Utils.AreEqual(leftInstance.Features.Indices, rightInstance.Features.Indices);
                    Assert.IsTrue(sameIndices, "mismatch on feature indices");
                }
            }
        }
        // Runs CreateInstances twice over the mnistTiny28 train/test data and checks that both
        // runs write equal train and test instance files. The first run passes
        // /norm=MinMaxNormalizer next to the RFF instance spec; the second nests the same RFF
        // spec inside a Trans{trans=MinMaxNormalizer ...} instance spec.
        public void TestCreateTextInstancesWithNormalization()
        {
            const string rffInstArgs = "inst=Trans{trans=RFF {rng=1}}";

            TestDataset data = TestDatasets.mnistTiny28;
            string trainPath = GetDataPath(data.trainFilename);
            string testPath = GetDataPath(data.testFilename);

            string namePrefix = TestContext.TestName + "-";
            string separateTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Train.txt");
            string separateTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Test.txt");
            string transTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Train.txt");
            string transTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Test.txt");

            // First run: normalizer given through the /norm option.
            string separateCommand = string.Format(
                "/c=CreateInstances {0} /test={1} /norm=MinMaxNormalizer /{2} /cifile={3} /citestfile={4}",
                trainPath, testPath, rffInstArgs, separateTrainOut, separateTestOut);
            TLCArguments separateArgs = new TLCArguments();
            bool separateParsed = CmdParser.ParseArguments(separateCommand, separateArgs);
            Assert.IsTrue(separateParsed);
            RunExperiments.Run(separateArgs);

            // Second run: normalizer given as a transform around the RFF instance spec.
            string transCommand = string.Format(
                "/c=CreateInstances {0} /test={1} /inst Trans{{trans=MinMaxNormalizer {2}}} /cifile={3} /citestfile={4}",
                trainPath, testPath, rffInstArgs, transTrainOut, transTestOut);
            TLCArguments transArgs = new TLCArguments();
            bool transParsed = CmdParser.ParseArguments(transCommand, transArgs);
            Assert.IsTrue(transParsed);
            RunExperiments.Run(transArgs);

            // Train outputs are compared first, then test outputs.
            CompareInstances(
                new TlcTextInstances(new TlcTextInstances.Arguments(), separateTrainOut),
                new TlcTextInstances(new TlcTextInstances.Arguments(), transTrainOut));

            CompareInstances(
                new TlcTextInstances(new TlcTextInstances.Arguments(), separateTestOut),
                new TlcTextInstances(new TlcTextInstances.Arguments(), transTestOut));

            Done();
        }
        // Runs CreateInstances with a PCA transform (k=3, seed=1) over the mnistTiny28 training
        // data, calls CheckEquality on the output file, and then verifies that the summed absolute
        // values of the transformed features strictly decrease with the feature index.
        public void TestPcaTransform()
        {
            // Force Microsoft.ML.Runtime.PCA assembly to be loaded into the AppDomain so
            // ReflectionUtils.FindClassCore does not return null when called by ReflectionUtils.CreateInstance
            // (expected value goes first so a failure message reports the values the right way round).
            Assert.AreEqual("PCAPredictor", typeof(PCAPredictor).Name);

            string trainData = GetDataPath(TestDatasets.mnistTiny28.trainFilename);
            string fileName  = TestContext.TestName + "-Train.txt";
            string outFile   = DeleteOutputPath(Dir, fileName);

            const int rank             = 3;
            string    pcaTransformArgs = string.Format("/inst Trans{{trans=pca {{k={0} seed=1}}}}", rank);
            var       argsStr1         = string.Format(
                "/c CreateInstances {0} {1} /rs=1 /cifile={2}",
                trainData,
                pcaTransformArgs,
                outFile);
            var args1 = new TLCArguments();

            Assert.IsTrue(CmdParser.ParseArguments(argsStr1, args1), "failed to parse the CreateInstances arguments");

            RunExperiments.Run(args1);
            CheckEquality(Dir, fileName);

            // Verify the scales of the transformed features decrease with respect to the feature index
            TlcTextInstances outputInstances = new TlcTextInstances(new TlcTextInstances.Arguments(), outFile);

            Double[] l1norms = new Double[rank];
            foreach (Instance instance in outputInstances)
            {
                Assert.IsTrue(instance.Features.Count == rank, "unexpected number of transformed features");

                // NOTE(review): norms are accumulated by position in Values, which presumably
                // assumes the features are stored densely - confirm against the vector type.
                for (int i = 0; i < instance.Features.Values.Length; i++)
                {
                    l1norms[i] += Math.Abs(instance.Features.Values[i]);
                }
            }

            for (int i = 0; i < l1norms.Length - 1; i++)
            {
                Assert.IsTrue(
                    l1norms[i] > l1norms[i + 1],
                    string.Format(
                        "L1 norm of feature {0} ({1}) is not greater than that of feature {2} ({3})",
                        i, l1norms[i], i + 1, l1norms[i + 1]));
            }

            Done();
        }