// Builds a two-row in-memory view, applies "Scaler{col=X}", calls Estimate() on the
// resulting transform and runs the serialization round-trip check on it.
// Throws if the transformed view contains no row.
public void TestI_ScalerTransformSerialize()
        {
            using (var host = EnvHelper.NewTestEnvironment())
            {
                var inputs = new[]
                {
                    new ExampleA { X = new float[] { 1, 10, 100 } },
                    new ExampleA { X = new float[] { 2, 3, 5 } }
                };

                IDataView loader = host.CreateStreamingDataView(inputs);
                var data = host.CreateTransform("Scaler{col=X}", loader);

                // Direct cast rather than 'as': if the transform does not implement
                // ITrainableTransform the test fails with an explicit InvalidCastException
                // instead of a NullReferenceException on the dereferenced 'as' result.
                ((ITrainableTransform)data).Estimate();

                // We create a specific folder in build/UnitTest which will contain the output.
                var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var outData = FileHelper.GetOutputFile("outData.txt", methodName);
                var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

                // Guard against a pipeline that silently produces nothing.
                var nb = DataViewUtils.ComputeRowCount(data);
                if (nb < 1)
                {
                    throw new Exception("empty view");
                }

                // This function serializes the output data twice, once before saving the pipeline,
                // once after loading the pipeline. It checks it gives the same result.
                TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
            }
        }
        // Stacks a "Scaler{col=X1:X}" transform and a "tag{t=memory}" transform on a two-row
        // in-memory view, then runs the serialization round-trip check on the result.
        public void TestTagViewTransform()
        {
            using (var env = EnvHelper.NewTestEnvironment())
            {
                var rows = new[]
                {
                    new ExampleA { X = new float[] { 0, 1 } },
                    new ExampleA { X = new float[] { 2, 3 } }
                };
                IDataView source = env.CreateStreamingDataView(rows);

                // Each stage gets its own local; the last one is what gets serialized.
                var scaled = env.CreateTransform("Scaler{col=X1:X}", source);
                var tagged = env.CreateTransform("tag{t=memory}", scaled);

                // Output files are grouped under a folder named after this test method.
                var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
                var firstDump = FileHelper.GetOutputFile("outData.txt", testName);
                var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

                TestTransformHelper.SerializationTestTransform(env, modelPath, tagged, source, firstDump, secondDump);
            }
        }
        // Applies "poly{col=poly:X d=3}" to a two-row in-memory view and runs the
        // serialization round-trip check on the resulting transform.
        public void TestI_PolynomialTransformSerialize()
        {
            using (var env = EnvHelper.NewTestEnvironment())
            {
                var rows = new[]
                {
                    new ExampleA { X = new float[] { 1, 10, 100 } },
                    new ExampleA { X = new float[] { 2, 3, 5 } }
                };
                IDataView source = env.CreateStreamingDataView(rows);
                var polynomial = env.CreateTransform("poly{col=poly:X d=3}", source);

                // A dedicated folder in build/UnitTest, named after this test, receives the output.
                var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
                var firstDump = FileHelper.GetOutputFile("outData.txt", testName);
                var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

                // The helper writes the output data before saving the pipeline and again after
                // reloading it, and checks that both dumps agree.
                TestTransformHelper.SerializationTestTransform(env, modelPath, polynomial, source, firstDump, secondDump);
            }
        }
        // Puts a "ChainTrans" transform (itself wrapping a Scaler and a Poly step) on top of a
        // "Scaler{col=X4:X}" transform and runs the serialization round-trip check on the result.
        public void TestChainTransformSerialize()
        {
            using (var env = EnvHelper.NewTestEnvironment())
            {
                var rows = new[]
                {
                    new ExampleA { X = new float[] { 1, 10, 100 } },
                    new ExampleA { X = new float[] { 2, 3, 5 } }
                };
                IDataView source = env.CreateStreamingDataView(rows);

                IDataTransform scaled = env.CreateTransform("Scaler{col=X4:X}", source);
                IDataTransform chained = env.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", scaled);

                // A dedicated folder in build/UnitTest, named after this test, receives the output.
                var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
                var firstDump = FileHelper.GetOutputFile("outData.txt", testName);
                var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

                TestTransformHelper.SerializationTestTransform(env, modelPath, chained, source, firstDump, secondDump);
            }
        }
        // Runs the "DBScan{col=Features}" transform on three_classes_2d.txt, checks the output
        // schema text mentions "Cluster" and "Score", saves the model and the flattened output,
        // sanity-checks the written file and finally runs the serialization round-trip check.
        // Throws an Exception describing the first check that fails.
        public void TestDBScanTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath = FileHelper.GetTestFile("three_classes_2d.txt");
            var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);

            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var loaderArgs = new TextLoader.Arguments()
                {
                    HasHeader = true,
                    Column = new[]
                    {
                        TextLoader.Column.Parse("RowId:R4:0"),
                        TextLoader.Column.Parse("Features:R4:1-2")
                    }
                };
                var loader = TextLoader.Create(env, loaderArgs, new MultiFileSource(dataFilePath));
                var xf = env.CreateTransform("DBScan{col=Features}", loader);

                string schema = SchemaHelper.ToString(xf.Schema);
                if (string.IsNullOrEmpty(schema))
                {
                    throw new Exception("Schema is null.");
                }
                if (!schema.Contains("Cluster"))
                {
                    throw new Exception("Schema does not contain Cluster.");
                }
                if (!schema.Contains("Score"))
                {
                    throw new Exception("Schema does not contain Score.");
                }

                StreamHelper.SaveModel(env, xf, outModelFilePath);

                var saver = env.CreateSaver("Text{header=- schema=-}");
                using (var fs2 = File.Create(outputDataFilePath))
                {
                    saver.SaveData(fs2, TestTransformHelper.AddFlatteningTransform(env, xf),
                                   StreamHelper.GetColumnsIndex(xf.Schema, new[] { "Features", "ClusterId", "Score" }));
                }

                // Checking the values.
                // The rows are inspected twice below, so the query is materialized once
                // instead of re-splitting and re-filtering the file content on each pass.
                var lines = File.ReadAllLines(outputDataFilePath)
                                .Select(c => c.Split('\t'))
                                .Where(c => c.Length == 4)
                                .ToArray();
                if (lines.Length == 0)
                {
                    throw new Exception(string.Format("The output file is empty or not containing three columns '{0}'", outputDataFilePath));
                }

                // NOTE(review): index 1 is the second tab-separated field of each kept row;
                // confirm this is the column the "cluster" check is meant to look at.
                var clusters = lines.Select(c => c[1]).Distinct();
                if (clusters.Count() <= 1)
                {
                    throw new Exception("Only one cluster, this is unexpected.");
                }

                // Serialization.
                var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
                var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
                TestTransformHelper.SerializationTestTransform(env, outModelFilePath, xf, loader, outData, outData2);
            }
        }
Beispiel #6
0
        // Builds a tagged train/test pipeline on mc_iris.txt (Concat, SplitTrainTest, SelectTag,
        // TagTrainScore, SelectTag, TagPredict), then checks that:
        //  - the pipeline schema and the schema of a cursor over it have the same text form,
        //  - the pipeline yields exactly 49 rows,
        //  - the text dump of the pipeline has between 40 and 70 lines,
        // and finally runs the serialization round-trip check.
        // Throws an Exception describing the first check that fails.
        public void TrainTestPipelinePredictTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath = FileHelper.GetTestFile("mc_iris.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));

                var pipe = env.CreateTransform("Concat{col=Features:Slength,Swidth}", loader);
                pipe = env.CreateTransform("SplitTrainTest{col=base tag=train tag=test}", pipe);
                pipe = env.CreateTransform("SelectTag{tag=unused selectTag=train}", pipe);
                pipe = env.CreateTransform(string.Format("TagTrainScore{{tag=trainP out={0} tr=mlr}}", outModelFilePath), pipe);
                pipe = env.CreateTransform("SelectTag{tag=scoredTrain selectTag=test}", pipe);
                pipe = env.CreateTransform("TagPredict{in=trainP}", pipe);

                string schema = SchemaHelper.ToString(pipe.Schema);

                // The cursor is only needed for its schema; dispose it as soon as the
                // comparison is done instead of leaking it for the rest of the test.
                using (var cursor = pipe.GetRowCursor(i => true))
                {
                    string schema2 = SchemaHelper.ToString(cursor.Schema);
                    if (schema != schema2)
                    {
                        throw new Exception("Schema mismatch.");
                    }
                }

                long count = DataViewUtils.ComputeRowCount(pipe);
                if (count != 49)
                {
                    throw new Exception(string.Format("Unexpected number of rows {0}", count));
                }

                // Checks the outputs.
                var saver = env.CreateSaver("Text");
                using (var fs2 = File.Create(outData))
                {
                    saver.SaveData(fs2, pipe, StreamHelper.GetColumnsIndex(pipe.Schema));
                }

                var lines = File.ReadAllLines(outData);
                if (lines.Length < 40)
                {
                    throw new Exception("Something is missing:" + string.Join("\n", lines));
                }
                if (lines.Length > 70)
                {
                    throw new Exception("Too much data:" + string.Join("\n", lines));
                }

                TestTransformHelper.SerializationTestTransform(env, outModelFilePath, pipe, loader, outData, outData2);
            }
        }
Beispiel #7
0
        // Runs the "OpticsOrd" transform on three_classes_2d.txt, checks the output schema text
        // mentions "Ordering" and "Reachability", saves the model and the flattened output,
        // sanity-checks the written file and finally runs the serialization round-trip check.
        // Throws an Exception describing the first check that fails.
        public void TestOpticsOrderingTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath = FileHelper.GetTestFile("three_classes_2d.txt");
            var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);

            // NOTE(review): the original had the 'using' around this environment commented out,
            // so the environment is deliberately left undisposed here as well. Confirm whether
            // disposal is safe for this transform before restoring the 'using'.
            var env = EnvHelper.NewTestEnvironment();

            var loader = env.CreateLoader("text{col=DataViewRowId:I4:0 col=Features:R4:1-2 header=+}",
                                          new MultiFileSource(dataFilePath));
            var xf = env.CreateTransform("OpticsOrd{col=Features epsilon=0.3 minPoints=6}", loader);

            string schema = SchemaHelper.ToString(xf.Schema);
            if (string.IsNullOrEmpty(schema))
            {
                throw new Exception("DataViewSchema is null.");
            }
            if (!schema.Contains("Ordering"))
            {
                throw new Exception("DataViewSchema does not contain Ordering.");
            }
            if (!schema.Contains("Reachability"))
            {
                throw new Exception("DataViewSchema does not contain Reachability.");
            }
            StreamHelper.SaveModel(env, xf, outModelFilePath);

            var saver = env.CreateSaver("Text{header=- schema=-}");
            using (var fs2 = File.Create(outputDataFilePath))
            {
                saver.SaveData(fs2, TestTransformHelper.AddFlatteningTransform(env, xf),
                               StreamHelper.GetColumnsIndex(xf.Schema, new[] { "Features", "Ordering", "Reachability" }));
            }

            // Checking the values.
            // The rows are inspected twice below, so the query is materialized once
            // instead of re-splitting and re-filtering the file content on each pass.
            var lines = File.ReadAllLines(outputDataFilePath)
                            .Select(c => c.Split('\t'))
                            .Where(c => c.Length == 4)
                            .ToArray();
            if (lines.Length == 0)
            {
                throw new Exception(string.Format("The output file is empty or not containing three columns '{0}'", outputDataFilePath));
            }

            // NOTE(review): index 1 is the second tab-separated field of each kept row;
            // confirm this is the column the distinct-value check is meant to look at.
            var clusters = lines.Select(c => c[1]).Distinct();
            if (clusters.Count() <= 1)
            {
                throw new Exception("Only one cluster, this is unexpected.");
            }

            // Serialization.
            var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
            TestTransformHelper.SerializationTestTransform(env, outModelFilePath, xf, loader, outData, outData2);
        }
Beispiel #8
0
        // Applies the "detrend" transform to a five-point in-memory series (X against time),
        // opens a cursor once over it and runs the serialization round-trip check.
        public void TestTimeSeriesDeTrendSerialize()
        {
            using (var env = EnvHelper.NewTestEnvironment())
            {
                var series = new[]
                {
                    new InputOutput { X = 7f, time = 0f },
                    new InputOutput { X = 7f, time = 1f },
                    new InputOutput { X = 9f, time = 2f },
                    new InputOutput { X = 9f, time = 3f },
                    new InputOutput { X = 8f, time = 4f },
                };
                IDataView source = env.CreateStreamingDataView(series);
                var detrended = env.CreateTransform("detrend{col=Y:X time=time optim=sasdcar{iter=50}}", source);

                // A cursor is opened and disposed right away; per the original comment this
                // is what trains the model.
                using (var warmUp = detrended.GetRowCursor(i => true))
                {
                }

                // Output files are grouped under a folder named after this test method.
                var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
                var firstDump = FileHelper.GetOutputFile("outData.txt", testName);
                var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

                TestTransformHelper.SerializationTestTransform(env, modelPath, detrended, source, firstDump, secondDump);
            }
        }
        // Chains Concat, Scaler and DumpView transforms on iris.txt, runs the serialization
        // round-trip check and then verifies that the DumpView target file exists.
        // Throws FileNotFoundException when the dump file is missing.
        public void TestI_PassThroughTransform()
        {
            var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var irisPath = FileHelper.GetTestFile("iris.txt");
            var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            var firstDump = FileHelper.GetOutputFile("outData1.txt", testName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);
            var viewDumpPath = FileHelper.GetOutputFile("dump.idv", testName);

            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var source = env.CreateLoader(
                    "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 col=Uid:TX:5 header=+}",
                    new MultiFileSource(irisPath));

                var concatenated = env.CreateTransform("Concat{col=Feat:Slength,Swidth}", source);
                var scaled = env.CreateTransform("Scaler{col=Feat}", concatenated);

                // The DumpView settings embed the target path, hence the formatted string.
                var dumpSettings = string.Format("DumpView{{s=+ f={0}}}", viewDumpPath);
                var dumping = env.CreateTransform(dumpSettings, scaled);

                TestTransformHelper.SerializationTestTransform(env, modelPath, dumping, source, firstDump, secondDump, false);

                // The dump file is expected on disk once the pipeline above has been run.
                bool dumpWritten = File.Exists(viewDumpPath);
                if (!dumpWritten)
                {
                    throw new FileNotFoundException(viewDumpPath);
                }
            }
        }
Beispiel #10
0
        // Runs the "knntr" transform on iris_binary_id.txt and checks, for every row, that the
        // two vectors read from columns 7 and 8 are dense, have the same length, that the first
        // one is sorted in non-decreasing order and that every value but the first of the second
        // one is odd. Ends with the serialization round-trip check.
        //
        // k, weight, threads and distance all go into the output folder name. In addition,
        // k == 1 selects an environment created with conc: 1, and distance == "cosine" inserts
        // a Scaler transform before the nearest-neighbours transform.
        // NOTE(review): the transform settings hard-code k=5 whatever the value of the k
        // parameter is, and weight/threads are not forwarded to it - confirm this is intended.
        public static void TrainkNNTransformId(int k, NearestNeighborsWeights weight, int threads, string distance = "L2")
        {
            var callerName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var methodName = string.Format("{0}-k{1}-W{2}-T{3}-D{4}", callerName, k, weight, threads, distance);
            var irisPath = FileHelper.GetTestFile("iris_binary_id.txt");
            var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var firstDump = FileHelper.GetOutputFile("outData1.txt", methodName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", methodName);

            var env = k == 1 ? EnvHelper.NewTestEnvironment(conc: 1) : EnvHelper.NewTestEnvironment();

            using (env)
            {
                var source = env.CreateLoader(
                    "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 col=Uid:I8:5 header=+}",
                    new MultiFileSource(irisPath));

                var pipeline = env.CreateTransform("Concat{col=Features:Slength,Swidth}", source);
                if (distance == "cosine")
                {
                    pipeline = env.CreateTransform("Scaler{col=Features}", pipeline);
                }
                pipeline = env.CreateTransform("knntr{k=5 id=Uid}", pipeline);

                long rowCount = DataViewUtils.ComputeRowCount(pipeline);
                if (rowCount == 0)
                {
                    throw new System.Exception("Empty pipeline.");
                }

                using (var rows = pipeline.GetRowCursor(i => true))
                {
                    var readDistances = rows.GetGetter <VBuffer <float> >(7);
                    var readIds = rows.GetGetter <VBuffer <long> >(8);
                    var distances = new VBuffer <float>();
                    var ids = new VBuffer <long>();

                    while (rows.MoveNext())
                    {
                        readDistances(ref distances);
                        readIds(ref ids);

                        if (!(distances.IsDense && ids.IsDense))
                        {
                            throw new System.Exception("not dense");
                        }
                        if (distances.Count != ids.Count)
                        {
                            throw new System.Exception("not the same dimension");
                        }

                        // Walks consecutive pairs (pos, pos + 1); the first id is never checked.
                        for (int pos = 0; pos + 1 < distances.Count; ++pos)
                        {
                            int next = pos + 1;
                            if (distances.Values[pos] > distances.Values[next])
                            {
                                throw new System.Exception("not sorted");
                            }
                            if (ids.Values[next] % 2 != 1)
                            {
                                throw new System.Exception("wrong id");
                            }
                        }
                    }
                }

                TestTransformHelper.SerializationTestTransform(env, modelPath, pipeline, source, firstDump, secondDump, false);
            }
        }