Esempio n. 1
0
        /// <summary>
        /// Builds a value-to-key mapping followed by a key-to-binary-vector mapping, saves the
        /// transformed data's model with <c>TrainUtils.SaveModel</c> and reloads the transforms
        /// with <c>ModelFileUtils.LoadTransforms</c>. The test makes no assertions; it passes as
        /// long as none of the calls throws.
        /// </summary>
        public void TestOldSavingAndLoading()
        {
            var data = new[] {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);
            var est = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC")
            });
            var transformer = est.Fit(dataView);

            // From here on dataView carries the key columns produced by the term mapping.
            dataView = transformer.Transform(dataView);
            var pipe = new KeyToBinaryVectorMappingEstimator(Env,
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermA", "CatA"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermB", "CatB"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermC", "CatC"));
            var result = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            // The channel returned by Env.Start is disposable; scope it so it is released
            // even when saving or loading throws.
            using (var ms = new MemoryStream())
            using (var ch = Env.Start("saving"))
            {
                TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);

                // Rewind so the loader reads the model that was just written.
                ms.Position = 0;
                ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Fits a value-to-key mapping from column "Term" to column "T" and checks that the
        /// output column carries a non-empty set of key values in its metadata.
        /// </summary>
        void TestMetadataCopy()
        {
            var data = new[] {
                new TestMetaClass() { Term = "A", NotUsed = 1 },
                new TestMetaClass() { Term = "B" },
                new TestMetaClass() { Term = "C" }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);
            var termEst = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingTransformer.ColumnInfo("Term", "T")
            });

            var termTransformer = termEst.Fit(dataView);
            var result = termTransformer.Transform(dataView);

            // Fail explicitly when the output column is missing instead of continuing
            // with whatever index the failed lookup left behind.
            Assert.True(result.Schema.TryGetColumnIndex("T", out int termIndex));

            var names1 = default(VBuffer<ReadOnlyMemory<char>>);
            result.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, termIndex, ref names1);
            Assert.True(names1.GetValues().Length > 0);
        }
        /// <summary>
        /// Fits a value-to-key mapping that produces column "T" from column "Term" and checks
        /// that the output column exposes a non-empty set of key values.
        /// </summary>
        void TestMetadataCopy()
        {
            var data = new[] {
                new TestMetaClass() { Term = "A", NotUsed = 1 },
                new TestMetaClass() { Term = "B" },
                new TestMetaClass() { Term = "C" }
            };
            var dataView = ML.Data.ReadFromEnumerable(data);
            var termEst = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingEstimator.ColumnInfo("T", "Term")
            });

            var termTransformer = termEst.Fit(dataView);
            var result = termTransformer.Transform(dataView);

            // Fail explicitly when the output column is missing instead of continuing
            // with whatever index the failed lookup left behind.
            Assert.True(result.Schema.TryGetColumnIndex("T", out int termIndex));

            var names1 = default(VBuffer<ReadOnlyMemory<char>>);
            result.Schema[termIndex].GetKeyValues(ref names1);
            Assert.True(names1.GetValues().Length > 0);
        }
        /// <summary>
        /// Fits a value-to-key mapping on columns A, B and C, saves the transformed data's model
        /// with <c>TrainUtils.SaveModel</c>, reloads the transforms with
        /// <c>ModelFileUtils.LoadTransforms</c> and passes the reloaded view to
        /// <c>ValidateTermTransformer</c>.
        /// </summary>
        void TestOldSavingAndLoading()
        {
            var data = new[] {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var dataView = ML.Data.ReadFromEnumerable(data);
            var est = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C")
            });
            var transformer = est.Fit(dataView);
            var result = transformer.Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            // The channel returned by Env.Start is disposable; scope it so it is released
            // even when saving, loading or validation throws.
            using (var ms = new MemoryStream())
            using (var ch = Env.Start("saving"))
            {
                TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);

                // Rewind so the loader reads the model that was just written.
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
                ValidateTermTransformer(loadedView);
            }
        }
        /// <summary>
        /// Builds a value-to-key mapping followed by a key-to-vector mapping on two of the key
        /// columns, saves the transformed data's model with <c>TrainUtils.SaveModel</c> and reloads
        /// the transforms with <c>ModelFileUtils.LoadTransforms</c>. The test makes no assertions;
        /// it passes as long as none of the calls throws.
        /// </summary>
        public void TestOldSavingAndLoading()
        {
            var data = new[] {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var dataView = ML.Data.LoadFromEnumerable(data);
            var est = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"),
                new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B"),
                new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C")
            });
            var transformer = est.Fit(dataView);

            // From here on dataView carries the key columns produced by the term mapping.
            dataView = transformer.Transform(dataView);
            var pipe = ML.Transforms.Conversion.MapKeyToVector(
                new KeyToVectorMappingEstimator.ColumnOptions("CatA", "TermA", false),
                new KeyToVectorMappingEstimator.ColumnOptions("CatB", "TermB", true));
            var result = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            // The channel returned by Env.Start is disposable; scope it so it is released
            // even when saving or loading throws.
            using (var ms = new MemoryStream())
            using (var ch = Env.Start("saving"))
            {
                TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);

                // Rewind so the loader reads the model that was just written.
                ms.Position = 0;
                ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
        /// <summary>
        /// Builds the image-classification pipeline (label-to-key, image load/resize/pixel
        /// extraction, TensorFlow featurizer, SDCA multiclass trainer, key-to-value), fits it on
        /// the training file, prints per-image predictions and log-loss metrics computed on the
        /// training data, and writes the fitted model to <c>outputModelLocation</c>.
        /// </summary>
        public void BuildAndTrain()
        {
            var featurizerModelLocation = inputModelLocation;

            ConsoleWriteHeader("Read model");
            Console.WriteLine($"Model location: {featurizerModelLocation}");
            Console.WriteLine($"Images folder: {imagesFolder}");
            Console.WriteLine($"Training file: {dataLocation}");
            Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}");

            // Two text columns are read from the training file: image path (0) and label (1).
            var textLoader = new TextLoader(env, new TextLoader.Arguments
            {
                Column = new[]
                {
                    new TextLoader.Column("ImagePath", DataKind.Text, 0),
                    new TextLoader.Column("Label", DataKind.Text, 1)
                }
            });

            // Featurization stages: label -> key, load image, resize, extract pixels, run the TensorFlow model.
            // NOTE(review): the resize call receives (imageHeight, imageWidth) in that order;
            // confirm this matches the estimator's parameter order.
            var imageFeaturizer = new ValueToKeyMappingEstimator(env, "Label", "LabelTokey")
                .Append(new ImageLoadingEstimator(env, imagesFolder, ("ImagePath", "ImageReal")))
                .Append(new ImageResizingEstimator(env, "ImageReal", "ImageReal", ImageNetSettings.imageHeight, ImageNetSettings.imageWidth))
                .Append(new ImagePixelExtractingEstimator(env, new[]
                {
                    new ImagePixelExtractorTransform.ColumnInfo("ImageReal", "input", interleave: ImageNetSettings.channelsLast, offset: ImageNetSettings.mean)
                }))
                .Append(new TensorFlowEstimator(env, featurizerModelLocation, new[] { "input" }, new[] { "softmax2_pre_activation" }));

            // Trainer on the TensorFlow output, then map the predicted key back to its label value.
            var estimatorChain = imageFeaturizer
                .Append(new SdcaMultiClassTrainer(env, "softmax2_pre_activation", "LabelTokey"))
                .Append(new KeyToValueEstimator(env, ("PredictedLabel", "PredictedLabelValue")));

            // Fit the whole chain on the training file.
            ConsoleWriteHeader("Training classification model");
            var trainingSet = textLoader.Read(new MultiFileSource(dataLocation));
            var model = estimatorChain.Fit(trainingSet);

            // Optional: score the training data with the fitted model, handy when debugging.
            var trainData = model.Transform(trainingSet);
            var loadedModelOutputColumnNames = trainData.Schema.GetColumnNames();
            var predictions = trainData.AsEnumerable <ImageNetPipeline>(env, false, true).ToList();

            foreach (var pr in predictions)
            {
                ConsoleWriteImagePrediction(pr.ImagePath, pr.PredictedLabelValue, pr.Score.Max());
            }

            // Metrics are computed on the training data itself.
            var evaluationContext = new MulticlassClassificationContext(env);

            ConsoleWriteHeader("Classification metrics");
            var metrics = evaluationContext.Evaluate(trainData, label: "LabelTokey", predictedLabel: "PredictedLabel");

            Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
            Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");

            // Persist the fitted model, clearing any previous output first.
            ConsoleWriteHeader("Save model to local file");
            ModelHelpers.DeleteAssets(outputModelLocation);
            using (var outputStream = new FileStream(outputModelLocation, FileMode.Create))
            {
                model.SaveTo(env, outputStream);
            }

            Console.WriteLine($"Model saved: {outputModelLocation}");
        }
 /// <summary>
 /// Fits <paramref name="term"/> on <paramref name="input"/> and stores the fitted transformer.
 /// When <paramref name="toVector"/> is supplied it is appended to <paramref name="term"/>
 /// before fitting; otherwise the fitted term transformer is wrapped in a chain on its own.
 /// </summary>
 public CategoricalTransform(ValueToKeyMappingEstimator term, IEstimator <ITransformer> toVector, IDataView input)
 {
     if (toVector == null)
     {
         // No second stage: the chain holds only the fitted term mapping.
         _transformer = new TransformerChain <ITransformer>(term.Fit(input));
         return;
     }

     _transformer = term.Append(toVector).Fit(input);
 }
Esempio n. 8
0
        /// <summary>
        /// Maps columns A-D to key columns TA-TD, converts those keys to binary vectors
        /// CatA-CatD, and passes the resulting view to <c>ValidateMetadata</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 },
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 5, 3 }, D = 1 },
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 }
            };

            var input = ML.Data.LoadFromEnumerable(rows);

            // Stage 1: values -> keys.
            var keyColumns = new[]
            {
                new ValueToKeyMappingEstimator.ColumnOptions("TA", "A", addKeyValueAnnotationsAsText: true),
                new ValueToKeyMappingEstimator.ColumnOptions("TB", "B", addKeyValueAnnotationsAsText: true),
                new ValueToKeyMappingEstimator.ColumnOptions("TC", "C"),
                new ValueToKeyMappingEstimator.ColumnOptions("TD", "D")
            };
            var keyed = new ValueToKeyMappingEstimator(Env, keyColumns).Fit(input).Transform(input);

            // Stage 2: keys -> binary vectors.
            var toBinaryVector = ML.Transforms.Conversion.MapKeyToBinaryVector(new[]
            {
                new InputOutputColumnPair("CatA", "TA"),
                new InputOutputColumnPair("CatB", "TB"),
                new InputOutputColumnPair("CatC", "TC"),
                new InputOutputColumnPair("CatD", "TD")
            });
            var transformed = toBinaryVector.Fit(keyed).Transform(keyed);

            ValidateMetadata(transformed);
            Done();
        }
Esempio n. 9
0
        /// <summary>
        /// Maps columns A-D to key columns TA-TD, converts those keys to binary vectors
        /// CatA-CatD, and passes the resulting view to <c>ValidateMetadata</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 },
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 5, 3 }, D = 1 },
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 }
            };

            var input = ComponentCreation.CreateDataView(Env, rows);

            // Stage 1: values -> keys.
            var keyColumns = new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TA", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TB", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TC"),
                new ValueToKeyMappingTransformer.ColumnInfo("D", "TD")
            };
            var keyed = new ValueToKeyMappingEstimator(Env, keyColumns).Fit(input).Transform(input);

            // Stage 2: keys -> binary vectors.
            var toBinaryVector = new KeyToBinaryVectorMappingEstimator(
                Env,
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TA", "CatA"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TB", "CatB"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TC", "CatC"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TD", "CatD"));
            var transformed = toBinaryVector.Fit(keyed).Transform(keyed);

            ValidateMetadata(transformed);
            Done();
        }
Esempio n. 10
0
        /// <summary>
        /// Loads float, double, int and text columns (scalar and multi-range) from "adult.test",
        /// applies a value-to-key mapping to each, writes the first 10 transformed rows to
        /// "Term/Term.tsv" and compares that file through <c>CheckEquality</c>.
        /// </summary>
        void TestDifferentTypes()
        {
            string dataPath = GetDataPath("adult.test");

            var loaderArgs = new TextLoader.Arguments
            {
                Column = new[]
                {
                    new TextLoader.Column("float1", DataKind.R4, 0),
                    new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
                    new TextLoader.Column("double1", DataKind.R8, 0),
                    new TextLoader.Column("double4", DataKind.R8, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
                    new TextLoader.Column("int1", DataKind.I4, 0),
                    new TextLoader.Column("text1", DataKind.TX, 1),
                    new TextLoader.Column("text2", DataKind.TX, new[] { new TextLoader.Range(1), new TextLoader.Range(3) }),
                },
                Separator = ",",
                HasHeader = true
            };
            var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(dataPath));

            // One key mapping per loaded column.
            var termEstimator = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("float1", "TermFloat1"),
                new ValueToKeyMappingTransformer.ColumnInfo("float4", "TermFloat4"),
                new ValueToKeyMappingTransformer.ColumnInfo("double1", "TermDouble1"),
                new ValueToKeyMappingTransformer.ColumnInfo("double4", "TermDouble4"),
                new ValueToKeyMappingTransformer.ColumnInfo("int1", "TermInt1"),
                new ValueToKeyMappingTransformer.ColumnInfo("text1", "TermText1"),
                new ValueToKeyMappingTransformer.ColumnInfo("text2", "TermText2")
            });

            // Only the first 10 rows are fitted on and saved.
            var rows = loader.Read(dataPath);
            rows = TakeFilter.Create(Env, rows, 10);

            var outputPath = GetOutputPath("Term", "Term.tsv");
            using (var ch = Env.Start("save"))
            {
                var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
                using (var fs = File.Create(outputPath))
                {
                    var transformed = termEstimator.Fit(rows).Transform(rows);
                    DataSaverUtils.SaveDataView(ch, saver, transformed, fs, keepHidden: true);
                }
            }

            CheckEquality("Term", "Term.tsv");
            Done();
        }
        /// <summary>
        /// Loads float, double, int and text columns (scalar and multi-range) from
        /// "adult.tiny.with-schema.txt", applies a value-to-key mapping to each, writes the first
        /// 10 transformed rows to "Term/Term.tsv" and compares that file through <c>CheckEquality</c>.
        /// </summary>
        void TestDifferentTypes()
        {
            string dataPath = GetDataPath("adult.tiny.with-schema.txt");

            var loaderOptions = new TextLoader.Options
            {
                Columns = new[]
                {
                    new TextLoader.Column("float1", DataKind.Single, 9),
                    new TextLoader.Column("float4", DataKind.Single, new[] { new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) }),
                    new TextLoader.Column("double1", DataKind.Double, 9),
                    new TextLoader.Column("double4", DataKind.Double, new[] { new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) }),
                    new TextLoader.Column("int1", DataKind.Int32, 9),
                    new TextLoader.Column("text1", DataKind.String, 1),
                    new TextLoader.Column("text2", DataKind.String, new[] { new TextLoader.Range(1), new TextLoader.Range(2) }),
                },
                Separator = "\t",
                HasHeader = true
            };
            var loader = new TextLoader(ML, loaderOptions, new MultiFileSource(dataPath));

            // One key mapping per loaded column.
            var termEstimator = new ValueToKeyMappingEstimator(ML, new[]
            {
                new ValueToKeyMappingEstimator.ColumnInfo("TermFloat1", "float1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermFloat4", "float4"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermDouble1", "double1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermDouble4", "double4"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermInt1", "int1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermText1", "text1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermText2", "text2")
            });

            // Only the first 10 rows are fitted on and saved.
            var rows = loader.Read(dataPath);
            rows = ML.Data.TakeRows(rows, 10);

            var outputPath = GetOutputPath("Term", "Term.tsv");
            using (var ch = Env.Start("save"))
            {
                var saver = new TextSaver(ML, new TextSaver.Arguments { Silent = true });
                using (var fs = File.Create(outputPath))
                {
                    var transformed = termEstimator.Fit(rows).Transform(rows);
                    DataSaverUtils.SaveDataView(ch, saver, transformed, fs, keepHidden: true);
                }
            }

            CheckEquality("Term", "Term.tsv");
            Done();
        }
Esempio n. 12
0
        /// <summary>
        /// Maps columns A-H to key columns TA-TH, converts those keys to vectors CatA-CatH,
        /// and passes the resulting view to <c>ValidateMetadata</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta
                {
                    A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6,
                    E = new[] { 1.0f, 2.0f }, F = 1.0f, G = new[] { "A", "D" }, H = "D"
                },
                new TestMeta
                {
                    A = new[] { "A", "B" }, B = "C", C = new[] { 5, 3 }, D = 1,
                    E = new[] { 3.0f, 4.0f }, F = -1.0f, G = new[] { "E", "A" }, H = "E"
                },
                new TestMeta
                {
                    A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6,
                    E = new[] { 5.0f, 6.0f }, F = 1.0f, G = new[] { "D", "E" }, H = "D"
                }
            };

            var input = ML.Data.ReadFromEnumerable(rows);

            // Stage 1: values -> keys.
            var keyColumns = new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TA", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TB"),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TC", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("D", "TD", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("E", "TE"),
                new ValueToKeyMappingTransformer.ColumnInfo("F", "TF"),
                new ValueToKeyMappingTransformer.ColumnInfo("G", "TG"),
                new ValueToKeyMappingTransformer.ColumnInfo("H", "TH", textKeyValues: true)
            };
            var keyed = new ValueToKeyMappingEstimator(Env, keyColumns).Fit(input).Transform(input);

            // Stage 2: keys -> vectors.
            var toVector = new KeyToVectorMappingEstimator(
                Env,
                new KeyToVectorMappingTransformer.ColumnInfo("TA", "CatA", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TB", "CatB", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TC", "CatC", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TD", "CatD", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TE", "CatE", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TF", "CatF", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TG", "CatG", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TH", "CatH", false));
            var transformed = toVector.Fit(keyed).Transform(keyed);

            ValidateMetadata(transformed);
            Done();
        }