Esempio n. 1
0
        /// <summary>
        /// Reads the file located by <c>IrisDataPath</c> as comma-separated text and pairs the resulting
        /// data view with an estimator that maps the "Label" column to keys.
        /// </summary>
        /// <returns>A tuple of the value-to-key estimator and the loaded data.</returns>
        private (IEstimator<ITransformer>, IDataView) GetMultiClassPipeline()
        {
            // Source columns 0..3 are read into one R4 vector; source column 4 is the text label.
            var featureColumn = new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(0, 3) });
            var labelColumn = new TextLoader.Column("Label", DataKind.Text, 4);

            var loaderArgs = new TextLoader.Arguments();
            loaderArgs.Separator = "comma";
            loaderArgs.Column = new[] { featureColumn, labelColumn };

            var loader = new TextLoader(Env, loaderArgs);
            var data = loader.Read(GetDataPath(IrisDataPath));

            var labelToKey = new ValueToKeyMappingEstimator(Env, "Label");
            return (labelToKey, data);
        }
Esempio n. 2
0
        /// <summary>
        /// Applies a value-to-key mapping to float, double, int and text columns (scalar and vector)
        /// read from "adult.test", writes the first ten transformed rows to the "Term.tsv" output file,
        /// and then calls <c>CheckEquality</c> on that output.
        /// </summary>
        void TestDifferentTypes()
        {
            string dataPath = GetDataPath("adult.test");

            // The same source columns are loaded under several types so each type goes through the mapping.
            var columns = new[]
            {
                new TextLoader.Column("float1", DataKind.R4, 0),
                new TextLoader.Column("float4", DataKind.R4, new[]
                {
                    new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10)
                }),
                new TextLoader.Column("double1", DataKind.R8, 0),
                new TextLoader.Column("double4", DataKind.R8, new[]
                {
                    new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10)
                }),
                new TextLoader.Column("int1", DataKind.I4, 0),
                new TextLoader.Column("text1", DataKind.TX, 1),
                new TextLoader.Column("text2", DataKind.TX, new[] { new TextLoader.Range(1), new TextLoader.Range(3) }),
            };
            var loaderArgs = new TextLoader.Arguments
            {
                Column = columns,
                Separator = ",",
                HasHeader = true
            };
            var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(dataPath));

            var mappings = new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("float1", "TermFloat1"),
                new ValueToKeyMappingTransformer.ColumnInfo("float4", "TermFloat4"),
                new ValueToKeyMappingTransformer.ColumnInfo("double1", "TermDouble1"),
                new ValueToKeyMappingTransformer.ColumnInfo("double4", "TermDouble4"),
                new ValueToKeyMappingTransformer.ColumnInfo("int1", "TermInt1"),
                new ValueToKeyMappingTransformer.ColumnInfo("text1", "TermText1"),
                new ValueToKeyMappingTransformer.ColumnInfo("text2", "TermText2")
            };
            var pipe = new ValueToKeyMappingEstimator(Env, mappings);

            var data = loader.Read(dataPath);
            // Only the first ten rows are kept for the saved output.
            data = TakeFilter.Create(Env, data, 10);

            var outputPath = GetOutputPath("Term", "Term.tsv");
            using (var ch = Env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments { Silent = true };
                var saver = new TextSaver(Env, saverArgs);
                using (var fs = File.Create(outputPath))
                {
                    var transformed = pipe.Fit(data).Transform(data);
                    DataSaverUtils.SaveDataView(ch, saver, transformed, fs, keepHidden: true);
                }
            }

            CheckEquality("Term", "Term.tsv");
            Done();
        }
Esempio n. 3
0
        /// <summary>
        /// Exercises <see cref="KeyToValueMappingEstimator"/>: two string columns of "iris.txt" are mapped
        /// to keys, the key-to-value estimator is run through <c>TestEstimatorCore</c> with two invalid
        /// inputs, and its output is saved to "featurized.tsv" and passed to <c>CheckEquality</c>.
        /// </summary>
        public void KeyToValueWorkout()
        {
            string dataPath = GetDataPath("iris.txt");

            var loaderOptions = new TextLoader.Options
            {
                Columns = new[]
                {
                    new TextLoader.Column("ScalarString", DataKind.String, 1),
                    new TextLoader.Column("VectorString", DataKind.String, new[] { new TextLoader.Range(1, 4) }),
                    new TextLoader.Column("BareKey", DataKind.UInt32, new[] { new TextLoader.Range(0) }, new KeyCount(6))
                }
            };
            var reader = new TextLoader(Env, loaderOptions);
            var data = reader.Read(dataPath);

            // Produce key columns "A" (scalar) and "B" (vector) for the estimator under test.
            var toKeys = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingEstimator.ColumnInfo("A", "ScalarString"),
                new ValueToKeyMappingEstimator.ColumnInfo("B", "VectorString")
            });
            data = toKeys.Fit(data).Transform(data);

            // Invalid inputs: "A" overwritten with a copy of "BareKey", and "B" overwritten with the raw string vector.
            var bareKeyAsA = new ColumnCopyingTransformer(Env, ("A", "BareKey")).Transform(data);
            var stringsAsB = new ColumnCopyingTransformer(Env, ("B", "VectorString")).Transform(data);

            var est = new KeyToValueMappingEstimator(Env, ("A_back", "A"), ("B_back", "B"));

            TestEstimatorCore(est, data, invalidInput: bareKeyAsA);
            TestEstimatorCore(est, data, invalidInput: stringsAsB);

            var outputPath = GetOutputPath("KeyToValue", "featurized.tsv");
            using (var ch = Env.Start("save"))
            {
                var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
                IDataView savedData = est.Fit(data).Transform(data);
                using (var fs = File.Create(outputPath))
                {
                    DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
                }
            }

            CheckEquality("KeyToValue", "featurized.tsv");
            Done();
        }
        /// <summary>
        /// Applies a value-to-key mapping to float, double, int and text columns (scalar and vector)
        /// read from "adult.tiny.with-schema.txt", writes the first ten transformed rows to the
        /// "Term.tsv" output file, and then calls <c>CheckEquality</c> on that output.
        /// </summary>
        void TestDifferentTypes()
        {
            string dataPath = GetDataPath("adult.tiny.with-schema.txt");

            // Source column 9 (and 9..12 for the vectors) is loaded under several numeric types.
            var columns = new[]
            {
                new TextLoader.Column("float1", DataKind.Single, 9),
                new TextLoader.Column("float4", DataKind.Single, new[]
                {
                    new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12)
                }),
                new TextLoader.Column("double1", DataKind.Double, 9),
                new TextLoader.Column("double4", DataKind.Double, new[]
                {
                    new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12)
                }),
                new TextLoader.Column("int1", DataKind.Int32, 9),
                new TextLoader.Column("text1", DataKind.String, 1),
                new TextLoader.Column("text2", DataKind.String, new[] { new TextLoader.Range(1), new TextLoader.Range(2) }),
            };
            var loaderOptions = new TextLoader.Options
            {
                Columns = columns,
                Separator = "\t",
                HasHeader = true
            };
            var loader = new TextLoader(ML, loaderOptions, new MultiFileSource(dataPath));

            var mappings = new[]
            {
                new ValueToKeyMappingEstimator.ColumnInfo("TermFloat1", "float1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermFloat4", "float4"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermDouble1", "double1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermDouble4", "double4"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermInt1", "int1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermText1", "text1"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermText2", "text2")
            };
            var pipe = new ValueToKeyMappingEstimator(ML, mappings);

            var data = loader.Read(dataPath);
            // Only the first ten rows are kept for the saved output.
            data = ML.Data.TakeRows(data, 10);

            var outputPath = GetOutputPath("Term", "Term.tsv");
            using (var ch = Env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments { Silent = true };
                var saver = new TextSaver(ML, saverArgs);
                using (var fs = File.Create(outputPath))
                {
                    var transformed = pipe.Fit(data).Transform(data);
                    DataSaverUtils.SaveDataView(ch, saver, transformed, fs, keepHidden: true);
                }
            }

            CheckEquality("Term", "Term.tsv");
            Done();
        }
Esempio n. 5
0
 /// <summary>
 /// Fits a value-to-key mapping on a two-row in-memory data set, saves the transformed data's model
 /// to a memory stream via <c>TrainUtils.SaveModel</c>, reloads the transforms from that stream and
 /// passes the reloaded view to <c>ValidateTermTransformer</c>.
 /// </summary>
 void TestOldSavingAndLoading()
 {
     var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } };
     var dataView = ML.Data.ReadFromEnumerable(data);
     var est = new ValueToKeyMappingEstimator(Env, new[]{
             new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
             new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
             new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC")
         });
     var transformer = est.Fit(dataView);
     var result = transformer.Transform(dataView);
     var resultRoles = new RoleMappedData(result);
     using (var ms = new MemoryStream())
     {
         // The channel returned by Env.Start is disposable; scope it to the save call so it is
         // released instead of being leaked as an inline argument.
         using (var ch = Env.Start("saving"))
         {
             TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);
         }

         // Rewind so the loader reads the model from the beginning of the stream.
         ms.Position = 0;
         var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
         ValidateTermTransformer(loadedView);
     }
 }
        /// <summary>
        /// Loads the <c>adultRanking</c> training file as tab-separated text with a header and pairs it
        /// with an estimator that maps "Workclass" to the key column "Group" and "Label" to "Label0".
        /// </summary>
        /// <returns>A tuple of the value-to-key estimator and the loaded data.</returns>
        private (IEstimator<ITransformer>, IDataView) GetRankingPipeline()
        {
            var loaderOptions = new TextLoader.Options
            {
                HasHeader = true,
                Separator = "\t",
                Columns = new[]
                {
                    new TextLoader.Column("Label", DataKind.Single, 0),
                    new TextLoader.Column("Workclass", DataKind.String, 1),
                    new TextLoader.Column("NumericFeatures", DataKind.Single, new[] { new TextLoader.Range(9, 14) })
                }
            };
            var loader = new TextLoader(Env, loaderOptions);
            var data = loader.Load(GetDataPath(TestDatasets.adultRanking.trainFilename));

            // Output column name comes first, input column name second.
            var keyColumns = new[]
            {
                new ValueToKeyMappingEstimator.ColumnOptions("Group", "Workclass"),
                new ValueToKeyMappingEstimator.ColumnOptions("Label0", "Label")
            };
            var pipeline = new ValueToKeyMappingEstimator(Env, keyColumns);

            return (pipeline, data);
        }
        /// <summary>
        /// Maps two string columns of the <c>breastCancer</c> training file to keys and runs a chain of
        /// three key-to-vector mappings through <c>TestEstimatorCore</c>, using the un-keyed data as
        /// the invalid input.
        /// </summary>
        public void KeyToVector()
        {
            string dataPath = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var loaderColumns = new[]
            {
                new TextLoader.Column("ScalarString", DataKind.String, 0),
                new TextLoader.Column("VectorString", DataKind.String, 1, 4),
            };
            var data = ML.Data.LoadFromTextFile(dataPath, loaderColumns);

            // Key columns "A" and "B" feed the key-to-vector estimators below.
            var toKeys = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingEstimator.ColumnOptions("A", "ScalarString"),
                new ValueToKeyMappingEstimator.ColumnOptions("B", "VectorString")
            });
            var transformedData = toKeys.Fit(data).Transform(data);

            var scalarStep = ML.Transforms.Conversion.MapKeyToVector("ScalarString", "A");
            var vectorStep = ML.Transforms.Conversion.MapKeyToVector("VectorString", "B");
            var baggedStep = ML.Transforms.Conversion.MapKeyToVector("VectorBaggedString", "B", true);
            var est = scalarStep.Append(vectorStep).Append(baggedStep);

            TestEstimatorCore(est, transformedData, invalidInput: data);

            Done();
        }
        /// <summary>
        /// Reads the <c>adultRanking</c> training file as tab-separated text with a header and pairs it
        /// with a value-to-key estimator built from the ("Workclass", "Group") and ("Label", "Label0")
        /// column pairs.
        /// </summary>
        /// <returns>A tuple of the value-to-key estimator and the loaded data.</returns>
        private (IEstimator<ITransformer>, IDataView) GetRankingPipeline()
        {
            var loaderArgs = new TextLoader.Arguments
            {
                HasHeader = true,
                Separator = "\t",
                Column = new[]
                {
                    new TextLoader.Column("Label", DataKind.R4, 0),
                    new TextLoader.Column("Workclass", DataKind.Text, 1),
                    new TextLoader.Column("NumericFeatures", DataKind.R4, new[] { new TextLoader.Range(9, 14) })
                }
            };
            var loader = new TextLoader(Env, loaderArgs);
            var data = loader.Read(GetDataPath(TestDatasets.adultRanking.trainFilename));

            // Pipeline.
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("Workclass", "Group"),
                new TermTransform.ColumnInfo("Label", "Label0")
            };
            var pipeline = new ValueToKeyMappingEstimator(Env, termColumns);

            return (pipeline, data);
        }
        /// <summary>
        /// Builds the training pipeline (label key mapping, text featurization of the Text, Intent and
        /// PreviousIntents fields, concatenation, SDCA maximum-entropy trainer, key-to-value on the
        /// prediction), fits it on <paramref name="data"/> and creates the prediction engine.
        /// </summary>
        /// <param name="data">Training data; fitted against the whole pipeline.</param>
        private void buildAndTrainModel(IDataView data)
        {
            // NOTE(review): the label is keyed from MLEntry.Text, which is also featurized below —
            // confirm this is the intended label source.
            _answerKeyEstimator = _mlContext.Transforms.Conversion.MapValueToKey(
                inputColumnName: nameof(MLEntry.Text),
                outputColumnName: "Label");

            var trainingPipeline =
                _answerKeyEstimator
                .Append(_mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(MLEntry.Text),
                                                                 outputColumnName: "TextFeaturized"))
                .Append(_mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(MLEntry.Intent),
                                                                 outputColumnName: "IntentFeaturized"))
                .Append(_mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(MLEntry.PreviousIntents),
                                                                 outputColumnName: "PreviousIntentFeaturized"))
                .Append(_mlContext.Transforms.Concatenate("Featurized", "TextFeaturized", "IntentFeaturized", "PreviousIntentFeaturized"))
                .AppendCacheCheckpoint(_mlContext)
                // The trainer must read the column produced by Concatenate above ("Featurized");
                // no step in this pipeline produces a "QuestionFeaturized" column.
                .Append(_mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Featurized"))
                .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            _trainedModel     = trainingPipeline.Fit(data);
            _predictionEngine = _mlContext.Model.CreatePredictionEngine <MLEntry, IntentPrediction>(_trainedModel);
        }
Esempio n. 10
0
        /// <summary>
        /// Maps columns A, B and C of a two-row in-memory data set to keys and runs a key-to-binary-vector
        /// mapping of "TermA" and "TermC" through <c>TestEstimatorCore</c>.
        /// </summary>
        public void KeyToBinaryVectorWorkout()
        {
            var firstRow = new TestClass() { A = 1, B = 2, C = 3, };
            var secondRow = new TestClass() { A = 4, B = 5, C = 6 };
            var data = new[] { firstRow, secondRow };

            var dataView = ML.Data.ReadFromEnumerable(data);

            // Only "TermC" is created with textKeyValues enabled.
            var toKeys = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B"),
                new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C", textKeyValues: true)
            });
            dataView = toKeys.Fit(dataView).Transform(dataView);

            var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(("CatA", "TermA"), ("CatC", "TermC"));

            TestEstimatorCore(pipe, dataView);
            Done();
        }
Esempio n. 11
0
        /// <summary>
        /// Maps columns A, B and C of a two-row in-memory data set to keys and runs a key-to-binary-vector
        /// mapping of "TermA" and "TermC" through <c>TestEstimatorCore</c>.
        /// </summary>
        public void KeyToBinaryVectorWorkout()
        {
            var firstRow = new TestClass() { A = 1, B = 2, C = 3, };
            var secondRow = new TestClass() { A = 4, B = 5, C = 6 };
            var data = new[] { firstRow, secondRow };

            var dataView = ML.Data.LoadFromEnumerable(data);

            // Only "TermC" is created with addKeyValueAnnotationsAsText enabled.
            var toKeys = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"),
                new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B"),
                new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C", addKeyValueAnnotationsAsText: true)
            });
            dataView = toKeys.Fit(dataView).Transform(dataView);

            var columnPairs = new[]
            {
                new InputOutputColumnPair("CatA", "TermA"),
                new InputOutputColumnPair("CatC", "TermC")
            };
            var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(columnPairs);

            TestEstimatorCore(pipe, dataView);
            Done();
        }
Esempio n. 12
0
        /// <summary>
        /// Statically-typed key-to-value test: string columns from "breast-cancer.txt" are mapped to keys
        /// with the dynamic estimator, converted back with <c>ToValue</c>, and the result is compared
        /// with the original columns via <c>CheckSameSchemas</c> and <c>CheckSameValues</c>.
        /// </summary>
        public void KeyToValuePigsty()
        {
            string dataPath = GetDataPath("breast-cancer.txt");
            var reader = TextLoaderStatic.CreateReader(Env, ctx => (
                ScalarString: ctx.LoadText(1),
                VectorString: ctx.LoadText(1, 4)));

            var data = reader.Read(dataPath);
            var dynamicInput = data.AsDynamic;

            // Non-pigsty Term.
            var toKeys = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingEstimator.ColumnInfo("A", "ScalarString"),
                new ValueToKeyMappingEstimator.ColumnInfo("B", "VectorString")
            });
            var dynamicData = toKeys.Fit(dynamicInput).Transform(dynamicInput);

            var data2 = dynamicData.AssertStatic(Env, ctx => (
                A: ctx.KeyU4.TextValues.Scalar,
                B: ctx.KeyU4.TextValues.Vector));

            var est = data2.MakeNewEstimator()
                .Append(row => (
                    ScalarString: row.A.ToValue(),
                    VectorString: row.B.ToValue()));

            TestEstimatorCore(est.AsDynamic, data2.AsDynamic, invalidInput: dynamicInput);

            var data2Transformed = est.Fit(data2).Transform(data2).AsDynamic;

            // Check that term and ToValue are round-trippable.
            var leftSelector = ML.Transforms.SelectColumns(new[] { "ScalarString", "VectorString" });
            var dataLeft = leftSelector.Fit(dynamicInput).Transform(dynamicInput);
            var rightSelector = ML.Transforms.SelectColumns(new[] { "ScalarString", "VectorString" });
            var dataRight = rightSelector.Fit(data2Transformed).Transform(data2Transformed);

            CheckSameSchemas(dataLeft.Schema, dataRight.Schema);
            CheckSameValues(dataLeft, dataRight);
            Done();
        }
Esempio n. 13
0
        /// <summary>
        /// Maps columns A, B and C of a two-row in-memory data set to keys and runs a
        /// <see cref="KeyToBinaryVectorMappingEstimator"/> built from the ("TermA", "CatA") and
        /// ("TermC", "CatC") column pairs through <c>TestEstimatorCore</c>.
        /// </summary>
        public void KeyToBinaryVectorWorkout()
        {
            var firstRow = new TestClass() { A = 1, B = 2, C = 3, };
            var secondRow = new TestClass() { A = 4, B = 5, C = 6 };
            var data = new[] { firstRow, secondRow };

            var dataView = ComponentCreation.CreateDataView(Env, data);

            // Only the "C" mapping is created with textKeyValues enabled.
            var toKeys = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC", textKeyValues: true)
            });
            dataView = toKeys.Fit(dataView).Transform(dataView);

            var catA = new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermA", "CatA");
            var catC = new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermC", "CatC");
            var pipe = new KeyToBinaryVectorMappingEstimator(Env, catA, catC);

            TestEstimatorCore(pipe, dataView);
            Done();
        }
Esempio n. 14
0
                /// <summary>
                /// Suggests a transform for the last column whose purpose is <c>ColumnPurpose.Label</c>:
                /// a value-to-key mapping when the column is text, otherwise a column copy when its name
                /// differs from <c>DefaultColumnNames.Label</c>. Yields nothing when no label column exists
                /// or the non-text label already has the default name.
                /// </summary>
                /// <param name="columns">Candidate columns to inspect.</param>
                /// <returns>Zero or one suggested transform.</returns>
                public override IEnumerable<SuggestedTransform> Apply(IntermediateColumn[] columns)
                {
                    int labelIndex = Array.FindLastIndex(columns, x => x.Purpose == ColumnPurpose.Label);
                    if (labelIndex < 0)
                    {
                        yield break;
                    }

                    var labelColumn = columns[labelIndex];

                    // The name is round-tripped through a StringBuilder exactly as before.
                    string source = new StringBuilder().Append(labelColumn.ColumnName).ToString();
                    string dest = DefaultColumnNames.Label;

                    // Routing from the source column to the label column; the destination is always numeric.
                    ColumnRoutingStructure BuildRouting(bool sourceIsNumeric)
                    {
                        var from = new[]
                        {
                            new ColumnRoutingStructure.AnnotatedName { IsNumeric = sourceIsNumeric, Name = source }
                        };
                        var to = new[]
                        {
                            new ColumnRoutingStructure.AnnotatedName { IsNumeric = true, Name = dest }
                        };
                        return new ColumnRoutingStructure(from, to);
                    }

                    if (labelColumn.Type.IsText())
                    {
                        // The counts are requested but none of the results are used afterwards.
                        labelColumn.GetUniqueValueCounts<ReadOnlyMemory<char>>(out var _, out var _, out var _);

                        var toKey = new ValueToKeyMappingEstimator(Env, source, dest);
                        yield return new SuggestedTransform(toKey, BuildRouting(false));
                    }
                    else if (labelColumn.ColumnName != DefaultColumnNames.Label)
                    {
                        var copy = new ColumnCopyingEstimator(Env, source, dest);
                        yield return new SuggestedTransform(copy, BuildRouting(true));
                    }
                }
        /// <summary>
        /// Builds the estimator as a value-to-key mapping over <paramref name="columns"/> followed by an
        /// optional conversion step chosen per column from its <c>OutputKind</c>: <c>Bin</c> columns go to
        /// a key-to-binary-vector estimator, <c>Ind</c> and <c>Bag</c> columns to a key-to-vector
        /// estimator (with bagging for <c>Bag</c>), and <c>Key</c> columns get no further conversion.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="columns">Columns to encode.</param>
        /// <param name="file">Forwarded to the value-to-key mapping estimator.</param>
        /// <param name="termsColumn">Forwarded to the value-to-key mapping estimator.</param>
        /// <param name="loaderFactory">Forwarded to the value-to-key mapping estimator.</param>
        public OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns,
                                       string file = null, string termsColumn = null,
                                       IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory = null)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(nameof(OneHotEncodingEstimator));
            _term = new ValueToKeyMappingEstimator(_host, columns, file, termsColumn, loaderFactory);

            var binaryCols = new List<(string input, string output)>();
            var vectorCols = new List<(string input, string output, bool bag)>();

            // The follow-up conversions run in place on the key column, so input and output are both column.Output.
            foreach (var column in columns)
            {
                switch (column.OutputKind)
                {
                    case CategoricalTransform.OutputKind.Key:
                        // Key output needs nothing beyond the value-to-key mapping.
                        break;

                    case CategoricalTransform.OutputKind.Bin:
                        binaryCols.Add((column.Output, column.Output));
                        break;

                    case CategoricalTransform.OutputKind.Ind:
                        vectorCols.Add((column.Output, column.Output, false));
                        break;

                    case CategoricalTransform.OutputKind.Bag:
                        vectorCols.Add((column.Output, column.Output, true));
                        break;

                    default:
                        throw _host.ExceptUserArg(nameof(column.OutputKind));
                }
            }

            IEstimator<ITransformer> toBinVector = null;
            if (binaryCols.Count > 0)
            {
                var binInfos = binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray();
                toBinVector = new KeyToBinaryVectorEstimator(_host, binInfos);
            }

            IEstimator<ITransformer> toVector = null;
            if (vectorCols.Count > 0)
            {
                var vecInfos = vectorCols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray();
                toVector = new KeyToVectorEstimator(_host, vecInfos);
            }

            // Chain both when present (vector first), otherwise keep whichever exists (possibly null).
            if (toBinVector == null)
            {
                _toSomething = toVector;
            }
            else if (toVector == null)
            {
                _toSomething = toBinVector;
            }
            else
            {
                _toSomething = toVector.Append(toBinVector);
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Maps eight columns (A..H) of a three-row in-memory data set to keys, applies a key-to-vector
        /// mapping to each resulting key column, and passes the result to <c>ValidateMetadata</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var data = new[]
            {
                new TestMeta()
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 3, 5 },
                    D = 6,
                    E = new[] { 1.0f, 2.0f },
                    F = 1.0f,
                    G = new[] { "A", "D" },
                    H = "D"
                },
                new TestMeta()
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 5, 3 },
                    D = 1,
                    E = new[] { 3.0f, 4.0f },
                    F = -1.0f,
                    G = new[] { "E", "A" },
                    H = "E"
                },
                new TestMeta()
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 3, 5 },
                    D = 6,
                    E = new[] { 5.0f, 6.0f },
                    F = 1.0f,
                    G = new[] { "D", "E" },
                    H = "D"
                }
            };

            var dataView = ML.Data.ReadFromEnumerable(data);

            // A, C, D and H are mapped with textKeyValues enabled; the others use the default.
            var termColumns = new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TA", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TB"),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TC", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("D", "TD", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("E", "TE"),
                new ValueToKeyMappingTransformer.ColumnInfo("F", "TF"),
                new ValueToKeyMappingTransformer.ColumnInfo("G", "TG"),
                new ValueToKeyMappingTransformer.ColumnInfo("H", "TH", textKeyValues: true)
            };
            var termEst = new ValueToKeyMappingEstimator(Env, termColumns);
            var termTransformer = termEst.Fit(dataView);

            dataView = termTransformer.Transform(dataView);

            // The third argument differs per column so both true and false settings are exercised.
            var pipe = new KeyToVectorMappingEstimator(
                Env,
                new KeyToVectorMappingTransformer.ColumnInfo("TA", "CatA", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TB", "CatB", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TC", "CatC", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TD", "CatD", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TE", "CatE", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TF", "CatF", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TG", "CatG", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TH", "CatH", false));

            var fitted = pipe.Fit(dataView);
            var result = fitted.Transform(dataView);

            ValidateMetadata(result);
            Done();
        }
 /// <summary>
 /// Replaces the term estimator with the result of attaching <paramref name="onFit"/> to it
 /// through <c>WithOnFitDelegate</c>.
 /// </summary>
 /// <param name="onFit">Delegate passed to <c>WithOnFitDelegate</c>.</param>
 internal void WrapTermWithDelegate(Action<TermTransform> onFit)
 {
     var wrapped = _term.WithOnFitDelegate(onFit);
     _term = (ValueToKeyMappingEstimator)wrapped;
 }