Beispiel #1
0
        /// <summary>
        /// Maps the A, B and C fields of two <c>TestClass</c> rows to key columns and then
        /// runs <c>TestEstimatorCore</c> on a key-to-vector estimator built over those key columns.
        /// </summary>
        public void KeyToVectorWorkout()
        {
            var rows = new[]
            {
                new TestClass() { A = 1, B = 2, C = 3, },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var rawView = ML.Data.ReadFromEnumerable(rows);

            // Step 1: turn the raw values into key-typed columns (TermA/TermB/TermC).
            var termColumns = new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC", textKeyValues: true)
            };
            var termEstimator = new ValueToKeyMappingEstimator(Env, termColumns);
            var termTransformer = termEstimator.Fit(rawView);
            var keyedView = termTransformer.Transform(rawView);

            // Step 2: the estimator under test. TermC is mapped twice, once with the
            // third argument set to true and once with it set to false.
            var vectorColumns = new[]
            {
                new KeyToVectorMappingTransformer.ColumnInfo("TermA", "CatA", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TermB", "CatB", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TermC", "CatC", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TermC", "CatCNonBag", false)
            };
            var pipe = new KeyToVectorMappingEstimator(Env, vectorColumns);

            TestEstimatorCore(pipe, keyedView);
            Done();
        }
Beispiel #2
0
        /// <summary>
        /// Fits a value-to-key mapping followed by a key-to-vector mapping on two
        /// <c>TestClass</c> rows, saves the model for the transformed data into an
        /// in-memory stream, and then loads the transforms back from that stream.
        /// </summary>
        public void TestOldSavingAndLoading()
        {
            var data = new[] { new TestClass()
                               {
                                   A = 1, B = 2, C = 3,
                               }, new TestClass()
                               {
                                   A = 4, B = 5, C = 6
                               } };
            var dataView = ML.Data.ReadFromEnumerable(data);
            var est      = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC")
            });
            var transformer = est.Fit(dataView);

            dataView = transformer.Transform(dataView);
            var pipe = new KeyToVectorMappingEstimator(Env,
                                                       new KeyToVectorMappingTransformer.ColumnInfo("TermA", "CatA", false),
                                                       new KeyToVectorMappingTransformer.ColumnInfo("TermB", "CatB", true)
                                                       );
            var result      = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                // The channel opened for saving is disposable; scope it so it is
                // closed as soon as the model has been written instead of leaking.
                using (var ch = Env.Start("saving"))
                {
                    TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);
                }

                // Rewind so the loader reads the model from the beginning of the stream.
                ms.Position = 0;

                // The loaded view itself is not inspected; the call is made so that
                // loading the saved transforms is exercised.
                ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Sets up the value-to-key estimator for <paramref name="columns"/> and, depending on each
        /// column's output kind, the follow-up key-to-vector and/or key-to-binary-vector estimators.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="columns">Per-column options, including the requested output kind.</param>
        /// <param name="keyData">Optional data view forwarded to the value-to-key estimator.</param>
        internal OneHotEncodingEstimator(IHostEnvironment env, ColumnOptions[] columns, IDataView keyData = null)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(nameof(OneHotEncodingEstimator));
            _term = new ValueToKeyMappingEstimator(_host, columns, keyData);

            // The same column name is used as both output and input of the follow-up mapping.
            var binaryTargets = new List<(string outputColumnName, string inputColumnName)>();
            var vectorTargets = new List<(string outputColumnName, string inputColumnName, bool bag)>();

            foreach (var options in columns)
            {
                switch (options.OutputKind)
                {
                    case OneHotEncodingTransformer.OutputKind.Key:
                        // Key output needs no follow-up mapping.
                        break;

                    case OneHotEncodingTransformer.OutputKind.Bin:
                        binaryTargets.Add((options.OutputColumnName, options.OutputColumnName));
                        break;

                    case OneHotEncodingTransformer.OutputKind.Ind:
                        vectorTargets.Add((options.OutputColumnName, options.OutputColumnName, false));
                        break;

                    case OneHotEncodingTransformer.OutputKind.Bag:
                        vectorTargets.Add((options.OutputColumnName, options.OutputColumnName, true));
                        break;

                    default:
                        throw _host.ExceptUserArg(nameof(options.OutputKind));
                }
            }

            IEstimator<ITransformer> binaryStage = null;
            if (binaryTargets.Count > 0)
            {
                binaryStage = new KeyToBinaryVectorMappingEstimator(_host, binaryTargets.Select(t => (t.outputColumnName, t.inputColumnName)).ToArray());
            }

            IEstimator<ITransformer> vectorStage = null;
            if (vectorTargets.Count > 0)
            {
                var vectorOptions = vectorTargets
                    .Select(t => new KeyToVectorMappingEstimator.ColumnOptions(t.outputColumnName, t.inputColumnName, t.bag))
                    .ToArray();
                vectorStage = new KeyToVectorMappingEstimator(_host, vectorOptions);
            }

            // Use whichever stage exists; when both exist, the vector stage runs first.
            if (binaryStage == null)
            {
                _toSomething = vectorStage;
            }
            else if (vectorStage == null)
            {
                _toSomething = binaryStage;
            }
            else
            {
                _toSomething = vectorStage.Append(binaryStage);
            }
        }
        /// <summary>
        /// Sets up the hashing estimator for <paramref name="columns"/> and, depending on each
        /// column's output kind, the follow-up key-to-vector and/or key-to-binary-vector estimators.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="columns">Per-column settings, each carrying hashing info and an output kind.</param>
        public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
        {
            Contracts.CheckValue(env, nameof(env));
            // Register the host under this estimator's own name, matching the channel
            // name used below, rather than under ValueToKeyMappingEstimator.
            _host = env.Register(nameof(OneHotHashEncodingEstimator));
            _hash = new HashingEstimator(_host, columns.Select(x => x.HashInfo).ToArray());
            using (var ch = _host.Start(nameof(OneHotHashEncodingEstimator)))
            {
                // The hashed output column name is used as both input and output of the follow-up mapping.
                var binaryCols = new List <(string input, string output)>();
                var cols       = new List <(string input, string output, bool bag)>();
                for (int i = 0; i < columns.Length; i++)
                {
                    var column = columns[i];
                    CategoricalTransform.OutputKind kind = column.OutputKind;
                    switch (kind)
                    {
                    default:
                        throw _host.ExceptUserArg(nameof(column.OutputKind));

                    case CategoricalTransform.OutputKind.Key:
                        // Key output needs no follow-up mapping.
                        continue;

                    case CategoricalTransform.OutputKind.Bin:
                        // A non-zero InvertHash combined with binary encoding is reported as a warning.
                        if ((column.HashInfo.InvertHash) != 0)
                        {
                            ch.Warning("Invert hashing is being used with binary encoding.");
                        }
                        binaryCols.Add((column.HashInfo.Output, column.HashInfo.Output));
                        break;

                    case CategoricalTransform.OutputKind.Ind:
                        cols.Add((column.HashInfo.Output, column.HashInfo.Output, false));
                        break;

                    case CategoricalTransform.OutputKind.Bag:
                        cols.Add((column.HashInfo.Output, column.HashInfo.Output, true));
                        break;
                    }
                }
                IEstimator <ITransformer> toBinVector = null;
                IEstimator <ITransformer> toVector    = null;
                if (binaryCols.Count > 0)
                {
                    toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
                }
                if (cols.Count > 0)
                {
                    toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
                }

                // Use whichever stage exists; when both exist, the vector stage runs first.
                if (toBinVector != null && toVector != null)
                {
                    _toSomething = toVector.Append(toBinVector);
                }
                else if (toBinVector != null)
                {
                    _toSomething = toBinVector;
                }
                else
                {
                    _toSomething = toVector;
                }
            }
        }
        /// <summary>
        /// Sets up the value-to-key estimator for <paramref name="columns"/> and, depending on each
        /// column's output kind, the follow-up key-to-vector and/or key-to-binary-vector estimators.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="columns">Per-column settings, including the requested output kind.</param>
        /// <param name="file">Forwarded unchanged to the value-to-key estimator.</param>
        /// <param name="termsColumn">Forwarded unchanged to the value-to-key estimator.</param>
        /// <param name="loaderFactory">Forwarded unchanged to the value-to-key estimator.</param>
        public OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns,
                                       string file = null, string termsColumn = null,
                                       IComponentFactory <IMultiStreamSource, IDataLoader> loaderFactory = null)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(nameof(OneHotEncodingEstimator));
            _term = new ValueToKeyMappingEstimator(_host, columns, file, termsColumn, loaderFactory);

            // The column's Output name is used as both input and output of the follow-up mapping.
            var toBinary = new List<(string input, string output)>();
            var toIndicator = new List<(string input, string output, bool bag)>();

            foreach (var info in columns)
            {
                OneHotEncodingTransformer.OutputKind requested = info.OutputKind;

                if (requested == OneHotEncodingTransformer.OutputKind.Key)
                {
                    // Key output needs no follow-up mapping.
                    continue;
                }

                if (requested == OneHotEncodingTransformer.OutputKind.Bin)
                {
                    toBinary.Add((info.Output, info.Output));
                }
                else if (requested == OneHotEncodingTransformer.OutputKind.Ind)
                {
                    toIndicator.Add((info.Output, info.Output, false));
                }
                else if (requested == OneHotEncodingTransformer.OutputKind.Bag)
                {
                    toIndicator.Add((info.Output, info.Output, true));
                }
                else
                {
                    throw _host.ExceptUserArg(nameof(info.OutputKind));
                }
            }

            IEstimator<ITransformer> binaryStage = null;
            if (toBinary.Count > 0)
            {
                var binaryInfos = toBinary
                    .Select(pair => new KeyToBinaryVectorMappingTransformer.ColumnInfo(pair.input, pair.output))
                    .ToArray();
                binaryStage = new KeyToBinaryVectorMappingEstimator(_host, binaryInfos);
            }

            IEstimator<ITransformer> vectorStage = null;
            if (toIndicator.Count > 0)
            {
                var vectorInfos = toIndicator
                    .Select(triple => new KeyToVectorMappingTransformer.ColumnInfo(triple.input, triple.output, triple.bag))
                    .ToArray();
                vectorStage = new KeyToVectorMappingEstimator(_host, vectorInfos);
            }

            // Use whichever stage exists; when both exist, the vector stage runs first.
            if (binaryStage == null)
            {
                _toSomething = vectorStage;
            }
            else if (vectorStage == null)
            {
                _toSomething = binaryStage;
            }
            else
            {
                _toSomething = vectorStage.Append(binaryStage);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Maps the eight fields of three <c>TestMeta</c> rows to key columns, applies a
        /// key-to-vector mapping to each of them, and passes the result to <c>ValidateMetadata</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta()
                {
                    A = new string[2] { "A", "B" },
                    B = "C",
                    C = new int[2] { 3, 5 },
                    D = 6,
                    E = new float[2] { 1.0f, 2.0f },
                    F = 1.0f,
                    G = new string[2] { "A", "D" },
                    H = "D"
                },
                new TestMeta()
                {
                    A = new string[2] { "A", "B" },
                    B = "C",
                    C = new int[2] { 5, 3 },
                    D = 1,
                    E = new float[2] { 3.0f, 4.0f },
                    F = -1.0f,
                    G = new string[2] { "E", "A" },
                    H = "E"
                },
                new TestMeta()
                {
                    A = new string[2] { "A", "B" },
                    B = "C",
                    C = new int[2] { 3, 5 },
                    D = 6,
                    E = new float[2] { 5.0f, 6.0f },
                    F = 1.0f,
                    G = new string[2] { "D", "E" },
                    H = "D"
                }
            };
            var rawView = ML.Data.ReadFromEnumerable(rows);

            // Step 1: key-typed columns; A, C, D and H request text key values.
            var termColumns = new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TA", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TB"),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TC", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("D", "TD", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("E", "TE"),
                new ValueToKeyMappingTransformer.ColumnInfo("F", "TF"),
                new ValueToKeyMappingTransformer.ColumnInfo("G", "TG"),
                new ValueToKeyMappingTransformer.ColumnInfo("H", "TH", textKeyValues: true)
            };
            var termEst = new ValueToKeyMappingEstimator(Env, termColumns);
            var keyedView = termEst.Fit(rawView).Transform(rawView);

            // Step 2: key-to-vector mapping over every key column, mixing true and
            // false for the third argument.
            var vectorColumns = new[]
            {
                new KeyToVectorMappingTransformer.ColumnInfo("TA", "CatA", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TB", "CatB", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TC", "CatC", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TD", "CatD", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TE", "CatE", false),
                new KeyToVectorMappingTransformer.ColumnInfo("TF", "CatF", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TG", "CatG", true),
                new KeyToVectorMappingTransformer.ColumnInfo("TH", "CatH", false)
            };
            var pipe = new KeyToVectorMappingEstimator(Env, vectorColumns);
            var vectorTransformer = pipe.Fit(keyedView);
            var result = vectorTransformer.Transform(keyedView);

            ValidateMetadata(result);
            Done();
        }