public void KeyToVectorWorkout()
        {
            // Exercises KeyToVectorEstimator through TestEstimatorCore on a tiny two-row data set.
            var rows = new[]
            {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var view = ComponentCreation.CreateDataView(Env, rows);

            // KeyToVector consumes the Term* columns, so map A/B/C to them first.
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("A", "TermA"),
                new TermTransform.ColumnInfo("B", "TermB"),
                new TermTransform.ColumnInfo("C", "TermC", textKeyValues: true)
            };
            var termEstimator = new TermEstimator(Env, termColumns);
            view = termEstimator.Fit(view).Transform(view);

            // TermC is converted twice: once with the third argument true, once with it false.
            var vectorColumns = new[]
            {
                new KeyToVectorTransform.ColumnInfo("TermA", "CatA", false),
                new KeyToVectorTransform.ColumnInfo("TermB", "CatB", true),
                new KeyToVectorTransform.ColumnInfo("TermC", "CatC", true),
                new KeyToVectorTransform.ColumnInfo("TermC", "CatCNonBag", false)
            };
            var estimator = new KeyToVectorEstimator(Env, vectorColumns);

            TestEstimatorCore(estimator, view);
            Done();
        }
        public void TestOldSavingAndLoading()
        {
            // Fits Term + KeyToVector on two rows, saves the resulting data's transforms with
            // TrainUtils.SaveModel and reads them back with ModelFileUtils.LoadTransforms.
            // The test passes when neither the save nor the load throws.
            var data = new[]
            {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);
            var est = new TermEstimator(Env, new[] {
                new TermTransform.ColumnInfo("A", "TermA"),
                new TermTransform.ColumnInfo("B", "TermB"),
                new TermTransform.ColumnInfo("C", "TermC")
            });
            var transformer = est.Fit(dataView);

            dataView = transformer.Transform(dataView);
            var pipe = new KeyToVectorEstimator(Env,
                                                new KeyToVectorTransform.ColumnInfo("TermA", "CatA", false),
                                                new KeyToVectorTransform.ColumnInfo("TermB", "CatB", true));
            var result = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                // The channel is disposable; scope it to the save so it is always released,
                // even if SaveModel throws.
                using (var ch = Env.Start("saving"))
                {
                    TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);
                }

                // Rewind so the loader reads the model from the beginning of the stream.
                ms.Position = 0;
                ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
        /// <summary>
        /// Creates the estimator: a <see cref="TermEstimator"/> over all <paramref name="columns"/>, plus the
        /// key-to-vector and/or key-to-binary-vector estimator required by each column's output kind.
        /// </summary>
        /// <param name="env">Host environment used to register this component.</param>
        /// <param name="columns">Column settings; each column's <c>OutputKind</c> selects its post-processing.</param>
        public CategoricalEstimator(IHostEnvironment env, params ColumnInfo[] columns)
        {
            Contracts.CheckValue(env, nameof(env));
            // Name the host after this estimator rather than the TermEstimator it wraps.
            _host = env.Register(nameof(CategoricalEstimator));
            // Validate up front so a missing array is reported against the parameter name.
            _host.CheckValue(columns, nameof(columns));
            _term = new TermEstimator(_host, columns);

            // Both conversions run in place on the term output: input and output name are the same column.
            var binaryCols = new List<(string input, string output)>();
            var cols = new List<(string input, string output, bool bag)>();

            foreach (var column in columns)
            {
                switch (column.OutputKind)
                {
                    case CategoricalTransform.OutputKind.Key:
                        // Key output needs no conversion beyond the term mapping.
                        continue;

                    case CategoricalTransform.OutputKind.Bin:
                        binaryCols.Add((column.Output, column.Output));
                        break;

                    case CategoricalTransform.OutputKind.Ind:
                        cols.Add((column.Output, column.Output, false));
                        break;

                    case CategoricalTransform.OutputKind.Bag:
                        cols.Add((column.Output, column.Output, true));
                        break;

                    default:
                        throw _host.ExceptUserArg(nameof(column.OutputKind));
                }
            }

            IEstimator<ITransformer> toBinVector = null;
            IEstimator<ITransformer> toVector = null;

            if (binaryCols.Count > 0)
            {
                toBinVector = new KeyToBinaryVectorEstimator(_host,
                    binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
            }
            if (cols.Count > 0)
            {
                toVector = new KeyToVectorEstimator(_host,
                    cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
            }

            if (toBinVector != null && toVector != null)
            {
                // Both kinds requested: chain the vector conversion first, then the binary one.
                _toSomething = toVector.Append(toBinVector);
            }
            else
            {
                // At most one is set; stays null when every column asked for Key output.
                _toSomething = toBinVector ?? toVector;
            }
        }
        public void TestMetadataPropagation()
        {
            // Runs Term followed by KeyToVector over eight columns (scalar and two-element array
            // inputs, with and without textKeyValues) and passes the output to ValidateMetadata.
            var rows = new[]
            {
                new TestMeta()
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 3, 5 },
                    D = 6,
                    E = new[] { 1.0f, 2.0f },
                    F = 1.0f,
                    G = new[] { "A", "D" },
                    H = "D"
                },
                new TestMeta()
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 5, 3 },
                    D = 1,
                    E = new[] { 3.0f, 4.0f },
                    F = -1.0f,
                    G = new[] { "E", "A" },
                    H = "E"
                },
                new TestMeta()
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 3, 5 },
                    D = 6,
                    E = new[] { 5.0f, 6.0f },
                    F = 1.0f,
                    G = new[] { "D", "E" },
                    H = "D"
                }
            };

            var view = ComponentCreation.CreateDataView(Env, rows);

            // Step 1: map every input column to a key column (T-prefixed).
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("A", "TA", textKeyValues: true),
                new TermTransform.ColumnInfo("B", "TB"),
                new TermTransform.ColumnInfo("C", "TC", textKeyValues: true),
                new TermTransform.ColumnInfo("D", "TD", textKeyValues: true),
                new TermTransform.ColumnInfo("E", "TE"),
                new TermTransform.ColumnInfo("F", "TF"),
                new TermTransform.ColumnInfo("G", "TG"),
                new TermTransform.ColumnInfo("H", "TH", textKeyValues: true)
            };
            var termModel = new TermEstimator(Env, termColumns).Fit(view);
            view = termModel.Transform(view);

            // Step 2: convert each key column to a vector column (Cat-prefixed).
            var vectorColumns = new[]
            {
                new KeyToVectorTransform.ColumnInfo("TA", "CatA", true),
                new KeyToVectorTransform.ColumnInfo("TB", "CatB", false),
                new KeyToVectorTransform.ColumnInfo("TC", "CatC", false),
                new KeyToVectorTransform.ColumnInfo("TD", "CatD", true),
                new KeyToVectorTransform.ColumnInfo("TE", "CatE", false),
                new KeyToVectorTransform.ColumnInfo("TF", "CatF", true),
                new KeyToVectorTransform.ColumnInfo("TG", "CatG", true),
                new KeyToVectorTransform.ColumnInfo("TH", "CatH", false)
            };
            var vectorEstimator = new KeyToVectorEstimator(Env, vectorColumns);
            var transformed = vectorEstimator.Fit(view).Transform(view);

            ValidateMetadata(transformed);
            Done();
        }
Example #5
0
        /// <summary>
        /// Creates the estimator: a <see cref="HashEstimator"/> built from each column's <c>HashInfo</c>, plus the
        /// key-to-vector and/or key-to-binary-vector estimator required by each column's output kind.
        /// </summary>
        /// <param name="env">Host environment used to register this component.</param>
        /// <param name="columns">Column settings; each column's <c>OutputKind</c> selects its post-processing.</param>
        public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
        {
            Contracts.CheckValue(env, nameof(env));
            // Name the host after this estimator, matching the channel name used below.
            _host = env.Register(nameof(OneHotHashEncodingEstimator));
            // Validate before the Select below dereferences the array.
            _host.CheckValue(columns, nameof(columns));
            _hash = new HashEstimator(_host, columns.Select(x => x.HashInfo).ToArray());

            using (var ch = _host.Start(nameof(OneHotHashEncodingEstimator)))
            {
                // Both conversions run in place on the hash output: input and output name are the same column.
                var binaryCols = new List<(string input, string output)>();
                var cols = new List<(string input, string output, bool bag)>();

                foreach (var column in columns)
                {
                    var hashOutput = column.HashInfo.Output;
                    switch (column.OutputKind)
                    {
                        case CategoricalTransform.OutputKind.Key:
                            // Key output needs no conversion beyond the hashing step.
                            continue;

                        case CategoricalTransform.OutputKind.Bin:
                            if (column.HashInfo.InvertHash != 0)
                            {
                                ch.Warning("Invert hashing is being used with binary encoding.");
                            }
                            binaryCols.Add((hashOutput, hashOutput));
                            break;

                        case CategoricalTransform.OutputKind.Ind:
                            cols.Add((hashOutput, hashOutput, false));
                            break;

                        case CategoricalTransform.OutputKind.Bag:
                            cols.Add((hashOutput, hashOutput, true));
                            break;

                        default:
                            throw _host.ExceptUserArg(nameof(column.OutputKind));
                    }
                }

                IEstimator<ITransformer> toBinVector = null;
                IEstimator<ITransformer> toVector = null;

                if (binaryCols.Count > 0)
                {
                    toBinVector = new KeyToBinaryVectorEstimator(_host,
                        binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
                }
                if (cols.Count > 0)
                {
                    toVector = new KeyToVectorEstimator(_host,
                        cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
                }

                if (toBinVector != null && toVector != null)
                {
                    // Both kinds requested: chain the vector conversion first, then the binary one.
                    _toSomething = toVector.Append(toBinVector);
                }
                else
                {
                    // At most one is set; stays null when every column asked for Key output.
                    _toSomething = toBinVector ?? toVector;
                }
            }
        }