Exemple #1
0
        /// <summary>
        /// Smoke test for the <c>TrainUtils.SaveModel</c> / <c>ModelFileUtils.LoadTransforms</c> path:
        /// maps columns A, B and C of two <c>TestClass</c> rows to keys, converts the keys to binary
        /// vectors, saves the resulting data to an in-memory stream and loads the transforms back.
        /// The test makes no assertions; it passes as long as nothing throws.
        /// </summary>
        public void TestOldSavingAndLoading()
        {
            var data = new[]
            {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);

            // Step 1: value -> key mapping (only TermB requests text key values).
            var est = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC")
            });
            var transformer = est.Fit(dataView);
            dataView = transformer.Transform(dataView);

            // Step 2: key -> binary vector mapping on top of the keyed view.
            var pipe = new KeyToBinaryVectorMappingEstimator(Env,
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermA", "CatA"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermB", "CatB"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermC", "CatC"));
            var result = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                // The channel is disposable; scope it to the save call so it is released
                // instead of being leaked as an anonymous temporary.
                using (var ch = Env.Start("saving"))
                {
                    TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);
                }

                // Rewind so the loader reads from the beginning of what was just written.
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
Exemple #2
0
        /// <summary>
        /// Builds three <c>TestMeta</c> rows, maps columns A-D to keys (A and B with text key
        /// values), converts the key columns to binary vectors, and hands the transformed data
        /// to <c>ValidateMetadata</c> before calling <c>Done</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 3, 5 }, D = 6 },
                new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 5, 3 }, D = 1 },
                new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 3, 5 }, D = 6 }
            };

            var sourceView = ComponentCreation.CreateDataView(Env, rows);

            // Value -> key stage.
            var keyColumns = new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TA", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TB", textKeyValues: true),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TC"),
                new ValueToKeyMappingTransformer.ColumnInfo("D", "TD")
            };
            var keyEstimator = new ValueToKeyMappingEstimator(Env, keyColumns);
            var keyedView = keyEstimator.Fit(sourceView).Transform(sourceView);

            // Key -> binary vector stage.
            var binaryEstimator = new KeyToBinaryVectorMappingEstimator(
                Env,
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TA", "CatA"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TB", "CatB"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TC", "CatC"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TD", "CatD"));
            var binaryModel = binaryEstimator.Fit(keyedView);
            var transformed = binaryModel.Transform(keyedView);

            ValidateMetadata(transformed);
            Done();
        }
Exemple #3
0
        /// <summary>
        /// Maps columns A, B and C of two <c>TestClass</c> rows to keys (C with text key values),
        /// then runs <c>TestEstimatorCore</c> on a key-to-binary-vector estimator over the
        /// TermA and TermC columns and finishes with <c>Done</c>.
        /// </summary>
        public void KeyToBinaryVectorWorkout()
        {
            var rows = new[]
            {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };

            var rawView = ComponentCreation.CreateDataView(Env, rows);

            // Produce the key columns the estimator under test consumes.
            var keyEstimator = new ValueToKeyMappingEstimator(Env, new[]
            {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
                new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC", textKeyValues: true)
            });
            var keyedView = keyEstimator.Fit(rawView).Transform(rawView);

            // Only TermA and TermC are converted; TermB is left as a key column.
            var pipe = new KeyToBinaryVectorMappingEstimator(
                Env,
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermA", "CatA"),
                new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermC", "CatC"));

            TestEstimatorCore(pipe, keyedView);
            Done();
        }
        /// <summary>
        /// Creates the estimator: a <see cref="HashingEstimator"/> built from each column's
        /// <c>HashInfo</c>, followed by an optional key-to-vector and/or key-to-binary-vector
        /// stage chosen per column from its <c>OutputKind</c>.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="columns">Per-column settings; must not be null.</param>
        /// <remarks>
        /// Columns with <c>OutputKind.Key</c> get no post-hash stage. If every column is of that
        /// kind, <c>_toSomething</c> is left null.
        /// </remarks>
        public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
        {
            Contracts.CheckValue(env, nameof(env));
            // Fail with a descriptive argument error rather than a NullReferenceException below.
            Contracts.CheckValue(columns, nameof(columns));

            // Register under this estimator's own name (previously registered as
            // ValueToKeyMappingEstimator, a copy/paste slip).
            _host = env.Register(nameof(OneHotHashEncodingEstimator));
            _hash = new HashingEstimator(_host, columns.Select(x => x.HashInfo).ToArray());

            using (var ch = _host.Start(nameof(OneHotHashEncodingEstimator)))
            {
                // The post-hash stages rewrite the hashed column in place, so input == output.
                var binaryCols = new List<(string input, string output)>();
                var cols = new List<(string input, string output, bool bag)>();

                foreach (var column in columns)
                {
                    CategoricalTransform.OutputKind kind = column.OutputKind;
                    switch (kind)
                    {
                    case CategoricalTransform.OutputKind.Key:
                        // Key output needs nothing beyond the hash itself.
                        continue;

                    case CategoricalTransform.OutputKind.Bin:
                        if ((column.HashInfo.InvertHash) != 0)
                        {
                            ch.Warning("Invert hashing is being used with binary encoding.");
                        }
                        binaryCols.Add((column.HashInfo.Output, column.HashInfo.Output));
                        break;

                    case CategoricalTransform.OutputKind.Ind:
                        cols.Add((column.HashInfo.Output, column.HashInfo.Output, false));
                        break;

                    case CategoricalTransform.OutputKind.Bag:
                        cols.Add((column.HashInfo.Output, column.HashInfo.Output, true));
                        break;

                    default:
                        throw _host.ExceptUserArg(nameof(column.OutputKind));
                    }
                }

                IEstimator<ITransformer> toBinVector = null;
                IEstimator<ITransformer> toVector = null;
                if (binaryCols.Count > 0)
                {
                    toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
                }
                if (cols.Count > 0)
                {
                    toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
                }

                // Chain both stages when both exist (vector first, then binary);
                // otherwise keep whichever one was created, or null if neither was.
                if (toBinVector != null && toVector != null)
                {
                    _toSomething = toVector.Append(toBinVector);
                }
                else if (toBinVector != null)
                {
                    _toSomething = toBinVector;
                }
                else
                {
                    _toSomething = toVector;
                }
            }
        }
        /// <summary>
        /// Creates the estimator: a <see cref="ValueToKeyMappingEstimator"/> over
        /// <paramref name="columns"/>, followed by an optional key-to-vector and/or
        /// key-to-binary-vector stage chosen per column from its <c>OutputKind</c>.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="columns">Per-column settings.</param>
        /// <param name="file">Forwarded unchanged to <see cref="ValueToKeyMappingEstimator"/>.</param>
        /// <param name="termsColumn">Forwarded unchanged to <see cref="ValueToKeyMappingEstimator"/>.</param>
        /// <param name="loaderFactory">Forwarded unchanged to <see cref="ValueToKeyMappingEstimator"/>.</param>
        public OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns,
                                       string file = null, string termsColumn = null,
                                       IComponentFactory <IMultiStreamSource, IDataLoader> loaderFactory = null)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(nameof(OneHotEncodingEstimator));
            _term = new ValueToKeyMappingEstimator(_host, columns, file, termsColumn, loaderFactory);

            // Each post-key stage rewrites the key column in place, so source and target match.
            var binaryTargets = new List<(string source, string target)>();
            var vectorTargets = new List<(string source, string target, bool asBag)>();

            foreach (var column in columns)
            {
                OneHotEncodingTransformer.OutputKind outputKind = column.OutputKind;
                switch (outputKind)
                {
                case OneHotEncodingTransformer.OutputKind.Key:
                    // Key output needs no stage after the term mapping.
                    continue;

                case OneHotEncodingTransformer.OutputKind.Ind:
                    vectorTargets.Add((column.Output, column.Output, false));
                    break;

                case OneHotEncodingTransformer.OutputKind.Bag:
                    vectorTargets.Add((column.Output, column.Output, true));
                    break;

                case OneHotEncodingTransformer.OutputKind.Bin:
                    binaryTargets.Add((column.Output, column.Output));
                    break;

                default:
                    throw _host.ExceptUserArg(nameof(column.OutputKind));
                }
            }

            IEstimator <ITransformer> binaryStage = null;
            if (binaryTargets.Count > 0)
            {
                var binaryInfos = binaryTargets
                    .Select(t => new KeyToBinaryVectorMappingTransformer.ColumnInfo(t.source, t.target))
                    .ToArray();
                binaryStage = new KeyToBinaryVectorMappingEstimator(_host, binaryInfos);
            }

            IEstimator <ITransformer> vectorStage = null;
            if (vectorTargets.Count > 0)
            {
                var vectorInfos = vectorTargets
                    .Select(t => new KeyToVectorMappingTransformer.ColumnInfo(t.source, t.target, t.asBag))
                    .ToArray();
                vectorStage = new KeyToVectorMappingEstimator(_host, vectorInfos);
            }

            // No binary stage: use the vector stage (possibly null). No vector stage: use the
            // binary one. Both present: vector stage first, binary stage appended.
            if (binaryStage == null)
            {
                _toSomething = vectorStage;
            }
            else if (vectorStage == null)
            {
                _toSomething = binaryStage;
            }
            else
            {
                _toSomething = vectorStage.Append(binaryStage);
            }
        }