Esempio n. 1
0
        /// <summary>
        /// Checks that one-hot encoding takes its term mapping from the optional side data
        /// when it is supplied, regardless of the values present in the input column.
        /// </summary>
        public void CategoricalOneHotEncodingFromSideData()
        {
            // Input rows whose values deliberately do not appear in the side data.
            TestStringClass[] inputRows =
            {
                new TestStringClass() { A = "Stay" },
                new TestStringClass() { A = "awhile and listen" }
            };

            var context = new MLContext();
            var inputView = context.Data.ReadFromEnumerable(inputRows);

            // Side data: a single column holding the terms the encoder is expected to use.
            var termBuilder = new ArrayDataViewBuilder(context);
            termBuilder.AddColumn("Hello", "hello", "my", "friend");
            var termView = termBuilder.GetDataView();

            var columns = new[]
            {
                new OneHotEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag)
            };
            var estimator = context.Transforms.Categorical.OneHotEncoding(columns, termView);

            var model = estimator.Fit(inputView);
            var transformed = model.Transform(inputView);

            var slotNames = default(VBuffer<ReadOnlyMemory<char>>);
            transformed.Schema["CatA"].GetSlotNames(ref slotNames);

            // The slot names must be exactly the side-data terms, in order.
            var expectedTerms = new[] { "hello", "my", "friend" };
            Assert.Equal(expectedTerms.Length, slotNames.Length);
            for (int slot = 0; slot < expectedTerms.Length; slot++)
            {
                Assert.Equal(expectedTerms[slot], slotNames.GetItemOrDefault(slot).ToString());
            }

            Done();
        }
Esempio n. 2
0
            /// <summary>
            /// Builds a single <see cref="OneHotEncodingEstimator"/> covering every requested output column.
            /// </summary>
            /// <param name="env">Host environment passed to the estimator constructor.</param>
            /// <param name="toOutput">Output columns; each one is cast to <see cref="ICategoricalCol"/>.</param>
            /// <param name="inputNames">Lookup from a pipeline column to its input column name.</param>
            /// <param name="outputNames">Lookup from a pipeline column to its output column name.</param>
            /// <param name="usedNames">Not used by this implementation.</param>
            /// <returns>The estimator, with any per-column on-fit callbacks attached.</returns>
            public override IEstimator<ITransformer> Reconcile(IHostEnvironment env, PipelineColumn[] toOutput,
                                                               IReadOnlyDictionary<PipelineColumn, string> inputNames, IReadOnlyDictionary<PipelineColumn, string> outputNames, IReadOnlyCollection<string> usedNames)
            {
                int columnCount = toOutput.Length;
                var columnInfos = new OneHotEncodingEstimator.ColumnInfo[columnCount];
                Action<ValueToKeyMappingTransformer> fitCallbacks = null;

                for (int index = 0; index < columnCount; index++)
                {
                    var outputColumn = toOutput[index];
                    var categorical = (ICategoricalCol)outputColumn;

                    // Gather the ColumnInfo arguments in the same order they are passed.
                    var inputName = inputNames[categorical.Input];
                    var outputName = outputNames[outputColumn];
                    var outputKind = (OneHotEncodingTransformer.OutputKind)categorical.Config.OutputKind;
                    var maxTerms = categorical.Config.Max;
                    var sortOrder = (ValueToKeyMappingTransformer.SortOrder)categorical.Config.Order;

                    columnInfos[index] = new OneHotEncodingEstimator.ColumnInfo(inputName, outputName, outputKind, maxTerms, sortOrder);

                    if (categorical.Config.OnFit == null)
                    {
                        continue;
                    }

                    // Copy the loop counter: the lambda runs after the loop has finished,
                    // by which time the counter itself equals toOutput.Length.
                    int capturedIndex = index;
                    fitCallbacks += transformer => categorical.Config.OnFit(transformer.GetTermMap(capturedIndex));
                }

                var estimator = new OneHotEncodingEstimator(env, columnInfos);

                // Only wrap the estimator when at least one column registered a callback.
                if (fitCallbacks != null)
                {
                    estimator.WrapTermWithDelegate(fitCallbacks);
                }

                return estimator;
            }