Esempio n. 1
0
        /// <summary>
        /// Builds a one-hot encoding transform from <paramref name="options"/>: a
        /// <see cref="OneHotEncodingEstimator"/> is configured per column, fitted on
        /// <paramref name="input"/>, and then applied to that same data.
        /// </summary>
        /// <param name="env">Host environment; checked for null before anything else.</param>
        /// <param name="options">Transform options; must be non-null and list at least one column.</param>
        /// <param name="input">Data view used both to fit the estimator and as the data to transform.</param>
        /// <returns>The fitted transform applied to <paramref name="input"/>.</returns>
        internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            var h = env.Register("Categorical");

            h.CheckValue(options, nameof(options));
            h.CheckValue(input, nameof(input));
            h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns));

            var columns = new List<OneHotEncodingEstimator.ColumnInfo>();

            foreach (var column in options.Columns)
            {
                // Per-column settings win; anything left null falls back to the transform-wide value in options.
                var col = new OneHotEncodingEstimator.ColumnInfo(
                    column.Name,
                    column.Source ?? column.Name,
                    column.OutputKind ?? options.OutputKind,
                    column.MaxNumTerms ?? options.MaxNumTerms,
                    column.Sort ?? options.Sort,
                    column.Term ?? options.Term);

                // Term goes to the constructor, Terms goes to SetTerms; the two must not be crossed.
                // NOTE(review): presumably Term is the term array and Terms its single-string form - confirm against ColumnInfo.
                col.SetTerms(column.Terms ?? options.Terms);
                columns.Add(col);
            }

            // Key data is only loaded when a data file was supplied; otherwise null is handed to the estimator.
            IDataView keyData = null;

            if (!string.IsNullOrEmpty(options.DataFile))
            {
                using (var ch = h.Start("Load term data"))
                {
                    keyData = ValueToKeyMappingTransformer.GetKeyDataViewOrNull(env, ch, options.DataFile, options.TermsColumn, options.Loader, out bool autoLoaded);
                }

                h.AssertValue(keyData);
            }

            var transformed = new OneHotEncodingEstimator(env, columns.ToArray(), keyData).Fit(input).Transform(input);

            return (IDataTransform)transformed;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a one-hot encoding transform from <paramref name="args"/>: a
        /// <see cref="OneHotEncodingEstimator"/> is configured per column, fitted on
        /// <paramref name="input"/>, and then applied to that same data.
        /// </summary>
        /// <param name="env">Host environment; checked for null before anything else.</param>
        /// <param name="args">Transform arguments; must be non-null and list at least one column.</param>
        /// <param name="input">Data view used both to fit the estimator and as the data to transform.</param>
        /// <returns>The fitted transform applied to <paramref name="input"/>.</returns>
        /// <exception cref="System.InvalidCastException">
        /// The transformed data does not implement <see cref="IDataTransform"/>.
        /// </exception>
        internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            var h = env.Register("Categorical");

            h.CheckValue(args, nameof(args));
            h.CheckValue(input, nameof(input));
            h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));

            var columns = new List<OneHotEncodingEstimator.ColumnInfo>();

            foreach (var column in args.Column)
            {
                // Per-column settings win; anything left null falls back to the transform-wide value in args.
                // The source column defaults to the output name when no explicit source is given.
                var col = new OneHotEncodingEstimator.ColumnInfo(
                    column.Source ?? column.Name,
                    column.Name,
                    column.OutputKind ?? args.OutputKind,
                    column.MaxNumTerms ?? args.MaxNumTerms,
                    column.Sort ?? args.Sort,
                    column.Term ?? args.Term);
                col.SetTerms(column.Terms ?? args.Terms);
                columns.Add(col);
            }

            var estimator = new OneHotEncodingEstimator(env, columns.ToArray(), args.DataFile, args.TermsColumn, args.Loader);

            // Direct cast instead of `as`: a type mismatch fails here with InvalidCastException
            // rather than returning null and surfacing later as a NullReferenceException in the caller.
            return (IDataTransform)estimator.Fit(input).Transform(input);
        }
Esempio n. 3
0
            /// <summary>
            /// Creates a single <see cref="OneHotEncodingEstimator"/> covering every column in
            /// <paramref name="toOutput"/>, and chains any per-column fit callbacks onto it.
            /// </summary>
            /// <param name="env">Host environment passed to the estimator.</param>
            /// <param name="toOutput">Columns to produce; each is cast to <c>ICategoricalCol</c>.</param>
            /// <param name="inputNames">Lookup from a pipeline column to its input column name.</param>
            /// <param name="outputNames">Lookup from a pipeline column to its output column name.</param>
            /// <param name="usedNames">Not read by this implementation.</param>
            /// <returns>The configured estimator.</returns>
            public override IEstimator<ITransformer> Reconcile(
                IHostEnvironment env,
                PipelineColumn[] toOutput,
                IReadOnlyDictionary<PipelineColumn, string> inputNames,
                IReadOnlyDictionary<PipelineColumn, string> outputNames,
                IReadOnlyCollection<string> usedNames)
            {
                int columnCount = toOutput.Length;
                var columnInfos = new OneHotEncodingEstimator.ColumnInfo[columnCount];

                // Stays null unless at least one column registered a fit callback.
                Action<TermTransform> fitCallbacks = null;

                for (int index = 0; index < columnCount; index++)
                {
                    var outputColumn = toOutput[index];
                    var categorical = (ICategoricalCol)outputColumn;

                    string sourceName = inputNames[categorical.Input];
                    string targetName = outputNames[outputColumn];
                    var outputKind = (OneHotEncodingTransformer.OutputKind)categorical.Config.OutputKind;
                    var maxTerms = categorical.Config.Max;
                    var sortOrder = (TermTransform.SortOrder)categorical.Config.Order;

                    columnInfos[index] = new OneHotEncodingEstimator.ColumnInfo(sourceName, targetName, outputKind, maxTerms, sortOrder);

                    if (categorical.Config.OnFit == null)
                    {
                        continue;
                    }

                    // Copy the loop counter: the lambda runs after the loop ends, when the counter
                    // itself would already equal toOutput.Length.
                    int termMapIndex = index;
                    fitCallbacks += fitted => categorical.Config.OnFit(fitted.GetTermMap(termMapIndex));
                }

                var estimator = new OneHotEncodingEstimator(env, columnInfos);

                if (fitCallbacks != null)
                {
                    estimator.WrapTermWithDelegate(fitCallbacks);
                }

                return estimator;
            }