예제 #1
0
        /// <summary>
        /// Exercises the <c>Append</c> overloads of <see cref="SweepablePipeline"/>: a single estimator,
        /// several estimators, an estimator mixed with a pipeline, and several pipelines. The resulting
        /// pipeline is serialized to JSON and compared against the approved snapshot.
        /// </summary>
        public void SweepablePipeline_Append_SweepableEstimator_Test()
        {
            var sweepable = new SweepablePipeline();
            var concatOption = new ConcatOption
            {
                InputColumnNames = new[] { "a", "b", "c" },
                OutputColumnName = "a",
            };
            var lgbmOption = new LgbmOption
            {
                FeatureColumnName = "Feature",
                LabelColumnName = "Label",
            };

            // Case 1: append a single sweepable estimator.
            var singleConcat = SweepableEstimatorFactory.CreateConcatenate(concatOption);
            sweepable = sweepable.Append(singleConcat);

            // Case 2: append multiple sweepable estimators in one call.
            var lightGbm = SweepableEstimatorFactory.CreateLightGbmBinary(lgbmOption);
            var secondConcat = SweepableEstimatorFactory.CreateConcatenate(concatOption);
            sweepable = sweepable.Append(lightGbm, secondConcat);

            // Case 3: append a sweepable estimator mixed with a sweepable pipeline
            // (the pipeline built so far is appended to itself).
            var thirdConcat = SweepableEstimatorFactory.CreateConcatenate(concatOption);
            sweepable = sweepable.Append(thirdConcat, sweepable);

            // Case 4: append sweepable pipelines only.
            sweepable = sweepable.Append(sweepable, sweepable);

            var json = JsonSerializer.Serialize(sweepable, _jsonSerializerOptions);
            Approvals.Verify(json);
        }
예제 #2
0
        /// <summary>
        /// Builds a <see cref="SweepableEstimatorPipeline"/> from five estimators, each created from a
        /// default-constructed option object, in this order: concatenate, replace-missing-values,
        /// one-hot encoding, LightGBM binary and FastTree binary.
        /// </summary>
        /// <returns>The newly constructed pipeline.</returns>
        private SweepableEstimatorPipeline CreateSweepbaleEstimatorPipeline()
        {
            // Array elements are evaluated top to bottom, so the estimators are created in pipeline order.
            var estimators = new SweepableEstimator[]
            {
                SweepableEstimatorFactory.CreateConcatenate(new ConcatOption()),
                SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption()),
                SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption()),
                SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption()),
                SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption()),
            };

            return new SweepableEstimatorPipeline(estimators);
        }
예제 #3
0
        /// <summary>
        /// Builds a <see cref="MultiModelPipeline"/> in three stages: an <c>AppendOrSkip</c> of the
        /// replace-missing-values / one-hot combination, an <c>AppendOrSkip</c> of the concatenate
        /// estimator, and an <c>Append</c> of the LightGBM / FastTree combination.
        /// All estimators are created from default-constructed option objects.
        /// </summary>
        /// <returns>The newly constructed pipeline.</returns>
        private MultiModelPipeline CreateMultiModelPipeline()
        {
            var concatenate = SweepableEstimatorFactory.CreateConcatenate(new ConcatOption());
            var missingValueReplacer = SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption());
            var oneHotEncoder = SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption());
            var lightGbmTrainer = SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption());
            var fastTreeTrainer = SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption());

            // NOTE(review): the estimator '+' / '*' operators are defined outside this snippet;
            // presumably '+' offers alternatives and '*' chains estimators — confirm against their definitions.
            return new MultiModelPipeline()
                .AppendOrSkip(missingValueReplacer + missingValueReplacer * oneHotEncoder)
                .AppendOrSkip(concatenate)
                .Append(lightGbmTrainer + fastTreeTrainer);
        }
예제 #4
0
        /// <summary>
        /// Build one featurization pipeline from the columns described by <paramref name="columnInformation"/>.
        /// Column purposes are inferred from <paramref name="data"/>; numeric columns go through
        /// <see cref="NumericFeaturizer(string[], string[])"/>, categorical columns through
        /// <see cref="CatalogFeaturizer(string[], string[])"/> and each text column through
        /// <see cref="TextFeaturizer(string, string)"/>, every featurizer receiving the same column name(s)
        /// for both of its arguments. Finally the text, numeric and categorical columns (in that order)
        /// are concatenated into <paramref name="outputColumnName"/>; the concatenate step is omitted
        /// when none of those columns exist.
        /// </summary>
        /// <param name="data">input data; checked with <c>Contracts.CheckValue</c>.</param>
        /// <param name="columnInformation">column information; checked with <c>Contracts.CheckValue</c>.</param>
        /// <param name="outputColumnName">name of the combined output feature column.</param>
        /// <returns>A <see cref="MultiModelPipeline"/> for featurization.</returns>
        public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
        {
            Contracts.CheckValue(data, nameof(data));
            Contracts.CheckValue(columnInformation, nameof(columnInformation));

            var inferredPurposes = PurposeInference.InferPurposes(_context, data, columnInformation);

            // Looks up the schema name of every column whose inferred purpose matches.
            string[] NamesWithPurpose(ColumnPurpose purpose)
            {
                return inferredPurposes
                    .Where(column => column.Purpose == purpose)
                    .Select(column => data.Schema[column.ColumnIndex].Name)
                    .ToArray();
            }

            var textColumns = NamesWithPurpose(ColumnPurpose.TextFeature);
            var numericColumns = NamesWithPurpose(ColumnPurpose.NumericFeature);
            var categoricalColumns = NamesWithPurpose(ColumnPurpose.CategoricalFeature);

            var pipeline = new MultiModelPipeline();

            // Numeric and categorical columns are featurized as a group, in place.
            if (numericColumns.Length > 0)
            {
                pipeline = pipeline.Append(NumericFeaturizer(numericColumns, numericColumns));
            }

            if (categoricalColumns.Length > 0)
            {
                pipeline = pipeline.Append(CatalogFeaturizer(categoricalColumns, categoricalColumns));
            }

            // Text columns get one featurizer each.
            foreach (var name in textColumns)
            {
                pipeline = pipeline.Append(TextFeaturizer(name, name));
            }

            var concatOption = new ConcatOption
            {
                InputColumnNames = textColumns
                    .Concat(numericColumns)
                    .Concat(categoricalColumns)
                    .ToArray(),
                OutputColumnName = outputColumnName,
            };

            // Nothing to combine: return the pipeline without a concatenate step.
            if (concatOption.InputColumnNames.Length == 0)
            {
                return pipeline;
            }

            return pipeline.Append(SweepableEstimatorFactory.CreateConcatenate(concatOption));
        }