// Grows a SweepablePipeline through each Append call shape exercised below
// (one estimator, several estimators, estimator + pipeline, pipeline + pipeline)
// and approval-checks the JSON serialization of the final pipeline.
public void SweepablePipeline_Append_SweepableEstimator_Test()
{
    var sweepable = new SweepablePipeline();

    var concatenateOptions = new ConcatOption
    {
        InputColumnNames = new[] { "a", "b", "c" },
        OutputColumnName = "a",
    };

    var lightGbmOptions = new LgbmOption
    {
        FeatureColumnName = "Feature",
        LabelColumnName = "Label",
    };

    // Case 1: a single sweepable estimator.
    var firstConcatenate = SweepableEstimatorFactory.CreateConcatenate(concatenateOptions);
    sweepable = sweepable.Append(firstConcatenate);

    // Case 2: multiple sweepable estimators in one call.
    var lightGbm = SweepableEstimatorFactory.CreateLightGbmBinary(lightGbmOptions);
    var secondConcatenate = SweepableEstimatorFactory.CreateConcatenate(concatenateOptions);
    sweepable = sweepable.Append(lightGbm, secondConcatenate);

    // Case 3: a sweepable estimator mixed with a sweepable pipeline (the pipeline built so far).
    var thirdConcatenate = SweepableEstimatorFactory.CreateConcatenate(concatenateOptions);
    sweepable = sweepable.Append(thirdConcatenate, sweepable);

    // Case 4: sweepable pipelines only (the current pipeline appended to itself twice).
    sweepable = sweepable.Append(sweepable, sweepable);

    var serialized = JsonSerializer.Serialize(sweepable, _jsonSerializerOptions);
    Approvals.Verify(serialized);
}
// Test fixture: returns a SweepableEstimatorPipeline holding five estimators,
// each created from a default-constructed option object, in this order:
// concatenate, replace-missing-values, one-hot encoding, LightGBM binary, FastTree binary.
private SweepableEstimatorPipeline CreateSweepbaleEstimatorPipeline()
{
    // Array initializer elements are evaluated left to right, so the factory
    // calls run in the same order as the estimators appear in the pipeline.
    var estimators = new SweepableEstimator[]
    {
        SweepableEstimatorFactory.CreateConcatenate(new ConcatOption()),
        SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption()),
        SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption()),
        SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption()),
        SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption()),
    };

    return new SweepableEstimatorPipeline(estimators);
}
// Test fixture: returns a three-stage MultiModelPipeline built from default-option estimators.
//   stage 1 (AppendOrSkip): replaceMissingValue + replaceMissingValue * oneHot
//   stage 2 (AppendOrSkip): concatenate
//   stage 3 (Append):       lightGbm + fastTree
// NOTE(review): '+' and '*' are operators defined on the estimator types elsewhere;
// presumably '+' means "alternative" and '*' means "followed by" — confirm against their definitions.
private MultiModelPipeline CreateMultiModelPipeline()
{
    var concatenate = SweepableEstimatorFactory.CreateConcatenate(new ConcatOption());
    var missingValues = SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption());
    var oneHotEncoding = SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption());
    var lightGbmTrainer = SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption());
    var fastTreeTrainer = SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption());

    return new MultiModelPipeline()
        .AppendOrSkip(missingValues + missingValues * oneHotEncoding)
        .AppendOrSkip(concatenate)
        .Append(lightGbmTrainer + fastTreeTrainer);
}
/// <summary>
/// Builds one featurization pipeline for the columns described by <paramref name="columnInformation"/>.
/// Column purposes are inferred with <c>PurposeInference.InferPurposes</c>. Numeric columns are passed to
/// <see cref="NumericFeaturizer(string[], string[])"/>, categorical columns to <see cref="CatalogFeaturizer(string[], string[])"/>,
/// and each text column individually to <see cref="TextFeaturizer(string, string)"/>; every featurizer receives the same
/// column name(s) for both of its arguments. If at least one such column exists, a final concatenate step combines
/// them (text, then numeric, then categorical) into <paramref name="outputColumnName"/>.
/// </summary>
/// <param name="data">input data; its schema is used to resolve column names. Checked with <c>Contracts.CheckValue</c>.</param>
/// <param name="columnInformation">column information used for purpose inference. Checked with <c>Contracts.CheckValue</c>.</param>
/// <param name="outputColumnName">name of the concatenated output feature column.</param>
/// <returns>A <see cref="MultiModelPipeline"/> for featurization; it has no stages appended when no text, numeric or categorical feature column is found.</returns>
public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
{
    Contracts.CheckValue(data, nameof(data));
    Contracts.CheckValue(columnInformation, nameof(columnInformation));

    var inferredPurposes = PurposeInference.InferPurposes(this._context, data, columnInformation);

    // Resolves the schema names of all columns inferred to have the given purpose.
    string[] ColumnNamesFor(ColumnPurpose purpose)
    {
        return inferredPurposes
            .Where(column => column.Purpose == purpose)
            .Select(column => data.Schema[column.ColumnIndex].Name)
            .ToArray();
    }

    var textColumns = ColumnNamesFor(ColumnPurpose.TextFeature);
    var numericColumns = ColumnNamesFor(ColumnPurpose.NumericFeature);
    var categoricalColumns = ColumnNamesFor(ColumnPurpose.CategoricalFeature);

    var featurization = new MultiModelPipeline();

    // Stage order is numeric, then categorical, then one stage per text column.
    if (numericColumns.Length > 0)
    {
        featurization = featurization.Append(this.NumericFeaturizer(numericColumns, numericColumns));
    }

    if (categoricalColumns.Length > 0)
    {
        featurization = featurization.Append(this.CatalogFeaturizer(categoricalColumns, categoricalColumns));
    }

    foreach (var textColumn in textColumns)
    {
        featurization = featurization.Append(this.TextFeaturizer(textColumn, textColumn));
    }

    // Concatenation input order (text, numeric, categorical) differs from the stage order above.
    var concatenateOption = new ConcatOption
    {
        InputColumnNames = textColumns.Concat(numericColumns).Concat(categoricalColumns).ToArray(),
        OutputColumnName = outputColumnName,
    };

    // Nothing to concatenate: return the pipeline without a concatenate stage.
    if (concatenateOption.InputColumnNames.Length == 0)
    {
        return featurization;
    }

    return featurization.Append(SweepableEstimatorFactory.CreateConcatenate(concatenateOption));
}