/// <summary>
        /// Create a <see cref="CountFeatureSelectingEstimator"/> that keeps only the slots whose count of non-default values is greater than or equal to a threshold.
        /// The same <paramref name="count"/> threshold is applied to every pair in <paramref name="columns"/>.
        /// </summary>
        /// <param name="catalog">The transform's catalog.</param>
        /// <param name="columns">The input/output column name pairs on which to apply the transformation.</param>
        /// <param name="count">A slot is preserved when its count of non-default values in the training data is greater than or equal to this threshold.</param>
        /// <returns>A <see cref="CountFeatureSelectingEstimator"/> built from one set of column options per entry of <paramref name="columns"/>.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[SelectFeaturesBasedOnCount](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCountMultiColumn.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                                InputOutputColumnPair[] columns,
                                                                                long count = CountSelectDefaults.Count)
        {
            var environment = CatalogUtils.GetEnvironment(catalog);
            environment.CheckValue(columns, nameof(columns));

            // Expand every (output, input) name pair into estimator column options sharing the same threshold.
            var perColumnOptions = new CountFeatureSelectingEstimator.ColumnOptions[columns.Length];
            for (int i = 0; i < columns.Length; i++)
            {
                var pair = columns[i];
                perColumnOptions[i] = new CountFeatureSelectingEstimator.ColumnOptions(pair.OutputColumnName, pair.InputColumnName, count);
            }

            return new CountFeatureSelectingEstimator(environment, perColumnOptions);
        }
Example #2
0
 /// <include file='doc.xml' path='doc/members/member[@name="MutualInformationFeatureSelection"]/*' />
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="labelColumn">Name of the column to use for labels.</param>
 /// <param name="slotsInOutput">The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.</param>
 /// <param name="numBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
 /// <param name="columns">Specifies the names of the input columns for the transformation, and their respective output column names.</param>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[SelectFeaturesBasedOnMutualInformation](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs?range=1-4,10-121)]
 /// ]]>
 /// </format>
 /// </example>
 public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                                                 string labelColumn = MutualInfoSelectDefaults.LabelColumn,
                                                                                                 int slotsInOutput  = MutualInfoSelectDefaults.SlotsInOutput,
                                                                                                 int numBins        = MutualInfoSelectDefaults.NumBins,
                                                                                                 params (string input, string output)[] columns)
 /// <summary>
 /// Create a <see cref="CountFeatureSelectingEstimator"/> for a single column. It keeps only the slots whose count of non-default values
 /// is greater than or equal to a threshold.
 /// </summary>
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
 /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of <paramref name="outputColumnName"/> will be used as source.</param>
 /// <param name="count">A slot is preserved when its count of non-default values in the training data is greater than or equal to this threshold.</param>
 /// <returns>A <see cref="CountFeatureSelectingEstimator"/> constructed from the catalog's environment and the given arguments.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[SelectFeaturesBasedOnCount](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCount.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                         string outputColumnName,
                                                                         string inputColumnName = null,
                                                                         long count             = CountSelectDefaults.Count)
 {
     var environment = CatalogUtils.GetEnvironment(catalog);
     return new CountFeatureSelectingEstimator(environment, outputColumnName, inputColumnName, count);
 }
 /// <summary>
 /// Create a <see cref="CountFeatureSelectingEstimator"/> from explicitly specified per-column options.
 /// </summary>
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="columns">The column options passed through unchanged to the estimator.</param>
 /// <returns>A <see cref="CountFeatureSelectingEstimator"/> constructed from the catalog's environment and <paramref name="columns"/>.</returns>
 internal static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                           params CountFeatureSelectingEstimator.ColumnOptions[] columns)
 {
     var environment = CatalogUtils.GetEnvironment(catalog);
     return new CountFeatureSelectingEstimator(environment, columns);
 }
        /// <summary>
        /// Create a <see cref="MutualInformationFeatureSelectingEstimator"/> that selects the top k slots across all specified columns,
        /// ordered by their mutual information with the label column.
        /// </summary>
        /// <param name="catalog">The transform's catalog.</param>
        /// <param name="columns">The input columns for the transformation, paired with their respective output column names.</param>
        /// <param name="labelColumnName">The name of the label column.</param>
        /// <param name="slotsInOutput">The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.</param>
        /// <param name="numberOfBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
        /// <returns>A <see cref="MutualInformationFeatureSelectingEstimator"/> covering every pair in <paramref name="columns"/>.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[SelectFeaturesBasedOnMutualInformation](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformationMultiColumn.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                                                        InputOutputColumnPair[] columns,
                                                                                                        string labelColumnName = MutualInfoSelectDefaults.LabelColumn,
                                                                                                        int slotsInOutput      = MutualInfoSelectDefaults.SlotsInOutput,
                                                                                                        int numberOfBins       = MutualInfoSelectDefaults.NumBins)
        {
            var environment = CatalogUtils.GetEnvironment(catalog);
            environment.CheckValue(columns, nameof(columns));

            // The estimator takes (output, input) name tuples, so flatten the column pairs into that shape.
            var namePairs = columns
                .Select(pair => (pair.OutputColumnName, pair.InputColumnName))
                .ToArray();

            return new MutualInformationFeatureSelectingEstimator(environment, labelColumnName, slotsInOutput, numberOfBins, namePairs);
        }
 /// <summary>
 /// Create a <see cref="MutualInformationFeatureSelectingEstimator"/> for a single column. It selects the top k slots
 /// ordered by their mutual information with the label column.
 /// </summary>
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
 /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of <paramref name="outputColumnName"/> will be used as source.</param>
 /// <param name="labelColumnName">The name of the label column.</param>
 /// <param name="slotsInOutput">The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.</param>
 /// <param name="numberOfBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
 /// <returns>A <see cref="MutualInformationFeatureSelectingEstimator"/> constructed from the catalog's environment and the given arguments.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[SelectFeaturesBasedOnMutualInformation](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformation.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                                                 string outputColumnName, string inputColumnName = null,
                                                                                                 string labelColumnName = MutualInfoSelectDefaults.LabelColumn,
                                                                                                 int slotsInOutput      = MutualInfoSelectDefaults.SlotsInOutput,
                                                                                                 int numberOfBins       = MutualInfoSelectDefaults.NumBins)
 {
     var environment = CatalogUtils.GetEnvironment(catalog);
     return new MutualInformationFeatureSelectingEstimator(environment, outputColumnName, inputColumnName, labelColumnName, slotsInOutput, numberOfBins);
 }
 /// <summary>
 /// Create a <see cref="MutualInformationFeatureSelectingEstimator"/> from explicitly specified column options.
 /// The options are converted with <c>ColumnOptions.ConvertToValueTuples</c> before being handed to the estimator.
 /// </summary>
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="labelColumnName">The name of the label column.</param>
 /// <param name="slotsInOutput">The maximum number of slots to preserve in the output.</param>
 /// <param name="numberOfBins">Max number of bins used to approximate mutual information between each input column and the label column.</param>
 /// <param name="columns">The column options describing the columns to transform.</param>
 /// <returns>A <see cref="MutualInformationFeatureSelectingEstimator"/> constructed from the catalog's environment and the converted columns.</returns>
 internal static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                                                   string labelColumnName = MutualInfoSelectDefaults.LabelColumn,
                                                                                                   int slotsInOutput      = MutualInfoSelectDefaults.SlotsInOutput,
                                                                                                   int numberOfBins       = MutualInfoSelectDefaults.NumBins,
                                                                                                   params ColumnOptions[] columns)
 {
     var environment = CatalogUtils.GetEnvironment(catalog);
     var namePairs = ColumnOptions.ConvertToValueTuples(columns);
     return new MutualInformationFeatureSelectingEstimator(environment, labelColumnName, slotsInOutput, numberOfBins, namePairs);
 }
Example #8
0
 /// <include file='doc.xml' path='doc/members/member[@name="MutualInformationFeatureSelection"]/*' />
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="labelColumn">Name of the column to use for labels.</param>
 /// <param name="slotsInOutput">The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.</param>
 /// <param name="numBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
 /// <param name="columns">The input columns for the transformation and their respective output column names.
 /// They are converted with <c>SimpleColumnInfo.ConvertToValueTuples</c> before being handed to the estimator.</param>
 /// <returns>A <see cref="MutualInformationFeatureSelectingEstimator"/> constructed from the catalog's environment and the converted columns.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[SelectFeaturesBasedOnMutualInformation](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs?range=1-4,10-121)]
 /// ]]>
 /// </format>
 /// </example>
 public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
                                                                                                 string labelColumn = MutualInfoSelectDefaults.LabelColumn,
                                                                                                 int slotsInOutput  = MutualInfoSelectDefaults.SlotsInOutput,
                                                                                                 int numBins        = MutualInfoSelectDefaults.NumBins,
                                                                                                 params SimpleColumnInfo[] columns)
 {
     var environment = CatalogUtils.GetEnvironment(catalog);
     var namePairs = SimpleColumnInfo.ConvertToValueTuples(columns);
     return new MutualInformationFeatureSelectingEstimator(environment, labelColumn, slotsInOutput, numBins, namePairs);
 }