/// <summary>
/// Builds an <see cref="IidChangePointEstimator"/> that detects change points in an
/// <a href="https://en.wikipedia.org/wiki/Independent_and_identically_distributed_random_variables">independent identically distributed (i.i.d.)</a>
/// time series. Detection is based on adaptive kernel density estimations and martingale scores.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.
/// The column is a vector of type double and size 4 whose first four values are Alert, Raw Score, P-Value and Martingale score.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">The confidence for change point detection, in the range [0, 100].</param>
/// <param name="changeHistoryLength">The length of the sliding window on p-values used to compute the martingale score.</param>
/// <param name="martingale">The martingale used for scoring.</param>
/// <param name="eps">The epsilon parameter for the Power martingale.</param>
/// <returns>The configured <see cref="IidChangePointEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectIidChangePoint](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static IidChangePointEstimator DetectIidChangePoint(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName,
    int confidence,
    int changeHistoryLength,
    MartingaleType martingale = MartingaleType.Power,
    double eps = 0.1)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // The estimator's constructor takes the input column after the history length,
    // unlike this method's parameter order.
    return new IidChangePointEstimator(env, outputColumnName, confidence, changeHistoryLength, inputColumnName, martingale, eps);
}
/// <summary>
/// Builds a <see cref="GlobalContrastNormalizingEstimator"/> from explicit per-column options.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">The per-column options handed to the estimator.</param>
/// <returns>The configured <see cref="GlobalContrastNormalizingEstimator"/>.</returns>
internal static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(
    this TransformsCatalog catalog,
    params GlobalContrastNormalizingEstimator.ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new GlobalContrastNormalizingEstimator(env, columns);
}
/// <summary>
/// Produces, for each column pair, a boolean output column with the same number of slots as the input column.
/// An output value is true when the corresponding input value is missing.
/// </summary>
/// <param name="catalog">The transform extensions' catalog.</param>
/// <param name="columns">The names of the input columns of the transformation and the corresponding names for the output columns.</param>
/// <returns>The configured <see cref="MissingValueIndicatorEstimator"/>.</returns>
public static MissingValueIndicatorEstimator IndicateMissingValues(
    this TransformsCatalog catalog,
    params ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // The estimator consumes value tuples rather than ColumnOptions instances.
    var columnTuples = ColumnOptions.ConvertToValueTuples(columns);
    return new MissingValueIndicatorEstimator(env, columnTuples);
}
/// <summary>
/// Builds a <see cref="NormalizingEstimator"/> from explicit per-column options.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">The per-column options handed to the estimator.</param>
/// <returns>The configured <see cref="NormalizingEstimator"/>.</returns>
internal static NormalizingEstimator Normalize(
    this TransformsCatalog catalog,
    params NormalizingEstimator.ColumnOptionsBase[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, columns);
}
/// <summary>
/// Builds an <see cref="LpNormNormalizingEstimator"/> from explicit per-column options.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">The per-column options handed to the estimator.</param>
/// <returns>The configured <see cref="LpNormNormalizingEstimator"/>.</returns>
internal static LpNormNormalizingEstimator NormalizeLpNorm(
    this TransformsCatalog catalog,
    params LpNormNormalizingEstimator.ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new LpNormNormalizingEstimator(env, columns);
}
/// <summary>
/// Scores a dataset using a pre-trained TensorFlow model specified via <paramref name="tensorFlowModel"/>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="tensorFlowModel">The pre-trained TensorFlow model.</param>
/// <param name="outputColumnNames">The names of the requested model outputs.</param>
/// <param name="inputColumnNames">The names of the model inputs.</param>
/// <returns>The configured <see cref="TensorFlowEstimator"/>.</returns>
public static TensorFlowEstimator ScoreTensorFlowModel(
    this TransformsCatalog catalog,
    TensorFlowModelInfo tensorFlowModel,
    string[] outputColumnNames,
    string[] inputColumnNames)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // The estimator's constructor expects the model after the column names.
    return new TensorFlowEstimator(env, outputColumnNames, inputColumnNames, tensorFlowModel);
}
/// <summary>
/// Loads a TensorFlow model into memory. This convenience method lets the model be loaded once and then reused
/// for querying its schema and for creating a <see cref="TensorFlowEstimator"/> through
/// <see cref="TensorFlowModel.ScoreTensorFlowModel(string, string)"/>.
/// </summary>
/// <param name="catalog">The model operations catalog.</param>
/// <param name="modelLocation">Location of the TensorFlow model.</param>
/// <returns>The loaded <see cref="TensorFlowModel"/>.</returns>
public static TensorFlowModel LoadTensorFlowModel(this ModelOperationsCatalog catalog, string modelLocation)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return TensorFlowUtils.LoadTensorFlowModel(env, modelLocation);
}
/// <summary>
/// Transforms a categorical column into a set of features that includes the count of each label class,
/// the log-odds for each label class and the back-off indicator.
/// </summary>
/// <param name="catalog">The transforms catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="initialCounts">A previously trained count table containing initial counts.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="labelColumn">The name of the label column.</param>
/// <returns>A <see cref="CountTargetEncodingEstimator"/> configured for the single output/input column pair.</returns>
public static CountTargetEncodingEstimator CountTargetEncode(
    this TransformsCatalog catalog,
    string outputColumnName,
    CountTargetEncodingTransformer initialCounts,
    string inputColumnName = null,
    string labelColumn = "Label")
    => new CountTargetEncodingEstimator(
        CatalogUtils.GetEnvironment(catalog),
        labelColumn,
        initialCounts,
        // The estimator works on column pairs, so wrap the single pair in an array.
        new[] { new InputOutputColumnPair(outputColumnName, inputColumnName) });
/// <summary>
/// Takes a column filled with a vector of floats and maps it to a random low-dimensional feature space.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="newDim">The number of random Fourier features to create.</param>
/// <param name="useSin">Whether to create two features for every random Fourier frequency (one for cos and one for sin).</param>
/// <returns>The configured <see cref="RandomFourierFeaturizingEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[CreateRandomFourierFeatures](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
/// ]]>
/// </format>
/// </example>
public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(
    this TransformsCatalog.ProjectionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    int newDim = RandomFourierFeaturizingEstimator.Defaults.NewDim,
    bool useSin = RandomFourierFeaturizingEstimator.Defaults.UseSin)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new RandomFourierFeaturizingEstimator(env, outputColumnName, inputColumnName, newDim, useSin);
}
/// <summary>
/// Creates a <see cref="TextLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="options">Defines the settings of the load operation.</param>
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer slot name annotations if present,
/// and also the number of slots in <see cref="TextLoader.Options.Columns"/> defined with <see cref="TextLoader.Range"/> with
/// <see langword="null"/> maximum index.
/// If the sample has been saved with ML.NET's <see cref="SaveAsText(DataOperationsCatalog, IDataView, Stream, char, bool, bool, bool, bool)"/>,
/// it will also contain the schema information in the header, which the loader can read even if
/// <see cref="TextLoader.Options.Columns"/> are not specified.
/// In order to use the schema defined in the file, all other <see cref="TextLoader.Options"/> should be left with their default values.</param>
/// <returns>The created <see cref="TextLoader"/>.</returns>
public static TextLoader CreateTextLoader(
    this DataOperationsCatalog catalog,
    TextLoader.Options options,
    IMultiStreamSource dataSample = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new TextLoader(env, options, dataSample);
}
/// <summary>
/// Transforms categorical columns into a set of features that includes the count of each label class,
/// the log-odds for each label class and the back-off indicator.
/// </summary>
/// <param name="catalog">The transforms catalog.</param>
/// <param name="columns">The input and output columns.</param>
/// <param name="initialCounts">A previously trained count table containing initial counts.</param>
/// <param name="labelColumn">The name of the label column.</param>
/// <returns>A <see cref="CountTargetEncodingEstimator"/> configured for the given column pairs.</returns>
public static CountTargetEncodingEstimator CountTargetEncode(
    this TransformsCatalog catalog,
    InputOutputColumnPair[] columns,
    CountTargetEncodingTransformer initialCounts,
    string labelColumn = "Label")
    => new CountTargetEncodingEstimator(CatalogUtils.GetEnvironment(catalog), labelColumn, initialCounts, columns);
/// <summary>
/// Performs image classification using transfer learning.
/// Usage of this API requires additional NuGet dependencies on TensorFlow redist, see linked document for more information.
/// <format type="text/markdown">
/// <![CDATA[
/// [!include[io](~/../docs/samples/docs/api-reference/tensorflow-usage.md)]
/// ]]>
/// </format>
/// </summary>
/// <param name="catalog">The model operations catalog; its environment is used to load the model and create the estimator.</param>
/// <param name="featuresColumnName">The name of the input features column.</param>
/// <param name="labelColumnName">The name of the labels column.</param>
/// <param name="scoreColumnName">The name of the output score column.</param>
/// <param name="predictedLabelColumnName">The name of the output predicted label columns.</param>
/// <param name="arch">The architecture of the image recognition DNN model.</param>
/// <param name="epoch">Number of training iterations. Each iteration/epoch refers to one pass over the dataset.</param>
/// <param name="batchSize">The batch size for training.</param>
/// <param name="learningRate">The learning rate for training.</param>
/// <param name="disableEarlyStopping">Whether to disable use of early stopping technique. Training will go on for the full epoch count.</param>
/// <param name="earlyStopping">Early stopping technique parameters to be used to terminate training when training metric stops improving.
/// Ignored when <paramref name="disableEarlyStopping"/> is true; when <see langword="null"/> and early stopping is enabled,
/// a default-constructed <see cref="EarlyStopping"/> is used.</param>
/// <param name="metricsCallback">Callback for reporting model statistics during training phase.</param>
/// <param name="statisticFrequency">Indicates the frequency of epochs at which to report model statistics during training phase.</param>
/// <param name="framework">Indicates the choice of DNN training framework. Currently only tensorflow is supported.</param>
/// <param name="modelSavePath">Optional name of the path where a copy of the new graph should be saved.
/// The graph will be saved as part of model.</param>
/// <param name="finalModelPrefix">The name of the prefix for the final model and checkpoint files.</param>
/// <param name="validationSet">Validation set.</param>
/// <param name="testOnTrainSet">Indicates to evaluate the model on train set after every epoch.</param>
/// <param name="reuseTrainSetBottleneckCachedValues">Indicates to not re-compute cached trainset bottleneck values if already available in the bin folder.</param>
/// <param name="reuseValidationSetBottleneckCachedValues">Indicates to not re-compute cached validationset bottleneck values if already available in the bin folder.</param>
/// <param name="trainSetBottleneckCachedValuesFilePath">Indicates the file path to store trainset bottleneck values for caching.</param>
/// <param name="validationSetBottleneckCachedValuesFilePath">Indicates the file path to store validationset bottleneck values for caching.</param>
/// <returns>The configured <see cref="ImageClassificationEstimator"/>.</returns>
/// <remarks>
/// The support for image classification is under preview.
/// When the architecture's <c>.meta</c> graph file does not exist at its relative path, it is downloaded before the model is loaded;
/// for <see cref="Architecture.InceptionV3"/> the <c>tfhub_modules.zip</c> archive is additionally downloaded and extracted
/// into a <c>tfhub_modules</c> directory.
/// </remarks>
public static ImageClassificationEstimator ImageClassification(
    this ModelOperationsCatalog catalog,
    string featuresColumnName,
    string labelColumnName,
    string scoreColumnName = "Score",
    string predictedLabelColumnName = "PredictedLabel",
    Architecture arch = Architecture.InceptionV3,
    int epoch = 100,
    int batchSize = 10,
    float learningRate = 0.01f,
    bool disableEarlyStopping = false,
    EarlyStopping earlyStopping = null,
    ImageClassificationMetricsCallback metricsCallback = null,
    int statisticFrequency = 1,
    DnnFramework framework = DnnFramework.Tensorflow,
    string modelSavePath = null,
    string finalModelPrefix = "custom_retrained_model_based_on_",
    IDataView validationSet = null,
    bool testOnTrainSet = true,
    bool reuseTrainSetBottleneckCachedValues = false,
    bool reuseValidationSetBottleneckCachedValues = false,
    string trainSetBottleneckCachedValuesFilePath = "trainSetBottleneckFile.csv",
    string validationSetBottleneckCachedValuesFilePath = "validationSetBottleneckFile.csv"
    )
{
    var options = new ImageClassificationEstimator.Options()
    {
        // Any architecture other than ResnetV2101 maps to the InceptionV3 graph file.
        ModelLocation = arch == Architecture.ResnetV2101 ? @"resnet_v2_101_299.meta" : @"InceptionV3.meta",
        InputColumns = new[] { featuresColumnName },
        OutputColumns = new[] { scoreColumnName, predictedLabelColumnName },
        LabelColumn = labelColumnName,
        TensorFlowLabel = labelColumnName,
        Epoch = epoch,
        LearningRate = learningRate,
        BatchSize = batchSize,
        // Disabled => no criteria at all; enabled without explicit settings => default criteria.
        EarlyStoppingCriteria = disableEarlyStopping ? null : (earlyStopping ?? new EarlyStopping()),
        ScoreColumnName = scoreColumnName,
        PredictedLabelColumnName = predictedLabelColumnName,
        FinalModelPrefix = finalModelPrefix,
        Arch = arch,
        MetricsCallback = metricsCallback,
        StatisticsFrequency = statisticFrequency,
        Framework = framework,
        ModelSavePath = modelSavePath,
        ValidationSet = validationSet,
        TestOnTrainSet = testOnTrainSet,
        TrainSetBottleneckCachedValuesFilePath = trainSetBottleneckCachedValuesFilePath,
        ValidationSetBottleneckCachedValuesFilePath = validationSetBottleneckCachedValuesFilePath,
        ReuseTrainSetBottleneckCachedValues = reuseTrainSetBottleneckCachedValues,
        ReuseValidationSetBottleneckCachedValues = reuseValidationSetBottleneckCachedValues
    };

    // NOTE(review): only the .meta file gates the downloads below; the tfhub_modules
    // archive is not checked independently. Confirm this is the intended behavior.
    if (!File.Exists(options.ModelLocation))
    {
        if (options.Arch == Architecture.InceptionV3)
        {
            DownloadImageClassificationResource(
                @"https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/graph/InceptionV3.meta",
                @"InceptionV3.meta");

            DownloadImageClassificationResource(
                @"https://github.com/SciSharp/TensorFlow.NET/raw/master/data/tfhub_modules.zip",
                @"tfhub_modules.zip");

            ZipFile.ExtractToDirectory(Path.Combine(Directory.GetCurrentDirectory(), @"tfhub_modules.zip"), @"tfhub_modules");
        }
        else if (options.Arch == Architecture.ResnetV2101)
        {
            DownloadImageClassificationResource(
                @"https://aka.ms/mlnet-resources/image/ResNet101Tensorflow/resnet_v2_101_299.meta",
                @"resnet_v2_101_299.meta");
        }
    }

    var env = CatalogUtils.GetEnvironment(catalog);
    return new ImageClassificationEstimator(env, options, DnnUtils.LoadDnnModel(env, options.ModelLocation, true));
}

/// <summary>
/// Downloads the resource at <paramref name="url"/> into the file <paramref name="destinationFileName"/>,
/// disposing the <see cref="WebClient"/> used for the transfer.
/// </summary>
/// <param name="url">Absolute URL of the resource to fetch.</param>
/// <param name="destinationFileName">Path of the local file that receives the downloaded data.</param>
private static void DownloadImageClassificationResource(string url, string destinationFileName)
{
    using (var client = new WebClient())
    {
        client.DownloadFile(new Uri(url), destinationFileName);
    }
}
/// <summary>
/// Creates a <see cref="DateTimeEstimator"/>, which splits up the input column specified by <paramref name="inputColumnName"/>
/// into all its individual datetime components. The input column must be of type Int64 representing the number of seconds
/// since the unix epoch. The transformer appends <paramref name="columnPrefix"/> to all the output columns.
/// If a country is specified, holiday details are looked up for that country as well.
/// </summary>
/// <param name="catalog">Transform catalog.</param>
/// <param name="inputColumnName">Input column name.</param>
/// <param name="columnPrefix">Prefix to add to the generated columns.</param>
/// <param name="country">Country name to get holiday details for.</param>
/// <returns>The configured <see cref="DateTimeEstimator"/>.</returns>
public static DateTimeEstimator FeaturizeDateTime(
    this TransformsCatalog catalog,
    string inputColumnName,
    string columnPrefix,
    DateTimeEstimator.HolidayList country = DateTimeEstimator.HolidayList.None)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new DateTimeEstimator(env, inputColumnName, columnPrefix, country);
}
/// <summary>
/// Builds an <see cref="IidSpikeEstimator"/> that detects a spike in an
/// <a href="https://en.wikipedia.org/wiki/Independent_and_identically_distributed_random_variables">independent identically distributed (i.i.d.)</a>
/// time series. Detection is based on adaptive kernel density estimations and martingale scores.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">The confidence for spike detection, in the range [0, 100].</param>
/// <param name="pvalueHistoryLength">The size of the sliding window for computing the p-value.</param>
/// <param name="side">Determines whether to detect positive or negative anomalies, or both.</param>
/// <returns>The configured <see cref="IidSpikeEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectIidSpike](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static IidSpikeEstimator DetectIidSpike(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName,
    int confidence,
    int pvalueHistoryLength,
    AnomalySide side = AnomalySide.TwoSided)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // The estimator's constructor takes the input column after the history length,
    // unlike this method's parameter order.
    return new IidSpikeEstimator(env, outputColumnName, confidence, pvalueHistoryLength, inputColumnName, side);
}
/// <summary>
/// Builds an <see cref="ApproximatedKernelMappingEstimator"/> from explicit per-column options.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">The per-column options handed to the estimator.</param>
/// <returns>The configured <see cref="ApproximatedKernelMappingEstimator"/>.</returns>
internal static ApproximatedKernelMappingEstimator ApproximatedKernelMap(
    this TransformsCatalog catalog,
    params ApproximatedKernelMappingEstimator.ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new ApproximatedKernelMappingEstimator(env, columns);
}
/// <summary>
/// Takes columns filled with a vector of floats and maps them to a random low-dimensional feature space.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">The input columns to use for the transformation.</param>
/// <returns>The configured <see cref="RandomFourierFeaturizingEstimator"/>.</returns>
public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(
    this TransformsCatalog.ProjectionTransforms catalog,
    params RandomFourierFeaturizingEstimator.ColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new RandomFourierFeaturizingEstimator(env, columns);
}
/// <summary>
/// Normalizes (rescales) the column according to the specified <paramref name="mode"/>.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="inputName">The name of the input column.</param>
/// <param name="outputName">The name of the output column. Defaults to <see langword="null"/>;
/// NOTE(review): presumably the input column name is reused in that case - confirm against <see cref="NormalizingEstimator"/>.</param>
/// <param name="mode">The <see cref="NormalizingEstimator.NormalizerMode"/> used to map the old values to the new scale.</param>
/// <returns>The configured <see cref="NormalizingEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ConcatWith](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/MinMaxNormalizer.cs?line=36)]
/// ]]>
/// </format>
/// </example>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ConcatWith](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/MinMaxNormalizer.cs?range=6-11,16-89)]
/// ]]>
/// </format>
/// </example>
public static NormalizingEstimator Normalize(
    this TransformsCatalog catalog,
    string inputName,
    string outputName = null,
    NormalizingEstimator.NormalizerMode mode = NormalizingEstimator.NormalizerMode.MinMax)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, inputName, outputName, mode);
}
/// <summary>
/// Takes a column filled with a vector of floats and computes the L-p norm of it.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="normKind">Type of norm to use to normalize each sample.</param>
/// <param name="subMean">Subtract mean from each value before normalizing.</param>
/// <returns>The configured <see cref="LpNormalizingEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LpNormalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
/// ]]>
/// </format>
/// </example>
public static LpNormalizingEstimator LpNormalize(
    this TransformsCatalog.ProjectionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    LpNormalizingEstimatorBase.NormalizerKind normKind = LpNormalizingEstimatorBase.Defaults.NormKind,
    bool subMean = LpNormalizingEstimatorBase.Defaults.LpSubstractMean)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new LpNormalizingEstimator(env, outputColumnName, inputColumnName, normKind, subMean);
}
/// <summary>
/// Scores or retrains (based on the setting of <see cref="TensorFlowTransformer.Options.ReTrain"/>) a pre-trained
/// TensorFlow model specified via <paramref name="tensorFlowModel"/>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="options">The <see cref="TensorFlowTransformer.Options"/> specifying the inputs and the settings of the
/// <see cref="TensorFlowEstimator"/>.</param>
/// <param name="tensorFlowModel">The pre-trained TensorFlow model.</param>
/// <returns>The configured <see cref="TensorFlowEstimator"/>.</returns>
public static TensorFlowEstimator TensorFlow(
    this TransformsCatalog catalog,
    TensorFlowTransformer.Options options,
    TensorFlowModelInfo tensorFlowModel)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new TensorFlowEstimator(env, options, tensorFlowModel);
}
/// <summary>
/// Takes columns filled with a vector of floats and computes the L-p norm of each.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Describes the parameters of the lp-normalization process for each column pair.</param>
/// <returns>The configured <see cref="LpNormalizingEstimator"/>.</returns>
public static LpNormalizingEstimator LpNormalize(
    this TransformsCatalog.ProjectionTransforms catalog,
    params LpNormalizingEstimator.LpNormColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new LpNormalizingEstimator(env, columns);
}
/// <summary>
/// Normalizes the data based on the computed mean and variance of the logarithm of the data.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">List of output and input column pairs.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="useCdf">Whether to use CDF as the output.</param>
/// <returns>A <see cref="NormalizingEstimator"/> with one log-mean-variance option set per column pair.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Normalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
/// ]]>
/// </format>
/// </example>
public static NormalizingEstimator NormalizeLogMeanVariance(
    this TransformsCatalog catalog,
    InputOutputColumnPair[] columns,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // Every pair shares the same example cap and CDF setting.
    var columnOptions = columns
        .Select(pair => new NormalizingEstimator.LogMeanVarianceColumnOptions(
            pair.OutputColumnName, pair.InputColumnName, maximumExampleCount, useCdf))
        .ToArray();

    return new NormalizingEstimator(env, columnOptions);
}
/// <summary>
/// Takes a column filled with a vector of floats and computes global contrast normalization of it.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="substractMean">Subtract mean from each value before normalizing.</param>
/// <param name="useStdDev">Normalize by standard deviation rather than L2 norm.</param>
/// <param name="scale">Scale features by this value.</param>
/// <returns>The configured <see cref="GlobalContrastNormalizingEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[GlobalContrastNormalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
/// ]]>
/// </format>
/// </example>
public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(
    this TransformsCatalog.ProjectionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    bool substractMean = LpNormalizingEstimatorBase.Defaults.GcnSubstractMean,
    bool useStdDev = LpNormalizingEstimatorBase.Defaults.UseStdDev,
    float scale = LpNormalizingEstimatorBase.Defaults.Scale)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new GlobalContrastNormalizingEstimator(env, outputColumnName, inputColumnName, substractMean, useStdDev, scale);
}
/// <summary>
/// Takes a column filled with a vector of floats and normalizes its <paramref name="norm"/> to one.
/// When <paramref name="ensureZeroMean"/> is <see langword="true"/>, a pre-processing step is applied
/// to make the specified column's mean a zero vector.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="norm">Type of norm to use to normalize each sample. The indicated norm of the resulting vector will be normalized to one.</param>
/// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing; otherwise use the raw input.</param>
/// <returns>The configured <see cref="LpNormNormalizingEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LpNormalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
/// ]]>
/// </format>
/// </example>
public static LpNormNormalizingEstimator NormalizeLpNorm(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    LpNormNormalizingEstimatorBase.NormFunction norm = LpNormNormalizingEstimatorBase.Defaults.Norm,
    bool ensureZeroMean = LpNormNormalizingEstimatorBase.Defaults.LpEnsureZeroMean)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new LpNormNormalizingEstimator(env, outputColumnName, inputColumnName, norm, ensureZeroMean);
}
/// <summary>
/// Takes columns filled with a vector of floats and computes global contrast normalization of each.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Describes the parameters of the gcn-normalization process for each column pair.</param>
/// <returns>The configured <see cref="GlobalContrastNormalizingEstimator"/>.</returns>
public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(
    this TransformsCatalog.ProjectionTransforms catalog,
    params GlobalContrastNormalizingEstimator.GcnColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new GlobalContrastNormalizingEstimator(env, columns);
}
/// <summary>
/// Takes a column filled with a vector of floats and computes global contrast normalization of it.
/// When <paramref name="ensureZeroMean"/> is <see langword="true"/>, a pre-processing step is applied
/// to make the specified column's mean a zero vector.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing; otherwise use the raw input.</param>
/// <param name="ensureUnitStandardDeviation">If <see langword="true"/>, the resulting vector's standard deviation is one.
/// Otherwise, the resulting vector's L2-norm is one.</param>
/// <param name="scale">Scale features by this value.</param>
/// <returns>The configured <see cref="GlobalContrastNormalizingEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[GlobalContrastNormalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
/// ]]>
/// </format>
/// </example>
public static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    bool ensureZeroMean = LpNormNormalizingEstimatorBase.Defaults.GcnEnsureZeroMean,
    bool ensureUnitStandardDeviation = LpNormNormalizingEstimatorBase.Defaults.EnsureUnitStdDev,
    float scale = LpNormNormalizingEstimatorBase.Defaults.Scale)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new GlobalContrastNormalizingEstimator(env, outputColumnName, inputColumnName, ensureZeroMean, ensureUnitStandardDeviation, scale);
}
/// <summary>
/// Transforms a column filled with a vector of random variables with a known covariance matrix into a set of new variables
/// whose covariance is the identity matrix, meaning that they are uncorrelated and each have variance 1.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="kind">Whitening kind (PCA/ZCA).</param>
/// <param name="eps">Whitening constant, prevents division by zero.</param>
/// <param name="maxRows">Maximum number of rows used to train the transform.</param>
/// <param name="pcaNum">In case of PCA whitening, indicates the number of components to retain.</param>
/// <returns>The configured <see cref="VectorWhiteningEstimator"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[VectorWhiten](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs)]
/// ]]>
/// </format>
/// </example>
public static VectorWhiteningEstimator VectorWhiten(
    this TransformsCatalog.ProjectionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    WhiteningKind kind = VectorWhiteningEstimator.Defaults.Kind,
    float eps = VectorWhiteningEstimator.Defaults.Eps,
    int maxRows = VectorWhiteningEstimator.Defaults.MaxRows,
    int pcaNum = VectorWhiteningEstimator.Defaults.PcaNum)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new VectorWhiteningEstimator(env, outputColumnName, inputColumnName, kind, eps, maxRows, pcaNum);
}
/// <summary>
/// Normalizes the data based on the observed minimum and maximum values of the data.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">List of output and input column pairs.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <returns>A <see cref="NormalizingEstimator"/> with one min-max option set per column pair.</returns>
public static NormalizingEstimator NormalizeMinMax(
    this TransformsCatalog catalog,
    InputOutputColumnPair[] columns,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // Every pair shares the same example cap and zero-preservation setting.
    var columnOptions = columns
        .Select(pair => new NormalizingEstimator.MinMaxColumnOptions(
            pair.OutputColumnName, pair.InputColumnName, maximumExampleCount, fixZero))
        .ToArray();

    return new NormalizingEstimator(env, columnOptions);
}
/// <summary>
/// Transforms columns filled with a vector of random variables with a known covariance matrix into a set of new variables
/// whose covariance is the identity matrix, meaning that they are uncorrelated and each have variance 1.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Describes the parameters of the whitening process for each column pair.</param>
/// <returns>A <see cref="VectorWhiteningEstimator"/> constructed from the catalog's environment and <paramref name="columns"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[VectorWhiten](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs)]
/// ]]>
/// </format>
/// </example>
public static VectorWhiteningEstimator VectorWhiten(
    this TransformsCatalog.ProjectionTransforms catalog,
    params VectorWhiteningEstimator.ColumnOptions[] columns)
{
    // Resolve the hosting environment from the catalog before building the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new VectorWhiteningEstimator(env, columns);
}
/// <summary>
/// Creates a new output column, or replaces the source with a new column
/// (depending on whether the <paramref name="inputColumnName"/> is given a value, or left to <see langword="null"/>)
/// of boolean type, with the same number of slots as the input column. The value in the output column
/// is true if the value in the input column is missing.
/// </summary>
/// <param name="catalog">The transform extensions' catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the
/// <paramref name="outputColumnName"/> will be used as source, and that column will get replaced.</param>
/// <returns>A <see cref="MissingValueIndicatorEstimator"/> constructed from the catalog's environment and the two column names.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[IndicateMissingValues](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs)]
/// ]]>
/// </format>
/// </example>
public static MissingValueIndicatorEstimator IndicateMissingValues(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null)
{
    // Resolve the hosting environment from the catalog before building the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new MissingValueIndicatorEstimator(env, outputColumnName, inputColumnName);
}
/// <summary>
/// Create a new instance of <see cref="SsaSpikeEstimator"/> for detecting a spike in a time series signal
/// using <a href="https://en.wikipedia.org/wiki/Singular_spectrum_analysis">Singular Spectrum Analysis (SSA)</a>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The vector contains Alert, Raw Score, P-Value as first three values.</param>
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="confidence">The confidence for spike detection in the range [0, 100].</param>
/// <param name="pvalueHistoryLength">The size of the sliding window for computing the p-value.</param>
/// <param name="trainingWindowSize">The number of points from the beginning of the sequence used for training.</param>
/// <param name="seasonalityWindowSize">An upper bound on the largest relevant seasonality in the input time-series.</param>
/// <param name="side">The argument that determines whether to detect positive or negative anomalies, or both.</param>
/// <param name="errorFunction">The function used to compute the error between the expected and the observed value.</param>
/// <returns>A <see cref="SsaSpikeEstimator"/> constructed from the catalog's environment and the supplied settings.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectSpikeBySsa](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static SsaSpikeEstimator DetectSpikeBySsa(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName,
    int confidence,
    int pvalueHistoryLength,
    int trainingWindowSize,
    int seasonalityWindowSize,
    AnomalySide side = AnomalySide.TwoSided,
    ErrorFunction errorFunction = ErrorFunction.SignedDifference)
    // Note: the estimator's constructor takes the input column name after the window sizes,
    // so the argument order below intentionally differs from this method's parameter order.
    => new SsaSpikeEstimator(
        CatalogUtils.GetEnvironment(catalog),
        outputColumnName,
        confidence,
        pvalueHistoryLength,
        trainingWindowSize,
        seasonalityWindowSize,
        inputColumnName,
        side,
        errorFunction);