Example #1
0
        /// <summary>
        /// Fits a multiclass Naive Bayes pipeline on the given rows and returns a prediction engine for it.
        /// </summary>
        /// <typeparam name="TIn">Row type of the training data; its properties drive label and feature selection.</typeparam>
        /// <typeparam name="TOut">Row type produced by the prediction engine.</typeparam>
        /// <param name="trainDataset">Rows the pipeline is fitted on.</param>
        /// <param name="outputColumnName">Column handed to the final key-to-value mapping.</param>
        /// <param name="additionModelAction">Optional callback that receives the fitted model before the engine is returned.</param>
        /// <returns>A prediction engine created from the fitted pipeline.</returns>
        public static PredictionEngine<TIn, TOut> NaiveBayes<TIn, TOut>(IEnumerable<TIn> trainDataset, string outputColumnName = "PredictedLabel", Action<ITransformer> additionModelAction = null)
            where TIn : class, new()
            where TOut : class, new()
        {
            var mlContext = new MLContext();
            var inputType = typeof(TIn);
            var labelName = Preprocessing.LabelColumn(inputType.GetProperties()).Name;
            var encoder   = mlContext.OneHotEncoding(Preprocessing.ExcludeColumns(inputType.GetProperties()));

            var trainingView = mlContext.Data.LoadFromEnumerable(trainDataset);

            // Stage 1: key the label, one-hot encode, assemble "Features", project to "PCAFeatures", then cache.
            var featurization = mlContext.Transforms.Conversion.MapValueToKey(labelName)
                .Append(encoder.OneHotEncodingEstimator)
                .Append(mlContext.Transforms.Concatenate("Features", encoder.CombinedFeatures.ToArray()))
                .Append(mlContext.Transforms.ProjectToPrincipalComponents(outputColumnName: "PCAFeatures", inputColumnName: "Features", rank: 2))
                .AppendCacheCheckpoint(mlContext);

            // Stage 2: the trainer consumes "Features" (not "PCAFeatures"); the output column is then mapped back from keys.
            var trainer = mlContext.MulticlassClassification.Trainers.NaiveBayes(
                labelColumnName: labelName,
                featureColumnName: "Features");
            var pipeline = featurization
                .Append(trainer)
                .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName));

            var fittedModel = pipeline.Fit(trainingView);
            var engine      = mlContext.Model.CreatePredictionEngine<TIn, TOut>(fittedModel);

            if (additionModelAction != null)
            {
                additionModelAction(fittedModel);
            }

            return engine;
        }
Example #2
0
        /// <summary>
        /// Fits a multiclass SDCA maximum-entropy pipeline on the given rows and returns a prediction engine for it.
        /// </summary>
        /// <typeparam name="TIn">Row type of the training data; its properties drive label and feature selection.</typeparam>
        /// <typeparam name="TOut">Row type produced by the prediction engine.</typeparam>
        /// <param name="trainDataset">Rows the pipeline is fitted on.</param>
        /// <param name="outputColumnName">Column handed to the final key-to-value mapping.</param>
        /// <param name="exampleWeightColumnName">The name of the example weight column, forwarded to the trainer.</param>
        /// <param name="loss">Accepted by the signature but not forwarded to the trainer by this method.</param>
        /// <param name="l2Regularization">The L2 regularization hyperparameter, forwarded to the trainer.</param>
        /// <param name="l1Regularization">The L1 regularization hyperparameter, forwarded to the trainer.</param>
        /// <param name="maximumNumberOfIterations">The maximum number of passes over the data, forwarded to the trainer.</param>
        /// <param name="additionModelAction">Optional callback that receives the fitted model before the engine is returned.</param>
        /// <returns>A prediction engine created from the fitted pipeline.</returns>
        public static PredictionEngine<TIn, TOut> SdcaMaximumEntropy<TIn, TOut>(IEnumerable<TIn> trainDataset, string outputColumnName = "PredictedLabel", string exampleWeightColumnName = null, ISupportSdcaClassificationLoss loss = null, float? l2Regularization = null, float? l1Regularization = null, int? maximumNumberOfIterations = null, Action<ITransformer> additionModelAction = null)
            where TIn : class, new()
            where TOut : class, new()
        {
            var mlContext = new MLContext();
            var inputType = typeof(TIn);
            var labelName = Preprocessing.LabelColumn(inputType.GetProperties()).Name;
            var encoder   = mlContext.OneHotEncoding(Preprocessing.ExcludeColumns(inputType.GetProperties()));

            var trainingView = mlContext.Data.LoadFromEnumerable(trainDataset);

            // Stage 1: key the label, one-hot encode, assemble "Features", project to "PCAFeatures", then cache.
            var featurization = mlContext.Transforms.Conversion.MapValueToKey(labelName)
                .Append(encoder.OneHotEncodingEstimator)
                .Append(mlContext.Transforms.Concatenate("Features", encoder.CombinedFeatures.ToArray()))
                .Append(mlContext.Transforms.ProjectToPrincipalComponents(outputColumnName: "PCAFeatures", inputColumnName: "Features", rank: 2))
                .AppendCacheCheckpoint(mlContext);

            // Stage 2: the trainer consumes "Features" (not "PCAFeatures"); the output column is then mapped back from keys.
            var trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
                labelColumnName: labelName,
                featureColumnName: "Features",
                exampleWeightColumnName: exampleWeightColumnName,
                l2Regularization: l2Regularization,
                l1Regularization: l1Regularization,
                maximumNumberOfIterations: maximumNumberOfIterations);
            var pipeline = featurization
                .Append(trainer)
                .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName));

            var fittedModel = pipeline.Fit(trainingView);
            var engine      = mlContext.Model.CreatePredictionEngine<TIn, TOut>(fittedModel);

            if (additionModelAction != null)
            {
                additionModelAction(fittedModel);
            }

            return engine;
        }
Example #3
0
        /// <summary>
        /// Fits a binary linear SVM pipeline on the given rows and returns a prediction engine for it.
        /// </summary>
        /// <typeparam name="TIn">Row type of the training data; its properties drive label and feature selection.</typeparam>
        /// <typeparam name="TOut">Row type produced by the prediction engine.</typeparam>
        /// <param name="trainDataset">Rows the pipeline is fitted on.</param>
        /// <param name="exampleWeightColumnName">The name of the example weight column, forwarded to the trainer.</param>
        /// <param name="numberOfIterations">Number of training iterations, forwarded to the trainer.</param>
        /// <param name="additionModelAction">Optional callback that receives the fitted model before the engine is returned.</param>
        /// <returns>A prediction engine created from the fitted pipeline.</returns>
        public static PredictionEngine <TIn, TOut> LinearSVM <TIn, TOut>(
            IEnumerable <TIn> trainDataset,
            string exampleWeightColumnName            = null,
            int numberOfIterations                    = 1,
            Action <ITransformer> additionModelAction = null)
            where TIn : class, new()
            where TOut : class, new()
        {
            var context         = new MLContext();
            var type            = typeof(TIn);
            var labelColumnName = Preprocessing.LabelColumn(type.GetProperties()).Name;
            var properties      = Preprocessing.ExcludeColumns(type.GetProperties());

            var preprocessor = context.OneHotEncoding(properties);

            var trainDataframe = context.Data.LoadFromEnumerable(trainDataset);

            // One-hot encoding must run BEFORE the concatenation so that "Features" is assembled
            // from the encoded columns; this matches the stage order used by the sibling trainers.
            var pipeline =
                preprocessor.OneHotEncodingEstimator
                .Append(context.Transforms.Concatenate("Features", preprocessor.CombinedFeatures.ToArray()))
                .AppendCacheCheckpoint(context)
                .Append(context.BinaryClassification.Trainers.LinearSvm(
                            labelColumnName: labelColumnName,
                            featureColumnName: "Features",
                            exampleWeightColumnName: exampleWeightColumnName,
                            numberOfIterations: numberOfIterations
                            ));

            var model         = pipeline.Fit(trainDataframe);
            var predictEngine = context.Model.CreatePredictionEngine <TIn, TOut>(model);

            additionModelAction?.Invoke(model);
            return(predictEngine);
        }
Example #4
0
        /// <summary>
        /// Shared regression training template: copies the label into "Label", applies one-hot encoding,
        /// concatenates the features, adds a PCA projection, then appends the supplied estimator and fits.
        /// </summary>
        /// <typeparam name="TType">Type of training data.</typeparam>
        /// <typeparam name="TTrainer">Type of the transformer produced by the supplied estimator.</typeparam>
        /// <param name="context">Microsoft.ML context.</param>
        /// <param name="trainDataset">Training dataset.</param>
        /// <param name="estimator">Algorithm estimator appended as the last pipeline stage.</param>
        /// <returns>The transformer chain obtained by fitting the pipeline on the training dataset.</returns>
        private static TransformerChain<TTrainer> RegressionTrainerTemplate<TType, TTrainer>(this MLContext context, IEnumerable<TType> trainDataset, IEstimator<TTrainer> estimator)
            where TType : class, new()
            where TTrainer : class, ITransformer
        {
            var rowType   = typeof(TType);
            var labelName = Preprocessing.LabelColumn(rowType.GetProperties()).Name;
            var encoder   = context.OneHotEncoding(Preprocessing.ExcludeColumns(rowType.GetProperties()));

            var trainingView = context.Data.LoadFromEnumerable(trainDataset);

            // Pre-processing stages shared by every regression trainer built on this template.
            var featurization = context.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: labelName)
                .Append(encoder.OneHotEncodingEstimator)
                .Append(context.Transforms.Concatenate("Features", encoder.CombinedFeatures.ToArray()))
                .Append(context.Transforms.ProjectToPrincipalComponents(outputColumnName: "PCAFeatures", inputColumnName: "Features", rank: 2));

            return featurization.Append(estimator).Fit(trainingView);
        }
Example #5
0
        /// <summary>
        /// Fits a binary FastTree pipeline on the given rows and returns a prediction engine for it.
        /// </summary>
        /// <typeparam name="TIn">Row type of the training data; its properties drive label and feature selection.</typeparam>
        /// <typeparam name="TOut">Row type produced by the prediction engine.</typeparam>
        /// <param name="trainDataset">Rows the pipeline is fitted on.</param>
        /// <param name="exampleWeightColumnName">The name of the example weight column, forwarded to the trainer.</param>
        /// <param name="numberOfLeaves">Forwarded to the trainer's <c>numberOfLeaves</c>.</param>
        /// <param name="numberOfTrees">Forwarded to the trainer's <c>numberOfTrees</c>.</param>
        /// <param name="minimumExampleCountPerLeaft">Forwarded to the trainer's <c>minimumExampleCountPerLeaf</c>.</param>
        /// <param name="learningRate">Forwarded to the trainer's <c>learningRate</c>.</param>
        /// <param name="additionModelAction">Optional callback that receives the fitted model before the engine is returned.</param>
        /// <returns>A prediction engine created from the fitted pipeline.</returns>
        public static PredictionEngine<TIn, TOut> FastTree<TIn, TOut>(
            IEnumerable<TIn> trainDataset,
            string exampleWeightColumnName = null,
            int numberOfLeaves = 20,
            int numberOfTrees = 100,
            int minimumExampleCountPerLeaft = 10,
            double learningRate = 0.2,
            Action<ITransformer> additionModelAction = null)
            where TIn : class, new()
            where TOut : class, new()
        {
            var mlContext = new MLContext();
            var inputType = typeof(TIn);
            var labelName = Preprocessing.LabelColumn(inputType.GetProperties()).Name;
            var encoder   = mlContext.OneHotEncoding(Preprocessing.ExcludeColumns(inputType.GetProperties()));

            var trainingView = mlContext.Data.LoadFromEnumerable(trainDataset);

            // Stage 1: one-hot encode, assemble "Features", then cache before training.
            var featurization = encoder.OneHotEncodingEstimator
                .Append(mlContext.Transforms.Concatenate("Features", encoder.CombinedFeatures.ToArray()))
                .AppendCacheCheckpoint(mlContext);

            // Stage 2: binary FastTree trainer over "Features".
            var trainer = mlContext.BinaryClassification.Trainers.FastTree(
                labelColumnName: labelName,
                featureColumnName: "Features",
                exampleWeightColumnName: exampleWeightColumnName,
                numberOfLeaves: numberOfLeaves,
                numberOfTrees: numberOfTrees,
                minimumExampleCountPerLeaf: minimumExampleCountPerLeaft,
                learningRate: learningRate);

            var fittedModel = featurization.Append(trainer).Fit(trainingView);
            var engine      = mlContext.Model.CreatePredictionEngine<TIn, TOut>(fittedModel);

            if (additionModelAction != null)
            {
                additionModelAction(fittedModel);
            }

            return engine;
        }
Example #6
0
        /// <summary>
        /// Fits a binary averaged-perceptron pipeline on the given rows and returns a prediction engine for it.
        /// </summary>
        /// <typeparam name="TIn">Row type of the training data; its properties drive label and feature selection.</typeparam>
        /// <typeparam name="TOut">Row type produced by the prediction engine.</typeparam>
        /// <param name="trainDataset">Rows the pipeline is fitted on.</param>
        /// <param name="lossFunction">Loss function forwarded to the trainer.</param>
        /// <param name="learningRate">Learning rate forwarded to the trainer.</param>
        /// <param name="decreaseLearningRate">Forwarded to the trainer's <c>decreaseLearningRate</c>.</param>
        /// <param name="l2Regularization">L2 regularization weight forwarded to the trainer.</param>
        /// <param name="numberOfIterations">Number of training iterations, forwarded to the trainer.</param>
        /// <param name="additionModelAction">Optional callback that receives the fitted model before the engine is returned.</param>
        /// <returns>A prediction engine created from the fitted pipeline.</returns>
        public static PredictionEngine <TIn, TOut> AveragedPerceptron <TIn, TOut>(
            IEnumerable <TIn> trainDataset,
            IClassificationLoss lossFunction = null,
            float learningRate        = 1f,
            bool decreaseLearningRate = false,
            float l2Regularization    = 0f,
            int numberOfIterations    = 1,
            Action <ITransformer> additionModelAction = null)
            where TIn : class, new()
            where TOut : class, new()
        {
            var context         = new MLContext();
            var type            = typeof(TIn);
            var labelColumnName = Preprocessing.LabelColumn(type.GetProperties()).Name;
            var properties      = Preprocessing.ExcludeColumns(type.GetProperties());

            var preprocessor = context.OneHotEncoding(properties);

            var trainDataframe = context.Data.LoadFromEnumerable(trainDataset);

            // One-hot encoding must run BEFORE the concatenation so that "Features" is assembled
            // from the encoded columns; this matches the stage order used by the sibling trainers.
            var pipeline =
                preprocessor.OneHotEncodingEstimator
                .Append(context.Transforms.Concatenate("Features", preprocessor.CombinedFeatures.ToArray()))
                .AppendCacheCheckpoint(context)
                .Append(context.BinaryClassification.Trainers.AveragedPerceptron(
                            labelColumnName: labelColumnName,
                            featureColumnName: "Features",
                            lossFunction: lossFunction,
                            learningRate: learningRate,
                            decreaseLearningRate: decreaseLearningRate,
                            l2Regularization: l2Regularization,
                            numberOfIterations: numberOfIterations
                            ));

            var model         = pipeline.Fit(trainDataframe);
            var predictEngine = context.Model.CreatePredictionEngine <TIn, TOut>(model);

            additionModelAction?.Invoke(model);
            return(predictEngine);
        }
Example #7
0
        /// <summary>
        /// Fits a K-Means clustering pipeline on the given rows and returns a prediction engine for it.
        /// </summary>
        /// <typeparam name="TIn">Row type of the training data; its properties drive feature selection.</typeparam>
        /// <typeparam name="TOut">Row type produced by the prediction engine.</typeparam>
        /// <param name="trainDataset">Rows the pipeline is fitted on.</param>
        /// <param name="numberOfClusters">Number of clusters, forwarded to the trainer.</param>
        /// <param name="exampleWeightColumnName">The name of the example weight column, forwarded to the trainer.</param>
        /// <param name="additionModelAction">Optional callback that receives the fitted model before the engine is returned.</param>
        /// <returns>A prediction engine created from the fitted pipeline.</returns>
        public static PredictionEngine <TIn, TOut> KMeans <TIn, TOut>(IEnumerable <TIn> trainDataset, int numberOfClusters = 5, string exampleWeightColumnName = null, Action <ITransformer> additionModelAction = null)
            where TIn : class, new()
            where TOut : class, new()
        {
            var context    = new MLContext();
            var type       = typeof(TIn);
            // The K-Means trainer takes no label argument, so TIn's label property is not looked up here.
            var properties = Preprocessing.ExcludeColumns(type.GetProperties());

            var preprocessing  = context.OneHotEncoding(properties);
            var trainDataframe = context.Data.LoadFromEnumerable(trainDataset);

            // One-hot encoding must run BEFORE concatenation/PCA so that "Features" is assembled
            // from the encoded columns; this matches the stage order used by the sibling trainers.
            var pipeline = preprocessing.OneHotEncodingEstimator
                           .Append(context.Transforms.Concatenate("Features", preprocessing.CombinedFeatures.ToArray()))
                           .Append(context.Transforms.ProjectToPrincipalComponents(outputColumnName: "PCAFeatures", inputColumnName: "Features", rank: 2))
                           .Append(context.Clustering.Trainers.KMeans(
                                       featureColumnName: "Features",
                                       exampleWeightColumnName: exampleWeightColumnName,
                                       numberOfClusters: numberOfClusters
                                       ));

            var model  = pipeline.Fit(trainDataframe);
            var engine = context.Model.CreatePredictionEngine <TIn, TOut>(model);

            additionModelAction?.Invoke(model);
            return(engine);
        }
Example #8
0
        /// <summary>
        /// Fits a multiclass L-BFGS (Limited-Memory Broyden–Fletcher–Goldfarb–Shanno) maximum-entropy pipeline
        /// on the given rows and returns a prediction engine for it.
        /// </summary>
        /// <typeparam name="TIn">Row type of the training data; its properties drive label and feature selection.</typeparam>
        /// <typeparam name="TOut">Row type produced by the prediction engine.</typeparam>
        /// <param name="trainDataset">Rows the pipeline is fitted on.</param>
        /// <param name="outputColumnName">Column handed to the final key-to-value mapping.</param>
        /// <param name="exampleWeightColumnName">The name of the example weight column, forwarded to the trainer.</param>
        /// <param name="l1Regularization">Weight of L1 regularization term.</param>
        /// <param name="l2Regularization">Weight of L2 regularization term.</param>
        /// <param name="optimizationTolerance">Threshold for optimizer convergence; narrowed to <c>float</c> before it is passed to the trainer.</param>
        /// <param name="historySize">Memory size. Low=faster, less accurate.</param>
        /// <param name="enforceNonNegativity">Enforce non-negative weights.</param>
        /// <param name="additionModelAction">Optional callback that receives the fitted model before the engine is returned.</param>
        /// <returns>A prediction engine created from the fitted pipeline.</returns>
        public static PredictionEngine<TIn, TOut> LbfgsMaximumEntropy<TIn, TOut>(IEnumerable<TIn> trainDataset, string outputColumnName = "PredictedLabel", string exampleWeightColumnName = null, float l1Regularization = 1, float l2Regularization = 1, double optimizationTolerance = 1e-07, int historySize = 20, bool enforceNonNegativity = false, Action<ITransformer> additionModelAction = null)
            where TIn : class, new()
            where TOut : class, new()
        {
            var mlContext = new MLContext();
            var inputType = typeof(TIn);
            var labelName = Preprocessing.LabelColumn(inputType.GetProperties()).Name;
            var encoder   = mlContext.OneHotEncoding(Preprocessing.ExcludeColumns(inputType.GetProperties()));

            var trainingView = mlContext.Data.LoadFromEnumerable(trainDataset);

            // Stage 1: key the label, one-hot encode, assemble "Features", project to "PCAFeatures", then cache.
            var featurization = mlContext.Transforms.Conversion.MapValueToKey(labelName)
                .Append(encoder.OneHotEncodingEstimator)
                .Append(mlContext.Transforms.Concatenate("Features", encoder.CombinedFeatures.ToArray()))
                .Append(mlContext.Transforms.ProjectToPrincipalComponents(outputColumnName: "PCAFeatures", inputColumnName: "Features", rank: 2))
                .AppendCacheCheckpoint(mlContext);

            // Stage 2: the trainer consumes "Features" (not "PCAFeatures"); the output column is then mapped back from keys.
            var trainer = mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(
                labelColumnName: labelName,
                featureColumnName: "Features",
                exampleWeightColumnName: exampleWeightColumnName,
                l1Regularization: l1Regularization,
                l2Regularization: l2Regularization,
                optimizationTolerance: (float)optimizationTolerance,
                historySize: historySize,
                enforceNonNegativity: enforceNonNegativity);
            var pipeline = featurization
                .Append(trainer)
                .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName));

            var fittedModel = pipeline.Fit(trainingView);
            var engine      = mlContext.Model.CreatePredictionEngine<TIn, TOut>(fittedModel);

            if (additionModelAction != null)
            {
                additionModelAction(fittedModel);
            }

            return engine;
        }