Exemplo n.º 1
0
        /// <summary>
        /// Assembles one estimator from the pre-trainer transforms, the trainer and the
        /// post-trainer transforms, appended in that order.
        /// </summary>
        /// <returns>The combined estimator.</returns>
        public IEstimator<ITransformer> ToEstimator()
        {
            IEstimator<ITransformer> chain = new EstimatorChain<ITransformer>();

            // Pre-trainer transforms; entries without an estimator are skipped.
            foreach (var preTransform in Transforms)
            {
                if (preTransform.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(preTransform.Estimator);
            }

            // The trainer is built before the optional cache checkpoint is appended.
            var trainerEstimator = Trainer.BuildTrainer();

            if (_cacheBeforeTrainer)
            {
                chain = chain.AppendCacheCheckpoint(_context);
            }

            chain = chain.Append(trainerEstimator);

            // Post-trainer transforms; entries without an estimator are skipped.
            foreach (var postTransform in TransformsPostTrainer)
            {
                if (postTransform.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(postTransform.Estimator);
            }

            return chain;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Trains a one-versus-all multiclass model on top of a FastForest binary classifier,
        /// then stores the fitted model, the input schema and a prediction engine on this instance.
        /// </summary>
        /// <param name="trainingData">Rows to train on.</param>
        /// <param name="multiClassOptions">Supplies the label column name and the feature column names.</param>
        /// <param name="fastForestOptions">Supplies leaf, tree and minimum-example counts for FastForest.</param>
        public void TrainFastForestOva(IEnumerable<TInput> trainingData, MultiClassOptions<TInput> multiClassOptions, FastForestOvaOptions fastForestOptions)
        {
            const string labelColumn = "Label";
            const string featuresColumn = "Features";

            this.Options = multiClassOptions;

            // Preprocessing: label -> key, concatenate features, cache before training.
            var labelToKey = this.ml.Transforms.Conversion.MapValueToKey(
                inputColumnName: this.Options.LabelName,
                outputColumnName: labelColumn);
            var concatenateFeatures = this.ml.Transforms.Concatenate(featuresColumn, this.Options.FeatureColumnNames);
            var preprocessing = labelToKey
                .Append(concatenateFeatures)
                .AppendCacheCheckpoint(this.ml);

            // Binary FastForest wrapped in a one-versus-all multiclass trainer.
            var binaryTrainer = this.ml.BinaryClassification.Trainers.FastForest(
                numberOfLeaves: fastForestOptions.NumberOfLeaves,
                minimumExampleCountPerLeaf: fastForestOptions.MinimumExampleCountPerLeaf,
                numberOfTrees: fastForestOptions.NumberOfTrees,
                labelColumnName: labelColumn,
                featureColumnName: featuresColumn);
            var ovaTrainer = this.ml.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer, labelColumnName: labelColumn);

            this.trainingPipeline = preprocessing.Append(ovaTrainer);

            // Fit with the predicted key mapped back to its original value.
            var trainView = this.ml.Data.LoadFromEnumerable(trainingData);
            var predictedLabelToValue = this.ml.Transforms.Conversion.MapKeyToValue("PredictedLabel");

            this.model = trainingPipeline
                .Append(predictedLabelToValue)
                .Fit(trainView);

            this.inputSchema = trainView.Schema;
            this.predictionEngine = this.ml.Model.CreatePredictionEngine<TInput, PredictionOutput>(model);
        }
        /// <summary>
        /// Returns a new reader estimator whose chain is this estimator's chain followed by
        /// <paramref name="estimator"/>.
        /// </summary>
        /// <typeparam name="TNewTrans">Transformer type produced by the appended estimator.</typeparam>
        /// <param name="estimator">Estimator to append; validated with <c>Contracts.CheckValue</c>.</param>
        /// <returns>A new composite reader estimator built from the same start and the extended chain.</returns>
        public CompositeReaderEstimator<TSource, TNewTrans> Append<TNewTrans>(IEstimator<TNewTrans> estimator)
            where TNewTrans : class, ITransformer
        {
            Contracts.CheckValue(estimator, nameof(estimator));

            var extendedChain = _estimatorChain.Append(estimator);
            return new CompositeReaderEstimator<TSource, TNewTrans>(_start, extendedChain);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Fits <paramref name="pipeline"/> on <c>DataView</c>. The algorithm is appended when
        /// <paramref name="algorithmType"/> has a value, and a key-to-value mapping of
        /// "PredictedLabel" is appended when the predicted column is alphanumeric.
        /// </summary>
        /// <param name="algorithmType">Algorithm to append, or null to fit without one.</param>
        /// <param name="pipeline">The preprocessing chain to extend and fit.</param>
        /// <returns>The fitted transformer.</returns>
        private ITransformer GetModel(AlgorithmType? algorithmType, EstimatorChain<TransformerChain<ColumnConcatenatingTransformer>> pipeline)
        {
            bool hasAlgorithm = algorithmType != null;

            if (!_predictedColumn.IsAlphanumeric)
            {
                if (hasAlgorithm)
                {
                    return pipeline.Append(GetAlgorithm(algorithmType.Value)).Fit(DataView);
                }

                return pipeline.Fit(DataView);
            }

            // Alphanumeric predicted column: map the predicted key back to its value.
            if (hasAlgorithm)
            {
                return pipeline
                    .Append(GetAlgorithm(algorithmType.Value))
                    .Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"))
                    .Fit(DataView);
            }

            return pipeline
                .Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"))
                .Fit(DataView);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Appends a StochasticDualCoordinateAscent regression trainer (columns "Label" and
        /// "Features") to the data-processing pipeline.
        /// </summary>
        /// <param name="mlContext">Context used to create the trainer.</param>
        /// <param name="dataProcessPipeline">Pipeline the trainer is appended to.</param>
        /// <returns>The unfitted training pipeline.</returns>
        private static EstimatorChain<RegressionPredictionTransformer<LinearRegressionPredictor>> PerformStep3(MLContext mlContext, EstimatorChain<ITransformer> dataProcessPipeline)
        {
            return dataProcessPipeline.Append(
                mlContext.Regression.Trainers.StochasticDualCoordinateAscent("Label", "Features"));
        }
Exemplo n.º 6
0
        /// <summary>
        /// Appends an SDCA multiclass trainer and a "PredictedLabel" key-to-value mapping to
        /// <paramref name="pipeline"/>, fits it, runs one sample prediction, and saves the model.
        /// </summary>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="pipeline">Data-processing pipeline to extend.</param>
        /// <returns>The (unfitted) training pipeline; the fitted model is stored in <c>_trainedModel</c>.</returns>
        public static EstimatorChain<KeyToValueMappingTransformer> BuildAndTrainModel(IDataView trainingDataView, EstimatorChain<ITransformer> pipeline)
        {
            // STEP 3: create the trainer (multi-class SDCA over the default label/feature columns).
            // <SnippetSdcaMultiClassTrainer>
            var sdcaTrainer = new SdcaMultiClassTrainer(_mlContext, DefaultColumnNames.Label, DefaultColumnNames.Features);
            // </SnippetSdcaMultiClassTrainer>

            // Attach the trainer and map the predicted key back to its original value.
            // <SnippetAddTrainer>
            var fullPipeline = pipeline
                .Append(sdcaTrainer)
                .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // </SnippetAddTrainer>

            // STEP 4: fit the pipeline on the training data.
            Console.WriteLine("=============== Training the model  ===============");

            // <SnippetTrainModel>
            _trainedModel = fullPipeline.Fit(trainingDataView);
            // </SnippetTrainModel>
            Console.WriteLine($"=============== Finished Training the model Ending time: {DateTime.Now.ToString()} ===============");

            // (Optional) try a single prediction with the just-trained model before saving it.
            Console.WriteLine("=============== Single Prediction just-trained-model ===============");

            // <SnippetCreatePredictionEngine>
            _predEngine = _trainedModel.CreatePredictionEngine<GitHubIssue, IssuePrediction>(_mlContext);
            // </SnippetCreatePredictionEngine>
            // <SnippetCreateTestIssue1>
            var sampleIssue = new GitHubIssue
            {
                Title = "WebSockets communication is slow in my machine",
                Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
            };
            // </SnippetCreateTestIssue1>

            // <SnippetPredict>
            var samplePrediction = _predEngine.Predict(sampleIssue);

            // </SnippetPredict>

            // <SnippetOutputPrediction>
            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {samplePrediction.Area} ===============");
            // </SnippetOutputPrediction>

            // Persist the trained model.
            // <SnippetCallSaveModel>
            SaveModelAsFile(_mlContext, _trainedModel);
            // </SnippetCallSaveModel>

            // <SnippetReturnModel>
            return fullPipeline;
            // </SnippetReturnModel>
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds the preprocessing chain for the configured predicted column and feature columns.
        /// </summary>
        /// <remarks>
        /// When at least one alphanumeric column is configured, each is one-hot encoded ahead of the
        /// feature concatenation and the result is stored in <c>_transformerChain</c>. Otherwise
        /// (null or empty <c>_alphanumericColumns</c>) the result is stored in <c>_estimatorChain</c>.
        /// An empty array previously caused a NullReferenceException; it is now treated the same as null.
        /// </remarks>
        /// <returns>This instance, to allow call chaining.</returns>
        /// <exception cref="ArgumentNullException">
        /// <c>_predictedColumn</c> or <c>_algorithmType</c> has not been set.
        /// </exception>
        public IPipelineChain BuildPipeline()
        {
            if (_predictedColumn == null)
            {
                throw new ArgumentNullException(nameof(_predictedColumn));
            }
            if (_algorithmType == null)
            {
                throw new ArgumentNullException(nameof(_algorithmType));
            }

            // Label preparation candidates; only the one matching the predicted column is used below.
            var keyMap = _predictedColumn.IsAlphanumeric ? MlContext.Transforms.Conversion.MapValueToKey(_predictedColumn.ColumnName) : null;
            var keyConversion = _predictedColumn.DataKind != null ? MlContext.Transforms.Conversion.ConvertType(_predictedColumn.ColumnName, outputKind: _predictedColumn.DataKind.Value) : null;

            var keyColumn = MlContext.Transforms.CopyColumns("Label", _predictedColumn.ColumnName);

            if (_alphanumericColumns != null && _alphanumericColumns.Length > 0)
            {
                // The first encoder is a bare estimator; a chain only exists once a second one is appended.
                OneHotEncodingEstimator firstEncoder = MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[0]);
                EstimatorChain<OneHotEncodingTransformer> encoderChain = null;

                for (int i = 1; i < _alphanumericColumns.Length; i++)
                {
                    var nextEncoder = MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]);
                    encoderChain = encoderChain == null
                        ? firstEncoder.Append(nextEncoder)
                        : encoderChain.Append(nextEncoder);
                }

                var concatenate = MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns);
                var columnConcatenatingTransformer = encoderChain != null
                    ? encoderChain.Append(concatenate)
                    : firstEncoder.Append(concatenate);

                _transformerChain = _predictedColumn.IsAlphanumeric
                    ? keyMap.Append(keyColumn).Append(columnConcatenatingTransformer)
                    : _predictedColumn.DataKind != null
                        ? keyConversion.Append(keyColumn).Append(columnConcatenatingTransformer)
                        : keyColumn.Append(columnConcatenatingTransformer);
            }
            else
            {
                var featureColumn = MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns);

                _estimatorChain = _predictedColumn.IsAlphanumeric
                    ? keyMap.Append(keyColumn).Append(featureColumn)
                    : _predictedColumn.DataKind != null
                        ? keyConversion.Append(keyColumn).Append(featureColumn)
                        : keyColumn.Append(featureColumn);
            }

            return this;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Appends an SDCA regression trainer to <paramref name="basePipeline"/> and fits it on
        /// <paramref name="trainData"/>.
        /// </summary>
        /// <param name="mlContext">Context used to create the trainer.</param>
        /// <param name="trainData">Data to fit on.</param>
        /// <param name="basePipeline">Feature pipeline the trainer is appended to.</param>
        /// <returns>The fitted model.</returns>
        public static ITransformer WaitTimeTrainWithSdca(MLContext mlContext, IDataView trainData, EstimatorChain<ColumnConcatenatingTransformer> basePipeline)
        {
            var sdcaPipeline = basePipeline.Append(mlContext.Regression.Trainers.Sdca());

            return sdcaPipeline.Fit(trainData);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Chains every transform that has an estimator, appends the trainer, and fits the
        /// result on <paramref name="trainData"/>.
        /// </summary>
        /// <param name="trainData">Data to fit on.</param>
        /// <returns>The fitted transformer.</returns>
        public ITransformer TrainTransformer(IDataView trainData)
        {
            IEstimator<ITransformer> chain = new EstimatorChain<ITransformer>();

            // Transforms without an estimator contribute nothing to the chain.
            foreach (var candidate in Transforms)
            {
                if (candidate.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(candidate.Estimator);
            }

            // The trainer always goes last.
            var trainerEstimator = Trainer.BuildTrainer(_context);
            chain = chain.Append(trainerEstimator);

            return chain.Fit(trainData);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Asks <c>TransformInferenceApi</c> for suggested transforms for <paramref name="data"/>
        /// and chains their estimators in the order returned.
        /// </summary>
        /// <param name="catalog">Extension target; not otherwise used by this method.</param>
        /// <param name="data">Data to infer transforms for.</param>
        /// <param name="label">Label column name passed to the inference API.</param>
        /// <returns>An estimator chain of the suggested transforms (empty when none are suggested).</returns>
        public static IEstimator<ITransformer> InferTransforms(this TransformsCatalog catalog, IDataView data, string label)
        {
            // A fresh MLContext is created for the inference call.
            var context = new MLContext();
            var suggestions = TransformInferenceApi.InferTransforms(context, data, label);

            var chain = new EstimatorChain<ITransformer>();
            foreach (var suggestion in suggestions)
            {
                chain = chain.Append(suggestion.Estimator);
            }

            return chain;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Builds a FastTreeTweedie regression pipeline from the enabled date features and fits it
        /// on <c>_trainData</c>.
        /// </summary>
        /// <remarks>
        /// "Day" and "Month" are text-featurized when enabled; "Week" is used as-is. All enabled
        /// features are concatenated into "Features", and the column named after
        /// <paramref name="column"/> is copied to "Label". The estimator sequence is the same as the
        /// previous three-branch implementation; it is now built incrementally on one chain, and the
        /// unused <c>estimatorColumn</c> local has been removed.
        /// </remarks>
        /// <param name="column">Enum member whose name identifies the label source column.</param>
        /// <returns>The fitted model.</returns>
        private ITransformer BuildAndTrainUsingParams(ColumnEnum column)
        {
            var features = new List<string>();
            var featurizers = new EstimatorChain<ITransformer>();

            if (_includeDay)
            {
                featurizers = featurizers.Append(_mlContext.Transforms.Text.FeaturizeText("DayString", "Day"));
                features.Add("DayString");
            }
            if (_includeMonth)
            {
                featurizers = featurizers.Append(_mlContext.Transforms.Text.FeaturizeText("MonthString", "Month"));
                features.Add("MonthString");
            }
            if (_includeWeek)
            {
                // Week is used directly, without featurization.
                features.Add("Week");
            }

            var labelSource = System.Enum.GetName(typeof(ColumnEnum), column);

            var trainingPipeline = featurizers
                .Append(_mlContext.Transforms.Concatenate("Features", features.ToArray()))
                .Append(_mlContext.Transforms.CopyColumns("Label", labelSource))
                .Append(_mlContext.Regression.Trainers.FastTreeTweedie());

            return trainingPipeline.Fit(_trainData);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Builds a concrete estimator chain by instantiating each sweepable estimator with
        /// <paramref name="parameters"/>; entries equal to <c>EmptyNode</c> are skipped.
        /// </summary>
        /// <param name="parameters">Parameter values passed to each estimator's own <c>BuildFromParameters</c>.</param>
        /// <returns>The assembled chain, with each estimator appended under its own scope.</returns>
        public EstimatorChain<ITransformer> BuildFromParameters(IDictionary<string, string> parameters)
        {
            var chain = new EstimatorChain<ITransformer>();

            for (int index = 0; index < this.Estimators.Count; index++)
            {
                var node = this.Estimators[index];
                if (node == SweepableEstimator<IEstimator<ITransformer>>.EmptyNode)
                {
                    continue;
                }

                chain = chain.Append(node.BuildFromParameters(parameters), node.Scope);
            }

            return chain;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Appends a matrix-factorization trainer to <c>estimator</c> and fits it on the training
        /// split, storing the result in <c>model</c>.
        /// </summary>
        private static void CreateModel()
        {
            var factorizationOptions = new MatrixFactorizationTrainer.Options
            {
                LabelColumnName = nameof(InputModel.Rating),
                MatrixColumnIndexColumnName = "Encoded_UserID",
                MatrixRowIndexColumnName = "Encoded_Book",
                NumberOfIterations = 100,
                ApproximationRank = 100
            };

            var factorizationTrainer = _context.Recommendation().Trainers.MatrixFactorization(factorizationOptions);
            var trainingPipeline = estimator.Append(factorizationTrainer);

            model = trainingPipeline.Fit(splitData.TrainSet);
        }
        // ===========================================================================================================


        /// <summary>
        /// Builds the data-processing and training pipelines and fits the model into <c>_mlModel</c>.
        /// Does nothing when <c>ErrorHasOccured</c> is already set; on any exception, sets
        /// <c>ErrorHasOccured</c> and records the message in <c>FailureInformation</c>.
        /// </summary>
        public void BuildTrainingPipelineAndModel()
        {
            if (ErrorHasOccured)
            {
                return;
            }

            try
            {
                // Label -> key, concatenate features, min-max normalize, then cache.
                EstimatorChain<NormalizingTransformer> preprocessing =
                    _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                    .Append(_mlContext.Transforms.Concatenate(
                                outputColumnName: "Features",
                                inputColumnNames: FeatureNames.ToArray()))
                    .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                    .AppendCacheCheckpoint(_mlContext);

                // One-versus-all over an averaged perceptron, followed by key -> value on the prediction.
                var binaryTrainer = _mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                    labelColumnName: "Label",
                    numberOfIterations: 1,
                    featureColumnName: "Features");
                EstimatorChain<KeyToValueMappingTransformer> trainerChain =
                    _mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer, labelColumnName: "Label")
                    .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                IEstimator<ITransformer> fullPipeline = preprocessing.Append(trainerChain);

                // TODO
                // Evaluate quality of Model
                // Evaluate(_mlContext, _trainingDataView, trainingPipeline);

                // Train the model.
                _mlModel = fullPipeline.Fit(_trainingDataView);

                // TODO
                // Save _mlModel
                // SaveModel(_mlContext, mlModel, ModelSettings.ModelFilePath, _trainingDataView.Schema);
            }
            catch (Exception failure)
            {
                Debug.WriteLine(failure.Message);
                ErrorHasOccured = true;
                FailureInformation = failure.Message;
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Appends an SDCA multiclass trainer and a "PredictedLabel" key-to-value mapping to
        /// <paramref name="pipeline"/>, fits it, runs one sample prediction, and saves the model.
        /// </summary>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="pipeline">Data-processing pipeline to extend.</param>
        /// <returns>The (unfitted) training pipeline; the fitted model is stored in <c>_trainedModel</c>.</returns>
        private EstimatorChain<KeyToValueMappingTransformer> BuildAndTrainModel(IDataView trainingDataView,
                                                                                EstimatorChain<ITransformer> pipeline)
        {
            var sdcaTrainer = new SdcaMultiClassTrainer(_mlContext, DefaultColumnNames.Label, DefaultColumnNames.Features);
            var fullPipeline = pipeline
                .Append(sdcaTrainer)
                .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            _trainedModel = fullPipeline.Fit(trainingDataView);
            _predEngine = _trainedModel.CreatePredictionEngine<ConformChecker, CheckerPrediction>(_mlContext);

            // Run one sample through the engine; the result is not used.
            var sample = new ConformChecker
            {
                Name = "Электронный аукцион"
            };
            _predEngine.Predict(sample);

            SaveModelAsFile(_mlContext, _trainedModel);
            return fullPipeline;
        }
Exemplo n.º 16
0
        /// <summary>
        /// Appends a LightGBM regression trainer with fixed options to
        /// <paramref name="basePipeline"/> and fits it on <paramref name="trainData"/>.
        /// </summary>
        /// <param name="mlContext">Context used to create the trainer.</param>
        /// <param name="trainData">Data to fit on.</param>
        /// <param name="basePipeline">Feature pipeline the trainer is appended to.</param>
        /// <returns>The fitted model.</returns>
        public static ITransformer WaitTimeTrainWithLightGbm(MLContext mlContext, IDataView trainData, EstimatorChain<ColumnConcatenatingTransformer> basePipeline)
        {
            var lightGbmOptions = new LightGbmRegressionTrainer.Options
            {
                NumberOfIterations = 100,
                NumberOfLeaves = 10,
                MinimumExampleCountPerLeaf = 18,
                LearningRate = 0.09
            };

            var lightGbmPipeline = basePipeline.Append(mlContext.Regression.Trainers.LightGbm(lightGbmOptions));

            return lightGbmPipeline.Fit(trainData);
        }
        /// <summary>
        /// Validates each column mapping and builds a chain containing one concatenating
        /// estimator per mapping that has more than one source column.
        /// </summary>
        /// <param name="env">Host environment used for validation and estimator construction.</param>
        /// <param name="columns">Many-to-one column mappings to inspect.</param>
        /// <returns>The estimator chain; empty when no mapping has more than one source.</returns>
        public static IEstimator<ITransformer> GetConcatEstimator(IHostEnvironment env, ManyToOneColumn[] columns)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(columns, nameof(columns));

            var chain = new EstimatorChain<ITransformer>();

            for (int index = 0; index < columns.Length; index++)
            {
                var column = columns[index];

                env.CheckUserArg(column != null, nameof(WordBagBuildingTransformer.Options.Columns));
                env.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
                env.CheckUserArg(Utils.Size(column.Source) > 0, nameof(column.Source));
                env.CheckUserArg(column.Source.All(source => !string.IsNullOrWhiteSpace(source)), nameof(column.Source));

                // A single source column needs no concatenation.
                if (column.Source.Length <= 1)
                {
                    continue;
                }

                chain = chain.Append<ITransformer>(new ColumnConcatenatingEstimator(env, column.Name, column.Source));
            }

            return chain;
        }
Exemplo n.º 18
0
        /// <summary>
        /// Appends an SDCA multiclass trainer and a "PredictedLabel" key-to-value mapping to
        /// <paramref name="pipeline"/>, fits it, prints one sample prediction, and saves the model.
        /// </summary>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="pipeline">Data-processing pipeline to extend.</param>
        /// <returns>The (unfitted) training pipeline; the fitted model is stored in <c>_trainedModel</c>.</returns>
        public static EstimatorChain<KeyToValueMappingTransformer> BuildAndTrainModel(IDataView trainingDataView, EstimatorChain<ITransformer> pipeline)
        {
            var sdcaTrainer = new SdcaMultiClassTrainer(_mlContext, DefaultColumnNames.Label, DefaultColumnNames.Features);
            var fullPipeline = pipeline
                .Append(sdcaTrainer)
                .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            Console.WriteLine("=============== Training the model  ===============");
            _trainedModel = fullPipeline.Fit(trainingDataView);
            Console.WriteLine($"=============== Finished Training the model Ending time: {DateTime.Now.ToString()} ===============");

            Console.WriteLine("=============== Single Prediction just-trained-model ===============");
            // NOTE(review): the input type name below appears to start with a non-Latin 'С'; it is kept exactly as written — confirm against the type's declaration.
            _predEngine = _trainedModel.CreatePredictionEngine<СonformChecker, CheckerPrediction>(_mlContext);
            var sample = new СonformChecker
            {
                Name = "Электронный аукцион"
            };
            var samplePrediction = _predEngine.Predict(sample);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {samplePrediction.Con} ===============");
            SaveModelAsFile(_mlContext, _trainedModel);

            return fullPipeline;
        }
Exemplo n.º 19
0
        /// <summary>
        /// Builds a one-hot-encoding preprocessor for the properties marked with
        /// <c>OneHotEncodingColumn</c>.
        /// </summary>
        /// <param name="context">Context used to create the encoding estimators.</param>
        /// <param name="properties">Candidate properties; enumerated more than once.</param>
        /// <param name="encodedFormat">Format string producing the encoded column name from the property name.</param>
        /// <returns>
        /// A preprocessor holding the encoder chain and the combination of unmarked property names
        /// with the encoded column names.
        /// </returns>
        public static OneHotEncodingPreprocessor OneHotEncoding(this MLContext context, IEnumerable<PropertyInfo> properties, string encodedFormat = "{0}_encoded")
        {
            var preprocessor = new OneHotEncodingPreprocessor();

            // Unmarked properties are passed through by name (query stays deferred).
            var plainNames = properties
                .Where(candidate => candidate.GetCustomAttribute<OneHotEncodingColumn>(true) == null)
                .Select(candidate => candidate.Name);

            // Marked properties get an encoder each.
            var namesToEncode = properties
                .Where(candidate => candidate.GetCustomAttribute<OneHotEncodingColumn>(true) != null)
                .Select(candidate => candidate.Name);

            var encoderChain = new EstimatorChain<OneHotEncodingTransformer>();
            var encodedPairs = new List<CombinedFeature>();

            foreach (var name in namesToEncode)
            {
                var encodedName = string.Format(encodedFormat, name);

                encoderChain = encoderChain.Append(
                    context.Transforms.Categorical.OneHotEncoding(inputColumnName: name, outputColumnName: encodedName));
                encodedPairs.Add(new CombinedFeature
                {
                    Feature = name,
                    EncodedFeature = encodedName
                });
            }

            preprocessor.OneHotEncodingEstimator = encoderChain;
            preprocessor.CombinedFeatures = Shared.Enumerator.CombineEnumerable(
                plainNames,
                encodedPairs.Select(pair => pair.EncodedFeature));

            return preprocessor;
        }
Exemplo n.º 20
0
        /// <summary>
        /// Builds a key-to-value-mapping preprocessor for the properties marked with
        /// <c>KeyToValueColumn</c>.
        /// </summary>
        /// <param name="context">Context used to create the mapping estimators.</param>
        /// <param name="properties">Candidate properties; enumerated more than once.</param>
        /// <param name="encodedFormat">Format string producing the output column name from the property name.</param>
        /// <returns>
        /// A preprocessor holding the mapping chain and the combination of unmarked property names
        /// with the mapped column names.
        /// </returns>
        public static KeyToValueMappingPreprocessor KeyToValueMapping(this MLContext context, IEnumerable<PropertyInfo> properties, string encodedFormat = "{0}")
        {
            var preprocessor = new KeyToValueMappingPreprocessor();

            // Unmarked properties are passed through by name (query stays deferred).
            var plainNames = properties
                .Where(candidate => candidate.GetCustomAttribute<KeyToValueColumn>(true) == null)
                .Select(candidate => candidate.Name);

            // Marked properties get a key-to-value mapping each.
            var namesToMap = properties
                .Where(candidate => candidate.GetCustomAttribute<KeyToValueColumn>(true) != null)
                .Select(candidate => candidate.Name);

            var mappingChain = new EstimatorChain<KeyToValueMappingTransformer>();
            var mappedPairs = new List<CombinedFeature>();

            foreach (var name in namesToMap)
            {
                var mappedName = string.Format(encodedFormat, name);

                mappingChain = mappingChain.Append(
                    context.Transforms.Conversion.MapKeyToValue(inputColumnName: name, outputColumnName: mappedName));
                mappedPairs.Add(new CombinedFeature
                {
                    Feature = name,
                    EncodedFeature = mappedName
                });
            }

            preprocessor.KeyToValueMappingEstimator = mappingChain;
            preprocessor.CombinedFeatures = Shared.Enumerator.CombineEnumerable(
                plainNames,
                mappedPairs.Select(pair => pair.EncodedFeature));

            return preprocessor;
        }
Exemplo n.º 21
0
        /// <summary>
        /// Starts a long-running task that loads book ratings from <paramref name="FilePath"/>,
        /// cross-validates a field-aware factorization machine pipeline, and yields the model of the
        /// fold with the highest accuracy.
        /// </summary>
        /// <param name="FilePath">Path of the comma-separated ratings file.</param>
        /// <returns>A task producing the best fold's model, or null when there are no folds.</returns>
        private static Task<ITransformer> TrainAndGetBestModel(string FilePath)
        {
            Func<ITransformer> trainBestModel = () =>
            {
                MLContext context = MLCProvider.Current;

                // Load and cache the training data.
                IDataView ratings = context.Data.Cache(
                    context.Data.LoadFromTextFile<BookRating>(FilePath, ',', true));

                Console.WriteLine("=============== 正在读取训练数据文件 ===============");

                // Featurize user id, ISBN and age as text, then concatenate into "Features".
                var featurePipeline = context.Transforms.Text.FeaturizeText("UserIdFeaturized", nameof(BookRating.UserId))
                    .Append(context.Transforms.Text.FeaturizeText("ISBNFeaturized", nameof(BookRating.ISBN)))
                    .Append(context.Transforms.Text.FeaturizeText("AgeFeaturized", nameof(BookRating.Age)))
                    .Append(context.Transforms.Concatenate("Features", "UserIdFeaturized", "ISBNFeaturized", "AgeFeaturized"));

                Console.WriteLine("=============== 正在使用交叉验证训练预测模型 ===============");

                var trainerOptions = new FieldAwareFactorizationMachineTrainer.Options
                {
                    Verbose = true,
                    NumberOfIterations = 10,
                    FeatureColumnName = "Features",
                    Shuffle = true
                };

                var trainingPipeline = featurePipeline.Append(
                    context.BinaryClassification.Trainers.FieldAwareFactorizationMachine(trainerOptions));

                var foldResults = context.BinaryClassification.CrossValidate(ratings, trainingPipeline);

                // Best fold by accuracy.
                return foldResults
                    .OrderByDescending(fold => fold.Metrics.Accuracy)
                    .Select(fold => fold.Model)
                    .FirstOrDefault();
            };

            return Task.Factory.StartNew(trainBestModel, TaskCreationOptions.LongRunning);
        }
        public void TestSingleSentence2Classes()
        {
            var dataView = ML.Data.LoadFromEnumerable(
                new List <TestSingleSentenceData>(new TestSingleSentenceData[] {
                new TestSingleSentenceData()
                {       // Testing longer than 512 words.
                    Sentence1 = "ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . 
ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .",
                    Sentiment = "Negative"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "with a sharp script and strong performances",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "that director m. night shyamalan can weave an eerie spell and",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "comfortable",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "does have its charms .",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "banal as the telling",
                    Sentiment = "Negative"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "faithful without being forceful , sad without being shrill , `` a walk to remember '' succeeds through sincerity .",
                    Sentiment = "Negative"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "leguizamo 's best movie work so far",
                    Sentiment = "Negative"
                }
            }));
            var chain     = new EstimatorChain <ITransformer>();
            var estimator = chain.Append(ML.Transforms.Conversion.MapValueToKey("Label", "Sentiment"), TransformerScope.TrainTest)
                            .Append(ML.MulticlassClassification.Trainers.TextClassification(outputColumnName: "outputColumn"))
                            .Append(ML.Transforms.Conversion.MapKeyToValue("outputColumn"));

            TestEstimatorCore(estimator, dataView);
            var estimatorSchema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema));

            Assert.Equal(5, estimatorSchema.Count);
            Assert.Equal("outputColumn", estimatorSchema[3].Name);
            Assert.Equal(TextDataViewType.Instance, estimatorSchema[3].ItemType);

            var transformer       = estimator.Fit(dataView);
            var transformerSchema = transformer.GetOutputSchema(dataView.Schema);

            var filteredModel = transformer.GetModelFor(TransformerScope.Scoring);

            Assert.Equal(6, transformerSchema.Count);
            Assert.Equal("outputColumn", transformerSchema[4].Name);
            Assert.Equal(TextDataViewType.Instance, transformerSchema[4].Type);

            var dataNoLabel = ML.Data.LoadFromEnumerable(
                new List <TestSingleSentenceDataNoLabel>(new TestSingleSentenceDataNoLabel[] {
                new ()
                {       // Testing longer than 512 words.
                    Sentence1 = "ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . 
ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .",
                },
                new ()
                {
                    Sentence1 = "with a sharp script and strong performances",
                },
                new ()
                {
                    Sentence1 = "that director m. night shyamalan can weave an eerie spell and",
                },
                new ()
                {
                    Sentence1 = "comfortable",
                },
                new ()
                {
                    Sentence1 = "does have its charms .",
                },
                new ()
                {
                    Sentence1 = "banal as the telling",
                },
                new ()
                {
                    Sentence1 = "faithful without being forceful , sad without being shrill , `` a walk to remember '' succeeds through sincerity .",
                },
                new ()
                {
                    Sentence1 = "leguizamo 's best movie work so far",
                }
            }));
Exemplo n.º 23
0
        /// <summary>
        /// Command to parse the model component from the model JSON and add it to the data pipeline.
        /// </summary>
        /// <param name="pipeline">
        /// Reference to the EstimatorChain that is being appended to. <c>EstimatorChain.Append</c> returns a new
        /// chain rather than mutating the one it is called on, so on success this reference is reassigned to the
        /// extended chain. It is left untouched when <paramref name="modelType"/> is not recognized.
        /// </param>
        /// <param name="modelType">The string representing the type of component to add to the pipeline.</param>
        /// <param name="componentObject">The JSON configuration of the model component being appended.</param>
        /// <returns>True if the component type was recognized and appended, false otherwise.</returns>
        private bool _ParseComponentJson(ref EstimatorChain <ITransformer> pipeline, string modelType, JToken componentObject)
        {
            // Each case only resolves the estimator; the append happens once, after the switch.
            IEstimator<ITransformer> component;

            switch (modelType)
            {
            //------------------------ A ---------------------------------------------------------------------------------
            case "ApplyOnnxModel":
                component = MLContext._ApplyOnnxModel(componentObject);
                break;

            case "ApplyWordEmbedding":
                component = MLContext._ApplyWordEmbedding(componentObject);
                break;

            case "ApproximatedKernelMap":
                component = MLContext._ApproximatedKernelMap(componentObject);
                break;

            case "AveragedPerceptron":
                component = MLContext._AveragedPerceptronTrainer(componentObject);
                break;


            //------------------------ C ---------------------------------------------------------------------------------
            case "CalculateFeatureContribution":
                component = MLContext._CalculateFeatureContribution(componentObject);
                break;

            case "Concatenate":
                component = MLContext._Concatenate(componentObject);
                break;

            // The JSON key uses the "Greyscale" spelling; the helper uses "Grayscale".
            case "ConvertToGreyscale":
                component = MLContext._ConvertToGrayscale(componentObject);
                break;

            case "ConvertToImage":
                component = MLContext._ConvertToImage(componentObject);
                break;

            case "ConvertType":
                component = MLContext._ConvertType(componentObject);
                break;

            case "CopyColumns":
                component = MLContext._CopyColumns(componentObject);
                break;


            //------------------------ D ---------------------------------------------------------------------------------
            case "DetectAnomalyBySrCnn":
                component = MLContext._DetectAnomalyBySrCnn(componentObject);
                break;

            case "DetectChangePointBySsa":
                component = MLContext._DetectChangePointBySsa(componentObject);
                break;

            case "DetectIidChangePoint":
                component = MLContext._DetectIidChangePoint(componentObject);
                break;

            case "DetectIidSpike":
                component = MLContext._DetectIidSpike(componentObject);
                break;

            case "DetectSpikeBySsa":
                component = MLContext._DetectSpikeBySsa(componentObject);
                break;

            case "DnnFeaturizeImage":
                component = MLContext._DnnFeaturizeImage(componentObject);
                break;

            case "DropColumns":
                component = MLContext._DropColumns(componentObject);
                break;


            //------------------------ E ---------------------------------------------------------------------------------
            case "ExtractPixels":
                component = MLContext._ExtractPixels(componentObject);
                break;


            //------------------------ F ---------------------------------------------------------------------------------
            case "FastForestBinary":
                component = MLContext._FastForestBinaryTrainer(componentObject);
                break;

            case "FastForestRegression":
                component = MLContext._FastForestRegressionTrainer(componentObject);
                break;

            case "FastTreeBinary":
                component = MLContext._FastTreeBinaryTrainer(componentObject);
                break;

            case "FastTreeRanking":
                component = MLContext._FastTreeRankingTrainer(componentObject);
                break;

            case "FastTreeRegression":
                component = MLContext._FastTreeRegressionTrainer(componentObject);
                break;

            case "FastTreeTweedie":
                component = MLContext._FastTreeTweedieTrainer(componentObject);
                break;

            case "FeaturizeText":
                component = MLContext._FeaturizeText(componentObject);
                break;

            case "FieldAwareFactorizationMachine":
                component = MLContext._FieldAwareFactorizationMachineTrainer(componentObject);
                break;

            case "ForecastBySsa":
                component = MLContext._ForecastBySsa(componentObject);
                break;


            //------------------------ G ---------------------------------------------------------------------------------
            case "GamBinary":
                component = MLContext._GamBinaryTrainer(componentObject);
                break;

            case "GamRegression":
                component = MLContext._GamRegressionTrainer(componentObject);
                break;


            //------------------------ H ---------------------------------------------------------------------------------
            case "Hash":
                component = MLContext._Hash(componentObject);
                break;


            //------------------------ I ---------------------------------------------------------------------------------
            case "IndicateMissingValues":
                component = MLContext._IndicateMissingValues(componentObject);
                break;


            //------------------------ K ---------------------------------------------------------------------------------
            case "KMeans":
                component = MLContext._KMeansTrainer(componentObject);
                break;


            //------------------------ L ---------------------------------------------------------------------------------
            case "LatentDirichletAllocation":
                component = MLContext._LatentDirichletAllocation(componentObject);
                break;

            case "LbfgsLogisticRegressionBinary":
                component = MLContext._LbfgsLogisticRegressionBinaryTrainer(componentObject);
                break;

            case "LbfgsMaximumEntropyMulticlass":
                component = MLContext._LbfgsMaximumEntropyMulticlassTrainer(componentObject);
                break;

            case "LbfgsPoissonRegression":
                component = MLContext._LbfgsPoissonRegressionTrainer(componentObject);
                break;

            case "LightGbmBinary":
                component = MLContext._LightGbmBinaryTrainer(componentObject);
                break;

            case "LightGbmMulticlass":
                component = MLContext._LightGbmMulticlassTrainer(componentObject);
                break;

            case "LightGbmRanking":
                component = MLContext._LightGbmRankingTrainer(componentObject);
                break;

            case "LightGbmRegression":
                component = MLContext._LightGbmRegressionTrainer(componentObject);
                break;

            case "LinearSvm":
                component = MLContext._LinearSvmTrainer(componentObject);
                break;

            case "LoadImages":
                component = MLContext._LoadImages(componentObject);
                break;

            case "LoadTensorFlowModel":
                component = MLContext._LoadTensorFlowModel(componentObject);
                break;


            //------------------------ M ---------------------------------------------------------------------------------
            case "MapKeyToBinaryVector":
                component = MLContext._MapKeyToBinaryVector(componentObject);
                break;

            case "MapKeyToValue":
                component = MLContext._MapKeyToValue(componentObject);
                break;

            case "MapKeyToVector":
                component = MLContext._MapKeyToVector(componentObject);
                break;

            case "MapValue":
                component = MLContext._MapValue(componentObject);
                break;

            case "MapValueToKey":
                component = MLContext._MapValueToKey(componentObject);
                break;

            case "MatrixFactorization":
                component = MLContext._MatrixFactorizationTrainer(componentObject);
                break;


            //------------------------ N ---------------------------------------------------------------------------------
            case "NaiveBayesMulticlass":
                component = MLContext._NaiveBayesMulticlassTrainer(componentObject);
                break;

            case "NormalizeBinning":
                component = MLContext._NormalizeBinning(componentObject);
                break;

            case "NormalizeGlobalContrast":
                component = MLContext._NormalizeGlobalContrast(componentObject);
                break;

            case "NormalizeLogMeanVariance":
                component = MLContext._NormalizeLogMeanVariance(componentObject);
                break;

            case "NormalizeLpNorm":
                component = MLContext._NormalizeLpNorm(componentObject);
                break;

            case "NormalizeMeanVariance":
                component = MLContext._NormalizeMeanVariance(componentObject);
                break;

            case "NormalizeMinMax":
                component = MLContext._NormalizeMinMax(componentObject);
                break;

            case "NormalizeSupervisedBinning":
                component = MLContext._NormalizeSupervisedBinning(componentObject);
                break;

            case "NormalizeText":
                component = MLContext._NormalizeText(componentObject);
                break;


            //------------------------ O ---------------------------------------------------------------------------------
            case "OneHotEncoding":
                component = MLContext._OneHotEncoding(componentObject);
                break;

            case "OneHotHashEncoding":
                component = MLContext._OneHotHashEncoding(componentObject);
                break;

            case "OnlineGradientDescent":
                component = MLContext._OnlineGradientDescentTrainer(componentObject);
                break;

            case "OlsTrainer":
                component = MLContext._OlsTrainer(componentObject);
                break;


            //------------------------ P ---------------------------------------------------------------------------------
            case "Prior":
                component = MLContext._PriorTrainer(componentObject);
                break;

            case "ProduceHashedNgrams":
                component = MLContext._ProduceHashedNgrams(componentObject);
                break;

            case "ProduceHashedWordBags":
                component = MLContext._ProduceHashedWordBags(componentObject);
                break;

            case "ProduceNgrams":
                component = MLContext._ProduceNgrams(componentObject);
                break;

            case "ProduceWordBags":
                component = MLContext._ProduceWordBags(componentObject);
                break;

            case "ProjectToPrincipalComponents":
                component = MLContext._ProjectToPrincipalComponents(componentObject);
                break;


            //------------------------ R ---------------------------------------------------------------------------------
            case "RandomizedPca":
                component = MLContext._RandomizedPcaTrainer(componentObject);
                break;

            case "RemoveDefaultStopWords":
                component = MLContext._RemoveDefaultStopWords(componentObject);
                break;

            case "RemoveStopWords":
                component = MLContext._RemoveStopWords(componentObject);
                break;

            case "ReplaceMissingValues":
                component = MLContext._ReplaceMissingValues(componentObject);
                break;

            case "ResizeImages":
                component = MLContext._ResizeImages(componentObject);
                break;


            //------------------------ S ---------------------------------------------------------------------------------
            case "SdcaLogisticRegressionBinary":
                component = MLContext._SdcaLogisticRegressionBinaryTrainer(componentObject);
                break;

            case "SdcaMaximumEntropyMulticlass":
                component = MLContext._SdcaMaximumEntropyMulticlassTrainer(componentObject);
                break;

            case "SdcaNonCalibratedBinary":
                component = MLContext._SdcaNonCalibratedBinaryTrainer(componentObject);
                break;

            case "SdcaNonCalibratedMulticlass":
                component = MLContext._SdcaNonCalibratedMulticlassTrainer(componentObject);
                break;

            case "SdcaRegression":
                component = MLContext._SdcaRegressionTrainer(componentObject);
                break;

            case "SelectColumns":
                component = MLContext._SelectColumns(componentObject);
                break;

            case "SelectFeaturesBasedOnCount":
                component = MLContext._SelectFeaturesBasedOnCount(componentObject);
                break;

            case "SelectFeaturesBasedOnMutualInformation":
                component = MLContext._SelectFeaturesBasedOnMutualInformation(componentObject);
                break;

            case "SymbolicSgdLogisticRegressionBinary":
                component = MLContext._SymbolicSgdLogisticRegressionBinaryTrainer(componentObject);
                break;


            //------------------------ T ---------------------------------------------------------------------------------
            case "TokenizeIntoCharactersAsKeys":
                component = MLContext._TokenizeIntoCharactersAsKeys(componentObject);
                break;

            case "TokenizeIntoWords":
                component = MLContext._TokenizeIntoWords(componentObject);
                break;


            //------------------------ default ---------------------------------------------------------------------------
            default:
                // Unknown component type: leave the pipeline as it was.
                return false;
            }

            // Append returns a new chain, so the result must be written back through the ref parameter;
            // otherwise the caller's pipeline never receives the component.
            pipeline = pipeline.Append(component);
            return true;
        }
Exemplo n.º 24
0
 /// <summary>
 /// Creates the SDCA regression trainer and chains it onto the data-processing pipeline.
 /// </summary>
 /// <param name="mlContext">Context whose regression trainer catalog supplies the SDCA trainer.</param>
 /// <param name="dataProcessPipeline">Data-processing chain the trainer is appended to.</param>
 /// <param name="trainer">Receives the SDCA trainer, configured with the "Label" and "Features" columns.</param>
 /// <param name="trainingPipeline">Receives the chain produced by appending the trainer to <paramref name="dataProcessPipeline"/>.</param>
 private static void SetTrainingAlgorithm(MLContext mlContext, EstimatorChain <ColumnConcatenatingTransformer> dataProcessPipeline, out SdcaRegressionTrainer trainer, out EstimatorChain <RegressionPredictionTransformer <LinearRegressionModelParameters> > trainingPipeline)
 {
     SdcaRegressionTrainer sdcaTrainer = mlContext.Regression.Trainers.Sdca(
         labelColumnName: "Label",
         featureColumnName: "Features");

     // Publish the trainer first, then the chain that ends with it.
     trainer = sdcaTrainer;
     trainingPipeline = dataProcessPipeline.Append(sdcaTrainer);
 }
Exemplo n.º 25
0
        // ===========================================================================================================


        /// <summary>
        /// Builds the data-processing pipeline, appends an L-BFGS Poisson regression trainer and fits the
        /// resulting chain on <c>_trainingDataView</c>, storing the result in <c>_mlModel</c>.
        /// Does nothing when <c>ErrorHasOccured</c> is already set. Any exception is written to the debug
        /// output and recorded in <c>ErrorHasOccured</c> / <c>FailureInformation</c> instead of propagating.
        /// </summary>
        public void BuildTrainingPipelineAndModel()
        {
            if (ErrorHasOccured)
            {
                return;
            }

            // Builds pairs whose output column has the same name as its input column.
            InputOutputColumnPair[] InPlace(params string[] columnNames)
            {
                var pairs = new InputOutputColumnPair[columnNames.Length];
                for (int index = 0; index < columnNames.Length; index++)
                {
                    pairs[index] = new InputOutputColumnPair(columnNames[index], columnNames[index]);
                }

                return pairs;
            }

            try
            {
                // Columns that are one-hot encoded in place.
                InputOutputColumnPair[] categoricalColumns = InPlace(
                    "MSZoning", "Street", "Alley", "LotShape", "LandContour", "Utilities",
                    "LotConfig", "LandSlope", "Neighborhood", "Condition1", "Condition2", "BldgType",
                    "HouseStyle", "RoofStyle", "RoofMatl", "Exterior1st", "Exterior2nd", "MasVnrType",
                    "ExterQual", "ExterCond", "Foundation", "BsmtQual", "BsmtCond", "BsmtExposure",
                    "BsmtFinType1", "BsmtFinType2", "Heating", "HeatingQC", "Electrical", "KitchenQual",
                    "Functional", "FireplaceQu", "GarageType", "GarageFinish", "GarageQual", "GarageCond",
                    "PavedDrive", "PoolQC", "Fence", "MiscFeature", "SaleType", "SaleCondition");

                // Each indicator column is derived from the source column named second.
                InputOutputColumnPair[] missingIndicators =
                {
                    new InputOutputColumnPair("LotFrontage_MissingIndicator", "LotFrontage"),
                    new InputOutputColumnPair("MasVnrArea_MissingIndicator", "MasVnrArea"),
                    new InputOutputColumnPair("GarageYrBlt_MissingIndicator", "GarageYrBlt")
                };

                EstimatorChain <NormalizingTransformer> dataProcessPipeline =
                    _mlContext.Transforms.Conversion.ConvertType(InPlace("CentralAir"))
                    .Append(_mlContext.Transforms.Categorical.OneHotEncoding(categoricalColumns))
                    .Append(_mlContext.Transforms.IndicateMissingValues(missingIndicators))
                    .Append(_mlContext.Transforms.Conversion.ConvertType(InPlace("LotFrontage_MissingIndicator", "MasVnrArea_MissingIndicator", "GarageYrBlt_MissingIndicator")))
                    .Append(_mlContext.Transforms.ReplaceMissingValues(InPlace("LotFrontage", "MasVnrArea", "GarageYrBlt")))
                    .Append(_mlContext.Transforms.Concatenate("Features", FeatureNames.ToArray()))
                    .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                    .AppendCacheCheckpoint(_mlContext);

                var trainerOptions = new LbfgsPoissonRegressionTrainer.Options()
                {
                    L2Regularization = 0.07404655f,
                    L1Regularization = 0.2087761f,
                    OptimizationTolerance = 0.0001f,
                    HistorySize = 5,
                    MaximumNumberOfIterations = 462473459,
                    InitialWeightsDiameter = 0.5613934f,
                    DenseOptimizer = false,
                    LabelColumnName = "SalePrice",
                    FeatureColumnName = "Features"
                };
                LbfgsPoissonRegressionTrainer trainer = _mlContext.Regression.Trainers.LbfgsPoissonRegression(trainerOptions);

                EstimatorChain <RegressionPredictionTransformer <PoissonRegressionModelParameters> > trainingPipeline = dataProcessPipeline.Append(trainer);

                // TODO
                // Evaluate quality of Model
                // Evaluate(_mlContext, _trainingDataView, trainingPipeline);

                // Train Model
                _mlModel = trainingPipeline.Fit(_trainingDataView);
            }
            catch (Exception ex)
            {
                // Record the failure instead of rethrowing.
                Debug.WriteLine(ex.Message);
                ErrorHasOccured = true;
                FailureInformation = ex.Message;
            }
        }
        /// <summary>
        /// Builds the estimator chain for n-gram extraction over the columns in <paramref name="options"/>.
        /// Source columns whose item type is text are first mapped to keys (optionally followed by a
        /// missing-value drop); every column is then passed to an <c>NgramExtractingEstimator</c>.
        /// </summary>
        /// <param name="env">Host environment; checked for null.</param>
        /// <param name="options">Transform options; at least one column must be specified.</param>
        /// <param name="inputSchema">Schema shape used to detect which source columns are text.</param>
        /// <param name="termLoaderArgs">Optional term-loading settings applied to the key-mapping step; may be null.</param>
        /// <returns>The chain ending with the n-gram extracting estimator.</returns>
        internal static IEstimator <ITransformer> CreateEstimator(IHostEnvironment env, Options options, SchemaShape inputSchema, TermLoaderArguments termLoaderArgs = null)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(LoaderSignature);

            host.CheckValue(options, nameof(options));
            host.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns), "Columns must be specified");

            var columns      = options.Columns;
            var textColumns  = new List <Column>();
            var isTextColumn = new bool[columns.Length];

            for (int index = 0; index < columns.Length; index++)
            {
                var candidate = columns[index];

                host.CheckNonWhiteSpace(candidate.Name, nameof(candidate.Name));
                host.CheckNonWhiteSpace(candidate.Source, nameof(candidate.Source));

                bool isText = inputSchema.TryFindColumn(candidate.Source, out var shape) &&
                              shape.ItemType is TextDataViewType;
                if (!isText)
                {
                    continue;
                }

                textColumns.Add(candidate);
                isTextColumn[index] = true;
            }

            var chain = new EstimatorChain <ITransformer>();

            // Text columns are converted to keys by a term transform before n-gram extraction.
            // Non-text columns skip that step and go to the n-gram transform directly, which lets this
            // estimator accept both text and key input columns. The n-gram transform itself validates
            // column types that are neither text nor keys.
            if (textColumns.Count > 0)
            {
                bool     dropUnknowns       = termLoaderArgs != null && termLoaderArgs.DropUnknowns;
                string[] droppedColumnNames = dropUnknowns ? new string[textColumns.Count] : null;
                var      keyMappingOptions  = new List <ValueToKeyMappingEstimator.ColumnOptionsBase>();

                for (int index = 0; index < textColumns.Count; index++)
                {
                    var textColumn = textColumns[index];

                    // Precedence: per-column limit, then global limit, then a default that depends on
                    // whether term loader arguments were supplied.
                    var keyLimit =
                        Utils.Size(textColumn.MaxNumTerms) > 0 ? textColumn.MaxNumTerms[0] :
                        Utils.Size(options.MaxNumTerms) > 0 ? options.MaxNumTerms[0] :
                        termLoaderArgs == null ? NgramExtractingEstimator.Defaults.MaximumNgramsCount : int.MaxValue;
                    var ordinality = termLoaderArgs?.Sort ?? ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence;

                    var mapping = new ValueToKeyMappingEstimator.ColumnOptions(
                        textColumn.Name,
                        textColumn.Source,
                        maximumNumberOfKeys: keyLimit,
                        keyOrdinality: ordinality);
                    if (termLoaderArgs != null)
                    {
                        mapping.Key  = termLoaderArgs.Term;
                        mapping.Keys = termLoaderArgs.Terms;
                    }
                    keyMappingOptions.Add(mapping);

                    if (droppedColumnNames != null)
                    {
                        droppedColumnNames[index] = textColumn.Name;
                    }
                }

                IDataView keyData = null;
                if (termLoaderArgs?.DataFile != null)
                {
                    using (var channel = env.Start("Create key data view"))
                    {
                        // The out value is not used here, so it is discarded.
                        keyData = ValueToKeyMappingTransformer.GetKeyDataViewOrNull(env, channel, termLoaderArgs.DataFile, termLoaderArgs.TermsColumn, termLoaderArgs.Loader, out _);
                    }
                }

                chain = chain.Append <ITransformer>(new ValueToKeyMappingEstimator(host, keyMappingOptions.ToArray(), keyData));
                if (droppedColumnNames != null)
                {
                    chain = chain.Append <ITransformer>(new MissingValueDroppingEstimator(host, droppedColumnNames.Select(name => (name, name)).ToArray()));
                }
            }

            var ngramColumns = new NgramExtractingEstimator.ColumnOptions[columns.Length];

            for (int index = 0; index < columns.Length; index++)
            {
                var column = columns[index];

                // Text columns were mapped to keys under their output name, so read from that name.
                var inputName = isTextColumn[index] ? column.Name : column.Source;

                ngramColumns[index] = new NgramExtractingEstimator.ColumnOptions(
                    column.Name,
                    column.NgramLength ?? options.NgramLength,
                    column.SkipLength ?? options.SkipLength,
                    column.UseAllLengths ?? options.UseAllLengths,
                    column.Weighting ?? options.Weighting,
                    column.MaxNumTerms ?? options.MaxNumTerms,
                    inputName);
            }

            return chain.Append <ITransformer>(new NgramExtractingEstimator(env, ngramColumns));
        }
Exemplo n.º 27
0
        /// <summary>
        /// Assembles the (unfitted) estimator pipeline for the sentiment model.
        /// </summary>
        /// <remarks>
        /// The data-preparation stage maps the "Sentiment" column to a key, featurizes
        /// "SentimentText" into "SentimentText_tf", copies that column to "Features",
        /// min-max normalizes "Features" and then adds a cache checkpoint. The training stage
        /// is an SDCA maximum-entropy multiclass trainer followed by a key-to-value mapping
        /// of "PredictedLabel".
        /// </remarks>
        /// <param name="mlContext">Context whose transform and trainer catalogs are used to build each stage.</param>
        /// <returns>The data-preparation chain with the training chain appended.</returns>
        public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
        {
            // Stage 1: label encoding, text featurization, normalization, then a cache checkpoint.
            var featurization = mlContext.Transforms.Conversion
                .MapValueToKey("Sentiment", "Sentiment")
                .Append(mlContext.Transforms.Text.FeaturizeText("SentimentText_tf", "SentimentText"))
                .Append(mlContext.Transforms.CopyColumns("Features", "SentimentText_tf"))
                .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                .AppendCacheCheckpoint(mlContext);

            // Stage 2: the learner, followed by mapping the predicted key back to its original value.
            var learnerWithLabelDecoding = mlContext.MulticlassClassification.Trainers
                .SdcaMaximumEntropy(labelColumnName: "Sentiment", featureColumnName: "Features")
                .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

            return featurization.Append(learnerWithLabelDecoding);
        }
Exemplo n.º 28
0
        // ===========================================================================================================


        /// <summary>
        /// Builds the training pipeline for the requested classification mode, fits it on
        /// <c>_trainingDataView</c> and stores the fitted model in <c>_mlModel</c>.
        /// </summary>
        /// <remarks>
        /// Returns immediately when <c>ErrorHasOccured</c> is already set. Any exception raised
        /// while building or fitting the pipeline (including the
        /// <see cref="ArgumentOutOfRangeException"/> thrown for an unsupported mode) is not
        /// propagated: its message is written to the debug output, <c>ErrorHasOccured</c> is set
        /// and the message is stored in <c>FailureInformation</c>.
        /// </remarks>
        /// <param name="classificationMode">Selects which trainer the pipeline is built around.</param>
        public void BuildTrainingPipelineAndModel(ClassificationMode classificationMode)
        {
            if (ErrorHasOccured)
            {
                return;
            }

            try
            {
                if (classificationMode == ClassificationMode.OneVersusAll)
                {
                    // This was the first mode implemented when the project was originally built.
                    // Data preparation: key-encode the label, gather the feature columns,
                    // min-max normalize them and add a cache checkpoint.
                    var ovaDataPrep = _mlContext.Transforms.Conversion
                        .MapValueToKey("Label", "Label")
                        .Append(_mlContext.Transforms.Concatenate(
                                    outputColumnName: "Features",
                                    inputColumnNames: FeatureNames.ToArray()))
                        .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                        .AppendCacheCheckpoint(_mlContext);

                    // Learner: one-versus-all over an averaged perceptron, then decode the predicted key.
                    var ovaBinaryLearner = _mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                        labelColumnName: "Label",
                        numberOfIterations: 10,
                        featureColumnName: "Features");
                    var ovaLearner = _mlContext.MulticlassClassification.Trainers
                        .OneVersusAll(ovaBinaryLearner, labelColumnName: "Label")
                        .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                    // Train the model.
                    _mlModel = ovaDataPrep.Append(ovaLearner).Fit(_trainingDataView);
                }
                else if (classificationMode == ClassificationMode.LightGbm)
                {
                    // Data preparation: key-encode the label and gather the feature columns
                    // (this branch applies neither normalization nor a cache checkpoint).
                    var gbmDataPrep = _mlContext.Transforms.Conversion
                        .MapValueToKey("Label", "Label")
                        .Append(_mlContext.Transforms.Concatenate(
                                    outputColumnName: "Features",
                                    inputColumnNames: FeatureNames.ToArray()));

                    // Learner: LightGBM multiclass trainer, then decode the predicted key.
                    var gbmLearner = _mlContext.MulticlassClassification.Trainers
                        .LightGbm(labelColumnName: "Label", featureColumnName: "Features")
                        .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                    // Train the model.
                    _mlModel = gbmDataPrep.Append(gbmLearner).Fit(_trainingDataView);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(classificationMode), classificationMode, null);
                }

                // TODO
                //// Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
                //// in order to evaluate and get the model's accuracy metrics
                //Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
                //var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(trainingDataView, trainingPipeline, numberOfFolds: 5, labelColumnName: "Label");
                //PrintMulticlassClassificationFoldsAverageMetrics(crossValidationResults);
            }
            catch (Exception failure)
            {
                // Record the failure on the instance instead of letting it escape to the caller.
                Debug.WriteLine(failure.Message);
                ErrorHasOccured = true;
                FailureInformation = failure.Message;
            }
        }