コード例 #1
0
        /// <summary>
        /// Assembles one estimator from the pre-trainer transforms, the trainer produced by
        /// <c>Trainer.BuildTrainer()</c>, and the post-trainer transforms, appended in that order.
        /// </summary>
        /// <returns>The assembled estimator.</returns>
        public IEstimator<ITransformer> ToEstimator()
        {
            IEstimator<ITransformer> chain = new EstimatorChain<ITransformer>();

            // Pre-trainer stage: entries that carry no estimator are skipped.
            foreach (var preTransform in Transforms)
            {
                if (preTransform.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(preTransform.Estimator);
            }

            // The trainer is built first; the optional cache checkpoint is then placed
            // directly in front of it in the chain.
            var trainerEstimator = Trainer.BuildTrainer();

            if (_cacheBeforeTrainer)
            {
                chain = chain.AppendCacheCheckpoint(_context);
            }

            chain = chain.Append(trainerEstimator);

            // Post-trainer stage: same skip rule as the pre-trainer stage.
            foreach (var postTransform in TransformsPostTrainer)
            {
                if (postTransform.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(postTransform.Estimator);
            }

            return chain;
        }
コード例 #2
0
        /// <summary>
        /// Trains a one-versus-all multiclass model whose binary learner is FastForest.
        /// </summary>
        /// <remarks>
        /// Stores <paramref name="multiClassOptions"/> in <c>Options</c> and populates
        /// <c>trainingPipeline</c>, <c>model</c>, <c>inputSchema</c> and <c>predictionEngine</c>.
        /// </remarks>
        /// <param name="trainingData">Rows the model is fitted on.</param>
        /// <param name="multiClassOptions">Supplies the label column name and the feature column names.</param>
        /// <param name="fastForestOptions">Leaf count, minimum examples per leaf and tree count for FastForest.</param>
        public void TrainFastForestOva(IEnumerable<TInput> trainingData, MultiClassOptions<TInput> multiClassOptions, FastForestOvaOptions fastForestOptions)
        {
            this.Options = multiClassOptions;

            // Preprocessing: label -> key, feature columns -> single "Features" vector, then cache.
            var labelToKey = this.ml.Transforms.Conversion.MapValueToKey(inputColumnName: this.Options.LabelName, outputColumnName: "Label");
            var featureVector = this.ml.Transforms.Concatenate("Features", this.Options.FeatureColumnNames);
            var pipeline = labelToKey
                           .Append(featureVector)
                           .AppendCacheCheckpoint(this.ml);

            // Learner: FastForest binary classifier wrapped in a one-versus-all reduction.
            var binaryLearner = this.ml.BinaryClassification.Trainers.FastForest(
                numberOfLeaves: fastForestOptions.NumberOfLeaves,
                minimumExampleCountPerLeaf: fastForestOptions.MinimumExampleCountPerLeaf,
                numberOfTrees: fastForestOptions.NumberOfTrees,
                labelColumnName: "Label",
                featureColumnName: "Features");
            var oneVersusAll = this.ml.MulticlassClassification.Trainers.OneVersusAll(binaryLearner, labelColumnName: "Label");

            this.trainingPipeline = pipeline.Append(oneVersusAll);

            // Fit on the in-memory data; the fitted model also maps the predicted key back to its value.
            var trainData = this.ml.Data.LoadFromEnumerable(trainingData);
            var keyToLabel = this.ml.Transforms.Conversion.MapKeyToValue("PredictedLabel");

            this.model = trainingPipeline.Append(keyToLabel).Fit(trainData);
            this.inputSchema = trainData.Schema;
            this.predictionEngine = this.ml.Model.CreatePredictionEngine<TInput, PredictionOutput>(model);
        }
コード例 #3
0
        /// <summary>
        /// Returns a new reader estimator that keeps this instance's start and extends its
        /// estimator chain with <paramref name="estimator"/>.
        /// </summary>
        /// <param name="estimator">The estimator to place at the end of the chain; checked with <c>Contracts.CheckValue</c>.</param>
        /// <returns>A new <c>CompositeReaderEstimator</c> wrapping the extended chain.</returns>
        public CompositeReaderEstimator<TSource, TNewTrans> Append<TNewTrans>(IEstimator<TNewTrans> estimator)
            where TNewTrans : class, ITransformer
        {
            Contracts.CheckValue(estimator, nameof(estimator));

            var extendedChain = _estimatorChain.Append(estimator);
            return new CompositeReaderEstimator<TSource, TNewTrans>(_start, extendedChain);
        }
コード例 #4
0
        /// <summary>
        /// Fits <paramref name="pipeline"/> on <c>DataView</c>, optionally extended with a training
        /// algorithm and, for an alphanumeric predicted column, a key-to-value mapping of "PredictedLabel".
        /// </summary>
        /// <param name="algorithmType">Algorithm passed to <c>GetAlgorithm</c>; when null no algorithm is appended.</param>
        /// <param name="pipeline">The preprocessing chain to extend and fit.</param>
        /// <returns>The fitted transformer.</returns>
        private ITransformer GetModel(AlgorithmType? algorithmType, EstimatorChain<TransformerChain<ColumnConcatenatingTransformer>> pipeline)
        {
            if (_predictedColumn.IsAlphanumeric)
            {
                if (algorithmType == null)
                {
                    var labelOnly = pipeline.Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
                    return labelOnly.Fit(DataView);
                }

                var withAlgorithm = pipeline.Append(GetAlgorithm(algorithmType.Value));
                var withLabel = withAlgorithm.Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
                return withLabel.Fit(DataView);
            }

            if (algorithmType == null)
            {
                return pipeline.Fit(DataView);
            }

            var trainable = pipeline.Append(GetAlgorithm(algorithmType.Value));
            return trainable.Fit(DataView);
        }
コード例 #5
0
ファイル: Program.cs プロジェクト: hdnebat/taxifare
        /// <summary>
        /// Appends a stochastic dual coordinate ascent regression trainer, using the "Label" and
        /// "Features" columns, to the given data-processing pipeline.
        /// </summary>
        /// <param name="mlContext">Context the trainer is created from.</param>
        /// <param name="dataProcessPipeline">The pipeline the trainer is appended to.</param>
        /// <returns>The pipeline with the trainer appended.</returns>
        private static EstimatorChain<RegressionPredictionTransformer<LinearRegressionPredictor>> PerformStep3(MLContext mlContext, EstimatorChain<ITransformer> dataProcessPipeline)
        {
            var sdcaTrainer = mlContext.Regression.Trainers.StochasticDualCoordinateAscent("Label", "Features");

            return dataProcessPipeline.Append(sdcaTrainer);
        }
コード例 #6
0
ファイル: Program.cs プロジェクト: varmar777/samples
        /// <summary>
        /// Appends a multi-class SDCA trainer and a key-to-value mapping to <paramref name="pipeline"/>,
        /// fits it, runs one sample prediction, and saves the fitted model via <c>SaveModelAsFile</c>.
        /// </summary>
        /// <remarks>
        /// Assigns the static <c>_trainedModel</c> and <c>_predEngine</c> fields and writes progress to the console.
        /// </remarks>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="pipeline">The data-processing pipeline to extend.</param>
        /// <returns>The (unfitted) training pipeline.</returns>
        public static EstimatorChain<KeyToValueMappingTransformer> BuildAndTrainModel(IDataView trainingDataView, EstimatorChain<ITransformer> pipeline)
        {
            // STEP 3: create the trainer - multi-class SDCA over the default label/features columns.
            // <SnippetSdcaMultiClassTrainer>
            var trainer = new SdcaMultiClassTrainer(_mlContext, DefaultColumnNames.Label, DefaultColumnNames.Features);
            // </SnippetSdcaMultiClassTrainer>

            // Add the trainer, then map the predicted key back to its original value.
            // <SnippetAddTrainer>
            var keyToValue = _mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");
            var trainingPipeline = pipeline.Append(trainer).Append(keyToValue);
            // </SnippetAddTrainer>

            // STEP 4: fit the pipeline on the training data.
            Console.WriteLine("=============== Training the model  ===============");

            // <SnippetTrainModel>
            _trainedModel = trainingPipeline.Fit(trainingDataView);
            // </SnippetTrainModel>
            Console.WriteLine($"=============== Finished Training the model Ending time: {DateTime.Now.ToString()} ===============");

            // Optional: try one prediction with the just-trained model before it is saved.
            Console.WriteLine("=============== Single Prediction just-trained-model ===============");

            // <SnippetCreatePredictionEngine>
            _predEngine = _trainedModel.CreatePredictionEngine<GitHubIssue, IssuePrediction>(_mlContext);
            // </SnippetCreatePredictionEngine>
            // <SnippetCreateTestIssue1>
            var issue = new GitHubIssue
            {
                Title = "WebSockets communication is slow in my machine",
                Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
            };
            // </SnippetCreateTestIssue1>

            // <SnippetPredict>
            var prediction = _predEngine.Predict(issue);
            // </SnippetPredict>

            // <SnippetOutputPrediction>
            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");
            // </SnippetOutputPrediction>

            // Persist the fitted model.
            // <SnippetCallSaveModel>
            SaveModelAsFile(_mlContext, _trainedModel);
            // </SnippetCallSaveModel>

            // <SnippetReturnModel>
            return trainingPipeline;
            // </SnippetReturnModel>
        }
コード例 #7
0
        /// <summary>
        /// Builds the preprocessing estimator chain for the configured predicted column and feature columns.
        /// </summary>
        /// <remarks>
        /// The predicted column is key-mapped (alphanumeric column), type-converted (when a data kind is
        /// set) or used unchanged, and then copied to "Label". When at least one alphanumeric column is
        /// configured, those columns are one-hot encoded ahead of the feature concatenation and the result
        /// is stored in <c>_transformerChain</c>; otherwise the features are concatenated directly and the
        /// result is stored in <c>_estimatorChain</c>. A null or empty <c>_alphanumericColumns</c> array
        /// takes the second path (an empty array previously caused a NullReferenceException).
        /// </remarks>
        /// <returns>This instance.</returns>
        /// <exception cref="ArgumentNullException">
        /// <c>_predictedColumn</c> or <c>_algorithmType</c> has not been set.
        /// </exception>
        public IPipelineChain BuildPipeline()
        {
            if (_predictedColumn == null)
            {
                throw new ArgumentNullException(nameof(_predictedColumn));
            }
            if (_algorithmType == null)
            {
                throw new ArgumentNullException(nameof(_algorithmType));
            }

            // Only one of these label-preparation estimators is used below; the other stays null.
            var keyMap        = _predictedColumn.IsAlphanumeric ? MlContext.Transforms.Conversion.MapValueToKey(_predictedColumn.ColumnName) : null;
            var keyConversion = _predictedColumn.DataKind != null ? MlContext.Transforms.Conversion.ConvertType(_predictedColumn.ColumnName, outputKind: _predictedColumn.DataKind.Value) : null;

            var keyColumn     = MlContext.Transforms.CopyColumns("Label", _predictedColumn.ColumnName);
            var concatenation = MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns);

            // An empty array has nothing to encode, so it is handled exactly like "no alphanumeric columns".
            var hasAlphanumericColumns = _alphanumericColumns != null && _alphanumericColumns.Length > 0;

            if (hasAlphanumericColumns)
            {
                // The first encoder stands alone; a chain only exists once a second column is appended.
                var firstEncoder = MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[0]);
                EstimatorChain<OneHotEncodingTransformer> encoderChain = null;

                for (int i = 1; i < _alphanumericColumns.Length; i++)
                {
                    var nextEncoder = MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]);
                    encoderChain = encoderChain == null
                        ? firstEncoder.Append(nextEncoder)
                        : encoderChain.Append(nextEncoder);
                }

                var encodedFeatures = encoderChain != null
                    ? encoderChain.Append(concatenation)
                    : firstEncoder.Append(concatenation);

                if (_predictedColumn.IsAlphanumeric)
                {
                    _transformerChain = keyMap.Append(keyColumn).Append(encodedFeatures);
                }
                else if (_predictedColumn.DataKind != null)
                {
                    _transformerChain = keyConversion.Append(keyColumn).Append(encodedFeatures);
                }
                else
                {
                    _transformerChain = keyColumn.Append(encodedFeatures);
                }
            }
            else
            {
                if (_predictedColumn.IsAlphanumeric)
                {
                    _estimatorChain = keyMap.Append(keyColumn).Append(concatenation);
                }
                else if (_predictedColumn.DataKind != null)
                {
                    _estimatorChain = keyConversion.Append(keyColumn).Append(concatenation);
                }
                else
                {
                    _estimatorChain = keyColumn.Append(concatenation);
                }
            }

            return this;
        }
コード例 #8
0
        /// <summary>
        /// Appends an SDCA regression trainer (default arguments) to <paramref name="basePipeline"/>
        /// and fits the result on <paramref name="trainData"/>.
        /// </summary>
        /// <param name="mlContext">Context the trainer is created from.</param>
        /// <param name="trainData">Data the pipeline is fitted on.</param>
        /// <param name="basePipeline">Pipeline the trainer is appended to.</param>
        /// <returns>The fitted model.</returns>
        public static ITransformer WaitTimeTrainWithSdca(MLContext mlContext, IDataView trainData, EstimatorChain<ColumnConcatenatingTransformer> basePipeline)
        {
            var sdcaTrainer = mlContext.Regression.Trainers.Sdca();

            return basePipeline.Append(sdcaTrainer).Fit(trainData);
        }
コード例 #9
0
        /// <summary>
        /// Chains every transform that has an estimator, appends the trainer built by
        /// <c>Trainer.BuildTrainer(_context)</c>, and fits the result on <paramref name="trainData"/>.
        /// </summary>
        /// <param name="trainData">Data the assembled pipeline is fitted on.</param>
        /// <returns>The fitted transformer.</returns>
        public ITransformer TrainTransformer(IDataView trainData)
        {
            IEstimator<ITransformer> chain = new EstimatorChain<ITransformer>();

            // Entries that carry no estimator are skipped.
            foreach (var step in Transforms)
            {
                if (step.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(step.Estimator);
            }

            var trainerEstimator = Trainer.BuildTrainer(_context);
            chain = chain.Append(trainerEstimator);

            return chain.Fit(trainData);
        }
コード例 #10
0
        /// <summary>
        /// Chains the estimators of the transforms suggested by
        /// <c>TransformInferenceApi.InferTransforms</c> for the given data and label.
        /// </summary>
        /// <remarks>
        /// <paramref name="catalog"/> is only the extension-method receiver; inference runs on a
        /// freshly created <c>MLContext</c>.
        /// </remarks>
        /// <param name="catalog">The transforms catalog this method extends.</param>
        /// <param name="data">Data passed to transform inference.</param>
        /// <param name="label">Label passed to transform inference.</param>
        /// <returns>An estimator chain holding the suggested estimators in order.</returns>
        public static IEstimator<ITransformer> InferTransforms(this TransformsCatalog catalog, IDataView data, string label)
        {
            var inferenceContext = new MLContext();
            var suggestions = TransformInferenceApi.InferTransforms(inferenceContext, data, label);

            // Fold the suggested estimators, in order, onto an initially empty chain.
            return suggestions
                   .Select(suggestion => suggestion.Estimator)
                   .Aggregate(new EstimatorChain<ITransformer>(), (chain, next) => chain.Append(next));
        }
コード例 #11
0
ファイル: CoreFacade.cs プロジェクト: walidbj/Millionaire
        /// <summary>
        /// Builds a FastTreeTweedie regression pipeline from the enabled day/month/week inputs
        /// and fits it on <c>_trainData</c>.
        /// </summary>
        /// <remarks>
        /// "Day" and "Month" are text-featurized into "DayString" / "MonthString" when enabled; "Week"
        /// is used as-is. The selected columns are concatenated into "Features" and the column named
        /// after <paramref name="column"/> is copied to "Label".
        /// </remarks>
        /// <param name="column">Enum member whose name identifies the source column for "Label".</param>
        /// <returns>The fitted model.</returns>
        private ITransformer BuildAndTrainUsingParams(ColumnEnum column)
        {
            var features = new List<string>();
            TextFeaturizingEstimator textTransformer = null;          // set when exactly one text featurizer is needed
            EstimatorChain<ITransformer> estimatorTransformer = null; // set when both day and month are featurized

            if (_includeDay)
            {
                textTransformer = _mlContext.Transforms.Text.FeaturizeText("DayString", "Day");
                features.Add("DayString");
            }
            if (_includeMonth)
            {
                var monthFeaturizer = _mlContext.Transforms.Text.FeaturizeText("MonthString", "Month");
                if (textTransformer != null)
                {
                    estimatorTransformer = textTransformer.Append(monthFeaturizer);
                }
                else
                {
                    textTransformer = monthFeaturizer;
                }
                features.Add("MonthString");
            }
            if (_includeWeek)
            {
                features.Add("Week");
            }

            // Stages shared by every variant of the pipeline.
            var concatenate = _mlContext.Transforms.Concatenate("Features", features.ToArray());
            var copyLabel   = _mlContext.Transforms.CopyColumns("Label", System.Enum.GetName(typeof(ColumnEnum), column));
            var trainer     = _mlContext.Regression.Trainers.FastTreeTweedie();

            if (textTransformer == null)
            {
                // No text featurization: the pipeline starts at the concatenation.
                return concatenate.Append(copyLabel).Append(trainer).Fit(_trainData);
            }
            if (estimatorTransformer != null)
            {
                // Both day and month featurizers precede the concatenation.
                return estimatorTransformer.Append(concatenate).Append(copyLabel).Append(trainer).Fit(_trainData);
            }

            // Exactly one text featurizer precedes the concatenation.
            return textTransformer.Append(concatenate).Append(copyLabel).Append(trainer).Fit(_trainData);
        }
コード例 #12
0
        /// <summary>
        /// Builds an estimator chain from <c>Estimators</c>, instantiating each entry with
        /// <paramref name="parameters"/> and appending it with the entry's own scope.
        /// </summary>
        /// <param name="parameters">Values handed to each entry's <c>BuildFromParameters</c>.</param>
        /// <returns>The chain of built estimators; entries equal to <c>EmptyNode</c> are left out.</returns>
        public EstimatorChain<ITransformer> BuildFromParameters(IDictionary<string, string> parameters)
        {
            var chain = new EstimatorChain<ITransformer>();

            for (int index = 0; index < this.Estimators.Count; index++)
            {
                var node = this.Estimators[index];
                bool isPlaceholder = node == SweepableEstimator<IEstimator<ITransformer>>.EmptyNode;

                if (!isPlaceholder)
                {
                    chain = chain.Append(node.BuildFromParameters(parameters), node.Scope);
                }
            }

            return chain;
        }
コード例 #13
0
ファイル: Demo.cs プロジェクト: justCodeLife/book-recommender
        /// <summary>
        /// Appends a matrix-factorization trainer to <c>estimator</c>, fits it on
        /// <c>splitData.TrainSet</c>, and stores the result in <c>model</c>.
        /// </summary>
        private static void CreateModel()
        {
            var factorizationOptions = new MatrixFactorizationTrainer.Options
            {
                LabelColumnName             = nameof(InputModel.Rating),
                MatrixColumnIndexColumnName = "Encoded_UserID",
                MatrixRowIndexColumnName    = "Encoded_Book",
                NumberOfIterations          = 100,
                ApproximationRank           = 100
            };

            var factorizationTrainer = _context.Recommendation().Trainers.MatrixFactorization(factorizationOptions);

            model = estimator.Append(factorizationTrainer).Fit(splitData.TrainSet);
        }
コード例 #14
0
        // ===========================================================================================================


        /// <summary>
        /// Builds the preprocessing and training pipeline and fits it on <c>_trainingDataView</c>,
        /// storing the fitted model in <c>_mlModel</c>.
        /// </summary>
        /// <remarks>
        /// Does nothing when <c>ErrorHasOccured</c> is already set. Any exception is written to the
        /// debug output and recorded in <c>ErrorHasOccured</c> / <c>FailureInformation</c> rather than rethrown.
        /// </remarks>
        public void BuildTrainingPipelineAndModel()
        {
            if (ErrorHasOccured)
            {
                return;
            }

            try
            {
                // Preprocessing: label -> key, features -> one vector, min-max normalization, cache.
                var labelToKey    = _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label");
                var featureVector = _mlContext.Transforms.Concatenate(
                    outputColumnName: "Features",
                    inputColumnNames: FeatureNames.ToArray());
                var normalizer    = _mlContext.Transforms.NormalizeMinMax("Features", "Features");

                EstimatorChain<NormalizingTransformer> preprocessing =
                    labelToKey
                    .Append(featureVector)
                    .Append(normalizer)
                    .AppendCacheCheckpoint(_mlContext);

                // Learner: one-versus-all over an averaged perceptron, then key -> original label value.
                var binaryLearner = _mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                    labelColumnName: "Label",
                    numberOfIterations: 1,
                    featureColumnName: "Features");
                var oneVersusAll = _mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryLearner, labelColumnName: "Label");

                EstimatorChain<KeyToValueMappingTransformer> classifier =
                    oneVersusAll.Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                IEstimator<ITransformer> fullPipeline = preprocessing.Append(classifier);

                // TODO: evaluate the quality of the model.
                // Evaluate(_mlContext, _trainingDataView, trainingPipeline);

                _mlModel = fullPipeline.Fit(_trainingDataView);

                // TODO: save _mlModel.
                // SaveModel(_mlContext, mlModel, ModelSettings.ModelFilePath, _trainingDataView.Schema);
            }
            catch (Exception failure)
            {
                Debug.WriteLine(failure.Message);
                ErrorHasOccured    = true;
                FailureInformation = failure.Message;
            }
        }
コード例 #15
0
        /// <summary>
        /// Appends a multi-class SDCA trainer and a key-to-value mapping to <paramref name="pipeline"/>,
        /// fits it, runs one sample prediction, and saves the fitted model via <c>SaveModelAsFile</c>.
        /// </summary>
        /// <remarks>Assigns <c>_trainedModel</c> and <c>_predEngine</c>.</remarks>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="pipeline">The data-processing pipeline to extend.</param>
        /// <returns>The (unfitted) training pipeline.</returns>
        private EstimatorChain<KeyToValueMappingTransformer> BuildAndTrainModel(IDataView trainingDataView,
                                                                                EstimatorChain<ITransformer> pipeline)
        {
            var sdcaTrainer = new SdcaMultiClassTrainer(_mlContext, DefaultColumnNames.Label, DefaultColumnNames.Features);
            var keyToValue  = _mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");
            var trainingPipeline = pipeline.Append(sdcaTrainer).Append(keyToValue);

            _trainedModel = trainingPipeline.Fit(trainingDataView);
            _predEngine   = _trainedModel.CreatePredictionEngine<ConformChecker, CheckerPrediction>(_mlContext);

            // Run a single prediction on the fresh model; the result itself is not used.
            var sample = new ConformChecker { Name = "Электронный аукцион" };
            _predEngine.Predict(sample);

            SaveModelAsFile(_mlContext, _trainedModel);
            return trainingPipeline;
        }
コード例 #16
0
        /// <summary>
        /// Appends a LightGBM regression trainer with fixed options to <paramref name="basePipeline"/>
        /// and fits the result on <paramref name="trainData"/>.
        /// </summary>
        /// <param name="mlContext">Context the trainer is created from.</param>
        /// <param name="trainData">Data the pipeline is fitted on.</param>
        /// <param name="basePipeline">Pipeline the trainer is appended to.</param>
        /// <returns>The fitted model.</returns>
        public static ITransformer WaitTimeTrainWithLightGbm(MLContext mlContext, IDataView trainData, EstimatorChain<ColumnConcatenatingTransformer> basePipeline)
        {
            var lightGbmOptions = new LightGbmRegressionTrainer.Options
            {
                NumberOfIterations         = 100,
                NumberOfLeaves             = 10,
                MinimumExampleCountPerLeaf = 18,
                LearningRate               = 0.09
            };

            var lightGbmTrainer = mlContext.Regression.Trainers.LightGbm(lightGbmOptions);

            return basePipeline.Append(lightGbmTrainer).Fit(trainData);
        }
コード例 #17
0
        /// <summary>
        /// Builds an estimator chain containing one <c>ColumnConcatenatingEstimator</c> for every
        /// column mapping that has more than one source column.
        /// </summary>
        /// <param name="env">Host environment used for argument checks and passed to each estimator.</param>
        /// <param name="columns">Column mappings; each must be non-null with a non-blank name and at least one non-blank source.</param>
        /// <returns>The resulting chain; it is empty when no mapping has more than one source.</returns>
        public static IEstimator<ITransformer> GetConcatEstimator(IHostEnvironment env, ManyToOneColumn[] columns)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(columns, nameof(columns));

            var chain = new EstimatorChain<ITransformer>();

            foreach (var mapping in columns)
            {
                // Every mapping is validated, including those that end up needing no concatenation.
                env.CheckUserArg(mapping != null, nameof(WordBagBuildingTransformer.Options.Columns));
                env.CheckUserArg(!string.IsNullOrWhiteSpace(mapping.Name), nameof(mapping.Name));
                env.CheckUserArg(Utils.Size(mapping.Source) > 0, nameof(mapping.Source));
                env.CheckUserArg(mapping.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(mapping.Source));

                // A single source column has nothing to be concatenated with.
                if (mapping.Source.Length <= 1)
                {
                    continue;
                }

                var concatenation = new ColumnConcatenatingEstimator(env, mapping.Name, mapping.Source);
                chain = chain.Append<ITransformer>(concatenation);
            }

            return chain;
        }
コード例 #18
0
        /// <summary>
        /// Appends a multi-class SDCA trainer and a key-to-value mapping to <paramref name="pipeline"/>,
        /// fits it, prints one sample prediction, and saves the fitted model via <c>SaveModelAsFile</c>.
        /// </summary>
        /// <remarks>
        /// Assigns the static <c>_trainedModel</c> and <c>_predEngine</c> fields and writes progress to the console.
        /// </remarks>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="pipeline">The data-processing pipeline to extend.</param>
        /// <returns>The (unfitted) training pipeline.</returns>
        public static EstimatorChain<KeyToValueMappingTransformer> BuildAndTrainModel(IDataView trainingDataView, EstimatorChain<ITransformer> pipeline)
        {
            var sdcaTrainer = new SdcaMultiClassTrainer(_mlContext, DefaultColumnNames.Label, DefaultColumnNames.Features);
            var keyToValue  = _mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");
            var trainingPipeline = pipeline.Append(sdcaTrainer).Append(keyToValue);

            Console.WriteLine("=============== Training the model  ===============");
            _trainedModel = trainingPipeline.Fit(trainingDataView);
            Console.WriteLine($"=============== Finished Training the model Ending time: {DateTime.Now.ToString()} ===============");

            // Try a single prediction with the just-trained model before it is saved.
            Console.WriteLine("=============== Single Prediction just-trained-model ===============");
            _predEngine = _trainedModel.CreatePredictionEngine<СonformChecker, CheckerPrediction>(_mlContext);
            var sample = new СonformChecker
            {
                Name = "Электронный аукцион"
            };
            var prediction = _predEngine.Predict(sample);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Con} ===============");

            SaveModelAsFile(_mlContext, _trainedModel);
            return trainingPipeline;
        }
コード例 #19
0
        /// <summary>
        /// Builds a one-hot-encoding preprocessor for the properties marked with
        /// <c>OneHotEncodingColumn</c>.
        /// </summary>
        /// <param name="context">ML context the encoding estimators are created from.</param>
        /// <param name="properties">Candidate properties; marked ones are encoded, the rest are passed through by name.</param>
        /// <param name="encodedFormat">Format string producing the output column name from the property name.</param>
        /// <returns>
        /// A preprocessor whose estimator chains one encoder per marked property and whose combined
        /// features are the unmarked property names combined with the encoded column names.
        /// </returns>
        public static OneHotEncodingPreprocessor OneHotEncoding(this MLContext context, IEnumerable<PropertyInfo> properties, string encodedFormat = "{0}_encoded")
        {
            var preprocessor = new OneHotEncodingPreprocessor();

            // Both sequences stay lazy, exactly like the property filter they are built from.
            var plainNames = properties
                             .Where(property => property.GetCustomAttribute<OneHotEncodingColumn>(true) == null)
                             .Select(property => property.Name);
            var namesToEncode = properties
                                .Where(property => property.GetCustomAttribute<OneHotEncodingColumn>(true) != null)
                                .Select(property => property.Name);

            var encoderChain = new EstimatorChain<OneHotEncodingTransformer>();
            var encodedPairs = new List<CombinedFeature>();

            foreach (var sourceName in namesToEncode)
            {
                var encodedName = string.Format(encodedFormat, sourceName);
                var encoder = context.Transforms.Categorical.OneHotEncoding(inputColumnName: sourceName, outputColumnName: encodedName);

                encoderChain = encoderChain.Append(encoder);
                encodedPairs.Add(new CombinedFeature { Feature = sourceName, EncodedFeature = encodedName });
            }

            preprocessor.OneHotEncodingEstimator = encoderChain;
            preprocessor.CombinedFeatures        = Shared.Enumerator.CombineEnumerable(plainNames, encodedPairs.Select(pair => pair.EncodedFeature));
            return preprocessor;
        }
コード例 #20
0
        /// <summary>
        /// Builds a key-to-value mapping preprocessor for the properties marked with
        /// <c>KeyToValueColumn</c>.
        /// </summary>
        /// <param name="context">ML context the mapping estimators are created from.</param>
        /// <param name="properties">Candidate properties; marked ones are mapped, the rest are passed through by name.</param>
        /// <param name="encodedFormat">Format string producing the output column name from the property name.</param>
        /// <returns>
        /// A preprocessor whose estimator chains one mapping per marked property and whose combined
        /// features are the unmarked property names combined with the mapped column names.
        /// </returns>
        public static KeyToValueMappingPreprocessor KeyToValueMapping(this MLContext context, IEnumerable<PropertyInfo> properties, string encodedFormat = "{0}")
        {
            var preprocessor = new KeyToValueMappingPreprocessor();

            // Both sequences stay lazy, exactly like the property filter they are built from.
            var plainNames = properties
                             .Where(property => property.GetCustomAttribute<KeyToValueColumn>(true) == null)
                             .Select(property => property.Name);
            var namesToMap = properties
                             .Where(property => property.GetCustomAttribute<KeyToValueColumn>(true) != null)
                             .Select(property => property.Name);

            var mappingChain = new EstimatorChain<KeyToValueMappingTransformer>();
            var mappedPairs  = new List<CombinedFeature>();

            foreach (var sourceName in namesToMap)
            {
                var mappedName = string.Format(encodedFormat, sourceName);
                var mapping = context.Transforms.Conversion.MapKeyToValue(inputColumnName: sourceName, outputColumnName: mappedName);

                mappingChain = mappingChain.Append(mapping);
                mappedPairs.Add(new CombinedFeature { Feature = sourceName, EncodedFeature = mappedName });
            }

            preprocessor.KeyToValueMappingEstimator = mappingChain;
            preprocessor.CombinedFeatures           = Shared.Enumerator.CombineEnumerable(plainNames, mappedPairs.Select(pair => pair.EncodedFeature));
            return preprocessor;
        }
コード例 #21
0
        /// <summary>
        /// Starts a long-running task that loads book ratings from a comma-separated file,
        /// cross-validates a field-aware factorization machine, and yields the model of the fold
        /// with the highest accuracy.
        /// </summary>
        /// <param name="FilePath">Path of the text file the training data is loaded from.</param>
        /// <returns>A task producing the best fold's model, or null when there are no folds.</returns>
        private static Task<ITransformer> TrainAndGetBestModel(string FilePath)
        {
            Func<ITransformer> trainBestModel = () =>
            {
                MLContext mlContext = MLCProvider.Current;

                // Load the file and cache the resulting data view.
                IDataView trainingData = mlContext.Data.Cache(
                    mlContext.Data.LoadFromTextFile<BookRating>(FilePath, ',', true));

                Console.WriteLine("=============== 正在读取训练数据文件 ===============");

                // Featurize the three text inputs, then merge them into a single "Features" column.
                var userIdFeatures = mlContext.Transforms.Text.FeaturizeText("UserIdFeaturized", nameof(BookRating.UserId));
                var isbnFeatures   = mlContext.Transforms.Text.FeaturizeText("ISBNFeaturized", nameof(BookRating.ISBN));
                var ageFeatures    = mlContext.Transforms.Text.FeaturizeText("AgeFeaturized", nameof(BookRating.Age));
                var allFeatures    = mlContext.Transforms.Concatenate("Features", "UserIdFeaturized", "ISBNFeaturized", "AgeFeaturized");

                EstimatorChain<ColumnConcatenatingTransformer> dataPipeline =
                    userIdFeatures.Append(isbnFeatures).Append(ageFeatures).Append(allFeatures);

                Console.WriteLine("=============== 正在使用交叉验证训练预测模型 ===============");

                var trainerOptions = new FieldAwareFactorizationMachineTrainer.Options
                {
                    Verbose            = true,
                    NumberOfIterations = 10,
                    FeatureColumnName  = "Features",
                    Shuffle            = true
                };

                EstimatorChain<FieldAwareFactorizationMachinePredictionTransformer> trainingPipeline =
                    dataPipeline.Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(trainerOptions));

                var foldResults = mlContext.BinaryClassification.CrossValidate(trainingData, trainingPipeline);

                // Highest accuracy first; the first fold's model is the result.
                return foldResults
                       .OrderByDescending(fold => fold.Metrics.Accuracy)
                       .Select(fold => fold.Model)
                       .FirstOrDefault();
            };

            return Task.Factory.StartNew(trainBestModel, TaskCreationOptions.LongRunning);
        }
コード例 #22
0
        public void TestSingleSentence2Classes()
        {
            var dataView = ML.Data.LoadFromEnumerable(
                new List <TestSingleSentenceData>(new TestSingleSentenceData[] {
                new TestSingleSentenceData()
                {       // Testing longer than 512 words.
                    Sentence1 = "ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . 
ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .",
                    Sentiment = "Negative"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "with a sharp script and strong performances",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "that director m. night shyamalan can weave an eerie spell and",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "comfortable",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "does have its charms .",
                    Sentiment = "Positive"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "banal as the telling",
                    Sentiment = "Negative"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "faithful without being forceful , sad without being shrill , `` a walk to remember '' succeeds through sincerity .",
                    Sentiment = "Negative"
                },
                new TestSingleSentenceData()
                {
                    Sentence1 = "leguizamo 's best movie work so far",
                    Sentiment = "Negative"
                }
            }));
            var chain     = new EstimatorChain <ITransformer>();
            var estimator = chain.Append(ML.Transforms.Conversion.MapValueToKey("Label", "Sentiment"), TransformerScope.TrainTest)
                            .Append(ML.MulticlassClassification.Trainers.TextClassification(outputColumnName: "outputColumn"))
                            .Append(ML.Transforms.Conversion.MapKeyToValue("outputColumn"));

            TestEstimatorCore(estimator, dataView);
            var estimatorSchema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema));

            Assert.Equal(5, estimatorSchema.Count);
            Assert.Equal("outputColumn", estimatorSchema[3].Name);
            Assert.Equal(TextDataViewType.Instance, estimatorSchema[3].ItemType);

            var transformer       = estimator.Fit(dataView);
            var transformerSchema = transformer.GetOutputSchema(dataView.Schema);

            var filteredModel = transformer.GetModelFor(TransformerScope.Scoring);

            Assert.Equal(6, transformerSchema.Count);
            Assert.Equal("outputColumn", transformerSchema[4].Name);
            Assert.Equal(TextDataViewType.Instance, transformerSchema[4].Type);

            var dataNoLabel = ML.Data.LoadFromEnumerable(
                new List <TestSingleSentenceDataNoLabel>(new TestSingleSentenceDataNoLabel[] {
                new ()
                {       // Testing longer than 512 words.
                    Sentence1 = "ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community . 
ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .ultimately feels as flat as the scruffy sands of its titular community .",
                },
                new ()
                {
                    Sentence1 = "with a sharp script and strong performances",
                },
                new ()
                {
                    Sentence1 = "that director m. night shyamalan can weave an eerie spell and",
                },
                new ()
                {
                    Sentence1 = "comfortable",
                },
                new ()
                {
                    Sentence1 = "does have its charms .",
                },
                new ()
                {
                    Sentence1 = "banal as the telling",
                },
                new ()
                {
                    Sentence1 = "faithful without being forceful , sad without being shrill , `` a walk to remember '' succeeds through sincerity .",
                },
                new ()
                {
                    Sentence1 = "leguizamo 's best movie work so far",
                }
            }));
コード例 #23
0
        /// <summary>
        /// Parses a model component from the model JSON and appends it to the data pipeline.
        /// </summary>
        /// <remarks>
        /// <c>EstimatorChain.Append</c> does not modify the chain it is called on; it returns a new chain
        /// containing the extra estimator. The result is therefore assigned back to
        /// <paramref name="pipeline"/>, otherwise the appended component would be silently dropped.
        /// </remarks>
        /// <param name="pipeline">
        /// Reference to the EstimatorChain that is being appended to. On success it is replaced by a new
        /// chain ending with the parsed component; it is left untouched when <c>false</c> is returned.
        /// </param>
        /// <param name="modelType">The string representing the type of component to add to the pipeline.</param>
        /// <param name="componentObject">The JSON configuration of the model component being appended.</param>
        /// <returns>True if <paramref name="modelType"/> was recognised and the component appended, false otherwise.</returns>
        private bool _ParseComponentJson(ref EstimatorChain <ITransformer> pipeline, string modelType, JToken componentObject)
        {
            // The switch only selects/builds the estimator; the single Append happens after it.
            IEstimator <ITransformer> component;

            switch (modelType)
            {
            //------------------------ A ---------------------------------------------------------------------------------
            case "ApplyOnnxModel":
                component = MLContext._ApplyOnnxModel(componentObject);
                break;

            case "ApplyWordEmbedding":
                component = MLContext._ApplyWordEmbedding(componentObject);
                break;

            case "ApproximatedKernelMap":
                component = MLContext._ApproximatedKernelMap(componentObject);
                break;

            case "AveragedPerceptron":
                component = MLContext._AveragedPerceptronTrainer(componentObject);
                break;


            //------------------------ C ---------------------------------------------------------------------------------
            case "CalculateFeatureContribution":
                component = MLContext._CalculateFeatureContribution(componentObject);
                break;

            case "Concatenate":
                component = MLContext._Concatenate(componentObject);
                break;

            // NOTE: the JSON key is spelled "Greyscale" while the helper is spelled "Grayscale".
            case "ConvertToGreyscale":
                component = MLContext._ConvertToGrayscale(componentObject);
                break;

            case "ConvertToImage":
                component = MLContext._ConvertToImage(componentObject);
                break;

            case "ConvertType":
                component = MLContext._ConvertType(componentObject);
                break;

            case "CopyColumns":
                component = MLContext._CopyColumns(componentObject);
                break;


            //------------------------ D ---------------------------------------------------------------------------------
            case "DetectAnomalyBySrCnn":
                component = MLContext._DetectAnomalyBySrCnn(componentObject);
                break;

            case "DetectChangePointBySsa":
                component = MLContext._DetectChangePointBySsa(componentObject);
                break;

            case "DetectIidChangePoint":
                component = MLContext._DetectIidChangePoint(componentObject);
                break;

            case "DetectIidSpike":
                component = MLContext._DetectIidSpike(componentObject);
                break;

            case "DetectSpikeBySsa":
                component = MLContext._DetectSpikeBySsa(componentObject);
                break;

            case "DnnFeaturizeImage":
                component = MLContext._DnnFeaturizeImage(componentObject);
                break;

            case "DropColumns":
                component = MLContext._DropColumns(componentObject);
                break;


            //------------------------ E ---------------------------------------------------------------------------------
            case "ExtractPixels":
                component = MLContext._ExtractPixels(componentObject);
                break;


            //------------------------ F ---------------------------------------------------------------------------------
            case "FastForestBinary":
                component = MLContext._FastForestBinaryTrainer(componentObject);
                break;

            case "FastForestRegression":
                component = MLContext._FastForestRegressionTrainer(componentObject);
                break;

            case "FastTreeBinary":
                component = MLContext._FastTreeBinaryTrainer(componentObject);
                break;

            case "FastTreeRanking":
                component = MLContext._FastTreeRankingTrainer(componentObject);
                break;

            case "FastTreeRegression":
                component = MLContext._FastTreeRegressionTrainer(componentObject);
                break;

            case "FastTreeTweedie":
                component = MLContext._FastTreeTweedieTrainer(componentObject);
                break;

            case "FeaturizeText":
                component = MLContext._FeaturizeText(componentObject);
                break;

            case "FieldAwareFactorizationMachine":
                component = MLContext._FieldAwareFactorizationMachineTrainer(componentObject);
                break;

            case "ForecastBySsa":
                component = MLContext._ForecastBySsa(componentObject);
                break;


            //------------------------ G ---------------------------------------------------------------------------------
            case "GamBinary":
                component = MLContext._GamBinaryTrainer(componentObject);
                break;

            case "GamRegression":
                component = MLContext._GamRegressionTrainer(componentObject);
                break;


            //------------------------ H ---------------------------------------------------------------------------------
            case "Hash":
                component = MLContext._Hash(componentObject);
                break;


            //------------------------ I ---------------------------------------------------------------------------------
            case "IndicateMissingValues":
                component = MLContext._IndicateMissingValues(componentObject);
                break;


            //------------------------ K ---------------------------------------------------------------------------------
            case "KMeans":
                component = MLContext._KMeansTrainer(componentObject);
                break;


            //------------------------ L ---------------------------------------------------------------------------------
            case "LatentDirichletAllocation":
                component = MLContext._LatentDirichletAllocation(componentObject);
                break;

            case "LbfgsLogisticRegressionBinary":
                component = MLContext._LbfgsLogisticRegressionBinaryTrainer(componentObject);
                break;

            case "LbfgsMaximumEntropyMulticlass":
                component = MLContext._LbfgsMaximumEntropyMulticlassTrainer(componentObject);
                break;

            case "LbfgsPoissonRegression":
                component = MLContext._LbfgsPoissonRegressionTrainer(componentObject);
                break;

            case "LightGbmBinary":
                component = MLContext._LightGbmBinaryTrainer(componentObject);
                break;

            case "LightGbmMulticlass":
                component = MLContext._LightGbmMulticlassTrainer(componentObject);
                break;

            case "LightGbmRanking":
                component = MLContext._LightGbmRankingTrainer(componentObject);
                break;

            case "LightGbmRegression":
                component = MLContext._LightGbmRegressionTrainer(componentObject);
                break;

            case "LinearSvm":
                component = MLContext._LinearSvmTrainer(componentObject);
                break;

            case "LoadImages":
                component = MLContext._LoadImages(componentObject);
                break;

            case "LoadTensorFlowModel":
                component = MLContext._LoadTensorFlowModel(componentObject);
                break;


            //------------------------ M ---------------------------------------------------------------------------------
            case "MapKeyToBinaryVector":
                component = MLContext._MapKeyToBinaryVector(componentObject);
                break;

            case "MapKeyToValue":
                component = MLContext._MapKeyToValue(componentObject);
                break;

            case "MapKeyToVector":
                component = MLContext._MapKeyToVector(componentObject);
                break;

            case "MapValue":
                component = MLContext._MapValue(componentObject);
                break;

            case "MapValueToKey":
                component = MLContext._MapValueToKey(componentObject);
                break;

            case "MatrixFactorization":
                component = MLContext._MatrixFactorizationTrainer(componentObject);
                break;


            //------------------------ N ---------------------------------------------------------------------------------
            case "NaiveBayesMulticlass":
                component = MLContext._NaiveBayesMulticlassTrainer(componentObject);
                break;

            case "NormalizeBinning":
                component = MLContext._NormalizeBinning(componentObject);
                break;

            case "NormalizeGlobalContrast":
                component = MLContext._NormalizeGlobalContrast(componentObject);
                break;

            case "NormalizeLogMeanVariance":
                component = MLContext._NormalizeLogMeanVariance(componentObject);
                break;

            case "NormalizeLpNorm":
                component = MLContext._NormalizeLpNorm(componentObject);
                break;

            case "NormalizeMeanVariance":
                component = MLContext._NormalizeMeanVariance(componentObject);
                break;

            case "NormalizeMinMax":
                component = MLContext._NormalizeMinMax(componentObject);
                break;

            case "NormalizeSupervisedBinning":
                component = MLContext._NormalizeSupervisedBinning(componentObject);
                break;

            case "NormalizeText":
                component = MLContext._NormalizeText(componentObject);
                break;


            //------------------------ O ---------------------------------------------------------------------------------
            case "OneHotEncoding":
                component = MLContext._OneHotEncoding(componentObject);
                break;

            case "OneHotHashEncoding":
                component = MLContext._OneHotHashEncoding(componentObject);
                break;

            case "OnlineGradientDescent":
                component = MLContext._OnlineGradientDescentTrainer(componentObject);
                break;

            case "OlsTrainer":
                component = MLContext._OlsTrainer(componentObject);
                break;


            //------------------------ P ---------------------------------------------------------------------------------
            case "Prior":
                component = MLContext._PriorTrainer(componentObject);
                break;

            case "ProduceHashedNgrams":
                component = MLContext._ProduceHashedNgrams(componentObject);
                break;

            case "ProduceHashedWordBags":
                component = MLContext._ProduceHashedWordBags(componentObject);
                break;

            case "ProduceNgrams":
                component = MLContext._ProduceNgrams(componentObject);
                break;

            case "ProduceWordBags":
                component = MLContext._ProduceWordBags(componentObject);
                break;

            case "ProjectToPrincipalComponents":
                component = MLContext._ProjectToPrincipalComponents(componentObject);
                break;


            //------------------------ R ---------------------------------------------------------------------------------
            case "RandomizedPca":
                component = MLContext._RandomizedPcaTrainer(componentObject);
                break;

            case "RemoveDefaultStopWords":
                component = MLContext._RemoveDefaultStopWords(componentObject);
                break;

            case "RemoveStopWords":
                component = MLContext._RemoveStopWords(componentObject);
                break;

            case "ReplaceMissingValues":
                component = MLContext._ReplaceMissingValues(componentObject);
                break;

            case "ResizeImages":
                component = MLContext._ResizeImages(componentObject);
                break;


            //------------------------ S ---------------------------------------------------------------------------------
            case "SdcaLogisticRegressionBinary":
                component = MLContext._SdcaLogisticRegressionBinaryTrainer(componentObject);
                break;

            case "SdcaMaximumEntropyMulticlass":
                component = MLContext._SdcaMaximumEntropyMulticlassTrainer(componentObject);
                break;

            case "SdcaNonCalibratedBinary":
                component = MLContext._SdcaNonCalibratedBinaryTrainer(componentObject);
                break;

            case "SdcaNonCalibratedMulticlass":
                component = MLContext._SdcaNonCalibratedMulticlassTrainer(componentObject);
                break;

            case "SdcaRegression":
                component = MLContext._SdcaRegressionTrainer(componentObject);
                break;

            case "SelectColumns":
                component = MLContext._SelectColumns(componentObject);
                break;

            case "SelectFeaturesBasedOnCount":
                component = MLContext._SelectFeaturesBasedOnCount(componentObject);
                break;

            case "SelectFeaturesBasedOnMutualInformation":
                component = MLContext._SelectFeaturesBasedOnMutualInformation(componentObject);
                break;

            case "SymbolicSgdLogisticRegressionBinary":
                component = MLContext._SymbolicSgdLogisticRegressionBinaryTrainer(componentObject);
                break;


            //------------------------ T ---------------------------------------------------------------------------------
            case "TokenizeIntoCharactersAsKeys":
                component = MLContext._TokenizeIntoCharactersAsKeys(componentObject);
                break;

            case "TokenizeIntoWords":
                component = MLContext._TokenizeIntoWords(componentObject);
                break;


            //------------------------ default ---------------------------------------------------------------------------
            default:
                // Unknown component type: leave the pipeline unchanged.
                return(false);
            }

            // Append returns a NEW chain; store it back through the ref parameter so the caller sees it.
            // The explicit <ITransformer> keeps the chain type assignable to the parameter type.
            pipeline = pipeline.Append <ITransformer>(component);
            return(true);
        }
コード例 #24
0
 /// <summary>
 /// Creates the SDCA regression trainer and appends it to the data-processing pipeline.
 /// </summary>
 /// <param name="mlContext">Context whose regression trainer catalog supplies the SDCA trainer.</param>
 /// <param name="dataProcessPipeline">Data-processing chain that the trainer is appended to.</param>
 /// <param name="trainer">Receives the SDCA trainer, configured with label column "Label" and feature column "Features".</param>
 /// <param name="trainingPipeline">Receives the chain returned by appending the trainer to <paramref name="dataProcessPipeline"/>.</param>
 private static void SetTrainingAlgorithm(MLContext mlContext, EstimatorChain <ColumnConcatenatingTransformer> dataProcessPipeline, out SdcaRegressionTrainer trainer, out EstimatorChain <RegressionPredictionTransformer <LinearRegressionModelParameters> > trainingPipeline)
 {
     var sdca = mlContext.Regression.Trainers.Sdca(
         labelColumnName: "Label",
         featureColumnName: "Features");

     // Hand the trainer out first, then the pipeline that ends with it.
     trainer          = sdca;
     trainingPipeline = dataProcessPipeline.Append(sdca);
 }
コード例 #25
0
        // ===========================================================================================================


        /// <summary>
        /// Builds the data-processing pipeline, appends an L-BFGS Poisson regression trainer and fits the
        /// resulting pipeline on <c>_trainingDataView</c>, storing the fitted model in <c>_mlModel</c>.
        /// Does nothing when <c>ErrorHasOccured</c> is already set. Any exception is caught and recorded
        /// in <c>ErrorHasOccured</c> / <c>FailureInformation</c> instead of being propagated.
        /// </summary>
        public void BuildTrainingPipelineAndModel()
        {
            if (ErrorHasOccured)
            {
                return;
            }

            try
            {
                // Builds pairs whose two column names are identical (the transform overwrites the column).
                InputOutputColumnPair[] SameName(params string[] columnNames)
                {
                    return Array.ConvertAll(columnNames, columnName => new InputOutputColumnPair(columnName, columnName));
                }

                var oneHotColumns = SameName(
                    "MSZoning", "Street", "Alley", "LotShape", "LandContour", "Utilities",
                    "LotConfig", "LandSlope", "Neighborhood", "Condition1", "Condition2", "BldgType",
                    "HouseStyle", "RoofStyle", "RoofMatl", "Exterior1st", "Exterior2nd", "MasVnrType",
                    "ExterQual", "ExterCond", "Foundation", "BsmtQual", "BsmtCond", "BsmtExposure",
                    "BsmtFinType1", "BsmtFinType2", "Heating", "HeatingQC", "Electrical", "KitchenQual",
                    "Functional", "FireplaceQu", "GarageType", "GarageFinish", "GarageQual", "GarageCond",
                    "PavedDrive", "PoolQC", "Fence", "MiscFeature", "SaleType", "SaleCondition");

                // Indicator columns are new columns derived from the three columns named second.
                var missingIndicatorColumns = new[]
                {
                    new InputOutputColumnPair("LotFrontage_MissingIndicator", "LotFrontage"),
                    new InputOutputColumnPair("MasVnrArea_MissingIndicator", "MasVnrArea"),
                    new InputOutputColumnPair("GarageYrBlt_MissingIndicator", "GarageYrBlt")
                };

                var transforms = _mlContext.Transforms;

                EstimatorChain <NormalizingTransformer> dataProcessPipeline = transforms.Conversion.ConvertType(SameName("CentralAir"))
                                                                              .Append(transforms.Categorical.OneHotEncoding(oneHotColumns))
                                                                              .Append(transforms.IndicateMissingValues(missingIndicatorColumns))
                                                                              .Append(transforms.Conversion.ConvertType(SameName("LotFrontage_MissingIndicator", "MasVnrArea_MissingIndicator", "GarageYrBlt_MissingIndicator")))
                                                                              .Append(transforms.ReplaceMissingValues(SameName("LotFrontage", "MasVnrArea", "GarageYrBlt")))
                                                                              .Append(transforms.Concatenate("Features", FeatureNames.ToArray()))
                                                                              .Append(transforms.NormalizeMinMax("Features", "Features"))
                                                                              .AppendCacheCheckpoint(_mlContext);

                var trainerOptions = new LbfgsPoissonRegressionTrainer.Options()
                {
                    L2Regularization          = 0.07404655f,
                    L1Regularization          = 0.2087761f,
                    OptimizationTolerance     = 0.0001f,
                    HistorySize               = 5,
                    MaximumNumberOfIterations = 462473459,
                    InitialWeightsDiameter    = 0.5613934f,
                    DenseOptimizer            = false,
                    LabelColumnName           = "SalePrice",
                    FeatureColumnName         = "Features"
                };
                LbfgsPoissonRegressionTrainer trainer = _mlContext.Regression.Trainers.LbfgsPoissonRegression(trainerOptions);

                EstimatorChain <RegressionPredictionTransformer <PoissonRegressionModelParameters> > trainingPipeline = dataProcessPipeline.Append(trainer);

                // TODO
                // Evaluate quality of Model
                // Evaluate(_mlContext, _trainingDataView, trainingPipeline);

                // Train the model on the training data view.
                _mlModel = trainingPipeline.Fit(_trainingDataView);
            }
            catch (Exception ex)
            {
                // Record the failure on the instance rather than rethrowing.
                Debug.WriteLine(ex.Message);
                ErrorHasOccured    = true;
                FailureInformation = ex.Message;
            }
        }
コード例 #26
0
        /// <summary>
        /// Builds the estimator chain for n-gram extraction over <paramref name="options"/>.Columns.
        /// Columns whose source is found in <paramref name="inputSchema"/> with a text item type first get a
        /// value-to-key mapping (optionally followed by a missing-value drop); every column then goes
        /// through the n-gram extracting estimator.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="options">Transform options; at least one column must be specified.</param>
        /// <param name="inputSchema">Schema used to look up each column's source and detect text columns.</param>
        /// <param name="termLoaderArgs">Optional term-loading settings applied to the value-to-key mapping.</param>
        /// <returns>The estimator chain ending with the n-gram extracting estimator.</returns>
        internal static IEstimator <ITransformer> CreateEstimator(IHostEnvironment env, Options options, SchemaShape inputSchema, TermLoaderArguments termLoaderArgs = null)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(LoaderSignature);

            host.CheckValue(options, nameof(options));
            host.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns), "Columns must be specified");

            var pipeline = new EstimatorChain <ITransformer>();

            // Validate every column and remember which ones have a text-typed source.
            int columnCount  = options.Columns.Length;
            var textColumns  = new List <Column>();
            var isTextColumn = new bool[columnCount];

            for (int index = 0; index < columnCount; index++)
            {
                var candidate = options.Columns[index];

                host.CheckNonWhiteSpace(candidate.Name, nameof(candidate.Name));
                host.CheckNonWhiteSpace(candidate.Source, nameof(candidate.Source));

                isTextColumn[index] = inputSchema.TryFindColumn(candidate.Source, out var shape) &&
                                      shape.ItemType is TextDataViewType;
                if (isTextColumn[index])
                {
                    textColumns.Add(candidate);
                }
            }

            // Text columns are first converted to keys by the term (value-to-key) transform; all other
            // columns skip that step and are handed to the n-gram transform directly. This lets the
            // extractor accept both text and key inputs. The n-gram transform itself validates the
            // column types (for inputs that are neither text nor keys).
            if (textColumns.Count > 0)
            {
                int  textCount    = textColumns.Count;
                bool dropUnknowns = termLoaderArgs != null && termLoaderArgs.DropUnknowns;

                var keyMappings = new ValueToKeyMappingEstimator.ColumnOptionsBase[textCount];
                var dropPairs   = dropUnknowns ? new (string, string)[textCount] : null;

                for (int t = 0; t < textCount; t++)
                {
                    var textColumn = textColumns[t];

                    // Key limit precedence: per-column setting, then global setting, then a default that
                    // depends on whether term loader arguments were supplied.
                    int keyLimit;
                    if (Utils.Size(textColumn.MaxNumTerms) > 0)
                    {
                        keyLimit = textColumn.MaxNumTerms[0];
                    }
                    else if (Utils.Size(options.MaxNumTerms) > 0)
                    {
                        keyLimit = options.MaxNumTerms[0];
                    }
                    else if (termLoaderArgs == null)
                    {
                        keyLimit = NgramExtractingEstimator.Defaults.MaximumNgramsCount;
                    }
                    else
                    {
                        keyLimit = int.MaxValue;
                    }

                    var mapping = new ValueToKeyMappingEstimator.ColumnOptions(
                        textColumn.Name,
                        textColumn.Source,
                        maximumNumberOfKeys: keyLimit,
                        keyOrdinality: termLoaderArgs?.Sort ?? ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence);

                    if (termLoaderArgs != null)
                    {
                        mapping.Key  = termLoaderArgs.Term;
                        mapping.Keys = termLoaderArgs.Terms;
                    }
                    keyMappings[t] = mapping;

                    if (dropPairs != null)
                    {
                        // Same name on both sides of the pair.
                        dropPairs[t] = (textColumn.Name, textColumn.Name);
                    }
                }

                IDataView keyData = null;
                if (termLoaderArgs?.DataFile != null)
                {
                    using (var channel = env.Start("Create key data view"))
                    {
                        keyData = ValueToKeyMappingTransformer.GetKeyDataViewOrNull(env, channel, termLoaderArgs.DataFile, termLoaderArgs.TermsColumn, termLoaderArgs.Loader, out var ignoredAutoConvert);
                    }
                }

                pipeline = pipeline.Append <ITransformer>(new ValueToKeyMappingEstimator(host, keyMappings, keyData));
                if (dropPairs != null)
                {
                    pipeline = pipeline.Append <ITransformer>(new MissingValueDroppingEstimator(host, dropPairs));
                }
            }

            var ngramColumns = new NgramExtractingEstimator.ColumnOptions[columnCount];

            for (int n = 0; n < columnCount; n++)
            {
                var current = options.Columns[n];

                // Columns that went through the term transform are read back under their own name;
                // the rest are read from their original source column.
                string inputName = isTextColumn[n] ? current.Name : current.Source;

                ngramColumns[n] = new NgramExtractingEstimator.ColumnOptions(
                    current.Name,
                    current.NgramLength ?? options.NgramLength,
                    current.SkipLength ?? options.SkipLength,
                    current.UseAllLengths ?? options.UseAllLengths,
                    current.Weighting ?? options.Weighting,
                    current.MaxNumTerms ?? options.MaxNumTerms,
                    inputName);
            }

            return(pipeline.Append <ITransformer>(new NgramExtractingEstimator(env, ngramColumns)));
        }
コード例 #27
0
        /// <summary>
        /// Assembles the (unfitted) sentiment training pipeline: feature preparation followed by an
        /// SDCA maximum-entropy multiclass trainer.
        /// </summary>
        /// <param name="mlContext">Context whose catalogs supply every transform and the trainer.</param>
        /// <returns>
        /// The feature-preparation chain with the trainer chain appended. Nothing is fitted here.
        /// </returns>
        public static IEstimator <ITransformer> BuildTrainingPipeline(MLContext mlContext)
        {
            var transforms = mlContext.Transforms;

            // Feature preparation, in order:
            //   1. map the "Sentiment" label to a key in place,
            //   2. featurize "SentimentText" into "SentimentText_tf",
            //   3. copy that column to "Features",
            //   4. min-max normalize "Features" in place,
            //   5. add a cache checkpoint in front of the trainer.
            var featurePipeline = transforms.Conversion.MapValueToKey("Sentiment", "Sentiment")
                .Append(transforms.Text.FeaturizeText("SentimentText_tf", "SentimentText"))
                .Append(transforms.CopyColumns("Features", "SentimentText_tf"))
                .Append(transforms.NormalizeMinMax("Features", "Features"))
                .AppendCacheCheckpoint(mlContext);

            // Trainer, followed by mapping the predicted key back to its original value.
            var learner = mlContext.MulticlassClassification.Trainers
                .SdcaMaximumEntropy(labelColumnName: "Sentiment", featureColumnName: "Features")
                .Append(transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

            return featurePipeline.Append(learner);
        }
コード例 #28
0
        // ===========================================================================================================


        /// <summary>
        /// Builds the training pipeline for the requested classification mode and fits it on
        /// <c>_trainingDataView</c>, storing the fitted model in <c>_mlModel</c>.
        /// </summary>
        /// <remarks>
        /// Returns immediately when <c>ErrorHasOccured</c> is already set. Exceptions raised while
        /// building or fitting (including the one thrown for an unsupported mode) are not propagated:
        /// the message is written to the debug output, <c>ErrorHasOccured</c> is set and the message
        /// is stored in <c>FailureInformation</c>.
        /// </remarks>
        /// <param name="classificationMode">Selects which trainer and feature preparation to use.</param>
        public void BuildTrainingPipelineAndModel(ClassificationMode classificationMode)
        {
            if (ErrorHasOccured)
            {
                return;
            }

            try
            {
                IEstimator<ITransformer> pipeline;

                switch (classificationMode)
                {
                    // The mode used when this project was first built.
                    case ClassificationMode.OneVersusAll:
                    {
                        // Feature preparation: key-encode the label, concatenate the feature columns,
                        // min-max normalize them and add a cache checkpoint.
                        var features = _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                            .Append(_mlContext.Transforms.Concatenate(
                                outputColumnName: "Features",
                                inputColumnNames: FeatureNames.ToArray()))
                            .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                            .AppendCacheCheckpoint(_mlContext);

                        // Binary averaged perceptron wrapped in a one-versus-all multiclass trainer.
                        var binaryLearner = _mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                            labelColumnName: "Label",
                            numberOfIterations: 10,
                            featureColumnName: "Features");

                        var learner = _mlContext.MulticlassClassification.Trainers
                            .OneVersusAll(binaryLearner, labelColumnName: "Label")
                            .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                        pipeline = features.Append(learner);
                        break;
                    }

                    case ClassificationMode.LightGbm:
                    {
                        // Feature preparation: key-encode the label and concatenate the feature columns
                        // (this mode applies no normalization and no cache checkpoint).
                        var features = _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                            .Append(_mlContext.Transforms.Concatenate(
                                outputColumnName: "Features",
                                inputColumnNames: FeatureNames.ToArray()));

                        var learner = _mlContext.MulticlassClassification.Trainers
                            .LightGbm(labelColumnName: "Label", featureColumnName: "Features")
                            .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                        pipeline = features.Append(learner);
                        break;
                    }

                    default:
                        throw new ArgumentOutOfRangeException(nameof(classificationMode), classificationMode, null);
                }

                // Train the model with whichever pipeline was selected above.
                _mlModel = pipeline.Fit(_trainingDataView);

                // TODO: cross-validate on the single training set (there is no separate evaluation set),
                // e.g. 5 folds via MulticlassClassification.CrossValidate with labelColumnName "Label",
                // and print the averaged per-fold accuracy metrics.
            }
            catch (Exception ex)
            {
                // Failures are recorded on the instance rather than rethrown.
                Debug.WriteLine(ex.Message);
                ErrorHasOccured = true;
                FailureInformation = ex.Message;
            }
        }