コード例 #1
0
        /// <summary>
        /// Runs the training workflow end to end: calls <c>PrepareDataset</c>, fits the custom
        /// pipeline on the training dataset, saves the fitted model to <c>OutputModelFilePath</c>
        /// and finally calls <c>EvaluateModel</c>.
        /// </summary>
        public void RunPipeline()
        {
            // Steps 1-4: dataset preparation
            PrepareDataset(useValidationSet);

            // Step 5: build the estimator chain
            var estimatorChain = CreateCustomPipeline();

            // Step 6: fit the chain; only the Fit call itself is timed
            Console.WriteLine("*** Training the image classification model with DNN Transfer Learning on top of the selected pre-trained model/architecture ***");

            var timer = new Stopwatch();
            timer.Start();
            trainedModel = estimatorChain.Fit(trainDataset);
            timer.Stop();

            // Integer division: the reported duration is truncated to whole seconds
            long elapsedSeconds = timer.ElapsedMilliseconds / 1000;
            Console.WriteLine($"Training with transfer learning took: {elapsedSeconds} seconds");

            // Step 7: persist the fitted model together with the training schema
            mlContext.Model.Save(trainedModel, trainDataset.Schema, OutputModelFilePath);
            Console.WriteLine($"Model saved to: {OutputModelFilePath}");

            // Step 8: quality metrics (runs after saving)
            EvaluateModel();
        }
コード例 #2
0
        /// <summary>
        /// Picks the binary-classification trainer that corresponds to <c>aiEnum</c> and stores the
        /// complete training pipeline (one-hot encoding, min-max normalization, feature
        /// concatenation, trainer) in <c>pipeline</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// <c>aiEnum</c> holds a value for which no trainer is configured.
        /// </exception>
        private void TransformData()
        {
            // Select type of trainer
            ITrainerEstimator <ISingleFeaturePredictionTransformer <IPredictorProducing <float> >, IPredictorProducing <float> > trainer = null;

            Console.WriteLine("-------------------");
            switch (aiEnum)
            {
            case AIEnum.fastTree:
                Console.WriteLine("fastTree");
                trainer = mlContext.BinaryClassification.Trainers.FastTree();
                break;

            case AIEnum.generalizedAdditiveModels:
                Console.WriteLine("generalizedAdditiveModels");
                trainer = mlContext.BinaryClassification.Trainers.GeneralizedAdditiveModels();
                break;

            case AIEnum.logisticRegression:
                Console.WriteLine("logisticRegression");
                trainer = mlContext.BinaryClassification.Trainers.LogisticRegression();
                break;

            case AIEnum.stochasticDualCoordinateAscent:
                Console.WriteLine("stochasticDualCoordinateAscent");
                trainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent();
                break;

            case AIEnum.stochasticGradientDescent:
                Console.WriteLine("stochasticGradientDescent");
                trainer = mlContext.BinaryClassification.Trainers.StochasticGradientDescent();
                break;

            default:
                // Fail fast with a descriptive error instead of handing a null trainer to Append below.
                throw new InvalidOperationException("No trainer is configured for AIEnum value '" + aiEnum + "'.");
            }
            Console.WriteLine("-------------------");

            // Create a pipeline: one-hot encode the three boolean-like columns, min-max normalize
            // every numeric/encoded column, gather the selected columns into the feature vector
            // and finish with the trainer chosen above.
            pipeline = mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "IsRegularOneHot", inputColumnName: "IsRegular")
                       .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "IsCyclicOneHot", inputColumnName: "IsCyclic"))
                       .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "IsChordalOneHot", inputColumnName: "IsChordal"))
                       .Append(mlContext.Transforms.Normalize(
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "ID_GraphClass", outputColumnName: "ID_GraphClassNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "ID_EulerianGraph", outputColumnName: "ID_EulerianGraphNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "IsRegularOneHot", outputColumnName: "IsRegularOneHotNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "IsCyclicOneHot", outputColumnName: "IsCyclicOneHotNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "IsChordalOneHot", outputColumnName: "IsChordalOneHotNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountVertices", outputColumnName: "CountVerticesNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountEdges", outputColumnName: "CountEdgesNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountCutVertices", outputColumnName: "CountCutVerticesNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountBridges", outputColumnName: "CountBridgesNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "Girth", outputColumnName: "GirthNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "Dense", outputColumnName: "DenseNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "MinimumVertexDegree", outputColumnName: "MinimumVertexDegreeNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "MaximumVertexDegree", outputColumnName: "MaximumVertexDegreeNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "AverageVertexDegree", outputColumnName: "AverageVertexDegreeNormalized", fixZero: true),
                                   new NormalizingEstimator.MinMaxColumn(inputColumnName: "MedianVertexDegree", outputColumnName: "MedianVertexDegreeNormalized", fixZero: true)))
                       // "CountBridgesNormalized" and "MedianVertexDegreeNormalized" are produced above but
                       // are left out of the feature vector (they were disabled in the original column list).
                       .Append(mlContext.Transforms.Concatenate(DefaultColumnNames.Features,
                                                                "ID_GraphClassNormalized", "ID_EulerianGraphNormalized",
                                                                "IsRegularOneHotNormalized", "IsCyclicOneHotNormalized", "IsChordalOneHotNormalized",
                                                                "CountVerticesNormalized", "CountEdgesNormalized", "CountCutVerticesNormalized",
                                                                "GirthNormalized", "DenseNormalized",
                                                                "MinimumVertexDegreeNormalized", "MaximumVertexDegreeNormalized", "AverageVertexDegreeNormalized"))
                       .Append(trainer);
        }
コード例 #3
0
        /// <summary>
        /// 5.1. (Optional) Builds the training pipeline with explicitly chosen hyper-parameters.
        /// </summary>
        /// <returns>
        /// The image-classification trainer followed by a key-to-value mapping of the
        /// predicted-label column.
        /// </returns>
        private EstimatorChain <KeyToValueMappingTransformer> CreateCustomPipeline()
        {
            var trainerOptions = new ImageClassificationTrainer.Options
            {
                LabelColumnName = KeyColumn,
                // The feature column must carry the same name as in ImageDataInMemory
                FeatureColumnName = FeatureColumn,
                // Selects the DNN architecture
                Arch = (ImageClassificationTrainer.Architecture)arch,
                // Number of training iterations
                Epoch = 200,
                // Number of samples per mini-batch
                BatchSize = 10,
                LearningRate = 0.01f,
                // Print every metrics object reported during training
                MetricsCallback = metrics => Console.WriteLine(metrics),
                // The test dataset doubles as the validation set when no dedicated one is used
                ValidationSet = useValidationSet ? validationDataset : testDataset,
            };

            var classifier   = mlContext.MulticlassClassification.Trainers.ImageClassification(trainerOptions);
            var labelDecoder = mlContext.Transforms.Conversion.MapKeyToValue(PredictedLabelColumn, PredictedLabelColumn);

            return classifier.Append(labelDecoder);
        }
        /// <summary>
        /// Creates the ML context, loads the tab-separated training file (with header) and assembles
        /// the training pipeline into <c>_trainingPipeline</c>. Nothing is fitted here.
        /// </summary>
        public void Build()
        {
            // The MLContext is the catalog of ML.NET components used below.
            mlContext = new MLContext();

            // Read the spam data, typed as SpamInput, into a data view.
            _data = mlContext.Data.LoadFromTextFile <SpamInput>(path: TrainDataPath, hasHeader: true, separatorChar: '\t');

            // Text featurization settings: word n-grams up to length 2 (all lengths),
            // character n-grams of length 3 only.
            var textOptions = new Microsoft.ML.Transforms.Text.TextFeaturizingEstimator.Options
            {
                WordFeatureExtractor = new Microsoft.ML.Transforms.Text.WordBagEstimator.Options
                {
                    NgramLength   = 2,
                    UseAllLengths = true
                },
                CharFeatureExtractor = new Microsoft.ML.Transforms.Text.WordBagEstimator.Options
                {
                    NgramLength   = 3,
                    UseAllLengths = false
                },
            };

            // Data preparation: key-encode the label, featurize "Message", copy the result into
            // "Features", Lp-normalize it and cache the prepared data.
            var preparation = mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                              .Append(mlContext.Transforms.Text.FeaturizeText("FeaturesText", textOptions, "Message"))
                              .Append(mlContext.Transforms.CopyColumns("Features", "FeaturesText"))
                              .Append(mlContext.Transforms.NormalizeLpNorm("Features", "Features"))
                              .AppendCacheCheckpoint(mlContext);

            // Training algorithm: one-versus-all over an averaged perceptron, followed by mapping
            // the predicted key back to its original label value.
            var binaryLearner = mlContext.BinaryClassification.Trainers.AveragedPerceptron(labelColumnName: "Label", numberOfIterations: 10, featureColumnName: "Features");
            var learner       = mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryLearner, labelColumnName: "Label")
                                .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

            _trainingPipeline = preparation.Append(learner);
        }
コード例 #5
0
        /// <summary>
        /// Assembles one estimator from this pipeline's parts, in order: every non-null estimator in
        /// <c>Transforms</c>, an optional cache checkpoint, the trainer returned by
        /// <c>Trainer.BuildTrainer()</c>, and every non-null estimator in <c>TransformsPostTrainer</c>.
        /// </summary>
        /// <returns>The combined estimator chain.</returns>
        public IEstimator <ITransformer> ToEstimator()
        {
            IEstimator <ITransformer> chain = new EstimatorChain <ITransformer>();

            // Pre-trainer transforms; entries without an estimator are skipped
            foreach (var preTransform in Transforms)
            {
                if (preTransform.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(preTransform.Estimator);
            }

            // The trainer is built before the optional checkpoint is appended
            var trainerEstimator = Trainer.BuildTrainer();

            if (_cacheBeforeTrainer)
            {
                chain = chain.AppendCacheCheckpoint(_context);
            }

            chain = chain.Append(trainerEstimator);

            // Post-trainer transforms; entries without an estimator are skipped
            foreach (var postTransform in TransformsPostTrainer)
            {
                if (postTransform.Estimator == null)
                {
                    continue;
                }

                chain = chain.Append(postTransform.Estimator);
            }

            return chain;
        }
 /// <summary>
 /// Creates an empty model wrapper: context, data, training pipeline and trained model
 /// are all explicitly reset to <c>null</c>.
 /// </summary>
 public SpamDetectionMLModel()
 {
     // Nothing is loaded or built at construction time
     mlContext = null;
     _data = null;
     _trainingPipeline = null;
     _model = null;
 }
コード例 #7
0
ファイル: Program.cs プロジェクト: aslotte/ML.NET.Templates
        /// <summary>
        /// Template entry point: loads the data, splits it into train/test sets, fits the training
        /// pipeline on the train set and saves the resulting model to <c>ModelName</c>.
        /// </summary>
        /// <remarks>
        /// <c>dataProcessPipeline</c> and <c>trainer</c> are template placeholders initialised to
        /// <c>null</c>; replace both with real estimators before running.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <exception cref="InvalidOperationException">
        /// A placeholder estimator has not been replaced.
        /// </exception>
        static void Main(string[] args)
        {
            // The stopwatch covers loading, splitting and training
            var stopWatch = Stopwatch.StartNew();

            var mlContext = new MLContext(seed: 1);

            Console.WriteLine($"Loading data from {DataPath}");
            var data = mlContext.Data.LoadFromTextFile <ModelInput>(DataPath, hasHeader: HasHeader, separatorChar: SeparatorChar);

            Console.WriteLine("Splitting the data");
            var trainTestSplit = mlContext.Data.TrainTestSplit(data);

            Console.WriteLine("Transforming the data");
            // TODO: replace the null placeholder with the data-processing estimators for the chosen scenario
            IEstimator <ITransformer> dataProcessPipeline = null;
            if (dataProcessPipeline == null)
            {
                // Fail with an actionable message instead of an opaque null failure inside Append
                throw new InvalidOperationException("The data processing pipeline placeholder has not been replaced with a real estimator.");
            }

            Console.WriteLine("Training the model");
            // TODO: replace the null placeholder with the trainer for the chosen scenario
            IEstimator <ITransformer> trainer = null;
            if (trainer == null)
            {
                throw new InvalidOperationException("The trainer placeholder has not been replaced with a real estimator.");
            }

            EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            ITransformer model = trainingPipeline.Fit(trainTestSplit.TrainSet);

            Console.WriteLine("Evaluating the model's performance");
            //Depends on Trainer

            stopWatch.Stop();
            Console.WriteLine($"Training finished in: {stopWatch.ElapsedMilliseconds} milliseconds");

            Console.WriteLine($"Saving the model to {ModelName}");
            mlContext.Model.Save(model, trainTestSplit.TrainSet.Schema, ModelName);
        }
コード例 #8
0
        /// <summary>
        /// Loads the training file, builds the issue-area classification pipeline, fits it and
        /// stores the result in <c>_model</c>, printing the elapsed training time in milliseconds.
        /// </summary>
        void IAiTest.Train()
        {
            Console.WriteLine("=============== Multiclass Classification - Issue Area Prediction ===============");

            string trainingFile = $"{RootFolder}/{TrainDataFile}";
            IDataView trainingData = _context.Data.LoadFromTextFile <GitHubIssue>(trainingFile, hasHeader: true);

            // Data preparation: key-encode the Area label, featurize title and description,
            // merge both into "Features" and cache the prepared data.
            var labelToKey      = _context.Transforms.Conversion.MapValueToKey(inputColumnName: nameof(GitHubIssue.Area), outputColumnName: "Area");
            var withTitle       = labelToKey.Append(_context.Transforms.Text.FeaturizeText(inputColumnName: "Title", outputColumnName: "TitleFeaturized"));
            var withDescription = withTitle.Append(_context.Transforms.Text.FeaturizeText(inputColumnName: "Description", outputColumnName: "DescriptionFeaturized"));
            var preparation     = withDescription
                                  .Append(_context.Transforms.Concatenate("Features", "TitleFeaturized", "DescriptionFeaturized"))
                                  .AppendCacheCheckpoint(_context);

            // Trainer plus mapping of the predicted key back to its original value
            _trainer = _context.MulticlassClassification.Trainers.SdcaMaximumEntropy(labelColumnName: "Area", featureColumnName: "Features")
                       .Append(_context.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            var fullPipeline = preparation.Append(_trainer);

            Console.WriteLine("=============== Create and Train the Model ===============");

            // Only the Fit call is timed
            Stopwatch timer = Stopwatch.StartNew();
            _model = fullPipeline.Fit(trainingData);
            timer.Stop();

            Console.WriteLine($" Total {timer.ElapsedMilliseconds} ms");
            Console.WriteLine("=============== End of training ===============");
            Console.WriteLine();
        }
コード例 #9
0
        /// <summary>
        /// Trains a one-versus-all multiclass model built on the FastForest binary trainer and
        /// stores the training pipeline, fitted model, input schema and a prediction engine on
        /// this instance.
        /// </summary>
        /// <param name="trainingData">Rows to train on.</param>
        /// <param name="multiClassOptions">Supplies the label column name and the feature column names; stored in <c>Options</c>.</param>
        /// <param name="fastForestOptions">Supplies the leaf count, minimum example count per leaf and tree count.</param>
        public void TrainFastForestOva(IEnumerable <TInput> trainingData, MultiClassOptions <TInput> multiClassOptions, FastForestOvaOptions fastForestOptions)
        {
            this.Options = multiClassOptions;

            // Preprocessing: key-encode the label, gather the feature columns, cache the result
            var labelToKey    = this.ml.Transforms.Conversion.MapValueToKey(inputColumnName: this.Options.LabelName, outputColumnName: "Label");
            var preprocessing = labelToKey
                                .Append(this.ml.Transforms.Concatenate("Features", this.Options.FeatureColumnNames))
                                .AppendCacheCheckpoint(this.ml);

            // Binary FastForest learner wrapped in a one-versus-all multiclass trainer
            var binaryLearner = this.ml.BinaryClassification.Trainers.FastForest(
                numberOfLeaves: fastForestOptions.NumberOfLeaves,
                minimumExampleCountPerLeaf: fastForestOptions.MinimumExampleCountPerLeaf,
                numberOfTrees: fastForestOptions.NumberOfTrees,
                labelColumnName: "Label",
                featureColumnName: "Features");
            var ovaLearner = this.ml.MulticlassClassification.Trainers.OneVersusAll(binaryLearner, labelColumnName: "Label");

            this.trainingPipeline = preprocessing.Append(ovaLearner);

            // Fit on the supplied rows; the predicted key is mapped back to its value only in
            // the fitted model, not in the stored training pipeline
            var trainView = this.ml.Data.LoadFromEnumerable(trainingData);
            var withDecodedLabel = trainingPipeline.Append(this.ml.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            this.model = withDecodedLabel.Fit(trainView);

            this.inputSchema = trainView.Schema;

            this.predictionEngine = this.ml.Model.CreatePredictionEngine <TInput, PredictionOutput>(model);
        }
コード例 #10
0
        /// <summary>
        /// Starts a long-running task that cross-validates a field-aware factorization machine
        /// on the ratings file and yields the model of the fold with the highest accuracy.
        /// </summary>
        /// <param name="FilePath">Path of the comma-separated ratings file (with header row).</param>
        /// <returns>A task producing the best fold's model, or <c>null</c> when there are no results.</returns>
        private static Task <ITransformer> TrainAndGetBestModel(string FilePath)
        {
            Func <ITransformer> trainBestModel = () =>
            {
                MLContext context = MLCProvider.Current;

                IDataView ratings = context.Data.LoadFromTextFile <BookRating>(FilePath, ',', true);
                ratings = context.Data.Cache(ratings);

                Console.WriteLine("=============== 正在读取训练数据文件 ===============");

                // Featurize the three input columns as text and merge them into "Features"
                var featurization = context.Transforms.Text.FeaturizeText("UserIdFeaturized", nameof(BookRating.UserId))
                                    .Append(context.Transforms.Text.FeaturizeText("ISBNFeaturized", nameof(BookRating.ISBN)))
                                    .Append(context.Transforms.Text.FeaturizeText("AgeFeaturized", nameof(BookRating.Age)))
                                    .Append(context.Transforms.Concatenate("Features", "UserIdFeaturized", "ISBNFeaturized", "AgeFeaturized"));

                Console.WriteLine("=============== 正在使用交叉验证训练预测模型 ===============");

                var trainerOptions = new FieldAwareFactorizationMachineTrainer.Options
                {
                    Verbose            = true,
                    NumberOfIterations = 10,
                    FeatureColumnName  = "Features",
                    Shuffle            = true
                };

                var trainingChain = featurization.Append(context.BinaryClassification.Trainers.FieldAwareFactorizationMachine(trainerOptions));

                var foldResults = context.BinaryClassification.CrossValidate(ratings, trainingChain);

                // Best fold first; FirstOrDefault gives null for an empty result set
                return foldResults
                       .OrderByDescending(fold => fold.Metrics.Accuracy)
                       .Select(fold => fold.Model)
                       .FirstOrDefault();
            };

            return Task.Factory.StartNew(trainBestModel, TaskCreationOptions.LongRunning);
        }
コード例 #11
0
        /// <summary>
        /// Trains a randomized-PCA anomaly-detection model on the packets queried from
        /// <paramref name="storage"/>, saves it to <paramref name="modelFileName"/> and evaluates
        /// it on the test split.
        /// </summary>
        /// <param name="storage">Data access layer queried for clean and non-clean packets.</param>
        /// <param name="modelFileName">Path the trained model is saved to; the file must already exist.</param>
        /// <returns>Evaluation metrics, the clean/malicious row counts and the elapsed duration.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="storage"/> is null, or <paramref name="modelFileName"/> is null or empty.
        /// </exception>
        /// <exception cref="FileNotFoundException"><paramref name="modelFileName"/> does not exist.</exception>
        public async Task <ModelMetrics> GenerateModel(BaseDAL storage, string modelFileName)
        {
            if (storage == null)
            {
                Log.Error("Trainer::GenerateModel - BaseDAL is null");

                throw new ArgumentNullException(nameof(storage));
            }

            if (string.IsNullOrEmpty(modelFileName))
            {
                Log.Error("Trainer::GenerateModel - modelFileName is null or empty");

                throw new ArgumentNullException(nameof(modelFileName));
            }

            // NOTE(review): the output file is required to exist before it is overwritten by Save
            // below - presumably the caller pre-creates it; confirm this is intended.
            if (!File.Exists(modelFileName))
            {
                Log.Error($"Trainer::GenerateModel - {modelFileName} does not exist");

                throw new FileNotFoundException(modelFileName);
            }

            // UTC is used so the measured duration is not distorted by DST or time-zone changes.
            var startTime = DateTime.UtcNow;

            var options = new RandomizedPcaTrainer.Options
            {
                FeatureColumnName       = FEATURES,
                ExampleWeightColumnName = null,
                Rank           = 4,
                Oversampling   = 20,
                EnsureZeroMean = true,
                Seed           = Constants.ML_SEED
            };

            // The two queries run one after the other: clean packets first, then the rest.
            var (data, cleanRowCount, maliciousRowCount) = GetDataView(await storage.QueryPacketsAsync(a => a.IsClean), await storage.QueryPacketsAsync(a => !a.IsClean));

            // Gather the PayloadItem properties (Label is passed to ToPropertyList separately) into the feature column.
            IEstimator <ITransformer> dataProcessPipeline = _mlContext.Transforms.Concatenate(
                FEATURES,
                typeof(PayloadItem).ToPropertyList <PayloadItem>(nameof(PayloadItem.Label)));

            IEstimator <ITransformer> trainer = _mlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);

            EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            TransformerChain <ITransformer> trainedModel = trainingPipeline.Fit(data.TrainSet);

            _mlContext.Model.Save(trainedModel, data.TrainSet.Schema, modelFileName);

            var testSetTransform = trainedModel.Transform(data.TestSet);

            return(new ModelMetrics
            {
                Metrics = _mlContext.AnomalyDetection.Evaluate(testSetTransform),
                NumCleanRows = cleanRowCount,
                NumMaliciousRows = maliciousRowCount,
                Duration = DateTime.UtcNow.Subtract(startTime)
            });
        }
コード例 #12
0
        /// <summary>
        /// Builds the pipeline, fits it on an empty input data view and wraps the resulting
        /// transformer chain in a prediction engine.
        /// </summary>
        /// <returns>A prediction engine from <c>CenterFaceImageInput</c> to <c>CenterFaceImageOutput</c>.</returns>
        public PredictionEngine <CenterFaceImageInput, CenterFaceImageOutput> GetMlNetPredictionEngine()
        {
            var onnxPipeline = CreatePipeline();

            // The chain is fitted on an empty enumerable; no real rows are supplied here
            var emptyInput = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
            var fittedChain = onnxPipeline.Fit(emptyInput);

            return mlContext.Model.CreatePredictionEngine <CenterFaceImageInput, CenterFaceImageOutput>(fittedChain);
        }
コード例 #13
0
        /// <summary>
        /// Builds the pipeline, fits it on an empty input data view and saves the resulting
        /// transformer chain to <c>_mlModelDestn</c>.
        /// </summary>
        public void SaveMLNetModel()
        {
            var onnxPipeline = CreatePipeline();

            // The chain is fitted on an empty enumerable; no real rows are supplied here
            var emptyInput = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
            var fittedChain = onnxPipeline.Fit(emptyInput);

            // No input schema is stored alongside the model (null is passed)
            mlContext.Model.Save(fittedChain, null, _mlModelDestn);
        }
コード例 #14
0
        /// <summary>
        /// 5. Builds the training pipeline with the image-classification trainer, passing only the
        /// column names and the validation set explicitly.
        /// </summary>
        /// <param name="dataset">Data view handed to the trainer as its validation set.</param>
        /// <returns>
        /// The trainer followed by a key-to-value mapping of the predicted-label column.
        /// </returns>
        private EstimatorChain <KeyToValueMappingTransformer> CreateDefaultPipeline(IDataView dataset)
        {
            // The feature column must carry the same name as in ImageDataInMemory
            var classifier = mlContext.MulticlassClassification.Trainers.ImageClassification(
                labelColumnName: KeyColumn,
                featureColumnName: FeatureColumn,
                validationSet: dataset);

            var labelDecoder = mlContext.Transforms.Conversion.MapKeyToValue(PredictedLabelColumn, PredictedLabelColumn);

            return classifier.Append(labelDecoder);
        }
コード例 #15
0
        //OnlineGradientDescent, Gam - not work

        /// <summary>
        /// Builds the shared data-preparation pipeline: copies "WaitTime" to "Label", one-hot
        /// encodes "SkillId" and concatenates the encoded skill with the numeric columns into
        /// "Features".
        /// </summary>
        /// <param name="mlContext">Context whose transform catalog is used to create the estimators.</param>
        /// <param name="trainData">Not used by this method; kept so existing callers keep compiling.</param>
        /// <returns>The unfitted estimator chain ending in the feature concatenation.</returns>
        public static EstimatorChain <ColumnConcatenatingTransformer> BasePipelineBuilder(MLContext mlContext, IDataView trainData)
        {
            EstimatorChain <ColumnConcatenatingTransformer> basePipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "WaitTime")
                                                                           .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "SkillIdEncoded", inputColumnName: "SkillId"))
                                                                           .Append(mlContext.Transforms.Concatenate(outputColumnName: "Features", "SkillIdEncoded", "AgentsOnCall", "QueueLength", "BaseAvailAgents", "R1AvailAgents", "R2AvailAgents", "R1Threshold", "R2Threshold"));

            return(basePipeline);
        }
コード例 #16
0
        /// <summary>
        /// Assembles the data-preparation chain for the configured predicted column and feature
        /// columns, stores it on this instance and returns this instance for call chaining.
        /// </summary>
        /// <returns>This pipeline chain.</returns>
        /// <exception cref="ArgumentNullException">
        /// <c>_predictedColumn</c> or <c>_algorithmType</c> has not been set.
        /// </exception>
        public IPipelineChain BuildPipeline()
        {
            if (_predictedColumn == null)
            {
                throw new ArgumentNullException(nameof(_predictedColumn));
            }
            if (_algorithmType == null)
            {
                throw new ArgumentNullException(nameof(_algorithmType));
            }

            // Optional first step for the predicted column: a value-to-key mapping when it is
            // alphanumeric, or a type conversion when a DataKind is given. Each is null when
            // its condition does not hold.
            var keyMap        = _predictedColumn.IsAlphanumeric ? MlContext.Transforms.Conversion.MapValueToKey(_predictedColumn.ColumnName) : null;
            var keyConversion = _predictedColumn.DataKind != null?MlContext.Transforms.Conversion.ConvertType(_predictedColumn.ColumnName, outputKind : _predictedColumn.DataKind.Value) : null;

            // The predicted column is always copied into "Label".
            var keyColumn = MlContext.Transforms.CopyColumns("Label", _predictedColumn.ColumnName);

            if (_alphanumericColumns != null)
            {
                // One-hot encode every alphanumeric column: the first column yields a single
                // estimator, the second turns it into a chain, later ones extend that chain.
                OneHotEncodingEstimator oneHotEncodingTransformer = null;
                EstimatorChain <OneHotEncodingTransformer> oneHotEncodingTransformerChain = null;
                // Redundant check: the enclosing if already guarantees a non-null array.
                if (_alphanumericColumns != null)
                {
                    for (int i = 0; i < _alphanumericColumns.Length; i++)
                    {
                        if (oneHotEncodingTransformer == null)
                        {
                            oneHotEncodingTransformer = MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]);
                        }
                        else if (oneHotEncodingTransformerChain == null)
                        {
                            oneHotEncodingTransformerChain = oneHotEncodingTransformer.Append(MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]));
                        }
                        else
                        {
                            oneHotEncodingTransformerChain = oneHotEncodingTransformerChain.Append(MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]));
                        }
                    }
                }

                // Append the feature concatenation to the chain if there is one, otherwise to the
                // single estimator.
                // NOTE(review): with an empty _alphanumericColumns array both locals are still null
                // here, so the fallback Append is invoked on a null estimator - confirm callers
                // never pass an empty array.
                var columnConcatenatingTransformer = oneHotEncodingTransformerChain?.Append(MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns)) ??
                                                     oneHotEncodingTransformer.Append(MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns));
                // Prefix with the key mapping, else the type conversion, else nothing, then the
                // label copy.
                // NOTE(review): this branch stores the result in _transformerChain while the else
                // branch below stores it in _estimatorChain - confirm the two fields are intended.
                _transformerChain = _predictedColumn.IsAlphanumeric ?
                                    keyMap.Append(keyColumn).Append(columnConcatenatingTransformer) :
                                    _predictedColumn.DataKind != null?keyConversion.Append(keyColumn).Append(columnConcatenatingTransformer) : keyColumn.Append(columnConcatenatingTransformer);
            }
            else
            {
                // No alphanumeric columns: only the feature concatenation follows the label step.
                var featureColumn = MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns);
                _estimatorChain = _predictedColumn.IsAlphanumeric ?
                                  keyMap.Append(keyColumn).Append(featureColumn) :
                                  _predictedColumn.DataKind != null?keyConversion.Append(keyColumn).Append(featureColumn) : keyColumn.Append(featureColumn);
            }

            return(this);
        }
コード例 #17
0
ファイル: Demo.cs プロジェクト: justCodeLife/book-recommender
        /// <summary>
        /// Key-encodes the user id and ISBN columns of <c>_dataView</c> and splits the encoded
        /// data into train and test sets with a test fraction of 0.05.
        /// </summary>
        private static void PreProcessData()
        {
            var userKeyMapping = _context.Transforms.Conversion.MapValueToKey("Encoded_UserID", nameof(InputModel.UserId));
            var bookKeyMapping = _context.Transforms.Conversion.MapValueToKey("Encoded_Book", nameof(InputModel.ISBN));

            estimator = userKeyMapping.Append(bookKeyMapping);

            // Fit and apply the encoders on the same data view
            var fittedEncoders = estimator.Fit(_dataView);
            var encodedData    = fittedEncoders.Transform(_dataView);

            splitData = _context.Data.TrainTestSplit(encodedData, 0.05);
        }
コード例 #18
0
ファイル: NGrams.cs プロジェクト: jamiefutch/MLHelpers
 /// <summary>
 /// Creates the ML context and the text pipeline that tokenizes "Text" into "Tokens",
 /// key-encodes the tokens and produces term-frequency n-grams in "NgramFeatures".
 /// </summary>
 /// <param name="ngramLength">Length of the n-grams to produce (default 3); shorter lengths are not included.</param>
 public NGrams(int ngramLength = 3)
 {
     _mlContext = new MLContext();

     var tokenizer = _mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text");

     // 'ProduceNgrams' takes key-typed input, so the tokens are converted with 'MapValueToKey' first.
     var tokensToKeys = _mlContext.Transforms.Conversion.MapValueToKey("Tokens");

     var ngramProducer = _mlContext.Transforms.Text.ProduceNgrams(
         "NgramFeatures",
         "Tokens",
         ngramLength: ngramLength,
         useAllLengths: false,
         weighting: NgramExtractingEstimator.WeightingCriteria.Tf);

     _textPipeline = tokenizer.Append(tokensToKeys).Append(ngramProducer);
 }
コード例 #19
0
        /// <summary>
        /// Creates a composite of a reader estimator and an estimator chain applied after it.
        /// </summary>
        /// <param name="start">The reader estimator; must not be null.</param>
        /// <param name="estimatorChain">The chain to apply; an empty chain is used when null.</param>
        public CompositeReaderEstimator(IDataReaderEstimator <TSource, IDataReader <TSource> > start, EstimatorChain <TLastTransformer> estimatorChain = null)
        {
            Contracts.CheckValue(start, nameof(start));
            Contracts.CheckValueOrNull(estimatorChain);

            _start = start;

            // Fall back to an empty chain when the caller supplies none.
            _estimatorChain = estimatorChain != null
                ? estimatorChain
                : new EstimatorChain <TLastTransformer>();

            // REVIEW: enforce that estimator chain can read the reader's schema.
            // Right now it throws.
            // GetOutputSchema();
        }
コード例 #20
0
        /// <summary>
        /// Loads the given datasets, trains a one-versus-all FastTree classifier on 80 % of the
        /// rows and prints the confusion table computed on the remaining 20 %.
        /// </summary>
        /// <param name="frameSize">Passed to <c>getFrameReader</c> to obtain the reader.</param>
        /// <param name="sensorType">Passed to <c>getFrameReader</c> to obtain the reader.</param>
        /// <param name="datasets">Sources handed to the reader's <c>Load</c> call.</param>
        /// <param name="labels">Stored in <c>categories</c>.</param>
        public SignalClassifierController(string frameSize, string sensorType, string[] datasets, string[] labels)
        {
            mlContext  = new MLContext();
            categories = labels;

            var frameReader = getFrameReader(frameSize, sensorType);
            var allFrames   = frameReader.Load(datasets);
            var partitions  = mlContext.Data.TrainTestSplit(allFrames, testFraction: 0.2);

            // Preparation pipeline: key-encode "Label", min-max normalize "readings", then a
            // one-versus-all FastTree trainer. Trainers previously tried in its place (kept only
            // as commented-out code): NaiveBayes, OVA LbfgsLogisticRegression, OVA LdSvm.
            var labelToKey  = mlContext.Transforms.Conversion.MapValueToKey("Label");
            var normalizer  = mlContext.Transforms.NormalizeMinMax("readings", fixZero: true);
            var pipelineOva = mlContext.MulticlassClassification.Trainers.OneVersusAll(
                mlContext.BinaryClassification.Trainers.FastTree(featureColumnName: "readings"));

            estimatorPipeline = labelToKey.Append(normalizer).Append(pipelineOva);

            transformer = estimatorPipeline.Fit(partitions.TrainSet);

            // NOTE(review): the pipeline fitted above already ends in an OVA FastTree trainer;
            // a second, identically configured trainer is fitted below on its output and becomes
            // 'model'. Confirm the double training is intended.
            var classifierEstimator = mlContext.MulticlassClassification.Trainers.OneVersusAll(
                mlContext.BinaryClassification.Trainers.FastTree(featureColumnName: "readings"));

            var preparedTrainSet = transformer.Transform(partitions.TrainSet);

            model = classifierEstimator.Fit(preparedTrainSet);

            Console.WriteLine("Model fitted");

            // Evaluate on the held-out rows
            var preparedTestSet = transformer.Transform(partitions.TestSet);
            var scoredTestSet   = model.Transform(preparedTestSet);
            var testMetrics     = mlContext.MulticlassClassification.Evaluate(scoredTestSet);

            Console.WriteLine(testMetrics.ConfusionMatrix.GetFormattedConfusionTable());
        }
コード例 #21
0
ファイル: MLModel.cs プロジェクト: 4egod/FlowLinkML
        /// <summary>
        /// Stores the training data and builds (without fitting) a FastTree regression pipeline
        /// that predicts "Volume" from the given feature columns.
        /// </summary>
        /// <param name="trainingData">Rows kept in <c>_trainingData</c> for later use.</param>
        /// <param name="features">Names of the columns concatenated into "Features".</param>
        public MLModel(List <Archive> trainingData, List <string> features)
        {
            _context      = new MLContext(seed: 1);
            _trainingData = trainingData;

            // "Timestamp" is featurized into "Timestamp_tf"; it only reaches the feature vector
            // if the caller lists that column name in 'features'.
            var timestampFeaturizer = _context.Transforms.Text.FeaturizeText("Timestamp_tf", "Timestamp");
            var featureAssembler    = _context.Transforms.Concatenate("Features", features.ToArray());
            var regressionTrainer   = _context.Regression.Trainers.FastTree(labelColumnName: "Volume", featureColumnName: "Features");

            _pipeline = timestampFeaturizer.Append(featureAssembler).Append(regressionTrainer);
        }
コード例 #22
0
        /// <summary>
        /// Chains together the estimators that <c>TransformInferenceApi.InferTransforms</c> suggests
        /// for <paramref name="data"/> and <paramref name="label"/>.
        /// </summary>
        /// <param name="catalog">Extension receiver; the body does not read it (a new <c>MLContext</c> is created instead).</param>
        /// <param name="data">Data view handed to the inference API.</param>
        /// <param name="label">Label argument handed to the inference API.</param>
        /// <returns>An estimator chain holding the suggested estimators in the order they were returned; an empty chain when nothing is suggested.</returns>
        public static IEstimator <ITransformer> InferTransforms(this TransformsCatalog catalog, IDataView data, string label)
        {
            var context = new MLContext();

            // Fold the suggested estimators onto an initially empty chain, one Append per suggestion.
            return TransformInferenceApi.InferTransforms(context, data, label)
                   .Select(suggestion => suggestion.Estimator)
                   .Aggregate(new EstimatorChain <ITransformer>(), (chain, next) => chain.Append(next));
        }
コード例 #23
0
 /// <summary>
 /// Writes the multi-class classification metrics (macro/micro accuracy, overall log-loss and
 /// the log-loss of every class) to the console.
 /// </summary>
 /// <param name="trainer">Pipeline whose <c>ToString()</c> value is shown in the report header.</param>
 /// <param name="metrics">Evaluation metrics to print.</param>
 private static void ShowAccuracyStats(EstimatorChain <KeyToValueMappingTransformer> trainer, MulticlassClassificationMetrics metrics)
 {
     Console.WriteLine("************************************************************");
     Console.WriteLine($"*    Metrics for {trainer.ToString()} multi-class classification model   ");
     Console.WriteLine("*-----------------------------------------------------------");
     Console.WriteLine($"    AccuracyMacro = {metrics.MacroAccuracy.ToString(CultureInfo.CurrentCulture)}, a value between 0 and 1, the closer to 1, the better");
     Console.WriteLine($"    AccuracyMicro = {metrics.MicroAccuracy.ToString(CultureInfo.CurrentCulture)}, a value between 0 and 1, the closer to 1, the better");
     Console.WriteLine($"    LogLoss = {metrics.LogLoss.ToString(CultureInfo.CurrentCulture)}, the closer to 0, the better");

     // Print one line per class actually present instead of assuming exactly three classes:
     // fixed indices [0..2] throw for models with fewer classes and hide any class past the third.
     int classNumber = 0;
     foreach (double classLogLoss in metrics.PerClassLogLoss)
     {
         classNumber++;
         Console.WriteLine($"    LogLoss for class {classNumber} = {classLogLoss.ToString(CultureInfo.CurrentCulture)}, the closer to 0, the better");
     }

     Console.WriteLine("************************************************************");
 }
コード例 #24
0
        /// <summary>
        /// Approval test: an <c>EstimatorChain</c> holding the trainer estimator followed by the
        /// transformer estimator is asked for its generated code, which is verified via <c>Approvals</c>.
        /// </summary>
        public void EstimatorChain_should_generate_code()
        {
            var trainerEstimator     = this.GetTrainerEstimator();
            var transformerEstimator = this.GetTransformerEstimator();

            // Trainer first, transformer second - the order is part of what gets approved.
            var chain = new EstimatorChain() { trainerEstimator, transformerEstimator };
            var generatedCode = chain.GeneratorCode();

            Approvals.Verify(generatedCode);
        }
コード例 #25
0
ファイル: CoreFacade.cs プロジェクト: walidbj/Millionaire
        /// <summary>
        /// Builds a FastTreeTweedie regression pipeline from the enabled inputs and fits it on <c>_trainData</c>.
        /// </summary>
        /// <remarks>
        /// When enabled, "Day" and "Month" are text-featurized into "DayString" / "MonthString" and
        /// "Week" is used as-is. The selected columns are concatenated into "Features", and the column
        /// named after <paramref name="column"/> is copied to "Label" before the trainer runs.
        /// </remarks>
        /// <param name="column">Enum member whose name identifies the source column copied to "Label".</param>
        /// <returns>The fitted transformer chain.</returns>
        private ITransformer BuildAndTrainUsingParams(ColumnEnum column)
        {
            // Resolved once; every pipeline variant below copies this column to "Label".
            string labelSourceColumn = System.Enum.GetName(typeof(ColumnEnum), column);

            List <string>                 features             = new List <string>();
            TextFeaturizingEstimator      textTransformer      = null; // first (or only) text featurizer
            EstimatorChain <ITransformer> estimatorTransformer = null; // Day + Month chain, when both are enabled

            if (_includeDay)
            {
                textTransformer = _mlContext.Transforms.Text.FeaturizeText("DayString", "Day");
                features.Add("DayString");
            }
            if (_includeMonth)
            {
                TextFeaturizingEstimator monthFeaturizer = _mlContext.Transforms.Text.FeaturizeText("MonthString", "Month");
                if (textTransformer != null)
                {
                    estimatorTransformer = textTransformer.Append(monthFeaturizer);
                }
                else
                {
                    textTransformer = monthFeaturizer;
                }
                features.Add("MonthString");
            }
            if (_includeWeek)
            {
                features.Add("Week");
            }

            // The three variants differ only in what precedes the concatenation; they are kept
            // separate so that each chain is assembled exactly as before.
            if (textTransformer == null)
            {
                // No text columns: start directly with the concatenation.
                return(_mlContext.Transforms.Concatenate("Features", features.ToArray())
                       .Append(_mlContext.Transforms.CopyColumns("Label", labelSourceColumn))
                       .Append(_mlContext.Regression.Trainers.FastTreeTweedie())
                       .Fit(_trainData));
            }
            if (estimatorTransformer != null)
            {
                // Day and Month are both featurized: continue the existing chain.
                return(estimatorTransformer.Append(_mlContext.Transforms.Concatenate("Features", features.ToArray()))
                       .Append(_mlContext.Transforms.CopyColumns("Label", labelSourceColumn))
                       .Append(_mlContext.Regression.Trainers.FastTreeTweedie())
                       .Fit(_trainData));
            }

            // Exactly one text column is featurized.
            return(textTransformer.Append(_mlContext.Transforms.Concatenate("Features", features.ToArray()))
                   .Append(_mlContext.Transforms.CopyColumns("Label", labelSourceColumn))
                   .Append(_mlContext.Regression.Trainers.FastTreeTweedie())
                   .Fit(_trainData));
        }
コード例 #26
0
        /// <summary>
        /// Trains a randomized-PCA anomaly detector on <paramref name="trainDataView"/>.
        /// </summary>
        /// <remarks>
        /// Every schema column except Label, "IdPreservationColumn" and Time is concatenated into
        /// "Features", the Time column is dropped, and the features are Lp-normalized into
        /// "NormalizedFeatures", which is the column the trainer reads.
        /// </remarks>
        /// <param name="mlContext">Context used to create every transform and the trainer.</param>
        /// <param name="trainDataView">Training data; rows are filtered on the Label column before fitting.</param>
        /// <returns>The fitted transformer chain (data processing followed by the anomaly detector).</returns>
        public static ITransformer TrainModel(MLContext mlContext, IDataView trainDataView)
        {
            // Feature columns: everything except the label, the id/stratification column and the time column.
            string[] featureColumnNames = trainDataView.Schema.AsQueryable()
                                          .Select(column => column.Name)
                                          .Where(name => name != nameof(TransactionObservation.Label)
                                                         && name != "IdPreservationColumn"
                                                         && name != nameof(TransactionObservation.Time))
                                          .ToArray();

            // Data process pipeline: Concatenate -> DropColumns(Time) -> NormalizeLpNorm.
            var featuresWithoutTime = mlContext.Transforms.Concatenate("Features", featureColumnNames)
                                      .Append(mlContext.Transforms.DropColumns(new string[] { nameof(TransactionObservation.Time) }));
            IEstimator <ITransformer> dataProcessPipeline =
                featuresWithoutTime.Append(mlContext.Transforms.NormalizeLpNorm(outputColumnName: "NormalizedFeatures", inputColumnName: "Features"));

            // In anomaly detection the learner assumes all training examples have label 0 (normal examples only),
            // so rows are filtered on the Label column with bounds 0 and 1 before training.
            IDataView normalTrainDataView = mlContext.Data.FilterRowsByColumn(
                trainDataView,
                columnName: nameof(TransactionObservation.Label),
                lowerBound: 0,
                upperBound: 1);

            // (OPTIONAL) Peek at 2 records of the processed data and of the normalized feature vector.
            ConsoleHelper.PeekDataViewInConsole(mlContext, normalTrainDataView, dataProcessPipeline, 2);
            ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "NormalizedFeatures", normalTrainDataView, dataProcessPipeline, 2);

            var pcaOptions = new RandomizedPcaTrainer.Options
            {
                FeatureColumnName       = "NormalizedFeatures", // Feature column read by the trainer.
                ExampleWeightColumnName = null,                 // No example-weight column.
                Rank           = 28,                            // Number of components in the PCA.
                Oversampling   = 20,                            // Oversampling parameter for randomized PCA training.
                EnsureZeroMean = true,                          // Center the data to zero mean.
                Seed           = 1                              // Seed for random number generation.
            };

            // Anomaly detector whose underlying algorithm is randomized PCA.
            IEstimator <ITransformer> pcaTrainer = mlContext.AnomalyDetection.Trainers.RandomizedPca(options: pcaOptions);

            EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(pcaTrainer);

            ConsoleHelper.ConsoleWriteHeader("=============== Training model ===============");

            TransformerChain <ITransformer> fittedModel = trainingPipeline.Fit(normalTrainDataView);

            ConsoleHelper.ConsoleWriteHeader("=============== End of training process ===============");

            return(fittedModel);
        }
コード例 #27
0
 /// <summary>
 /// Fits the ONNX pipeline on an empty data set and applies the fitted chain to the image
 /// stored at <paramref name="imagepath"/>.
 /// </summary>
 /// <remarks>
 /// The transformed data view is neither read nor returned by this method.
 /// </remarks>
 /// <param name="imagepath">Path of the image file to load.</param>
 public void Predict(string imagepath)
 {
     EstimatorChain <OnnxTransformer> pipeline = CreatePipeline();

     // Fit on an empty enumerable to obtain the transformer chain.
     IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
     TransformerChain <OnnxTransformer> transformer = pipeline.Fit(emptyFitData);

     // The loaded image holds a GDI+ handle and keeps the file locked until it is disposed,
     // so release it deterministically instead of leaving it to the finalizer.
     using (var image = (Bitmap)Bitmap.FromFile(imagepath))
     {
         IDataView imageData = mlContext.Data.LoadFromEnumerable(new List <CenterFaceImageInput>()
         {
             new CenterFaceImageInput()
             {
                 Image = image
             }
         });

         // Result is intentionally discarded, as before; it must not be used after the image is disposed.
         transformer.Transform(imageData);
     }
 }
コード例 #28
0
ファイル: Default.aspx.cs プロジェクト: Fpisti-dev/MLFSQLIAD
        /// <summary>
        /// Resets the cached ML objects and, on the initial (non-postback) request, runs the model
        /// against the SQL text passed in the query string when a valid label accompanies it.
        /// </summary>
        /// <remarks>
        /// Query-string parameters: <c>pSql</c> (text to classify) and <c>pLab</c> (label, must be 0 or 1).
        /// A missing, non-numeric or out-of-range label sets <c>iLabel</c> to -1 and clears
        /// <c>bUrlRequest</c> instead of failing the request.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            _mlContext        = null;
            _model            = null;
            _trainingPipeline = null;
            _trainer          = null;
            _data             = null;

            if (IsPostBack)
            {
                // Input fields and submit button were used on the site, so there is no label value
                iLabel = -1;
                return;
            }

            string sqlParam   = Request.QueryString["pSql"];
            string labelParam = Request.QueryString["pLab"];

            if (!String.IsNullOrEmpty(sqlParam) && !String.IsNullOrEmpty(labelParam))
            {
                // Both query string values are present, so use them.
                // The label is untrusted input: parse defensively so a malformed value is
                // treated as "no valid label" (-1) rather than throwing.
                int parsedLabel;
                pSql   = sqlParam;
                iLabel = int.TryParse(labelParam, out parsedLabel) ? parsedLabel : -1;
            }

            // Null is treated like the empty string so the model is never called without text.
            if (String.IsNullOrEmpty(pSql))
            {
                bUrlRequest = false;
                return;
            }

            if (iLabel != 0 && iLabel != 1)
            {
                iLabel      = -1;
                bUrlRequest = false;
                return;
            }

            Debug.Print("pSql: " + pSql + ", pLab: " + iLabel.ToString());

            bUrlRequest = true;

            // Get setup state and training data file state
            GetSetup();

            // Use the model with a single item
            UseModelWithSingleItem(txtResult, pSql);

            // Auto close pages when testing a lot of URL requests
            //ClosePages();
        }
コード例 #29
0
        /// <summary>
        /// Builds the question-to-answer multiclass pipeline, fits it on <paramref name="data"/>, and
        /// creates the prediction engine from the fitted model.
        /// </summary>
        /// <remarks>
        /// Steps: map the Answer column to the key column "Label", featurize the Question column into
        /// "QuestionFeaturized", add a cache checkpoint, train with SdcaMaximumEntropy, and map
        /// "PredictedLabel" back from key to value.
        /// </remarks>
        /// <param name="data">Training data the pipeline is fitted on.</param>
        /// <param name="pipeline">Not read by the body; the pipeline is built locally.</param>
        private void buildAndTrainModel(IDataView data, EstimatorChain <ITransformer> pipeline)
        {
            // Estimators are created in the same order as they are chained.
            var answerToKey = _mlContext.Transforms.Conversion
                              .MapValueToKey(inputColumnName: nameof(MLEntry.Answer), outputColumnName: "Label");
            var questionFeaturizer = _mlContext.Transforms.Text
                                     .FeaturizeText(inputColumnName: nameof(MLEntry.Question), outputColumnName: "QuestionFeaturized");
            var cachedFeatures = answerToKey.Append(questionFeaturizer).AppendCacheCheckpoint(_mlContext);

            var withTrainer = cachedFeatures.Append(
                _mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "QuestionFeaturized"));

            var trainingPipeline = withTrainer.Append(
                _mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            _trainedModel     = trainingPipeline.Fit(data);
            _predictionEngine = _mlContext.Model.CreatePredictionEngine <MLEntry, AnswerPrediction>(_trainedModel);
        }
コード例 #30
0
        /// <summary>
        /// Completes <paramref name="pipeline"/> and fits it on <c>DataView</c>.
        /// </summary>
        /// <remarks>
        /// The algorithm returned by <c>GetAlgorithm</c> is appended when <paramref name="algorithmType"/>
        /// has a value. When the predicted column is alphanumeric, a key-to-value mapping of
        /// "PredictedLabel" is appended last.
        /// </remarks>
        /// <param name="algorithmType">Algorithm to append, or <c>null</c> to append none.</param>
        /// <param name="pipeline">Pipeline to extend and fit.</param>
        /// <returns>The fitted model.</returns>
        private ITransformer GetModel(AlgorithmType?algorithmType, EstimatorChain <TransformerChain <ColumnConcatenatingTransformer> > pipeline)
        {
            bool hasAlgorithm = algorithmType != null;

            if (!_predictedColumn.IsAlphanumeric)
            {
                if (!hasAlgorithm)
                {
                    return(pipeline.Fit(DataView));
                }

                return(pipeline.Append(GetAlgorithm(algorithmType.Value)).Fit(DataView));
            }

            // Alphanumeric predicted column: map "PredictedLabel" from key back to value as the last step.
            if (!hasAlgorithm)
            {
                return(pipeline.Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")).Fit(DataView));
            }

            return(pipeline.Append(GetAlgorithm(algorithmType.Value))
                   .Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"))
                   .Fit(DataView));
        }