/// <summary>
/// Runs the end-to-end training flow: prepares the dataset, builds the custom
/// transfer-learning pipeline, fits the model, saves it to OutputModelFilePath,
/// and finally evaluates it.
/// </summary>
public void RunPipeline()
{
    // Steps 1-4: load/split the dataset (uses a validation split when enabled).
    PrepareDataset(useValidationSet);

    // Step 5: build the pipeline with explicit hyper-parameters.
    EstimatorChain<KeyToValueMappingTransformer> pipeline = CreateCustomPipeline();

    // Step 6: train the model, timing the fit.
    Console.WriteLine("*** Training the image classification model with DNN Transfer Learning on top of the selected pre-trained model/architecture ***");
    Stopwatch watch = Stopwatch.StartNew();
    trainedModel = pipeline.Fit(trainDataset);
    watch.Stop();

    long seconds = watch.ElapsedMilliseconds / 1000;
    Console.WriteLine($"Training with transfer learning took: {seconds} seconds");

    // Step 7: persist the trained model as an ML.NET .zip file.
    mlContext.Model.Save(trainedModel, trainDataset.Schema, OutputModelFilePath);
    Console.WriteLine($"Model saved to: {OutputModelFilePath}");

    // Step 8: compute quality metrics.
    EvaluateModel();
}
/// <summary>
/// Builds the training pipeline: selects a binary-classification trainer based
/// on <c>aiEnum</c>, one-hot encodes the boolean graph properties, min-max
/// normalizes the numeric columns, concatenates the chosen columns into the
/// Features vector, and appends the trainer. The result is stored in the
/// <c>pipeline</c> field.
/// </summary>
private void TransformData()
{
    // Select the trainer matching the configured algorithm.
    ITrainerEstimator<ISingleFeaturePredictionTransformer<IPredictorProducing<float>>, IPredictorProducing<float>> trainer = null;
    Console.WriteLine("-------------------");
    switch (aiEnum)
    {
    case AIEnum.fastTree:
        Console.WriteLine("fastTree");
        trainer = mlContext.BinaryClassification.Trainers.FastTree();
        break;
    case AIEnum.generalizedAdditiveModels:
        Console.WriteLine("generalizedAdditiveModels");
        trainer = mlContext.BinaryClassification.Trainers.GeneralizedAdditiveModels();
        break;
    case AIEnum.logisticRegression:
        Console.WriteLine("logisticRegression");
        trainer = mlContext.BinaryClassification.Trainers.LogisticRegression();
        break;
    case AIEnum.stochasticDualCoordinateAscent:
        Console.WriteLine("stochasticDualCoordinateAscent");
        trainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent();
        break;
    case AIEnum.stochasticGradientDescent:
        Console.WriteLine("stochasticGradientDescent");
        trainer = mlContext.BinaryClassification.Trainers.StochasticGradientDescent();
        break;
    }
    // NOTE(review): if aiEnum matches none of the cases above, `trainer` stays
    // null and the final Append(trainer) will fail at pipeline construction or
    // fit time — confirm every AIEnum value is covered here.
    Console.WriteLine("-------------------");

    // Pipeline: one-hot encode flags, normalize every numeric column,
    // concatenate the selected normalized columns into Features, add trainer.
    pipeline = mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "IsRegularOneHot", inputColumnName: "IsRegular")
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "IsCyclicOneHot", inputColumnName: "IsCyclic"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "IsChordalOneHot", inputColumnName: "IsChordal"))
        .Append(mlContext.Transforms.Normalize(
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "ID_GraphClass", outputColumnName: "ID_GraphClassNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "ID_EulerianGraph", outputColumnName: "ID_EulerianGraphNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "IsRegularOneHot", outputColumnName: "IsRegularOneHotNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "IsCyclicOneHot", outputColumnName: "IsCyclicOneHotNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "IsChordalOneHot", outputColumnName: "IsChordalOneHotNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountVertices", outputColumnName: "CountVerticesNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountEdges", outputColumnName: "CountEdgesNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountCutVertices", outputColumnName: "CountCutVerticesNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "CountBridges", outputColumnName: "CountBridgesNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "Girth", outputColumnName: "GirthNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "Dense", outputColumnName: "DenseNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "MinimumVertexDegree", outputColumnName: "MinimumVertexDegreeNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "MaximumVertexDegree", outputColumnName: "MaximumVertexDegreeNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "AverageVertexDegree", outputColumnName: "AverageVertexDegreeNormalized", fixZero: true),
            new NormalizingEstimator.MinMaxColumn(inputColumnName: "MedianVertexDegree", outputColumnName: "MedianVertexDegreeNormalized", fixZero: true)))
        // CountBridgesNormalized and MedianVertexDegreeNormalized are computed
        // above but excluded from the feature vector (see commented entries).
        .Append(mlContext.Transforms.Concatenate(DefaultColumnNames.Features,
            "ID_GraphClassNormalized", "ID_EulerianGraphNormalized", "IsRegularOneHotNormalized",
            "IsCyclicOneHotNormalized", "IsChordalOneHotNormalized", "CountVerticesNormalized",
            "CountEdgesNormalized", "CountCutVerticesNormalized", /*"CountBridgesNormalized",*/
            "GirthNormalized", "DenseNormalized", "MinimumVertexDegreeNormalized",
            "MaximumVertexDegreeNormalized", "AverageVertexDegreeNormalized"//, /*"MedianVertexDegreeNormalized",*/
        ))
        .Append(trainer);
}
/// <summary>
/// 5.1. (Optional) Builds the model's training pipeline using explicit
/// hyper-parameters instead of the DNN defaults.
/// </summary>
/// <returns>The image-classification trainer followed by a key-to-value map
/// for the predicted label column.</returns>
private EstimatorChain<KeyToValueMappingTransformer> CreateCustomPipeline()
{
    var options = new ImageClassificationTrainer.Options()
    {
        LabelColumnName = KeyColumn,
        // Must match the feature column name used in ImageDataInMemory.
        FeatureColumnName = FeatureColumn,
        // Pre-trained DNN architecture to transfer-learn from.
        Arch = (ImageClassificationTrainer.Architecture)arch,
        Epoch = 200,        // number of training iterations
        BatchSize = 10,     // samples per mini-batch
        LearningRate = 0.01f,
        MetricsCallback = (metrics) => Console.WriteLine(metrics),
    };

    // When no dedicated validation split was requested, the test set is used
    // as validation data during training.
    options.ValidationSet = useValidationSet ? validationDataset : testDataset;

    return mlContext.MulticlassClassification.Trainers.ImageClassification(options)
        .Append(mlContext.Transforms.Conversion.MapKeyToValue(PredictedLabelColumn, PredictedLabelColumn));
}
/// <summary>
/// Sets up the spam-detection training pipeline: loads the tab-separated
/// data, featurizes the message text (word bi-grams + character tri-grams),
/// normalizes the feature vector, and appends a one-versus-all averaged
/// perceptron trainer.
/// </summary>
public void Build()
{
    // MLContext is the root catalog of ML.NET components.
    mlContext = new MLContext();

    // Load the spam dataset into a DataView.
    _data = mlContext.Data.LoadFromTextFile<SpamInput>(path: TrainDataPath, hasHeader: true, separatorChar: '\t');

    // Text featurization: word bi-grams plus character tri-grams.
    var textOptions = new Microsoft.ML.Transforms.Text.TextFeaturizingEstimator.Options
    {
        WordFeatureExtractor = new Microsoft.ML.Transforms.Text.WordBagEstimator.Options { NgramLength = 2, UseAllLengths = true },
        CharFeatureExtractor = new Microsoft.ML.Transforms.Text.WordBagEstimator.Options { NgramLength = 3, UseAllLengths = false },
    };

    // Transform chain: label -> key, message -> features, Lp-normalize, cache.
    var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
        .Append(mlContext.Transforms.Text.FeaturizeText("FeaturesText", textOptions, "Message"))
        .Append(mlContext.Transforms.CopyColumns("Features", "FeaturesText"))
        .Append(mlContext.Transforms.NormalizeLpNorm("Features", "Features"))
        .AppendCacheCheckpoint(mlContext);

    // One-versus-all averaged perceptron; map the predicted key back to text.
    var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(labelColumnName: "Label", numberOfIterations: 10, featureColumnName: "Features");
    var trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer, labelColumnName: "Label")
        .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

    _trainingPipeline = dataProcessPipeline.Append(trainer);
}
/// <summary>
/// Assembles the full estimator chain: pre-trainer transforms, an optional
/// cache checkpoint, the trainer, then any post-trainer transforms.
/// </summary>
/// <returns>The composed estimator pipeline.</returns>
public IEstimator<ITransformer> ToEstimator()
{
    IEstimator<ITransformer> chain = new EstimatorChain<ITransformer>();

    // Pre-trainer transforms (skip entries without an estimator).
    foreach (var transform in Transforms)
    {
        if (transform.Estimator == null)
        {
            continue;
        }
        chain = chain.Append(transform.Estimator);
    }

    // Build the learner before deciding on caching, preserving call order.
    var learner = Trainer.BuildTrainer();

    // Optionally cache the featurized data ahead of the trainer.
    if (_cacheBeforeTrainer)
    {
        chain = chain.AppendCacheCheckpoint(_context);
    }

    chain = chain.Append(learner);

    // Post-trainer transforms (skip entries without an estimator).
    foreach (var transform in TransformsPostTrainer)
    {
        if (transform.Estimator == null)
        {
            continue;
        }
        chain = chain.Append(transform.Estimator);
    }

    return chain;
}
/// <summary>
/// Creates an empty model wrapper; all fields start as null and are populated
/// later (e.g. by Build()).
/// </summary>
public SpamDetectionMLModel()
{
    _data = null;
    _trainingPipeline = null;
    _model = null;
    mlContext = null;
}
/// <summary>
/// Entry point: loads and splits the data, then trains, times, and saves a
/// model. The data-processing pipeline and trainer are still placeholders.
/// </summary>
static void Main(string[] args)
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    var mlContext = new MLContext(seed: 1);

    Console.WriteLine($"Loading data from {DataPath}");
    var data = mlContext.Data.LoadFromTextFile<ModelInput>(DataPath, hasHeader: HasHeader, separatorChar: SeparatorChar);

    Console.WriteLine("Splitting the data");
    var trainTestSplit = mlContext.Data.TrainTestSplit(data);

    Console.WriteLine("Transforming the data");
    IEstimator<ITransformer> dataProcessPipeline = null; // TODO: build the feature pipeline for ModelInput
    Console.WriteLine("Training the model");
    IEstimator<ITransformer> trainer = null;             // TODO: choose a training algorithm

    // FIX: the original dereferenced the null placeholders below and crashed
    // with a NullReferenceException; fail fast with an actionable message.
    if (dataProcessPipeline == null)
    {
        throw new InvalidOperationException("dataProcessPipeline has not been defined; build the data-processing pipeline before training.");
    }
    if (trainer == null)
    {
        throw new InvalidOperationException("trainer has not been defined; choose a training algorithm before training.");
    }

    EstimatorChain<ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);
    ITransformer model = trainingPipeline.Fit(trainTestSplit.TrainSet);

    Console.WriteLine("Evaluating the model's performance");
    // Depends on Trainer

    stopWatch.Stop();
    Console.WriteLine($"Training finished in: {stopWatch.ElapsedMilliseconds} milliseconds");

    Console.WriteLine($"Saving the model to {ModelName}");
    mlContext.Model.Save(model, trainTestSplit.TrainSet.Schema, ModelName);
}
/// <summary>
/// Trains the GitHub issue-area classifier: featurizes title and description,
/// key-encodes the area label, fits an SDCA maximum-entropy model, and reports
/// the elapsed training time.
/// </summary>
void IAiTest.Train()
{
    Console.WriteLine("=============== Multiclass Classification - Issue Area Prediction ===============");

    IDataView dataView = _context.Data.LoadFromTextFile<GitHubIssue>($"{RootFolder}/{TrainDataFile}", hasHeader: true);

    // Featurize title + description, key-encode the label, cache the result.
    var dataProcessPipeline = _context.Transforms.Conversion.MapValueToKey(inputColumnName: nameof(GitHubIssue.Area), outputColumnName: "Area")
        .Append(_context.Transforms.Text.FeaturizeText(inputColumnName: "Title", outputColumnName: "TitleFeaturized"))
        .Append(_context.Transforms.Text.FeaturizeText(inputColumnName: "Description", outputColumnName: "DescriptionFeaturized"))
        .Append(_context.Transforms.Concatenate("Features", "TitleFeaturized", "DescriptionFeaturized"))
        .AppendCacheCheckpoint(_context);

    _trainer = _context.MulticlassClassification.Trainers.SdcaMaximumEntropy(labelColumnName: "Area", featureColumnName: "Features")
        .Append(_context.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    var trainingPipeline = dataProcessPipeline.Append(_trainer);

    Console.WriteLine("=============== Create and Train the Model ===============");
    var stopwatch = Stopwatch.StartNew();
    _model = trainingPipeline.Fit(dataView);
    stopwatch.Stop();
    Console.WriteLine($" Total {stopwatch.ElapsedMilliseconds} ms");
    Console.WriteLine("=============== End of training ===============");
    Console.WriteLine();
}
/// <summary>
/// Trains a one-versus-all FastForest multi-class model over the supplied
/// in-memory data and creates a prediction engine from the fitted model.
/// </summary>
/// <param name="trainingData">In-memory training examples.</param>
/// <param name="multiClassOptions">Label/feature column configuration; stored in Options.</param>
/// <param name="fastForestOptions">FastForest hyper-parameters.</param>
public void TrainFastForestOva(IEnumerable<TInput> trainingData, MultiClassOptions<TInput> multiClassOptions, FastForestOvaOptions fastForestOptions)
{
    this.Options = multiClassOptions;

    // Data preprocessing pipeline: key-encode the label, concatenate the
    // feature columns, and cache for the iterative trainer.
    var pipeline = this.ml.Transforms.Conversion.MapValueToKey(inputColumnName: this.Options.LabelName, outputColumnName: "Label")
        .Append(this.ml.Transforms.Concatenate("Features", this.Options.FeatureColumnNames))
        .AppendCacheCheckpoint(this.ml);

    // Training pipeline: binary FastForest wrapped in one-versus-all.
    var classifier = this.ml.BinaryClassification.Trainers.FastForest(numberOfLeaves: fastForestOptions.NumberOfLeaves, minimumExampleCountPerLeaf: fastForestOptions.MinimumExampleCountPerLeaf, numberOfTrees: fastForestOptions.NumberOfTrees, labelColumnName: "Label", featureColumnName: "Features");
    var multiClass = this.ml.MulticlassClassification.Trainers.OneVersusAll(classifier, labelColumnName: "Label");
    this.trainingPipeline = pipeline.Append(multiClass);

    // Training.
    // NOTE(review): the fitted model appends a final MapKeyToValue step that is
    // NOT part of the stored `trainingPipeline` field — confirm that is
    // intentional for any later re-fit of trainingPipeline.
    var trainData = this.ml.Data.LoadFromEnumerable(trainingData);
    this.model = trainingPipeline
        .Append(this.ml.Transforms.Conversion.MapKeyToValue("PredictedLabel"))
        .Fit(trainData);
    this.inputSchema = trainData.Schema;
    this.predictionEngine = this.ml.Model.CreatePredictionEngine<TInput, PredictionOutput>(model);
}
/// <summary>
/// Trains book-rating models on a long-running background task using
/// cross-validation, and returns the model from the most accurate fold.
/// </summary>
/// <param name="FilePath">Path to the comma-separated training file.</param>
private static Task<ITransformer> TrainAndGetBestModel(string FilePath)
{
    return Task.Factory.StartNew(() =>
    {
        MLContext context = MLCProvider.Current;

        IDataView trainingData = context.Data.LoadFromTextFile<BookRating>(FilePath, ',', true);
        trainingData = context.Data.Cache(trainingData);

        Console.WriteLine("=============== 正在读取训练数据文件 ===============");

        // Featurize the three text columns and concatenate into "Features".
        EstimatorChain<ColumnConcatenatingTransformer> dataPipeline =
            context.Transforms.Text.FeaturizeText("UserIdFeaturized", nameof(BookRating.UserId))
            .Append(context.Transforms.Text.FeaturizeText("ISBNFeaturized", nameof(BookRating.ISBN)))
            .Append(context.Transforms.Text.FeaturizeText("AgeFeaturized", nameof(BookRating.Age)))
            .Append(context.Transforms.Concatenate("Features", "UserIdFeaturized", "ISBNFeaturized", "AgeFeaturized"));

        Console.WriteLine("=============== 正在使用交叉验证训练预测模型 ===============");

        var trainerOptions = new FieldAwareFactorizationMachineTrainer.Options
        {
            Verbose = true,
            NumberOfIterations = 10,
            FeatureColumnName = "Features",
            Shuffle = true
        };

        EstimatorChain<FieldAwareFactorizationMachinePredictionTransformer> trainingPipeline =
            dataPipeline.Append(context.BinaryClassification.Trainers.FieldAwareFactorizationMachine(trainerOptions));

        // Cross-validate; keep the model from the most accurate fold.
        var cvResults = context.BinaryClassification.CrossValidate(trainingData, trainingPipeline);
        return cvResults.OrderByDescending(t => t.Metrics.Accuracy).Select(r => r.Model).FirstOrDefault();
    }, TaskCreationOptions.LongRunning);
}
/// <summary>
/// Trains a randomized-PCA anomaly detector from the stored clean packets,
/// saves the model to <paramref name="modelFileName"/>, and returns the
/// evaluation metrics computed on the test split.
/// </summary>
/// <param name="storage">Data access layer providing the packet data.</param>
/// <param name="modelFileName">Destination path for the saved model file.</param>
/// <returns>Metrics plus clean/malicious row counts and training duration.</returns>
/// <exception cref="ArgumentNullException">storage or modelFileName is null/empty.</exception>
/// <exception cref="DirectoryNotFoundException">The destination directory does not exist.</exception>
public async Task<ModelMetrics> GenerateModel(BaseDAL storage, string modelFileName)
{
    if (storage == null)
    {
        Log.Error("Trainer::GenerateModel - BaseDAL is null");
        throw new ArgumentNullException(nameof(storage));
    }

    if (string.IsNullOrEmpty(modelFileName))
    {
        Log.Error("Trainer::GenerateModel - modelFileName is null");
        throw new ArgumentNullException(nameof(modelFileName));
    }

    // FIX: the original required the OUTPUT model file to already exist before
    // training/saving, which can never be true on a first run. Validate that
    // the directory we will save into exists instead.
    var outputDirectory = Path.GetDirectoryName(Path.GetFullPath(modelFileName));
    if (!Directory.Exists(outputDirectory))
    {
        Log.Error($"Trainer::GenerateModel - directory {outputDirectory} does not exist");
        throw new DirectoryNotFoundException(outputDirectory);
    }

    var startTime = DateTime.Now;

    var options = new RandomizedPcaTrainer.Options
    {
        FeatureColumnName = FEATURES,
        ExampleWeightColumnName = null,
        Rank = 4,
        Oversampling = 20,
        EnsureZeroMean = true,
        Seed = Constants.ML_SEED
    };

    // Split the stored packets into clean/malicious views plus row counts.
    var (data, cleanRowCount, maliciousRowCount) = GetDataView(
        await storage.QueryPacketsAsync(a => a.IsClean),
        await storage.QueryPacketsAsync(a => !a.IsClean));

    // Concatenate every PayloadItem property except Label into the features.
    IEstimator<ITransformer> dataProcessPipeline = _mlContext.Transforms.Concatenate(
        FEATURES,
        typeof(PayloadItem).ToPropertyList<PayloadItem>(nameof(PayloadItem.Label)));

    IEstimator<ITransformer> trainer = _mlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);
    EstimatorChain<ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

    TransformerChain<ITransformer> trainedModel = trainingPipeline.Fit(data.TrainSet);
    _mlContext.Model.Save(trainedModel, data.TrainSet.Schema, modelFileName);

    // Evaluate on the held-out test split.
    var testSetTransform = trainedModel.Transform(data.TestSet);

    return new ModelMetrics
    {
        Metrics = _mlContext.AnomalyDetection.Evaluate(testSetTransform),
        NumCleanRows = cleanRowCount,
        NumMaliciousRows = maliciousRowCount,
        Duration = DateTime.Now.Subtract(startTime)
    };
}
/// <summary>
/// Creates a prediction engine for the CenterFace ONNX pipeline. The pipeline
/// is fit on an empty data view just to materialize the transformer chain.
/// </summary>
public PredictionEngine<CenterFaceImageInput, CenterFaceImageOutput> GetMlNetPredictionEngine()
{
    EstimatorChain<OnnxTransformer> pipeline = CreatePipeline();
    IDataView emptyData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
    TransformerChain<OnnxTransformer> chain = pipeline.Fit(emptyData);
    return mlContext.Model.CreatePredictionEngine<CenterFaceImageInput, CenterFaceImageOutput>(chain);
}
/// <summary>
/// Fits the CenterFace ONNX pipeline on an empty data view and saves the
/// resulting ML.NET model to _mlModelDestn.
/// </summary>
public void SaveMLNetModel()
{
    EstimatorChain<OnnxTransformer> pipeline = CreatePipeline();
    IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
    TransformerChain<OnnxTransformer> transformer = pipeline.Fit(emptyFitData);

    // FIX: save with the input schema (the original passed null), so the saved
    // model can be reloaded without the caller re-supplying a schema.
    mlContext.Model.Save(transformer, emptyFitData.Schema, _mlModelDestn);
}
/// <summary>
/// 5. Builds the model's training pipeline using the DNN trainer's default
/// hyper-parameter values.
/// </summary>
/// <param name="dataset">Data view used as the validation set during training.</param>
/// <returns>The trainer followed by a key-to-value map for the predicted label.</returns>
private EstimatorChain<KeyToValueMappingTransformer> CreateDefaultPipeline(IDataView dataset)
{
    // The feature column name must match the one used in ImageDataInMemory.
    var trainer = mlContext.MulticlassClassification.Trainers.ImageClassification(
        labelColumnName: KeyColumn,
        featureColumnName: FeatureColumn,
        validationSet: dataset);

    return trainer.Append(mlContext.Transforms.Conversion.MapKeyToValue(PredictedLabelColumn, PredictedLabelColumn));
}
// NOTE: OnlineGradientDescent and Gam trainers did not work with this pipeline.
/// <summary>
/// Builds the shared feature pipeline: copies WaitTime into Label, one-hot
/// encodes SkillId, and concatenates all feature columns into "Features".
/// </summary>
/// <param name="mlContext">ML.NET context used to create the transforms.</param>
/// <param name="trainData">Training data; currently unused, kept for interface compatibility.</param>
/// <returns>The estimator chain ending in the column concatenation.</returns>
public static EstimatorChain<ColumnConcatenatingTransformer> BasePipelineBuilder(MLContext mlContext, IDataView trainData)
{
    // FIX: removed the unused local copy of trainData.
    EstimatorChain<ColumnConcatenatingTransformer> basePipeline =
        mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "WaitTime")
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "SkillIdEncoded", inputColumnName: "SkillId"))
        .Append(mlContext.Transforms.Concatenate(outputColumnName: "Features",
            "SkillIdEncoded", "AgentsOnCall", "QueueLength", "BaseAvailAgents",
            "R1AvailAgents", "R2AvailAgents", "R1Threshold", "R2Threshold"));
    return (basePipeline);
}
/// <summary>
/// Builds the shared transform pipeline: prepares the label column (key map
/// for alphanumeric labels, type conversion for typed labels), optionally
/// one-hot encodes alphanumeric feature columns, and concatenates everything
/// into the feature column.
/// </summary>
/// <returns>This instance, for chaining.</returns>
public IPipelineChain BuildPipeline()
{
    // NOTE(review): these guard missing configuration on fields, not method
    // arguments; InvalidOperationException would be the conventional type here.
    if (_predictedColumn == null)
    {
        throw new ArgumentNullException(nameof(_predictedColumn));
    }
    if (_algorithmType == null)
    {
        throw new ArgumentNullException(nameof(_algorithmType));
    }

    // Label preparation: alphanumeric labels get a value-to-key map; labels
    // with an explicit DataKind get a type conversion; in all cases the
    // predicted column is copied into "Label".
    var keyMap = _predictedColumn.IsAlphanumeric ? MlContext.Transforms.Conversion.MapValueToKey(_predictedColumn.ColumnName) : null;
    var keyConversion = _predictedColumn.DataKind != null ? MlContext.Transforms.Conversion.ConvertType(_predictedColumn.ColumnName, outputKind: _predictedColumn.DataKind.Value) : null;
    var keyColumn = MlContext.Transforms.CopyColumns("Label", _predictedColumn.ColumnName);

    if (_alphanumericColumns != null)
    {
        // One-hot encode each alphanumeric feature column. The first encoding
        // seeds `oneHotEncodingTransformer`; subsequent ones extend the chain.
        OneHotEncodingEstimator oneHotEncodingTransformer = null;
        EstimatorChain<OneHotEncodingTransformer> oneHotEncodingTransformerChain = null;
        if (_alphanumericColumns != null) // NOTE(review): redundant; already checked above.
        {
            for (int i = 0; i < _alphanumericColumns.Length; i++)
            {
                if (oneHotEncodingTransformer == null)
                {
                    oneHotEncodingTransformer = MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]);
                }
                else if (oneHotEncodingTransformerChain == null)
                {
                    oneHotEncodingTransformerChain = oneHotEncodingTransformer.Append(MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]));
                }
                else
                {
                    oneHotEncodingTransformerChain = oneHotEncodingTransformerChain.Append(MlContext.Transforms.Categorical.OneHotEncoding(_alphanumericColumns[i]));
                }
            }
        }
        // NOTE(review): if _alphanumericColumns is non-null but EMPTY, both
        // locals are still null here and the `??` fallback dereference throws
        // NullReferenceException — confirm empty arrays cannot reach this point.
        var columnConcatenatingTransformer = oneHotEncodingTransformerChain?.Append(MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns)) ?? oneHotEncodingTransformer.Append(MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns));
        // NOTE(review): this branch assigns _transformerChain while the else
        // branch below assigns _estimatorChain — confirm both fields are
        // consumed consistently downstream.
        _transformerChain = _predictedColumn.IsAlphanumeric ?
            keyMap.Append(keyColumn).Append(columnConcatenatingTransformer) :
            _predictedColumn.DataKind != null ? keyConversion.Append(keyColumn).Append(columnConcatenatingTransformer) :
            keyColumn.Append(columnConcatenatingTransformer);
    }
    else
    {
        // No alphanumeric feature columns: concatenate the raw columns directly.
        var featureColumn = MlContext.Transforms.Concatenate(_featureColumn, _concatenatedColumns);
        _estimatorChain = _predictedColumn.IsAlphanumeric ?
            keyMap.Append(keyColumn).Append(featureColumn) :
            _predictedColumn.DataKind != null ? keyConversion.Append(keyColumn).Append(featureColumn) :
            keyColumn.Append(featureColumn);
    }
    return (this);
}
/// <summary>
/// Key-encodes the user id and ISBN columns, applies the encoding to the data
/// view, and splits 5% of the rows off as the test set.
/// </summary>
private static void PreProcessData()
{
    estimator = _context.Transforms.Conversion
        .MapValueToKey("Encoded_UserID", nameof(InputModel.UserId))
        .Append(_context.Transforms.Conversion.MapValueToKey("Encoded_Book", nameof(InputModel.ISBN)));

    var encoded = estimator.Fit(_dataView).Transform(_dataView);
    splitData = _context.Data.TrainTestSplit(encoded, 0.05);
}
/// <summary>
/// Builds a text pipeline that tokenizes the "Text" column, key-encodes the
/// tokens, and produces TF-weighted n-gram features.
/// </summary>
/// <param name="ngramLength">Length of the n-grams to extract (default 3).</param>
public NGrams(int ngramLength = 3)
{
    _mlContext = new MLContext();

    var tokenize = _mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text");
    // ProduceNgrams takes key-typed input, hence the MapValueToKey step.
    _textPipeline = tokenize
        .Append(_mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
        .Append(_mlContext.Transforms.Text.ProduceNgrams("NgramFeatures", "Tokens",
            ngramLength: ngramLength,
            useAllLengths: false,
            weighting: NgramExtractingEstimator.WeightingCriteria.Tf));
}
/// <summary>
/// Composes a data reader estimator with an optional estimator chain; when no
/// chain is supplied, an empty chain is substituted.
/// </summary>
/// <param name="start">The reader estimator that produces the source data; must not be null.</param>
/// <param name="estimatorChain">Transforms applied after reading; may be null.</param>
public CompositeReaderEstimator(IDataReaderEstimator<TSource, IDataReader<TSource>> start, EstimatorChain<TLastTransformer> estimatorChain = null)
{
    Contracts.CheckValue(start, nameof(start));
    Contracts.CheckValueOrNull(estimatorChain);

    _start = start;
    // Null chain is normalized to an empty chain so later code never null-checks.
    _estimatorChain = estimatorChain ?? new EstimatorChain<TLastTransformer>();

    // REVIEW: enforce that estimator chain can read the reader's schema.
    // Right now it throws.
    // GetOutputSchema();
}
/// <summary>
/// Loads sensor frames from the given datasets, splits them 80/20, fits the
/// preprocessing + OVA FastTree pipeline, then trains a second OVA FastTree on
/// the transformed data and prints its confusion matrix on the test split.
/// </summary>
/// <param name="frameSize">Frame size passed to the frame reader.</param>
/// <param name="sensorType">Sensor type passed to the frame reader.</param>
/// <param name="datasets">Paths of the dataset files to load.</param>
/// <param name="labels">Class labels; stored in `categories`.</param>
public SignalClassifierController(string frameSize, string sensorType, string[] datasets, string[] labels)
{
    mlContext = new MLContext();
    categories = labels;

    var reader = getFrameReader(frameSize, sensorType);
    var trainingDataView = reader.Load(datasets);
    var split = mlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.2);

    // Preprocessing + trainer: key-encode the label, min-max normalize the
    // "readings" vector, then one-versus-all FastTree.
    // (NaiveBayes, LbfgsLogisticRegression and LdSvm variants were tried
    // previously; see source history.)
    estimatorPipeline = mlContext.Transforms.Conversion.MapValueToKey("Label")
        .Append(mlContext.Transforms.NormalizeMinMax("readings", fixZero: true))
        .Append(mlContext.MulticlassClassification.Trainers
            .OneVersusAll(mlContext.BinaryClassification.Trainers
                .FastTree(featureColumnName: "readings")));

    transformer = estimatorPipeline.Fit(split.TrainSet);

    // NOTE(review): the OVA FastTree trainer is also part of estimatorPipeline
    // above, so the trainer is effectively fit twice; confirm whether
    // `transformer` was meant to hold only the preprocessing steps.
    var OVAEstimator = mlContext.MulticlassClassification.Trainers
        .OneVersusAll(mlContext.BinaryClassification.Trainers
            .FastTree(featureColumnName: "readings"));

    var transformedTrainingData = transformer.Transform(split.TrainSet);
    model = OVAEstimator.Fit(transformedTrainingData);
    Console.WriteLine("Model fitted");

    // Score the held-out split and print the confusion matrix.
    var transformedTestData = transformer.Transform(split.TestSet);
    var testPredictions = model.Transform(transformedTestData);
    Console.WriteLine(mlContext.MulticlassClassification.Evaluate(testPredictions).ConfusionMatrix.GetFormattedConfusionTable());
}
/// <summary>
/// Sets up a FastTree regression pipeline over the supplied archive data:
/// featurizes the timestamp text and concatenates the requested feature
/// columns, with "Volume" as the regression label.
/// </summary>
/// <param name="trainingData">Archive rows used for training.</param>
/// <param name="features">Names of the columns to concatenate into Features.</param>
public MLModel(List<Archive> trainingData, List<string> features)
{
    _trainingData = trainingData;
    _context = new MLContext(seed: 1);

    var preprocessing = _context.Transforms.Text.FeaturizeText("Timestamp_tf", "Timestamp")
        .Append(_context.Transforms.Concatenate("Features", features.ToArray()));

    var regressor = _context.Regression.Trainers.FastTree(labelColumnName: "Volume", featureColumnName: "Features");

    _pipeline = preprocessing.Append(regressor);
}
/// <summary>
/// Infers a transform pipeline for the given data and label column, chaining
/// every suggested estimator into a single EstimatorChain.
/// </summary>
/// <param name="catalog">Extension receiver (not otherwise used).</param>
/// <param name="data">Data to infer transforms for.</param>
/// <param name="label">Name of the label column.</param>
public static IEstimator<ITransformer> InferTransforms(this TransformsCatalog catalog, IDataView data, string label)
{
    var mlContext = new MLContext();
    var suggestedTransforms = TransformInferenceApi.InferTransforms(mlContext, data, label);

    var pipeline = new EstimatorChain<ITransformer>();
    foreach (var suggestion in suggestedTransforms)
    {
        pipeline = pipeline.Append(suggestion.Estimator);
    }
    return pipeline;
}
/// <summary>
/// Prints the multi-class classification metrics for a trained pipeline.
/// </summary>
/// <param name="trainer">The trained estimator chain (used only for the header text).</param>
/// <param name="metrics">Metrics produced by the multi-class evaluator.</param>
private static void ShowAccuracyStats(EstimatorChain<KeyToValueMappingTransformer> trainer, MulticlassClassificationMetrics metrics)
{
    Console.WriteLine($"************************************************************");
    Console.WriteLine($"* Metrics for {trainer.ToString()} multi-class classification model ");
    Console.WriteLine($"*-----------------------------------------------------------");
    Console.WriteLine($" AccuracyMacro = {metrics.MacroAccuracy.ToString(CultureInfo.CurrentCulture)}, a value between 0 and 1, the closer to 1, the better");
    Console.WriteLine($" AccuracyMicro = {metrics.MicroAccuracy.ToString(CultureInfo.CurrentCulture)}, a value between 0 and 1, the closer to 1, the better");
    Console.WriteLine($" LogLoss = {metrics.LogLoss.ToString(CultureInfo.CurrentCulture)}, the closer to 0, the better");
    // FIX: iterate over the actual number of classes instead of hard-coding
    // indices 0-2, which threw for models with fewer than three classes.
    for (int i = 0; i < metrics.PerClassLogLoss.Count; i++)
    {
        Console.WriteLine($" LogLoss for class {i + 1} = {metrics.PerClassLogLoss[i].ToString(CultureInfo.CurrentCulture)}, the closer to 0, the better");
    }
    Console.WriteLine($"************************************************************");
}
/// <summary>
/// Approval test: an EstimatorChain built from a trainer and a transformer
/// should produce generated code matching the approved snapshot.
/// </summary>
public void EstimatorChain_should_generate_code()
{
    // Arrange: one trainer estimator and one transformer estimator.
    var trainer = this.GetTrainerEstimator();
    var transformer = this.GetTransformerEstimator();
    var estimatorChain = new EstimatorChain()
    {
        trainer,
        transformer,
    };

    // Act + Assert: verify the generated code against the approved file.
    Approvals.Verify(estimatorChain.GeneratorCode());
}
/// <summary>
/// Builds and trains a FastTreeTweedie regression model whose feature set
/// depends on the _includeDay/_includeMonth/_includeWeek flags.
/// </summary>
/// <param name="column">Enum value naming the column used as the regression label.</param>
/// <returns>The fitted transformer.</returns>
private ITransformer BuildAndTrainUsingParams(ColumnEnum column)
{
    List<string> features = new List<string>();
    // Holds the first (single) text featurizer; the ITransformer chain below is
    // only used when a second featurizer has to be appended to it.
    TextFeaturizingEstimator textTransformer = null;
    EstimatorChain<ColumnConcatenatingTransformer> estimatorColumn = null; // NOTE(review): never assigned or read.
    EstimatorChain<ITransformer> estimatorTransformer = null;

    if (_includeDay)
    {
        textTransformer = _mlContext.Transforms.Text.FeaturizeText("DayString", "Day");
        features.Add("DayString");
    }
    if (_includeMonth)
    {
        // If the day featurizer exists, chain the month featurizer onto it;
        // otherwise the month featurizer becomes the first transform.
        if (textTransformer != null)
        {
            estimatorTransformer = textTransformer.Append(_mlContext.Transforms.Text.FeaturizeText("MonthString", "Month"));
        }
        else
        {
            textTransformer = _mlContext.Transforms.Text.FeaturizeText("MonthString", "Month");
        }
        features.Add("MonthString");
    }
    if (_includeWeek)
    {
        features.Add("Week");
    }

    // Case 1: no text featurizer at all — concatenate raw features directly.
    if (textTransformer == null)
    {
        var res = _mlContext.Transforms.Concatenate("Features", features.ToArray())
            .Append(_mlContext.Transforms.CopyColumns("Label", System.Enum.GetName(typeof(ColumnEnum), column)))
            .Append(_mlContext.Regression.Trainers.FastTreeTweedie());
        return (res.Fit(_trainData));
    }

    // Case 2: day AND month featurizers chained together.
    if (estimatorTransformer != null)
    {
        var res2 = estimatorTransformer.Append(_mlContext.Transforms.Concatenate("Features", features.ToArray()))
            .Append(_mlContext.Transforms.CopyColumns("Label", System.Enum.GetName(typeof(ColumnEnum), column)))
            .Append(_mlContext.Regression.Trainers.FastTreeTweedie());
        return (res2.Fit(_trainData));
    }

    // Case 3: exactly one text featurizer (day or month).
    var res3 = textTransformer.Append(_mlContext.Transforms.Concatenate("Features", features.ToArray()))
        .Append(_mlContext.Transforms.CopyColumns("Label", System.Enum.GetName(typeof(ColumnEnum), column)))
        .Append(_mlContext.Regression.Trainers.FastTreeTweedie());
    return (res3.Fit(_trainData));
}
/// <summary>
/// Trains a randomized-PCA anomaly detector over the transaction data: builds
/// the feature vector from all columns except Label/IdPreservationColumn/Time,
/// normalizes it, filters to normal (label-0) rows, and fits the model.
/// </summary>
/// <param name="mlContext">ML.NET context.</param>
/// <param name="trainDataView">Training data including the Label column.</param>
/// <returns>The fitted transformer chain.</returns>
public static ITransformer TrainModel(MLContext mlContext, IDataView trainDataView)
{
    // Feature columns = every column except Label, IdPreservationColumn and Time.
    string[] featureColumnNames = trainDataView.Schema.AsQueryable()
        .Select(column => column.Name) // Get all the column names
        .Where(name => name != nameof(TransactionObservation.Label)) // exclude the label
        .Where(name => name != "IdPreservationColumn") // exclude the stratification column
        .Where(name => name != nameof(TransactionObservation.Time)) // Time is not a feature
        .ToArray();

    // Concatenate the features, drop Time, and Lp-normalize the feature vector.
    IEstimator<ITransformer> dataProcessPipeline = mlContext.Transforms.Concatenate("Features", featureColumnNames)
        .Append(mlContext.Transforms.DropColumns(new string[] { nameof(TransactionObservation.Time) }))
        .Append(mlContext.Transforms.NormalizeLpNorm(outputColumnName: "NormalizedFeatures", inputColumnName: "Features"));

    // The anomaly-detection learner assumes all training examples have label 0
    // (it learns only from normal examples), so filter Label to [0, 1).
    IDataView normalTrainDataView = mlContext.Data.FilterRowsByColumn(trainDataView, columnName: nameof(TransactionObservation.Label), lowerBound: 0, upperBound: 1);

    // (OPTIONAL) Peek a couple of transformed rows for debugging.
    ConsoleHelper.PeekDataViewInConsole(mlContext, normalTrainDataView, dataProcessPipeline, 2);
    ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "NormalizedFeatures", normalTrainDataView, dataProcessPipeline, 2);

    var options = new RandomizedPcaTrainer.Options
    {
        FeatureColumnName = "NormalizedFeatures", // must be a known-sized vector of Single
        ExampleWeightColumnName = null,           // no example weights
        Rank = 28,                                // number of PCA components
        Oversampling = 20,                        // oversampling for randomized PCA training
        EnsureZeroMean = true,                    // center the data to zero mean
        Seed = 1                                  // deterministic RNG seed
    };

    // Randomized-PCA anomaly detector appended to the data pipeline.
    IEstimator<ITransformer> trainer = mlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);
    EstimatorChain<ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

    ConsoleHelper.ConsoleWriteHeader("=============== Training model ===============");
    TransformerChain<ITransformer> model = trainingPipeline.Fit(normalTrainDataView);
    ConsoleHelper.ConsoleWriteHeader("=============== End of training process ===============");

    return (model);
}
/// <summary>
/// Runs the CenterFace ONNX pipeline on a single image loaded from disk.
/// </summary>
/// <param name="imagepath">Path to the image file to score.</param>
public void Predict(string imagepath)
{
    // Fit on an empty data view just to materialize the transformer chain.
    EstimatorChain<OnnxTransformer> pipeline = CreatePipeline();
    IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
    TransformerChain<OnnxTransformer> transformer = pipeline.Fit(emptyFitData);

    // Wrap the single image in a one-row data view.
    IDataView emptyTestData = mlContext.Data.LoadFromEnumerable(new List<CenterFaceImageInput>()
    {
        new CenterFaceImageInput()
        {
            Image = (Bitmap)Bitmap.FromFile(imagepath)
        }
    });

    // NOTE(review): the transformed output is discarded and the method returns
    // void — confirm whether the predictions should be returned or stored.
    var res = transformer.Transform(emptyTestData);
}
/// <summary>
/// Page load: resets the model state, then — on a GET request carrying both
/// "pSql" and "pLab" query-string values — runs a single prediction for the
/// supplied SQL text.
/// </summary>
protected void Page_Load(object sender, EventArgs e)
{
    _mlContext = null;
    _model = null;
    _trainingPipeline = null;
    _trainer = null;
    _data = null;

    if (!IsPostBack)
    {
        if (!String.IsNullOrEmpty(Request.QueryString["pSql"]) && !String.IsNullOrEmpty(Request.QueryString["pLab"]))
        {
            // Query string values are present, so use them.
            // NOTE(review): Convert.ToInt32 throws FormatException for a
            // non-numeric "pLab" value — consider int.TryParse.
            pSql = Convert.ToString(Request.QueryString["pSql"]);
            iLabel = Convert.ToInt32(Request.QueryString["pLab"]);
        }

        if (pSql != "")
        {
            if (iLabel == 0 || iLabel == 1)
            {
                Debug.Print("pSql: " + pSql + ", pLab: " + iLabel.ToString());
                bUrlRequest = true;

                // Get setup state and training data file state.
                GetSetup();

                // Call UseModelWithSingleItem for the supplied SQL text.
                UseModelWithSingleItem(txtResult, pSql);

                // Auto-close pages when testing many URL requests.
                //ClosePages();
            }
            else
            {
                // Label outside {0, 1}: mark the request as invalid.
                iLabel = -1;
                bUrlRequest = false;
            }
        }
        else
        {
            bUrlRequest = false;
        }
    }
    else // Input fields and submit button were used on the site, so no label value is available.
    {
        iLabel = -1;
    }
}
/// <summary>
/// Builds the question-to-answer classification pipeline, trains it on
/// <paramref name="data"/>, and creates the prediction engine.
/// </summary>
/// <param name="data">Training data containing MLEntry rows.</param>
/// <param name="pipeline">NOTE(review): never used — a new pipeline is built
/// locally below; confirm whether this parameter can be removed at call sites.</param>
private void buildAndTrainModel(IDataView data, EstimatorChain<ITransformer> pipeline)
{
    // Label = key-encoded Answer; features = featurized Question text.
    var trainingPipeline = _mlContext.Transforms.Conversion
        .MapValueToKey(inputColumnName: nameof(MLEntry.Answer), outputColumnName: "Label")
        .Append(_mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(MLEntry.Question), outputColumnName: "QuestionFeaturized"))
        .AppendCacheCheckpoint(_mlContext)
        .Append(_mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "QuestionFeaturized"))
        .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    _trainedModel = trainingPipeline.Fit(data);
    _predictionEngine = _mlContext.Model.CreatePredictionEngine<MLEntry, AnswerPrediction>(_trainedModel);
}
/// <summary>
/// Fits the supplied pipeline on DataView, optionally appending the requested
/// training algorithm and — for alphanumeric labels — a PredictedLabel
/// key-to-value mapping step.
/// </summary>
/// <param name="algorithmType">Optional algorithm to append before fitting.</param>
/// <param name="pipeline">The preprocessing pipeline to fit.</param>
/// <returns>The fitted transformer.</returns>
private ITransformer GetModel(AlgorithmType? algorithmType, EstimatorChain<TransformerChain<ColumnConcatenatingTransformer>> pipeline)
{
    if (_predictedColumn.IsAlphanumeric)
    {
        // Alphanumeric labels need the predicted key mapped back to its value.
        return (algorithmType != null ?
            pipeline.Append(GetAlgorithm(algorithmType.Value)).Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")).Fit(DataView) :
            pipeline.Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")).Fit(DataView));
    }

    if (algorithmType != null)
    {
        return (pipeline.Append(GetAlgorithm(algorithmType.Value)).Fit(DataView));
    }

    return (pipeline.Fit(DataView));
}