// Builds and trains a multiclass signal classifier (One-Versus-All over FastTree)
// from frame-based sensor readings, then prints a confusion matrix for a 20% hold-out.
// NOTE(review): the OVA/FastTree trainer appears twice — once inside estimatorPipeline
// (fitted into `transformer`) and again as OVAEstimator fitted on the data already
// transformed by that first fit. The second model is therefore trained on the output
// of a previously trained copy of itself — confirm this double-fit is intentional.
public SignalClassifierController(string frameSize, string sensorType, string[] datasets, string[] labels)
{
    mlContext = new MLContext();
    categories = labels;
    var reader = getFrameReader(frameSize, sensorType);
    var trainingDataView = reader.Load(datasets);
    // Hold out 20% of the rows for evaluation.
    var split = mlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.2);
    estimatorPipeline = mlContext.Transforms.Conversion.MapValueToKey("Label")
        .Append(mlContext.Transforms.NormalizeMinMax("readings", fixZero: true))
        .Append(mlContext.MulticlassClassification.Trainers
            .OneVersusAll(mlContext.BinaryClassification.Trainers
                .FastTree(featureColumnName: "readings")));
    // Alternative trainers tried during experimentation:
    // .Append(mlContext.MulticlassClassification.Trainers
    //     .NaiveBayes(featureColumnName: "readings"));
    // .Append(mlContext.MulticlassClassification.Trainers
    //     .OneVersusAll(mlContext.BinaryClassification.Trainers
    //         .LbfgsLogisticRegression(featureColumnName: "readings")));
    // .Append(mlContext.MulticlassClassification.Trainers
    //     .OneVersusAll(mlContext.BinaryClassification.Trainers
    //         .LdSvm(featureColumnName: "readings")));
    transformer = estimatorPipeline.Fit(split.TrainSet);
    // Alternative estimators tried during experimentation:
    // var OVAEstimator = mlContext.MulticlassClassification.Trainers
    //     .OneVersusAll(mlContext.BinaryClassification.Trainers
    //         .LbfgsLogisticRegression(featureColumnName: "readings"));
    // var OVAEstimator = mlContext.MulticlassClassification.Trainers
    //     .OneVersusAll(mlContext.BinaryClassification.Trainers
    //         .LdSvm(featureColumnName: "readings"));
    // var NBEstimator = mlContext.MulticlassClassification.Trainers
    //     .NaiveBayes(featureColumnName: "readings");
    var OVAEstimator = mlContext.MulticlassClassification.Trainers
        .OneVersusAll(mlContext.BinaryClassification.Trainers
            .FastTree(featureColumnName: "readings"));
    var transformedTrainingData = transformer.Transform(split.TrainSet);
    model = OVAEstimator.Fit(transformedTrainingData);
    // model = NBEstimator.Fit(transformedTrainingData);
    Console.WriteLine("Model fitted");
    // Score the hold-out set and print the confusion matrix.
    var transformedTestData = transformer.Transform(split.TestSet);
    var testPredictions = model.Transform(transformedTestData);
    Console.WriteLine(mlContext.MulticlassClassification.Evaluate(testPredictions).ConfusionMatrix.GetFormattedConfusionTable());
}
/// <summary>
/// Step 5: scores the trained regression model on the test set and prints its metrics.
/// </summary>
/// <param name="trainedModel">Fitted regression transformer chain.</param>
/// <param name="testDataView">Held-out test data to score.</param>
/// <param name="mlContext">ML context used for evaluation.</param>
private static async Task PerformStep5(TransformerChain<RegressionPredictionTransformer<LinearRegressionPredictor>> trainedModel, IDataView testDataView, MLContext mlContext)
{
    IDataView scored = trainedModel.Transform(testDataView);
    var regressionMetrics = mlContext.Regression.Evaluate(scored, "Label", "Score");
    await PrintRegressionMetrics("", regressionMetrics);
}
// Applies the hashing transformer followed by the count table to the input view.
public IDataView Transform(IDataView input)
{
    _host.CheckValue(input, nameof(input));
    var pipeline = new TransformerChain<ITransformer>(HashingTransformer, CountTable);
    return pipeline.Transform(input);
}
// Scores the held-out house data with the trained model and stores the
// resulting regression metrics in the `metrics` field.
private void Metrics()
{
    IDataView houseView = mlContext.Data.LoadFromEnumerable(testHouseData);
    IDataView scoredView = model.Transform(houseView);
    metrics = mlContext.Regression.Evaluate(scoredView, labelColumnName: "Price");
}
// Benchmark setup (legacy ML.NET API): trains the iris model, warms up the
// prediction engine, evaluates on the test file, and pre-builds prediction
// batches of each configured size.
public void SetupPredictBenchmarks()
{
    _trainedModel = Train(_dataPath);
    _predictionEngine = _trainedModel.CreatePredictionEngine<IrisData, IrisPrediction>(_env);
    // Warm-up call so JIT/first-use costs are excluded from the measured runs.
    _consumer.Consume(_predictionEngine.Predict(_example));

    var loader = new TextLoader(_env, columns: new[]
    {
        new TextLoader.Column("Label", DataKind.R4, 0),
        new TextLoader.Column("SepalLength", DataKind.R4, 1),
        new TextLoader.Column("SepalWidth", DataKind.R4, 2),
        new TextLoader.Column("PetalLength", DataKind.R4, 3),
        new TextLoader.Column("PetalWidth", DataKind.R4, 4),
    }, hasHeader: true);

    IDataView testView = loader.Read(_dataPath);
    IDataView scoredView = _trainedModel.Transform(testView);
    var evaluator = new MultiClassClassifierEvaluator(_env, new MultiClassClassifierEvaluator.Arguments());
    _metrics = evaluator.Evaluate(scoredView, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);

    // Pre-allocate the batches, each filled with copies of the warm-up example.
    _batches = new IrisData[_batchSizes.Length][];
    for (int i = 0; i < _batchSizes.Length; i++)
    {
        var batch = new IrisData[_batchSizes[i]];
        for (int j = 0; j < batch.Length; j++)
        {
            batch[j] = _example;
        }
        _batches[i] = batch;
    }
}
/// <summary>
/// Trains a RandomizedPca anomaly-detection model from the packets in storage,
/// saves it to <paramref name="modelFileName"/>, and returns metrics from the test split.
/// </summary>
/// <param name="storage">Data access layer used to query clean and malicious packets.</param>
/// <param name="modelFileName">Path the trained model is saved to.
/// NOTE(review): this method overwrites the file, yet requires it to already exist —
/// confirm the pre-existence check is intentional before changing it.</param>
/// <returns>Anomaly-detection metrics plus row counts and the training duration.</returns>
/// <exception cref="ArgumentNullException">storage is null or modelFileName is null/empty.</exception>
/// <exception cref="FileNotFoundException">modelFileName does not exist.</exception>
public async Task<ModelMetrics> GenerateModel(BaseDAL storage, string modelFileName)
{
    if (storage == null)
    {
        Log.Error("Trainer::GenerateModel - BaseDAL is null");
        throw new ArgumentNullException(nameof(storage));
    }
    if (string.IsNullOrEmpty(modelFileName))
    {
        Log.Error("Trainer::GenerateModel - modelFileName is null");
        throw new ArgumentNullException(nameof(modelFileName));
    }
    if (!File.Exists(modelFileName))
    {
        Log.Error($"Trainer::GenerateModel - {modelFileName} does not exist");
        throw new FileNotFoundException(modelFileName);
    }
    // Stopwatch measures elapsed time monotonically; DateTime.Now subtraction can
    // be wrong across DST transitions or system clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    var options = new RandomizedPcaTrainer.Options
    {
        FeatureColumnName = FEATURES,
        ExampleWeightColumnName = null,
        Rank = 4,
        Oversampling = 20,
        EnsureZeroMean = true,
        Seed = Constants.ML_SEED
    };
    var (data, cleanRowCount, maliciousRowCount) = GetDataView(
        await storage.QueryPacketsAsync(a => a.IsClean),
        await storage.QueryPacketsAsync(a => !a.IsClean));
    // Concatenate all PayloadItem properties (except the label) into the feature vector.
    IEstimator<ITransformer> dataProcessPipeline = _mlContext.Transforms.Concatenate(
        FEATURES, typeof(PayloadItem).ToPropertyList<PayloadItem>(nameof(PayloadItem.Label)));
    IEstimator<ITransformer> trainer = _mlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);
    EstimatorChain<ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);
    TransformerChain<ITransformer> trainedModel = trainingPipeline.Fit(data.TrainSet);
    _mlContext.Model.Save(trainedModel, data.TrainSet.Schema, modelFileName);
    var testSetTransform = trainedModel.Transform(data.TestSet);
    return new ModelMetrics
    {
        Metrics = _mlContext.AnomalyDetection.Evaluate(testSetTransform),
        NumCleanRows = cleanRowCount,
        NumMaliciousRows = maliciousRowCount,
        Duration = stopwatch.Elapsed
    };
}
/// <summary>
/// Scores the trained regression model against the test data and computes accuracy metrics.
/// </summary>
/// <param name="mlContext">ML context used for evaluation.</param>
/// <param name="testDataView">Held-out test data.</param>
/// <param name="trainedModel">Fitted regression transformer chain.</param>
/// <returns>Regression accuracy metrics.</returns>
private static RegressionMetrics Evaluate(MLContext mlContext, IDataView testDataView, TransformerChain<RegressionPredictionTransformer<LinearRegressionModelParameters>> trainedModel)
{
    Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
    var scored = trainedModel.Transform(testDataView);
    return mlContext.Regression.Evaluate(scored, labelColumnName: "Label", scoreColumnName: "Score");
}
// Runs the line through the transformer chain and writes the result;
// lines the chain maps to null are suppressed.
public void WriteLine(string line)
{
    var transformed = transformerChain.Transform(line);
    if (transformed == null)
    {
        return;
    }
    outputStream.WriteLine(transformed);
}
// Entry point: loads Owntracks location history, converts it to transition
// records, trains a calibrated linear-SVM binary classifier over them,
// evaluates on a test split, and saves the trained model to disk.
internal static async Task Main(string[] args)
{
    if (args is null)
    {
        throw new ArgumentNullException(nameof(args));
    }

    var configuration = new ConfigurationBuilder()
        .AddJsonFile("appsettings.json")
        .AddEnvironmentVariables()
        .AddUserSecrets(typeof(Program).Assembly)
        .Build();
    var owntracksSettings = configuration.GetSection("Owntracks").Get<OwntracksSettings>();
    var locations = await QueryDataAsync(owntracksSettings).ConfigureAwait(false);

    var mlContext = new MLContext(seed: 1);
    var dataView = mlContext.Data.LoadFromEnumerable(LocationsToTransitionsConverter.Convert(locations));
    var trainTestData = mlContext.Data.TrainTestSplit(dataView);

    // One-hot encode every categorical column, concatenate them into "Features",
    // then train a linear SVM calibrated with Platt scaling.
    var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new[]
    {
        new InputOutputColumnPair(nameof(Transition.User), nameof(Transition.User)),
        new InputOutputColumnPair(nameof(Transition.Device), nameof(Transition.Device)),
        new InputOutputColumnPair(nameof(Transition.FromDayOfWeek), nameof(Transition.FromDayOfWeek)),
        new InputOutputColumnPair(nameof(Transition.FromHours), nameof(Transition.FromHours)),
        new InputOutputColumnPair(nameof(Transition.FromGeohash), nameof(Transition.FromGeohash)),
        new InputOutputColumnPair(nameof(Transition.ToDayOfWeek), nameof(Transition.ToDayOfWeek)),
        new InputOutputColumnPair(nameof(Transition.ToHours), nameof(Transition.ToHours)),
        new InputOutputColumnPair(nameof(Transition.ToGeohash), nameof(Transition.ToGeohash))
    }, OneHotEncodingEstimator.OutputKind.Binary)
        .Append(mlContext.Transforms.Concatenate("Features",
            nameof(Transition.User), nameof(Transition.Device),
            nameof(Transition.FromDayOfWeek), nameof(Transition.FromHours), nameof(Transition.FromGeohash),
            nameof(Transition.ToDayOfWeek), nameof(Transition.ToHours), nameof(Transition.ToGeohash)))
        .Append(mlContext.BinaryClassification.Trainers.LinearSvm())
        .Append(mlContext.BinaryClassification.Calibrators.Platt());

    Console.WriteLine("Training model...");
    var model = pipeline.Fit(trainTestData.TrainSet);

    Console.WriteLine("Predicting...");
    // Score the test split and report classification metrics.
    var predictions = model.Transform(trainTestData.TestSet);
    var metrics = mlContext.BinaryClassification.Evaluate(predictions);
    PrintBinaryClassificationMetrics(metrics);

    var mlSettings = configuration.GetSection("ML").Get<MLSettings>();
    mlContext.Model.Save(model, dataView.Schema, mlSettings.ModelPath);
}
/// <summary>
/// Scores the given test points with the trained model and returns the predictions.
/// </summary>
/// <param name="testDataPoints">Points to score.</param>
/// <param name="printConfusionMatrix">When true, also prints multiclass metrics.</param>
/// <returns>One prediction per input point.</returns>
/// <exception cref="InvalidOperationException">The model has not been trained yet.</exception>
public List<PredictionModel> Test(List<DataPoint> testDataPoints, bool printConfusionMatrix = true)
{
    if (!IsModelTrained)
    {
        // InvalidOperationException is the idiomatic type for "object in the wrong
        // state"; it derives from Exception, so existing catch blocks still match.
        throw new InvalidOperationException("Cannot test on untrained model");
    }
    IDataView testData = _mlContext.Data.LoadFromEnumerable(testDataPoints);
    IDataView transformedTestData = _trainedModel.Transform(testData);
    List<PredictionModel> predictions = _mlContext.Data
        .CreateEnumerable<PredictionModel>(transformedTestData, reuseRowObject: false)
        .ToList();
    if (printConfusionMatrix)
    {
        PrintMetrics(_mlContext.MulticlassClassification.Evaluate(transformedTestData));
    }
    return predictions;
}
// Runs the CenterFace ONNX pipeline on a single image loaded from disk.
public void Predict(string imagepath)
{
    var pipeline = CreatePipeline();
    // ONNX transformers need no training; fitting on an empty view just wires up the chain.
    IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
    var chain = pipeline.Fit(emptyFitData);
    IDataView inputView = mlContext.Data.LoadFromEnumerable(new List<CenterFaceImageInput>
    {
        new CenterFaceImageInput
        {
            Image = (Bitmap)Bitmap.FromFile(imagepath)
        }
    });
    // NOTE(review): the result is never enumerated; ML.NET transforms are lazy,
    // so scoring may not actually execute here — confirm intent.
    var res = chain.Transform(inputView);
}
// Loads the test data file, scores it with the trained model, and prints
// R² and RMS regression quality metrics to the console.
public void Evaluate()
{
    IDataView testView = _textLoader.Read(_testDataPath);
    var scored = _model.Transform(testView);
    var metrics = _mlContext.Regression.Evaluate(scored, "Label", "Score");
    Console.WriteLine();
    Console.WriteLine($"*************************************************");
    Console.WriteLine($"* Model quality metrics evaluation ");
    Console.WriteLine($"*------------------------------------------------");
    Console.WriteLine($"* R2 Score: {metrics.RSquared:0.##}");
    Console.WriteLine($"* RMS loss: {metrics.Rms:#.##}");
}
// Trains a RandomizedPca anomaly detector from the training file, saves the
// model, then evaluates it against the testing file and prints the results.
public void Train(string trainingFileName, string testingFileName)
{
    if (!File.Exists(trainingFileName))
    {
        Console.WriteLine($"Failed to find training data file ({trainingFileName}");
        return;
    }
    if (!File.Exists(testingFileName))
    {
        Console.WriteLine($"Failed to find test data file ({testingFileName}");
        return;
    }

    var trainingDataView = GetDataView(trainingFileName);
    var pcaOptions = new RandomizedPcaTrainer.Options
    {
        FeatureColumnName = FEATURES,
        ExampleWeightColumnName = null,
        Rank = 5,
        Oversampling = 20,
        EnsureZeroMean = true,
        Seed = 1
    };
    IEstimator<ITransformer> trainer = MlContext.AnomalyDetection.Trainers.RandomizedPca(options: pcaOptions);
    EstimatorChain<ITransformer> trainingPipeline = trainingDataView.Transformer.Append(trainer);
    TransformerChain<ITransformer> trainedModel = trainingPipeline.Fit(trainingDataView.DataView);
    MlContext.Model.Save(trainedModel, trainingDataView.DataView.Schema, ModelPath);

    // Score the test file and report the headline anomaly-detection metrics.
    var testingDataView = GetDataView(testingFileName, true);
    var scoredTestSet = trainedModel.Transform(testingDataView.DataView);
    var modelMetrics = MlContext.AnomalyDetection.Evaluate(scoredTestSet);
    Console.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}{Environment.NewLine}" +
        $"Detection at FP Count: {modelMetrics.DetectionRateAtFalsePositiveCount}");
}
// Benchmark setup: trains the iris model, warms up the prediction engine,
// evaluates on the test file, and pre-builds prediction batches of each size.
public void SetupPredictBenchmarks()
{
    _trainedModel = Train(_dataPath);
    _predictionEngine = _mlContext.Model.CreatePredictionEngine<IrisData, IrisPrediction>(_trainedModel);
    // Warm-up call so JIT/first-use costs are excluded from the measured runs.
    _consumer.Consume(_predictionEngine.Predict(_example));

    // Create text loader.
    var loaderOptions = new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column("SepalLength", DataKind.Single, 1),
            new TextLoader.Column("SepalWidth", DataKind.Single, 2),
            new TextLoader.Column("PetalLength", DataKind.Single, 3),
            new TextLoader.Column("PetalWidth", DataKind.Single, 4),
        },
        HasHeader = true,
    };
    var loader = new TextLoader(_mlContext, options: loaderOptions);

    IDataView testView = loader.Load(_dataPath);
    _scoredIrisTestData = _trainedModel.Transform(testView);
    _evaluator = new MulticlassClassificationEvaluator(_mlContext, new MulticlassClassificationEvaluator.Arguments());
    _metrics = _evaluator.Evaluate(_scoredIrisTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);

    // Pre-allocate the batches, each filled with copies of the warm-up example.
    _batches = new IrisData[_batchSizes.Length][];
    for (int i = 0; i < _batchSizes.Length; i++)
    {
        var batch = new IrisData[_batchSizes[i]];
        for (int j = 0; j < batch.Length; j++)
        {
            batch[j] = _example;
        }
        _batches[i] = batch;
    }
}
// Delegates transformation to the wrapped transformer.
public IDataView Transform(IDataView input)
{
    return _transformer.Transform(input);
}
// Benchmark body: scores a pre-built batch of one iris example.
public void PredictIrisBatchOf1()
{
    _trainedModel.Transform(_mlContext.Data.LoadFromEnumerable(_batches[0]));
}
// Benchmark body (legacy API): scores a pre-built batch of one iris example.
public void PredictIrisBatchOf1()
{
    _trainedModel.Transform(_env.CreateStreamingDataView(_batches[0]));
}
// Builds the n-gram extraction transformer chain for the given columns.
// Text-typed source columns are first mapped to keys (term transform), optionally
// dropping rows with unknown terms, and then the n-gram transform is applied to all
// columns. The chain is fitted incrementally on `input` as it is assembled, so the
// statement order here is load-bearing.
internal static ITransformer Create(IHostEnvironment env, Options options, IDataView input, TermLoaderArguments termLoaderArgs = null)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(LoaderSignature);
    h.CheckValue(options, nameof(options));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns), "Columns must be specified");
    var chain = new TransformerChain<ITransformer>();
    var termCols = new List<Column>();
    // isTermCol[i] records whether column i is text-typed (needs the term transform).
    var isTermCol = new bool[options.Columns.Length];
    for (int i = 0; i < options.Columns.Length; i++)
    {
        var col = options.Columns[i];
        h.CheckNonWhiteSpace(col.Name, nameof(col.Name));
        h.CheckNonWhiteSpace(col.Source, nameof(col.Source));
        int colId;
        if (input.Schema.TryGetColumnIndex(col.Source, out colId) &&
            input.Schema[colId].Type.GetItemType() is TextDataViewType)
        {
            termCols.Add(col);
            isTermCol[i] = true;
        }
    }
    // If the column types of args.column are text, apply term transform to convert them to keys.
    // Otherwise, skip term transform and apply n-gram transform directly.
    // This logic allows NgramExtractorTransform to handle both text and key input columns.
    // Note: n-gram transform handles the validation of the types natively (in case the types
    // of args.column are not text nor keys).
    if (termCols.Count > 0)
    {
        var columnOptions = new List<ValueToKeyMappingEstimator.ColumnOptionsBase>();
        // Only allocate the drop-column list when unknown terms should be dropped.
        string[] missingDropColumns = termLoaderArgs != null && termLoaderArgs.DropUnknowns ? new string[termCols.Count] : null;
        for (int iinfo = 0; iinfo < termCols.Count; iinfo++)
        {
            var column = termCols[iinfo];
            // Maximum key count resolution order: per-column setting, then the global
            // option, then the estimator default (or unbounded when a term loader is given).
            var colOptions = new ValueToKeyMappingEstimator.ColumnOptions(
                column.Name,
                column.Source,
                maximumNumberOfKeys: Utils.Size(column.MaxNumTerms) > 0 ? column.MaxNumTerms[0] :
                    Utils.Size(options.MaxNumTerms) > 0 ? options.MaxNumTerms[0] :
                    termLoaderArgs == null ? NgramExtractingEstimator.Defaults.MaximumNgramsCount : int.MaxValue,
                keyOrdinality: termLoaderArgs?.Sort ?? ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence);
            if (termLoaderArgs != null)
            {
                colOptions.Key = termLoaderArgs.Term;
                colOptions.Keys = termLoaderArgs.Terms;
            }
            columnOptions.Add(colOptions);
            if (missingDropColumns != null)
            {
                missingDropColumns[iinfo] = column.Name;
            }
        }
        // Optionally load the key vocabulary from an external data file.
        IDataView keyData = null;
        if (termLoaderArgs?.DataFile != null)
        {
            using (var ch = env.Start("Create key data view"))
                keyData = ValueToKeyMappingTransformer.GetKeyDataViewOrNull(env, ch, termLoaderArgs.DataFile, termLoaderArgs.TermsColumn, termLoaderArgs.Loader, out var autoConvert);
        }
        chain = chain.Append<ITransformer>(new ValueToKeyMappingEstimator(h, columnOptions.ToArray(), keyData).Fit(input));
        if (missingDropColumns != null)
        {
            chain = chain.Append<ITransformer>(new MissingValueDroppingTransformer(h, missingDropColumns.Select(x => (x, x)).ToArray()));
        }
    }
    // Per-column n-gram options fall back to the global options when unset.
    var ngramColumns = new NgramExtractingEstimator.ColumnOptions[options.Columns.Length];
    for (int iinfo = 0; iinfo < options.Columns.Length; iinfo++)
    {
        var column = options.Columns[iinfo];
        ngramColumns[iinfo] = new NgramExtractingEstimator.ColumnOptions(column.Name,
            column.NgramLength ?? options.NgramLength,
            column.SkipLength ?? options.SkipLength,
            column.UseAllLengths ?? options.UseAllLengths,
            column.Weighting ?? options.Weighting,
            column.MaxNumTerms ?? options.MaxNumTerms,
            // Term-mapped columns read from the keyed output column; others from the raw source.
            isTermCol[iinfo] ? column.Name : column.Source
            );
    }
    // Fit the n-gram estimator on the output of the already-built chain.
    input = chain.Transform(input);
    return(chain.Append<ITransformer>(new NgramExtractingEstimator(env, ngramColumns).Fit(input)));
}
// Classifies one frame of raw sensor samples: wraps the samples in the frame
// type matching (sensorType, frameSize), runs the feature pipeline and the
// trained model, and returns the predicted category with its score vector.
public PredictionResult predict(string frameSize, string sensorType, short[] data)
{
    // Every frame type takes the same float vector of readings.
    float[] readings = data.Select(d => (float)d).ToArray();

    // Wrap a single frame object in a one-row data view.
    IDataView Single<T>(T frame) where T : class =>
        mlContext.Data.LoadFromEnumerable(Enumerable.Repeat(frame, 1));

    IDataView dataView;
    if (sensorType == "four")
    {
        dataView = frameSize switch
        {
            "half" => Single(new FourSensorsHalfFrame() { readings = readings }),
            "double" => Single(new FourSensorsDoubleFrame() { readings = readings }),
            _ => Single(new FourSensorsSingleFrame() { readings = readings }),
        };
    }
    else
    {
        dataView = frameSize switch
        {
            "half" => Single(new TwoSensorsHalfFrame() { readings = readings }),
            "double" => Single(new TwoSensorsDoubleFrame() { readings = readings }),
            _ => Single(new TwoSensorsSingleFrame() { readings = readings }),
        };
    }

    var transformedData = transformer.Transform(dataView);
    var predictedData = model.Transform(transformedData);
    var predictions = mlContext.Data.CreateEnumerable<Prediction>(predictedData, reuseRowObject: false).ToArray();
    // Predicted labels are 1-based key values; shift to a 0-based index into `categories`.
    var predictedIndex = predictions[0].PredictedLabel - 1;
    return new PredictionResult(categories[predictedIndex], predictions[0].Score);
}
// Scores the trained multiclass model on the test set and returns its
// micro- and macro-averaged accuracy.
private static (double microAccuracy, double macroAccuracy) Evaluate(MLContext ml, IDataView testDataView, TransformerChain<Microsoft.ML.Transforms.KeyToValueMappingTransformer> trainedModel)
{
    IDataView scored = trainedModel.Transform(testDataView);
    var metrics = ml.MulticlassClassification.Evaluate(scored);
    return (metrics.MicroAccuracy, metrics.MacroAccuracy);
}
/// <summary>
/// Trains a language-detection multiclass classifier from data.corpus, prints
/// its test-set metrics, and saves the trained model to disk.
/// </summary>
private static void TrainModel()
{
    MLContext context = new MLContext(0);
    IDataView dataView = context.Data.LoadFromTextFile<LanguageSentence>(@"data.corpus");
    // Hold out 20% of the corpus for evaluation.
    DataOperationsCatalog.TrainTestData data = context.Data.TrainTestSplit(dataView, 0.2D);
    EstimatorChain<KeyToValueMappingTransformer> pipeline =
        context.Transforms.Conversion.MapValueToKey("Label", nameof(LanguageSentence.Label))
        .Append(context.Transforms.Text.FeaturizeText("Features", nameof(LanguageSentence.Sentence)))
        .AppendCacheCheckpoint(context)
        .Append(context.MulticlassClassification.Trainers.SdcaMaximumEntropy())
        .Append(context.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
    TransformerChain<KeyToValueMappingTransformer> model = pipeline.Fit(data.TrainSet);
    // Use InvariantCulture for both timestamps (CA1305): the original mixed a
    // culture-less ToString() with an invariant one, making the log inconsistent.
    Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Starting time: {DateTime.Now.ToString(CultureInfo.InvariantCulture)} ===============");
    MulticlassClassificationMetrics testMetrics = context.MulticlassClassification.Evaluate(model.Transform(data.TestSet));
    Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Ending time: {DateTime.Now.ToString(CultureInfo.InvariantCulture)} ===============");
    Console.WriteLine($"*************************************************************************************************************");
    Console.WriteLine($"* Metrics for Multi-class Classification model - Test Data ");
    Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
    Console.WriteLine($"* MicroAccuracy: {testMetrics.MicroAccuracy:0.###}");
    Console.WriteLine($"* MacroAccuracy: {testMetrics.MacroAccuracy:0.###}");
    Console.WriteLine($"* LogLoss: {testMetrics.LogLoss:#.###}");
    Console.WriteLine($"* LogLossReduction: {testMetrics.LogLossReduction:#.###}");
    Console.WriteLine($"*************************************************************************************************************");
    context.Model.Save(model, data.TrainSet.Schema, @"language-detection.model");
}
// Benchmark body: scores a pre-built batch of one iris example.
public void PredictIrisBatchOf1()
{
    _trainedModel.Transform(_env.Data.ReadFromEnumerable(_batches[0]));
}