public void Train()
{
    // Fresh hosting environment for this training run.
    LocalEnvironment = new LocalEnvironment();

    string trainingDataPath = "Data//PassengerData.txt";

    // Describe the layout of the comma-separated training file.
    var loader = new TextLoader(LocalEnvironment, new TextLoader.Arguments()
    {
        Separator = ",",
        HasHeader = true,
        Column = new[]
        {
            new TextLoader.Column("Gender", DataKind.R4, 0),
            new TextLoader.Column("Nationality", DataKind.R4, 1),
            new TextLoader.Column("year", DataKind.R4, 2),
            new TextLoader.Column("Label", DataKind.Text, 3)
        }
    });

    IDataView trainingData = loader.Read(new MultiFileSource(trainingDataPath));

    // Key-encode the label, concatenate the numeric columns into Features,
    // train an SDCA multi-class model, then map the predicted key back to
    // its original text value.
    var trainingPipeline = new TermEstimator(LocalEnvironment, "Label", "Label")
        .Append(new ConcatEstimator(LocalEnvironment, "Features", "Gender", "Nationality", "year"))
        .Append(new SdcaMultiClassTrainer(LocalEnvironment, new SdcaMultiClassTrainer.Arguments()))
        .Append(new KeyToValueEstimator(LocalEnvironment, "PredictedLabel"));

    Model = trainingPipeline.Fit(trainingData);
}
private static void SaveModelAsFile(LocalEnvironment env, TransformerChain <BinaryPredictionTransformer <Microsoft.ML.Runtime.Internal.Internallearn.IPredictorWithFeatureWeights <float> > > model)
{
    // Persist the trained chain to ModelPath, overwriting any existing file.
    using (var modelStream = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
    {
        model.SaveTo(env, modelStream);
    }

    Console.WriteLine("The model is saved to {0}", ModelPath);
}
private static void SaveModelAsFile(LocalEnvironment env, TransformerChain <ClusteringPredictionTransformer <KMeansPredictor> > model)
{
    // Serialize the clustering model chain to ModelPath (created/truncated).
    using (var modelStream = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
    {
        model.SaveTo(env, modelStream);
    }

    Console.WriteLine("The model is saved to {0}", ModelPath);
}
private static void PredictWithModelLoadedFromFile(IrisData sampleData)
{
    // Test with Loaded Model from .zip file
    using (var env = new LocalEnvironment())
    {
        ITransformer loadedModel;
        using (var stream = new FileStream(ModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            loadedModel = TransformerChain.LoadFrom(env, stream);
        }

        // Create prediction engine and make prediction.
        // BUGFIX: the four measurements were shifted into the wrong fields
        // (PetalWidth = 5.1 exceeded SepalLength = 3.3, which is impossible).
        // This is the canonical setosa sample: sepal 5.1 x 3.3, petal 1.6 x 0.2.
        var prediction = loadedModel.MakePredictionFunction <IrisData, IrisPrediction>(env).Predict(
            new IrisData()
        {
            SepalLength = 5.1f,
            SepalWidth = 3.3f,
            PetalLength = 1.6f,
            PetalWidth = 0.2f,
        });

        Console.WriteLine();
        Console.WriteLine($"Clusters assigned for setosa flowers:" + prediction.SelectedClusterId);
    }
}
public IDataView Transform(IDataView input)
{
    _host.CheckValue(input, nameof(input));

    // Delegate to a two-stage chain: hashing first, then the count table.
    var composed = new TransformerChain <ITransformer>(HashingTransformer, CountTable);
    return composed.Transform(input);
}
public static object GetPredictionEngine(MLTypesGenerator typesGenerator, TransformerChain <Microsoft.ML.Transforms.KeyToValueMappingTransformer> model)
{
    // Resolve the generated type's "GetPredictionEngine" method by name and
    // invoke it via reflection (static method, so no target instance).
    var factoryMethod = typesGenerator.GeneratorType.GetMethod("GetPredictionEngine");
    return factoryMethod.Invoke(null, new object[] { model });
}
IRowToRowMapper ITransformer.GetRowToRowMapper(DataViewSchema inputSchema)
{
    _host.CheckValue(inputSchema, nameof(inputSchema));

    // The mapper comes from the same hashing + count-table chain that
    // Transform uses.
    ITransformer composed = new TransformerChain <ITransformer>(HashingTransformer, CountTable);
    return composed.GetRowToRowMapper(inputSchema);
}
public DataViewSchema GetOutputSchema(DataViewSchema inputSchema)
{
    _host.CheckValue(inputSchema, nameof(inputSchema));

    // Compute the schema by running it through the hashing + count-table chain.
    var composed = new TransformerChain <ITransformer>(HashingTransformer, CountTable);
    return composed.GetOutputSchema(inputSchema);
}
public void Train()
{
    // Matrix-factorization hyperparameters: the key-encoded user/post ids
    // serve as the matrix column/row indices.
    var options = new MatrixFactorizationTrainer.Options
    {
        MatrixColumnIndexColumnName = "UserIdEncoded",
        MatrixRowIndexColumnName = "PostIdEncoded",
        LabelColumnName = "Label",
        NumberOfIterations = 20,
        ApproximationRank = 100
    };

    // Step 1: map UserId and PostId to key types.
    var userIdToKey = _mlContext
        .Transforms
        .Conversion
        .MapValueToKey(
            inputColumnName: "UserId",
            outputColumnName: "UserIdEncoded");

    var postIdToKey = _mlContext.Transforms.Conversion
        .MapValueToKey(
            inputColumnName: "PostId",
            outputColumnName: "PostIdEncoded");

    // Step 2: find recommendations using matrix factorization.
    var pipeline = userIdToKey.Append(
        postIdToKey.Append(_mlContext.Recommendation().Trainers.MatrixFactorization(options)));

    Console.WriteLine("Training the model...");
    Model = pipeline.Fit(TrainingData);
}
public void Train()
{
    // If working in Visual Studio, make sure the 'Copy to Output Directory'
    // property of iris-data.txt is set to 'Copy always'.
    string trainingDataPath = "IrisClassification/iris.data.txt";

    // Comma-separated file: four numeric measurements plus a text label.
    var loader = new TextLoader(_env, new TextLoader.Arguments()
    {
        Separator = ",",
        HasHeader = true,
        Column = new[]
        {
            new TextLoader.Column("SepalLength", DataKind.R4, 0),
            new TextLoader.Column("SepalWidth", DataKind.R4, 1),
            new TextLoader.Column("PetalLength", DataKind.R4, 2),
            new TextLoader.Column("PetalWidth", DataKind.R4, 3),
            new TextLoader.Column("Label", DataKind.Text, 4)
        }
    });

    IDataView trainingData = loader.Read(new MultiFileSource(trainingDataPath));

    // STEP 3: Transform the data and add a learner.
    //  - key-encode the text "Label" (training needs numeric labels),
    //  - concatenate the measurements into "Features",
    //  - train an SDCA multi-class classifier,
    //  - map the predicted key back to the original label text.
    var trainingPipeline = new TermEstimator(_env, "Label", "Label")
        .Append(new ConcatEstimator(_env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"))
        .Append(new SdcaMultiClassTrainer(_env, new SdcaMultiClassTrainer.Arguments()))
        .Append(new KeyToValueEstimator(_env, "PredictedLabel"));

    // STEP 4: Train the model on the data set.
    _model = trainingPipeline.Fit(trainingData);
}
public static float carregar(float Prioridade, float Punicao, float Dispensa, float UltimoDiaSemana, float DiaAtualSemanaServico)
{
    string pathModel = "model.pb";

    // Train and persist a model first if none exists yet.
    if (!File.Exists(pathModel))
    {
        WaitForm waitForm = new WaitForm("Modelo em treinamento...", ServicoPredicaoControl.criar_e_treinar);
        waitForm.ShowDialog();
    }

    // FIX: the model stream is now disposed on every path; previously
    // fs.Close() ran only when LoadFrom/Predict did not throw (stream leak).
    using (FileStream fs = File.OpenRead(pathModel))
    {
        var env = new LocalEnvironment();
        var model = TransformerChain.LoadFrom(env, fs);

        var prediction = model.MakePredictionFunction<ModeloPredicao, ServicoPredicao>(env).Predict(
            new ModeloPredicao()
        {
            prioridade = Prioridade,
            punicao = Punicao,
            dispensa = Dispensa,
            ultimoDiaSemana = UltimoDiaSemana,
            diaAtualSemanaServico = DiaAtualSemanaServico
        });

        return prediction.PredictedLabels;
    }
}
public void EvaluateStaticApi()
{
    ConsoleWriteHeader("Loading model");
    Console.WriteLine($"Model loaded: {modelLocation}");

    // Deserialize the transformer chain from disk.
    ITransformer loadedModel;
    using (var modelStream = new FileStream(modelLocation, FileMode.Open))
        loadedModel = TransformerChain.LoadFrom(env, modelStream);

    // Prediction function: ImageNetData in, ImageNetPrediction out.
    var predictor = loadedModel.MakePredictionFunction <ImageNetData, ImageNetPrediction>(env);

    // Read the csv file into a List<ImageNetData>.
    var testData = ImageNetData.ReadFromCsv(dataLocation, imagesFolder).ToList();

    ConsoleWriteHeader("Making classifications");

    // There is a bug (https://github.com/dotnet/machinelearning/issues/1138)
    // that always buffers the response from the predictor, so we copy each
    // response by value before asking for the next one.
    testData
        .Select(sample => new { sample, result = predictor.Predict(sample) })
        .Select(x => (x.sample.ImagePath, x.result.PredictedLabelValue, x.result.Score))
        .ToList()
        .ForEach(p => ConsoleWriteImagePrediction(p.ImagePath, p.PredictedLabelValue, p.Score.Max()));
}
public void SetupPredictBenchmarks()
{
    _trainedModel = Train(_dataPath);
    _predictionEngine = _trainedModel.CreatePredictionEngine <IrisData, IrisPrediction>(_env);

    // Run one prediction up front and feed the result to the consumer.
    _consumer.Consume(_predictionEngine.Predict(_example));

    // Label in column 0, then the four iris measurements.
    var loader = new TextLoader(_env,
        columns: new[]
        {
            new TextLoader.Column("Label", DataKind.R4, 0),
            new TextLoader.Column("SepalLength", DataKind.R4, 1),
            new TextLoader.Column("SepalWidth", DataKind.R4, 2),
            new TextLoader.Column("PetalLength", DataKind.R4, 3),
            new TextLoader.Column("PetalWidth", DataKind.R4, 4),
        },
        hasHeader: true
    );

    IDataView testSet = loader.Read(_dataPath);
    IDataView scoredTestSet = _trainedModel.Transform(testSet);

    // Compute multi-class metrics over the scored test set.
    var evaluator = new MultiClassClassifierEvaluator(_env, new MultiClassClassifierEvaluator.Arguments());
    _metrics = evaluator.Evaluate(scoredTestSet, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);

    // Pre-build prediction batches of the configured sizes, each filled with
    // copies of the same example row.
    _batches = new IrisData[_batchSizes.Length][];
    for (int batchIndex = 0; batchIndex < _batches.Length; batchIndex++)
    {
        var batch = new IrisData[_batchSizes[batchIndex]];
        for (int row = 0; row < batch.Length; row++)
        {
            batch[row] = _example;
        }
        _batches[batchIndex] = batch;
    }
}
public void ReadKeyboard(TransformerChain <KeyToValueMappingTransformer> model)
{
    Console.WriteLine("Ingrese la letra s para salir...");

    // FIX: build the prediction engine once, outside the loop; it was being
    // recreated from the model on every iteration.
    var engine = mlContext.Model.CreatePredictionEngine <DataEntity, DataPredictionEntity>(model);

    bool cicle = true;
    while (cicle)
    {
        var option = ValidateInput();

        // FIX: ordinal case-insensitive comparison instead of ToUpper(),
        // which is culture-sensitive (e.g. the Turkish-I problem).
        if (string.Equals(option, "S", StringComparison.OrdinalIgnoreCase))
        {
            cicle = false;
        }
        else
        {
            // ValidateInput is expected to return a numeric string here.
            float valueUser = float.Parse(option);
            var prediction = engine.Predict(new DataEntity() { Num = valueUser });
            Console.WriteLine($"Tipo predecido: {prediction.PredictedLabels}");
        }
    }
}
private static ITransformer LoadModel()
{
    // Deserialize the persisted transformer chain from ModelPath (read-only).
    using (var modelStream = new FileStream(ModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        return TransformerChain.LoadFrom(s_environment, modelStream);
    }
}
public void TestEstimatorSaveLoad()
{
    // Round-trips an image-processing transformer chain through SaveTo /
    // LoadFrom and verifies the image-loader columns survive serialization.
    using (var env = new ConsoleEnvironment())
    {
        var dataFile = GetDataPath("images/images.tsv");
        var imageFolder = Path.GetDirectoryName(dataFile);
        var data = env.CreateLoader("Text{col=ImagePath:TX:0 col=Name:TX:1}", new MultiFileSource(dataFile));

        // Pipeline: load image -> resize to 100x100 -> extract pixels -> grayscale.
        var pipe = new ImageLoaderEstimator(env, imageFolder, ("ImagePath", "ImageReal"))
                   .Append(new ImageResizerEstimator(env, "ImageReal", "ImageReal", 100, 100))
                   .Append(new ImagePixelExtractorEstimator(env, "ImageReal", "ImagePixels"))
                   .Append(new ImageGrayscaleEstimator(env, ("ImageReal", "ImageGray")));

        // Exercise schema propagation before fitting.
        pipe.GetOutputSchema(Core.Data.SchemaShape.Create(data.Schema));
        var model = pipe.Fit(data);

        using (var file = env.CreateTempFile())
        {
            // Save, then load the chain back from the same temp file.
            using (var fs = file.CreateWriteStream())
                model.SaveTo(env, fs);
            var model2 = TransformerChain.LoadFrom(env, file.OpenReadStream());

            // First element of both chains is the image loader; its column
            // pairs must match element-wise after the round trip.
            var newCols = ((ImageLoaderTransform)model2.First()).Columns;
            var oldCols = ((ImageLoaderTransform)model.First()).Columns;
            Assert.True(newCols
                        .Zip(oldCols, (x, y) => x == y)
                        .All(x => x));
        }
    }
    Done();
}
private static async Task PerformStep5(TransformerChain <RegressionPredictionTransformer <LinearRegressionPredictor> > trainedModel, IDataView testDataView, MLContext mlContext)
{
    // Score the hold-out data, compute regression metrics, and print them.
    IDataView scoredData = trainedModel.Transform(testDataView);
    var regressionMetrics = mlContext.Regression.Evaluate(scoredData, "Label", "Score");
    await PrintRegressionMetrics("", regressionMetrics);
}
public void Evaluate()
{
    // Load the persisted clustering model.
    ITransformer model;
    using (var modelStream = File.OpenRead(modelLocation))
    {
        model = TransformerChain.LoadFrom(env, modelStream);
    }

    // Input layout: 32 numeric feature columns (0-31) then a text LastName.
    var loader = new TextLoader(env, new TextLoader.Arguments
    {
        Column = new[]
        {
            new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(0, 31) }),
            new TextLoader.Column("LastName", DataKind.Text, 32)
        },
        HasHeader = true,
        Separator = ","
    });

    ConsoleWriteHeader("Read model");
    Console.WriteLine($"Model location: {modelLocation}");

    var pivotData = loader.Read(new MultiFileSource(pivotDataLocation));

    // Score every row and materialize the predictions.
    var predictions = model.Transform(pivotData)
                      .AsEnumerable <ClusteringPrediction>(env, false)
                      .ToArray();

    // Render and display the segmentation chart.
    SaveCustomerSegmentationPlot(predictions, plotLocation);
    OpenChartInDefaultWindow(plotLocation);
}
/// <summary> /// Trains and returns a <see cref="ITransformer"/>. /// </summary> public ITransformer Fit(IDataView input) { var h = _host; h.CheckValue(input, nameof(input)); var tparams = new TransformApplierParams(this); string[] textCols = _inputColumns; string[] wordTokCols = null; string[] charTokCols = null; string wordFeatureCol = null; string charFeatureCol = null; List <string> tempCols = new List <string>(); IDataView view = input; TransformerChain <ITransformer> chain = new TransformerChain <ITransformer>(); if (tparams.NeedInitialSourceColumnConcatTransform && textCols.Length > 1) { var srcCols = textCols; textCols = new[] { GenerateColumnName(input.Schema, OutputColumn, "InitialConcat") }; tempCols.Add(textCols[0]); chain = AddToChainAndTransform(chain, new ColumnConcatenatingTransformer(h, textCols[0], srcCols), ref view); } if (tparams.NeedsNormalizeTransform) { var xfCols = new (string outputColumnName, string inputColumnName)[textCols.Length];
public async Task <ModelMetrics> GenerateModel(BaseDAL storage, string modelFileName)
{
    // Trains a RandomizedPca anomaly-detection model from the stored packets,
    // saves it to modelFileName, and returns evaluation metrics plus row
    // counts and elapsed training time.
    if (storage == null)
    {
        Log.Error("Trainer::GenerateModel - BaseDAL is null");
        throw new ArgumentNullException(nameof(storage));
    }

    if (string.IsNullOrEmpty(modelFileName))
    {
        Log.Error("Trainer::GenerateModel - modelFileName is null");
        throw new ArgumentNullException(nameof(modelFileName));
    }

    // NOTE(review): this demands that the OUTPUT file already exists even
    // though the model is saved to it below — looks inverted; confirm the
    // intended contract with callers.
    if (!File.Exists(modelFileName))
    {
        Log.Error($"Trainer::GenerateModel - {modelFileName} does not exist");
        throw new FileNotFoundException(modelFileName);
    }

    var startTime = DateTime.Now;

    // Fixed PCA hyperparameters; the seed keeps training deterministic.
    var options = new RandomizedPcaTrainer.Options
    {
        FeatureColumnName = FEATURES,
        ExampleWeightColumnName = null,
        Rank = 4,
        Oversampling = 20,
        EnsureZeroMean = true,
        Seed = Constants.ML_SEED
    };

    // Build the data view from the clean and malicious packet queries.
    var(data, cleanRowCount, maliciousRowCount) = GetDataView(await storage.QueryPacketsAsync(a => a.IsClean), await storage.QueryPacketsAsync(a => !a.IsClean));

    // Concatenate every PayloadItem property except Label into FEATURES.
    IEstimator <ITransformer> dataProcessPipeline = _mlContext.Transforms.Concatenate(
        FEATURES,
        typeof(PayloadItem).ToPropertyList <PayloadItem>(nameof(PayloadItem.Label)));

    IEstimator <ITransformer> trainer = _mlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);

    EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

    TransformerChain <ITransformer> trainedModel = trainingPipeline.Fit(data.TrainSet);

    _mlContext.Model.Save(trainedModel, data.TrainSet.Schema, modelFileName);

    // Score the hold-out set and package the metrics.
    var testSetTransform = trainedModel.Transform(data.TestSet);

    return(new ModelMetrics
    {
        Metrics = _mlContext.AnomalyDetection.Evaluate(testSetTransform),
        NumCleanRows = cleanRowCount,
        NumMaliciousRows = maliciousRowCount,
        Duration = DateTime.Now.Subtract(startTime)
    });
}
public void SaveMLNetModel()
{
    // Fit the ONNX pipeline against an empty enumerable so the transformer
    // can be materialized, then save it; no input schema is recorded (null).
    EstimatorChain <OnnxTransformer> estimators = CreatePipeline();
    IDataView emptyData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
    TransformerChain <OnnxTransformer> fitted = estimators.Fit(emptyData);
    mlContext.Model.Save(fitted, null, _mlModelDestn);
}
public PredictionEngineExample(string modelName)
{
    _env = EnvHelper.NewTestEnvironment();

    // FIX: dispose the model file stream; File.OpenRead was previously
    // leaked (never closed).
    using (var modelStream = File.OpenRead(modelName))
    {
        var transformer = TransformerChain.LoadFromLegacy(_env, modelStream);
        var model = new ModelOperationsCatalog(_env);
        _predictor = model.CreatePredictionEngine <FloatVectorInput, FloatOutput>(transformer);
    }
}
public PredictionEngine <CenterFaceImageInput, CenterFaceImageOutput> GetMlNetPredictionEngine()
{
    // Fit the ONNX pipeline against an empty enumerable, then wrap the
    // resulting chain in a strongly-typed prediction engine.
    EstimatorChain <OnnxTransformer> estimators = CreatePipeline();
    IDataView emptyData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
    TransformerChain <OnnxTransformer> fitted = estimators.Fit(emptyData);
    return mlContext.Model.CreatePredictionEngine <CenterFaceImageInput, CenterFaceImageOutput>(fitted);
}
public OutputWriter(
    TransformerChain transformerChain,
    TextWriter outputStream
    )
{
    // Fail fast on null dependencies instead of deferring to a later
    // NullReferenceException at use time.
    this.transformerChain = transformerChain ?? throw new ArgumentNullException(nameof(transformerChain));
    this.outputStream = outputStream ?? throw new ArgumentNullException(nameof(outputStream));
}
internal static ITransformer Create(IHostEnvironment env, Options options, IDataView input)
{
    // Builds the n-gram hash extractor transformer: per source column a
    // HashingTransformer into temporary columns, then an NgramHashingEstimator
    // over those, then a column selector that drops the temporaries.
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(LoaderSignature);
    h.CheckValue(options, nameof(options));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns), "Columns must be specified");

    var chain = new TransformerChain <ITransformer>();

    // To each input column to the NgramHashExtractorArguments, a HashTransform
    // using 30 bits (to minimize collisions) is applied first, followed by an
    // NgramHashTransform.
    var hashColumns = new List <HashingEstimator.ColumnOptions>();
    var ngramHashColumns = new NgramHashingEstimator.ColumnOptions[options.Columns.Length];

    var colCount = options.Columns.Length;
    // The NGramHashExtractor has a ManyToOne column type. To avoid stepping
    // over the source column name when a 'name' destination column name was
    // specified, we use temporary column names.
    string[][] tmpColNames = new string[colCount][];
    for (int iinfo = 0; iinfo < colCount; iinfo++)
    {
        var column = options.Columns[iinfo];
        h.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
        h.CheckUserArg(Utils.Size(column.Source) > 0 && column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source));

        int srcCount = column.Source.Length;
        tmpColNames[iinfo] = new string[srcCount];
        for (int isrc = 0; isrc < srcCount; isrc++)
        {
            var tmpName = input.Schema.GetTempColumnName(column.Source[isrc]);
            tmpColNames[iinfo][isrc] = tmpName;
            // Per-column hash settings fall back to the extractor-level options
            // via '??'; hashing targets the temporary column name.
            hashColumns.Add(new HashingEstimator.ColumnOptions(tmpName, column.Source[isrc], 30, column.Seed ?? options.Seed, false, column.MaximumNumberOfInverts ?? options.MaximumNumberOfInverts));
        }

        // N-gram hashing reads the temporary hashed columns and writes the
        // user-requested destination column name.
        ngramHashColumns[iinfo] = new NgramHashingEstimator.ColumnOptions(column.Name, tmpColNames[iinfo], column.NgramLength ?? options.NgramLength, column.SkipLength ?? options.SkipLength, column.UseAllLengths ?? options.UseAllLengths, column.NumberOfBits ?? options.NumberOfBits, column.Seed ?? options.Seed, column.Ordered ?? options.Ordered, column.MaximumNumberOfInverts ?? options.MaximumNumberOfInverts);
        ngramHashColumns[iinfo].FriendlyNames = column.FriendlyNames;
    }

    // Fit hashing first, n-gram hashing on the hashed view, then drop all
    // temporary columns from the output.
    var hashing = new HashingEstimator(h, hashColumns.ToArray()).Fit(input);
    return(chain.Append(hashing)
           .Append(new NgramHashingEstimator(h, ngramHashColumns).Fit(hashing.Transform(input)))
           .Append(new ColumnSelectingTransformer(h, null, tmpColNames.SelectMany(cols => cols).ToArray())));
}
public NBARegressor(TransformerChain <RegressionPredictionTransformer <LinearRegressionModelParameters> > transformer, DataViewSchema schema)
    // Wires the trained regression transformer and its schema into the base
    // class, using a fresh demo Path.
    // NOTE(review): id is the hard-coded literal "id" and the context comes
    // from the MLContext2.Context static — confirm both are intended.
    : base(
        new MLNet.DemoApp.Path(),
        id: "id",
        mLContext: MLContext2.Context,
        transformer: transformer,
        schema: schema)
{
}
static void Main(string[] args)
{
    // Download and unpack the SMS spam dataset on first run.
    if (false == File.Exists(Program.TrainDataPath))
    {
        using (var client = new WebClient())
        {
            client.DownloadFile(@"https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip", "spam.zip");
        }
        ZipFile.ExtractToDirectory("spam.zip", Program.DataDirectoryPath);
    }

    var context = new MLContext();

    // Tab-separated file: text label in column 0, the message in column 1.
    var reader = new TextLoader(context, new TextLoader.Arguments()
    {
        Separator = "tab",
        HasHeader = true,
        Column = new[]
        {
            new TextLoader.Column("Label", DataKind.Text, 0),
            new TextLoader.Column("Message", DataKind.Text, 1)
        }
    });
    var data = reader.Read(new MultiFileSource(Program.TrainDataPath));

    // Pipeline: custom mapping (MyLambda.MyAction — presumably converts the
    // text label for the trainer; confirm in MyLambda), text featurization,
    // then an SDCA binary classifier.
    var estimator = context.Transforms.CustomMapping <MyInput, MyOutput>(MyLambda.MyAction, "MyLambda")
        .Append(context.Transforms.Text.FeaturizeText("Message", "Features"))
        .Append(context.BinaryClassification.Trainers.StochasticDualCoordinateAscent());

    // 5-fold cross-validation; report the mean AUC.
    var cvResult = context.BinaryClassification.CrossValidate(data, estimator, numFolds: 5);
    var aucs = cvResult.Select(r => r.metrics.Auc);
    Console.WriteLine($"The AUC is {aucs.Average()}");

    var model = estimator.Fit(data);

    // Rebuild the trained chain, replacing the final binary prediction
    // transformer with one using a lowered decision threshold of 0.15 on
    // the Probability column.
    var inPipe = new TransformerChain <ITransformer>(model.Take(model.Count() - 1).ToArray());
    var lastTransformer = new BinaryPredictionTransformer <IPredictorProducing <float> >(
        context, model.LastTransformer.Model, inPipe.GetOutputSchema(data.Schema), model.LastTransformer.FeatureColumn,
        threshold: 0.15f, thresholdColumn: DefaultColumnNames.Probability);
    var parts = model.ToArray();
    parts[parts.Length - 1] = lastTransformer;
    var newModel = new TransformerChain <ITransformer>(parts);

    // Classify a few sample messages with the adjusted model.
    var predictor = newModel.MakePredictionFunction <Input, Prediction>(context);
    Program.ClassifyMessage(predictor, "That's a great idea. It should work.");
    Program.ClassifyMessage(predictor, "Free medicine winner! Congratulations");
    Program.ClassifyMessage(predictor, "Yes we should meet over the weekend");
    Program.ClassifyMessage(predictor, "You win pills and free entry vouchers");
}
public ITransformer LoadModelFromZipFile(string modelPath)
{
    // Deserialize the transformer chain, cache it on the instance, return it.
    using (var zipStream = new FileStream(modelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        _trainedModel = TransformerChain.LoadFrom(_mlContext, zipStream);
    }

    return _trainedModel;
}
public static ITransformer ReadModel(this LocalEnvironment env, string modelLocation)
{
    // Open the model file read-only, deserialize the chain, and return it.
    using (var modelFile = File.OpenRead(modelLocation))
    {
        return TransformerChain.LoadFrom(env, modelFile);
    }
}
internal OneHotEncodingTransformer(ValueToKeyMappingEstimator term, IEstimator <ITransformer> toVector, IDataView input)
{
    // Without a vectorizer, fit the term estimator alone and wrap it in a
    // single-element chain; otherwise fit the term -> toVector chain.
    if (toVector == null)
    {
        _transformer = new TransformerChain <ITransformer>(term.Fit(input));
    }
    else
    {
        _transformer = term.Append(toVector).Fit(input);
    }
}