/// <summary>
/// Forwards a training request to <c>Model.TrainModel</c>.
/// </summary>
/// <param name="input">Training request bound from the request body.</param>
/// <returns>The result object produced by <c>Model.TrainModel</c>.</returns>
public ReturnResult<Model> StartTraining([FromBody] TrainInput input)
{
    // Locals are evaluated in the same order the arguments were originally passed.
    var database = Db;

    // The caller's NameIdentifier claim is parsed as the integer user id.
    var userId = Int32.Parse(User.FindFirstValue(ClaimTypes.NameIdentifier));
    var modelBuilderUrl = Configuration["ModelBuilderUrl"];

    return Model.TrainModel(database, input, userId, modelBuilderUrl);
}
/// <summary>
/// HTTP-triggered entry point that trains a model from the posted <c>TrainInput</c> JSON payload.
/// The posted data is written to a temporary file, loaded with the configured columns, shuffled,
/// run through an AutoML experiment matching <c>input.ModelType</c>, and the best model is stored
/// through <c>FileStore.InsertUpdate</c> inside a transaction on the static <c>db</c>.
/// </summary>
/// <param name="req">Incoming request; its body is read as the JSON-serialized <c>TrainInput</c>.</param>
/// <param name="log">Logger used to record failures.</param>
/// <returns>
/// A result with <c>Success = true</c> and the new <c>Model</c> (file store id, accuracy metric, row count)
/// on success; otherwise <c>Success = false</c> with the caught exception. Exceptions raised inside the
/// training pipeline are not rethrown.
/// </returns>
public static ReturnResult<Model> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)] HttpRequest req, ILogger log)
{
    // Time budget, in seconds, given to every AutoML experiment below.
    const int maxExperimentTimeInSeconds = 20;

    var dataFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
    try
    {
        db.BeginTransaction();
        MLContext context = new MLContext();

        TrainInput input;
        using (StreamReader reader = new StreamReader(req.Body))
        {
            input = JsonConvert.DeserializeObject<TrainInput>(reader.ReadToEnd());
        }

        // An empty or "null" body deserializes to null; fail with a clear message
        // instead of a NullReferenceException further down.
        if (input == null)
        {
            throw new InvalidOperationException("Request body did not contain a training input payload.");
        }

        File.WriteAllText(dataFilePath, input.Data);

        var columnData = new List<TextLoader.Column>();
        foreach (var c in input.Columns)
        {
            // Data type 1 means "ignore": the column is left out of the loader schema.
            if (c.Type != 1)
            {
                columnData.Add(new TextLoader.Column()
                {
                    DataKind = (DataKind)c.Type,
                    Name = c.ColumnName,
                    Source = new TextLoader.Range[] { new TextLoader.Range(c.ColumnIndex) }
                });
            }
        }

        IDataView loadedData = context.Data.LoadFromTextFile(
            dataFilePath,
            columnData.ToArray(),
            separatorChar: input.Separator,
            hasHeader: input.HasHeaders,
            allowQuoting: true);
        loadedData = context.Data.ShuffleRows(loadedData);

        /*
         * NOTE(review): the previous comment here said multiclass experiments are also used
         * for binary problems (multiclass accepts any label type, so the user has to choose less).
         * The code below nevertheless has a dedicated Binary branch - confirm which is intended.
         */
        double bestRunMetric = 0;
        ITransformer bestModel = null;
        if (input.ModelType == TrainInput.ModelTypes.Multiclass)
        {
            var settings = new MulticlassExperimentSettings() { MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds };
            var training = context.Auto().CreateMulticlassClassificationExperiment(settings);
            ExperimentResult<MulticlassClassificationMetrics> results = training.Execute(loadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = results.BestRun.ValidationMetrics.MacroAccuracy;
            bestModel = results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Binary)
        {
            var settings = new BinaryExperimentSettings() { MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds };
            var training = context.Auto().CreateBinaryClassificationExperiment(settings);
            ExperimentResult<BinaryClassificationMetrics> results = training.Execute(loadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = results.BestRun.ValidationMetrics.Accuracy;
            bestModel = results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Regression)
        {
            var settings = new RegressionExperimentSettings() { MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds };
            var training = context.Auto().CreateRegressionExperiment(settings);
            ExperimentResult<RegressionMetrics> results = training.Execute(loadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = results.BestRun.ValidationMetrics.RSquared;
            bestModel = results.BestRun.Model;

            // A negative R-squared is reported as 0.
            if (bestRunMetric < 0)
            {
                bestRunMetric = 0;
            }
        }
        else
        {
            throw new Exception("Invalid model type");
        }

        var modelFileId = 0;
        using (MemoryStream ms = new MemoryStream())
        {
            context.Model.Save(bestModel, loadedData.Schema, ms);

            // Save model to the database
            FileStore modelSave = new FileStore() { Data = ms.ToArray() };
            modelFileId = FileStore.InsertUpdate(db, modelSave).Item.FileStoreId;
        }

        var resultModel = new Model()
        {
            FileStoreId = modelFileId,
            Accuracy = bestRunMetric,
            // Counts every line of the trimmed payload.
            // NOTE(review): this includes the header line when HasHeaders is set - confirm intended.
            Rows = input.Data.Trim().Split('\n').Length
        };

        db.CompleteTransaction();
        return new ReturnResult<Model>() { Success = true, Item = resultModel };
    }
    catch (Exception e)
    {
        db.AbortTransaction();

        // Pass the exception itself so the stack trace is logged, not just the message.
        log.LogError(e, e.Message);
        return new ReturnResult<Model>() { Success = false, Exception = e };
    }
    finally
    {
        // The temp file is only used inside the try block, so remove it here on every path.
        // Cleanup is best-effort: a failed delete must not replace the result being returned.
        try
        {
            File.Delete(dataFilePath);
        }
        catch (Exception cleanupError) when (cleanupError is IOException || cleanupError is UnauthorizedAccessException)
        {
            log.LogWarning(cleanupError, "Could not delete temporary training data file {DataFilePath}", dataFilePath);
        }
    }
}
/// <summary>
/// Checks that <c>Train.Run</c> reports failure when the Iris label column
/// ("Species") is declared with the wrong data type.
/// </summary>
public void RunTestBad()
{
    var loggerMock = new Mock<ILogger>();
    var requestMock = new Mock<HttpRequest>();
    var databaseMock = new Mock<IDatabase>();

    var columns = new List<ColumnInformation>();

    // Type 1 marks the column as ignored.
    columns.Add(new ColumnInformation() { ColumnIndex = 0, ColumnName = "Id", Type = 1 });

    // The four measurement columns occupy indices 1 through 4.
    string[] measurementNames = { "SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm" };
    for (int i = 0; i < measurementNames.Length; i++)
    {
        columns.Add(new ColumnInformation()
        {
            ColumnIndex = i + 1,
            ColumnName = measurementNames[i],
            Type = (int)DataKind.Single
        });
    }

    // Wrong data type for the label column on purpose: this is what the test exercises.
    columns.Add(new ColumnInformation() { ColumnIndex = 5, ColumnName = "Species", Type = (int)DataKind.Single });

    var trainingInput = new TrainInput()
    {
        Columns = columns,
        Data = File.ReadAllText("Iris.csv"),
        HasHeaders = true,
        Description = "test",
        LabelColumn = "Species",
        ModelType = TrainInput.ModelTypes.Multiclass,
        Separator = ',',
        Title = "Iris Test"
    };

    // Serve the serialized input as the request body.
    string payload = JsonConvert.SerializeObject(trainingInput);
    byte[] payloadBytes = Encoding.ASCII.GetBytes(payload);
    var bodyStream = new MemoryStream(payloadBytes);
    requestMock.Setup(x => x.Body).Returns(bodyStream);

    Train.db = databaseMock.Object;

    var outcome = Train.Run(requestMock.Object, loggerMock.Object);

    Assert.IsFalse(outcome.Success);
}