예제 #1
0
        // Starts a training run for the posted TrainInput.
        //
        // Delegates to Model.TrainModel, handing it the Db member, the request
        // body, the caller's NameIdentifier claim parsed as an integer, and the
        // "ModelBuilderUrl" configuration value. Whatever TrainModel returns is
        // passed straight back to the caller.
        public ReturnResult <Model> StartTraining([FromBody] TrainInput input)
        {
            // Arguments are captured in the same order the call would evaluate them.
            var database        = Db;
            var nameIdentifier  = User.FindFirstValue(ClaimTypes.NameIdentifier);
            int currentUserId   = Int32.Parse(nameIdentifier);
            var modelBuilderUrl = Configuration["ModelBuilderUrl"];

            return Model.TrainModel(database, input, currentUserId, modelBuilderUrl);
        }
예제 #2
0
        // HTTP-triggered entry point that trains a model from the posted TrainInput.
        //
        // Steps: deserialize the JSON request body into a TrainInput, write its Data
        // to a temporary file, load that file with the non-ignored columns, shuffle
        // the rows, run a time-boxed AutoML experiment matching input.ModelType,
        // save the best model through FileStore.InsertUpdate, and return a Model
        // describing the stored file, its metric and the row count.
        //
        // Everything between BeginTransaction and CompleteTransaction runs inside a
        // transaction on the static db member. Any exception aborts the transaction,
        // is logged, and is returned in ReturnResult.Exception with Success = false;
        // this method does not let exceptions from the training path escape.
        //
        // req: request whose body is the JSON-serialized TrainInput.
        // log: logger that receives failure details.
        public static ReturnResult <Model> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)] HttpRequest req, ILogger log)
        {
            // Time budget handed to every AutoML experiment type.
            const int MaxExperimentSeconds = 20;

            var dataFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

            try
            {
                db.BeginTransaction();

                MLContext context = new MLContext();

                TrainInput input = null;

                using (StreamReader reader = new StreamReader(req.Body))
                {
                    input = JsonConvert.DeserializeObject <TrainInput>(reader.ReadToEnd());
                }

                // An empty or "null" body deserializes to null; fail with a clear
                // message instead of a NullReferenceException further down (possibly
                // only after the whole experiment has already run).
                if (input == null)
                {
                    throw new ArgumentException("Request body does not contain training input");
                }

                if (input.Data == null || input.Columns == null)
                {
                    throw new ArgumentException("Training input must include data and columns");
                }

                File.WriteAllText(dataFilePath, input.Data);

                IDataView LoadedData = null;

                var columnData = new List <TextLoader.Column>();
                foreach (var c in input.Columns)
                {
                    //data type 1 is for ignore
                    if (c.Type != 1)
                    {
                        var newColData = new TextLoader.Column()
                        {
                            DataKind = (DataKind)c.Type,
                            Name     = c.ColumnName,
                            Source   = new TextLoader.Range[] { new TextLoader.Range(c.ColumnIndex) }
                        };

                        columnData.Add(newColData);
                    }
                }

                LoadedData = context.Data.LoadFromTextFile(
                    dataFilePath,
                    columnData.ToArray(),
                    separatorChar: input.Separator,
                    hasHeader: input.HasHeaders,
                    allowQuoting: true
                    );

                LoadedData = context.Data.ShuffleRows(LoadedData);

                /*
                 * Multiclass will be used in the case of binary experiments and multiclass experiments.
                 * This is because multiclass can accept all types as an output column. This will
                 * allow less interaction with the user and a better user experience.
                 */

                double       bestRunMetric = 0;
                ITransformer bestModel     = null;

                if (input.ModelType == TrainInput.ModelTypes.Multiclass)
                {
                    ExperimentResult <MulticlassClassificationMetrics> Results = null;
                    var settings = new MulticlassExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = MaxExperimentSeconds
                    };
                    var training = context.Auto().CreateMulticlassClassificationExperiment(settings);
                    Results       = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = Results.BestRun.ValidationMetrics.MacroAccuracy;
                    bestModel     = Results.BestRun.Model;
                }
                else if (input.ModelType == TrainInput.ModelTypes.Binary)
                {
                    ExperimentResult <BinaryClassificationMetrics> Results = null;
                    var settings = new BinaryExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = MaxExperimentSeconds
                    };
                    var training = context.Auto().CreateBinaryClassificationExperiment(settings);
                    Results       = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = Results.BestRun.ValidationMetrics.Accuracy;
                    bestModel     = Results.BestRun.Model;
                }
                else if (input.ModelType == TrainInput.ModelTypes.Regression)
                {
                    ExperimentResult <RegressionMetrics> Results = null;
                    var settings = new RegressionExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = MaxExperimentSeconds
                    };
                    var training = context.Auto().CreateRegressionExperiment(settings);
                    Results       = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = Results.BestRun.ValidationMetrics.RSquared;
                    bestModel     = Results.BestRun.Model;

                    // Negative R-squared values are reported as 0.
                    if (bestRunMetric < 0)
                    {
                        bestRunMetric = 0;
                    }
                }
                else
                {
                    throw new ArgumentException("Invalid model type");
                }


                var modelFileId = 0;

                using (MemoryStream ms = new MemoryStream())
                {
                    context.Model.Save(bestModel, LoadedData.Schema, ms);
                    //Save model to the database
                    FileStore modelSave = new FileStore()
                    {
                        Data = ms.ToArray()
                    };

                    modelFileId = FileStore.InsertUpdate(db, modelSave).Item.FileStoreId;
                }

                var resultModel = new Model()
                {
                    FileStoreId = modelFileId,
                    Accuracy    = bestRunMetric,
                    // Counts newline-separated lines of the trimmed payload.
                    Rows        = input.Data.Trim().Split('\n').Length
                };

                db.CompleteTransaction();

                return(new ReturnResult <Model>()
                {
                    Success = true,
                    Item = resultModel
                });
            }
            catch (Exception e)
            {
                db.AbortTransaction();
                // Pass the exception itself so the stack trace reaches the log.
                log.LogError(e, e.Message);
                return(new ReturnResult <Model>()
                {
                    Success = false,
                    Exception = e
                });
            }
            finally
            {
                // Best-effort removal of the temporary data file so requests do not
                // accumulate files in the temp directory. A cleanup failure must not
                // replace the result being returned, so it is only logged.
                try
                {
                    File.Delete(dataFilePath);
                }
                catch (Exception cleanupError) when (cleanupError is IOException || cleanupError is UnauthorizedAccessException)
                {
                    log.LogWarning(cleanupError, "Could not delete temporary training file {Path}", dataFilePath);
                }
            }
        }
예제 #3
0
        // Negative test for Train.Run: posts the Iris data set with the label
        // column ("Species") declared as DataKind.Single, which is the wrong data
        // type for it, and expects the returned result to report Success == false.
        public void RunTestBad()
        {
            // Arrange
            var logger   = new Mock<ILogger>();
            var request  = new Mock<HttpRequest>();
            var database = new Mock<IDatabase>();

            var trainingInput = new TrainInput()
            {
                Columns = new List<ColumnInformation>()
                {
                    new ColumnInformation()
                    {
                        ColumnIndex = 0,
                        ColumnName  = "Id",
                        Type        = 1 //ignore
                    },
                    new ColumnInformation()
                    {
                        ColumnIndex = 1,
                        ColumnName  = "SepalLengthCm",
                        Type        = (int)DataKind.Single
                    },
                    new ColumnInformation()
                    {
                        ColumnIndex = 2,
                        ColumnName  = "SepalWidthCm",
                        Type        = (int)DataKind.Single
                    },
                    new ColumnInformation()
                    {
                        ColumnIndex = 3,
                        ColumnName  = "PetalLengthCm",
                        Type        = (int)DataKind.Single
                    },
                    new ColumnInformation()
                    {
                        ColumnIndex = 4,
                        ColumnName  = "PetalWidthCm",
                        Type        = (int)DataKind.Single
                    },
                    new ColumnInformation()
                    {
                        ColumnIndex = 5,
                        ColumnName  = "Species",
                        Type        = (int)DataKind.Single // Wrong data type
                    }
                },
                Data        = File.ReadAllText("Iris.csv"),
                HasHeaders  = true,
                Description = "test",
                LabelColumn = "Species",
                ModelType   = TrainInput.ModelTypes.Multiclass,
                Separator   = ',',
                Title       = "Iris Test"
            };

            // Encode as UTF-8: Train.Run reads the body with a default StreamReader,
            // which decodes UTF-8, whereas ASCII encoding would silently turn any
            // non-ASCII character in the payload into '?'.
            var payloadJson = JsonConvert.SerializeObject(trainingInput);
            var bodyStream  = new MemoryStream(Encoding.UTF8.GetBytes(payloadJson));

            request.Setup(x => x.Body).Returns(bodyStream);

            // Train.Run uses the static Train.db member for its database access.
            Train.db = database.Object;

            // Act
            var results = Train.Run(request.Object, logger.Object);

            // Assert
            Assert.IsFalse(results.Success);
        }