Example #1
0
        public async Task OnPostAsync()
        {
            Message = "LOADING...";


            string assetsRelativePath = @"../../../assets";
            string assetsPath         = GetAbsolutePath(assetsRelativePath);


            var imagesFolder       = Path.Combine(assetsPath, "inputs", "images-for-predictions", "tmp-web-image");
            var imageClassifierZip = Path.Combine(assetsPath, "inputs", "MLNETModel", "imageClassifier.zip");

            imagesFolder += @"/" + ImageFile.FileName;

            try
            {
                using (var stream = System.IO.File.Create(imagesFolder))
                {
                    await ImageFile.CopyToAsync(stream);
                }


                var modelScorer = new ModelScorer(imagesFolder, imageClassifierZip);
                Predict = modelScorer.ClassifySingleImage();
            }
            catch (Exception ex)
            {
                Message = ex.Message;
            }

            Message = "...";
            //return RedirectToPage("./Index");
        }
Example #2
0
        private static void Main(string[] args)
        {
            //Create the MLContext to share across components for deterministic results
            MLContext mlContext = new MLContext(seed: 1);  //Seed set to any number so you have a deterministic environment

            //STEP 1: Common data loading
            DataLoader dataLoader = new DataLoader(mlContext);
            var        fullData   = dataLoader.GetDataView(DataPath);

            (IDataView trainingDataView, IDataView testingDataView) = mlContext.Clustering.TrainTestSplit(fullData, testFraction: 0.2);

            //STEP 2: Process data transformations in pipeline
            var dataProcessor       = new DataProcessor(mlContext);
            var dataProcessPipeline = dataProcessor.DataProcessPipeline;

            // (Optional) Peek data in training DataView after applying the ProcessPipeline's transformations
            Common.ConsoleHelper.PeekDataViewInConsole <IrisData>(mlContext, trainingDataView, dataProcessPipeline, 10);
            Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 10);

            // STEP 3: Create and train the model
            var modelBuilder = new ModelBuilder <IrisData, IrisPrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.Clustering.Trainers.KMeans(features: "Features", clustersCount: 3);

            modelBuilder.AddTrainer(trainer);
            var trainedModel = modelBuilder.Train(trainingDataView);

            // STEP4: Evaluate accuracy of the model
            var metrics = modelBuilder.EvaluateClusteringModel(testingDataView);

            Common.ConsoleHelper.PrintClusteringMetrics(trainer.ToString(), metrics);

            // STEP5: Save/persist the model as a .ZIP file
            modelBuilder.SaveModelAsFile(ModelPath);

            Console.WriteLine("=============== End of training process ===============");

            Console.WriteLine("=============== Predict a cluster for a single case (Single Iris data sample) ===============");

            // Test with one sample text
            var sampleIrisData = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            };

            //Create the clusters: Create data files and plot a chart
            var modelScorer = new ModelScorer <IrisData, IrisPrediction>(mlContext);

            modelScorer.LoadModelFromZipFile(ModelPath);

            var prediction = modelScorer.PredictSingle(sampleIrisData);

            Console.WriteLine($"Cluster assigned for setosa flowers:" + prediction.SelectedClusterId);

            Console.WriteLine("=============== End of process, hit any key to finish ===============");
            Console.ReadKey();
        }
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath)
        {
            // Create MLContext to be shared across the model creation workflow objects
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            DataLoader dataLoader       = new DataLoader(mlContext);
            var        trainingDataView = dataLoader.GetDataView(DataSetLocation);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessor       = new DataProcessor(mlContext);
            var dataProcessPipeline = dataProcessor.DataProcessPipeline;

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Set the selected training algorithm into the modelBuilder
            var modelBuilder = new Common.ModelBuilder <GitHubIssue, GitHubIssuePrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features");

            modelBuilder.AddTrainer(trainer);
            modelBuilder.AddEstimator(new KeyToValueEstimator(mlContext, "PredictedLabel"));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValResults = modelBuilder.CrossValidateAndEvaluateMulticlassClassificationModel(trainingDataView, 6, "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics("SdcaMultiClassTrainer", crossValResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            // (OPTIONAL) Try/test a single prediction by loding the model from the file, first.
            GitHubIssue issue = new GitHubIssue()
            {
                ID = "Any-ID", Title = "Entity Framework crashes", Description = "When connecting to the database, EF is crashing"
            };
            var modelScorer = new ModelScorer <GitHubIssue, GitHubIssuePrediction>(mlContext);

            modelScorer.LoadModelFromZipFile(ModelPath);
            var prediction = modelScorer.PredictSingle(issue);

            Console.WriteLine($"=============== Single Prediction - Result: {prediction.Area} ===============");
            //

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
        public static void VisualizeSomePredictions(MLContext mlContext,
                                                    string modelName,
                                                    string testDataLocation,
                                                    ModelScorer <DemandObservation, DemandPrediction> modelScorer,
                                                    int numberOfPredictions)
        {
            //Make a few prediction tests
            // Make the provided number of predictions and compare with observed data from the test dataset
            var testData = ReadSampleDataFromCsvFile(testDataLocation, numberOfPredictions);

            for (int i = 0; i < numberOfPredictions; i++)
            {
                var prediction = modelScorer.PredictSingle(testData[i]);

                Common.ConsoleHelper.PrintRegressionPredictionVersusObserved(prediction.PredictedCount.ToString(),
                                                                             testData[i].Count.ToString());
            }
        }
        static async Task Main(string[] args)
        {
            var assetsPath = ModelHelpers.GetAssetsPath(@"..\..\..\assets");

            var tagsTsv            = Path.Combine(assetsPath, "inputs", "data", "tags.tsv");
            var imagesFolder       = Path.Combine(assetsPath, "inputs", "data");
            var imageClassifierZip = Path.Combine(assetsPath, "inputs", "imageClassifier.zip");

            try
            {
                var modelScorer = new ModelScorer(tagsTsv, imagesFolder, imageClassifierZip);
                modelScorer.ClassifyImages();
            }
            catch (Exception ex)
            {
                ConsoleWriteException(ex.Message);
            }

            ConsolePressAnyKey();
        }
Example #6
0
        static void Main(string[] args)
        {
            string assetsRelativePath = @"..\..\..\assets";
            string assetsPath         = GetDataSetAbsolutePath(assetsRelativePath);

            var tagsTsv            = Path.Combine(assetsPath, "inputs", "data", "tags.tsv");
            var imagesFolder       = Path.Combine(assetsPath, "inputs", "data");
            var imageClassifierZip = Path.Combine(assetsPath, "inputs", "imageClassifier.zip");

            try
            {
                var modelScorer = new ModelScorer(tagsTsv, imagesFolder, imageClassifierZip);
                modelScorer.ClassifyImages();
            }
            catch (Exception ex)
            {
                ConsoleWriteException(ex.Message);
            }

            ConsolePressAnyKey();
        }
Example #7
0
        public static void Run()
        {
            string assetsRelativePath = @"../../../assets";
            string assetsPath         = GetAbsolutePath(assetsRelativePath);

            var tagsTsv            = Path.Combine(assetsPath, "inputs", "data", "images_list.tsv");
            var imagesFolder       = Path.Combine(assetsPath, "inputs", "data");
            var imageClassifierZip = Path.Combine(assetsPath, "inputs", "imageClassifier.zip");

            try
            {
                var modelScorer = new ModelScorer(tagsTsv, imagesFolder, imageClassifierZip);
                modelScorer.ClassifyImages();
            }
            catch (Exception ex)
            {
                ConsoleHelpers.ConsoleWriteException(ex.Message);
            }

            ConsoleHelpers.ConsolePressAnyKey();
        }
Example #8
0
        static void Main(string[] args)
        {
            string assetsRelativePath = @"../../../assets";
            string assetsPath         = GetAbsolutePath(assetsRelativePath);


            var imagesFolder       = Path.Combine(assetsPath, "inputs", "images-for-predictions");
            var imageClassifierZip = Path.Combine(assetsPath, "inputs", "MLNETModel", "imageClassifier.zip");

            try
            {
                var modelScorer = new ModelScorer(imagesFolder, imageClassifierZip);
                modelScorer.ClassifyImages();
            }
            catch (Exception ex)
            {
                ConsoleWriteException(ex.ToString());
            }

            ConsolePressAnyKey();
        }
        public Labeler(string modelPath, string repoOwner = "", string repoName = "", string accessToken = "")
        {
            _modelPath = modelPath;
            _repoOwner = repoOwner;
            _repoName  = repoName;

            _mlContext = new MLContext(seed: 1);

            //Load file model into ModelScorer
            _modelScorer = new ModelScorer <GitHubIssue, GitHubIssuePrediction>(_mlContext);
            _modelScorer.LoadModelFromZipFile(_modelPath);

            //Configure Client to access a GitHub repo
            if (accessToken != string.Empty)
            {
                var productInformation = new ProductHeaderValue("MLGitHubLabeler");
                _client = new GitHubClient(productInformation)
                {
                    Credentials = new Credentials(accessToken)
                };
            }
        }
        static async Task Main(string[] args)
        {
            // Running inside Visual Studio, $SolutionDir/assets is automatically passed as argument
            // If you execute from the console, pass as argument the location of the assets folder
            // Otherwise, it will search for assets in the executable's folder
            var assetsPath = args.Length > 0 ? args[0] : ModelHelpers.GetAssetsPath();

            var tagsTsv      = Path.Combine(assetsPath, "inputs", "data", "tags.tsv");
            var imagesFolder = Path.Combine(assetsPath, "inputs", "data");
            var inceptionPb  = Path.Combine(assetsPath, "inputs", "inception", "tensorflow_inception_graph.pb");
            var labelsTxt    = Path.Combine(assetsPath, "inputs", "inception", "imagenet_comp_graph_label_strings.txt");

            try
            {
                var modelEvaluator = new ModelScorer(tagsTsv, imagesFolder, inceptionPb, labelsTxt);
                modelEvaluator.Score();
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Exception: {ex.Message}");
            }
            Console.ReadKey();
        }
 public PersonController(PersonDbContext context)
 {
     _context = context;
     PresentsHelper.ExtractAllPresents(_context.Persons);
     _modelScorer = new ModelScorer("model.zip");
 }
Example #12
0
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath, MyTrainerStrategy selectedStrategy)
        {
            // Create MLContext to be shared across the model creation workflow objects
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            var textLoader       = GitHubLabelerTextLoaderFactory.CreateTextLoader(mlContext);
            var trainingDataView = textLoader.Read(DataSetLocation);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = GitHubLabelerDataProcessPipelineFactory.CreateDataProcessPipeline(mlContext);

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Create the selected training algorithm/trainer
            IEstimator <ITransformer> trainer = null;

            switch (selectedStrategy)
            {
            case MyTrainerStrategy.SdcaMultiClassTrainer:
                trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(DefaultColumnNames.Label,
                                                                                                     DefaultColumnNames.Features);
                break;

            case MyTrainerStrategy.OVAAveragedPerceptronTrainer:
            {
                // Create a binary classification trainer.
                var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(DefaultColumnNames.Label,
                                                                                                                 DefaultColumnNames.Features,
                                                                                                                 numIterations: 10);
                // Compose an OVA (One-Versus-All) trainer with the BinaryTrainer.
                // In this strategy, a binary classification algorithm is used to train one classifier for each class, "
                // which distinguishes that class from all other classes. Prediction is then performed by running these binary classifiers, "
                // and choosing the prediction with the highest confidence score.
                trainer = new Ova(mlContext, averagedPerceptronBinaryTrainer);
                break;
            }

            default:
                break;
            }

            //Set the trainer/algorithm
            var modelBuilder = new Common.ModelBuilder <GitHubIssue, GitHubIssuePrediction>(mlContext, dataProcessPipeline);

            modelBuilder.AddTrainer(trainer);
            modelBuilder.AddEstimator(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValResults = modelBuilder.CrossValidateAndEvaluateMulticlassClassificationModel(trainingDataView, 6, "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // (OPTIONAL) Try/test a single prediction with the "just-trained model" (Before saving the model)
            GitHubIssue issue = new GitHubIssue()
            {
                ID = "Any-ID", Title = "WebSockets communication is slow in my machine", Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
            };
            var modelScorer = new ModelScorer <GitHubIssue, GitHubIssuePrediction>(mlContext, modelBuilder.TrainedModel);
            var prediction  = modelScorer.PredictSingle(issue);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");
            //

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
Example #13
0
        static void Main(string[] args)
        {
            try
            {
                ArgCollection argCollection = ArgCollection.GetInstance(args);

                if (argCollection.ExtractOptionalFlag("help"))
                {
                    Console.WriteLine("");
                    Console.WriteLine(UsageMessage);
                    Console.WriteLine(HelpMessage);
                    return;
                }

                string optimizerName = argCollection.ExtractOptional <string>("optimizer", "BrentThenGrid");
                string keepTestName  = argCollection.ExtractOptional <string>("keepTest", "AlwaysKeep");
                string skipRowIndexFileNameOrNull = argCollection.ExtractOptional <string>("skipRowIndexFile", null);

                argCollection.CheckNoMoreOptions();

                string          treeFileName          = argCollection.ExtractNext <string>("treeFile");
                string          predictorFileName     = argCollection.ExtractNext <string>("predictorFile");
                string          targetFileName        = argCollection.ExtractNext <string>("targetFile");
                string          leafDistributionName  = argCollection.ExtractNext <string>("leafDistribution");
                string          nullDataGeneratorName = argCollection.ExtractNext <string>("nullDataGenerator");
                string          niceName                  = argCollection.ExtractNext <string>("niceName");
                string          outputDirectory           = argCollection.ExtractNext <string>("outputDirectory");
                RangeCollection pieceIndexRangeCollection = argCollection.ExtractNext <RangeCollection>("pieceIndexRange");
                int             pieceCount                = argCollection.ExtractNext <int>("pieceCount");
                RangeCollection nullIndexRangeCollection  = argCollection.ExtractNext <RangeCollection>("nullIndexRange");

                argCollection.CheckThatEmpty();

                if (!PhyloDDriver.ValidateDistribution(leafDistributionName))
                {
                    Console.WriteLine("{0} is not a recognized distribution name. Please choose a name from the following list:", leafDistributionName);
                    foreach (string name in PhyloDDriver.GetDistributionNames())
                    {
                        Console.WriteLine("\t{0}", name);
                    }
                    throw new ArgumentException("Invalid distribution name.");
                }
                RangeCollection skipRowIndexRangeCollectionOrNull = (null == skipRowIndexFileNameOrNull) || skipRowIndexFileNameOrNull == "null" ? null : RangeCollection.Parse(File.ReadAllText(skipRowIndexFileNameOrNull));
                KeepTest <Dictionary <string, string> > keepTest  = KeepTest <Dictionary <string, string> > .GetInstance(null, keepTestName);

                SpecialFunctions.CheckCondition(pieceIndexRangeCollection.IsBetween(0, pieceCount - 1), "pieceIndex must be at least 0 and less than pieceCount");
                SpecialFunctions.CheckCondition(nullIndexRangeCollection.IsBetween(-1, int.MaxValue), "nullIndex must be at least -1");

                PhyloTree aPhyloTree = PhyloTree.GetInstance(treeFileName, null);

                ModelScorer    modelScorer    = ModelScorer.GetInstance(aPhyloTree, leafDistributionName, optimizerName);
                ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(leafDistributionName, modelScorer);
                PhyloDDriver   driver         = PhyloDDriver.GetInstance();

                driver.Run(
                    modelEvaluator,
                    predictorFileName, targetFileName,
                    leafDistributionName, nullDataGeneratorName,
                    keepTest, skipRowIndexRangeCollectionOrNull,
                    niceName,
                    outputDirectory,
                    pieceIndexRangeCollection, pieceCount,
                    nullIndexRangeCollection,
                    optimizerName);

                //Console.Write("Press enter to exist.");
                //Console.Read();
            }
            catch (Exception exception)
            {
                Console.WriteLine("");
                Console.WriteLine(exception.Message);
                if (exception.InnerException != null)
                {
                    Console.WriteLine(exception.InnerException.Message);
                }

                Console.WriteLine("");
                Console.WriteLine(UsageMessage);
                throw;
            }
        }
Example #14
0
        /// <summary>
        /// Does the work.
        /// </summary>
        public override void DoWork()
        {
            // get our input data and null the field to make sure we don't serialize it back
            InputData inputData = mInputData;

            mInputData = null;

            // get the job-specific names of input files
            FileDefCollection fileDefs             = Job.FileDefs;
            string            treeFileName         = Utility.GetNamedFileDef(fileDefs, Constants.TreeFileDefName).LocalName;
            string            predictorFileName    = Utility.GetNamedFileDef(fileDefs, Constants.PredictorFileDefName).LocalName;
            string            targetFileName       = Utility.GetNamedFileDef(fileDefs, Constants.TargetFileDefName).LocalName;
            string            skipRowIndexFileName = Utility.GetNamedFileDef(fileDefs, Constants.SkipRowIndexFileDefName).LocalName;

            // construct RangeCollections
            RangeCollection pieceIndexRangeCollection = RangeCollection.Parse(inputData.PieceIndexRange);
            RangeCollection nullIndexRangeCollection  = RangeCollection.Parse(inputData.NullIndexRange);
            RangeCollection skipRowIndexRangeCollection;
            FileInfo        fileInfo = new FileInfo(skipRowIndexFileName);

            if (fileInfo.Length > 0)
            {
                skipRowIndexRangeCollection = RangeCollection.Parse(File.ReadAllText(skipRowIndexFileName));
            }
            else
            {
                skipRowIndexRangeCollection = null;
            }

            // do the rest
            PhyloTree   aPhyloTree  = PhyloTree.GetInstance(treeFileName, null);
            ModelScorer modelScorer =
                ModelScorer.GetInstance(aPhyloTree, inputData.LeafDistributionName, inputData.OptimizerName);
            ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(inputData.LeafDistributionName, modelScorer);
            KeepTest <Dictionary <string, string> > keepTest =
                KeepTest <Dictionary <string, string> > .GetInstance(null, inputData.KeepTestName);

            PhyloDDriver driver = PhyloDDriver.GetInstance();

            // create a name for the temporary job sandbox.  This directory gets created by driver.Run(...)
            string agentOutputDirectoryName =
                Path.Combine(Environment.CurrentDirectory, String.Format(CultureInfo.InvariantCulture, "{0}.{1}", Job.JobId, Task.TaskId));

            // save the standard out and standard error in memory streams
            using (MemoryStream streamOut = new MemoryStream(), streamError = new MemoryStream())
            {
                try
                {
                    // redirect the outputs
                    using (
                        StreamWriter writerOut = new StreamWriter(streamOut),
                        writerError = new StreamWriter(streamError))
                    {
                        Console.SetOut(writerOut);
                        Console.SetError(writerError);

                        try
                        {
                            // run the model
                            string outputFileName = driver.Run(
                                modelEvaluator,
                                predictorFileName, targetFileName,
                                inputData.LeafDistributionName, inputData.NullDataGeneratorName,
                                keepTest, skipRowIndexRangeCollection,
                                inputData.NiceName,
                                agentOutputDirectoryName,
                                pieceIndexRangeCollection, inputData.PieceCount,
                                nullIndexRangeCollection,
                                inputData.OptimizerName);

                            // this is the expected output file name -- save this so it can be written on the master side with the same name.
                            mOutputFileName = Path.GetFileName(outputFileName);


                            mLocalOutputFileName = Path.Combine(inputData.LocalOutputDirectoryName, mOutputFileName);

                            // get the output data
                            string fullOutputPath = Path.Combine(agentOutputDirectoryName, mOutputFileName);
                            if (!File.Exists(fullOutputPath))
                            {
                                TaskResult.FailureReason  = TaskFailureReason.MissingOutput;
                                TaskResult.FailureMessage = String.Format(CultureInfo.CurrentCulture, "Cannot find output file '{0}'", targetFileName);
                                TaskResult.Status         = TaskAssignmentStatus.Failed;
                            }
                            using (StreamReader outputData = new StreamReader(fullOutputPath))
                            {
                                mOutputData = outputData.ReadToEnd();
                            }
                        }
                        finally
                        {
                            // this finally is to make sure we delete the folder
                            // get rid of the sandbox
                            Directory.Delete(agentOutputDirectoryName, true);
                        }
                    }
                }
                finally
                {
                    // this finally is to make sure we get console output
                    Encoding encoding = Encoding.Default;
                    TaskResult.StandardOutput = encoding.GetString(streamOut.GetBuffer());
                    TaskResult.StandardError  = encoding.GetString(streamError.GetBuffer());
                }
            }
        }