コード例 #1
0
        /// <summary>
        /// Fits an SSA spike detector on <paramref name="dataView"/>, scores that same data with
        /// the fitted model and passes the predictions plus the raw columns to <c>Display</c>.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to build the estimator and read the predictions.</param>
        /// <param name="dataView">Data holding the signal column (<c>signalName</c>) and a "Timestamp" column.</param>
        public static void DetectAnomalies(MLContext mlContext, IDataView dataView)
        {
            // Detector settings.
            const int Confidence        = 98;
            const int PValueHistory     = 30;
            const int TrainingWindow    = 90;
            const int SeasonalityWindow = 30;

            // Build the estimator: reads the signal column, writes the prediction column.
            var spikeEstimator = mlContext.Transforms.DetectSpikeBySsa(
                nameof(SpikePrediction.Prediction),
                signalName,
                confidence: Confidence,
                pvalueHistoryLength: PValueHistory,
                trainingWindowSize: TrainingWindow,
                seasonalityWindowSize: SeasonalityWindow);

            // Train and score on the same data view.
            ITransformer spikeModel = spikeEstimator.Fit(dataView);
            IDataView    scored     = spikeModel.Transform(dataView);

            IEnumerable<SpikePrediction> spikes =
                mlContext.Data.CreateEnumerable<SpikePrediction>(scored, false);

            // Raw input columns, materialized so Display can index them.
            float[]    signalValues = dataView.GetColumn<float>(signalName).ToArray();
            DateTime[] timestamps   = dataView.GetColumn<DateTime>("Timestamp").ToArray();

            Display(spikes, signalValues, timestamps);
        }
コード例 #2
0
        /// <summary>
        /// Loads the saved model from <c>ModelPath</c>, scores <paramref name="dataView"/> with it and
        /// prints one console line per row, highlighting rows whose alert value equals 1.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to load the model and read the predictions.</param>
        /// <param name="dataView">Data holding "ConsumptionDiffNormalized" and "time" columns.</param>
        public static void DetectAnomalies(MLContext mlContext, IDataView dataView)
        {
            // The input schema returned by Load is not used here.
            ITransformer model  = mlContext.Model.Load(ModelPath, out _);
            IDataView    scored = model.Transform(dataView);

            // Prediction column exposed as typed rows.
            IEnumerable<SpikePrediction> spikes =
                mlContext.Data.CreateEnumerable<SpikePrediction>(scored, false);

            // Input columns, materialized so they can be indexed in step with the predictions.
            float[]    readings = dataView.GetColumn<float>("ConsumptionDiffNormalized").ToArray();
            DateTime[] times    = dataView.GetColumn<DateTime>("time").ToArray();

            Console.WriteLine("======Displaying anomalies in the Power meter data=========");
            Console.WriteLine("Date              \tReadingDiff\tAlert\tScore\tP-Value");

            int row = 0;
            foreach (SpikePrediction spike in spikes)
            {
                // Prediction layout as printed below: [0] alert, [1] score, [2] p-value.
                var values = spike.Prediction;

                bool isAlert = values[0] == 1;
                if (isAlert)
                {
                    Console.BackgroundColor = ConsoleColor.DarkYellow;
                    Console.ForegroundColor = ConsoleColor.Black;
                }

                Console.WriteLine("{0}\t{1:0.0000}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
                                  times[row], readings[row],
                                  values[0], values[1], values[2]);

                // Restore default colours after every line, highlighted or not.
                Console.ResetColor();
                row++;
            }
        }
コード例 #3
0
        /// <summary>
        /// Materializes the feature and label columns of <paramref name="testData"/> and, as a side
        /// effect, loads the model identified by <paramref name="modelId"/> into <c>this.loadedModel</c>.
        /// </summary>
        /// <param name="testData">Data view to read the two columns from.</param>
        /// <param name="modelId">Id passed to the file system repository to obtain the model stream.</param>
        /// <param name="mlContext">ML.NET context used for column access and model loading.</param>
        /// <param name="featuresColumn">Name of the string feature column.</param>
        /// <param name="labelsColumn">Name of the boolean label column.</param>
        /// <returns>The row count, the raw feature strings and the labels, in row order.</returns>
        private (int, string[], bool[]) PullDataFromDataView(IDataView testData, Guid modelId, MLContext mlContext, string featuresColumn, string labelsColumn)
        {
            // The features are still raw strings here, not numeric vectors.
            // Materialize the column once: the array length is the row count, so no separate
            // counting pass over the data view is needed.
            string[] testSetFeatures = testData.GetColumn<string>(mlContext, featuresColumn).ToArray();
            int      sizeTestSet     = testSetFeatures.Length;

            // Labels are read from the same view, capped at the number of feature rows.
            bool[] testSetLabels = testData.GetColumn<bool>(mlContext, labelsColumn).Take(sizeTestSet).ToArray();

            // NOTE(review): the object returned by GetModelFileStream is not disposed here —
            // confirm who owns it.
            this.loadedModel = mlContext.Model.Load(this.fileSystemRepository.GetModelFileStream(modelId));

            return (sizeTestSet, testSetFeatures, testSetLabels);
        }
コード例 #4
0
        /// <summary>
        /// Fits an SSA spike estimator on <paramref name="dataView"/>, scores the same data with the
        /// fitted model and prints one console line per row, highlighting rows whose alert value equals 1.
        /// </summary>
        /// <param name="ml">ML.NET context used to build the estimator and read the predictions.</param>
        /// <param name="dataView">Data holding "ConsumptionDiffNormalized" and "time" columns.</param>
        public static void BuildTrainEvaluateModel(MLContext ml, IDataView dataView)
        {
            // Estimator settings.
            const int Confidence        = 98;
            const int PValueHistory     = 30;
            const int TrainingWindow    = 90;
            const int SeasonalityWindow = 30;

            string predictionColumnName = nameof(SpikePrediction.Prediction);

            var spikeEstimator = ml.Transforms.SsaSpikeEstimator(
                predictionColumnName,
                nameof(MeterData.ConsumptionDiffNormalized),
                confidence: Confidence,
                pvalueHistoryLength: PValueHistory,
                trainingWindowSize: TrainingWindow,
                seasonalityWindowSize: SeasonalitySizeOf(SeasonalityWindow));

            // Train and score on the same data view.
            var spikeModel = spikeEstimator.Fit(dataView);
            var scored     = spikeModel.Transform(dataView);

            // Prediction column exposed as typed rows.
            IEnumerable<SpikePrediction> spikes =
                ml.Data.CreateEnumerable<SpikePrediction>(scored, false);

            // Input columns, materialized so they can be indexed in step with the predictions.
            var readings = dataView.GetColumn<float>(ml, "ConsumptionDiffNormalized").ToArray();
            var times    = dataView.GetColumn<DateTime>(ml, "time").ToArray();

            Console.WriteLine($"{predictionColumnName} column obtained post-transformation.");
            Console.WriteLine("Date              \tReadingDiff\tAlert\tScore\tP-Value");

            int row = 0;
            foreach (var spike in spikes)
            {
                // Prediction layout as printed below: [0] alert, [1] score, [2] p-value.
                var values = spike.Prediction;

                bool isAlert = values[0] == 1;
                if (isAlert)
                {
                    Console.BackgroundColor = ConsoleColor.DarkYellow;
                    Console.ForegroundColor = ConsoleColor.Black;
                }

                Console.WriteLine("{0}\t{1:0.0000}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
                                  times[row], readings[row],
                                  values[0], values[1], values[2]);

                // Restore default colours after every line, highlighted or not.
                Console.ResetColor();
                row++;
            }

            // Local pass-through kept next to its single use so the named argument above reads clearly.
            int SeasonalitySizeOf(int size)
            {
                return size;
            }
        }
コード例 #5
0
        /// <summary>
        /// Loads the saved model from <c>ModelPath</c>, scores <paramref name="dataView"/>, prints every
        /// row to the console (highlighting rows with a positive alert value) and collects the
        /// highlighted rows.
        /// </summary>
        /// <param name="context">ML.NET context used to load the model and read the predictions.</param>
        /// <param name="dataView">Data holding the <c>DayInfo.Count</c> and <c>DayInfo.Date</c> columns.</param>
        /// <returns>One <c>DayInfo</c> (date and count) per row whose alert value is greater than zero.</returns>
        private static IEnumerable<DayInfo> DetectAnomalies(MLContext context, IDataView dataView)
        {
            ITransformer model  = context.Model.Load(ModelPath, out _);
            IDataView    scored = model.Transform(dataView);

            // Prediction column exposed as typed rows.
            IEnumerable<SpikePrediction> spikes =
                context.Data.CreateEnumerable<SpikePrediction>(scored, false);

            // Input columns, materialized so they can be indexed in step with the predictions.
            float[]    counts = dataView.GetColumn<float>(nameof(DayInfo.Count)).ToArray();
            DateTime[] dates  = dataView.GetColumn<DateTime>(nameof(DayInfo.Date)).ToArray();

            Console.WriteLine("======Displaying anomalies in the PressCenters.com data=========");
            Console.WriteLine("Date                          \tCount\tAlert\tScore\tP-Value");

            var flaggedDays = new List<DayInfo>();

            int row = 0;
            foreach (SpikePrediction spike in spikes)
            {
                // Prediction layout as printed below: [0] alert, [1] score, [2] p-value.
                var values = spike.Prediction;

                if (values[0] > 0)
                {
                    flaggedDays.Add(new DayInfo { Date = dates[row], Count = counts[row] });

                    Console.BackgroundColor = ConsoleColor.DarkYellow;
                    Console.ForegroundColor = ConsoleColor.Black;
                }

                // Every row is printed; only flagged rows are coloured.
                Console.WriteLine(
                    "{0}\t{1:0}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
                    dates[row].ToLongDateString().PadRight(25),
                    counts[row],
                    values[0],
                    values[1],
                    values[2]);
                Console.ResetColor();

                row++;
            }

            return flaggedDays;
        }
コード例 #6
0
        /// <summary>
        /// Click handler: builds an image pipeline ending in the ONNX model, fits it on an empty
        /// data set, scores one hard-coded test image and shows any exception message in a message box.
        /// </summary>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            try
            {
                string onnxModelPath = @"D:\model.onnx";

                MLContext context = new MLContext();

                // An empty ImageData set; it only supplies the input schema for Fit below.
                var emptyInput = context.Data.LoadFromEnumerable(new List<ImageData>());

                // Pipeline: load image -> resize -> extract pixels -> custom normalization -> ONNX scoring.
                var scoringPipeline = context.Transforms
                    .LoadImages(outputColumnName: "image", imageFolder: "", inputColumnName: nameof(ImageData.ImagePath))
                    .Append(context.Transforms.ResizeImages(
                                outputColumnName: ModelSettings.ModelInput,
                                imageWidth: ImageResNetSettings.imageWidth,
                                imageHeight: ImageResNetSettings.imageHeight,
                                inputColumnName: "image"))
                    .Append(context.Transforms.ExtractPixels(outputColumnName: ModelSettings.ModelInput))
                    .Append(context.Transforms.CustomMapping(
                                new NormalizeMapping().GetMapping(),
                                contractName: nameof(NormalizeMapping)))
                    .Append(context.Transforms.ApplyOnnxModel(
                                modelFile: onnxModelPath,
                                outputColumnNames: new[] { ModelSettings.ModelOutput },
                                inputColumnNames: new[] { ModelSettings.ModelInput }));

                var scorer = scoringPipeline.Fit(emptyInput);

                // Registered after fitting, exactly as before.
                context.ComponentCatalog.RegisterAssembly(typeof(NormalizeMapping).Assembly);

                // Score the single test image.
                IEnumerable<ImageData> testImages = ImageData.ReadImageFromPaths(new string[] { @"E:\test.jpg" });
                IDataView              testView   = context.Data.LoadFromEnumerable(testImages);
                IDataView              scored     = scorer.Transform(testView);

                // ToList() forces the scoring to run; the result itself is not used further.
                List<float[]> probabilities = scored.GetColumn<float[]>(ModelSettings.ModelOutput).ToList();
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
コード例 #7
0
        // Builds one Data entry per row of dataView:
        //   recipeId, Ingredient (id, name), score (1 = present, 0 = not present in recipe).
        // The ingredient name is looked up in the features view, using the ingredient id as the index.
        public Data[] GetData(IDataView dataView, IDataView features)
        {
            // Materialize the four columns (same read order as the schema lookups below).
            int[]    ingredientIds = dataView.GetColumn<int>(dataView.Schema["ingrId"]).ToArray();
            int[]    recipeIds     = dataView.GetColumn<int>(dataView.Schema["recipeId"]).ToArray();
            float[]  labels        = dataView.GetColumn<float>(dataView.Schema["Label"]).ToArray();
            string[] names         = features.GetColumn<string>(features.Schema["ingrName"]).ToArray();

            // One entry per recipe row; the float label is truncated to an int score.
            return recipeIds
                   .Select((recipeId, row) =>
                   {
                       int ingredientId = ingredientIds[row];
                       var ingredient   = new Ingredient(ingredientId, names[ingredientId]);
                       return new Data(recipeId, ingredient, (int)labels[row]);
                   })
                   .ToArray();
        }
コード例 #8
0
        // Debugging/development helper only — not meant for release/production training runs.
        // Fits the pipeline on dataView, transforms the same data, and prints the first
        // numberOfRows values of the float-vector column columnName to the console.
        public static void PeekVectorColumnDataInConsole(MLContext mlContext, string columnName, IDataView dataView, IEstimator<ITransformer> pipeline, int numberOfRows = 4)
        {
            ConsoleWriteHeader($"Peek data in DataView: : Show {numberOfRows} rows with just the '{columnName}' column");

            IDataView transformed = pipeline.Fit(dataView).Transform(dataView);

            // Only the first numberOfRows vectors of the requested column are materialized.
            float[][] peekedRows = transformed.GetColumn<float[]>(columnName)
                                   .Take(numberOfRows)
                                   .ToArray();

            for (int index = 0; index < peekedRows.Length; index++)
            {
                // Values are written back to back, with no separator between them.
                string joinedValues = string.Concat(peekedRows[index].Select(value => value.ToString()));

                Console.WriteLine();
                Console.WriteLine($"**** Row {index + 1} with '{columnName}' field value ****");
                Console.WriteLine(joinedValues);
                Console.WriteLine();
            }
        }
コード例 #9
0
        /// <summary>
        /// Runs the ONNX model on a single image file and returns the first entry of the
        /// "classLabel" output for it.
        /// </summary>
        /// <param name="imageFilePath">Path of the image; split into folder and file name for the pipeline.</param>
        /// <returns>The first tag of the first scored row, or <c>null</c> when there is none.</returns>
        public string Classify(string imageFilePath)
        {
            var context = new MLContext();

            // Pipeline: load image from its folder -> resize -> extract pixels into "data" -> ONNX scoring.
            var loadStep = context.Transforms.LoadImages(
                outputColumnName: "image",
                imageFolder: Path.GetDirectoryName(imageFilePath),
                inputColumnName: "ImagePath");

            var scoringPipeline = loadStep
                .Append(context.Transforms.ResizeImages(
                            outputColumnName: "image",
                            imageWidth: ImageWidth,
                            imageHeight: ImageHeight,
                            inputColumnName: "image"))
                .Append(context.Transforms.ExtractPixels(
                            outputColumnName: "data",
                            inputColumnName: "image",
                            orderOfExtraction: ColorsOrder))
                .Append(context.Transforms.ApplyOnnxModel(
                            modelFile: _modelFilePath.Value,
                            outputColumnNames: new[] { ModelOutput },
                            inputColumnNames: new[] { ModelInput }));

            // The single input row carries only the file name; the folder was given to LoadImages.
            var singleImage = new ImageData() { ImagePath = Path.GetFileName(imageFilePath) };
            IEnumerable<ImageData> inputRows = new[] { singleImage };
            IDataView inputView = context.Data.LoadFromEnumerable(inputRows);

            // Fit and score on the same one-row view.
            IDataView scored = scoringPipeline.Fit(inputView).Transform(inputView);

            var labelRows = scored.GetColumn<string[]>("classLabel");

            return labelRows.FirstOrDefault()?.FirstOrDefault();
        }
コード例 #10
0
        // Scores the data with the given model and returns the column named by
        // modelSettings.modelOutput, one float[] per row.
        protected IEnumerable<float[]> PredictDataUsingModel(IDataView data, ITransformer model)
        {
            return model.Transform(data)
                        .GetColumn<float[]>(modelSettings.modelOutput);
        }
コード例 #11
0
        /// <summary>
        /// Auto-detects the purpose of every column in the data view. The label column is always
        /// assigned <c>ColumnPurpose.Label</c>; caller-supplied overrides are applied last and win.
        /// </summary>
        /// <param name="context">ML.NET context forwarded to the index-based inference overload.</param>
        /// <param name="data">Data view whose columns are classified.</param>
        /// <param name="label">Name of the label column.</param>
        /// <param name="columnOverrides">Optional purposes that replace whatever was inferred for their column index.</param>
        /// <returns>An array with one slot per schema column, indexed by column index.</returns>
        public static PurposeInference.Column[] InferPurposes(MLContext context, IDataView data, string label,
                                                              PurposeInference.Column[] columnOverrides = null)
        {
            var labelIndex = data.GetColumn(label).Index;

            // Decide which columns take part in inference, then infer their purposes.
            var includedIndices = CalcIncludedIndices(data.Schema.Count, labelIndex, columnOverrides);
            var inferredColumns = InferPurposes(context, data, includedIndices);

            var purposes = new PurposeInference.Column[data.Schema.Count];

            // 1) the label column
            purposes[labelIndex] = new IntermediateColumn(data, labelIndex, ColumnPurpose.Label).GetColumn();

            // 2) the inferred columns
            foreach (var inferred in inferredColumns)
            {
                purposes[inferred.ColumnIndex] = inferred.GetColumn();
            }

            // 3) the overrides, written last so they replace anything set above
            if (columnOverrides == null)
            {
                return purposes;
            }

            for (int k = 0; k < columnOverrides.Length; k++)
            {
                var columnOverride = columnOverrides[k];
                purposes[columnOverride.ColumnIndex] = columnOverride;
            }

            return purposes;
        }
コード例 #12
0
        // Loads TrainDataPath as Appointment rows and prints the number of values
        // in the IceCream column (i.e. the row count) to the console.
        private static void InitDataSet(MLContext mlContext)
        {
            IDataView appointments = mlContext.Data.LoadFromTextFile<Appointment>(TrainDataPath);

            int rowCount = appointments
                           .GetColumn<float>(nameof(Appointment.IceCream))
                           .Count();

            Console.WriteLine(rowCount);
        }
コード例 #13
0
        // Prints the float-vector column columnName of encodedData to the console,
        // one tab-separated line per row, stopping after the 11th row.
        private static void PrintDataColumn(IDataView encodedData, string columnName)
        {
            var vectors = encodedData.GetColumn<float[]>(encodedData.Schema[columnName]);

            ConsoleHelper.ConsoleWriteHeader("==========One Hot Encoding Results=========");

            int printedRows = 0;

            foreach (float[] vector in vectors)
            {
                // Each value is followed by a tab, including the last one on the line.
                foreach (float value in vector)
                {
                    Console.Write($"{value}\t");
                }

                Console.WriteLine();

                // Stop once more than ten rows have been written.
                if (++printedRows > 10)
                {
                    break;
                }
            }
        }
コード例 #14
0
ファイル: ML.cs プロジェクト: actopozipc/Telluz
        /// <summary>
        /// Extends <paramref name="emissions"/> with model-predicted values, using population as the
        /// model input, and returns the same (mutated) list.
        /// </summary>
        /// <param name="modelContainer">Holds the <c>MLContext</c> and the trained emission model.</param>
        /// <param name="futureYear">Year up to which values are wanted.</param>
        /// <param name="coa_id">Id used to look up the population series in the database.</param>
        /// <param name="emissions">Known values; predicted entries are appended to this list in place.</param>
        /// <param name="cNTK">Predictor used to extend the population series when required.</param>
        /// <returns>The <paramref name="emissions"/> instance, with predicted entries appended.</returns>
        public async Task<List<YearWithValue>> PredictCo2OverYearsAsync(Model modelContainer, int futureYear, int coa_id, List<YearWithValue> emissions, CNTK cNTK)
        {
            // Population that is already known.
            List<YearWithValue> population = await dB.GetPopulationByCoaIdAsync(coa_id);

            // NOTE(review): presumably true when the known population does not reach futureYear — confirm
            // against CompareBiggestValueToFutureYear. In that case the series is extended by prediction.
            if (CompareBiggestValueToFutureYear(population, futureYear))
            {
                population = await cNTK.PredictPopulationAsync(coa_id, futureYear, population);
            }

            // One model input row per population entry.
            EmissionModel[] populationData = new EmissionModel[population.Count];
            for (int i = 0; i < populationData.Length; i++)
            {
                populationData[i] = new EmissionModel()
                {
                    Year = population[i].Year, Population = population[i].Value.value
                };
            }

            // Batch scoring through Transform; no per-row PredictionEngine is needed for this.
            IDataView inputData   = modelContainer.mLContext.Data.LoadFromEnumerable(populationData);
            IDataView predictions = modelContainer.trainedModel.Transform(inputData);

            float[] scoreColumn = predictions.GetColumn<float>("Score").ToArray();

            // Append a score only for positions beyond the entries already present in emissions.
            // NOTE(review): this assumes emissions and population are aligned by index — confirm with callers.
            for (int i = emissions.Count; i < scoreColumn.Length; i++)
            {
                emissions.Add(new YearWithValue(population[i].Year, new Wert(scoreColumn[i], true)));
            }

            return emissions;
        }
コード例 #15
0
        /// <summary>
        /// Returns the score for <paramref name="dateTime"/>: the stored value when
        /// <c>_evaluation</c> already has one, otherwise the "Score" column produced by running
        /// <paramref name="transformer"/> over the features for that date.
        /// </summary>
        /// <param name="transformer">Model applied to the single-row feature view.</param>
        /// <param name="dateTime">Date whose features are evaluated.</param>
        /// <returns>
        /// The score values, or <c>null</c> when <c>GetFeatures</c> returns <c>null</c> for the date.
        /// </returns>
        public float[] Evaluate(ITransformer transformer, DateTime dateTime)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            if (_evaluation.TryGetValue(dateTime, out var storedScore))
            {
                return new float[] { storedScore };
            }

            var featuresDictionnary = GetFeatures(dateTime);

            if (featuresDictionnary == null)
            {
                return null;
            }

            // Copy the features into a fresh dictionary; it becomes the only row of the data view.
            Dictionary<string, float> feature = new Dictionary<string, float>();
            foreach (var item in featuresDictionnary)
            {
                feature.Add(item.Key, item.Value);
            }

            List<Dictionary<string, float>> features = new List<Dictionary<string, float>> { feature };
            IDataView data = new FloatsDataView(features);

            IDataView result = transformer.Transform(data);

            return result.GetColumn<float>("Score").ToArray();
        }
コード例 #16
0
        // Scores testData with the given model and returns the column named by
        // TinyYoloModelSettings.ModelOutput, one float[] per row.
        private IEnumerable<float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
        {
            return model.Transform(testData)
                        .GetColumn<float[]>(TinyYoloModelSettings.ModelOutput);
        }
コード例 #17
0
        // Returns the ingredient names: the "ingrName" column of the FEATURES data view.
        public string[] GetFeatures()
        {
            var       context     = new MLContext();
            IDataView featureView = GetDataView(ModelChoice.NB, context, DataPurpose.FEATURES);

            var nameColumn = featureView.Schema["ingrName"];

            return featureView.GetColumn<string>(nameColumn).ToArray();
        }
コード例 #18
0
        /// <summary>
        /// Trains an SDCA regression model on the taxi-trip training file, appends a feature
        /// contribution calculator, evaluates the result on the test file, saves it to
        /// <c>ModelPath</c> and returns it.
        /// </summary>
        /// <param name="mlContext">ML.NET context used for loading, training, evaluation and saving.</param>
        /// <returns>The trained transformer chain including the feature contribution step.</returns>
        private static ITransformer TrainModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration
            IDataView baseTrainingDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
            IDataView testDataView         = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

            // Remove extreme "outlier" fares (below $1 or above $150), which can be error data.
            // No row counts are taken before/after the filter: each count would force a full pass
            // over the data set and the values were never used.
            IDataView trainingDataView = mlContext.Data.FilterRowsByColumn(baseTrainingDataView, nameof(TaxiTrip.FareAmount), lowerBound: 1, upperBound: 150);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(TaxiTrip.FareAmount))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "VendorIdEncoded", inputColumnName: nameof(TaxiTrip.VendorId)))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "RateCodeEncoded", inputColumnName: nameof(TaxiTrip.RateCode)))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "PaymentTypeEncoded", inputColumnName: nameof(TaxiTrip.PaymentType)))
                                      .Append(mlContext.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.PassengerCount)))
                                      .Append(mlContext.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripTime)))
                                      .Append(mlContext.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripDistance)))
                                      .Append(mlContext.Transforms.Concatenate("Features", "VendorIdEncoded", "RateCodeEncoded", "PaymentTypeEncoded", nameof(TaxiTrip.PassengerCount)
                                                                               , nameof(TaxiTrip.TripTime), nameof(TaxiTrip.TripDistance)));

            // (OPTIONAL) Peek data (5 records) in the training DataView after applying the
            // process pipeline's transformations into "Features"
            ConsoleHelper.PeekDataViewInConsole(mlContext, trainingDataView, dataProcessPipeline, 5);
            ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 5);

            // STEP 3: Set the training algorithm (SDCA regression) and append it to the data pipeline
            var trainer          = mlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // STEP 4: Train the model by fitting the full pipeline to the filtered training data
            Console.WriteLine("=============== Training the model ===============");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            ConsoleHelper.ConsoleWriteHeader("=============== End of training process ===============");

            // Append a feature contribution calculator to the trained chain; it is used at
            // prediction time for explainability. The calculator is fitted on the training data
            // after it has been run through a freshly fitted data process pipeline.
            var fccPipeline = trainedModel.Append(mlContext.Transforms
                                                  .CalculateFeatureContribution(trainedModel.LastTransformer)
                                                  .Fit(dataProcessPipeline.Fit(trainingDataView).Transform(trainingDataView)));

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");

            IDataView predictions = fccPipeline.Transform(testDataView);
            var       metrics     = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");

            Common.ConsoleHelper.PrintRegressionMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            mlContext.Model.Save(fccPipeline, trainingDataView.Schema, ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return fccPipeline;
        }
        // Scores testData with the given model and returns the column named by
        // modelSettings.modelOutput, one float[] per row. Writes nothing to the console.
        private IEnumerable<float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
        {
            return model.Transform(testData)
                        .GetColumn<float[]>(modelSettings.modelOutput);
        }
コード例 #20
0
        /// <summary>
        /// Removes extreme "outlier" rows, which can be error data: only rows whose
        /// <c>AirFare</c> passes the range filter with bounds 30 and 1400 are kept.
        /// </summary>
        /// <param name="mlContext">ML.NET context providing the data operations.</param>
        /// <param name="baseTrainingDataView">Unfiltered training data containing an <c>AirFare</c> column.</param>
        /// <returns>A view over <paramref name="baseTrainingDataView"/> restricted to the accepted fare range.</returns>
        private static IDataView GetTrainingDataView(MLContext mlContext, IDataView baseTrainingDataView)
        {
            // No row counts are taken before/after the filter: each count would force a full
            // pass over the data and the values were never used.
            return mlContext.Data.FilterRowsByColumn(
                baseTrainingDataView,
                nameof(AirTravel.AirFare),
                lowerBound: 30,
                upperBound: 1400);
        }
コード例 #21
0
        // Prints a short banner (images folder and a heading), then scores testData with the
        // model and returns the column named by TinyYoloModelSettings.ModelOutput, one float[] per row.
        private IEnumerable<float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
        {
            string[] bannerLines =
            {
                $"Images location: {imagesFolder}",
                "",
                "=====Identify the objects in the images=====",
                ""
            };

            foreach (string line in bannerLines)
            {
                Console.WriteLine(line);
            }

            return model.Transform(testData)
                        .GetColumn<float[]>(TinyYoloModelSettings.ModelOutput);
        }
コード例 #22
0
        // Get recommendations for a recipe.
        // Every known ingredient (feature) gets a score; the score is equivalent to the posterior.
        // Results are returned sorted by score, highest first.
        //   model     : square table indexed [ingredient][ingredient]
        //               (NOTE(review): presumably co-occurrence counts with per-ingredient totals
        //               on the diagonal — confirm against the code that builds it)
        //   recipe    : ingredient indices already in the recipe
        //   laplace   : use the add-one smoothed ratio instead of the plain ratio
        //   normalize : additionally divide by the recipe ingredient's own diagonal entry
        //   prior     : multiply by the candidate's diagonal entry over the number of training recipes
        public Recommendation[] RecipeRecommendations(double[][] model, int[] recipe, bool laplace, bool normalize, bool prior)
        {
            var context     = new MLContext();
            var dataManager = new DataManager();

            // Unique ingredient names; their positions are the feature indices.
            string[] ingredientNames = dataManager.GetFeatures();
            int      featureCount    = ingredientNames.Length;

            // Number of rows in the training view's "recipeId" column.
            IDataView trainingRecipes = dataManager.GetDataView(ModelChoice.NB, context, DataPurpose.TRAIN);
            int       recipeCount     = trainingRecipes.GetColumn<int>(trainingRecipes.Schema["recipeId"]).ToArray().Length;

            var scored = new Recommendation[featureCount];

            for (int candidate = 0; candidate < featureCount; candidate++)
            {
                double score = 1.0;

                foreach (int ingredient in recipe)
                {
                    // An ingredient already in the recipe contributes nothing to its own score.
                    if (ingredient == candidate)
                    {
                        continue;
                    }

                    double factor;
                    if (laplace)
                    {
                        factor = (model[ingredient][candidate] + 1.0) / (model[candidate][candidate] + featureCount);
                    }
                    else
                    {
                        factor = model[ingredient][candidate] / model[candidate][candidate];
                    }

                    score *= factor;

                    if (normalize)
                    {
                        score /= model[ingredient][ingredient];
                    }
                }

                if (prior)
                {
                    score *= model[candidate][candidate] / recipeCount;
                }

                scored[candidate] = new Recommendation(new Ingredient(candidate, ingredientNames[candidate]), score);
            }

            return scored.OrderByDescending(r => r.score).ToArray();
        }
コード例 #23
0
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration
            IDataView baseTrainingDataView = mlContext.Data.ReadFromTextFile <TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
            IDataView testDataView         = mlContext.Data.ReadFromTextFile <TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

            //Sample code of removing extreme data like "outliers" for FareAmounts higher than $150 and lower than $1 which can be error-data
            var       cnt = baseTrainingDataView.GetColumn <float>(mlContext, nameof(TaxiTrip.FareAmount)).Count();
            IDataView trainingDataView = mlContext.Data.FilterByColumn(baseTrainingDataView, nameof(TaxiTrip.FareAmount), lowerBound: 1, upperBound: 150);
            var       cnt2             = trainingDataView.GetColumn <float>(mlContext, nameof(TaxiTrip.FareAmount)).Count();

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.CopyColumns(outputColumnName: DefaultColumnNames.Label, inputColumnName: nameof(TaxiTrip.FareAmount))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: VendorIdEncoded, inputColumnName: nameof(TaxiTrip.VendorId)))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: RateCodeEncoded, inputColumnName: nameof(TaxiTrip.RateCode)))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: PaymentTypeEncoded, inputColumnName: nameof(TaxiTrip.PaymentType)))
                                      .Append(mlContext.Transforms.Normalize(outputColumnName: nameof(TaxiTrip.PassengerCount), mode: NormalizerMode.MeanVariance))
                                      .Append(mlContext.Transforms.Normalize(outputColumnName: nameof(TaxiTrip.TripTime), mode: NormalizerMode.MeanVariance))
                                      .Append(mlContext.Transforms.Normalize(outputColumnName: nameof(TaxiTrip.TripDistance), mode: NormalizerMode.MeanVariance))
                                      .Append(mlContext.Transforms.Concatenate(DefaultColumnNames.Features, VendorIdEncoded, RateCodeEncoded, PaymentTypeEncoded, nameof(TaxiTrip.PassengerCount)
                                                                               , nameof(TaxiTrip.TripTime), nameof(TaxiTrip.TripDistance)));

            // (OPTIONAL) Peek data (such as 5 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            ConsoleHelper.PeekDataViewInConsole(mlContext, trainingDataView, dataProcessPipeline, 5);
            ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, DefaultColumnNames.Features, trainingDataView, dataProcessPipeline, 5);

            // STEP 3: Set the training algorithm, then create and config the modelBuilder - Selected Trainer (SDCA Regression algorithm)
            var trainer          = mlContext.Regression.Trainers.StochasticDualCoordinateAscent(labelColumn: DefaultColumnNames.Label, featureColumn: DefaultColumnNames.Features);
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // STEP 4: Train the model fitting to the DataSet
            //The pipeline is trained on the dataset that has been loaded and transformed.
            Console.WriteLine("=============== Training the model ===============");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");

            IDataView predictions = trainedModel.Transform(testDataView);
            var       metrics     = mlContext.Regression.Evaluate(predictions, label: DefaultColumnNames.Label, score: DefaultColumnNames.Score);

            Common.ConsoleHelper.PrintRegressionMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file

            using (var fs = File.Create(ModelPath))
                trainedModel.SaveTo(mlContext, fs);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return(trainedModel);
        }
コード例 #24
0
        /// <summary>
        /// Evaluates a Non-negative Matrix Factorization model. For every distinct recipe id
        /// found in the test data, each feature is scored with the model, the features are
        /// ranked by descending score, and the ranking is folded into a running
        /// <c>Results</c> object, which is displayed at the end.
        /// </summary>
        /// <param name="model">Trained model handed to <c>Recommender.SinglePrediction</c>.</param>
        public void EvaluateNMF(ITransformer model)
        {
            Console.WriteLine("\nEvaluating NMF...");
            MLContext mlContext = new MLContext();

            DataManager dm = new DataManager();

            // Test split: the data view supplies the recipe ids, the Data[] the individual entries.
            IDataView testData  = dm.GetDataView(ModelChoice.NMF, mlContext, DataPurpose.TEST);
            Data[]    test_data = dm.GetRecipes(ModelChoice.NMF, DataPurpose.TEST);

            // Train split.
            // NOTE(review): trainData is never read below; the call is kept in case
            // DataManager.GetDataView has side effects GetRecipes depends on — confirm, then remove.
            IDataView trainData  = dm.GetDataView(ModelChoice.NMF, mlContext, DataPurpose.TRAIN);
            Data[]    train_data = dm.GetRecipes(ModelChoice.NMF, DataPurpose.TRAIN);

            // Fetch the feature names once; the same array sizes the recommendation buffer
            // and bounds the scoring loop, so the two can never disagree.
            string[] features     = dm.GetFeatures();
            int      featureCount = features.Length;

            int[] distinct_recipes = testData.GetColumn <int>(testData.Schema["recipeId"])
                                     .Distinct()
                                     .ToArray();

            Results     results     = new Results(0);
            Recommender recommender = new Recommender();

            foreach (int r in distinct_recipes)
            {
                // Entries of recipe r with score == 1, test entries first, then train entries.
                Data[] recipe   = test_data.Where(d => d.recipeId == r && d.score == 1).ToArray();
                Data[] trecipe  = train_data.Where(d => d.recipeId == r && d.score == 1).ToArray();
                Data[] combined = recipe.Concat(trecipe).ToArray();

                int[] current_recipe = dm.GetRecipe(combined);

                // Score every feature for this recipe.
                Recommendation[] recommendations = new Recommendation[featureCount];
                for (int i = 0; i < featureCount; i++)
                {
                    double prediction = recommender.SinglePrediction(mlContext, model, i, r);
                    recommendations[i] = new Recommendation(new Ingredient(i, features[i]), prediction);
                }

                // Highest predicted score first.
                recommendations = recommendations.OrderByDescending(d => d.score).ToArray();
                results         = GetResults(results, recommendations, current_recipe);
            }

            // Display accuracy results
            results.ShowResults();
            Console.WriteLine();
        }
コード例 #25
0
        /// <summary>
        /// Writes a <c>float[]</c> column to the console, one row per line, with every
        /// value followed by a tab character.
        /// </summary>
        /// <param name="transformedData">Data view that contains the column.</param>
        /// <param name="columnName">Name of the column, resolved through the data view's schema.</param>
        private static void PrintDataColumn(IDataView transformedData, string columnName)
        {
            var column = transformedData.Schema[columnName];

            foreach (float[] values in transformedData.GetColumn <float[]>(column))
            {
                foreach (float value in values)
                {
                    Console.Write($"{value}\t");
                }

                // Terminate the row.
                Console.WriteLine();
            }
        }
コード例 #26
0
        /// <summary>
        /// Loads the training and test CSV files, trains an SDCA regression model that
        /// predicts <c>areaEvaluated</c> from the temperature and the four floor columns,
        /// runs the evaluator on the test data, and saves the model to <c>ModelPath</c>.
        /// Progress is reported through message boxes.
        /// </summary>
        /// <param name="mLContext">ML.NET context used for loading, training, evaluating and saving.</param>
        /// <returns>The trained model.</returns>
        private static ITransformer BuildTrainEvaluteAndSaveModel(MLContext mLContext)
        {
            TextLoader textLoader1 = mLContext.Data.CreateTextLoader(new[]
            {
                new TextLoader.Column("temperature", DataKind.Single, 0),
                new TextLoader.Column("floor1", DataKind.Single, 1),
                new TextLoader.Column("floor2", DataKind.Single, 2),
                new TextLoader.Column("floor3", DataKind.Single, 3),
                new TextLoader.Column("floor4", DataKind.Single, 4),
                new TextLoader.Column("areaEvaluated", DataKind.Single, 5)
            },
                                                                     hasHeader: true,
                                                                     separatorChar: ',');

            IDataView baseTrainingDataView = textLoader1.Load(pathDataTrain);
            IDataView testDataView         = textLoader1.Load(pathDataTest);

            // Train only on rows whose label lies between the 150 and 350 bounds.
            IDataView trainingDataView = mLContext.Data.FilterRowsByColumn(baseTrainingDataView, "areaEvaluated", lowerBound: 150, upperBound: 350);

            // The label is a copy of areaEvaluated; each input column is normalized in place
            // and the five of them are concatenated into the Features vector.
            var dataProcessPipeline = mLContext.Transforms.CopyColumns("Label", "areaEvaluated")
                                      .Append(mLContext.Transforms.Normalize(outputColumnName: "temperature", inputColumnName: "temperature", mode: NormalizerMode.MeanVariance))
                                      .Append(mLContext.Transforms.Normalize(outputColumnName: "floor1", inputColumnName: "floor1", mode: NormalizerMode.MeanVariance))
                                      .Append(mLContext.Transforms.Normalize(outputColumnName: "floor2", inputColumnName: "floor2", mode: NormalizerMode.MeanVariance))
                                      .Append(mLContext.Transforms.Normalize(outputColumnName: "floor3", inputColumnName: "floor3", mode: NormalizerMode.MeanVariance))
                                      .Append(mLContext.Transforms.Normalize(outputColumnName: "floor4", inputColumnName: "floor4", mode: NormalizerMode.MeanVariance))
                                      .Append(mLContext.Transforms.Concatenate("Features", "temperature", "floor1", "floor2", "floor3", "floor4"));

            var trainer          = mLContext.Regression.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Label", featureColumnName: "Features");
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            MessageBox.Show("Well phase1");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            MessageBox.Show("Well phase2");
            IDataView predictions = trainedModel.Transform(testDataView);

            // NOTE(review): the metrics are computed but not reported or returned anywhere.
            var metrics = mLContext.Regression.Evaluate(predictions, label: "Label", score: "Score");

            using (var fs = File.Create(ModelPath))
            {
                trainedModel.SaveTo(mLContext, fs);
            }

            MessageBox.Show("fin saved");

            return trainedModel;
        }
コード例 #27
0
        /// <summary>
        /// Asserts, row by row, that the values of a data view column equal the values
        /// stored at the same positions in the identically named DataFrame column.
        /// </summary>
        /// <typeparam name="T">Element type used to read the data view column.</typeparam>
        /// <param name="columnName">Column name used for both the data view and the DataFrame.</param>
        /// <param name="data">Data view whose column is enumerated.</param>
        /// <param name="df">DataFrame that is indexed by row position.</param>
        /// <param name="maxRows">Maximum number of rows to compare; -1 compares every row of the data view.</param>
        private void VerifyDataFrameColumnAndDataViewColumnValues <T>(string columnName, IDataView data, DataFrame df, int maxRows = -1)
        {
            int rowIndex = 0;

            foreach (T dataViewValue in data.GetColumn <T>(columnName))
            {
                bool limitReached = maxRows != -1 && rowIndex >= maxRows;
                if (limitReached)
                {
                    break;
                }

                Assert.Equal(dataViewValue, df.Columns[columnName][rowIndex]);
                rowIndex++;
            }
        }
コード例 #28
0
        /// <summary>
        /// Creates predictions: scores the test data with the model and returns the model
        /// output column for further processing.
        /// </summary>
        /// <param name="testData">Data view to score.</param>
        /// <param name="model">Transformer applied to <paramref name="testData"/>.</param>
        /// <returns>
        /// The values of the <c>TinyYoloModelSettings.ModelOutput</c> column of the scored
        /// data, one <c>float[]</c> per row.
        /// </returns>
        private IEnumerable <float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
        {
            // Banner: image folder, blank line, heading, blank line.
            string[] banner =
            {
                $"Images location: {ImagesFolder}",
                "",
                "=====Identify the objects in the images=====",
                ""
            };

            foreach (string line in banner)
            {
                Console.WriteLine(line);
            }

            // Score the data, then extract the predicted probabilities from the output column.
            return model.Transform(testData).GetColumn <float[]>(TinyYoloModelSettings.ModelOutput);
        }
コード例 #29
0
        /// <summary>
        /// Trains an SDCA regression model that predicts <c>DurationInMonths</c> from the
        /// employment-history columns, prints regression metrics, and saves the model.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; the entries at <c>CommandLineArguments.INPUT_FILE</c> and
        /// <c>CommandLineArguments.OUTPUT_FILE</c> give the CSV to read and the model file to write.
        /// </param>
        protected override void Train(string[] args)
        {
            // Resolve both paths up front so a missing argument fails before training starts.
            string inputFile  = args[(int)CommandLineArguments.INPUT_FILE];
            string outputFile = args[(int)CommandLineArguments.OUTPUT_FILE];

            IDataView allDataView = MlContext.Data.LoadFromTextFile <EmploymentHistory>(inputFile, hasHeader: true, separatorChar: ',');

            // Train only on rows whose label lies between the 1 and 150 bounds.
            IDataView trainingDataView = MlContext.Data.FilterRowsByColumn(allDataView, nameof(EmploymentHistory.DurationInMonths), lowerBound: 1, upperBound: 150);

            // The label is a copy of DurationInMonths; PositionName is one-hot encoded, the
            // remaining inputs are normalized in place, and all are concatenated into Features.
            var dataProcessPipeline = MlContext.Transforms.CopyColumns("Label", nameof(EmploymentHistory.DurationInMonths))
                                      .Append(MlContext.Transforms.Categorical.OneHotEncoding("PositionNameEncoded", "PositionName"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("IsMarried"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("BSDegree"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("MSDegree"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("YearsExperience"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("AgeAtHire"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("HasKids"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("WithinMonthOfVesting"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("DeskDecorations"))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance("LongCommute"))
                                      .Append(MlContext.Transforms.Concatenate("Features", "PositionNameEncoded", "IsMarried", "BSDegree", "MSDegree", "YearsExperience", "AgeAtHire", "HasKids", "WithinMonthOfVesting", "DeskDecorations", "LongCommute"));

            var trainer          = MlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            var trainedModel = trainingPipeline.Fit(trainingDataView);

            // NOTE(review): evaluation runs on the same (unfiltered) input file the model was
            // trained from, so these numbers say little about generalization.
            var dataWithPredictions = trainedModel.Transform(allDataView);

            // The trainer is a regressor, so the regression evaluator (Label/Score columns)
            // is the one that matches the model's output.
            var metrics = MlContext.Regression.Evaluate(dataWithPredictions);

            Console.WriteLine($"Loss Function: {metrics.LossFunction:0.##}");
            Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:0.##}");
            Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:0.##}");
            Console.WriteLine($"RSquared: {metrics.RSquared:0.##}");
            Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:0.##}");

            using (var fs = File.Create(outputFile))
            {
                MlContext.Model.Save(trainedModel, trainingDataView.Schema, fs);
            }

            Console.WriteLine($"Saved model to {outputFile}");
        }
コード例 #30
0
ファイル: Trainer.cs プロジェクト: jcapellman/ml-net-examples
        /// <summary>
        /// Trains an SDCA regression model that predicts <c>DurationInMonths</c>, saves it to
        /// <c>ModelPath</c>, and prints regression metrics computed on a held-out split.
        /// </summary>
        /// <param name="trainingFileName">
        /// Path of the comma-separated training file. If the file does not exist a message
        /// is written to the console and the method returns without training.
        /// </param>
        public void Train(string trainingFileName)
        {
            if (!File.Exists(trainingFileName))
            {
                Console.WriteLine($"Failed to find training data file ({trainingFileName})");

                return;
            }

            var trainingDataView = MlContext.Data.LoadFromTextFile <EmploymentHistory>(trainingFileName, ',');

            // 60% of the rows are used for fitting, 40% are held out for evaluation.
            var dataSplit = MlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.4);

            // The label is a copy of DurationInMonths; the inputs are normalized in place and
            // concatenated into Features using the property list built from EmploymentHistory.
            var dataProcessPipeline = MlContext.Transforms.CopyColumns("Label", nameof(EmploymentHistory.DurationInMonths))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.IsMarried)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.BSDegree)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.MSDegree)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.YearsExperience)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.AgeAtHire)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.HasKids)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.WithinMonthOfVesting)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.DeskDecorations)))
                                      .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.LongCommute)))
                                      .Append(MlContext.Transforms.Concatenate("Features",
                                                                               typeof(EmploymentHistory).ToPropertyList <EmploymentHistory>(nameof(EmploymentHistory.DurationInMonths))));

            var trainer = MlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

            var trainingPipeline = dataProcessPipeline.Append(trainer);

            ITransformer trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);

            MlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, ModelPath);

            var testSetTransform = trainedModel.Transform(dataSplit.TestSet);

            var modelMetrics = MlContext.Regression.Evaluate(testSetTransform);

            // "0.##" keeps a leading zero, so a metric that rounds to zero still prints as "0".
            Console.WriteLine($"Loss Function: {modelMetrics.LossFunction:0.##}{Environment.NewLine}" +
                              $"Mean Absolute Error: {modelMetrics.MeanAbsoluteError:0.##}{Environment.NewLine}" +
                              $"Mean Squared Error: {modelMetrics.MeanSquaredError:0.##}{Environment.NewLine}" +
                              $"RSquared: {modelMetrics.RSquared:0.##}{Environment.NewLine}" +
                              $"Root Mean Squared Error: {modelMetrics.RootMeanSquaredError:0.##}");
        }