Esempio n. 1
0
        [ConditionalFact(typeof(BaseTestBaseline), nameof(BaseTestBaseline.LessThanNetCore30OrNotNetCore))] // netcore3.0 output differs from Baseline
        public void ChangePointDetectionWithSeasonality()
        {
            // Verifies the SsaChangePointDetector output (alert, raw score, p-value,
            // martingale score) against baseline values for a seasonal series followed
            // by an abrupt change.
            var       env = new MLContext(conc: 1);
            const int ChangeHistorySize         = 10;
            const int SeasonalitySize           = 10;
            const int NumberOfSeasonsInTraining = 5;
            const int MaxTrainingSize           = NumberOfSeasonsInTraining * SeasonalitySize;

            // ReadFromEnumerable is lazy, so the rows added to `data` below are visible
            // to `dataView` when it is enumerated.
            List <Data> data     = new List <Data>();
            var         dataView = env.Data.ReadFromEnumerable(data);

            var args = new SsaChangePointDetector.Options()
            {
                Confidence          = 95,
                Source              = "Value",
                Name                = "Change",
                ChangeHistoryLength = ChangeHistorySize,
                TrainingWindowSize  = MaxTrainingSize,
                SeasonalWindowSize  = SeasonalitySize
            };

            // Seasonal training data: NumberOfSeasonsInTraining repetitions of 0..SeasonalitySize-1.
            for (int j = 0; j < NumberOfSeasonsInTraining; j++)
            {
                for (int i = 0; i < SeasonalitySize; i++)
                {
                    data.Add(new Data(i));
                }
            }

            // Append an abrupt change: the same ramp scaled by 100.
            for (int i = 0; i < ChangeHistorySize; i++)
            {
                data.Add(new Data(i * 100));
            }

            // Train
            var detector = new SsaChangePointEstimator(env, args).Fit(dataView);
            // Transform
            var output = detector.Transform(dataView);

            // Expected (Alert, RawScore, PValue, Martingale) quadruples, flattened.
            List <double> expectedValues = new List <double>()
            {
                0, -3.31410598754883, 0.5, 5.12000000000001E-08, 0, 1.5700820684432983, 5.2001145245395008E-07,
                0.012414560443710681, 0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935, 0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483
            };

            int index = 0;

            // foreach disposes the underlying enumerator; the previous raw
            // GetEnumerator() pattern leaked the IDisposable enumerator/cursor.
            foreach (var row in env.CreateEnumerable <Prediction>(output, true))
            {
                if (index >= expectedValues.Count)
                    break;

                Assert.Equal(expectedValues[index++], row.Change[0], precision: 7);  // Alert
                Assert.Equal(expectedValues[index++], row.Change[1], precision: 7);  // Raw score
                Assert.Equal(expectedValues[index++], row.Change[2], precision: 7);  // P-Value score
                Assert.Equal(expectedValues[index++], row.Change[3], precision: 7);  // Martingale score
            }
        }
Esempio n. 2
0
        public void SpikeDetection()
        {
            // Verifies IidSpikeDetect output (alert, score, p-value) against baseline
            // values for a constant series with a single injected spike.
            var       env  = new MLContext(conc: 1);
            const int Size = 10;
            const int PvalHistoryLength = Size / 4;

            // Generate sample series data with a spike. CreateStreamingDataView is
            // lazy, so rows added to `data` below are visible to the view.
            List <Data> data     = new List <Data>(Size);
            var         dataView = env.CreateStreamingDataView(data);

            for (int i = 0; i < Size / 2; i++)
            {
                data.Add(new Data(5));
            }
            data.Add(new Data(10)); // This is the spike
            for (int i = 0; i < Size / 2 - 1; i++)
            {
                data.Add(new Data(5));
            }

            // Convert to statically-typed data view.
            var staticData = dataView.AssertStatic(env, c => new { Value = c.R4.Scalar });
            // Build the pipeline
            var staticLearningPipeline = staticData.MakeNewEstimator()
                                         .Append(r => r.Value.IidSpikeDetect(80, PvalHistoryLength));
            // Train
            var detector = staticLearningPipeline.Fit(staticData);
            // Transform
            var output = detector.Transform(staticData);

            // Expected rows: { Alert, Score, P-Value }.
            var expectedValues = new List <double[]>()
            {
                //            Alert   Score   P-Value
                new double[] { 0, 5, 0.5 },
                new double[] { 0, 5, 0.5 },
                new double[] { 0, 5, 0.5 },
                new double[] { 0, 5, 0.5 },
                new double[] { 0, 5, 0.5 },
                new double[] { 1, 10, 0.0 },            // alert is on, predicted spike
                new double[] { 0, 5, 0.261375 },
                new double[] { 0, 5, 0.261375 },
                new double[] { 0, 5, 0.50 },
                new double[] { 0, 5, 0.50 }
            };

            int rowIndex = 0;

            // foreach disposes the underlying enumerator; the previous raw
            // GetEnumerator() pattern leaked the IDisposable enumerator/cursor.
            foreach (var row in env.CreateEnumerable <SpikePrediction>(output.AsDynamic, true))
            {
                if (rowIndex >= expectedValues.Count)
                    break;

                CompareNumbersWithTolerance(expectedValues[rowIndex][0], row.Data[0], digitsOfPrecision: 7);
                CompareNumbersWithTolerance(expectedValues[rowIndex][1], row.Data[1], digitsOfPrecision: 7);
                CompareNumbersWithTolerance(expectedValues[rowIndex][2], row.Data[2], digitsOfPrecision: 7);
                rowIndex++;
            }
        }
Esempio n. 3
0
        public static void Example()
        {
            // MLContext is the starting point for all ML.NET operations; it also
            // provides exception tracking, logging, and a source of randomness.
            var mlContext = new MLContext();

            // Load a small in-memory dataset and wrap it as an IDataView.
            IEnumerable <SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = mlContext.Data.ReadFromEnumerable(data);

            // Preview of the data.
            //
            // Age    Case  Education  induced     parity  pooled.stratum  row_num  ...
            // 26.0   1.0   0-5yrs      1.0         6.0       3.0      1.0  ...
            // 42.0   1.0   0-5yrs      1.0         1.0       1.0      2.0  ...
            // 39.0   1.0   0-5yrs      2.0         6.0       4.0      3.0  ...
            // 34.0   1.0   0-5yrs      2.0         4.0       2.0      4.0  ...
            // 35.0   1.0   6-11yrs     1.0         3.0       32.0     5.0  ...

            // CopyColumns is commonly used to rename columns: here Age is copied into a
            // new "Label" column, which is what most learners expect. The Age column
            // still exists afterwards, but due to IDataView's lazy evaluation it is only
            // materialized if something downstream actually reads it (e.g. saving the
            // transformed data without dropping the column).
            IEstimator <ITransformer> pipeline = mlContext.Transforms.CopyColumns("Label", "Age");

            // Copying a column is also handy for hand-featurization with built-in or
            // CustomMapping transforms. Here Parity is copied to CustomValue, which a
            // custom function then turns into an indicator for Parity > 4.
            Action <InputRow, OutputRow> mapping = (src, dst) => dst.CustomValue = src.CustomValue > 4 ? 1 : 0;

            pipeline = pipeline.Append(mlContext.Transforms.CopyColumns("CustomValue", "Parity"))
                       .Append(mlContext.Transforms.CustomMapping(mapping, null));

            // Nothing is evaluated until the transformed data is read below.
            var transformedData = pipeline.Fit(trainData).Transform(trainData);

            // Read the columns of interest back as strongly-typed rows.
            var rowEnumerable = mlContext.CreateEnumerable <SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

            // Print the rows, confirming the behavior of CopyColumns.
            Console.WriteLine($"Label, Parity, and CustomValue columns obtained post-transformation.");
            foreach (var row in rowEnumerable)
            {
                Console.WriteLine($"Label: {row.Label} Parity: {row.Parity} CustomValue: {row.CustomValue}");
            }

            // Expected output:
            //  Label, Parity, and CustomValue columns obtained post-transformation.
            //  Label: 26 Parity: 6 CustomValue: 1
            //  Label: 42 Parity: 1 CustomValue: 0
            //  Label: 39 Parity: 6 CustomValue: 1
            //  Label: 34 Parity: 4 CustomValue: 0
            //  Label: 35 Parity: 3 CustomValue: 0
        }
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
            var enumerableData = SamplesUtils.DatasetUtils.GetInfertData();
            var data           = mlContext.Data.ReadFromEnumerable(enumerableData);

            // Before transformation, take a look at the dataset
            Console.WriteLine($"Age\tCase\tEducation\tInduced\tParity\tPooledStratum");
            foreach (var row in enumerableData)
            {
                Console.WriteLine($"{row.Age}\t{row.Case}\t{row.Education}\t{row.Induced}\t{row.Parity}\t{row.PooledStratum}");
            }
            Console.WriteLine();
            // Expected output:
            //  Age     Case    Education       Induced Parity  PooledStratum
            //  26      1       0 - 5yrs        1       6       3
            //  42      1       0 - 5yrs        1       1       1
            //  39      1       12 + yrs        2       6       4
            //  34      1       0 - 5yrs        2       4       2
            //  35      1       6 - 11yrs       1       3       32

            // Select a subset of columns to keep.
            var pipeline = mlContext.Transforms.SelectColumns("Age", "Education");

            // Now we can transform the data and look at the output to confirm the behavior of SelectColumns.
            // Don't forget that this operation doesn't actually evaluate data until we read the data below,
            // as transformations are lazy in ML.NET.
            var transformedData = pipeline.Fit(data).Transform(data);

            // Print the number of columns in the schema
            Console.WriteLine($"There are {transformedData.Schema.Count} columns in the dataset.");

            // Expected output:
            //  There are 2 columns in the dataset.

            // We can extract the newly created column as an IEnumerable of SampleInfertDataTransformed, the class we define below.
            var rowEnumerable = mlContext.CreateEnumerable <SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

            // And finally, we can write out the rows of the dataset, looking at the columns of interest.
            // (Message fixed to match the expected output below: "Education", not "Educations".)
            Console.WriteLine($"Age and Education columns obtained post-transformation.");
            foreach (var row in rowEnumerable)
            {
                Console.WriteLine($"Age: {row.Age} Education: {row.Education}");
            }

            // Expected output:
            //  Age and Education columns obtained post-transformation.
            //  Age: 26 Education: 0-5yrs
            //  Age: 42 Education: 0-5yrs
            //  Age: 39 Education: 12+yrs
            //  Age: 34 Education: 0-5yrs
            //  Age: 35 Education: 6-11yrs
        }
Esempio n. 5
0
        // This example builds a time series (a list of Data where the i-th element is
        // the value at the i-th time slot) and applies IidSpikeDetector to flag the
        // spiking points in that series.
        public static void IidSpikeDetectorTransform()
        {
            // MLContext is the entry point for all ML.NET operations; it also provides
            // exception tracking, logging, and a source of randomness.
            var ml = new MLContext();

            // Build a sample series: a run of 5s, a single spike of 10, then more 5s.
            const int Size = 10;
            var       data = new List <IidSpikeData>(Size);

            for (int index = 0; index < Size / 2; index++)
            {
                data.Add(new IidSpikeData(5));
            }
            data.Add(new IidSpikeData(10)); // This is a spike
            for (int index = 0; index < Size / 2; index++)
            {
                data.Add(new IidSpikeData(5));
            }

            // Wrap the list as an IDataView.
            var dataView = ml.Data.ReadFromEnumerable(data);

            // Column names for the spike detector.
            string outputColumnName = nameof(IidSpikePrediction.Prediction);
            string inputColumnName  = nameof(IidSpikeData.Value);

            // Fit and apply the detector (95% confidence, p-value window of Size / 4).
            var transformedData = ml.Transforms.IidSpikeEstimator(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView);

            // Read the new prediction column back as strongly-typed rows.
            var predictionColumn = ml.CreateEnumerable <IidSpikePrediction>(transformedData, reuseRowObject: false);

            Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
            Console.WriteLine("Alert\tScore\tP-Value");
            foreach (var prediction in predictionColumn)
            {
                var values = prediction.Prediction;
                Console.WriteLine("{0}\t{1:0.00}\t{2:0.00}", values[0], values[1], values[2]);
            }
            Console.WriteLine("");

            // Prediction column obtained post-transformation.
            // Alert   Score   P-Value
            // 0       5.00    0.50
            // 0       5.00    0.50
            // 0       5.00    0.50
            // 0       5.00    0.50
            // 0       5.00    0.50
            // 1       10.00   0.00   <-- alert is on, predicted spike
            // 0       5.00    0.26
            // 0       5.00    0.26
            // 0       5.00    0.50
            // 0       5.00    0.50
            // 0       5.00    0.50
        }
Esempio n. 6
0
        /// Demonstrates KeyTypes via the ValueMappingEstimator and KeyToValueEstimator.
        /// A KeyType stands in for the actual value as a unique integer representation;
        /// with treatValueAsKeyTypes set to true, the ValueMappingEstimator emits a
        /// KeyType per distinct value rather than the value itself.
        ///
        /// Here the education data is grouped into 'Undergraduate' and 'Postgraduate';
        /// because KeyTypes are used, the ValueMappingEstimator outputs the KeyType
        /// value instead of those strings.
        ///
        /// The KeyToValueEstimator is appended to convert each KeyType back to its
        /// original value, so the final output contains the string values
        /// 'Undergraduate' and 'Postgraduate'.
        public static void Run()
        {
            // MLContext is the entry point for all ML.NET operations; it also provides
            // exception tracking, logging, and a source of randomness.
            var mlContext = new MLContext();

            // Load a small in-memory dataset.
            IEnumerable <SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            IDataView trainData = mlContext.Data.ReadFromEnumerable(data);

            // Hand-built mapping from each Education value to a degree grouping.
            // (The ValueMappingEstimator accepts any IEnumerable.)
            var educationKeys = new List <string> { "0-5yrs", "6-11yrs", "12+yrs" };
            var educationValues = new List <string> { "Undergraduate", "Postgraduate", "Postgraduate" };

            // Map Education -> KeyType, then reverse the lookup so EducationCategory
            // holds the original string value again.
            var pipeline = new ValueMappingEstimator <string, string>(mlContext, educationKeys, educationValues, true, ("EducationKeyType", "Education"))
                           .Append(mlContext.Transforms.Conversion.MapKeyToValue(("EducationCategory", "EducationKeyType")));

            // Fit and transform, adding the EducationKeyType and EducationCategory columns.
            IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);

            // Read the result back as strongly-typed rows.
            IEnumerable <SampleInfertDataWithFeatures> featureRows = mlContext.CreateEnumerable <SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

            Console.WriteLine($"Example of mapping string->keytype");
            Console.WriteLine($"Age\tEducation\tEducationCategory");
            foreach (var featureRow in featureRows)
            {
                Console.WriteLine($"{featureRow.Age}\t{featureRow.Education}  \t{featureRow.EducationCategory}");
            }

            // Features column obtained post-transformation.
            //
            // Age Education    EducationCategory
            // 26  0-5yrs       Undergraduate
            // 42  0-5yrs       Undergraduate
            // 39  12+yrs       Postgraduate
            // 34  0-5yrs       Undergraduate
            // 35  6-11yrs      Postgraduate
        }
Esempio n. 7
0
        public void ChangePointDetectionWithSeasonality()
        {
            // Verifies the statically-typed SsaChangePointDetect output (alert, raw
            // score, p-value, martingale score) against baseline values for a seasonal
            // series followed by an abrupt change.
            var       env = new MLContext(conc: 1);
            const int ChangeHistorySize         = 10;
            const int SeasonalitySize           = 10;
            const int NumberOfSeasonsInTraining = 5;
            const int MaxTrainingSize           = NumberOfSeasonsInTraining * SeasonalitySize;

            // ReadFromEnumerable is lazy, so the rows added to `data` below are visible
            // to `dataView` when it is enumerated.
            var data     = new List <Data>();
            var dataView = env.Data.ReadFromEnumerable(data);

            // Seasonal training data: NumberOfSeasonsInTraining repetitions of 0..SeasonalitySize-1.
            for (int j = 0; j < NumberOfSeasonsInTraining; j++)
            {
                for (int i = 0; i < SeasonalitySize; i++)
                {
                    data.Add(new Data(i));
                }
            }

            // Append an abrupt change: the same ramp scaled by 100.
            for (int i = 0; i < ChangeHistorySize; i++)
            {
                data.Add(new Data(i * 100));
            }

            // Convert to statically-typed data view.
            var staticData = dataView.AssertStatic(env, c => new { Value = c.R4.Scalar });
            // Build the pipeline
            var staticLearningPipeline = staticData.MakeNewEstimator()
                                         .Append(r => r.Value.SsaChangePointDetect(95, ChangeHistorySize, MaxTrainingSize, SeasonalitySize));
            // Train
            var detector = staticLearningPipeline.Fit(staticData);
            // Transform
            var output = detector.Transform(staticData);

            // Expected (Alert, RawScore, PValue, Martingale) quadruples, flattened.
            List <double> expectedValues = new List <double>()
            {
                0, -3.31410598754883, 0.5, 5.12000000000001E-08, 0, 1.5700820684432983, 5.2001145245395008E-07,
                0.012414560443710681, 0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935, 0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483
            };

            int index = 0;

            // foreach disposes the underlying enumerator; the previous raw
            // GetEnumerator() pattern leaked the IDisposable enumerator/cursor.
            foreach (var row in env.CreateEnumerable <ChangePointPrediction>(output.AsDynamic, true))
            {
                if (index >= expectedValues.Count)
                    break;

                CompareNumbersWithTolerance(expectedValues[index++], row.Data[0], digitsOfPrecision: 5);  // Alert
                CompareNumbersWithTolerance(expectedValues[index++], row.Data[1], digitsOfPrecision: 5);  // Raw score
                CompareNumbersWithTolerance(expectedValues[index++], row.Data[2], digitsOfPrecision: 5);  // P-Value score
                CompareNumbersWithTolerance(expectedValues[index++], row.Data[3], digitsOfPrecision: 5);  // Martingale score
            }
        }
        /// Demonstrates the ValueMappingEstimator mapping float keys to string values,
        /// which is useful when floating-point data needs to be bucketed into named
        /// groups. Here each Temperature value is mapped into one of the groups
        /// "T1", "T2", "T3", and "T4".
        public static void Run()
        {
            // MLContext is the entry point for all ML.NET operations; it also provides
            // exception tracking, logging, and a source of randomness.
            var mlContext = new MLContext();

            // Load a small in-memory dataset.
            IEnumerable <SamplesUtils.DatasetUtils.SampleTemperatureData> data = SamplesUtils.DatasetUtils.GetSampleTemperatureData();
            IDataView trainData = mlContext.Data.ReadFromEnumerable(data);

            // Known temperature keys and the category string each one maps to.
            // (When the mapping is known up front it can be passed directly to the API;
            // the ValueMappingEstimator can also take the mapping from an IDataView.)
            var temperatureKeys = new List <float> { 39.0F, 67.0F, 75.0F, 82.0F };
            var classificationValues = new List <string> { "T1", "T2", "T3", "T4" };

            // Construct the ValueMappingEstimator, making the ML.net pipeline.
            var pipeline = mlContext.Transforms.Conversion.ValueMap(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature"));

            // Fit and transform, adding the TemperatureCategory column.
            IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);

            // Read the result back as strongly-typed rows containing the new column.
            IEnumerable <SampleTemperatureDataWithCategory> featureRows = mlContext.CreateEnumerable <SampleTemperatureDataWithCategory>(transformedData, reuseRowObject: false);

            Console.WriteLine($"Example of mapping float->string");
            Console.WriteLine($"Date\t\tTemperature\tTemperatureCategory");
            foreach (var featureRow in featureRows)
            {
                Console.WriteLine($"{featureRow.Date.ToString("d")}\t{featureRow.Temperature}\t\t{featureRow.TemperatureCategory}");
            }

            // Features column obtained post-transformation.
            //
            // Example of mapping float->string
            // Date         Temperature TemperatureCategory
            // 1/1/2012     39          T1
            // 1/2/2012     82          T4
            // 1/3/2012     75          T3
            // 1/4/2012     67          T2
            // 1/5/2012     75          T3
        }
Esempio n. 9
0
        /// <summary>
        /// Example use of OnnxEstimator in an ML.NET pipeline
        /// </summary>
        public static void OnnxTransformSample()
        {
            // Download the squeeznet image model from ONNX model zoo, version 1.2
            // https://github.com/onnx/models/tree/master/squeezenet
            var modelPath = @"squeezenet\model.onnx";

            // Inspect the model's inputs and outputs. InferenceSession owns native
            // resources, so dispose it once the metadata has been read.
            string inputName;
            string outputName;
            using (var session = new InferenceSession(modelPath))
            {
                var inputInfo  = session.InputMetadata.First();
                var outputInfo = session.OutputMetadata.First();
                inputName  = inputInfo.Key;
                outputName = outputInfo.Key;

                // Print the names directly: the previous String.Join(",", info.Key)
                // bound to the IEnumerable<char> overload and printed the name as
                // comma-separated characters ("d,a,t,a,_,0") instead of "data_0".
                Console.WriteLine($"Input Name is {inputName}");
                Console.WriteLine($"Input Dimensions are {String.Join(",", inputInfo.Value.Dimensions)}");
                Console.WriteLine($"Output Name is {outputName}");
                Console.WriteLine($"Output Dimensions are {String.Join(",", outputInfo.Value.Dimensions)}");
            }
            // Results..
            // Input Name is data_0
            // Input Dimensions are 1,3,224,224
            // Output Name is softmaxout_1
            // Output Dimensions are 1,1000,1,1

            // Create ML pipeline to score the data using OnnxScoringEstimator
            var mlContext = new MLContext();
            var data      = GetTensorData();
            var idv       = mlContext.Data.ReadFromEnumerable(data);
            var pipeline  = new OnnxScoringEstimator(mlContext, new[] { outputName }, new[] { inputName }, modelPath);

            // Run the pipeline and get the transformed values
            var transformedValues = pipeline.Fit(idv).Transform(idv);

            // Retrieve model scores into Prediction class
            var predictions = mlContext.CreateEnumerable <Prediction>(transformedValues, reuseRowObject: false);

            // Iterate rows, printing the first three class scores of each prediction.
            foreach (var prediction in predictions)
            {
                int numClasses = 0;
                foreach (var classScore in prediction.softmaxout_1.Take(3))
                {
                    Console.WriteLine($"Class #{numClasses++} score = {classScore}");
                }
                Console.WriteLine(new string('-', 10));
            }

            // Results look like below...
            // Class #0 score = 4.544065E-05
            // Class #1 score = 0.003845858
            // Class #2 score = 0.0001249467
            // ----------
            // Class #0 score = 4.491953E-05
            // Class #1 score = 0.003848222
            // Class #2 score = 0.0001245592
            // ----------
        }
Esempio n. 10
0
        /// Demonstrates the ValueMappingEstimator mapping string keys to integer-array
        /// values, turning string data into numeric arrays that can serve as a feature
        /// set for a trainer. The education data is mapped to arbitrary integer arrays:
        ///     0-5yrs  -> 1, 2, 3
        ///     6-11yrs -> 5, 6, 7
        ///     12+yrs  -> 42,32,64
        public static void Run()
        {
            // MLContext is the entry point for all ML.NET operations; it also provides
            // exception tracking, logging, and a source of randomness.
            var mlContext = new MLContext();

            // Load a small in-memory dataset.
            IEnumerable <SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            IDataView trainData = mlContext.Data.ReadFromEnumerable(data);

            // Known education keys and the sample array each one maps to.
            // (When the mapping is known up front it can be passed directly to the API;
            // the ValueMappingEstimator can also take the mapping from an IDataView.)
            var educationKeys = new List <string> { "0-5yrs", "6-11yrs", "12+yrs" };
            var educationValues = new List <int[]>
            {
                new[] { 1, 2, 3 },
                new[] { 5, 6, 7 },
                new[] { 42, 32, 64 }
            };

            // Construct the ValueMappingEstimator, making the ML.net pipeline.
            var pipeline = mlContext.Transforms.Conversion.ValueMap <string, int>(educationKeys, educationValues, ("EducationFeature", "Education"));

            // Fit and transform, adding the EducationFeature column.
            IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);

            // Read the result back as strongly-typed rows containing the new column.
            IEnumerable <SampleInfertDataWithIntArray> featuresColumn = mlContext.CreateEnumerable <SampleInfertDataWithIntArray>(transformedData, reuseRowObject: false);

            Console.WriteLine($"Example of mapping string->array");
            Console.WriteLine($"Age\tEducation\tEducationFeature");
            foreach (var featureRow in featuresColumn)
            {
                Console.WriteLine($"{featureRow.Age}\t{featureRow.Education}  \t{string.Join(",", featureRow.EducationFeature)}");
            }

            // Features column obtained post-transformation.
            //
            // Example of mapping string->array
            // Age     Education   EducationFeature
            // 26      0 - 5yrs    1,2,3
            // 42      0 - 5yrs    1,2,3
            // 39      12 + yrs    42,32,64
            // 34      0 - 5yrs    1,2,3
            // 35      6 - 11yrs   5,6,7
        }
Esempio n. 11
0
 public static void ShowPredictions(MLContext env, IDataView data, bool label = true, int count = 2)
 {
     // Read the rows back as the user-defined prediction type, keep those whose
     // predicted label matches the filter, and print the first `count` matches.
     var matches = env
                   .CreateEnumerable <TransactionFraudPrediction>(data, reuseRowObject: false)
                   .Where(x => x.PredictedLabel == label)
                   .Take(count);

     foreach (var row in matches)
     {
         row.PrintToConsole();
     }
 }
        /// <summary>
        /// Builds a transfer-learning pipeline (pre-trained TensorFlow image featurizer
        /// followed by a logistic-regression classifier), trains it, prints the
        /// predictions and metrics computed on the training data, and saves the trained
        /// model to outputModelLocation.
        /// NOTE(review): relies on instance/class state declared elsewhere (mlContext,
        /// inputModelLocation, imagesFolder, dataLocation, outputModelLocation,
        /// LabelTokey, ImageReal, PredictedLabelValue, ImageNetSettings) — confirm
        /// their initialization before calling.
        /// </summary>
        public void BuildAndTrain()
        {
            var featurizerModelLocation = inputModelLocation;

            ConsoleWriteHeader("Read model");
            Console.WriteLine($"Model location: {featurizerModelLocation}");
            Console.WriteLine($"Images folder: {imagesFolder}");
            Console.WriteLine($"Training file: {dataLocation}");
            Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}");



            // Load the training examples from a headerless text file.
            var data = mlContext.Data.ReadFromTextFile <ImageNetData>(path: dataLocation, hasHeader: false);

            // Pipeline: map the label to a key, load and resize the images, extract the
            // pixels into the "input" tensor, run the TensorFlow featurizer up to the
            // "softmax2_pre_activation" node, train logistic regression on those
            // features, and map the predicted key back to its original label value.
            var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: LabelTokey, inputColumnName: DefaultColumnNames.Label)
                           .Append(mlContext.Transforms.LoadImages(imagesFolder, (ImageReal, nameof(ImageNetData.ImagePath))))
                           .Append(mlContext.Transforms.Resize(outputColumnName: ImageReal, imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: ImageReal))
                           .Append(mlContext.Transforms.ExtractPixels(new ImagePixelExtractorTransformer.ColumnInfo(name: "input", inputColumnName: ImageReal, interleave: ImageNetSettings.channelsLast, offset: ImageNetSettings.mean)))
                           .Append(mlContext.Transforms.ScoreTensorFlowModel(modelLocation: featurizerModelLocation, outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }))
                           .Append(mlContext.MulticlassClassification.Trainers.LogisticRegression(labelColumn: LabelTokey, featureColumn: "softmax2_pre_activation"))
                           .Append(mlContext.Transforms.Conversion.MapKeyToValue((PredictedLabelValue, DefaultColumnNames.PredictedLabel)));

            // Train the model
            ConsoleWriteHeader("Training classification model");
            ITransformer model = pipeline.Fit(data);

            // Process the training data through the model
            // This is an optional step, but it's useful for debugging issues
            var trainData = model.Transform(data);
            // NOTE(review): loadedModelOutputColumnNames is computed but never used below.
            var loadedModelOutputColumnNames = trainData.Schema
                                               .Where(col => !col.IsHidden).Select(col => col.Name);
            // NOTE(review): the third CreateEnumerable argument is presumably
            // ignoreMissingColumns — confirm against the API signature.
            var trainData2 = mlContext.CreateEnumerable <ImageNetPipeline>(trainData, false, true).ToList();

            // Print each training image path with its predicted label and top score.
            trainData2.ForEach(pr => ConsoleWriteImagePrediction(pr.ImagePath, pr.PredictedLabelValue, pr.Score.Max()));

            // Get some performance metric on the model using training data
            var classificationContext = new MulticlassClassificationCatalog(mlContext);

            ConsoleWriteHeader("Classification metrics");
            var metrics = classificationContext.Evaluate(trainData, label: LabelTokey, predictedLabel: DefaultColumnNames.PredictedLabel);

            Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
            Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");

            // Save the model to assets/outputs
            ConsoleWriteHeader("Save model to local file");
            ModelHelpers.DeleteAssets(outputModelLocation);
            using (var f = new FileStream(outputModelLocation, FileMode.Create))
                mlContext.Model.Save(model, f);

            Console.WriteLine($"Model saved: {outputModelLocation}");
        }
Esempio n. 13
0
        // Shows how to fuse several numeric columns into a single feature-vector column,
        // which is the input shape ML.NET learners expect.
        public static void ConcatTransform()
        {
            // ML.NET context: entry point for data loading, transforms, logging and randomness.
            var context = new MLContext();

            // Load the small in-memory infertility sample and wrap it as an IDataView.
            var rawRows  = SamplesUtils.DatasetUtils.GetInfertData();
            var dataView = context.Data.ReadFromEnumerable(rawRows);

            // Learners take **feature vectors** as inputs, so concatenate the Age, Parity
            // and Induced columns into one vector column named "Features".
            //   e.g. var regressionTrainer = mlContext.Regression.Trainers.FastTree(labelColumn: "Label", featureColumn: "Features");
            string outputColumnName = "Features";
            var    concatEstimator  = context.Transforms.Concatenate(outputColumnName, new[] { "Age", "Parity", "Induced" });

            // Fit the estimator and apply it (evaluation is lazy until the data is read).
            var result = concatEstimator.Fit(dataView).Transform(dataView);

            // Read the newly created column back as strongly-typed rows.
            var featureRows = context.CreateEnumerable <SampleInfertDataWithFeatures>(result, reuseRowObject: false);

            // Print a few rows of the concatenated vector.
            Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
            foreach (var featureRow in featureRows)
            {
                foreach (var value in featureRow.Features.GetValues())
                {
                    Console.Write($"{value} ");
                }
                Console.WriteLine("");
            }

            // Expected output:
            // Features column obtained post-transformation.
            //
            // 26 6 1
            // 42 1 1
            // 39 6 2
            // 34 4 2
            // 35 3 1
        }
        // Trains a non-calibrated linear binary classifier with SDCA + hinge loss on
        // synthetic data and inspects the raw score of the first example.
        public static void Example()
        {
            // 100 synthetic examples: boolean label plus a 10-element float feature vector.
            var samples = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);

            // Peek at the first example before training.
            Console.WriteLine("First example's label is {0}", samples.First().Label);
            Console.WriteLine("First example's feature vector is {0}", samples.First().Features);

            // ML.NET context: catalog of available operations and source of randomness.
            var context = new MLContext();

            // Step 1: wrap the enumerable as an IDataView.
            var dataView = context.Data.ReadFromEnumerable(samples);

            // SDCA may make multiple passes over the data, and IDataView is not cached
            // by default, so cache it up front.
            dataView = context.Data.Cache(dataView);

            // Step 2: binary classifier; "Label" is the label column, "Features" the feature column.
            var trainer = context.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
                labelColumnName: "Label", featureColumnName: "Features", loss: new HingeLoss(), l2Const: 0.001f);

            // Step 3: train.
            var trainedModel = trainer.Fit(dataView);

            // Step 4: score the training set itself.
            var scored = trainedModel.Transform(dataView);

            var outputRows = context.CreateEnumerable <SamplesUtils.DatasetUtils.NonCalibratedBinaryClassifierOutput>(scored, false);

            // Step 5: inspect the first prediction. The sign of the raw (non-calibrated)
            // score is expected to track the positive/negative label.
            var firstRow = outputRows.First();

            Console.WriteLine("The first example actual label is {0}. The trained model assigns it a score {1}.",
                              firstRow.Label /*true*/, firstRow.Score /*around 3*/);
        }
Esempio n. 15
0
        // Round-trip test: enumerable -> IDataView -> enumerable must preserve all rows.
        public void ExportToIEnumerable()
        {
            // Fixed seed and single-threaded context keep the round trip deterministic.
            var context = new MLContext(seed: 1, conc: 1);

            // Source enumerable wrapped as an IDataView.
            var original = TypeTestData.GenerateDataset();
            var dataView = context.Data.ReadFromEnumerable(original);

            // Convert back to an enumerable of the same row type.
            var roundTripped = context.CreateEnumerable <TypeTestData>(dataView, true);

            Common.AssertEqual(original, roundTripped);
        }
        // Demonstrates SelectColumns: keeps only the "Age" and "Education" columns of the
        // infertility sample and prints the surviving schema and rows.
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception
            // tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
            IEnumerable <SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = mlContext.Data.ReadFromEnumerable(data);

            // Select a subset of columns to keep; everything else is dropped from the schema.
            var pipeline = mlContext.Transforms.SelectColumns(new string[] { "Age", "Education" });

            // Transformations are lazy in ML.NET: nothing is evaluated until the data is
            // actually read below.
            var transformedData = pipeline.Fit(trainData).Transform(trainData);

            // Print the number of columns in the schema.
            Console.WriteLine($"There are {transformedData.Schema.Count} columns in the dataset.");

            // Expected output:
            //  There are 2 columns in the dataset.

            // Extract the kept columns as an IEnumerable of SampleInfertDataTransformed.
            var rowEnumerable = mlContext.CreateEnumerable <SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

            // Write out the rows of the dataset, looking at the columns of interest.
            // BUGFIX: the header previously printed "Educations"; it now matches the
            // documented expected output below.
            Console.WriteLine("Age and Education columns obtained post-transformation.");
            foreach (var row in rowEnumerable)
            {
                Console.WriteLine($"Age: {row.Age} Education: {row.Education}");
            }

            // Expected output:
            //  Age and Education columns obtained post-transformation.
            //  Age: 26 Education: 0 - 5yrs
            //  Age: 42 Education: 0 - 5yrs
            //  Age: 39 Education: 0 - 5yrs
            //  Age: 34 Education: 0 - 5yrs
            //  Age: 35 Education: 6 - 11yrs
        }
Esempio n. 17
0
        // Trains a multiclass SDCA model on the Iris data, then shows how to map the
        // key-typed PredictedLabel and the Score vector back to the original label strings
        // through the output schema's SlotNames / KeyValues metadata.
        void PredictAndMetadata()
        {
            var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);
            var ml       = new MLContext();

            var data = ml.Data.ReadFromTextFile <IrisData>(dataPath, separatorChar: ',');

            // Concatenate the four measurements into "Features". MapValueToKey is scoped
            // to TrainTest so it is excluded from the scoring-only model extracted below.
            var pipeline = ml.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                           .Append(ml.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest)
                           .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
                                       new SdcaMultiClassTrainer.Options {
                MaxIterations = 100, Shuffle = true, NumThreads = 1,
            }));

            var model  = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
            var engine = model.CreatePredictionEngine <IrisDataNoLabel, IrisPredictionNotCasted>(ml);

            var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), hasHeader: true, separatorChar: ',');
            var testData   = ml.CreateEnumerable <IrisData>(testLoader, false);

            // During prediction we will get a Score column with 3 float values.
            // To map each score back to its original label we read the SlotNames metadata
            // of the Score column: the name of the i-th slot is the original label for the
            // i-th value in the Score array.
            VBuffer <ReadOnlyMemory <char> > slotNames = default;

            engine.OutputSchema[nameof(IrisPrediction.Score)].GetSlotNames(ref slotNames);
            // Since the MapValueToKey estimator was applied with default parameters, key
            // values depend on order of occurrence in the data file, which is
            // "Iris-setosa", "Iris-versicolor", "Iris-virginica".
            // So a Score column equal to [0.2, 0.3, 0.5] means the score for
            // Iris-setosa is 0.2
            // Iris-versicolor is 0.3
            // Iris-virginica is 0.5.
            Assert.True(slotNames.GetItemOrDefault(0).ToString() == "Iris-setosa");
            Assert.True(slotNames.GetItemOrDefault(1).ToString() == "Iris-versicolor");
            Assert.True(slotNames.GetItemOrDefault(2).ToString() == "Iris-virginica");

            // To convert the key value in PredictedLabel back to the original label,
            // read the KeyValues metadata of the "PredictedLabel" column.
            VBuffer <ReadOnlyMemory <char> > keys = default;

            engine.OutputSchema[nameof(IrisPrediction.PredictedLabel)].GetKeyValues(ref keys);
            foreach (var input in testData.Take(20))
            {
                var prediction = engine.Predict(input);
                // The predicted label is a key type whose internal representation starts
                // at 1 (0 is reserved for the missing/NaN value), so subtract 1 from it to
                // index into the key-value metadata.
                var deciphieredLabel = keys.GetItemOrDefault((int)prediction.PredictedLabel - 1).ToString();
                Assert.True(deciphieredLabel == input.Label);
            }
        }
        // Demonstrates FilterByMissingValues: drops every row whose "Features" vector
        // contains a missing (NaN) value.
        public static void Example()
        {
            // ML.NET context: catalog of available operations and source of randomness.
            var context = new MLContext();

            // Ten synthetic examples with roughly 5% of feature values set to NaN.
            var sourceRows = DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(10, naRate: 0.05);
            var dataView   = context.Data.ReadFromEnumerable(sourceRows);

            // Show the unfiltered dataset first; some rows contain NaN entries.
            Console.WriteLine($"Label\tFeatures");
            foreach (var row in sourceRows)
            {
                Console.WriteLine($"{row.Label}\t({string.Join(", ", row.Features)})");
            }
            Console.WriteLine();

            // Remove any row with a missing value in the "Features" column.
            var withoutMissing = context.Data.FilterByMissingValues(dataView, "Features");

            // Enumerate the filtered view; rows whose vectors held NaNs are gone.
            var filteredRows = context.CreateEnumerable <DatasetUtils.FloatLabelFloatFeatureVectorSample>(withoutMissing, reuseRowObject: true);

            Console.WriteLine($"Label\tFeatures");
            foreach (var row in filteredRows)
            {
                Console.WriteLine($"{row.Label}\t({string.Join(", ", row.Features)})");
            }
            // Only the NaN-free rows of the original ten remain in the output.
        }
        // Demonstrates FilterByColumn: keeps only rows whose Temperature lies in [34, 37).
        public static void Example()
        {
            // ML.NET context: catalog of available operations and source of randomness.
            var context = new MLContext();

            // Ten days of sample temperature readings.
            IEnumerable <SamplesUtils.DatasetUtils.SampleTemperatureData> sourceRows = SamplesUtils.DatasetUtils.GetSampleTemperatureData(10);
            var dataView = context.Data.ReadFromEnumerable(sourceRows);

            // Print every record before filtering.
            Console.WriteLine($"Date\tTemperature");
            foreach (var record in sourceRows)
            {
                Console.WriteLine($"{record.Date.ToString("d")}\t{record.Temperature}");
            }
            Console.WriteLine();

            // Filter on Temperature: lower bound inclusive, upper bound exclusive.
            var inRange = context.Data.FilterByColumn(dataView, columnName: "Temperature", lowerBound: 34, upperBound: 37);

            // Enumerate the filtered view; values outside [34, 37) have been dropped.
            var filteredRows = context.CreateEnumerable <SamplesUtils.DatasetUtils.SampleTemperatureData>(inRange, reuseRowObject: true);

            Console.WriteLine($"Date\tTemperature");
            foreach (var record in filteredRows)
            {
                Console.WriteLine($"{record.Date.ToString("d")}\t{record.Temperature}");
            }
            // Only readings of 34, 35 and 36 degrees survive the filter.
        }
Esempio n. 20
0
        // Demonstrates the statically-typed text-reader API: reads four columns from a
        // text file, concatenates the three text columns into one, then shows two ways of
        // materializing results — as typed rows and as an extracted column (both the
        // static and the dynamic flavor).
        private void IntermediateData(string dataPath)
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext();

            // Create the reader: define the data columns and where to find them in the text file.
            var reader = mlContext.Data.CreateTextReader(ctx => (
                                                             // A boolean column depicting the 'target label'.
                                                             IsOver50K: ctx.LoadBool(0),
                                                             // Three text columns.
                                                             Workclass: ctx.LoadText(1),
                                                             Education: ctx.LoadText(2),
                                                             MaritalStatus: ctx.LoadText(3)),
                                                         hasHeader: true);

            // Start creating our processing pipeline. For now, let's just concatenate all the text columns
            // together into one.
            var dataPipeline = reader.MakeNewEstimator()
                               .Append(row => (
                                           row.IsOver50K,
                                           AllFeatures: row.Workclass.ConcatWith(row.Education, row.MaritalStatus)
                                           ));

            // Let's verify that the data has been read correctly.
            // First, we read the data file.
            var data = reader.Read(dataPath);

            // Fit our data pipeline and transform data with it.
            var transformedData = dataPipeline.Fit(data).Transform(data);

            // 'transformedData' is a 'promise' of data: nothing is computed until it is enumerated.
            var someRows = mlContext
                           // Convert to an enumerable of user-defined type.
                           .CreateEnumerable <InspectedRow>(transformedData.AsDynamic, reuseRowObject: false)
                           // Take a couple values as an array.
                           .Take(4).ToArray();

            // Extract the 'AllFeatures' column.
            // This would give the entire dataset: make sure to only take several rows
            // in case the dataset is huge.
            var featureColumns = transformedData.GetColumn(r => r.AllFeatures)
                                 .Take(20).ToArray();

            // The same extension method also applies to the dynamic-typed data, except you have to
            // specify the column name and type:
            var dynamicData        = transformedData.AsDynamic;
            var sameFeatureColumns = dynamicData.GetColumn <string[]>(mlContext, "AllFeatures")
                                     .Take(20).ToArray();
        }
Esempio n. 21
0
        // Demonstrates SkipRows: drops a fixed number of leading rows from a data view.
        public static void Example()
        {
            // ML.NET context: catalog of available operations and source of randomness.
            var context = new MLContext();

            // Ten days of sample temperature readings.
            var sourceRows = SamplesUtils.DatasetUtils.GetSampleTemperatureData(10);
            var dataView   = context.Data.ReadFromEnumerable(sourceRows);

            // Print every record before filtering.
            Console.WriteLine($"Date\tTemperature");
            foreach (var record in sourceRows)
            {
                Console.WriteLine($"{record.Date.ToString("d")}\t{record.Temperature}");
            }
            Console.WriteLine();

            // Skip the first 5 rows of the dataset.
            var remaining = context.Data.SkipRows(dataView, 5);

            // Enumerate what is left and observe the first 5 rows are gone.
            var remainingRows = context.CreateEnumerable <SamplesUtils.DatasetUtils.SampleTemperatureData>(remaining, reuseRowObject: true);

            Console.WriteLine($"Date\tTemperature");
            foreach (var record in remainingRows)
            {
                Console.WriteLine($"{record.Date.ToString("d")}\t{record.Temperature}");
            }
            // Output now starts at 1/7/2012 and runs through 1/11/2012.
        }
Esempio n. 22
0
        // Trains an IID change-point detector on a series that is constant (5) for the
        // first half and then drifts upward, and checks the first rows of the detector's
        // 4-element output against golden values.
        public void ChangeDetection()
        {
            var         env      = new MLContext(conc: 1);
            const int   size     = 10;
            List <Data> data     = new List <Data>(size);
            // ReadFromEnumerable is lazy: the rows appended to 'data' below are visible
            // when the view is enumerated during Fit/Transform.
            var         dataView = env.Data.ReadFromEnumerable(data);

            // First half: constant value 5.
            for (int i = 0; i < size / 2; i++)
            {
                data.Add(new Data(5));
            }

            // Second half: values drifting upward from 5 in steps of 1.1.
            for (int i = 0; i < size / 2; i++)
            {
                data.Add(new Data((float)(5 + i * 1.1)));
            }

            // Detector configuration: read "Value", write "Change", 80% confidence,
            // history window covering the whole series.
            var args = new IidChangePointDetector.Options()
            {
                Confidence          = 80,
                Source              = "Value",
                Name                = "Change",
                ChangeHistoryLength = size
            };
            // Train
            var detector = new IidChangePointEstimator(env, args).Fit(dataView);
            // Transform
            var output = detector.Transform(dataView);
            // Get predictions
            var           enumerator     = env.CreateEnumerable <Prediction>(output, true).GetEnumerator();
            Prediction    row            = null;
            // Golden values: four entries per row of output, flattened row-major.
            List <double> expectedValues = new List <double>()
            {
                0, 5, 0.5, 5.1200000000000114E-08, 0, 5, 0.4999999995, 5.1200000046080209E-08, 0, 5, 0.4999999995, 5.1200000092160303E-08,
                0, 5, 0.4999999995, 5.12000001382404E-08
            };
            int index = 0;

            // Compare each produced row's Change[0..3] to the expected flattened list.
            while (enumerator.MoveNext() && index < expectedValues.Count)
            {
                row = enumerator.Current;

                Assert.Equal(expectedValues[index++], row.Change[0]);
                Assert.Equal(expectedValues[index++], row.Change[1]);
                Assert.Equal(expectedValues[index++], row.Change[2]);
                Assert.Equal(expectedValues[index++], row.Change[3]);
            }
        }
Esempio n. 23
0
        // (OPTIONAL) Score a single row with the trained model. Since this is generated
        // code, an arbitrary row from the supplied dataView is used as the sample input,
        // but any observation with the same schema would work.
        private static void TrySinglePrediction(MLContext mlContext, ITransformer model, IDataView dataView)
        {
            // Grab the first row of the view to use as the test observation.
            var sampleRow = mlContext.CreateEnumerable <SampleObservation>(dataView, false).First();

            // Wrap the model in a prediction engine for one-off predictions.
            var engine = model.CreatePredictionEngine <SampleObservation, SamplePrediction>(mlContext);

            // Run the prediction.
            var result = engine.Predict(sampleRow);

            Console.WriteLine($"=============== Single Prediction  ===============");
            Console.WriteLine($"Actual value: {sampleRow.Fare_amount} | Predicted value: {result.Score}");
            Console.WriteLine($"==================================================");
        }
Esempio n. 24
0
        // Concatenates the Age, Parity and Induced columns into a "Features" vector using
        // the ColumnConcatenatingEstimator constructor directly.
        public static void ConcatTransform()
        {
            // ML.NET context: entry point for data loading, transforms, logging and randomness.
            var context = new MLContext();

            // Small in-memory infertility sample wrapped as an IDataView.
            IEnumerable <SamplesUtils.DatasetUtils.SampleInfertData> sourceRows = SamplesUtils.DatasetUtils.GetInfertData();
            var dataView = context.Data.ReadFromEnumerable(sourceRows);

            // Build the concatenating estimator: three numeric columns -> one vector column.
            string outputColumnName = "Features";
            var    concatenator     = new ColumnConcatenatingEstimator(context, outputColumnName, new[] { "Age", "Parity", "Induced" });

            // Fit and apply the transform.
            var result = concatenator.Fit(dataView).Transform(dataView);

            // Read the newly created column back as strongly-typed rows.
            var featureRows = context.CreateEnumerable <SampleInfertDataWithFeatures>(result, reuseRowObject: false);

            Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
            foreach (var featureRow in featureRows)
            {
                foreach (var value in featureRow.Features.GetValues())
                {
                    Console.Write($"{value} ");
                }
                Console.WriteLine("");
            }

            // Features column obtained post-transformation.
            //
            // 26 6 1
            // 42 1 1
            // 39 6 2
            // 34 4 2
            // 35 3 1
        }
Esempio n. 25
0
        // IID change-point detection built through the statically-typed pipeline API
        // (AssertStatic / MakeNewEstimator / IidChangePointDetect): the series is constant
        // (5) for the first half, drifts upward for the second, and the first rows of the
        // 4-element detector output are compared to golden values at 7-digit precision.
        public void ChangeDetection()
        {
            var       env      = new MLContext(conc: 1);
            const int Size     = 10;
            var       data     = new List <Data>(Size);
            // ReadFromEnumerable is lazy: rows appended to 'data' below are visible when
            // the view is enumerated during Fit/Transform.
            var       dataView = env.Data.ReadFromEnumerable(data);

            // First half: constant value 5.
            for (int i = 0; i < Size / 2; i++)
            {
                data.Add(new Data(5));
            }

            // Second half: values drifting upward from 5 in steps of 1.1.
            for (int i = 0; i < Size / 2; i++)
            {
                data.Add(new Data((float)(5 + i * 1.1)));
            }

            // Convert to statically-typed data view.
            var staticData = dataView.AssertStatic(env, c => new { Value = c.R4.Scalar });
            // Build the pipeline: detect change points on Value at 80% confidence with a
            // history window covering the whole series.
            var staticLearningPipeline = staticData.MakeNewEstimator()
                                         .Append(r => r.Value.IidChangePointDetect(80, Size));
            // Train
            var detector = staticLearningPipeline.Fit(staticData);
            // Transform
            var output = detector.Transform(staticData);

            // Get predictions
            var enumerator                       = env.CreateEnumerable <ChangePointPrediction>(output.AsDynamic, true).GetEnumerator();
            ChangePointPrediction row            = null;
            // Golden values: four entries per row of output, flattened row-major.
            List <double>         expectedValues = new List <double>()
            {
                0, 5, 0.5, 5.1200000000000114E-08, 0, 5, 0.4999999995, 5.1200000046080209E-08, 0, 5, 0.4999999995, 5.1200000092160303E-08,
                0, 5, 0.4999999995, 5.12000001382404E-08
            };
            int index = 0;

            // Compare each produced row's Data[0..3] to the expected flattened list.
            while (enumerator.MoveNext() && index < expectedValues.Count)
            {
                row = enumerator.Current;

                Assert.Equal(expectedValues[index++], row.Data[0], precision: 7);
                Assert.Equal(expectedValues[index++], row.Data[1], precision: 7);
                Assert.Equal(expectedValues[index++], row.Data[2], precision: 7);
                Assert.Equal(expectedValues[index++], row.Data[3], precision: 7);
            }
        }
Esempio n. 26
0
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable.
            var enumerableOfData = DatasetUtils.GetSampleTemperatureData(5);
            var data             = mlContext.Data.ReadFromEnumerable(enumerableOfData);

            // Before we apply a filter, examine all the records in the dataset.
            // (No interpolation needed for the header; the '$' was redundant.)
            Console.WriteLine("Date\tTemperature");
            foreach (var row in enumerableOfData)
            {
                // {row.Date:d} uses the short-date format specifier directly,
                // equivalent to row.Date.ToString("d").
                Console.WriteLine($"{row.Date:d}\t{row.Temperature}");
            }
            Console.WriteLine();
            // Expected output:
            //  Date    Temperature
            //  1/2/2012        36
            //  1/3/2012        36
            //  1/4/2012        34
            //  1/5/2012        35
            //  1/6/2012        35

            // Shuffle the dataset.
            var shuffledData = mlContext.Data.ShuffleRows(data, seed: 123);

            // Look at the shuffled data and observe that the rows are in a randomized order.
            var enumerable = mlContext.CreateEnumerable <DatasetUtils.SampleTemperatureData>(shuffledData, reuseRowObject: true);

            Console.WriteLine("Date\tTemperature");
            foreach (var row in enumerable)
            {
                Console.WriteLine($"{row.Date:d}\t{row.Temperature}");
            }
            // Expected output:
            //  Date    Temperature
            //  1/4/2012        34
            //  1/2/2012        36
            //  1/5/2012        35
            //  1/3/2012        36
            //  1/6/2012        35
        }
        /// <summary>
        /// Example use of the TensorFlow image model in a ML.NET pipeline.
        /// </summary>
        public static void ScoringWithImageClassificationModelSample()
        {
            // Path to the frozen ResNet 101 graph. Download it from:
            // https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz
            var modelLocation = @"resnet_v2_101/resnet_v2_101_299_frozen.pb";

            var mlContext  = new MLContext();
            var tensorRows = GetTensorData();
            var dataView   = mlContext.Data.ReadFromEnumerable(tensorRows);

            // Build a single-step pipeline that scores the input tensors with
            // the TensorFlow model.
            var pipeline = mlContext.Transforms.ScoreTensorFlowModel(
                modelLocation,
                new[] { nameof(OutputScores.output) },
                new[] { nameof(TensorData.input) });

            // Fit the pipeline, then run the data through it.
            var fittedModel = pipeline.Fit(dataView);
            var scoredData  = fittedModel.Transform(dataView);

            // Pull the model scores back out as strongly-typed rows.
            var outScores = mlContext.CreateEnumerable <OutputScores>(scoredData, reuseRowObject: false);

            // Print per-prediction scores; only the first 3 classes are shown
            // to keep the output short.
            foreach (var prediction in outScores)
            {
                int numClasses = 0;
                foreach (var classScore in prediction.output.Take(3))
                {
                    Console.WriteLine($"Class #{numClasses++} score = {classScore}");
                }
                Console.WriteLine(new string('-', 10));
            }

            // Results look like below...
            //Class #0 score = -0.8092947
            //Class #1 score = -0.3310375
            //Class #2 score = 0.1119193
            //----------
            //Class #0 score = -0.7807726
            //Class #1 score = -0.2158062
            //Class #2 score = 0.1153686
            //----------
        }
Esempio n. 28
0
        public void TrainSaveModelAndPredict()
        {
            // Set up a deterministic, single-threaded context and load the
            // sentiment training data.
            var mlContext = new MLContext(seed: 1, conc: 1);
            var trainData = mlContext.Data.ReadFromTextFile <SentimentData>(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true);

            // Pipeline: featurize the text, cache, then train a non-calibrated
            // SDCA binary classifier on a single thread.
            var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                           .AppendCacheCheckpoint(mlContext)
                           .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
                                       new SdcaNonCalibratedBinaryTrainer.Options {
                NumThreads = 1
            }));

            // Train.
            var trainedModel = pipeline.Fit(trainData);

            var modelPath = GetOutputPath("temp.zip");

            // Round-trip the model through disk.
            using (var writeStream = File.Create(modelPath))
                trainedModel.SaveTo(mlContext, writeStream);

            ITransformer loadedModel;

            using (var readStream = File.OpenRead(modelPath))
                loadedModel = TransformerChain.LoadFrom(mlContext, readStream);

            // Build a prediction engine from the reloaded model.
            var engine = loadedModel.CreatePredictionEngine <SentimentData, SentimentPrediction>(mlContext);

            // Score a handful of test rows and check the predictions.
            var testData = mlContext.CreateEnumerable <SentimentData>(
                mlContext.Data.ReadFromTextFile <SentimentData>(GetDataPath(TestDatasets.Sentiment.testFilename), hasHeader: true), false);

            foreach (var example in testData.Take(5))
            {
                var prediction = engine.Predict(example);
                // Predicted label must match, and the score must be clearly
                // separated from zero on the correct side.
                Assert.Equal(example.Sentiment, prediction.Sentiment);
                Assert.True((example.Sentiment && prediction.Score > 1) || (!example.Sentiment && prediction.Score < -1));
            }
        }
Esempio n. 29
0
        public void SdcaLogisticRegression()
        {
            // Generate C# objects as training examples.
            var trainingExamples = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);

            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext();

            // Step 1: Read the data as an IDataView.
            var trainingData = mlContext.Data.ReadFromEnumerable(trainingExamples);

            // Step 2: Cache the data. ML.NET does not cache by default, and SDCA
            // is an iterative algorithm that makes many passes over the data, so
            // caching pays off here.
            trainingData = mlContext.Data.Cache(trainingData);

            // Step 3: Create a binary classifier using the "Label" column as the
            // label and the "Features" column as the features.
            var trainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Label", featureColumnName: "Features", l2Const: 0.001f);

            // Step 4: Train.
            var trainedModel = trainer.Fit(trainingData);

            // Step 5: Score the training set and evaluate the result.
            var scoredData = trainedModel.Transform(trainingData);
            var metrics    = mlContext.BinaryClassification.Evaluate(scoredData);

            // Sanity-check a few metrics of the trained model.
            Assert.InRange(metrics.Auc, 0.9, 1);
            Assert.InRange(metrics.LogLoss, 0, 0.5);

            var predictions = mlContext.CreateEnumerable <SamplesUtils.DatasetUtils.CalibratedBinaryClassifierOutput>(scoredData, false);

            // Step 6: Inspect the prediction of the first example, which is a
            // positive example.
            var firstPrediction = predictions.First();

            Assert.True(firstPrediction.Label);
            // A positive example should have a non-negative score...
            Assert.True(firstPrediction.Score > 0);
            // ...and a high probability of belonging to the positive class.
            Assert.InRange(firstPrediction.Probability, 0.8, 1);
        }
Esempio n. 30
0
        /// <summary>
        /// Enumerates every row of <paramref name="data"/> as a
        /// <c>HousingRegression</c> row, summing all columns, and times the pass.
        /// </summary>
        /// <param name="mlContext">Context used to create the typed enumerable.</param>
        /// <param name="data">The data view to scan.</param>
        /// <returns>
        /// The row count, the average of the per-row column sums, and the total
        /// elapsed wall-clock time in seconds.
        /// </returns>
        private static (int lines, double columnAverage, double elapsedSeconds) TimeToScanIDataView(MLContext mlContext, IDataView data)
        {
            int    lines         = 0;
            double columnAverage = 0.0;
            var    enumerable    = mlContext.CreateEnumerable <DatasetUtils.HousingRegression>(data, reuseRowObject: true);
            var    watch         = System.Diagnostics.Stopwatch.StartNew();

            foreach (var row in enumerable)
            {
                lines++;
                columnAverage += row.MedianHomeValue + row.CrimesPerCapita + row.PercentResidental + row.PercentNonRetail + row.CharlesRiver
                                 + row.NitricOxides + row.RoomsPerDwelling + row.PercentPre40s + row.EmploymentDistance
                                 + row.HighwayDistance + row.TaxRate + row.TeacherRatio;
            }
            watch.Stop();

            // Avoid reporting NaN for an empty data view.
            if (lines > 0)
            {
                columnAverage /= lines;
            }

            // BUG FIX: the original returned Elapsed.Seconds, which is only the
            // seconds *component* (0-59) of the TimeSpan and truncates to an
            // integer; TotalSeconds is the full elapsed time this tuple element
            // promises.
            return(lines, columnAverage, watch.Elapsed.TotalSeconds);
        }