[ConditionalFact(typeof(BaseTestBaseline), nameof(BaseTestBaseline.LessThanNetCore30OrNotNetCore))] // netcore3.0 output differs from Baseline
public void ChangePointDetectionWithSeasonality()
{
    var env = new MLContext(conc: 1);
    const int ChangeHistorySize = 10;
    const int SeasonalitySize = 10;
    const int NumberOfSeasonsInTraining = 5;
    const int MaxTrainingSize = NumberOfSeasonsInTraining * SeasonalitySize;

    List<Data> data = new List<Data>();
    var dataView = env.Data.ReadFromEnumerable(data);

    var args = new SsaChangePointDetector.Options()
    {
        Confidence = 95,
        Source = "Value",
        Name = "Change",
        ChangeHistoryLength = ChangeHistorySize,
        TrainingWindowSize = MaxTrainingSize,
        SeasonalWindowSize = SeasonalitySize
    };

    for (int j = 0; j < NumberOfSeasonsInTraining; j++)
    {
        for (int i = 0; i < SeasonalitySize; i++)
        {
            data.Add(new Data(i));
        }
    }

    for (int i = 0; i < ChangeHistorySize; i++)
    {
        data.Add(new Data(i * 100));
    }

    // Train
    var detector = new SsaChangePointEstimator(env, args).Fit(dataView);
    // Transform
    var output = detector.Transform(dataView);
    // Get predictions
    var enumerator = env.CreateEnumerable<Prediction>(output, true).GetEnumerator();
    Prediction row = null;
    List<double> expectedValues = new List<double>()
    {
        0, -3.31410598754883, 0.5, 5.12000000000001E-08,
        0, 1.5700820684432983, 5.2001145245395008E-07, 0.012414560443710681,
        0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935,
        0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483
    };

    int index = 0;
    while (enumerator.MoveNext() && index < expectedValues.Count)
    {
        row = enumerator.Current;
        Assert.Equal(expectedValues[index++], row.Change[0], precision: 7); // Alert
        Assert.Equal(expectedValues[index++], row.Change[1], precision: 7); // Raw score
        Assert.Equal(expectedValues[index++], row.Change[2], precision: 7); // P-Value score
        Assert.Equal(expectedValues[index++], row.Change[3], precision: 7); // Martingale score
    }
}
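// The Data and Prediction types used above are defined elsewhere in the test suite and not shown
// in this snippet. A minimal sketch of what the usage implies follows; the vector size and field
// names are assumptions inferred from the asserts above (Alert, Raw score, P-Value, Martingale),
// not the original definitions.
class Data
{
    public float Value;
    public Data(float value) => Value = value;
}

class Prediction
{
    // Assumed: a 4-element output vector per row.
    [VectorType(4)]
    public double[] Change;
}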
public void SpikeDetection()
{
    var env = new MLContext(conc: 1);
    const int Size = 10;
    const int PvalHistoryLength = Size / 4;

    // Generate sample series data with a spike
    List<Data> data = new List<Data>(Size);
    var dataView = env.CreateStreamingDataView(data);
    for (int i = 0; i < Size / 2; i++)
    {
        data.Add(new Data(5));
    }
    data.Add(new Data(10)); // This is the spike
    for (int i = 0; i < Size / 2 - 1; i++)
    {
        data.Add(new Data(5));
    }

    // Convert to statically-typed data view.
    var staticData = dataView.AssertStatic(env, c => new { Value = c.R4.Scalar });

    // Build the pipeline
    var staticLearningPipeline = staticData.MakeNewEstimator()
        .Append(r => r.Value.IidSpikeDetect(80, PvalHistoryLength));
    // Train
    var detector = staticLearningPipeline.Fit(staticData);
    // Transform
    var output = detector.Transform(staticData);

    // Get predictions
    var enumerator = env.CreateEnumerable<SpikePrediction>(output.AsDynamic, true).GetEnumerator();
    var expectedValues = new List<double[]>()
    {
        //             Alert  Score  P-Value
        new double[] { 0,     5,     0.5 },
        new double[] { 0,     5,     0.5 },
        new double[] { 0,     5,     0.5 },
        new double[] { 0,     5,     0.5 },
        new double[] { 0,     5,     0.5 },
        new double[] { 1,     10,    0.0 }, // alert is on, predicted spike
        new double[] { 0,     5,     0.261375 },
        new double[] { 0,     5,     0.261375 },
        new double[] { 0,     5,     0.50 },
        new double[] { 0,     5,     0.50 }
    };

    SpikePrediction row = null;
    for (var i = 0; enumerator.MoveNext() && i < expectedValues.Count; i++)
    {
        row = enumerator.Current;
        CompareNumbersWithTolerance(expectedValues[i][0], row.Data[0], digitsOfPrecision: 7);
        CompareNumbersWithTolerance(expectedValues[i][1], row.Data[1], digitsOfPrecision: 7);
        CompareNumbersWithTolerance(expectedValues[i][2], row.Data[2], digitsOfPrecision: 7);
    }
}
public static void Example()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
    IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
    var trainData = mlContext.Data.ReadFromEnumerable(data);

    // Preview of the data.
    //
    // Age   Case  Education  induced  parity  pooled.stratum  row_num  ...
    // 26.0  1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
    // 42.0  1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
    // 39.0  1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
    // 34.0  1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
    // 35.0  1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

    // CopyColumns is commonly used to rename columns.
    // For example, if you want to train towards Age, and your learner expects a "Label" column, you can
    // use CopyColumns to rename Age to Label. Technically, the Age column still exists, but it won't be
    // materialized unless you actually need it somewhere (e.g. if you were to save the transformed data
    // without explicitly dropping the column). This is a general property of IDataView's lazy evaluation.
    string labelColumnName = "Label";
    var pipeline = mlContext.Transforms.CopyColumns(labelColumnName, "Age") as IEstimator<ITransformer>;

    // You also may want to copy a column to perform some hand-featurization using built-in transforms or
    // a CustomMapping transform. For example, we could make an indicator variable if a feature, such as Parity,
    // goes above some threshold. We simply copy the Parity column to a new column, then pass it through a custom function.
    Action<InputRow, OutputRow> mapping = (input, output) => output.CustomValue = input.CustomValue > 4 ? 1 : 0;
    pipeline = pipeline.Append(mlContext.Transforms.CopyColumns("CustomValue", "Parity"))
        .Append(mlContext.Transforms.CustomMapping(mapping, null));

    // Now we can transform the data and look at the output to confirm the behavior of CopyColumns.
    // Don't forget that this operation doesn't actually evaluate data until we read the data below.
    var transformedData = pipeline.Fit(trainData).Transform(trainData);

    // We can extract the newly created column as an IEnumerable of SampleInfertDataTransformed, the class we define below.
    var rowEnumerable = mlContext.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

    // And finally, we can write out the rows of the dataset, looking at the columns of interest.
    Console.WriteLine($"Label, Parity, and CustomValue columns obtained post-transformation.");
    foreach (var row in rowEnumerable)
    {
        Console.WriteLine($"Label: {row.Label} Parity: {row.Parity} CustomValue: {row.CustomValue}");
    }

    // Expected output:
    //  Label, Parity, and CustomValue columns obtained post-transformation.
    //  Label: 26 Parity: 6 CustomValue: 1
    //  Label: 42 Parity: 1 CustomValue: 0
    //  Label: 39 Parity: 6 CustomValue: 1
    //  Label: 34 Parity: 4 CustomValue: 0
    //  Label: 35 Parity: 3 CustomValue: 0
}
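// The sample mentions "the class we define below", but the definitions are not part of this
// snippet. A minimal sketch of what the usage implies for this sample follows; property names and
// types are assumptions inferred from the columns read above, not the original definitions.
class SampleInfertDataTransformed
{
    public float Label;       // copied from Age
    public float Parity;
    public float CustomValue; // indicator produced by the custom mapping
}

// Assumed input/output row shapes for the CustomMapping delegate.
class InputRow
{
    public float CustomValue;
}

class OutputRow
{
    public float CustomValue;
}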
public static void Example()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
    var enumerableData = SamplesUtils.DatasetUtils.GetInfertData();
    var data = mlContext.Data.ReadFromEnumerable(enumerableData);

    // Before transformation, take a look at the dataset
    Console.WriteLine($"Age\tCase\tEducation\tInduced\tParity\tPooledStratum");
    foreach (var row in enumerableData)
    {
        Console.WriteLine($"{row.Age}\t{row.Case}\t{row.Education}\t{row.Induced}\t{row.Parity}\t{row.PooledStratum}");
    }
    Console.WriteLine();
    // Expected output:
    //  Age  Case  Education  Induced  Parity  PooledStratum
    //  26   1     0-5yrs     1        6       3
    //  42   1     0-5yrs     1        1       1
    //  39   1     12+yrs     2        6       4
    //  34   1     0-5yrs     2        4       2
    //  35   1     6-11yrs    1        3       32

    // Select a subset of columns to keep.
    var pipeline = mlContext.Transforms.SelectColumns("Age", "Education");

    // Now we can transform the data and look at the output to confirm the behavior of SelectColumns.
    // Don't forget that this operation doesn't actually evaluate data until we read the data below,
    // as transformations are lazy in ML.NET.
    var transformedData = pipeline.Fit(data).Transform(data);

    // Print the number of columns in the schema
    Console.WriteLine($"There are {transformedData.Schema.Count} columns in the dataset.");

    // Expected output:
    //  There are 2 columns in the dataset.

    // We can extract the newly created column as an IEnumerable of SampleInfertDataTransformed, the class we define below.
    var rowEnumerable = mlContext.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

    // And finally, we can write out the rows of the dataset, looking at the columns of interest.
    Console.WriteLine($"Age and Education columns obtained post-transformation.");
    foreach (var row in rowEnumerable)
    {
        Console.WriteLine($"Age: {row.Age} Education: {row.Education}");
    }

    // Expected output:
    //  Age and Education columns obtained post-transformation.
    //  Age: 26 Education: 0-5yrs
    //  Age: 42 Education: 0-5yrs
    //  Age: 39 Education: 12+yrs
    //  Age: 34 Education: 0-5yrs
    //  Age: 35 Education: 6-11yrs
}
// This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot).
// IidSpikeDetector is then applied to identify spiking points in the series.
public static void IidSpikeDetectorTransform()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var ml = new MLContext();

    // Generate sample series data with a spike
    const int Size = 10;
    var data = new List<IidSpikeData>(Size);
    for (int i = 0; i < Size / 2; i++)
    {
        data.Add(new IidSpikeData(5));
    }
    // This is a spike
    data.Add(new IidSpikeData(10));
    for (int i = 0; i < Size / 2; i++)
    {
        data.Add(new IidSpikeData(5));
    }

    // Convert data to IDataView.
    var dataView = ml.Data.ReadFromEnumerable(data);

    // Setup IidSpikeDetector arguments
    string outputColumnName = nameof(IidSpikePrediction.Prediction);
    string inputColumnName = nameof(IidSpikeData.Value);

    // The transformed data.
    var transformedData = ml.Transforms.IidSpikeEstimator(outputColumnName, inputColumnName, 95, Size / 4)
        .Fit(dataView).Transform(dataView);

    // Getting the data of the newly created column as an IEnumerable of IidSpikePrediction.
    var predictionColumn = ml.CreateEnumerable<IidSpikePrediction>(transformedData, reuseRowObject: false);

    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    Console.WriteLine("Alert\tScore\tP-Value");
    foreach (var prediction in predictionColumn)
    {
        Console.WriteLine("{0}\t{1:0.00}\t{2:0.00}", prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]);
    }
    Console.WriteLine("");

    // Prediction column obtained post-transformation.
    // Alert   Score   P-Value
    // 0       5.00    0.50
    // 0       5.00    0.50
    // 0       5.00    0.50
    // 0       5.00    0.50
    // 0       5.00    0.50
    // 1       10.00   0.00   <-- alert is on, predicted spike
    // 0       5.00    0.26
    // 0       5.00    0.26
    // 0       5.00    0.50
    // 0       5.00    0.50
    // 0       5.00    0.50
}
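// IidSpikeData and IidSpikePrediction are referenced above but not defined in this snippet.
// A minimal sketch of what the usage implies, assuming the standard three-element
// (Alert, Score, P-Value) output vector of the spike detector:
class IidSpikeData
{
    public float Value;
    public IidSpikeData(float value) => Value = value;
}

class IidSpikePrediction
{
    [VectorType(3)]
    public double[] Prediction { get; set; }
}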
/// This example demonstrates the use of KeyTypes using both the ValueMappingEstimator and KeyToValueEstimator. Using a KeyType
/// instead of the actual value provides a unique integer representation of the value. When the treatValueAsKeyTypes parameter is true,
/// the ValueMappingEstimator will generate a KeyType for each unique value.
///
/// In this example, the education data is mapped to a grouping of 'Undergraduate' and 'Postgraduate'. Because KeyTypes are used, the
/// ValueMappingEstimator will output the KeyType value rather than the string value of 'Undergraduate' or 'Postgraduate'.
///
/// The KeyToValueEstimator is added to the pipeline to convert the KeyType back to the original value. Therefore the output of this example
/// contains the string values 'Undergraduate' and 'Postgraduate'.
public static void Run()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable.
    IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
    IDataView trainData = mlContext.Data.ReadFromEnumerable(data);

    // Creating a list of keys based on the Education values from the dataset.
    // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable.
    var educationKeys = new List<string>() { "0-5yrs", "6-11yrs", "12+yrs" };

    // Creating a list of values that are sample strings. These will be converted to KeyTypes.
    var educationValues = new List<string>() { "Undergraduate", "Postgraduate", "Postgraduate" };

    // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings.
    // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back
    // to the original value.
    var pipeline = new ValueMappingEstimator<string, string>(mlContext, educationKeys, educationValues, true, ("EducationKeyType", "Education"))
        .Append(mlContext.Transforms.Conversion.MapKeyToValue(("EducationCategory", "EducationKeyType")));

    // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column.
    IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures.
    IEnumerable<SampleInfertDataWithFeatures> featureRows = mlContext.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

    Console.WriteLine($"Example of mapping string->keytype");
    Console.WriteLine($"Age\tEducation\tEducationCategory");
    foreach (var featureRow in featureRows)
    {
        Console.WriteLine($"{featureRow.Age}\t{featureRow.Education}  \t{featureRow.EducationCategory}");
    }

    // Features column obtained post-transformation.
    //
    // Age  Education  EducationCategory
    // 26   0-5yrs     Undergraduate
    // 42   0-5yrs     Undergraduate
    // 39   12+yrs     Postgraduate
    // 34   0-5yrs     Undergraduate
    // 35   6-11yrs    Postgraduate
}
public void ChangePointDetectionWithSeasonality()
{
    var env = new MLContext(conc: 1);
    const int ChangeHistorySize = 10;
    const int SeasonalitySize = 10;
    const int NumberOfSeasonsInTraining = 5;
    const int MaxTrainingSize = NumberOfSeasonsInTraining * SeasonalitySize;

    var data = new List<Data>();
    var dataView = env.Data.ReadFromEnumerable(data);
    for (int j = 0; j < NumberOfSeasonsInTraining; j++)
    {
        for (int i = 0; i < SeasonalitySize; i++)
        {
            data.Add(new Data(i));
        }
    }

    for (int i = 0; i < ChangeHistorySize; i++)
    {
        data.Add(new Data(i * 100));
    }

    // Convert to statically-typed data view.
    var staticData = dataView.AssertStatic(env, c => new { Value = c.R4.Scalar });

    // Build the pipeline
    var staticLearningPipeline = staticData.MakeNewEstimator()
        .Append(r => r.Value.SsaChangePointDetect(95, ChangeHistorySize, MaxTrainingSize, SeasonalitySize));
    // Train
    var detector = staticLearningPipeline.Fit(staticData);
    // Transform
    var output = detector.Transform(staticData);

    // Get predictions
    var enumerator = env.CreateEnumerable<ChangePointPrediction>(output.AsDynamic, true).GetEnumerator();
    ChangePointPrediction row = null;
    List<double> expectedValues = new List<double>()
    {
        0, -3.31410598754883, 0.5, 5.12000000000001E-08,
        0, 1.5700820684432983, 5.2001145245395008E-07, 0.012414560443710681,
        0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935,
        0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483
    };

    int index = 0;
    while (enumerator.MoveNext() && index < expectedValues.Count)
    {
        row = enumerator.Current;
        CompareNumbersWithTolerance(expectedValues[index++], row.Data[0], digitsOfPrecision: 5); // Alert
        CompareNumbersWithTolerance(expectedValues[index++], row.Data[1], digitsOfPrecision: 5); // Raw score
        CompareNumbersWithTolerance(expectedValues[index++], row.Data[2], digitsOfPrecision: 5); // P-Value score
        CompareNumbersWithTolerance(expectedValues[index++], row.Data[3], digitsOfPrecision: 5); // Martingale score
    }
}
/// This example demonstrates the use of ValueMappingEstimator by mapping float-to-string values. This is useful if the key
/// data are floating point and need to be grouped into string values. In this example, the temperature value is mapped to
/// "T1", "T2", "T3", and "T4" groups.
public static void Run()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable.
    IEnumerable<SamplesUtils.DatasetUtils.SampleTemperatureData> data = SamplesUtils.DatasetUtils.GetSampleTemperatureData();
    IDataView trainData = mlContext.Data.ReadFromEnumerable(data);

    // If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
    // Creating a list of keys based on the temperature values from the dataset.
    var temperatureKeys = new List<float>() { 39.0F, 67.0F, 75.0F, 82.0F };

    // Creating a list of values; these strings will map accordingly to each key.
    var classificationValues = new List<string>() { "T1", "T2", "T3", "T4" };

    // Constructs the ValueMappingEstimator making the ML.NET pipeline.
    var pipeline = mlContext.Transforms.Conversion.ValueMap(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature"));

    // Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column.
    IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Getting the resulting data as an IEnumerable of SampleTemperatureDataWithCategory. This will contain the newly created column TemperatureCategory.
    IEnumerable<SampleTemperatureDataWithCategory> featureRows = mlContext.CreateEnumerable<SampleTemperatureDataWithCategory>(transformedData, reuseRowObject: false);

    Console.WriteLine($"Example of mapping float->string");
    Console.WriteLine($"Date\t\tTemperature\tTemperatureCategory");
    foreach (var featureRow in featureRows)
    {
        Console.WriteLine($"{featureRow.Date.ToString("d")}\t{featureRow.Temperature}\t\t{featureRow.TemperatureCategory}");
    }

    // Features column obtained post-transformation.
    //
    // Example of mapping float->string
    // Date       Temperature  TemperatureCategory
    // 1/1/2012   39           T1
    // 1/2/2012   82           T4
    // 1/3/2012   75           T3
    // 1/4/2012   67           T2
    // 1/5/2012   75           T3
}
/// <summary>
/// Example use of OnnxEstimator in an ML.NET pipeline
/// </summary>
public static void OnnxTransformSample()
{
    // Download the squeezenet image model from the ONNX model zoo, version 1.2
    // https://github.com/onnx/models/tree/master/squeezenet
    var modelPath = @"squeezenet\model.onnx";

    // Inspect the model's inputs and outputs
    var session = new InferenceSession(modelPath);
    var inputInfo = session.InputMetadata.First();
    var outputInfo = session.OutputMetadata.First();
    Console.WriteLine($"Input Name is {String.Join(",", inputInfo.Key)}");
    Console.WriteLine($"Input Dimensions are {String.Join(",", inputInfo.Value.Dimensions)}");
    Console.WriteLine($"Output Name is {String.Join(",", outputInfo.Key)}");
    Console.WriteLine($"Output Dimensions are {String.Join(",", outputInfo.Value.Dimensions)}");
    // Results..
    // Input Name is data_0
    // Input Dimensions are 1,3,224,224
    // Output Name is softmaxout_1
    // Output Dimensions are 1,1000,1,1

    // Create the ML pipeline to score the data using OnnxScoringEstimator
    var mlContext = new MLContext();
    var data = GetTensorData();
    var idv = mlContext.Data.ReadFromEnumerable(data);
    var pipeline = new OnnxScoringEstimator(mlContext, new[] { outputInfo.Key }, new[] { inputInfo.Key }, modelPath);

    // Run the pipeline and get the transformed values
    var transformedValues = pipeline.Fit(idv).Transform(idv);

    // Retrieve model scores into the Prediction class
    var predictions = mlContext.CreateEnumerable<Prediction>(transformedValues, reuseRowObject: false);

    // Iterate rows
    foreach (var prediction in predictions)
    {
        int numClasses = 0;
        foreach (var classScore in prediction.softmaxout_1.Take(3))
        {
            Console.WriteLine($"Class #{numClasses++} score = {classScore}");
        }
        Console.WriteLine(new string('-', 10));
    }

    // Results look like below...
    // Class #0 score = 4.544065E-05
    // Class #1 score = 0.003845858
    // Class #2 score = 0.0001249467
    // ----------
    // Class #0 score = 4.491953E-05
    // Class #1 score = 0.003848222
    // Class #2 score = 0.0001245592
    // ----------
}
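// GetTensorData() and the Prediction class are referenced above but not shown. A minimal sketch of
// what the usage implies follows; the names mirror the model's input ("data_0") and output
// ("softmaxout_1") tensors, and the sizes are taken from the dimensions printed above. Treat this
// as an assumption, not the sample's original definitions.
class TensorData
{
    [VectorType(1 * 3 * 224 * 224)]
    [ColumnName("data_0")]
    public float[] data_0 { get; set; }
}

static TensorData[] GetTensorData()
{
    // Two constant-filled dummy inputs, just to have something to score.
    const int size = 1 * 3 * 224 * 224;
    return new[]
    {
        new TensorData() { data_0 = Enumerable.Repeat(0.5f, size).ToArray() },
        new TensorData() { data_0 = Enumerable.Repeat(0.25f, size).ToArray() }
    };
}

class Prediction
{
    [VectorType(1000)]
    public float[] softmaxout_1 { get; set; }
}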
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-array values, which allows for mapping string data
/// to numeric arrays that can then be used as a feature set for a trainer. In this example, we are mapping the education data to
/// arbitrary integer arrays with the following association:
///     0-5yrs  -> 1, 2, 3
///     6-11yrs -> 5, 6, 7
///     12+yrs  -> 42, 32, 64
public static void Run()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable.
    IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
    IDataView trainData = mlContext.Data.ReadFromEnumerable(data);

    // If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
    // Creating a list of keys based on the Education values from the dataset.
    var educationKeys = new List<string>() { "0-5yrs", "6-11yrs", "12+yrs" };

    // Sample list of associated array values.
    var educationValues = new List<int[]>()
    {
        new int[] { 1, 2, 3 },
        new int[] { 5, 6, 7 },
        new int[] { 42, 32, 64 }
    };

    // Constructs the ValueMappingEstimator making the ML.NET pipeline.
    var pipeline = mlContext.Transforms.Conversion.ValueMap<string, int>(educationKeys, educationValues, ("EducationFeature", "Education"));

    // Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column.
    IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. This will contain the newly created column EducationFeature.
    IEnumerable<SampleInfertDataWithIntArray> featuresColumn = mlContext.CreateEnumerable<SampleInfertDataWithIntArray>(transformedData, reuseRowObject: false);

    Console.WriteLine($"Example of mapping string->array");
    Console.WriteLine($"Age\tEducation\tEducationFeature");
    foreach (var featureRow in featuresColumn)
    {
        Console.WriteLine($"{featureRow.Age}\t{featureRow.Education}  \t{string.Join(",", featureRow.EducationFeature)}");
    }

    // Features column obtained post-transformation.
    //
    // Example of mapping string->array
    // Age  Education  EducationFeature
    // 26   0-5yrs     1,2,3
    // 42   0-5yrs     1,2,3
    // 39   12+yrs     42,32,64
    // 34   0-5yrs     1,2,3
    // 35   6-11yrs    5,6,7
}
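// SampleInfertDataWithIntArray is referenced above but not defined in this snippet. A minimal
// sketch of what the usage implies (property names and types are assumptions inferred from the
// printed columns):
class SampleInfertDataWithIntArray
{
    public float Age { get; set; }
    public string Education { get; set; }
    public int[] EducationFeature { get; set; }
}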
public static void ShowPredictions(MLContext env, IDataView data, bool label = true, int count = 2)
{
    env
        // Convert to an enumerable of user-defined type.
        .CreateEnumerable<TransactionFraudPrediction>(data, reuseRowObject: false)
        .Where(x => x.PredictedLabel == label)
        // Take the first `count` matching rows as a list.
        .Take(count)
        .ToList()
        // Print to console.
        .ForEach(row => { row.PrintToConsole(); });
}
public void BuildAndTrain()
{
    var featurizerModelLocation = inputModelLocation;

    ConsoleWriteHeader("Read model");
    Console.WriteLine($"Model location: {featurizerModelLocation}");
    Console.WriteLine($"Images folder: {imagesFolder}");
    Console.WriteLine($"Training file: {dataLocation}");
    Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}");

    var data = mlContext.Data.ReadFromTextFile<ImageNetData>(path: dataLocation, hasHeader: false);

    var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: LabelTokey, inputColumnName: DefaultColumnNames.Label)
        .Append(mlContext.Transforms.LoadImages(imagesFolder, (ImageReal, nameof(ImageNetData.ImagePath))))
        .Append(mlContext.Transforms.Resize(outputColumnName: ImageReal, imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: ImageReal))
        .Append(mlContext.Transforms.ExtractPixels(new ImagePixelExtractorTransformer.ColumnInfo(name: "input", inputColumnName: ImageReal, interleave: ImageNetSettings.channelsLast, offset: ImageNetSettings.mean)))
        .Append(mlContext.Transforms.ScoreTensorFlowModel(modelLocation: featurizerModelLocation, outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }))
        .Append(mlContext.MulticlassClassification.Trainers.LogisticRegression(labelColumn: LabelTokey, featureColumn: "softmax2_pre_activation"))
        .Append(mlContext.Transforms.Conversion.MapKeyToValue((PredictedLabelValue, DefaultColumnNames.PredictedLabel)));

    // Train the model
    ConsoleWriteHeader("Training classification model");
    ITransformer model = pipeline.Fit(data);

    // Process the training data through the model.
    // This is an optional step, but it's useful for debugging issues.
    var trainData = model.Transform(data);
    var loadedModelOutputColumnNames = trainData.Schema
        .Where(col => !col.IsHidden).Select(col => col.Name);
    var trainData2 = mlContext.CreateEnumerable<ImageNetPipeline>(trainData, false, true).ToList();
    trainData2.ForEach(pr => ConsoleWriteImagePrediction(pr.ImagePath, pr.PredictedLabelValue, pr.Score.Max()));

    // Get some performance metrics on the model using training data.
    var classificationContext = new MulticlassClassificationCatalog(mlContext);
    ConsoleWriteHeader("Classification metrics");
    var metrics = classificationContext.Evaluate(trainData, label: LabelTokey, predictedLabel: DefaultColumnNames.PredictedLabel);
    Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
    Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");

    // Save the model to assets/outputs
    ConsoleWriteHeader("Save model to local file");
    ModelHelpers.DeleteAssets(outputModelLocation);
    using (var f = new FileStream(outputModelLocation, FileMode.Create))
        mlContext.Model.Save(model, f);
    Console.WriteLine($"Model saved: {outputModelLocation}");
}
public static void ConcatTransform()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
    var data = SamplesUtils.DatasetUtils.GetInfertData();
    var trainData = mlContext.Data.ReadFromEnumerable(data);

    // Preview of the data.
    //
    // Age   Case  Education  induced  parity  pooled.stratum  row_num  ...
    // 26.0  1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
    // 42.0  1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
    // 39.0  1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
    // 34.0  1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
    // 35.0  1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

    // A pipeline for concatenating the Age, Parity and Induced columns together into a vector that will be the Features column.
    // Concatenation is necessary because learners take **feature vectors** as inputs.
    // e.g. var regressionTrainer = mlContext.Regression.Trainers.FastTree(labelColumn: "Label", featureColumn: "Features");
    string outputColumnName = "Features";
    var pipeline = mlContext.Transforms.Concatenate(outputColumnName, new[] { "Age", "Parity", "Induced" });

    // The transformed data.
    var transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Now let's take a look at what this concatenation did.
    // We can extract the newly created column as an IEnumerable of SampleInfertDataWithFeatures, the class we define above.
    var featuresColumn = mlContext.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

    // And we can write out a few rows
    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    foreach (var featureRow in featuresColumn)
    {
        foreach (var value in featureRow.Features.GetValues())
        {
            Console.Write($"{value} ");
        }
        Console.WriteLine("");
    }

    // Expected output:
    //  Features column obtained post-transformation.
    //
    //  26 6 1
    //  42 1 1
    //  39 6 2
    //  34 4 2
    //  35 3 1
}
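// SampleInfertDataWithFeatures is described above as "the class we define above", but its
// definition is not part of this snippet. A minimal sketch of what the usage implies; the
// VBuffer<float> type matches the featureRow.Features.GetValues() call, and the exact shape is
// an assumption:
class SampleInfertDataWithFeatures
{
    public VBuffer<float> Features { get; set; }
}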
public static void Example()
{
    // Generate IEnumerable<BinaryLabelFloatFeatureVectorSample> as training examples.
    var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);

    // Information in first example.
    // Label: true
    Console.WriteLine("First example's label is {0}", rawData.First().Label);
    // Features is a 10-element float[]:
    // [0] 1.0173254   [1] 0.9680227   [2] 0.7581612   [3] 0.406033158  [4] 0.7588848
    // [5] 1.10602713  [6] 0.6421779   [7] 1.17754972  [8] 0.473704457  [9] 0.4919063
    Console.WriteLine("First example's feature vector is {0}", rawData.First().Features);

    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Step 1: Read the data as an IDataView.
    var data = mlContext.Data.ReadFromEnumerable(rawData);

    // ML.NET doesn't cache data set by default. Caching is always recommended when using the
    // StochasticDualCoordinateAscent algorithm because it may incur multiple data passes.
    data = mlContext.Data.Cache(data);

    // Step 2: Create a binary classifier. This trainer may produce a logistic regression model.
    // We set the "Label" column as the label of the dataset, and the "Features" column as the features column.
    var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
        labelColumnName: "Label",
        featureColumnName: "Features",
        loss: new HingeLoss(),
        l2Const: 0.001f);

    // Step 3: Train the pipeline created.
    var model = pipeline.Fit(data);

    // Step 4: Make prediction and evaluate its quality (on training set).
    var prediction = model.Transform(data);
    var rawPrediction = mlContext.CreateEnumerable<SamplesUtils.DatasetUtils.NonCalibratedBinaryClassifierOutput>(prediction, false);

    // Step 5: Inspect the prediction of the first example.
    // Note that positive/negative label may be associated with positive/negative score
    var first = rawPrediction.First();
    Console.WriteLine("The first example actual label is {0}. The trained model assigns it a score {1}.",
        first.Label /*true*/, first.Score /*around 3*/);
}
public void ExportToIEnumerable()
{
    var mlContext = new MLContext(seed: 1, conc: 1);

    // Read the dataset from an enumerable.
    var enumerableBefore = TypeTestData.GenerateDataset();
    var data = mlContext.Data.ReadFromEnumerable(enumerableBefore);

    // Export back to an enumerable.
    var enumerableAfter = mlContext.CreateEnumerable<TypeTestData>(data, true);

    Common.AssertEqual(enumerableBefore, enumerableAfter);
}
public static void Example()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
    IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
    var trainData = mlContext.Data.ReadFromEnumerable(data);

    // Preview of the data.
    //
    // Age   Case  Education  induced  parity  pooled.stratum  row_num  ...
    // 26.0  1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
    // 42.0  1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
    // 39.0  1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
    // 34.0  1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
    // 35.0  1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

    // Select a subset of columns to keep.
    var pipeline = mlContext.Transforms.SelectColumns(new string[] { "Age", "Education" });

    // Now we can transform the data and look at the output to confirm the behavior of SelectColumns.
    // Don't forget that this operation doesn't actually evaluate data until we read the data below,
    // as transformations are lazy in ML.NET.
    var transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Print the number of columns in the schema
    Console.WriteLine($"There are {transformedData.Schema.Count} columns in the dataset.");

    // Expected output:
    //  There are 2 columns in the dataset.

    // We can extract the newly created column as an IEnumerable of SampleInfertDataTransformed, the class we define below.
    var rowEnumerable = mlContext.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

    // And finally, we can write out the rows of the dataset, looking at the columns of interest.
    Console.WriteLine($"Age and Education columns obtained post-transformation.");
    foreach (var row in rowEnumerable)
    {
        Console.WriteLine($"Age: {row.Age} Education: {row.Education}");
    }

    // Expected output:
    //  Age and Education columns obtained post-transformation.
    //  Age: 26 Education: 0-5yrs
    //  Age: 42 Education: 0-5yrs
    //  Age: 39 Education: 0-5yrs
    //  Age: 34 Education: 0-5yrs
    //  Age: 35 Education: 6-11yrs
}
void PredictAndMetadata()
{
    var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);
    var ml = new MLContext();

    var data = ml.Data.ReadFromTextFile<IrisData>(dataPath, separatorChar: ',');
    var pipeline = ml.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
        .Append(ml.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest)
        .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
            new SdcaMultiClassTrainer.Options
            {
                MaxIterations = 100,
                Shuffle = true,
                NumThreads = 1,
            }));

    var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
    var engine = model.CreatePredictionEngine<IrisDataNoLabel, IrisPredictionNotCasted>(ml);

    var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), hasHeader: true, separatorChar: ',');
    var testData = ml.CreateEnumerable<IrisData>(testLoader, false);

    // During prediction we will get a Score column with 3 float values.
    // We need a way to map each score back to its original label.
    // To do that, we need to get the SlotNames from the Score column.
    // The slot names on top of the Score column represent the original labels for the i-th value in the Score array.
    VBuffer<ReadOnlyMemory<char>> slotNames = default;
    engine.OutputSchema[nameof(IrisPrediction.Score)].GetSlotNames(ref slotNames);
    // Since we apply the MapValueToKey estimator with default parameters, the key values
    // depend on the order of occurrence in the data file, which is "Iris-setosa", "Iris-versicolor", "Iris-virginica".
    // So if the Score column equals [0.2, 0.3, 0.5], that means the score for
    // Iris-setosa is 0.2
    // Iris-versicolor is 0.3
    // Iris-virginica is 0.5.
    Assert.True(slotNames.GetItemOrDefault(0).ToString() == "Iris-setosa");
    Assert.True(slotNames.GetItemOrDefault(1).ToString() == "Iris-versicolor");
    Assert.True(slotNames.GetItemOrDefault(2).ToString() == "Iris-virginica");

    // Let's look at how we can convert the key value of PredictedLabel back to the original labels.
    // We need to read the KeyValues for the "PredictedLabel" column.
    VBuffer<ReadOnlyMemory<char>> keys = default;
    engine.OutputSchema[nameof(IrisPrediction.PredictedLabel)].GetKeyValues(ref keys);

    foreach (var input in testData.Take(20))
    {
        var prediction = engine.Predict(input);
        // The predicted label is a key type whose internal representation starts at 1
        // (0 is reserved for the missing value), so to turn the key into an index into
        // the key metadata we need to subtract 1 from it.
        var decipheredLabel = keys.GetItemOrDefault((int)prediction.PredictedLabel - 1).ToString();
        Assert.True(decipheredLabel == input.Label);
    }
}
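// IrisData, IrisPrediction, and IrisPredictionNotCasted are defined elsewhere in the test project.
// A minimal sketch of what the usage above implies; the member names, types, and the inheritance
// arrangement are assumptions inferred from the schema accesses, not the original definitions.
class IrisDataNoLabel
{
    public float SepalLength;
    public float SepalWidth;
    public float PetalLength;
    public float PetalWidth;
}

class IrisData : IrisDataNoLabel
{
    public string Label;
}

class IrisPredictionNotCasted
{
    // Key-typed predicted label; its raw (1-based) value indexes the key metadata.
    public uint PredictedLabel;
    // One score per class, aligned with the slot names checked above.
    [VectorType(3)]
    public float[] Score;
}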
public static void Example()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable.
    var dataEnumerable = DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(10, naRate: 0.05);
    var data = mlContext.Data.ReadFromEnumerable(dataEnumerable);

    // Look at the original dataset
    Console.WriteLine($"Label\tFeatures");
    foreach (var row in dataEnumerable)
    {
        Console.WriteLine($"{row.Label}\t({string.Join(", ", row.Features)})");
    }
    Console.WriteLine();

    // Expected output:
    //  Label Features
    //  0 (0.9680227, 0.4060332, 1.106027, 1.17755, 0.4919063, 0.8326591, 1.182151, NaN, 1.195347, 0.5145918)
    //  1 (0.9919022, NaN, 0.5262842, 0.6876203, 0.08110995, 0.4533272, 0.9885438, 0.7629636, NaN, 0.3431419)
    //  1 (0.7159725, 0.2734515, 0.7947656, 0.4572088, 0.2213147, 0.7187268, 0.4879681, 0.8781915, 0.7353975, 0.679749)
    //  0 (1.095362, 0.2865799, 0.3701428, 1.026814, 1.199973, 0.8522052, 1.009463, 0.929094, 0.3255273, 0.3891238)
    //  1 (0.3255007, 0.4683977, 0.8092038, 0.764506, 0.2949968, 0.6633928, 0.2867224, 0.2225179, 0.06851885, 0.693045)
    //  1 (0.221342, 0.0665216, 0.6785055, 0.1490974, 0.6098703, 0.4906252, 0.6776115, 0.2254031, 0.005082198, 0.850485)
    //  0 (0.9049759, 1.188812, 0.7227401, 0.7065761, 0.2570084, 0.6960788, 0.8131579, 0.942329, 1.133393, 0.8996523)
    //  0 (0.8851265, 0.3727676, 0.8091109, 1.197115, 0.2634366, 1.04256, 0.8459901, 1.170127, 0.7129673, 1.013653)
    //  1 (0.5528619, 0.9945465, 0.06445368, 0.4830741, 0.0716896, 0.1508327, 0.4510793, NaN, 0.8160448, 0.9136292)
    //  1 (0.9628896, 0.01686989, 0.2783295, 0.5877925, 0.324167, 0.974933, 0.9728873, 0.1322647, 0.1782212, 0.5446572)

    // Filter out any row with an NA value
    var filteredData = mlContext.Data.FilterByMissingValues(data, "Features");

    // Take a look at the resulting dataset and note that the Feature vectors with NaNs are missing.
    var enumerable = mlContext.CreateEnumerable<DatasetUtils.FloatLabelFloatFeatureVectorSample>(filteredData, reuseRowObject: true);

    Console.WriteLine($"Label\tFeatures");
    foreach (var row in enumerable)
    {
        Console.WriteLine($"{row.Label}\t({string.Join(", ", row.Features)})");
    }

    // Expected output:
    //  Label Features
    //  1 (0.7159725, 0.2734515, 0.7947656, 0.4572088, 0.2213147, 0.7187268, 0.4879681, 0.8781915, 0.7353975, 0.679749)
    //  0 (1.095362, 0.2865799, 0.3701428, 1.026814, 1.199973, 0.8522052, 1.009463, 0.929094, 0.3255273, 0.3891238)
    //  1 (0.3255007, 0.4683977, 0.8092038, 0.764506, 0.2949968, 0.6633928, 0.2867224, 0.2225179, 0.06851885, 0.693045)
    //  1 (0.221342, 0.0665216, 0.6785055, 0.1490974, 0.6098703, 0.4906252, 0.6776115, 0.2254031, 0.005082198, 0.850485)
    //  0 (0.9049759, 1.188812, 0.7227401, 0.7065761, 0.2570084, 0.6960788, 0.8131579, 0.942329, 1.133393, 0.8996523)
    //  0 (0.8851265, 0.3727676, 0.8091109, 1.197115, 0.2634366, 1.04256, 0.8459901, 1.170127, 0.7129673, 1.013653)
    //  1 (0.9628896, 0.01686989, 0.2783295, 0.5877925, 0.324167, 0.974933, 0.9728873, 0.1322647, 0.1782212, 0.5446572)
}
public static void Example()
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable.
    IEnumerable<SamplesUtils.DatasetUtils.SampleTemperatureData> enumerableOfData = SamplesUtils.DatasetUtils.GetSampleTemperatureData(10);
    var data = mlContext.Data.ReadFromEnumerable(enumerableOfData);

    // Before we apply a filter, examine all the records in the dataset.
    Console.WriteLine($"Date\tTemperature");
    foreach (var row in enumerableOfData)
    {
        Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
    }
    Console.WriteLine();
    // Expected output:
    //  Date        Temperature
    //  1/2/2012    36
    //  1/3/2012    36
    //  1/4/2012    34
    //  1/5/2012    35
    //  1/6/2012    35
    //  1/7/2012    39
    //  1/8/2012    40
    //  1/9/2012    35
    //  1/10/2012   30
    //  1/11/2012   29

    // Filter the data by the values of the temperature. The lower bound is inclusive, the upper exclusive.
    var filteredData = mlContext.Data.FilterByColumn(data, columnName: "Temperature", lowerBound: 34, upperBound: 37);

    // Look at the filtered data and observe that values outside [34,37) have been dropped.
    var enumerable = mlContext.CreateEnumerable<SamplesUtils.DatasetUtils.SampleTemperatureData>(filteredData, reuseRowObject: true);
    Console.WriteLine($"Date\tTemperature");
    foreach (var row in enumerable)
    {
        Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
    }
    // Expected output:
    //  Date        Temperature
    //  1/2/2012    36
    //  1/3/2012    36
    //  1/4/2012    34
    //  1/5/2012    35
    //  1/6/2012    35
    //  1/9/2012    35
}
private void IntermediateData(string dataPath)
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Create the reader: define the data columns and where to find them in the text file.
    var reader = mlContext.Data.CreateTextReader(ctx => (
            // A boolean column depicting the 'target label'.
            IsOver50K: ctx.LoadBool(0),
            // Three text columns.
            Workclass: ctx.LoadText(1),
            Education: ctx.LoadText(2),
            MaritalStatus: ctx.LoadText(3)),
        hasHeader: true);

    // Start creating our processing pipeline. For now, let's just concatenate all the text columns
    // together into one.
    var dataPipeline = reader.MakeNewEstimator()
        .Append(row => (
            row.IsOver50K,
            AllFeatures: row.Workclass.ConcatWith(row.Education, row.MaritalStatus)
        ));

    // Let's verify that the data has been read correctly.
    // First, we read the data file.
    var data = reader.Read(dataPath);

    // Fit our data pipeline and transform data with it.
    var transformedData = dataPipeline.Fit(data).Transform(data);

    // 'transformedData' is a 'promise' of data. Let's actually read it.
    var someRows = mlContext
        // Convert to an enumerable of user-defined type.
        .CreateEnumerable<InspectedRow>(transformedData.AsDynamic, reuseRowObject: false)
        // Take a couple values as an array.
        .Take(4).ToArray();

    // Extract the 'AllFeatures' column.
    // This will give the entire dataset: make sure to only take a few rows
    // in case the dataset is huge.
    var featureColumns = transformedData.GetColumn(r => r.AllFeatures)
        .Take(20).ToArray();

    // The same extension method also applies to the dynamic-typed data, except you have to
    // specify the column name and type:
    var dynamicData = transformedData.AsDynamic;
    var sameFeatureColumns = dynamicData.GetColumn<string[]>(mlContext, "AllFeatures")
        .Take(20).ToArray();
}
public static void Example()
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable.
    var enumerableOfData = SamplesUtils.DatasetUtils.GetSampleTemperatureData(10);
    var data = mlContext.Data.ReadFromEnumerable(enumerableOfData);

    // Before we apply a filter, examine all the records in the dataset.
    Console.WriteLine($"Date\tTemperature");
    foreach (var row in enumerableOfData)
    {
        Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
    }
    Console.WriteLine();
    // Expected output:
    //  Date        Temperature
    //  1/2/2012    36
    //  1/3/2012    36
    //  1/4/2012    34
    //  1/5/2012    35
    //  1/6/2012    35
    //  1/7/2012    39
    //  1/8/2012    40
    //  1/9/2012    35
    //  1/10/2012   30
    //  1/11/2012   29

    // Skip the first 5 rows in the dataset
    var filteredData = mlContext.Data.SkipRows(data, 5);

    // Look at the filtered data and observe that the first 5 rows have been dropped
    var enumerable = mlContext.CreateEnumerable<SamplesUtils.DatasetUtils.SampleTemperatureData>(filteredData, reuseRowObject: true);
    Console.WriteLine($"Date\tTemperature");
    foreach (var row in enumerable)
    {
        Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
    }
    // Expected output:
    //  Date        Temperature
    //  1/7/2012    39
    //  1/8/2012    40
    //  1/9/2012    35
    //  1/10/2012   30
    //  1/11/2012   29
}
public void ChangeDetection()
{
    var env = new MLContext(conc: 1);
    const int size = 10;
    List<Data> data = new List<Data>(size);
    var dataView = env.Data.ReadFromEnumerable(data);
    for (int i = 0; i < size / 2; i++)
    {
        data.Add(new Data(5));
    }
    for (int i = 0; i < size / 2; i++)
    {
        data.Add(new Data((float)(5 + i * 1.1)));
    }

    var args = new IidChangePointDetector.Options()
    {
        Confidence = 80,
        Source = "Value",
        Name = "Change",
        ChangeHistoryLength = size
    };
    // Train
    var detector = new IidChangePointEstimator(env, args).Fit(dataView);
    // Transform
    var output = detector.Transform(dataView);
    // Get predictions
    var enumerator = env.CreateEnumerable<Prediction>(output, true).GetEnumerator();
    Prediction row = null;
    List<double> expectedValues = new List<double>()
    {
        0, 5, 0.5, 5.1200000000000114E-08,
        0, 5, 0.4999999995, 5.1200000046080209E-08,
        0, 5, 0.4999999995, 5.1200000092160303E-08,
        0, 5, 0.4999999995, 5.12000001382404E-08
    };

    int index = 0;
    while (enumerator.MoveNext() && index < expectedValues.Count)
    {
        row = enumerator.Current;
        Assert.Equal(expectedValues[index++], row.Change[0]);
        Assert.Equal(expectedValues[index++], row.Change[1]);
        Assert.Equal(expectedValues[index++], row.Change[2]);
        Assert.Equal(expectedValues[index++], row.Change[3]);
    }
}
// (OPTIONAL) Try/test a single prediction with the trained model and any test data
private static void TrySinglePrediction(MLContext mlContext, ITransformer model, IDataView dataView)
{
    // Load data to test. It could be any test data; since this is generated code, a row from the
    // dataView is used, but you can try it with any sample data to make a prediction.
    var sample = mlContext.CreateEnumerable<SampleObservation>(dataView, false).First();

    // Create a prediction engine to perform a single prediction.
    var predEngine = model.CreatePredictionEngine<SampleObservation, SamplePrediction>(mlContext);

    // Predict.
    var resultprediction = predEngine.Predict(sample);

    Console.WriteLine($"=============== Single Prediction ===============");
    Console.WriteLine($"Actual value: {sample.Fare_amount} | Predicted value: {resultprediction.Score}");
    Console.WriteLine($"==================================================");
}
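// SampleObservation and SamplePrediction come from the surrounding generated project and are not
// shown here. A minimal sketch of what the usage implies, limited to the members used above
// (Fare_amount and Score); the remaining columns are omitted because they are not in this snippet:
class SampleObservation
{
    public float Fare_amount { get; set; }
    // ... remaining input columns omitted ...
}

class SamplePrediction
{
    public float Score { get; set; }
}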
public static void ConcatTransform()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var ml = new MLContext();

    // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
    IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
    var trainData = ml.Data.ReadFromEnumerable(data);

    // Preview of the data.
    //
    // Age   Case  Education  induced  parity  pooled.stratum  row_num  ...
    // 26.0  1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
    // 42.0  1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
    // 39.0  1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
    // 34.0  1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
    // 35.0  1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

    // A pipeline for concatenating the Age, Parity and Induced columns together into the Features column.
    string outputColumnName = "Features";
    var pipeline = new ColumnConcatenatingEstimator(ml, outputColumnName, new[] { "Age", "Parity", "Induced" });

    // The transformed data.
    var transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures.
    var featuresColumn = ml.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    foreach (var featureRow in featuresColumn)
    {
        foreach (var value in featureRow.Features.GetValues())
        {
            Console.Write($"{value} ");
        }
        Console.WriteLine("");
    }

    // Features column obtained post-transformation.
    //
    // 26 6 1
    // 42 1 1
    // 39 6 2
    // 34 4 2
    // 35 3 1
}
public void ChangeDetection()
{
    var env = new MLContext(conc: 1);
    const int Size = 10;
    var data = new List<Data>(Size);
    var dataView = env.Data.ReadFromEnumerable(data);
    for (int i = 0; i < Size / 2; i++)
    {
        data.Add(new Data(5));
    }
    for (int i = 0; i < Size / 2; i++)
    {
        data.Add(new Data((float)(5 + i * 1.1)));
    }

    // Convert to statically-typed data view.
    var staticData = dataView.AssertStatic(env, c => new { Value = c.R4.Scalar });

    // Build the pipeline
    var staticLearningPipeline = staticData.MakeNewEstimator()
        .Append(r => r.Value.IidChangePointDetect(80, Size));
    // Train
    var detector = staticLearningPipeline.Fit(staticData);
    // Transform
    var output = detector.Transform(staticData);

    // Get predictions
    var enumerator = env.CreateEnumerable<ChangePointPrediction>(output.AsDynamic, true).GetEnumerator();
    ChangePointPrediction row = null;
    List<double> expectedValues = new List<double>()
    {
        0, 5, 0.5, 5.1200000000000114E-08,
        0, 5, 0.4999999995, 5.1200000046080209E-08,
        0, 5, 0.4999999995, 5.1200000092160303E-08,
        0, 5, 0.4999999995, 5.12000001382404E-08
    };

    int index = 0;
    while (enumerator.MoveNext() && index < expectedValues.Count)
    {
        row = enumerator.Current;
        Assert.Equal(expectedValues[index++], row.Data[0], precision: 7);
        Assert.Equal(expectedValues[index++], row.Data[1], precision: 7);
        Assert.Equal(expectedValues[index++], row.Data[2], precision: 7);
        Assert.Equal(expectedValues[index++], row.Data[3], precision: 7);
    }
}
public static void Example()
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable.
    var enumerableOfData = DatasetUtils.GetSampleTemperatureData(5);
    var data = mlContext.Data.ReadFromEnumerable(enumerableOfData);

    // Before we apply a filter, examine all the records in the dataset.
    Console.WriteLine($"Date\tTemperature");
    foreach (var row in enumerableOfData)
    {
        Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
    }
    Console.WriteLine();
    // Expected output:
    //  Date        Temperature
    //  1/2/2012    36
    //  1/3/2012    36
    //  1/4/2012    34
    //  1/5/2012    35
    //  1/6/2012    35

    // Shuffle the dataset.
    var shuffledData = mlContext.Data.ShuffleRows(data, seed: 123);

    // Look at the shuffled data and observe that the rows are in a randomized order.
    var enumerable = mlContext.CreateEnumerable<DatasetUtils.SampleTemperatureData>(shuffledData, reuseRowObject: true);
    Console.WriteLine($"Date\tTemperature");
    foreach (var row in enumerable)
    {
        Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
    }
    // Expected output:
    //  Date        Temperature
    //  1/4/2012    34
    //  1/2/2012    36
    //  1/5/2012    35
    //  1/3/2012    36
    //  1/6/2012    35
}
/// <summary>
/// Example use of the TensorFlow image model in an ML.NET pipeline.
/// </summary>
public static void ScoringWithImageClassificationModelSample()
{
    // Download the ResNet 101 model from the location below.
    // https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz
    var modelLocation = @"resnet_v2_101/resnet_v2_101_299_frozen.pb";

    var mlContext = new MLContext();
    var data = GetTensorData();
    var idv = mlContext.Data.ReadFromEnumerable(data);

    // Create an ML pipeline.
    var pipeline = mlContext.Transforms.ScoreTensorFlowModel(
        modelLocation,
        new[] { nameof(OutputScores.output) },
        new[] { nameof(TensorData.input) });

    // Run the pipeline and get the transformed values.
    var model = pipeline.Fit(idv);
    var transformedValues = model.Transform(idv);

    // Retrieve model scores.
    var outScores = mlContext.CreateEnumerable<OutputScores>(transformedValues, reuseRowObject: false);

    // Display scores. (For the sake of brevity we display the scores of the first 3 classes.)
    foreach (var prediction in outScores)
    {
        int numClasses = 0;
        foreach (var classScore in prediction.output.Take(3))
        {
            Console.WriteLine($"Class #{numClasses++} score = {classScore}");
        }
        Console.WriteLine(new string('-', 10));
    }

    // Results look like below...
    // Class #0 score = -0.8092947
    // Class #1 score = -0.3310375
    // Class #2 score = 0.1119193
    // ----------
    // Class #0 score = -0.7807726
    // Class #1 score = -0.2158062
    // Class #2 score = 0.1153686
    // ----------
}
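// TensorData, OutputScores, and GetTensorData() are not part of this snippet. A minimal sketch of
// what the usage implies; the 299x299x3 input size is inferred from the model file name
// (resnet_v2_101_299_frozen.pb) and the 1001-class output is the usual TF-Slim ResNet convention,
// so treat both sizes as assumptions:
class TensorData
{
    [VectorType(299 * 299 * 3)]
    public float[] input { get; set; }
}

class OutputScores
{
    [VectorType(1001)]
    public float[] output { get; set; }
}

static TensorData[] GetTensorData()
{
    // Two constant-filled dummy images, just to have something to score.
    const int size = 299 * 299 * 3;
    return new[]
    {
        new TensorData() { input = Enumerable.Repeat(0.5f, size).ToArray() },
        new TensorData() { input = Enumerable.Repeat(0.25f, size).ToArray() }
    };
}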
public void TrainSaveModelAndPredict()
{
    var ml = new MLContext(seed: 1, conc: 1);
    var data = ml.Data.ReadFromTextFile<SentimentData>(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true);

    // Pipeline.
    var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
        .AppendCacheCheckpoint(ml)
        .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
            new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));

    // Train.
    var model = pipeline.Fit(data);

    var modelPath = GetOutputPath("temp.zip");
    // Save model.
    using (var file = File.Create(modelPath))
        model.SaveTo(ml, file);

    // Load model.
    ITransformer loadedModel;
    using (var file = File.OpenRead(modelPath))
        loadedModel = TransformerChain.LoadFrom(ml, file);

    // Create prediction engine and test predictions.
    var engine = loadedModel.CreatePredictionEngine<SentimentData, SentimentPrediction>(ml);

    // Take a couple examples out of the test data and run predictions on top.
    var testData = ml.CreateEnumerable<SentimentData>(
        ml.Data.ReadFromTextFile<SentimentData>(GetDataPath(TestDatasets.Sentiment.testFilename), hasHeader: true), false);
    foreach (var input in testData.Take(5))
    {
        var prediction = engine.Predict(input);
        // Verify that predictions match and scores are separated from zero.
        Assert.Equal(input.Sentiment, prediction.Sentiment);
        Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
    }
}
public void SdcaLogisticRegression()
{
    // Generate C# objects as training examples.
    var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);

    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Step 1: Read the data as an IDataView.
    var data = mlContext.Data.ReadFromEnumerable(rawData);

    // ML.NET doesn't cache data set by default. Caching is very helpful when working with iterative
    // algorithms which need many data passes. Since SDCA is such an algorithm, we cache.
    data = mlContext.Data.Cache(data);

    // Step 2: Create a binary classifier.
    // We set the "Label" column as the label of the dataset, and the "Features" column as the features column.
    var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
        labelColumnName: "Label", featureColumnName: "Features", l2Const: 0.001f);

    // Step 3: Train the pipeline created.
    var model = pipeline.Fit(data);

    // Step 4: Make prediction and evaluate its quality (on training set).
    var prediction = model.Transform(data);
    var metrics = mlContext.BinaryClassification.Evaluate(prediction);

    // Check a few metrics to make sure the trained model is ok.
    Assert.InRange(metrics.Auc, 0.9, 1);
    Assert.InRange(metrics.LogLoss, 0, 0.5);

    var rawPrediction = mlContext.CreateEnumerable<SamplesUtils.DatasetUtils.CalibratedBinaryClassifierOutput>(prediction, false);

    // Step 5: Inspect the prediction of the first example.
    var first = rawPrediction.First();
    // This is a positive example.
    Assert.True(first.Label);
    // A positive example should have a non-negative score.
    Assert.True(first.Score > 0);
    // A positive example should have a high probability of belonging to the positive class.
    Assert.InRange(first.Probability, 0.8, 1);
}
private static (int lines, double columnAverage, double elapsedSeconds) TimeToScanIDataView(MLContext mlContext, IDataView data)
{
    int lines = 0;
    double columnAverage = 0.0;
    var enumerable = mlContext.CreateEnumerable<DatasetUtils.HousingRegression>(data, reuseRowObject: true);

    var watch = System.Diagnostics.Stopwatch.StartNew();
    foreach (var row in enumerable)
    {
        lines++;
        columnAverage += row.MedianHomeValue + row.CrimesPerCapita + row.PercentResidental + row.PercentNonRetail
            + row.CharlesRiver + row.NitricOxides + row.RoomsPerDwelling + row.PercentPre40s
            + row.EmploymentDistance + row.HighwayDistance + row.TaxRate + row.TeacherRatio;
    }
    watch.Stop();
    columnAverage /= lines;
    var elapsed = watch.Elapsed;

    // Use TotalSeconds rather than Seconds: Seconds is only the whole-seconds component of the
    // TimeSpan and would silently drop minutes for long scans.
    return (lines, columnAverage, elapsed.TotalSeconds);
}
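// A hypothetical usage sketch of the helper above, showing how the returned tuple might be
// consumed. The caller is assumed to already have an MLContext and an IDataView of
// DatasetUtils.HousingRegression rows; nothing here is part of the original benchmark.
private static void ReportScanTime(MLContext mlContext, IDataView data)
{
    var (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext, data);
    Console.WriteLine($"Scanned {lines} rows (column average {columnAverage:F2}) in {elapsedSeconds:F3} s.");
}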