/// <summary>
/// Shows how KeyTypes are used with the ValueMappingEstimator and the KeyToValueMappingEstimator.
/// A KeyType replaces the actual value with a unique integer representation of that value. When the
/// estimator is told to treat values as KeyTypes, it generates a KeyType for each unique value.
///
/// In this sample the education ranges are grouped into 'Undergraduate' and 'Postgraduate'. Because
/// KeyTypes are requested, the ValueMappingEstimator emits the KeyType value instead of those strings.
///
/// A KeyToValueMappingEstimator is appended to the pipeline to convert the KeyType back into the
/// original value, so the printed output contains the strings 'Undergraduate' and 'Postgraduate'.
/// </summary>
public static void Run()
{
    // The ML context is the entry point for ML.NET operations. It can be used for exception
    // tracking and logging, and it is also the source of randomness.
    var context = new MLContext();

    // Fetch a small in-memory dataset and expose it as an IDataView.
    IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> infertRows = SamplesUtils.DatasetUtils.GetInfertData();
    IDataView inputView = context.Data.ReadFromEnumerable(infertRows);

    // Keys of the mapping: the Education values that occur in the dataset.
    // They are written out by hand for the demonstration; the estimator accepts any IEnumerable.
    var lookupKeys = new List<string>
    {
        "0-5yrs",
        "6-11yrs",
        "12+yrs"
    };

    // Values of the mapping, position-matched with the keys above.
    // These sample strings are the ones that get converted to KeyTypes.
    var lookupValues = new List<string>
    {
        "Undergraduate",
        "Postgraduate",
        "Postgraduate"
    };

    // Ask the estimator to output KeyTypes even though the mapped values are strings.
    const bool emitKeyTypes = true;

    // First stage: map Education to a KeyType column.
    var toKeyType = new ValueMappingEstimator<string, string>(
        context,
        lookupKeys,
        lookupValues,
        emitKeyTypes,
        ("EducationKeyType", "Education"));

    // Second stage: reverse lookup that turns the KeyType back into the original string value.
    var backToValue = new KeyToValueMappingEstimator(context, ("EducationCategory", "EducationKeyType"));

    var pipeline = toKeyType.Append(backToValue);

    // Fit the pipeline on the data, then apply it to the same data to add the new columns.
    var fitted = pipeline.Fit(inputView);
    IDataView transformedData = fitted.Transform(inputView);

    // Read the transformed data back as strongly typed rows.
    IEnumerable<SampleInfertDataWithFeatures> outputRows =
        context.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

    Console.WriteLine($"Example of mapping string->keytype");
    Console.WriteLine($"Age\tEducation\tEducationCategory");
    foreach (var row in outputRows)
    {
        Console.WriteLine($"{row.Age}\t{row.Education} \t{row.EducationCategory}");
    }

    // Features column obtained post-transformation.
    //
    // Age Education    EducationCategory
    // 26  0-5yrs       Undergraduate
    // 42  0-5yrs       Undergraduate
    // 39  12+yrs       Postgraduate
    // 34  0-5yrs       Undergraduate
    // 35  6-11yrs      Postgraduate
}
/// <summary>
/// Runs the KeyToValueMappingEstimator against the iris data: loads the text file, maps the
/// string columns to keys, checks the estimator through TestEstimatorCore with two invalid
/// inputs, then saves the transformed output and compares it via CheckEquality.
/// </summary>
public void KeyToValueWorkout()
{
    string irisPath = GetDataPath("iris.txt");

    // Column layout handed to the text loader: a scalar text column, a text vector column
    // spanning source range 1..4, and a U4 key column read from source column 0.
    var columns = new[]
    {
        new TextLoader.Column("ScalarString", DataKind.TX, 1),
        new TextLoader.Column("VectorString", DataKind.TX, new[] { new TextLoader.Range(1, 4) }),
        new TextLoader.Column
        {
            Name = "BareKey",
            Source = new[] { new TextLoader.Range(0) },
            Type = DataKind.U4,
            KeyCount = new KeyCount(6),
        }
    };
    var loaderArgs = new TextLoader.Arguments { Column = columns };
    var loader = new TextLoader(Env, loaderArgs);

    var view = loader.Read(irisPath);

    // Produce key columns "A" and "B" from the two string columns.
    var keyColumns = new[]
    {
        new ValueToKeyMappingTransformer.ColumnInfo("A", "ScalarString"),
        new ValueToKeyMappingTransformer.ColumnInfo("B", "VectorString")
    };
    var toKey = new ValueToKeyMappingEstimator(Env, keyColumns);
    view = toKey.Fit(view).Transform(view);

    // Two variants of the data, each passed below as the invalidInput argument of TestEstimatorCore.
    var invalidA = new ColumnCopyingTransformer(Env, ("A", "BareKey")).Transform(view);
    var invalidB = new ColumnCopyingTransformer(Env, ("B", "VectorString")).Transform(view);

    var keyToValue = new KeyToValueMappingEstimator(Env, ("A_back", "A"), ("B_back", "B"));

    TestEstimatorCore(keyToValue, view, invalidInput: invalidA);
    TestEstimatorCore(keyToValue, view, invalidInput: invalidB);

    var savePath = GetOutputPath("KeyToValue", "featurized.tsv");
    using (var channel = Env.Start("save"))
    {
        var textSaver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        var fittedModel = keyToValue.Fit(view);
        IDataView savedData = fittedModel.Transform(view);

        using (var stream = File.Create(savePath))
        {
            DataSaverUtils.SaveDataView(channel, textSaver, savedData, stream, keepHidden: true);
        }
    }

    CheckEquality("KeyToValue", "featurized.tsv");
    Done();
}