// Stores a sample iris measurement in _irisExample, trains an SDCA multiclass model on
// "iris.txt", and keeps the resulting prediction engine in _irisModel.
public void SetupIrisPipeline()
{
    _irisExample = new IrisData()
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    };

    string dataFile = Program.GetInvariantCultureDataPath("iris.txt");
    var mlContext = new MLContext(seed: 1, conc: 1);

    // Column 0 is loaded as the label, columns 1-4 as the four measurements; the file has a header row.
    var irisColumns = new[]
    {
        new TextLoader.Column("Label", DataKind.R4, 0),
        new TextLoader.Column("SepalLength", DataKind.R4, 1),
        new TextLoader.Column("SepalWidth", DataKind.R4, 2),
        new TextLoader.Column("PetalLength", DataKind.R4, 3),
        new TextLoader.Column("PetalWidth", DataKind.R4, 4),
    };
    var loader = new TextLoader(mlContext, columns: irisColumns, hasHeader: true);
    IDataView trainingData = loader.Read(dataFile);

    // Concatenate the four measurements into "Features", then train on a single thread.
    var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames);
    var trainer = new SdcaMultiClassTrainer(mlContext, "Label", "Features",
        advancedSettings: settings =>
        {
            settings.NumThreads = 1;
            settings.ConvergenceTolerance = 1e-2f;
        });

    var trained = concat.Append(trainer).Fit(trainingData);
    _irisModel = trained.CreatePredictionEngine<IrisData, IrisPrediction>(mlContext);
}
// Builds a SuggestedTransform that concatenates the given columns into a single output column.
//
// columnNames      - names of the source columns, passed to the estimator in list order.
// concatColumnName - name of the concatenated output column.
// transformType    - not read by this method; retained so existing callers keep compiling.
// isNumeric        - flag copied onto every source and destination routing entry.
//
// Returns the concatenating estimator paired with its column routing structure.
public static SuggestedTransform ConcatColumnsIntoOne(List<string> columnNames, string concatColumnName, Type transformType, bool isNumeric)
{
    var env = new MLContext();
    var input = new ColumnConcatenatingEstimator(env, concatColumnName, columnNames.ToArray());

    // Concat can apply to either numeric or text columns, so the caller states which
    // kind this is via isNumeric rather than the method trying to infer it.
    ColumnRoutingStructure.AnnotatedName[] columnsSource = columnNames
        .Select(c => new ColumnRoutingStructure.AnnotatedName { IsNumeric = isNumeric, Name = c })
        .ToArray();
    ColumnRoutingStructure.AnnotatedName[] columnsDest =
    {
        new ColumnRoutingStructure.AnnotatedName { IsNumeric = isNumeric, Name = concatColumnName }
    };

    var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);
    return new SuggestedTransform(input, routingStructure);
}
// Trains an iris pipeline that includes a custom lambda transform, then asserts that the
// scoring model predicts the recorded label for the first 20 rows of the training file.
void New_Extensibility()
{
    var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
    var mlContext = new MLContext();
    var trainingData = mlContext.Data.TextReader(MakeIrisTextLoaderArgs())
        .Read(irisPath);

    // Copies every field; PetalLength is replaced by SepalLength unless SepalLength exceeds 3.
    Action<IrisData, IrisData> rowMapper = (src, dst) =>
    {
        dst.Label = src.Label;
        if (src.SepalLength > 3)
        {
            dst.PetalLength = src.PetalLength;
        }
        else
        {
            dst.PetalLength = src.SepalLength;
        }
        dst.PetalWidth = src.PetalWidth;
        dst.SepalLength = src.SepalLength;
        dst.SepalWidth = src.SepalWidth;
    };

    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new MyLambdaTransform<IrisData, IrisData>(mlContext, rowMapper), TransformerScope.TrainTest)
        .Append(new ValueToKeyMappingEstimator(mlContext, "Label"), TransformerScope.TrainTest)
        .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
            advancedSettings: settings =>
            {
                settings.MaxIterations = 100;
                settings.Shuffle = true;
                settings.NumThreads = 1;
            }))
        .Append(new KeyToValueEstimator(mlContext, "PredictedLabel"));

    // Only the scoring-scope part of the fitted chain is used for prediction.
    var scoringModel = pipeline.Fit(trainingData).GetModelFor(TransformerScope.Scoring);
    var predictor = scoringModel.MakePredictionFunction<IrisDataNoLabel, IrisPrediction>(mlContext);

    var evalView = TextLoader.ReadFile(mlContext, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
    var evalRows = evalView.AsEnumerable<IrisData>(mlContext, false);
    foreach (var row in evalRows.Take(20))
    {
        var result = predictor.Predict(row);
        Assert.True(result.PredictedLabel == row.Label);
    }
}
// Loads the iris file at dataPath and fits a pipeline of feature concatenation,
// label key mapping and the calibrated SDCA multiclass trainer. Returns the fitted chain.
private TransformerChain<MulticlassPredictionTransformer<MaximumEntropyModelParameters>> Train(string dataPath)
{
    // Column 0 is loaded as the label, columns 1-4 as the measurements; the file has a header row.
    var loaderOptions = new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column("SepalLength", DataKind.Single, 1),
            new TextLoader.Column("SepalWidth", DataKind.Single, 2),
            new TextLoader.Column("PetalLength", DataKind.Single, 3),
            new TextLoader.Column("PetalWidth", DataKind.Single, 4),
        },
        HasHeader = true,
    };
    var textLoader = new TextLoader(mlContext, options: loaderOptions);
    IDataView trainingData = textLoader.Load(dataPath);

    var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames);
    var pipeline = concat
        .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
        .Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated());

    return pipeline.Fit(trainingData);
}
// Fits a one-versus-all pipeline whose feature column is named "Vars" instead of the
// default, then runs the shared estimator checks on it.
public void MetacomponentsFeaturesRenamed()
{
    // Comma-separated iris training file with the dataset's own loader columns.
    var loaderOptions = new TextLoader.Options()
    {
        Columns = TestDatasets.irisData.GetLoaderColumns(),
        Separators = new[] { ',' },
    };
    var textLoader = new TextLoader(Env, options: loaderOptions);
    var trainingData = textLoader.Load(GetDataPath(TestDatasets.irisData.trainFilename));

    // The binary trainer is told to read features from "Vars".
    var binaryOptions = new SdcaNonCalibratedBinaryTrainer.Options
    {
        LabelColumnName = "Label",
        FeatureColumnName = "Vars",
        MaximumNumberOfIterations = 100,
        Shuffle = true,
        NumberOfThreads = 1,
    };
    var sdcaTrainer = ML.BinaryClassification.Trainers.SdcaNonCalibrated(binaryOptions);

    var concat = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)
        .Append(ML.MulticlassClassification.Trainers.OneVersusAll(sdcaTrainer))
        .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

    var fitted = pipeline.Fit(trainingData);
    TestEstimatorCore(pipeline, trainingData);
    Done();
}
// Trains an iris SDCA pipeline, extracts its scoring-scope model, and asserts that the
// predicted label equals the recorded label for the first 20 rows of the training file.
void New_DecomposableTrainAndPredict()
{
    var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
    var mlContext = new MLContext();
    var trainingData = mlContext.Data.CreateTextReader(MakeIrisColumns(), separatorChar: ',')
        .Read(irisPath);

    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new ValueToKeyMappingEstimator(mlContext, "Label"), TransformerScope.TrainTest)
        .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features",
            advancedSettings: settings =>
            {
                settings.MaxIterations = 100;
                settings.Shuffle = true;
                settings.NumThreads = 1;
            }))
        .Append(new KeyToValueMappingEstimator(mlContext, "PredictedLabel"));

    // Only the scoring-scope part of the fitted chain is used for prediction.
    var scoringModel = pipeline.Fit(trainingData).GetModelFor(TransformerScope.Scoring);
    var predictor = scoringModel.MakePredictionFunction<IrisDataNoLabel, IrisPrediction>(mlContext);

    var evalView = mlContext.Data.ReadFromTextFile(irisPath, MakeIrisColumns(), separatorChar: ',');
    var evalRows = evalView.AsEnumerable<IrisData>(mlContext, false);
    foreach (var row in evalRows.Take(20))
    {
        var result = predictor.Predict(row);
        Assert.True(result.PredictedLabel == row.Label);
    }
}
// Builds a prediction function that concatenates the four loaded columns into "Features",
// scores them with the ONNX model at _onnxFilePath, and copies the first element of the
// "Estimate" output into FlatPrediction.Estimate.
//
// Returns the prediction function created from the fitted pipeline.
public PredictionFunction<SearchData, FlatPrediction> GetPredictor()
{
    var reader = TextLoader.CreateReader(_env,
        ctx => (
            RateCode: ctx.LoadFloat(1),
            PassengerCount: ctx.LoadFloat(2),
            TripTime: ctx.LoadFloat(3),
            TripDistance: ctx.LoadFloat(4)),
        separator: ',',
        hasHeader: true);

    // The pipeline is fitted against a placeholder temp file. Everything that can throw after
    // the file is created runs inside try/finally so the file is always removed.
    var dummyTempFile = Path.GetTempFileName();
    try
    {
        var data = reader.Read(new MultiFileSource(dummyTempFile));

        var pipeline = new ColumnConcatenatingEstimator(_env, "Features", "RateCode", "PassengerCount", "TripTime", "TripDistance")
            .Append(new ColumnSelectingEstimator(_env, "Features"))
            .Append(new OnnxScoringEstimator(_env, _onnxFilePath, "Features", "Estimate"))
            .Append(new ColumnSelectingEstimator(_env, "Estimate"))
            .Append(new CustomMappingEstimator<RawPrediction, FlatPrediction>(_env,
                contractName: "OnnxPredictionExtractor",
                mapAction: (input, output) =>
                {
                    output.Estimate = input.Estimate[0];
                }));

        var transformer = pipeline.Fit(data.AsDynamic);
        return transformer.MakePredictionFunction<SearchData, FlatPrediction>(_env);
    }
    finally
    {
        File.Delete(dummyTempFile);
    }
}
// Trains an iris SDCA pipeline, extracts its scoring-scope model, and asserts that the
// predicted label equals the recorded label for the first 20 rows of the training file.
void DecomposableTrainAndPredict()
{
    var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
    var mlContext = new MLContext();
    var trainingData = mlContext.Data.LoadFromTextFile<IrisData>(irisPath, separatorChar: ',');

    var trainerOptions = new SdcaCalibratedMulticlassTrainer.Options
    {
        MaximumNumberOfIterations = 100,
        Shuffle = true,
        NumberOfThreads = 1,
    };

    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new ValueToKeyMappingEstimator(mlContext, "Label"), TransformerScope.TrainTest)
        .Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated(trainerOptions))
        .Append(new KeyToValueMappingEstimator(mlContext, "PredictedLabel"));

    // Only the scoring-scope part of the fitted chain is used for prediction.
    var scoringModel = pipeline.Fit(trainingData).GetModelFor(TransformerScope.Scoring);
    var predictor = mlContext.Model.CreatePredictionEngine<IrisDataNoLabel, IrisPrediction>(scoringModel);

    var evalView = mlContext.Data.LoadFromTextFile(irisPath, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',', hasHeader: true);
    var evalRows = mlContext.Data.CreateEnumerable<IrisData>(evalView, false);
    foreach (var row in evalRows.Take(20))
    {
        var result = predictor.Predict(row);
        Assert.True(result.PredictedLabel == row.Label);
    }
}
// Concatenates several float columns loaded from "adult.test" in four different combinations,
// asserts the resulting vector types, then saves the four outputs to Concat1.tsv and
// compares the file through CheckEquality.
void TestConcat()
{
    string adultPath = GetDataPath("adult.test");
    var fileSource = new MultiFileSource(adultPath);

    var loaderArgs = new TextLoader.Arguments
    {
        Column = new[]
        {
            new TextLoader.Column("float1", DataKind.R4, 0),
            new TextLoader.Column("float4", DataKind.R4, new[]
            {
                new TextLoader.Range(0),
                new TextLoader.Range(2),
                new TextLoader.Range(4),
                new TextLoader.Range(10)
            }),
            new TextLoader.Column("float6", DataKind.R4, new[]
            {
                new TextLoader.Range(0),
                new TextLoader.Range(2),
                new TextLoader.Range(4),
                new TextLoader.Range(10, 12)
            }),
            new TextLoader.Column("vfloat", DataKind.R4, new[]
            {
                new TextLoader.Range(14, null) { AutoEnd = false, VariableEnd = true }
            })
        },
        Separator = ",",
        HasHeader = true
    };
    var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(adultPath));
    var data = loader.Read(fileSource);

    // Looks up a column's type by name, failing the test if the column is missing.
    ColumnType ColumnTypeOf(Schema schema, string name)
    {
        Assert.True(schema.TryGetColumnIndex(name, out int columnIndex), $"Could not find '{name}'");
        return schema.GetColumnType(columnIndex);
    }

    var singleColumn = new ColumnConcatenatingEstimator(Env, "f1", "float1");
    var pipe = singleColumn
        .Append(new ColumnConcatenatingEstimator(Env, "f2", "float1", "float1"))
        .Append(new ColumnConcatenatingEstimator(Env, "f3", "float4", "float1"))
        .Append(new ColumnConcatenatingEstimator(Env, "f4", "float6", "vfloat", "float1"));

    // Keep only the first 10 rows before fitting and transforming.
    data = TakeFilter.Create(Env, data, 10);
    data = pipe.Fit(data).Transform(data);

    ColumnType f1Type = ColumnTypeOf(data.Schema, "f1");
    Assert.True(f1Type.IsVector && f1Type.ItemType == NumberType.R4 && f1Type.VectorSize == 1);

    ColumnType f2Type = ColumnTypeOf(data.Schema, "f2");
    Assert.True(f2Type.IsVector && f2Type.ItemType == NumberType.R4 && f2Type.VectorSize == 2);

    ColumnType f3Type = ColumnTypeOf(data.Schema, "f3");
    Assert.True(f3Type.IsVector && f3Type.ItemType == NumberType.R4 && f3Type.VectorSize == 5);

    // f4 includes the variable-end "vfloat" column and is expected to report a size of 0.
    ColumnType f4Type = ColumnTypeOf(data.Schema, "f4");
    Assert.True(f4Type.IsVector && f4Type.ItemType == NumberType.R4 && f4Type.VectorSize == 0);

    data = new ChooseColumnsTransform(Env, data, "f1", "f2", "f3", "f4");

    var subdir = Path.Combine("Transform", "Concat");
    var outputPath = GetOutputPath(subdir, "Concat1.tsv");
    using (var channel = Env.Start("save"))
    {
        var saverArgs = new TextSaver.Arguments { Silent = true, Dense = true };
        var saver = new TextSaver(Env, saverArgs);
        using (var outputStream = File.Create(outputPath))
        {
            DataSaverUtils.SaveDataView(channel, saver, data, outputStream, keepHidden: false);
        }
    }

    CheckEquality(subdir, "Concat1.tsv");
    Done();
}
// Fits a pipeline that wraps a binary SDCA trainer in an Ova meta-trainer over the iris data.
public void Metacomponents()
{
    var mlContext = new MLContext();
    var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
    var trainingData = mlContext.Data.ReadFromTextFile<IrisData>(irisPath, separatorChar: ',');

    var sdcaTrainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features",
        advancedSettings: settings =>
        {
            settings.MaxIterations = 100;
            settings.Shuffle = true;
            settings.NumThreads = 1;
        });

    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new ValueToKeyMappingEstimator(mlContext, "Label"), TransformerScope.TrainTest)
        .Append(new Ova(mlContext, sdcaTrainer))
        .Append(new KeyToValueMappingEstimator(mlContext, "PredictedLabel"));

    var fitted = pipeline.Fit(trainingData);
}
// Fits an Ova pipeline whose feature column is named "Vars" instead of the default,
// then runs the shared estimator checks on it.
public void MetacomponentsFeaturesRenamed()
{
    var textLoader = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',');
    var trainingData = textLoader.Read(GetDataPath(TestDatasets.irisData.trainFilename));

    // The binary trainer is told to read features from "Vars".
    var sdcaTrainer = new SdcaBinaryTrainer(Env, "Label", "Vars",
        advancedSettings: settings =>
        {
            settings.MaxIterations = 100;
            settings.Shuffle = true;
            settings.NumThreads = 1;
        });

    var concat = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)
        .Append(new Ova(Env, sdcaTrainer))
        .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

    var fitted = pipeline.Fit(trainingData);
    TestEstimatorCore(pipeline, trainingData);
    Done();
}
// Sample: concatenates the Age, Parity and Induced columns of the infert dataset into a
// single "Features" column and prints the resulting values row by row.
public static void ConcatTransform()
{
    // The ML context is the entry point for ML.NET operations; it can be used for exception
    // tracking and logging, and as the source of randomness.
    var mlContext = new MLContext();

    // Fetch a small dataset as an IEnumerable and then read it as ML.NET's data type.
    IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> samples = SamplesUtils.DatasetUtils.GetInfertData();
    var trainData = mlContext.Data.ReadFromEnumerable(samples);

    // Preview of the data.
    //
    // Age    Case  Education  induced     parity  pooled.stratum  row_num  ...
    // 26.0   1.0   0-5yrs      1.0         6.0       3.0           1.0     ...
    // 42.0   1.0   0-5yrs      1.0         1.0       1.0           2.0     ...
    // 39.0   1.0   0-5yrs      2.0         6.0       4.0           3.0     ...
    // 34.0   1.0   0-5yrs      2.0         4.0       2.0           4.0     ...
    // 35.0   1.0   6-11yrs     1.0         3.0       32.0          5.0     ...

    // Estimator that joins the age, parity and induced columns into the Features column.
    string outputColumnName = "Features";
    var sourceColumns = new[] { "Age", "Parity", "Induced" };
    var pipeline = new ColumnConcatenatingEstimator(mlContext, outputColumnName, sourceColumns);

    // Fit and apply the estimator to obtain the transformed data.
    var fittedTransformer = pipeline.Fit(trainData);
    var transformedData = fittedTransformer.Transform(trainData);

    // Read the transformed rows back as SampleInfertDataWithFeatures objects.
    var featureRows = mlContext.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    foreach (var row in featureRows)
    {
        // One output line per row: every feature value followed by a space.
        foreach (var value in row.Features.GetValues())
        {
            Console.Write($"{value} ");
        }
        Console.WriteLine("");
    }

    // Features column obtained post-transformation.
    //
    // 26 6 1
    // 42 1 1
    // 39 6 2
    // 34 4 2
    // 35 3 1
}
// Fits a pipeline that wraps a non-calibrated binary SDCA trainer in a one-versus-all
// multiclass trainer over the iris data.
public void Metacomponents()
{
    var mlContext = new MLContext();
    var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
    var trainingData = mlContext.Data.LoadFromTextFile<IrisData>(irisPath, separatorChar: ',');

    var binaryOptions = new SdcaNonCalibratedBinaryTrainer.Options
    {
        NumberOfIterations = 100,
        Shuffle = true,
        NumberOfThreads = 1,
    };
    var sdcaTrainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(binaryOptions);

    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest)
        .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(sdcaTrainer))
        .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    var fitted = pipeline.Fit(trainingData);
}
// Trains a 3-cluster KMeans++ pipeline on iris.txt, checks a single-observation prediction,
// then predicts over a DataFrame read from the same file and compares row counts.
public void TestEP_Q_KMeansEntryPointAPI_06()
{
    var irisFile = FileHelper.GetTestFile("iris.txt");
    using (var env = new ConsoleEnvironment())
    {
        // Tab-separated file with a header row; column 0 is the label, 1-4 the measurements.
        var loaderArgs = new TextLoader.Arguments()
        {
            Separator = "\t",
            HasHeader = true,
            Column = new[]
            {
                new TextLoader.Column("Label", DataKind.R4, 0),
                new TextLoader.Column("Sepal_length", DataKind.R4, 1),
                new TextLoader.Column("Sepal_width", DataKind.R4, 2),
                new TextLoader.Column("Petal_length", DataKind.R4, 3),
                new TextLoader.Column("Petal_width", DataKind.R4, 4),
            }
        };
        var reader = new TextLoader(env, loaderArgs);

        var concat = new ColumnConcatenatingEstimator(env, "Features", "Sepal_length", "Sepal_width", "Petal_length", "Petal_width");
        var pipeline = concat.Append(new KMeansPlusPlusTrainer(env, "Features", clustersCount: 3));

        IDataView trainingDataView = reader.Read(new MultiFileSource(irisFile));
        var model = pipeline.Fit(trainingDataView);

        // Single-observation prediction through the typed prediction function.
        var observation = new IrisObservation()
        {
            Sepal_length = 3.3f,
            Sepal_width = 1.6f,
            Petal_length = 0.2f,
            Petal_width = 5.1f,
        };
        var typedPredictor = model.MakePredictionFunction<IrisObservation, IrisPrediction>(env);
        var singlePrediction = typedPredictor.Predict(observation);
        Assert.IsTrue(singlePrediction.PredictedLabel != 0);

        // The first frame supplies the schema; a second read of the same file is the input scored.
        var schemaFrame = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(irisFile, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        var framePredictor = model.MakePredictionFunctionDataFrame(env, schemaFrame.Schema);
        var inputFrame = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(irisFile, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        var scoredFrame = framePredictor.Predict(inputFrame);
        Assert.AreEqual(schemaFrame.Shape[0], scoredFrame.Shape[0]);
    }
}
// Suggests concatenating all non-ignored columns into the default Features column.
//
// columns - candidate columns; those for which IgnoreColumn(purpose) is true are skipped.
//
// Yields at most one SuggestedTransform. Nothing is yielded when:
//   * no column remains after filtering,
//   * the only remaining column is already the numeric Features column, or
//   * the remaining columns mix numeric-feature and non-numeric-feature purposes.
public override IEnumerable<SuggestedTransform> Apply(IntermediateColumn[] columns)
{
    var selectedColumns = columns.Where(c => !IgnoreColumn(c.Purpose)).ToArray();
    if (selectedColumns.Length == 0)
    {
        yield break;
    }

    // The names are used directly; copying each one through a StringBuilder produced the same string.
    string[] columnNames = selectedColumns.Select(c => c.ColumnName).ToArray();

    // Boolean-typed columns do not count as numeric even when their purpose is NumericFeature.
    bool allColumnsNumeric = selectedColumns.All(c => c.Purpose == ColumnPurpose.NumericFeature && c.Type.ItemType() != BoolType.Instance);
    bool allColumnsNonNumeric = selectedColumns.All(c => c.Purpose != ColumnPurpose.NumericFeature);

    // A single column that is already named Features and numeric needs no transform.
    if (columnNames.Length == 1 && columnNames[0] == DefaultColumnNames.Features && allColumnsNumeric)
    {
        yield break;
    }

    // Neither uniformly numeric nor uniformly non-numeric: no suggestion is made.
    if (!allColumnsNumeric && !allColumnsNonNumeric)
    {
        yield break;
    }

    var input = new ColumnConcatenatingEstimator(Env, DefaultColumnNames.Features, columnNames);

    ColumnRoutingStructure.AnnotatedName[] columnsSource = columnNames
        .Select(c => new ColumnRoutingStructure.AnnotatedName { IsNumeric = allColumnsNumeric, Name = c })
        .ToArray();
    ColumnRoutingStructure.AnnotatedName[] columnsDest =
    {
        new ColumnRoutingStructure.AnnotatedName { IsNumeric = allColumnsNumeric, Name = DefaultColumnNames.Features }
    };

    var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);
    yield return new SuggestedTransform(input, routingStructure);
}
// Stores a sample iris measurement in _irisExample, trains an SDCA maximum-entropy model on
// "iris.txt", and keeps the resulting prediction engine in _irisModel.
public void SetupIrisPipeline()
{
    _irisExample = new IrisData()
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    };

    string dataFile = GetBenchmarkDataPath("iris.txt");
    var mlContext = new MLContext(seed: 1);

    // Column 0 is loaded as the label, columns 1-4 as the measurements; the file has a header row.
    var loaderOptions = new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column("SepalLength", DataKind.Single, 1),
            new TextLoader.Column("SepalWidth", DataKind.Single, 2),
            new TextLoader.Column("PetalLength", DataKind.Single, 3),
            new TextLoader.Column("PetalWidth", DataKind.Single, 4),
        },
        HasHeader = true,
    };
    var textLoader = new TextLoader(mlContext, options: loaderOptions);
    IDataView trainingData = textLoader.Load(dataFile);

    var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames);
    var pipeline = concat
        .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
        .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
            new SdcaMaximumEntropyMulticlassTrainer.Options
            {
                NumberOfThreads = 1,
                ConvergenceTolerance = 1e-2f,
            }));

    var trained = pipeline.Fit(trainingData);
    _irisModel = mlContext.Model.CreatePredictionEngine<IrisData, IrisPrediction>(trained);
}
// Stores a sample iris measurement in _irisExample, trains an SDCA multiclass model on
// "iris.txt", and keeps the resulting prediction engine in _irisModel.
public void SetupIrisPipeline()
{
    _irisExample = new IrisData()
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    };

    string dataFile = BaseTestClass.GetDataPath("iris.txt");
    var mlContext = new MLContext(seed: 1, conc: 1);

    // Column 0 is loaded as the label, columns 1-4 as the measurements; the file has a header row.
    var loaderOptions = new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column("SepalLength", DataKind.Single, 1),
            new TextLoader.Column("SepalWidth", DataKind.Single, 2),
            new TextLoader.Column("PetalLength", DataKind.Single, 3),
            new TextLoader.Column("PetalWidth", DataKind.Single, 4),
        },
        HasHeader = true,
    };
    var textLoader = new TextLoader(mlContext, options: loaderOptions);
    IDataView trainingData = textLoader.Load(dataFile);

    var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames);
    var pipeline = concat.Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
        new SdcaMultiClassTrainer.Options
        {
            NumberOfThreads = 1,
            ConvergenceTolerance = 1e-2f,
        }));

    var trained = pipeline.Fit(trainingData);
    _irisModel = trained.CreatePredictionEngine<IrisData, IrisPrediction>(mlContext);
}
// Loads the iris file at dataPath and fits a pipeline of feature concatenation followed by
// the SDCA multiclass trainer with default settings. Returns the fitted chain.
private TransformerChain<MulticlassPredictionTransformer<MulticlassLogisticRegressionModelParameters>> Train(string dataPath)
{
    // Column 0 is loaded as the label, columns 1-4 as the measurements; the file has a header row.
    var irisColumns = new[]
    {
        new TextLoader.Column("Label", DataKind.R4, 0),
        new TextLoader.Column("SepalLength", DataKind.R4, 1),
        new TextLoader.Column("SepalWidth", DataKind.R4, 2),
        new TextLoader.Column("PetalLength", DataKind.R4, 3),
        new TextLoader.Column("PetalWidth", DataKind.R4, 4),
    };
    var textLoader = new TextLoader(_env, columns: irisColumns, hasHeader: true);
    IDataView trainingData = textLoader.Read(dataPath);

    var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
    var concat = new ColumnConcatenatingEstimator(_env, "Features", featureNames);
    var pipeline = concat.Append(_env.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());

    return pipeline.Fit(trainingData);
}
// Trains a multiclass logistic regression model on the comma-separated file at _dataset,
// stores a prediction function for it in _fct, and saves the model to a file.
//
// dest - path of the file the trained model is written to. An existing file is replaced.
public void Train(string dest)
{
    using (var env = new ConsoleEnvironment(verbose: false))
    {
        // Column 0 is loaded as the label, columns 1-4 as the measurements; the file has a header row.
        var args = new TextLoader.Arguments()
        {
            Separator = ",",
            HasHeader = true,
            Column = new TextLoader.Column[]
            {
                new TextLoader.Column("Label", DataKind.R4, 0),
                new TextLoader.Column("Sepal_length", DataKind.R4, 1),
                new TextLoader.Column("Sepal_width", DataKind.R4, 2),
                new TextLoader.Column("Petal_length", DataKind.R4, 3),
                new TextLoader.Column("Petal_width", DataKind.R4, 4),
            }
        };
        var reader = new TextLoader(env, args);

        var concat = new ColumnConcatenatingEstimator(env, "Features", "Sepal_length", "Sepal_width", "Petal_length", "Petal_width");
        var trainer = new MulticlassLogisticRegression(env, "Label", "Features");
        var pipeline = concat.Append(trainer);

        IDataView trainingDataView = reader.Read(new MultiFileSource(_dataset));
        var model = pipeline.Fit(trainingDataView);

        // NOTE(review): _fct is created from env, which is disposed when this using block
        // exits - confirm the prediction function does not need env afterwards.
        _fct = model.MakePredictionFunction<IrisObservation, IrisPrediction>(env);

        // File.Create truncates an existing file. File.OpenWrite would leave stale trailing
        // bytes behind when the new model is shorter than the previous file content.
        using (var stdest = File.Create(dest))
        {
            model.SaveTo(env, stdest);
        }
    }
}
// Stores a sample iris measurement in _irisExample, trains an SDCA multiclass model on the
// tab-separated "iris.txt", and keeps the resulting prediction function in _irisModel.
public void SetupIrisPipeline()
{
    _irisExample = new IrisData()
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    };

    string dataFile = Program.GetInvariantCultureDataPath("iris.txt");

    // Non-verbose environment with the empty writer as its output, fixed seed, concurrency 1.
    using (var env = new ConsoleEnvironment(seed: 1, conc: 1, verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
    {
        // Column 0 is loaded as the label, columns 1-4 as the measurements; the file has a header row.
        var loaderArgs = new TextLoader.Arguments()
        {
            Separator = "\t",
            HasHeader = true,
            Column = new[]
            {
                new TextLoader.Column("Label", DataKind.R4, 0),
                new TextLoader.Column("SepalLength", DataKind.R4, 1),
                new TextLoader.Column("SepalWidth", DataKind.R4, 2),
                new TextLoader.Column("PetalLength", DataKind.R4, 3),
                new TextLoader.Column("PetalWidth", DataKind.R4, 4),
            }
        };
        var textLoader = new TextLoader(env, loaderArgs);
        IDataView trainingData = textLoader.Read(dataFile);

        var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
        var concat = new ColumnConcatenatingEstimator(env, "Features", featureNames);
        var trainer = new SdcaMultiClassTrainer(env, "Label", "Features",
            advancedSettings: settings =>
            {
                settings.NumThreads = 1;
                settings.ConvergenceTolerance = 1e-2f;
            });

        var trained = concat.Append(trainer).Fit(trainingData);
        _irisModel = trained.MakePredictionFunction<IrisData, IrisPrediction>(env);
    }
}
// Trains an iris pipeline that includes a custom mapping step, then asserts that the
// scoring model predicts the recorded label for the first 20 rows of the training file.
void Extensibility()
{
    var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
    var mlContext = new MLContext();
    var trainingData = mlContext.Data.CreateTextLoader(TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',')
        .Load(irisPath);

    // Copies every field; PetalLength is replaced by SepalLength unless SepalLength exceeds 3.
    Action<IrisData, IrisData> rowMapper = (src, dst) =>
    {
        dst.Label = src.Label;
        if (src.SepalLength > 3)
        {
            dst.PetalLength = src.PetalLength;
        }
        else
        {
            dst.PetalLength = src.SepalLength;
        }
        dst.PetalWidth = src.PetalWidth;
        dst.SepalLength = src.SepalLength;
        dst.SepalWidth = src.SepalWidth;
    };

    var trainerOptions = new SdcaMultiClassTrainer.Options
    {
        NumberOfIterations = 100,
        Shuffle = true,
        NumberOfThreads = 1
    };

    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new CustomMappingEstimator<IrisData, IrisData>(mlContext, rowMapper, null), TransformerScope.TrainTest)
        .Append(new ValueToKeyMappingEstimator(mlContext, "Label"), TransformerScope.TrainTest)
        .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(trainerOptions))
        .Append(new KeyToValueMappingEstimator(mlContext, "PredictedLabel"));

    // Only the scoring-scope part of the fitted chain is used for prediction.
    var scoringModel = pipeline.Fit(trainingData).GetModelFor(TransformerScope.Scoring);
    var predictor = scoringModel.CreatePredictionEngine<IrisDataNoLabel, IrisPrediction>(mlContext);

    var evalView = mlContext.Data.LoadFromTextFile(irisPath, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',');
    var evalRows = mlContext.Data.CreateEnumerable<IrisData>(evalView, false);
    foreach (var row in evalRows.Take(20))
    {
        var result = predictor.Predict(row);
        Assert.True(result.PredictedLabel == row.Label);
    }
}
// Fits an Ova pipeline whose feature column is named "Vars" instead of the default,
// then runs the shared estimator checks on it.
public void MetacomponentsFeaturesRenamed()
{
    var textLoader = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',');
    var trainingData = textLoader.Read(GetDataPath(TestDatasets.irisData.trainFilename));

    // The binary trainer is told to read features from "Vars".
    var binaryOptions = new SdcaBinaryTrainer.Options
    {
        LabelColumn = "Label",
        FeatureColumn = "Vars",
        MaxIterations = 100,
        Shuffle = true,
        NumThreads = 1,
    };
    var sdcaTrainer = ML.BinaryClassification.Trainers.StochasticDualCoordinateAscent(binaryOptions);

    var concat = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
    var pipeline = concat
        .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)
        .Append(new Ova(Env, sdcaTrainer))
        .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

    var fitted = pipeline.Fit(trainingData);
    TestEstimatorCore(pipeline, trainingData);
    Done();
}
// Loads the iris file at dataPath and fits a pipeline of feature concatenation followed by
// the SDCA multiclass trainer with default settings. Returns the fitted chain.
private TransformerChain<MulticlassPredictionTransformer<MulticlassLogisticRegressionModelParameters>> Train(string dataPath)
{
    // Column 0 is loaded as the label, columns 1-4 as the measurements; the file has a header row.
    var loaderOptions = new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column("SepalLength", DataKind.Single, 1),
            new TextLoader.Column("SepalWidth", DataKind.Single, 2),
            new TextLoader.Column("PetalLength", DataKind.Single, 3),
            new TextLoader.Column("PetalWidth", DataKind.Single, 4),
        },
        HasHeader = true,
    };
    var textLoader = new TextLoader(mlContext, options: loaderOptions);
    IDataView trainingData = textLoader.Load(dataPath);

    var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
    var concat = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames);
    var pipeline = concat.Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());

    return pipeline.Fit(trainingData);
}
// Suggests how to produce the default Name column from the columns whose purpose is Name.
//
// columns - candidate columns; only those with Purpose == ColumnPurpose.Name are considered.
//
// Yields at most one SuggestedTransform:
//   * exactly one name column that is not already called DefaultColumnNames.Name:
//     a ColumnCopyingEstimator built from that column name and DefaultColumnNames.Name;
//   * more than one name column, at least one of them text-typed:
//     a ColumnConcatenatingEstimator grouping them into DefaultColumnNames.Name;
//   * otherwise nothing.
public override IEnumerable<SuggestedTransform> Apply(IntermediateColumn[] columns)
{
    var nameColumns = new List<string>();
    bool hasTextColumn = false;

    foreach (var column in columns)
    {
        if (column.Purpose != ColumnPurpose.Name)
        {
            continue;
        }

        nameColumns.Add(column.ColumnName);
        if (column.Type.ItemType().IsText())
        {
            hasTextColumn = true;
        }
    }

    if (nameColumns.Count == 1 && nameColumns[0] != DefaultColumnNames.Name)
    {
        string sourceName = nameColumns[0];
        var input = new ColumnCopyingEstimator(Env, sourceName, DefaultColumnNames.Name);

        ColumnRoutingStructure.AnnotatedName[] columnsSource =
        {
            new ColumnRoutingStructure.AnnotatedName { IsNumeric = false, Name = sourceName }
        };
        ColumnRoutingStructure.AnnotatedName[] columnsDest =
        {
            new ColumnRoutingStructure.AnnotatedName { IsNumeric = false, Name = DefaultColumnNames.Name }
        };

        var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);
        yield return new SuggestedTransform(input, routingStructure);
    }
    else if (nameColumns.Count > 1)
    {
        // The earlier guard called ToString() on a List<string>, which returns the type name
        // and is never blank, so it never fired. Check for text columns explicitly instead.
        if (!hasTextColumn)
        {
            yield break;
        }

        // Suggest grouping the name columns into one vector.
        var input = new ColumnConcatenatingEstimator(Env, DefaultColumnNames.Name, nameColumns.ToArray());

        ColumnRoutingStructure.AnnotatedName[] columnsSource = nameColumns
            .Select(c => new ColumnRoutingStructure.AnnotatedName { IsNumeric = false, Name = c })
            .ToArray();
        ColumnRoutingStructure.AnnotatedName[] columnsDest =
        {
            new ColumnRoutingStructure.AnnotatedName { IsNumeric = false, Name = DefaultColumnNames.Name }
        };

        var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);
        yield return new SuggestedTransform(input, routingStructure);
    }
}