public void ConversionMinValueToNullBehavior()
{
    using (var env = new TlcEnvironment())
    {
        var data = new List<ConversionLossMinValueClass>
        {
            new ConversionLossMinValueClass() { fSByte = null, fInt = null, fLong = null, fShort = null },
            new ConversionLossMinValueClass() { fSByte = sbyte.MinValue, fInt = int.MinValue, fLong = long.MinValue, fShort = short.MinValue }
        };
        foreach (var field in typeof(ConversionLossMinValueClass).GetFields())
        {
            var dataView = ComponentCreation.CreateDataView(env, data);
            var enumerator = dataView.AsEnumerable<ConversionLossMinValueClass>(env, false).GetEnumerator();
            while (enumerator.MoveNext())
            {
                Assert.True(enumerator.Current.fInt == null && enumerator.Current.fLong == null &&
                    enumerator.Current.fSByte == null && enumerator.Current.fShort == null);
            }
        }
    }
}
/// <summary>
/// Performs train-test on a pipeline.
/// </summary>
/// <typeparam name="TInput">Class type that represents input schema.</typeparam>
/// <typeparam name="TOutput">Class type that represents prediction schema.</typeparam>
/// <param name="pipeline">Machine learning pipeline that contains <see cref="ILearningPipelineLoader"/>,
/// transforms and at least one trainer.</param>
/// <param name="testData"><see cref="ILearningPipelineLoader"/> that represents the test dataset.</param>
/// <returns>Metrics and predictor model.</returns>
public TrainTestEvaluatorOutput<TInput, TOutput> TrainTestEvaluate<TInput, TOutput>(LearningPipeline pipeline, ILearningPipelineLoader testData)
    where TInput : class
    where TOutput : class, new()
{
    using (var environment = new TlcEnvironment())
    {
        Experiment subGraph = environment.CreateExperiment();
        ILearningPipelineStep step = null;
        List<ILearningPipelineLoader> loaders = new List<ILearningPipelineLoader>();
        List<Var<ITransformModel>> transformModels = new List<Var<ITransformModel>>();
        Var<ITransformModel> lastTransformModel = null;
        Var<IDataView> firstPipelineDataStep = null;
        Var<IPredictorModel> firstModel = null;
        ILearningPipelineItem firstTransform = null;
        foreach (ILearningPipelineItem currentItem in pipeline)
        {
            if (currentItem is ILearningPipelineLoader loader)
            {
                loaders.Add(loader);
                continue;
            }
            step = currentItem.ApplyStep(step, subGraph);
            if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null)
            {
                transformModels.Add(dataStep.Model);
                if (firstPipelineDataStep == null)
                {
                    firstPipelineDataStep = dataStep.Data;
                    firstTransform = currentItem;
                }
            }
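// A minimal call-site sketch for TrainTestEvaluate above, assuming the legacy
// LearningPipeline API; IrisData/IrisPrediction are hypothetical schema classes
// and trainPath/testPath are placeholders.
public void TrainTestEvaluateUsageSketch(string trainPath, string testPath)
{
    var pipeline = new LearningPipeline();
    pipeline.Add(new ML.Data.TextLoader(trainPath).CreateFrom<IrisData>(separator: ','));
    pipeline.Add(new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    var testLoader = new ML.Data.TextLoader(testPath).CreateFrom<IrisData>(separator: ',');
    var output = TrainTestEvaluate<IrisData, IrisPrediction>(pipeline, testLoader);
}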
/// <summary>
/// Called at the beginning of a test - it dumps the usage of the Arguments class(es).
/// </summary>
private static void Init(IndentingTextWriter wrt, object defaults)
{
    var env = new TlcEnvironment(seed: 42);
    wrt.WriteLine("Usage:");
    wrt.WriteLine(CmdParser.ArgumentsUsage(env, defaults.GetType(), defaults, false, 200));
}
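// A sketch of an Arguments class that Init above could dump. SimpleArguments is
// hypothetical; the ArgumentAttribute annotations are the metadata CmdParser
// reflects over when generating the usage text.
private sealed class SimpleArguments
{
    [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations.", ShortName = "iter")]
    public int NumIterations = 10;

    [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate.", ShortName = "lr")]
    public float LearningRate = 0.1f;
}

// Example: Init(wrt, new SimpleArguments()) would print the generated usage text.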
public void TestPipelineNodeCloning()
{
    using (var env = new TlcEnvironment())
    {
        var lr1 = RecipeInference
            .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer)
            .First(learner => learner.PipelineNode != null && learner.LearnerName.Contains("LogisticRegression"));
        var sdca1 = RecipeInference
            .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer)
            .First(learner => learner.PipelineNode != null && learner.LearnerName.Contains("StochasticDualCoordinateAscent"));

        // Clone and change hyperparameter values.
        var lr2 = lr1.Clone();
        lr1.PipelineNode.SweepParams[0].RawValue = 1.2f;
        lr2.PipelineNode.SweepParams[0].RawValue = 3.5f;
        var sdca2 = sdca1.Clone();
        sdca1.PipelineNode.SweepParams[0].RawValue = 3;
        sdca2.PipelineNode.SweepParams[0].RawValue = 0;

        // Make sure the changes are propagated to entry point objects.
        env.Check(lr1.PipelineNode.UpdateProperties());
        env.Check(lr2.PipelineNode.UpdateProperties());
        env.Check(sdca1.PipelineNode.UpdateProperties());
        env.Check(sdca2.PipelineNode.UpdateProperties());

        env.Check(lr1.PipelineNode.CheckEntryPointStateMatchesParamValues());
        env.Check(lr2.PipelineNode.CheckEntryPointStateMatchesParamValues());
        env.Check(sdca1.PipelineNode.CheckEntryPointStateMatchesParamValues());
        env.Check(sdca2.PipelineNode.CheckEntryPointStateMatchesParamValues());

        // Make sure second object's set of changes didn't overwrite first object's.
        env.Check(!lr1.PipelineNode.SweepParams[0].RawValue.Equals(lr2.PipelineNode.SweepParams[0].RawValue));
        env.Check(!sdca2.PipelineNode.SweepParams[0].RawValue.Equals(sdca1.PipelineNode.SweepParams[0].RawValue));
    }
}
void New_DecomposableTrainAndPredict()
{
    var dataPath = GetDataPath(IrisDataPath);
    using (var env = new TlcEnvironment())
    {
        var data = new MyTextLoader(env, MakeIrisTextLoaderArgs())
            .FitAndRead(new MultiFileSource(dataPath));

        var pipeline = new MyConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
            .Append(new MyTermTransform(env, "Label"), TransformerScope.TrainTest)
            .Append(new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 }, "Features", "Label"))
            .Append(new MyKeyToValueTransform(env, "PredictedLabel"));

        var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
        var engine = new MyPredictionEngine<IrisDataNoLabel, IrisPrediction>(env, model);

        var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath));
        var testData = testLoader.AsEnumerable<IrisData>(env, false);
        foreach (var input in testData.Take(20))
        {
            var prediction = engine.Predict(input);
            Assert.True(prediction.PredictedLabel == input.Label);
        }
    }
}
void TestCommandLine()
{
    using (var env = new TlcEnvironment())
    {
        Assert.Equal(0, Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0} xf=Term{col=B:A} in=f:\2.txt" }));
    }
}
void New_MultithreadedPrediction()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var reader = new TextLoader(env, MakeSentimentTextLoaderArgs());
        var data = reader.Read(new MultiFileSource(dataPath));

        // Pipeline.
        var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(data);

        // Create prediction engine and test predictions.
        var engine = new MyPredictionEngine<SentimentData, SentimentPrediction>(env, model);

        // Run predictions over the test data in parallel; the engine is not
        // thread-safe, so access to it is serialized with a lock.
        var testData = reader.Read(new MultiFileSource(testDataPath))
            .AsEnumerable<SentimentData>(env, false);
        Parallel.ForEach(testData, (input) =>
        {
            lock (engine)
            {
                var prediction = engine.Predict(input);
            }
        });
    }
}
public void TestLearnerConstrainingByName()
{
    string pathData = GetDataPath("adult.train");
    int numOfSampleRows = 1000;
    int batchSize = 1;
    int numIterations = 1;
    int numTransformLevels = 2;
    var retainedLearnerNames = new[] { "LogisticRegressionBinaryClassifier", "FastTreeBinaryClassifier" };

    using (var env = new TlcEnvironment())
    {
        SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc);

        // Using the simple, uniform random sampling (with replacement) brain.
        PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env);

        // Run initial experiment.
        var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _,
            numTransformLevels, batchSize, metric, out var _, numOfSampleRows,
            new IterationTerminator(numIterations), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);

        // Keep only logistic regression and FastTree.
        amls.KeepSelectedLearners(retainedLearnerNames);
        var space = amls.GetSearchSpace();

        // Make sure the only learners left are those retained.
        Assert.Equal(retainedLearnerNames.Length, space.Item2.Length);
        Assert.True(space.Item2.All(l => retainedLearnerNames.Any(r => r == l.LearnerName)));
    }
}
public void AutoNormalizationAndCaching()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(false), loader);

        // Train.
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1,
            ConvergenceTolerance = 1f
        });

        // Auto-caching.
        IDataView trainData = trainer.Info.WantCaching ? (IDataView)new CacheDataView(env, trans, prefetch: null) : trans;
        var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features");

        // Auto-normalization.
        NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));
    }
}
public void ClassWithInheritedPropertiesConversion()
{
    var data = new List<ClassWithInheritedProperties>()
    {
        new ClassWithInheritedProperties() { IntProp = 1, StringProp = "lala", LongProp = 17, ByteProp = 3 },
        new ClassWithInheritedProperties() { IntProp = -1, StringProp = "", LongProp = 2, ByteProp = 4 },
        new ClassWithInheritedProperties() { IntProp = 0, StringProp = null, LongProp = 18, ByteProp = 5 }
    };

    using (var env = new TlcEnvironment())
    {
        var dataView = ComponentCreation.CreateDataView(env, data);
        var enumeratorSimple = dataView.AsEnumerable<ClassWithInheritedProperties>(env, false).GetEnumerator();
        var originalEnumerator = data.GetEnumerator();
        while (enumeratorSimple.MoveNext() && originalEnumerator.MoveNext())
        {
            Assert.True(CompareThroughReflection(enumeratorSimple.Current, originalEnumerator.Current));
        }
        Assert.True(!enumeratorSimple.MoveNext() && !originalEnumerator.MoveNext());
    }
}
public void TestRegressionPipelineWithMinimizingMetric()
{
    string pathData = GetDataPath("../Housing (regression)/housing.txt");
    int numOfSampleRows = 100;
    int batchSize = 5;
    int numIterations = 10;
    int numTransformLevels = 1;

    using (var env = new TlcEnvironment())
    {
        // L1 loss is a minimizing regression metric, which is the point of this test.
        // (AccuracyMicro, a maximizing classification metric, does not fit a regression sweep.)
        SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.L1);

        // Using the simple, uniform random sampling (with replacement) brain.
        PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env);

        // Run initial experiments.
        var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _,
            numTransformLevels, batchSize, metric, out var bestPipeline, numOfSampleRows,
            new IterationTerminator(numIterations), MacroUtils.TrainerKinds.SignatureRegressorTrainer);

        // Allow for one more iteration.
        amls.UpdateTerminator(new IterationTerminator(numIterations + 1));

        // Do learning with the extra iteration.
        bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);

        // With a minimizing metric, the best pipeline must have the lowest metric value.
        Assert.NotNull(bestPipeline);
        Assert.True(amls.GetAllEvaluatedPipelines().All(
            p => p.PerformanceSummary.MetricValue >= bestPipeline.PerformanceSummary.MetricValue));
    }
}
public void ClassWithPrivateFieldsAndPropertiesConversion()
{
    var data = new List<ClassWithPrivateFieldsAndProperties>()
    {
        new ClassWithPrivateFieldsAndProperties() { StringProp = "lala" },
        new ClassWithPrivateFieldsAndProperties() { StringProp = "baba" }
    };

    using (var env = new TlcEnvironment())
    {
        var dataView = ComponentCreation.CreateDataView(env, data);
        var enumeratorSimple = dataView.AsEnumerable<ClassWithPrivateFieldsAndProperties>(env, false).GetEnumerator();
        var originalEnumerator = data.GetEnumerator();
        while (enumeratorSimple.MoveNext() && originalEnumerator.MoveNext())
        {
            Assert.True(CompareThroughReflection(enumeratorSimple.Current, originalEnumerator.Current));
            // The property with a private setter is not part of the data view, so its
            // default value should survive the round trip untouched.
            Assert.True(enumeratorSimple.Current.UnusedPropertyWithPrivateSetter == 100);
        }
        Assert.True(!enumeratorSimple.MoveNext() && !originalEnumerator.MoveNext());
    }
}
public void ClassWithMixOfFieldsAndPropertiesConversion()
{
    var data = new List<ClassWithMixOfFieldsAndProperties>()
    {
        new ClassWithMixOfFieldsAndProperties() { IntProp = 1, fString = "lala" },
        new ClassWithMixOfFieldsAndProperties() { IntProp = -1, fString = "" },
        new ClassWithMixOfFieldsAndProperties() { IntProp = 0, fString = null }
    };

    using (var env = new TlcEnvironment())
    {
        var dataView = ComponentCreation.CreateDataView(env, data);
        var enumeratorSimple = dataView.AsEnumerable<ClassWithMixOfFieldsAndProperties>(env, false).GetEnumerator();
        var originalEnumerator = data.GetEnumerator();
        while (enumeratorSimple.MoveNext() && originalEnumerator.MoveNext())
        {
            Assert.True(CompareThroughReflection(enumeratorSimple.Current, originalEnumerator.Current));
        }
        Assert.True(!enumeratorSimple.MoveNext() && !originalEnumerator.MoveNext());
    }
}
public void ConversionMinValueToNullBehaviorProperties()
{
    using (var env = new TlcEnvironment())
    {
        var data = new List<ConversionLossMinValueClassProperties>
        {
            new ConversionLossMinValueClassProperties() { SByteProp = null, IntProp = null, LongProp = null, ShortProp = null },
            new ConversionLossMinValueClassProperties() { SByteProp = sbyte.MinValue, IntProp = int.MinValue, LongProp = long.MinValue, ShortProp = short.MinValue }
        };

        // Use GetProperties here: this class exposes properties, not fields, so
        // GetFields() would return an empty array and the loop body would never run.
        foreach (var property in typeof(ConversionLossMinValueClassProperties).GetProperties())
        {
            var dataView = ComponentCreation.CreateDataView(env, data);
            var enumerator = dataView.AsEnumerable<ConversionLossMinValueClassProperties>(env, false).GetEnumerator();
            while (enumerator.MoveNext())
            {
                Assert.True(enumerator.Current.IntProp == null && enumerator.Current.LongProp == null &&
                    enumerator.Current.SByteProp == null && enumerator.Current.ShortProp == null);
            }
        }
    }
}
void New_FileBasedSavingOfData()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

        var trainData = pipeline.Fit(new MultiFileSource(dataPath)).Read(new MultiFileSource(dataPath));

        using (var file = env.CreateOutputFile("i.idv"))
            trainData.SaveAsBinary(env, file.CreateWriteStream());

        var trainer = new MySdca(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label");
        var loadedTrainData = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));

        // Train.
        var model = trainer.Train(loadedTrainData);
        DeleteOutputPath("i.idv");
    }
}
public void TrainWithValidationSet()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var validationDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);
        var trainData = trans;

        // Apply the same transformations on the validation set.
        // Sadly, there is no way to easily apply the same loader to different data, so we either have
        // to create another loader, or to save the loader to model file and then reload.
        // A new one is not always feasible, but this time it is.
        var validLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(validationDataPath));
        var validData = ApplyTransformUtils.ApplyAllTransformsToData(env, trainData, validLoader);

        // Cache both datasets.
        var cachedTrain = new CacheDataView(env, trainData, prefetch: null);
        var cachedValid = new CacheDataView(env, validData, prefetch: null);

        // Train.
        var trainer = new FastTreeBinaryClassificationTrainer(env, new FastTreeBinaryClassificationTrainer.Arguments { NumTrees = 3 });
        var trainRoles = new RoleMappedData(cachedTrain, label: "Label", feature: "Features");
        var validRoles = new RoleMappedData(cachedValid, label: "Label", feature: "Features");
        trainer.Train(new Runtime.TrainContext(trainRoles, validRoles));
    }
}
public void TestEstimatorSaveLoad()
{
    using (var env = new TlcEnvironment())
    {
        var dataFile = GetDataPath("images/images.tsv");
        var imageFolder = Path.GetDirectoryName(dataFile);
        var data = env.CreateLoader("Text{col=ImagePath:TX:0 col=Name:TX:1}", new MultiFileSource(dataFile));

        var pipe = new ImageLoaderEstimator(env, imageFolder, ("ImagePath", "ImageReal"))
            .Append(new ImageResizerEstimator(env, "ImageReal", "ImageReal", 100, 100))
            .Append(new ImagePixelExtractorEstimator(env, "ImageReal", "ImagePixels"))
            .Append(new ImageGrayscaleEstimator(env, ("ImageReal", "ImageGray")));

        pipe.GetOutputSchema(Core.Data.SchemaShape.Create(data.Schema));
        var model = pipe.Fit(data);

        using (var file = env.CreateTempFile())
        {
            using (var fs = file.CreateWriteStream())
                model.SaveTo(env, fs);
            var model2 = TransformerChain.LoadFrom(env, file.OpenReadStream());

            var newCols = ((ImageLoaderTransform)model2.First()).Columns;
            var oldCols = ((ImageLoaderTransform)model.First()).Columns;
            Assert.True(newCols
                .Zip(oldCols, (x, y) => x == y)
                .All(x => x));
        }
    }
    Done();
}
void New_CrossValidation()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var data = new TextLoader(env, MakeSentimentTextLoaderArgs())
            .Read(new MultiFileSource(dataPath));

        // Pipeline.
        var pipeline = new TextTransform(env, "SentimentText", "Features")
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
            {
                NumThreads = 1,
                ConvergenceTolerance = 1f
            }, "Features", "Label"));

        var cv = new MyCrossValidation.BinaryCrossValidator(env) { NumFolds = 2 };
        var cvResult = cv.CrossValidate(data, pipeline);
    }
}
public void TestCommandLine()
{
    using (var env = new TlcEnvironment())
    {
        Assert.Equal(0, Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0} xf=Term{col=B:A} xf=KeyToVector{col=C:B col={name=D source=B bag+}} in=f:\2.txt" }));
    }
}
public void New_TrainWithInitialPredictor()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

        // Train the pipeline, prepare train set.
        var reader = pipeline.Fit(new MultiFileSource(dataPath));
        var trainData = reader.Read(new MultiFileSource(dataPath));

        // Train the first predictor.
        var trainer = new MySdca(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label");
        var firstModel = trainer.Fit(trainData);

        // Train the second predictor on the same data.
        var secondTrainer = new MyAveragedPerceptron(env, new AveragedPerceptronTrainer.Arguments(), "Features", "Label");
        var finalModel = secondTrainer.Train(trainData, firstModel.InnerModel);
    }
}
public void New_SimpleTrainAndPredict()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(new MultiFileSource(dataPath));

        // Create prediction engine and test predictions.
        var engine = new MyPredictionEngine<SentimentData, SentimentPrediction>(env, model.Transformer);

        // Take a couple examples out of the test data and run predictions on top.
        var testData = model.Reader.Read(new MultiFileSource(testDataPath))
            .AsEnumerable<SentimentData>(env, false);
        foreach (var input in testData.Take(5))
        {
            var prediction = engine.Predict(input);
            // Verify that predictions match and scores are separated from zero.
            Assert.Equal(input.Sentiment, prediction.Sentiment);
            Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
        }
    }
}
public void InferSchemaCommandTest()
{
    var datasets = new[]
    {
        GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv"))
    };

    using (var env = new TlcEnvironment())
    {
        var h = env.Register("InferSchemaCommandTest", seed: 0, verbose: false);
        using (var ch = h.Start("InferSchemaCommandTest"))
        {
            for (int i = 0; i < datasets.Length; i++)
            {
                var outFile = string.Format("dataset-infer-schema-result-{0:00}.txt", i);
                string dataPath = GetOutputPath(Path.Combine("..", "Common", "Inference"), outFile);
                var args = new InferSchemaCommand.Arguments()
                {
                    DataFile = datasets[i],
                    OutputFile = dataPath,
                };
                var cmd = new InferSchemaCommand(Env, args);
                cmd.Run();
                CheckEquality(Path.Combine("..", "Common", "Inference"), outFile);
            }
        }
    }
    Done();
}
public void New_ReconfigurablePrediction()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var dataReader = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Fit(new MultiFileSource(dataPath));

        var data = dataReader.Read(new MultiFileSource(dataPath));
        var testData = dataReader.Read(new MultiFileSource(testDataPath));

        // Pipeline.
        var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
            .Fit(data);

        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label");
        var trainData = pipeline.Transform(data);
        var model = trainer.Fit(trainData);

        var scoredTest = model.Transform(pipeline.Transform(testData));
        var metrics = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments()).Evaluate(scoredTest, "Label", "Probability");

        // Reconfigure the same trained model with a different decision threshold
        // on the probability column, without retraining.
        var newModel = new BinaryPredictionTransformer<IPredictorProducing<float>>(env, model.Model, trainData.Schema, model.FeatureColumn,
            threshold: 0.01f, thresholdColumn: DefaultColumnNames.Probability);
        var newScoredTest = newModel.Transform(pipeline.Transform(testData));
        var newMetrics = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments { Threshold = 0.01f, UseRawScoreThreshold = false }).Evaluate(newScoredTest, "Label", "Probability");
    }
}
void FileBasedSavingOfData()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        var saver = new BinarySaver(env, new BinarySaver.Arguments());
        using (var ch = env.Start("SaveData"))
        using (var file = env.CreateOutputFile("i.idv"))
        {
            DataSaverUtils.SaveDataView(ch, saver, trans, file);
        }

        var binData = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));
        var trainRoles = new RoleMappedData(binData, label: "Label", feature: "Features");
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 });
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        DeleteOutputPath("i.idv");
    }
}
public void TestSimpleExperiment()
{
    var dataPath = GetDataPath(@"adult.tiny.with-schema.txt");
    using (var env = new TlcEnvironment())
    {
        var experiment = env.CreateExperiment();

        var importInput = new ML.Data.TextLoader(dataPath);
        var importOutput = experiment.Add(importInput);

        var normalizeInput = new ML.Transforms.MinMaxNormalizer { Data = importOutput.Data };
        normalizeInput.AddColumn("NumericFeatures");
        var normalizeOutput = experiment.Add(normalizeInput);

        experiment.Compile();
        experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
        experiment.Run();

        var data = experiment.GetOutput(normalizeOutput.OutputData);
        var schema = data.Schema;
        Assert.Equal(5, schema.ColumnCount);

        // The normalizer emits a new "NumericFeatures" column while the original
        // column of the same name is kept (hidden), so the name appears twice.
        var expected = new[] { "Label", "Workclass", "Categories", "NumericFeatures", "NumericFeatures" };
        for (int i = 0; i < schema.ColumnCount; i++)
        {
            Assert.Equal(expected[i], schema.GetColumnName(i));
        }
    }
}
public void New_Evaluation()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(new MultiFileSource(dataPath));

        // Evaluate on the test set.
        var dataEval = model.Read(new MultiFileSource(testDataPath));
        var evaluator = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments());
        var metrics = evaluator.Evaluate(dataEval);
    }
}
/// <summary>
/// Trains a model using SAR.
/// </summary>
/// <param name="settings">The training settings.</param>
/// <param name="usageEvents">The usage events to use for training.</param>
/// <param name="catalogItems">The catalog items to use for training.</param>
/// <param name="uniqueUsersCount">The number of users in the user id index file.</param>
/// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file.</param>
/// <param name="cancellationToken">A cancellation token.</param>
public IPredictorModel Train(
    ITrainingSettings settings,
    IList<SarUsageEvent> usageEvents,
    IList<SarCatalogItem> catalogItems,
    int uniqueUsersCount,
    int uniqueUsageItemsCount,
    CancellationToken cancellationToken)
{
    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    if (usageEvents == null)
    {
        throw new ArgumentNullException(nameof(usageEvents));
    }

    if (settings.EnableColdItemPlacement && catalogItems == null)
    {
        throw new ArgumentNullException(nameof(catalogItems));
    }

    if (uniqueUsersCount < 0)
    {
        var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a non-negative integer");
        _tracer.TraceWarning(exception.ToString());
        throw exception;
    }

    if (uniqueUsageItemsCount < 0)
    {
        var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a non-negative integer");
        _tracer.TraceWarning(exception.ToString());
        throw exception;
    }

    cancellationToken.ThrowIfCancellationRequested();

    using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
    {
        _detectedFeatureWeights = null;
        try
        {
            environment.AddListener<ChannelMessage>(ChannelMessageListener);
            IHost environmentHost = environment.Register("SarHost");

            // Bind the cancellation token to SAR cancellation.
            using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
            {
                _tracer.TraceInformation("Starting training model using SAR");
                return TrainModel(environmentHost, settings, usageEvents, catalogItems, uniqueUsersCount, uniqueUsageItemsCount);
            }
        }
        finally
        {
            environment.RemoveListener<ChannelMessage>(ChannelMessageListener);
        }
    }
}
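// A hypothetical call-site sketch for Train above. LoadUsageEvents/LoadCatalogItems
// and the two counts are placeholders standing in for the caller's data-preparation
// step, which would normally produce the id index files and their element counts.
public IPredictorModel TrainSketch(ITrainingSettings settings, CancellationToken cancellationToken)
{
    IList<SarUsageEvent> usageEvents = LoadUsageEvents();     // assumed helper
    IList<SarCatalogItem> catalogItems = LoadCatalogItems();  // assumed helper
    return Train(settings, usageEvents, catalogItems,
        uniqueUsersCount: 1000, uniqueUsageItemsCount: 500, cancellationToken);
}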
public LearningPipelineDebugProxy(LearningPipeline pipeline)
{
    if (pipeline == null)
    {
        throw new ArgumentNullException(nameof(pipeline));
    }

    _pipeline = new LearningPipeline();

    // Use a ConcurrencyFactor of 1 so other threads don't need to run in the debugger.
    _environment = new TlcEnvironment(conc: 1);

    foreach (ILearningPipelineItem item in pipeline)
    {
        _pipeline.Add(item);

        if (item is ILearningPipelineLoader loaderItem)
        {
            // Add a take filter after any loader, so it returns in a reasonable
            // amount of time.
            _pipeline.Add(new RowTakeFilter() { Count = MaxLoaderRows });
        }
    }
}
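// Sketch: constructing the debug proxy above for inspection in the debugger.
// InputData is a hypothetical schema class and dataPath a placeholder; the
// legacy-API loader and trainer are illustrative choices, not requirements.
private static LearningPipelineDebugProxy CreateDebugProxySketch(string dataPath)
{
    var pipeline = new LearningPipeline();
    pipeline.Add(new ML.Data.TextLoader(dataPath).CreateFrom<InputData>());
    pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier());
    return new LearningPipelineDebugProxy(pipeline);
}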
public void New_TrainWithInitialPredictor()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var data = new TextLoader(env, MakeSentimentTextLoaderArgs()).Read(new MultiFileSource(dataPath));

        // Pipeline.
        var pipeline = new TextTransform(env, "SentimentText", "Features");

        // Train the pipeline, prepare train set.
        var trainData = pipeline.FitAndTransform(data);

        // Train the first predictor.
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label");
        var firstModel = trainer.Fit(trainData);

        // Train the second predictor on the same data.
        var secondTrainer = new AveragedPerceptronTrainer(env, new AveragedPerceptronTrainer.Arguments());
        var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features");
        var finalModel = secondTrainer.Train(new TrainContext(trainRoles, initialPredictor: firstModel.Model));
    }
}
public void ConversionExceptionsBehavior()
{
    using (var env = new TlcEnvironment())
    {
        var data = new ConversionNotSupportedMinValueClass[1];
        foreach (var field in typeof(ConversionNotSupportedMinValueClass).GetFields())
        {
            data[0] = new ConversionNotSupportedMinValueClass();
            FieldInfo fi;
            if ((fi = field.FieldType.GetField("MinValue")) != null)
            {
                field.SetValue(data[0], fi.GetValue(null));
            }
            var dataView = ComponentCreation.CreateDataView(env, data);
            var enumerator = dataView.AsEnumerable<ConversionNotSupportedMinValueClass>(env, false).GetEnumerator();
            try
            {
                enumerator.MoveNext();
                // MinValue is not representable in the destination type, so enumeration
                // is expected to throw; reaching this line means the test failed.
                Assert.True(false);
            }
            catch
            {
            }
        }
    }
}