/// <summary>
/// Trains a salary regressor on "SalaryData.csv", evaluates it against
/// "SalaryData-test.csv", and prints the metrics plus one sample prediction.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Assemble the training pipeline: loader, feature column, learner.
    var learningPipeline = new LearningPipeline();
    var trainingSource = new TextLoader("SalaryData.csv")
        .CreateFrom<SalaryData>(useHeader: true, separator: ',');
    learningPipeline.Add(trainingSource);
    learningPipeline.Add(new ColumnConcatenator("Features", "YearsExperience"));
    learningPipeline.Add(new GeneralizedAdditiveModelRegressor());

    var trainedModel = learningPipeline.Train<SalaryData, SalaryPrediction>();

    // Score the trained model on the separate test file.
    var testSource = new TextLoader("SalaryData-test.csv")
        .CreateFrom<SalaryData>(useHeader: true, separator: ',');
    var regressionEvaluator = new RegressionEvaluator();
    var regressionMetrics = regressionEvaluator.Evaluate(trainedModel, testSource);
    Console.WriteLine($"RMS - {regressionMetrics.Rms}");
    Console.WriteLine($"R^2 - {regressionMetrics.RSquared}");

    // Single ad-hoc prediction.
    var sample = new SalaryData { YearsExperience = 7 };
    var salaryPrediction = trainedModel.Predict(sample);
    Console.WriteLine($"Salary - {salaryPrediction.PredictedSalary}");

    // Wait for Enter before returning.
    Console.ReadLine();
}
/// <summary>
/// Trains a pipeline that contains only a loader and a categorical hash
/// one-hot transform, then checks that predicting on F1 = "5" yields a
/// 1024-element vector whose only non-zero slot is index 265.
/// </summary>
public void TransformOnlyPipeline()
{
    const string dataFile = @"..\..\Data\breast-cancer.txt";
    const int vectorLength = 1024; // length asserted for HashBits = 10
    const int hotSlot = 265;       // slot expected to be set for the input "5"

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new ML.Data.TextLoader(dataFile).CreateFrom<InputData>(useHeader: false));

    var hashVectorizer = new CategoricalHashOneHotVectorizer("F1")
    {
        HashBits = 10,
        Seed = 314489979,
        OutputKind = CategoricalTransformOutputKind.Bag
    };
    learningPipeline.Add(hashVectorizer);

    var trainedModel = learningPipeline.Train<InputData, TransformedData>();
    var transformed = trainedModel.Predict(new InputData() { F1 = "5" });

    Assert.NotNull(transformed);
    Assert.NotNull(transformed.TransformedF1);
    Assert.Equal(vectorLength, transformed.TransformedF1.Length);

    for (int slot = 0; slot < vectorLength; slot++)
    {
        if (slot != hotSlot)
        {
            Assert.Equal(0, transformed.TransformedF1[slot]);
            continue;
        }

        Assert.Equal(1, transformed.TransformedF1[slot]);
    }
}
/// <summary>
/// Trains an SDCA multiclass classifier on the iris data file and prints the
/// predicted label for one hard-coded measurement.
/// </summary>
public void TutorialOne()
{
    var irisFile = @"C:\Test\MLNetTutorials\MLNetTutorials\Data\iris.data.txt";

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(irisFile).CreateFrom<IrisData>(separator: ','));
    learningPipeline.Add(new Dictionarizer("Label"));
    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));

    // Learner, followed by the converter applied to the "PredictedLabel" column.
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    var sample = new IrisData()
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    };
    var result = trainedModel.Predict(sample);

    Console.WriteLine($"Predicted flower type is: {result.PredictedLabels}");
}
/// <summary>
/// Trains an SDCA multiclass classifier on "data.txt" and prints the
/// predicted label for one hard-coded measurement.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string irisFile = "data.txt";

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader<IrisData>(irisFile, separator: ","));
    learningPipeline.Add(new Dictionarizer("Label"));
    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    var sample = new IrisData()
    {
        SepalLength = 5.03f,
        SepalWidth = 2.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    };
    var result = trainedModel.Predict(sample);

    Console.WriteLine($"Predicted flower type is: {result.PredictedLabels}");
}
/// <summary>
/// Trains an SDCA multiclass classifier on "iris-data.txt" and prints the
/// predicted labels for two hard-coded measurements.
/// </summary>
public static void PredictIris()
{
    string irisFile = "iris-data.txt";

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(irisFile).CreateFrom<IrisData>(separator: ','));
    learningPipeline.Add(new Dictionarizer("Label"));
    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    var firstSample = new IrisData()
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 0.2f,
    };
    var firstResult = trainedModel.Predict(firstSample);

    var secondSample = new IrisData()
    {
        SepalLength = 5.8f,
        SepalWidth = 2.7f,
        PetalLength = 5.1f,
        PetalWidth = 1.9f
    };
    var secondResult = trainedModel.Predict(secondSample);

    // Output text is reproduced exactly as the original emits it.
    Console.WriteLine($"Predicred flower type is: {firstResult.PredictedLabels}");
    Console.WriteLine($"Predicred 2 flower type is: {secondResult.PredictedLabels}");
}
/// <summary>
/// Trains an SDCA multiclass classifier on "iris-data.csv" and prints the
/// predicted label for one hard-coded measurement.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var irisFile = "iris-data.csv";

    // Pipeline starts with the data loader.
    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader<IrisData>(irisFile, separator: ","));

    // Assign numeric values to the text in the Label column.
    learningPipeline.Add(new Dictionarizer("Label"));

    // Gather the four measurements into the "Features" vector.
    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));

    // Classifier, then the converter applied to the "PredictedLabel" column.
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    var sample = new IrisData
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f
    };
    var result = trainedModel.Predict(sample);

    System.Console.WriteLine($"Predicted flower type is : {result.PredictedLabels}");
}
/// <summary>
/// Builds a binary-classification pipeline over
/// "1_BinaryClassification/problem1.csv" and runs cross-validation on it.
/// The cross-validation output is not consumed yet.
/// </summary>
public static void CrossValidate()
{
    // Pipeline definition.
    var learningPipeline = new LearningPipeline();

    var csvSource = new TextLoader("1_BinaryClassification/problem1.csv")
        .CreateFrom<BeerOrWineData>(useHeader: true, separator: ',');
    learningPipeline.Add(csvSource);

    learningPipeline.Add(new TextFeaturizer("Features", "FullName"));
    learningPipeline.Add(new Dictionarizer(("Type", "Label")));
    learningPipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier() { });
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    // Cross-validation run.
    var crossValidator = new CrossValidator();
    var cv = crossValidator.CrossValidate<BeerOrWineData, BeerOrWinePrediction>(learningPipeline);

    // show matrix
}
/// <summary>
/// Trains a FastTree regressor for taxi fares from the file at
/// <c>_datapath</c> and writes the trained model to <c>_modelpath</c>.
/// </summary>
/// <returns>The trained prediction model.</returns>
public static async Task<PredictionModel<TaxiTrip, TaxiTripFarePrediction>> Train()
{
    var learningPipeline = new LearningPipeline();

    // Data source: CSV with a header row.
    var tripLoader = new TextLoader(_datapath).CreateFrom<TaxiTrip>(useHeader: true, separator: ',');
    learningPipeline.Add(tripLoader);

    // "FareAmount" is copied into the "Label" column.
    learningPipeline.Add(new ColumnCopier(("FareAmount", "Label")));

    // One-hot vectorizer over the three categorical columns.
    learningPipeline.Add(new CategoricalOneHotVectorizer("VendorId", "RateCode", "PaymentType"));

    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "VendorId",
        "RateCode",
        "PassengerCount",
        "TripDistance",
        "PaymentType"));

    learningPipeline.Add(new FastTreeRegressor());

    PredictionModel<TaxiTrip, TaxiTripFarePrediction> trainedModel =
        learningPipeline.Train<TaxiTrip, TaxiTripFarePrediction>();

    await trainedModel.WriteAsync(_modelpath);
    return trainedModel;
}
TrainAsync(string trainingDataFile, string modelPath)
{
    // Trains an SDCA multiclass classifier over 39 MFCC-derived columns read
    // from trainingDataFile, then writes the trained model to modelPath.

    // Input columns concatenated into "Features", in the original order.
    string[] featureColumns =
    {
        "MFCC1", "MFCC2", "MFCC3", "MFCC4", "MFCC5", "MFCC6", "MFCC7",
        "MFCC8", "MFCC9", "MFCC10", "MFCC11", "MFCC12", "MFCC13",
        "MFCCDelta1", "MFCCDelta2", "MFCCDelta3", "MFCCDelta4", "MFCCDelta5",
        "MFCCDelta6", "MFCCDelta7", "MFCCDelta8", "MFCCDelta9", "MFCCDelta10",
        "MFCCDelta11", "MFCCDelta12", "MFCCDelta13",
        "MFCCDeltaDelta1", "MFCCDeltaDelta2", "MFCCDeltaDelta3", "MFCCDeltaDelta4",
        "MFCCDeltaDelta5", "MFCCDeltaDelta6", "MFCCDeltaDelta7", "MFCCDeltaDelta8",
        "MFCCDeltaDelta9", "MFCCDeltaDelta10", "MFCCDeltaDelta11", "MFCCDeltaDelta12",
        "MFCCDeltaDelta13"
    };

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(trainingDataFile).CreateFrom<InputData>(separator: ','));
    learningPipeline.Add(new Dictionarizer("Label"));
    learningPipeline.Add(new ColumnConcatenator("Features", featureColumns));
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    PredictionModel<InputData, OutputData> trainedModel =
        learningPipeline.Train<InputData, OutputData>();

    await trainedModel.WriteAsync(modelPath);
    Console.WriteLine("Model created");
}
/// <summary>
/// Trains an SDCA multiclass classifier on "flowers.txt", prints the
/// predicted label for one hard-coded measurement, and waits for input.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string flowersFile = "flowers.txt";

    // Pipeline with its data source.
    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(flowersFile).CreateFrom<IrisData>(separator: ','));

    // Transform the label from string to numeric, then build the feature vector.
    learningPipeline.Add(new Dictionarizer("Label"));
    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));

    // Learning algorithm and the converter applied to "PredictedLabel".
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    // Training.
    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    // One prediction with the trained model.
    var sample = new IrisData()
    {
        SepalLength = 0.3f,
        SepalWidth = 0.6f,
        PetalLength = 1.2f,
        PetalWidth = 1.1f
    };
    var result = trainedModel.Predict(sample);

    // Output text is reproduced exactly as the original emits it.
    Console.WriteLine($"Pridicted flower class is : {result.PredictedLabels}");
    Console.Read();
}
/// <summary>
/// Assembles the multiclass text-classification pipeline inside
/// <see cref="Task.Run(System.Action)"/> and stores it in <c>Pipeline</c>.
/// No training is started here.
/// </summary>
/// <param name="trainingDataPath">Path of the training data file handed to the loader.</param>
/// <returns>A task that completes once the pipeline has been assembled.</returns>
public Task Build(string trainingDataPath)
{
    return Task.Run(() =>
    {
        Pipeline = new LearningPipeline();

        var trainingSource = new TextLoader(trainingDataPath).CreateFrom<MulticlassClassificationData>();
        Pipeline.Add(trainingSource);

        // Create buckets for the label values.
        Pipeline.Add(new Dictionarizer("Label"));

        // Transform the "Text" column into the "Features" vector.
        Pipeline.Add(new TextFeaturizer("Features", "Text"));

        // Learner. LogisticRegressionClassifier and NaiveBayesClassifier were
        // noted as alternatives by the original author.
        Pipeline.Add(new StochasticDualCoordinateAscentClassifier());

        Pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
        {
            PredictedLabelColumn = "PredictedLabel"
        });
    });
}
private LocalEnvironment _mlContext = new LocalEnvironment(seed: null); // v0.6

/// <summary>
/// Builds a pipeline over the semicolon-separated training file and trains it
/// with the supplied learner.
/// </summary>
/// <param name="trainingDataPath">Path of the training file (read with a header row).</param>
/// <param name="algorithm">Pipeline item added last, after the feature concatenation.</param>
/// <returns>The trained prediction model.</returns>
public PredictionModel<BinaryClassificationData, BinaryClassificationPrediction> BuildAndTrain(string trainingDataPath, ILearningPipelineItem algorithm)
{
    var learningPipeline = new LearningPipeline();

    var trainingSource = new TextLoader(trainingDataPath)
        .CreateFrom<BinaryClassificationData>(useHeader: true, separator: ';');
    learningPipeline.Add(trainingSource);

    // Missing "FixedAcidity" values are substituted using the Mean replacement kind.
    var acidityFiller = new MissingValueSubstitutor("FixedAcidity")
    {
        ReplacementKind = NAReplaceTransformReplacementKind.Mean
    };
    learningPipeline.Add(acidityFiller);

    learningPipeline.Add(MakeNormalizer());

    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "FixedAcidity",
        "VolatileAcidity",
        "CitricAcid",
        "ResidualSugar",
        "Chlorides",
        "FreeSulfurDioxide",
        "TotalSulfurDioxide",
        "Density",
        "Ph",
        "Sulphates",
        "Alcohol"));

    learningPipeline.Add(algorithm);

    return learningPipeline.Train<BinaryClassificationData, BinaryClassificationPrediction>();
}
/// <summary>
/// Cross-validates a FastTree sentiment pipeline and asserts that the first
/// resulting model predicts a true sentiment for one sample sentence.
/// </summary>
void CrossValidation()
{
    var sentimentFile = GetDataPath(SentimentDataPath);

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(sentimentFile).CreateFrom<SentimentData>());
    learningPipeline.Add(MakeSentimentTextTransform());

    var treeLearner = new FastTreeBinaryClassifier()
    {
        NumLeaves = 5,
        NumTrees = 5,
        MinDocumentsInLeafs = 2
    };
    learningPipeline.Add(treeLearner);

    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    var cv = new CrossValidator().CrossValidate<SentimentData, SentimentPrediction>(learningPipeline);

    // Reads the first metrics entry; the value itself is not inspected.
    var metrics = cv.BinaryClassificationMetrics[0];

    var sample = new SentimentData() { SentimentText = "Not big fan of this." };
    var singlePrediction = cv.PredictorModels[0].Predict(sample);

    Assert.True(singlePrediction.Sentiment);
}
/// <summary>
/// Trains a FastTree binary classifier on the sentiment data at
/// <c>_dataPath</c> and writes the trained model to <c>_modelPath</c>.
/// </summary>
/// <returns>The trained prediction model.</returns>
public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
{
    // The pipeline collects data loading, featurization, and the learner.
    var learningPipeline = new LearningPipeline();

    // First stage: the loader for the training file.
    var trainingSource = new TextLoader(_dataPath).CreateFrom<SentimentData>();
    learningPipeline.Add(trainingSource);

    // Featurization: the SentimentText column becomes the numeric "Features"
    // vector consumed by the learner.
    learningPipeline.Add(new TextFeaturizer("Features", "SentimentText"));

    var treeLearner = new FastTreeBinaryClassifier()
    {
        NumLeaves = 5,
        NumTrees = 5,
        MinDocumentsInLeafs = 2
    };
    learningPipeline.Add(treeLearner);

    PredictionModel<SentimentData, SentimentPrediction> trainedModel =
        learningPipeline.Train<SentimentData, SentimentPrediction>();

    await trainedModel.WriteAsync(_modelPath);
    return trainedModel;
}
/// <summary>
/// Trains an SDCA regressor on "PeopleData.txt" from the application base
/// directory, saves it as "IncomeModel.zip", evaluates it on that same file,
/// and prints the RMS error plus one sample prediction.
/// </summary>
public static void GetMyPrediction()
{
    Console.WriteLine("Begin ML.NET demo run");
    Console.WriteLine("Income from age, sex, politics");

    string dataFile = AppDomain.CurrentDomain.BaseDirectory + "/PeopleData.txt";

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(dataFile).CreateFrom<myLottery>(separator: ' '));
    learningPipeline.Add(new ColumnCopier(("Income", "Label")));
    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "pre10", "pre9", "pre8", "pre7", "pre6",
        "pre5", "pre4", "pre3", "pre2", "pre1"));

    var regressor = new StochasticDualCoordinateAscentRegressor
    {
        MaxIterations = 1000,
        NormalizeFeatures = NormalizeOption.Auto
    };
    learningPipeline.Add(regressor);

    Console.WriteLine("\nStarting training \n");
    var trainedModel = learningPipeline.Train<myLottery, myPrediction>();
    Console.WriteLine("\nTraining complete \n");

    // Persist the model, blocking until the asynchronous write has finished.
    string modelFile = AppDomain.CurrentDomain.BaseDirectory + "/IncomeModel.zip";
    Task.Run(async () =>
    {
        await trainedModel.WriteAsync(modelFile);
    }).GetAwaiter().GetResult();

    // Evaluation reads the same file that was used for training.
    var evaluationSource = new TextLoader(dataFile).CreateFrom<myLottery>(separator: ' ');
    var regressionEvaluator = new RegressionEvaluator();
    var regressionMetrics = regressionEvaluator.Evaluate(trainedModel, evaluationSource);
    double rms = regressionMetrics.Rms;
    Console.WriteLine($"Root mean squared error = {rms:F4}");

    Console.WriteLine("Income age 40 conservative male: ");
    myLottery sample = new myLottery()
    {
        pre10 = 6824298f,
        pre9 = 2589916f,
        pre8 = 2602089f,
        pre7 = 2915497f,
        pre6 = 8507838f,
        pre5 = 7679324f,
        pre4 = 607461f,
        pre3 = 5806877,
        pre2 = 6776442f,
        pre1 = 9975203
    };
    myPrediction result = trainedModel.Predict(sample);
    float predictedIncome = result.Income;
    Console.WriteLine($"Predicted income = ${predictedIncome:F2}");

    Console.WriteLine("\nEnd ML.NET demo");
    Console.ReadLine();
}
/// <summary>
/// Trains an SDCA multiclass classifier on the rows produced by
/// <c>OpenFile</c> and writes the trained model to <c>_modelPath</c>.
/// </summary>
/// <param name="dataPath">Path handed to <c>OpenFile</c>.</param>
/// <returns>The trained prediction model.</returns>
static async Task<PredictionModel<Open311Data, Open311DataPrediction>> TrainOpen311(string dataPath)
{
    var learningPipeline = new LearningPipeline();

    // In-memory collection source built from the file contents.
    var collectionSource = CollectionDataSource.Create(OpenFile(dataPath, 3, 0, 1, 2));
    learningPipeline.Add(collectionSource);

    learningPipeline.Add(new Dictionarizer("Label"));

    // Text featurizer from "Request" into "Features", configured for German.
    var requestFeaturizer = new TextFeaturizer("Features", "Request")
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
        OutputTokens = true,
        Language = TextTransformLanguage.German,
        StopWordsRemover = new PredefinedStopWordsRemover(),
        VectorNormalizer = TextTransformTextNormKind.L2,
        CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
        WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = true }
    };
    learningPipeline.Add(requestFeaturizer);

    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    var trainedModel = learningPipeline.Train<Open311Data, Open311DataPrediction>();
    await trainedModel.WriteAsync(_modelPath);
    return trainedModel;
}
/// <summary>
/// Trains a FastTree binary classifier on the sentiment data at
/// <c>TrainDataPath</c>, writes the model to <c>ModelPath</c>, and reports
/// progress on the console.
/// </summary>
/// <returns>The trained prediction model.</returns>
public static async Task<PredictionModel<SentimentData, SentimentPrediction>> TrainAsync()
{
    // The pipeline holds every learning step: data, transforms, learner.
    var learningPipeline = new LearningPipeline();

    // Loader; the dataset schema comes from the SentimentData class.
    var trainingSource = new TextLoader(TrainDataPath).CreateFrom<SentimentData>();
    learningPipeline.Add(trainingSource);

    // Featurize the SentimentText input column into "Features".
    learningPipeline.Add(new TextFeaturizer("Features", "SentimentText"));

    // Learner, with three hyperparameters set for the decision trees.
    var treeLearner = new FastTreeBinaryClassifier()
    {
        NumLeaves = 5,
        NumTrees = 5,
        MinDocumentsInLeafs = 2
    };
    learningPipeline.Add(treeLearner);

    Console.WriteLine("=============== Training model ===============");

    // Train on the loaded and transformed dataset.
    var trainedModel = learningPipeline.Train<SentimentData, SentimentPrediction>();

    // Persist the trained model.
    await trainedModel.WriteAsync(ModelPath);

    Console.WriteLine("=============== End training ===============");
    Console.WriteLine("The model is saved to {0}", ModelPath);

    return trainedModel;
}
/// <summary>
/// Trains an SDCA multiclass classifier on "news-train.txt" using an
/// English text featurizer over the "Text" column.
/// </summary>
/// <returns>The trained prediction model.</returns>
static PredictionModel<NewsData, NewsPrediction> TrainNews()
{
    const string trainingFile = @"news-train.txt";

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(trainingFile).CreateFrom<NewsData>());

    var newsFeaturizer = new TextFeaturizer("Features", "Text")
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
        OutputTokens = true,
        Language = TextTransformLanguage.English,
        StopWordsRemover = new PredefinedStopWordsRemover(),
        VectorNormalizer = TextTransformTextNormKind.L2,
        CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
        WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = true }
    };
    learningPipeline.Add(newsFeaturizer);

    learningPipeline.Add(new Dictionarizer("Label"));
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());

    return learningPipeline.Train<NewsData, NewsPrediction>();
}
/// <summary>
/// Trains a FastTree sentiment classifier whose features combine the text
/// featurizer output with word embeddings of the selected pretrained kind.
/// </summary>
/// <param name="modelKind">Pretrained embedding model kind assigned to the transform; may be null.</param>
/// <returns>The trained prediction model.</returns>
public static PredictionModel<SentimentData, SentimentPrediction> TrainModelWordEmbeddings(WordEmbeddingsTransformPretrainedModelKind? modelKind)
{
    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(TrainDataPath).CreateFrom<SentimentData>());

    // OutputTokens is enabled; the embeddings step below reads the
    // "FeaturesA_TransformedText" column.
    learningPipeline.Add(new TextFeaturizer("FeaturesA", "SentimentText") { OutputTokens = true });

    var embeddings = new WordEmbeddings(("FeaturesA_TransformedText", "FeaturesB"))
    {
        ModelKind = modelKind
    };
    learningPipeline.Add(embeddings);

    learningPipeline.Add(new ColumnConcatenator("Features", "FeaturesA", "FeaturesB"));

    var treeLearner = new FastTreeBinaryClassifier()
    {
        NumLeaves = 5,
        NumTrees = 5,
        MinDocumentsInLeafs = 2
    };
    learningPipeline.Add(treeLearner);

    Console.WriteLine("=============== Training model with Word Embeddings ===============");
    var trainedModel = learningPipeline.Train<SentimentData, SentimentPrediction>();
    Console.WriteLine("=============== End training ===============");

    return trainedModel;
}
/// <summary>
/// Builds and trains the machine learning model from the training data
/// location that was passed through the constructor.
/// </summary>
/// <returns>Trained machine learning model.</returns>
public PredictionModel<WineQualitySample, WineQualityPrediction> BuildAndTrain()
{
    var learningPipeline = new LearningPipeline();

    var trainingSource = new TextLoader(_trainingDataLocation)
        .CreateFrom<WineQualitySample>(useHeader: true, separator: ';');
    learningPipeline.Add(trainingSource);

    // Missing "FixedAcidity" values are substituted using the Mean replacement kind.
    var acidityFiller = new MissingValueSubstitutor("FixedAcidity")
    {
        ReplacementKind = NAReplaceTransformReplacementKind.Mean
    };
    learningPipeline.Add(acidityFiller);

    learningPipeline.Add(MakeNormalizer());

    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "FixedAcidity",
        "VolatileAcidity",
        "CitricAcid",
        "ResidualSugar",
        "Chlorides",
        "FreeSulfurDioxide",
        "TotalSulfurDioxide",
        "Density",
        "Ph",
        "Sulphates",
        "Alcohol"));

    // The learner is the one stored in the _algorythm field.
    learningPipeline.Add(_algorythm);

    return learningPipeline.Train<WineQualitySample, WineQualityPrediction>();
}
/// <summary>
/// Trains a one-versus-all classifier on the iris data and checks the
/// metrics twice: once through <c>ClassificationEvaluator</c> and once
/// through <c>TrainTestEvaluator</c>.
/// </summary>
public void TrainOneVersusAll()
{
    string irisFile = GetDataPath("iris.txt");

    var learningPipeline = new LearningPipeline(seed: 1, conc: 1);
    learningPipeline.Add(new TextLoader(irisFile).CreateFrom<IrisData>(useHeader: false));
    learningPipeline.Add(new ColumnConcatenator(
        outputColumn: "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));
    learningPipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier()));

    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    // The test loader points at the same file used for training.
    var testSource = new TextLoader(irisFile).CreateFrom<IrisData>(useHeader: false);

    var classificationEvaluator = new ClassificationEvaluator();
    ClassificationMetrics evaluatorMetrics = classificationEvaluator.Evaluate(trainedModel, testSource);
    CheckMetrics(evaluatorMetrics);

    var trainTestEvaluator = new TrainTestEvaluator()
    {
        Kind = MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer
    };
    var trainTestOutput = trainTestEvaluator.TrainTestEvaluate<IrisData, IrisPrediction>(learningPipeline, testSource);
    CheckMetrics(trainTestOutput.ClassificationMetrics);
}
TrainAsync(InputData input)
{
    // Trains a FastTree binary classifier on input.TrainingData, writes the
    // model to _modelpath, and returns the trained model.

    // The pipeline keeps all learning steps together.
    var learningPipeline = new LearningPipeline();

    // Loader; the schema is specified by the ClassificationData class.
    var trainingSource = new TextLoader(input.TrainingData).CreateFrom<ClassificationData>();
    learningPipeline.Add(trainingSource);

    // Featurize the "Text" input column into "Features".
    learningPipeline.Add(new TextFeaturizer("Features", "Text"));

    // Decision-tree learner with three hyperparameters set.
    var treeLearner = new FastTreeBinaryClassifier()
    {
        NumLeaves = 5,
        NumTrees = 5,
        MinDocumentsInLeafs = 2
    };
    learningPipeline.Add(treeLearner);

    // Train on the loaded and transformed dataset.
    PredictionModel<ClassificationData, ClassPrediction> trainedModel =
        learningPipeline.Train<ClassificationData, ClassPrediction>();

    // Save the trained model to a file.
    await trainedModel.WriteAsync(_modelpath);

    // Hand the model back for evaluation.
    return trainedModel;
}
/// <summary>
/// Trains an SDCA multiclass classifier on the sentiment data at
/// <c>_dataPath</c> and writes the trained model to <c>_modelpath</c>.
/// </summary>
/// <returns>The trained prediction model.</returns>
public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
{
    // Pipeline used to load, process, and featurize the data.
    var learningPipeline = new LearningPipeline();

    // Training data, read with a header row.
    var trainingSource = new TextLoader(_dataPath).CreateFrom<SentimentData>(useHeader: true);
    learningPipeline.Add(trainingSource);

    learningPipeline.Add(new Dictionarizer("Label"));

    // Convert the SentimentText column into the numeric "Features" vector
    // used by the learning algorithm.
    learningPipeline.Add(new TextFeaturizer("Features", "SentimentText"));

    // Learning algorithm. LogisticRegressionClassifier, NaiveBayesClassifier,
    // and FastTreeBinaryClassifier were tried as alternatives by the original author.
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());

    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    // Training.
    PredictionModel<SentimentData, SentimentPrediction> trainedModel =
        learningPipeline.Train<SentimentData, SentimentPrediction>();

    // Persist the model.
    await trainedModel.WriteAsync(_modelpath);
    return trainedModel;
}
/// <summary>
/// Trains an SDCA multiclass classifier on the comma-separated iris data and
/// writes the trained model to disk.
/// </summary>
/// <param name="dataPath">Path of the training data file.</param>
/// <param name="modelPath">Path the trained model is written to.</param>
/// <returns>The trained prediction model.</returns>
public static async Task<PredictionModel<IrisData, IrisPrediction>> TrainModel(string dataPath, string modelPath)
{
    LearningPipeline learningPipeline = new LearningPipeline();

    // Data source.
    var trainingSource = new TextLoader(dataPath).CreateFrom<IrisData>(separator: ',');
    learningPipeline.Add(trainingSource);

    // Text in the "Label" column gets numeric values, because only numbers
    // can be processed during model training.
    learningPipeline.Add(new Dictionarizer("Label"));

    // Feature vector.
    learningPipeline.Add(new ColumnConcatenator(
        "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));

    // Learner.
    learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());

    // Convert the predicted label back to text.
    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    // Persist the model before returning it.
    await trainedModel.WriteAsync(modelPath);
    return trainedModel;
}
/// <summary>
/// Assembles (without training) a sentiment pipeline made of a tab-separated
/// loader, a text featurizer, and a LightGBM binary classifier.
/// </summary>
/// <returns>The assembled, untrained pipeline.</returns>
private LearningPipeline PreparePipelineLightGBM()
{
    var sentimentFile = GetDataPath(SentimentDataPath);
    var learningPipeline = new LearningPipeline();

    // Loader schema: column 0 is the numeric "Label", column 1 is "SentimentText".
    var labelColumn = new TextLoaderColumn()
    {
        Name = "Label",
        Source = new[] { new TextLoaderRange(0) },
        Type = Data.DataKind.Num
    };
    var textColumn = new TextLoaderColumn()
    {
        Name = "SentimentText",
        Source = new[] { new TextLoaderRange(1) },
        Type = Data.DataKind.Text
    };
    var loaderArguments = new TextLoaderArguments
    {
        Separator = new[] { '\t' },
        HasHeader = true,
        Column = new[] { labelColumn, textColumn }
    };
    learningPipeline.Add(new Data.TextLoader(sentimentFile) { Arguments = loaderArguments });

    var sentimentFeaturizer = new TextFeaturizer("Features", "SentimentText")
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
        OutputTokens = true,
        StopWordsRemover = new PredefinedStopWordsRemover(),
        VectorNormalizer = TextTransformTextNormKind.L2,
        CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
        WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true }
    };
    learningPipeline.Add(sentimentFeaturizer);

    var boostedLearner = new LightGbmBinaryClassifier()
    {
        NumLeaves = 5,
        NumBoostRound = 5,
        MinDataPerLeaf = 2
    };
    learningPipeline.Add(boostedLearner);

    learningPipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    return learningPipeline;
}
/// <summary>
/// Builds and trains the stock classifier from "Profit-Train.txt" located in
/// the application base directory, storing the result in <c>model</c>.
/// </summary>
public PredictStock()
{
    // Pipeline that will carry the loader, transforms, and learner.
    var pipeline = new LearningPipeline();

    // Path.Combine supplies the platform's directory separator; the former
    // hard-coded backslash is not a separator on non-Windows platforms.
    string dataPath = System.IO.Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "Profit-Train.txt");
    pipeline.Add(new TextLoader(dataPath).CreateFrom<StockData>(separator: ','));

    // Label the data.
    pipeline.Add(new Dictionarizer("Label"));

    // Put the three price columns into the "Features" vector.
    pipeline.Add(new ColumnConcatenator("Features", "CurrentPrice", "DayHigh", "DayLow"));

    // Learning algorithm.
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    // Convert the predicted label back into its original text.
    pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    // Train and keep the model on the instance.
    this.model = pipeline.Train<StockData, StockPrediction>();
}
/// <summary>
/// Trains the regression model once. With <c>ML_LEGACY</c> defined the data
/// is fed to a <c>LearningPipeline</c> directly; otherwise it is written to
/// disk, loaded back through the text loader, and fitted with an SDCA trainer.
/// </summary>
/// <param name="data">Training rows.</param>
/// <param name="labels">Not read by this implementation.</param>
/// <exception cref="InvalidOperationException">Thrown when <c>TrainedModel</c> is already set.</exception>
public override void Train(List<DataSet> data, List<float> labels = null)
{
    if (TrainedModel != null)
    {
        throw new InvalidOperationException("May only train/load a model once");
    }

#if ML_LEGACY
    var legacyPipeline = new LearningPipeline();

    // Data comes straight from the in-memory collection.
    legacyPipeline.Add(CollectionDataSource.Create(data));

    // The value to predict: "Score" copied into "Label".
    legacyPipeline.Add(new ColumnCopier(("Score", "Label")));

    // Feature columns as reported by DataSet.ColumnNames().
    legacyPipeline.Add(new ColumnConcatenator("Features", DataSet.ColumnNames()));

    // Regression learner.
    legacyPipeline.Add(new FastTreeRegressor());

    TrainedModel = legacyPipeline.Train<DataSet, DataSetPrediction>();
#else
    var loader = GetTextLoader(Context);

    // The rows are spilled to a file because this path loads data through
    // the text loader rather than from a collection.
    var spillFile = "";
    try
    {
        spillFile = WriteToDisk(data);

        IDataView trainingView = loader.Load(spillFile);
        InputSchema = trainingView.Schema;

        // Transforms: copy Score into "Label", then build "Features".
        var copyLabel = Context.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(DataSet.Score));
        var featurePipeline = copyLabel.Append(Context.Transforms.Concatenate("Features", DataSet.ColumnNames()));

        // Training algorithm.
        var sdcaTrainer = Context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

        TrainedModel = featurePipeline.Append(sdcaTrainer).Fit(trainingView);
    }
    finally
    {
        // Remove the spill file if one was created.
        if (!string.IsNullOrWhiteSpace(spillFile) && File.Exists(spillFile))
        {
            File.Delete(spillFile);
        }
    }
#endif
}
/// <summary>
/// Adds the loader, transforms, and learner to <c>pipeline</c> (calling
/// <c>Init()</c> first when it is null) and trains the model into <c>model</c>.
/// </summary>
public void Train()
{
    if (pipeline == null)
    {
        Init();
    }

    var trainingSource = new TextLoader(dataPath).CreateFrom<SubjectData>(separator: ',');
    pipeline.Add(trainingSource);

    // STEP 3: transforms.
    // Text in the "Label" column gets numeric values, because only numbers
    // can be processed during model training.
    pipeline.Add(new Dictionarizer("Label"));

    // Featurize "SubjectName" into the "Features" column.
    pipeline.Add(new TextFeaturizer("Features", "SubjectName"));

    // STEP 4: learner (classification).
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    // Convert the predicted label back into its original text.
    pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
    {
        PredictedLabelColumn = "PredictedLabel"
    });

    // STEP 5: train on the data set.
    model = pipeline.Train<SubjectData, SubjectPrediction>();
}
/// <summary>
/// Retrains the FastTree binary classifier from the training database file
/// and writes the resulting model to the configured model path.
/// </summary>
/// <param name="config">Supplies <c>DatabasePath</c> (training CSV) and <c>ModelPath</c> (output file).</param>
/// <returns>A task that completes once the model has been written.</returns>
private static async Task RebuildModelAsync(Config config)
{
    Console.WriteLine("RebuildModel:");

    var pipeline = new LearningPipeline();
    pipeline.Add(new TextLoader(config.DatabasePath).CreateFrom<TrainingDatabaseEntry>(useHeader: true, separator: ','));

    // Column names are taken via nameof on the type itself, so no
    // TrainingDatabaseEntry instance has to be allocated just to name members.
    pipeline.Add(new CategoricalOneHotVectorizer(nameof(TrainingDatabaseEntry.globalTolerance)));
    pipeline.Add(new ColumnConcatenator(
        "Features",
        nameof(TrainingDatabaseEntry.toleranceValue),
        nameof(TrainingDatabaseEntry.numNotTolerance),
        nameof(TrainingDatabaseEntry.numTolerance),
        nameof(TrainingDatabaseEntry.percentTolerance),
        nameof(TrainingDatabaseEntry.globalTolerance),
        nameof(TrainingDatabaseEntry.dayOfWeek)));

    pipeline.Add(new FastTreeBinaryClassifier()
    {
        NumLeaves = 5,
        NumTrees = 5,
        MinDocumentsInLeafs = 2
    });

    var model = pipeline.Train<TrainingDatabaseEntry, TimelinessPrediction>();

    Console.WriteLine($" Saving model to '{config.ModelPath}'...");
    await model.WriteAsync(config.ModelPath);
    Console.WriteLine(" Model rebuilt.");
}
/// <summary>
/// Trains a K-means++ clusterer (K = 5) over the product data at
/// <c>_dataPath</c>.
/// </summary>
/// <returns>
/// The trained model, or null when training throws; in that case the
/// exception is written to the console.
/// </returns>
private static PredictionModel<ProductData, ProductPredict> Train()
{
    // The call is kept as in the original; its return value is not read here.
    var extracted = ExtractTrainingData();

    var learningPipeline = new LearningPipeline();
    learningPipeline.Add(new TextLoader(_dataPath).CreateFrom<ProductData>(separator: ','));
    learningPipeline.Add(new ColumnConcatenator("Features", "Category", "Price", "BorrowDays"));
    learningPipeline.Add(new KMeansPlusPlusClusterer() { K = 5 });

    PredictionModel<ProductData, ProductPredict> trainedModel = null;
    try
    {
        trainedModel = learningPipeline.Train<ProductData, ProductPredict>();
    }
    catch (Exception trainingError)
    {
        Console.WriteLine(trainingError);
    }

    return trainedModel;
}