public override void Init(IRepository repository, IEnumerable<string> releases)
{
    base.Init(repository, releases);
    regression = new LogisticRegression();

    // Walk the training releases pairwise: each release contributes training
    // samples drawn from the commit window (previous release, current release].
    string prevRevision = null;
    foreach (var revision in TrainReleases)
    {
        foreach (var file in GetFilesInRevision(revision))
        {
            context
                .SetCommits(prevRevision, revision)
                .SetFiles(e => e.IdIs(file.ID));

            // Pair the file's predictor vector with its observed defect outcome.
            regression.AddTrainingData(
                GetPredictorValuesFor(context),
                FileHasDefects(file.ID, revision, prevRevision));
        }

        prevRevision = revision;
    }

    regression.Train();

    // Leave the context pointing at the prediction window for later use.
    context.SetCommits(TrainReleases.Last(), PredictionRelease);
}
public override void Init(IRepository repository, IEnumerable <string> releases)
{
    base.Init(repository, releases);
    regression = new LogisticRegression();

    // Collect one training sample per (file, release) pair; the commit window
    // for each release spans from the release before it up to the release itself.
    string lastSeenRevision = null;
    foreach (var currentRevision in TrainReleases)
    {
        var filesInRelease = GetFilesInRevision(currentRevision);
        foreach (var trainFile in filesInRelease)
        {
            context
                .SetCommits(lastSeenRevision, currentRevision)
                .SetFiles(e => e.IdIs(trainFile.ID));

            // Predictors come from the scoped context; the label is whether
            // the file showed defects in this window.
            regression.AddTrainingData(
                GetPredictorValuesFor(context),
                FileHasDefects(trainFile.ID, currentRevision, lastSeenRevision));
        }

        lastSeenRevision = currentRevision;
    }

    regression.Train();

    // Scope the context to the prediction window once training is complete.
    context.SetCommits(TrainReleases.Last(), PredictionRelease);
}
public void TestEstimatorLogisticRegression()
{
    // Obtain the shared binary-classification feature pipeline and its data.
    (IEstimator <ITransformer> pipeline, IDataView data) = GetBinaryClassificationPipeline();

    var lrTrainer = new LogisticRegression(Env, "Label", "Features");

    // Verify the estimator contract for the pipeline with the trainer appended.
    var fullPipeline = pipeline.Append(lrTrainer);
    TestEstimatorCore(fullPipeline, data);

    // Fit the feature pipeline alone, then fit the trainer on the transformed
    // data and continue training from the resulting model.
    var transformed = pipeline.Fit(data).Transform(data);
    var fitted = lrTrainer.Fit(transformed);
    lrTrainer.Train(transformed, fitted.Model);

    Done();
}
static IList <double> MergeScores(IList <IList <double> > scores, Dictionary <int, IList <int> > candidates, Dictionary <int, IList <int> > hits)
{
    double[] weights;

    if (log_reg)
    {
        // Learn a weight per score list via logistic regression: the target
        // for each (user, candidate item) is 1 iff that item was a hit.
        var model = new LogisticRegression();
        model.LearnRate = learn_rate;
        model.NumIter = num_it;
        model.Regularization = regularization;
        model.PredictorVariables = new Matrix <double>(scores);

        var targets = new byte[scores[0].Count];
        int idx = 0;
        foreach (int user in candidates.Keys)
        {
            foreach (int item in candidates[user])
            {
                targets[idx++] = hits[user].Contains(item) ? (byte)1 : (byte)0;
            }
        }
        model.TargetVariables = targets;

        model.Train();
        weights = model.parameters.ToArray();
    }
    else
    {
        // No learned model: weight every score list equally.
        weights = new double[scores.Count];
        for (int k = 0; k < weights.Length; k++)
        {
            weights[k] = 1;
        }
    }

    return MergeScores(scores, weights);
}
/// <summary>
/// Trains a logistic regression on the two loaded classes, then classifies
/// 5000 random points within the plot bounds and adds each point to the
/// scatter series of the class it was assigned to. The plot is refreshed
/// periodically while classification runs and once more at the end.
/// Any failure is reported via a message box rather than thrown.
/// </summary>
private async Task OnClassifyLogisticRegression()
{
    try
    {
        var logisticRegression = new LogisticRegression(FirstClass, SecondClass);
        logisticRegression.Train(0.01);

        var stopwatch = new Stopwatch();
        await Task.Run(() =>
        {
            stopwatch.Start();
            for (var i = 0; i < 5000; i++)
            {
                // Sample a uniform random point inside the current axis bounds.
                var randomPointX = random.NextDouble() * (PlotModel.Axes[0].Maximum - PlotModel.Axes[0].Minimum) + PlotModel.Axes[0].Minimum;
                var randomPointY = random.NextDouble() * (PlotModel.Axes[1].Maximum - PlotModel.Axes[1].Minimum) + PlotModel.Axes[1].Minimum;

                var logisticRegressionProbabilityResult = logisticRegression.Classify(randomPointX, randomPointY);
                if (logisticRegressionProbabilityResult > 0.5)
                {
                    FirstClassScatterSeries.Points.Add(new ScatterPoint(randomPointX, randomPointY, 4, double.NaN, FirstClass.Id));
                }
                else
                {
                    // BUG FIX: points falling into the second class were tagged
                    // with FirstClass.Id; they must carry SecondClass.Id so the
                    // scatter point's tag matches the series it is added to.
                    SecondClassScatterSeries.Points.Add(new ScatterPoint(randomPointX, randomPointY, 4, double.NaN, SecondClass.Id));
                }

                // Throttle plot refreshes to roughly 10 per second.
                if (stopwatch.Elapsed > TimeSpan.FromMilliseconds(100))
                {
                    PlotModel.InvalidatePlot(true);
                    stopwatch.Restart();
                }
            }
        });

        PlotModel.InvalidatePlot(true);
    }
    catch (Exception e)
    {
        MessageBox.Show(e.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Error);
    }
}
private static IPredictor TrainKMeansAndLRCore()
{
    string dataPath = s_dataPath;

    using (var env = new TlcEnvironment(seed: 1))
    {
        // Column layout of the raw file: label at column 14; categorical and
        // numeric features interleaved across the remaining columns.
        var labelColumn = new TextLoader.Column()
        {
            Name = "Label",
            Source = new [] { new TextLoader.Range() { Min = 14, Max = 14 } },
            Type = DataKind.R4
        };
        var catColumn = new TextLoader.Column()
        {
            Name = "CatFeatures",
            Source = new []
            {
                new TextLoader.Range() { Min = 1, Max = 1 },
                new TextLoader.Range() { Min = 3, Max = 3 },
                new TextLoader.Range() { Min = 5, Max = 9 },
                new TextLoader.Range() { Min = 13, Max = 13 }
            },
            Type = DataKind.TX
        };
        var numColumn = new TextLoader.Column()
        {
            Name = "NumFeatures",
            Source = new []
            {
                new TextLoader.Range() { Min = 0, Max = 0 },
                new TextLoader.Range() { Min = 2, Max = 2 },
                new TextLoader.Range() { Min = 4, Max = 4 },
                new TextLoader.Range() { Min = 10, Max = 12 }
            },
            Type = DataKind.R4
        };

        var loader = new TextLoader(env, new TextLoader.Arguments()
        {
            HasHeader = true,
            Separator = ",",
            Column = new [] { labelColumn, catColumn, numColumn }
        }, new MultiFileSource(dataPath));

        // One-hot encode the categorical features.
        IDataTransform pipeline = CategoricalTransform.Create(env, new CategoricalTransform.Arguments
        {
            Column = new [] { new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" } }
        }, loader);

        // Min-max normalize the numerics, then merge both groups into "Features".
        pipeline = NormalizeTransform.CreateMinMaxNormalizer(env, pipeline, "NumFeatures");
        pipeline = new ConcatTransform(env, pipeline, "Features", "NumFeatures", "CatFeatures");

        // Run KMeans (k=100) over the features and append its scores as
        // additional input features for the final model.
        pipeline = TrainAndScoreTransform.Create(env, new TrainAndScoreTransform.Arguments
        {
            Trainer = new SubComponent <ITrainer, SignatureTrainer>("KMeans", "k=100"),
            FeatureColumn = "Features"
        }, pipeline);
        pipeline = new ConcatTransform(env, pipeline, "Features", "Features", "Score");

        // Train the final logistic regression on the augmented feature vector.
        var trainer = new LogisticRegression(env, new LogisticRegression.Arguments()
        {
            EnforceNonNegativity = true,
            OptTol = 1e-3f
        });
        var trainRoles = new RoleMappedData(pipeline, label: "Label", feature: "Features");
        return trainer.Train(trainRoles);
    }
}
public ParameterMixingCalibratedPredictor TrainKMeansAndLR()
{
    using (var env = new ConsoleEnvironment(seed: 1))
    {
        // Load the raw file: label at column 14; categorical and numeric
        // feature columns gathered from the remaining ranges.
        var loader = TextLoader.ReadFile(env, new TextLoader.Arguments()
        {
            HasHeader = true,
            Separator = ",",
            Column = new []
            {
                new TextLoader.Column("Label", DataKind.R4, 14),
                new TextLoader.Column("CatFeatures", DataKind.TX, new []
                {
                    new TextLoader.Range() { Min = 1, Max = 1 },
                    new TextLoader.Range() { Min = 3, Max = 3 },
                    new TextLoader.Range() { Min = 5, Max = 9 },
                    new TextLoader.Range() { Min = 13, Max = 13 }
                }),
                new TextLoader.Column("NumFeatures", DataKind.R4, new []
                {
                    new TextLoader.Range() { Min = 0, Max = 0 },
                    new TextLoader.Range() { Min = 2, Max = 2 },
                    new TextLoader.Range() { Min = 4, Max = 4 },
                    new TextLoader.Range() { Min = 10, Max = 12 }
                })
            }
        }, new MultiFileSource(_dataPath));

        // One-hot encode categoricals, normalize numerics, concat into "Features".
        IDataView data = new CategoricalEstimator(env, "CatFeatures").Fit(loader).Transform(loader);
        data = NormalizeTransform.CreateMinMaxNormalizer(env, data, "NumFeatures");
        data = new ConcatTransform(env, "Features", "NumFeatures", "CatFeatures").Transform(data);

        // Append KMeans++ (K=100) cluster scores to the feature vector.
        data = TrainAndScoreTransform.Create(env, new TrainAndScoreTransform.Arguments
        {
            Trainer = ComponentFactoryUtils.CreateFromFunction(host =>
                new KMeansPlusPlusTrainer(host, "Features", advancedSettings: s => { s.K = 100; })),
            FeatureColumn = "Features"
        }, data);
        data = new ConcatTransform(env, "Features", "Features", "Score").Transform(data);

        // Final logistic regression with a non-negativity constraint and a
        // looser optimization tolerance.
        var trainer = new LogisticRegression(env, "Features", "Label",
            advancedSettings: args => { args.EnforceNonNegativity = true; args.OptTol = 1e-3f; });
        var trainRoles = new RoleMappedData(data, label: "Label", feature: "Features");
        return trainer.Train(trainRoles);
    }
}