/// <summary>
        /// Initializes the base predictor, then builds and trains a logistic regression
        /// from every file of every training release. Finally the shared context is
        /// pointed at the commits between the last training release and the prediction release.
        /// </summary>
        /// <param name="repository">Repository passed through to the base initialization.</param>
        /// <param name="releases">Releases passed through to the base initialization.</param>
        public override void Init(IRepository repository, IEnumerable<string> releases)
        {
            base.Init(repository, releases);
            regression = new LogisticRegression();

            // Stays null while the first training release is processed.
            string priorRelease = null;
            foreach (var release in TrainReleases)
            {
                foreach (var file in GetFilesInRevision(release))
                {
                    // Narrow the shared context to this file within the current release window.
                    context.SetCommits(priorRelease, release).SetFiles(e => e.IdIs(file.ID));

                    var predictors = GetPredictorValuesFor(context);
                    var defective = FileHasDefects(file.ID, release, priorRelease);
                    regression.AddTrainingData(predictors, defective);
                }

                priorRelease = release;
            }

            regression.Train();

            // Leave the context set up for the release that is going to be predicted.
            context.SetCommits(TrainReleases.Last(), PredictionRelease);
        }
        /// <summary>
        /// Runs the base initialization and fits a fresh logistic regression using one
        /// training sample per file per training release. Afterwards the context commit
        /// range is set from the last training release to the prediction release.
        /// </summary>
        /// <param name="repository">Repository forwarded to <c>base.Init</c>.</param>
        /// <param name="releases">Releases forwarded to <c>base.Init</c>.</param>
        public override void Init(IRepository repository, IEnumerable <string> releases)
        {
            base.Init(repository, releases);

            regression = new LogisticRegression();

            // No earlier release exists for the first iteration, hence null.
            string earlierRelease = null;
            foreach (var currentRelease in TrainReleases)
            {
                var releaseFiles = GetFilesInRevision(currentRelease);
                foreach (var file in releaseFiles)
                {
                    // Restrict the context to the commits of this release and to this single file.
                    context.SetCommits(earlierRelease, currentRelease)
                           .SetFiles(e => e.IdIs(file.ID));

                    var predictorValues = GetPredictorValuesFor(context);
                    var hasDefects = FileHasDefects(file.ID, currentRelease, earlierRelease);

                    regression.AddTrainingData(predictorValues, hasDefects);
                }

                earlierRelease = currentRelease;
            }

            regression.Train();

            context.SetCommits(TrainReleases.Last(), PredictionRelease);
        }
Esempio n. 3
0
        /// <summary>
        /// Appends a logistic regression trainer to the binary classification pipeline and runs
        /// it through <c>TestEstimatorCore</c>; then fits the trainer on the transformed data
        /// and calls <c>Train</c> again, passing the model obtained from the fit.
        /// </summary>
        public void TestEstimatorLogisticRegression()
        {
            (IEstimator <ITransformer> pipeline, IDataView data) = GetBinaryClassificationPipeline();
            var lrTrainer = new LogisticRegression(Env, "Label", "Features");

            // Core estimator checks on the full chain (pipeline + trainer).
            TestEstimatorCore(pipeline.Append(lrTrainer), data);

            // Fit the trainer on its own against the already-transformed data.
            var featurized = pipeline.Fit(data).Transform(data);
            var fitted = lrTrainer.Fit(featurized);

            // Second training pass that receives the previously fitted model.
            lrTrainer.Train(featurized, fitted.Model);

            Done();
        }
Esempio n. 4
0
    /// <summary>
    /// Computes the weights used to combine several score lists and delegates the actual
    /// merge to the <c>MergeScores(scores, weights)</c> overload.
    /// </summary>
    /// <remarks>
    /// When <c>log_reg</c> is set, the weights are the parameters of a logistic regression
    /// trained on <paramref name="scores"/> with binary targets (1 if the candidate item is
    /// among the user's hits, 0 otherwise). Otherwise every score list gets weight 1.
    /// NOTE(review): the targets are filled in dictionary enumeration order of
    /// <paramref name="candidates"/>; this presumably has to match the order of the entries
    /// in each score list, and the total number of candidate items must not exceed
    /// <c>scores[0].Count</c> - confirm against the caller.
    /// </remarks>
    /// <param name="scores">The score lists to merge; must be non-empty when <c>log_reg</c> is set.</param>
    /// <param name="candidates">Candidate items per user.</param>
    /// <param name="hits">Hit items per user; a user without an entry is treated as having no hits.</param>
    /// <returns>The result of <c>MergeScores(scores, weights)</c>.</returns>
    static IList <double> MergeScores(IList <IList <double> > scores, Dictionary <int, IList <int> > candidates, Dictionary <int, IList <int> > hits)
    {
        double[] weights;

        if (log_reg)
        {
            var lr = new LogisticRegression();
            lr.LearnRate      = learn_rate;
            lr.NumIter        = num_it;
            lr.Regularization = regularization;

            lr.PredictorVariables = new Matrix <double>(scores);

            var targets = new byte[scores[0].Count];
            int pos     = 0;
            foreach (KeyValuePair <int, IList <int> > userCandidates in candidates)
            {
                // One lookup per user instead of one per candidate item; a user that has
                // candidates but no entry in hits simply has no positive targets.
                IList <int> userHits;
                bool hasHits = hits.TryGetValue(userCandidates.Key, out userHits);

                foreach (int item in userCandidates.Value)
                {
                    targets[pos++] = (hasHits && userHits.Contains(item)) ? (byte)1 : (byte)0;
                }
            }
            lr.TargetVariables = targets;

            lr.Train();

            weights = lr.parameters.ToArray();
        }
        else
        {
            // Uniform weighting: every score list contributes equally.
            weights = new double[scores.Count];
            for (int i = 0; i < weights.Length; i++)
            {
                weights[i] = 1;
            }
        }

        return MergeScores(scores, weights);
    }
Esempio n. 5
0
        /// <summary>
        /// Trains a logistic regression on <c>FirstClass</c> and <c>SecondClass</c>, then classifies
        /// 5000 random points drawn inside the current axis ranges and adds each one to the scatter
        /// series of the class it was assigned to. The plot is refreshed at most every 100 ms while
        /// the points are generated and once more at the end. Any exception is reported in a message box.
        /// </summary>
        private async Task OnClassifyLogisticRegression()
        {
            try
            {
                var logisticRegression = new LogisticRegression(FirstClass, SecondClass);
                logisticRegression.Train(0.01);
                var stopwatch = new Stopwatch();

                await Task.Run(() =>
                {
                    stopwatch.Start();
                    for (var i = 0; i < 5000; i++)
                    {
                        // Uniform random point inside the [Minimum, Maximum] range of each axis.
                        var randomPointX = random.NextDouble() * (PlotModel.Axes[0].Maximum - PlotModel.Axes[0].Minimum) + PlotModel.Axes[0].Minimum;
                        var randomPointY = random.NextDouble() * (PlotModel.Axes[1].Maximum - PlotModel.Axes[1].Minimum) + PlotModel.Axes[1].Minimum;

                        var logisticRegressionProbabilityResult = logisticRegression.Classify(randomPointX, randomPointY);

                        if (logisticRegressionProbabilityResult > 0.5)
                        {
                            FirstClassScatterSeries.Points.Add(new ScatterPoint(randomPointX, randomPointY, 4, double.NaN, FirstClass.Id));
                        }
                        else
                        {
                            // Tag the point with the class it was actually assigned to
                            // (previously tagged with FirstClass.Id by mistake).
                            SecondClassScatterSeries.Points.Add(new ScatterPoint(randomPointX, randomPointY, 4, double.NaN, SecondClass.Id));
                        }

                        // Throttle redraws so the plot updates progressively without redrawing per point.
                        if (stopwatch.Elapsed > TimeSpan.FromMilliseconds(100))
                        {
                            PlotModel.InvalidatePlot(true);
                            stopwatch.Restart();
                        }
                    }
                });

                PlotModel.InvalidatePlot(true);
            }
            catch (Exception e)
            {
                MessageBox.Show(e.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Error);
            }
        }
        /// <summary>
        /// Loads the comma-separated file at <c>s_dataPath</c> (with header), builds a feature
        /// pipeline and trains a logistic regression on it.
        /// </summary>
        /// <remarks>
        /// Pipeline stages, in order: categorical transform of "CatFeatures", min-max normalization
        /// of "NumFeatures", concatenation of both into "Features", a train-and-score transform
        /// using the "KMeans" trainer with settings "k=100", and a second concatenation that merges
        /// "Features" with "Score" back into "Features".
        /// </remarks>
        /// <returns>The predictor returned by the logistic regression trainer.</returns>
        private static IPredictor TrainKMeansAndLRCore()
        {
            string inputPath = s_dataPath;

            using (var env = new TlcEnvironment(seed: 1))
            {
                // Column layout: "Label" is column 14, "CatFeatures" are columns 1, 3, 5-9 and 13,
                // "NumFeatures" are columns 0, 2, 4 and 10-12.
                var labelColumn = new TextLoader.Column()
                {
                    Name   = "Label",
                    Source = new [] { new TextLoader.Range() { Min = 14, Max = 14 } },
                    Type   = DataKind.R4
                };
                var categoricalColumn = new TextLoader.Column()
                {
                    Name   = "CatFeatures",
                    Source = new [] {
                        new TextLoader.Range() { Min = 1, Max = 1 },
                        new TextLoader.Range() { Min = 3, Max = 3 },
                        new TextLoader.Range() { Min = 5, Max = 9 },
                        new TextLoader.Range() { Min = 13, Max = 13 }
                    },
                    Type = DataKind.TX
                };
                var numericColumn = new TextLoader.Column()
                {
                    Name   = "NumFeatures",
                    Source = new [] {
                        new TextLoader.Range() { Min = 0, Max = 0 },
                        new TextLoader.Range() { Min = 2, Max = 2 },
                        new TextLoader.Range() { Min = 4, Max = 4 },
                        new TextLoader.Range() { Min = 10, Max = 12 }
                    },
                    Type = DataKind.R4
                };

                var loaderArgs = new TextLoader.Arguments()
                {
                    HasHeader = true,
                    Separator = ",",
                    Column    = new[] { labelColumn, categoricalColumn, numericColumn }
                };
                var loader = new TextLoader(env, loaderArgs, new MultiFileSource(inputPath));

                // Feature engineering.
                var categoricalArgs = new CategoricalTransform.Arguments
                {
                    Column = new[]
                    {
                        new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" }
                    }
                };
                IDataTransform encoded    = CategoricalTransform.Create(env, categoricalArgs, loader);
                IDataTransform normalized = NormalizeTransform.CreateMinMaxNormalizer(env, encoded, "NumFeatures");
                IDataTransform features   = new ConcatTransform(env, normalized, "Features", "NumFeatures", "CatFeatures");

                // Train-and-score stage whose "Score" output is appended to the features below.
                var kmeansArgs = new TrainAndScoreTransform.Arguments
                {
                    Trainer       = new SubComponent <ITrainer, SignatureTrainer>("KMeans", "k=100"),
                    FeatureColumn = "Features"
                };
                IDataTransform scored    = TrainAndScoreTransform.Create(env, kmeansArgs, features);
                IDataTransform augmented = new ConcatTransform(env, scored, "Features", "Features", "Score");

                // Train the final logistic regression.
                var lrArgs = new LogisticRegression.Arguments()
                {
                    EnforceNonNegativity = true,
                    OptTol               = 1e-3f
                };
                var trainer    = new LogisticRegression(env, lrArgs);
                var trainRoles = new RoleMappedData(augmented, label: "Label", feature: "Features");
                return trainer.Train(trainRoles);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Reads the comma-separated file at <c>_dataPath</c> (with header), builds a feature
        /// pipeline and trains a logistic regression on the result.
        /// </summary>
        /// <remarks>
        /// Pipeline stages, in order: categorical estimator on "CatFeatures", min-max normalization
        /// of "NumFeatures", concatenation of both into "Features", a train-and-score transform
        /// using a <c>KMeansPlusPlusTrainer</c> with K = 100, and a second concatenation that merges
        /// "Features" with "Score" back into "Features".
        /// </remarks>
        /// <returns>The predictor returned by the logistic regression trainer.</returns>
        public ParameterMixingCalibratedPredictor TrainKMeansAndLR()
        {
            using (var env = new ConsoleEnvironment(seed: 1))
            {
                // Column layout: "Label" is column 14, "CatFeatures" are columns 1, 3, 5-9 and 13,
                // "NumFeatures" are columns 0, 2, 4 and 10-12.
                var labelColumn = new TextLoader.Column("Label", DataKind.R4, 14);
                var categoricalColumn = new TextLoader.Column("CatFeatures", DataKind.TX,
                    new [] {
                        new TextLoader.Range() { Min = 1, Max = 1 },
                        new TextLoader.Range() { Min = 3, Max = 3 },
                        new TextLoader.Range() { Min = 5, Max = 9 },
                        new TextLoader.Range() { Min = 13, Max = 13 }
                    });
                var numericColumn = new TextLoader.Column("NumFeatures", DataKind.R4,
                    new [] {
                        new TextLoader.Range() { Min = 0, Max = 0 },
                        new TextLoader.Range() { Min = 2, Max = 2 },
                        new TextLoader.Range() { Min = 4, Max = 4 },
                        new TextLoader.Range() { Min = 10, Max = 12 }
                    });

                var loaderArgs = new TextLoader.Arguments()
                {
                    HasHeader = true,
                    Separator = ",",
                    Column    = new[] { labelColumn, categoricalColumn, numericColumn }
                };
                var loader = TextLoader.ReadFile(env, loaderArgs, new MultiFileSource(_dataPath));

                // Feature engineering.
                IDataView encoded    = new CategoricalEstimator(env, "CatFeatures").Fit(loader).Transform(loader);
                IDataView normalized = NormalizeTransform.CreateMinMaxNormalizer(env, encoded, "NumFeatures");
                IDataView features   = new ConcatTransform(env, "Features", "NumFeatures", "CatFeatures").Transform(normalized);

                // Train-and-score stage whose "Score" output is appended to the features below.
                var kmeansArgs = new TrainAndScoreTransform.Arguments
                {
                    Trainer = ComponentFactoryUtils.CreateFromFunction(host =>
                        new KMeansPlusPlusTrainer(host, "Features", advancedSettings: s =>
                        {
                            s.K = 100;
                        })),
                    FeatureColumn = "Features"
                };
                IDataView scored    = TrainAndScoreTransform.Create(env, kmeansArgs, features);
                IDataView augmented = new ConcatTransform(env, "Features", "Features", "Score").Transform(scored);

                // Train the final logistic regression.
                var trainer = new LogisticRegression(env, "Features", "Label", advancedSettings: settings =>
                {
                    settings.EnforceNonNegativity = true;
                    settings.OptTol = 1e-3f;
                });
                var trainRoles = new RoleMappedData(augmented, label: "Label", feature: "Features");
                return trainer.Train(trainRoles);
            }
        }