Esempio n. 1
0
 /// <summary>
 /// Creates a boolean-label logistic regression classifier and fits it on the given samples.
 /// </summary>
 /// <param name="samples">Two-dimensional array of sample values; converted to a dense matrix before fitting.</param>
 /// <param name="knownOutput">Known outputs passed to <c>Fit</c> together with the sample matrix.</param>
 /// <returns>The classifier instance on which <c>Fit</c> has been called.</returns>
 private static LogisticRegression<bool> GetLogisticRegression(
     double[,] samples,
     bool[] knownOutput)
 {
     var model = new LogisticRegression<bool>();
     var sampleMatrix = DenseMatrix.OfArray(samples);

     model.Fit(sampleMatrix, knownOutput);
     return model;
 }
        /// <summary>
        /// Fits a logistic regression on the training vector, transforms the test vector with the
        /// fitted model and passes the result to <c>PrintPredictionsAndEvaluate</c>.
        /// </summary>
        /// <param name="training">Feature vector the estimator is fitted on.</param>
        /// <param name="test">Feature vector transformed by the fitted model.</param>
        private static void TestLogisticRegression(FeatureVector training, FeatureVector test)
        {
            var estimator = new LogisticRegression(0.1, 3000);
            var fitted    = (LogisticRegressionModel)estimator.Fit(training);

            FeatureVector predicted = fitted.transform(test);
            PrintPredictionsAndEvaluate(predicted);
        }
        /// <summary>
        /// Overwrites the fitted coefficients and intercept with zeros in place and checks that
        /// the first column of the decision function is then almost equal to three zeros.
        /// </summary>
        public void TestWriteParameters()
        {
            var model = new LogisticRegression<int>(random: new Random(0));
            model.Fit(X, Y1);

            // Replace every learned parameter with zero, in place.
            model.Coef.MapInplace(unused => 0);
            model.Intercept.MapInplace(unused => 0);

            var firstColumn = model.DecisionFunction(X).Column(0);
            var zeros       = new DenseVector(new double[] { 0, 0, 0 });

            Assert.IsTrue(firstColumn.AlmostEquals(zeros));
        }
        /// <summary>
        /// Fits two classifiers, each seeded with <c>new Random(0)</c>, on the same generated
        /// classification data and checks that their coefficients are almost equal.
        /// </summary>
        public void TestLiblinearRandomState()
        {
            var data = SampleGenerator.MakeClassification(nSamples: 20);

            var first = new LogisticRegression<int>(random: new Random(0));
            first.Fit(data.X, data.Y);

            var second = new LogisticRegression<int>(random: new Random(0));
            second.Fit(data.X, data.Y);

            var coefficientsMatch = first.Coef.AlmostEquals(second.Coef);
            Assert.IsTrue(coefficientsMatch);
        }
Esempio n. 5
0
        /// <summary>
        /// Runs the estimator core checks on the binary classification pipeline with a logistic
        /// regression trainer appended, then fits the trainer on the pipeline's output and calls
        /// <c>Train</c> again with the model obtained from that fit.
        /// </summary>
        public void TestEstimatorLogisticRegression()
        {
            (IEstimator<ITransformer> pipeline, IDataView data) = GetBinaryClassificationPipeline();
            var lrTrainer = new LogisticRegression(Env, "Label", "Features");

            TestEstimatorCore(pipeline.Append(lrTrainer), data);

            // Transform the data with the trainer-less pipeline, then fit the trainer on the result.
            var fittedPipeline  = pipeline.Fit(data);
            var transformedData = fittedPipeline.Transform(data);
            var fittedTrainer   = lrTrainer.Fit(transformedData);

            // Train once more, handing over the model from the fit above.
            lrTrainer.Train(transformedData, fittedTrainer.Model);
            Done();
        }
        /// <summary>
        /// Fits the classifier on the given data and asserts that its classes, predictions and
        /// predicted probabilities are consistent with the labels it was fitted on.
        /// </summary>
        /// <param name="clf">Classifier to fit and check.</param>
        /// <param name="X">Matrix passed to <c>Fit</c>, <c>Predict</c> and <c>PredictProba</c>.</param>
        /// <param name="y">Labels passed to <c>Fit</c> and compared against the predictions.</param>
        private void check_predictions(LogisticRegression<int> clf, Matrix<double> X, int[] y)
        {
            var sortedLabels = y.Distinct().OrderBy(label => label).ToArray();

            clf.Fit(X, y);
            var predictedLabels = clf.Predict(X);

            // The classifier's classes must equal the distinct labels in ascending order.
            Assert.IsTrue(sortedLabels.SequenceEqual(clf.Classes));

            // Predictions must reproduce the labels exactly.
            Assert.AreEqual(y.Length, predictedLabels.Length);
            Assert.IsTrue(y.SequenceEqual(predictedLabels));

            // Probabilities: expected shape, every row summing to one, ArgmaxColumns() reproducing y.
            Matrix<double> proba = clf.PredictProba(X);
            var expectedShape = Tuple.Create(y.Length, sortedLabels.Length);
            Assert.AreEqual(expectedShape, proba.Shape());

            var rowSums = proba.SumOfEveryRow();
            Assert.IsTrue(rowSums.AlmostEquals(DenseVector.Create(proba.RowCount, i => 1.0)));
            Assert.IsTrue(y.SequenceEqual(proba.ArgmaxColumns()));
        }
Esempio n. 7
0
        /// <summary>
        /// Trains the logistic classifier, sweeps its decision threshold from -10 to 10 in steps
        /// of 0.005 to collect (FPR, TPR) points, publishes the resulting plot model and reports
        /// the area under the curve in <c>Status</c>.
        /// </summary>
        /// <remarks>
        /// <c>CanInteract</c> is cleared while the work runs and is restored in a <c>finally</c>
        /// block, so interaction is re-enabled even if training or classification throws.
        /// </remarks>
        /// <returns>A task that completes when the plot and status have been updated.</returns>
        private async Task DrawRoc( )
        {
            // Threshold sweep: thresholdStart + i * thresholdStep for i = 0..thresholdSteps,
            // i.e. -10, -9.995, ..., 10. An integer index is used instead of accumulating a
            // double so rounding error cannot drift the thresholds or drop the last one.
            const double thresholdStart = -10;
            const double thresholdStep  = 0.005;
            const int    thresholdSteps = 4000;

            CanInteract = false;

            try
            {
                Status = "Training classifier...";

                // Offload the CPU-bound training to the thread pool.
                await Task.Run(() => logistic.Fit(train,
                                                  trainAns.Select(x => isAimClass(x))
                                                          .ToArray( )));

                var series = new LineSeries( )
                {
                    Smooth = false, LineStyle = LineStyle.Solid
                };

                Status = "Iterating over thresholds...";

                for (int step = 0; step <= thresholdSteps; step++)
                {
                    logistic.Threshold = thresholdStart + step * thresholdStep;
                    var ans = await Task.Run(() => logistic.Classify(test));

                    series.Points.Add(new DataPoint(CalcFPR(ans), CalcTPR(ans)));
                }

                var newModel = new PlotModel
                {
                    Title = $"ROC for class {CurrentClass}",
                };

                newModel.Series.Add(series);
                // Diagonal y = x drawn alongside the curve for reference.
                newModel.Series.Add(new FunctionSeries(x => x, 0, 1, 0.01));

                Model = newModel;
                NotifyOfPropertyChange(() => Model);

                // Order the distinct points by X, then by Y, before computing the area.
                // (The previous composite key X * 10 + Y only orders by X when Y never
                // outweighs a difference in X.)
                var plot = series.Points.Distinct( )
                                 .OrderBy(p => p.X)
                                 .ThenBy(p => p.Y)
                                 .ToList( );

                Status = $"Area under curve: {CalcArea( plot )}";
            }
            finally
            {
                CanInteract = true;
            }
        }
        /// <summary>
        /// Fits <paramref name="clf"/> on <paramref name="X"/> and <paramref name="y"/>, then
        /// asserts that the fitted classes, the predictions and the predicted probabilities all
        /// agree with <paramref name="y"/>.
        /// </summary>
        /// <param name="clf">Classifier under test.</param>
        /// <param name="X">Matrix used for fitting and for both prediction calls.</param>
        /// <param name="y">Labels used for fitting and as the expected predictions.</param>
        private void check_predictions(LogisticRegression<int> clf, Matrix<double> X, int[] y)
        {
            int expectedRows    = y.Length;
            var distinctSorted  = y.Distinct().OrderBy(label => label).ToArray();
            int expectedColumns = distinctSorted.Length;

            clf.Fit(X, y);
            var hardPredictions = clf.Predict(X);

            // Classes exposed by the classifier: distinct labels, ascending.
            var classesMatch = distinctSorted.SequenceEqual(clf.Classes);
            Assert.IsTrue(classesMatch);

            // One prediction per label, each equal to the label.
            Assert.AreEqual(expectedRows, hardPredictions.Length);
            Assert.IsTrue(y.SequenceEqual(hardPredictions));

            Matrix<double> softPredictions = clf.PredictProba(X);

            // Shape must be (labels, distinct labels); rows must sum to one.
            Assert.AreEqual(Tuple.Create(expectedRows, expectedColumns), softPredictions.Shape());
            Assert.IsTrue(
                softPredictions.SumOfEveryRow()
                               .AlmostEquals(DenseVector.Create(softPredictions.RowCount, i => 1.0)));

            // ArgmaxColumns() of the probabilities must reproduce y.
            Assert.IsTrue(y.SequenceEqual(softPredictions.ArgmaxColumns()));
        }
        /// <summary>
        /// Checks that the decision function gives almost equal results before and after
        /// <c>Sparsify</c>/<c>Densify</c>, for both the original and the sparse copy of the iris data.
        /// </summary>
        public void TestSparsify()
        {
            string[] labels = iris.Target.Select(index => iris.TargetNames[index]).ToArray();
            var model = new LogisticRegression<string>(random: new Random(0));
            model.Fit(iris.Data, labels);

            // Reference result: freshly fitted model, original data.
            Matrix<double> reference = model.DecisionFunction(iris.Data);

            // After Sparsify the coefficients must be a SparseMatrix.
            model.Sparsify();
            Assert.IsInstanceOfType(model.Coef, typeof(SparseMatrix));
            Matrix<double> sparsifiedOnOriginal = model.DecisionFunction(iris.Data);

            // Same sparsified model, sparse copy of the data.
            Matrix sparseData = SparseMatrix.OfMatrix(iris.Data);
            Matrix<double> sparsifiedOnSparse = model.DecisionFunction(sparseData);

            // After Densify, still on the sparse copy of the data.
            model.Densify();
            Matrix<double> densifiedOnSparse = model.DecisionFunction(sparseData);

            foreach (var candidate in new[] { sparsifiedOnOriginal, sparsifiedOnSparse, densifiedOnSparse })
            {
                Assert.IsTrue(reference.AlmostEquals(candidate));
            }
        }
        /// <summary>
        /// Fits a string-label classifier on the iris data and checks that both the direct
        /// predictions and the labels derived from the predicted probabilities match more than
        /// 95% of the targets.
        /// </summary>
        public void TestPredictIris()
        {
            int sampleCount = iris.Data.RowCount;
            string[] labels = iris.Target.Select(index => iris.TargetNames[index]).ToArray();

            var model = new LogisticRegression<string>(c: iris.Data.RowCount);
            model.Fit(iris.Data, labels);
            Assert.IsTrue(labels.Distinct().OrderBy(name => name).SequenceEqual(model.Classes));

            // Direct predictions.
            var predicted = model.Predict(iris.Data);
            int hits = predicted.Zip(labels, Tuple.Create).Count(pair => pair.Item1 == pair.Item2);
            Assert.IsTrue(1.0 * hits / predicted.Length > 0.95);

            // Every row of the predicted probabilities must sum to one.
            var proba = model.PredictProba(iris.Data);
            Assert.IsTrue(proba.SumOfEveryRow().AlmostEquals(DenseVector.Create(sampleCount, i => 1.0)));

            // Labels looked up from the index of each row's maximum probability.
            predicted = proba.RowEnumerator().Select(row => iris.TargetNames[row.Item2.MaximumIndex()]).ToArray();
            hits      = predicted.Zip(labels, Tuple.Create).Count(pair => pair.Item1 == pair.Item2);
            Assert.IsTrue(1.0 * hits / predicted.Length > 0.95);
        }
        /// <summary>
        /// Fits a classifier on the iris data using target names as labels, then asserts that
        /// more than 95% of the targets are matched both by <c>Predict</c> and by the labels
        /// taken from the maximum of each row of <c>PredictProba</c>.
        /// </summary>
        public void TestPredictIris()
        {
            int rowCount = iris.Data.RowCount;

            string[] expected = iris.Target.Select(idx => iris.TargetNames[idx]).ToArray();
            var classifier = new LogisticRegression<string>(c: iris.Data.RowCount);
            classifier.Fit(iris.Data, expected);

            var sortedDistinct = expected.Distinct().OrderBy(name => name);
            Assert.IsTrue(sortedDistinct.SequenceEqual(classifier.Classes));

            // Accuracy of the direct predictions.
            var actual = classifier.Predict(iris.Data);
            var matches = actual.Zip(expected, Tuple.Create).Count(p => p.Item1 == p.Item2);
            double accuracy = 1.0 * matches / actual.Length;
            Assert.IsTrue(accuracy > 0.95);

            // Rows of the probability matrix must sum to one.
            var probabilityMatrix = classifier.PredictProba(iris.Data);
            var ones = DenseVector.Create(rowCount, i => 1.0);
            Assert.IsTrue(probabilityMatrix.SumOfEveryRow().AlmostEquals(ones));

            // Accuracy of the labels picked via the maximum index of each row.
            actual = probabilityMatrix.RowEnumerator().Select(r => iris.TargetNames[r.Item2.MaximumIndex()]).ToArray();
            matches = actual.Zip(expected, Tuple.Create).Count(p => p.Item1 == p.Item2);
            accuracy = 1.0 * matches / actual.Length;
            Assert.IsTrue(accuracy > 0.95);
        }
 /// <summary>
 /// Fits two classifiers seeded with <c>new Random(0)</c> on the same generated data and
 /// asserts that their coefficients are almost equal.
 /// </summary>
 public void TestLiblinearRandomState()
 {
     var dataset = SampleGenerator.MakeClassification(nSamples: 20);

     // Builds and fits one seeded classifier; invoked twice, one after the other.
     Func<LogisticRegression<int>> fitSeeded = () =>
     {
         var model = new LogisticRegression<int>(random: new Random(0));
         model.Fit(dataset.X, dataset.Y);
         return model;
     };

     var first  = fitSeeded();
     var second = fitSeeded();

     Assert.IsTrue(first.Coef.AlmostEquals(second.Coef));
 }
 /// <summary>
 /// Sets every coefficient and intercept of a fitted classifier to zero in place and asserts
 /// that the first column of the decision function is almost equal to a vector of three zeros.
 /// </summary>
 public void TestWriteParameters()
 {
     var classifier = new LogisticRegression<int>(random: new Random(0));
     classifier.Fit(X, Y1);

     // Zero out the fitted parameters.
     classifier.Coef.MapInplace(ignored => 0);
     classifier.Intercept.MapInplace(ignored => 0);

     var decisionColumn = classifier.DecisionFunction(X).Column(0);

     // new double[3] is { 0, 0, 0 }.
     var expectedZeros = new DenseVector(new double[3]);
     Assert.IsTrue(decisionColumn.AlmostEquals(expectedZeros));
 }
        /// <summary>
        /// Compares the decision function of a freshly fitted model with the results obtained
        /// after <c>Sparsify</c> and <c>Densify</c>, on the iris data and on a sparse copy of it.
        /// </summary>
        public void TestSparsify()
        {
            string[] targetNames = iris.Target.Select(idx => iris.TargetNames[idx]).ToArray();

            var classifier = new LogisticRegression<string>(random: new Random(0));
            classifier.Fit(iris.Data, targetNames);

            // Baseline: model as fitted, data as given.
            Matrix<double> baseline = classifier.DecisionFunction(iris.Data);

            classifier.Sparsify();
            Assert.IsInstanceOfType(classifier.Coef, typeof(SparseMatrix));

            // Sparsified model on the data as given, then on a sparse copy of it.
            Matrix<double> afterSparsify = classifier.DecisionFunction(iris.Data);
            Matrix sparseIris = SparseMatrix.OfMatrix(iris.Data);
            Matrix<double> afterSparsifyOnSparse = classifier.DecisionFunction(sparseIris);

            // Densified model on the sparse copy.
            classifier.Densify();
            Matrix<double> afterDensifyOnSparse = classifier.DecisionFunction(sparseIris);

            var sparsifyMatches         = baseline.AlmostEquals(afterSparsify);
            Assert.IsTrue(sparsifyMatches);
            var sparsifyOnSparseMatches = baseline.AlmostEquals(afterSparsifyOnSparse);
            Assert.IsTrue(sparsifyOnSparseMatches);
            var densifyOnSparseMatches  = baseline.AlmostEquals(afterDensifyOnSparse);
            Assert.IsTrue(densifyOnSparseMatches);
        }
Esempio n. 15
0
        /// <summary>
        /// Click handler for the "next" button of the data-split step.
        /// </summary>
        /// <remarks>
        /// Reads the training-set percentage and fold count from the numeric inputs, computes the
        /// series of every checked indicator, assembles them (plus a "label" column built from
        /// <c>GetClosesOut</c>) into a feature vector, splits that vector into training and test
        /// parts, evaluates linear regression, logistic regression and naive Bayes on the split
        /// (cross-validated when <c>numFolds</c> is greater than zero), shows the three accuracies
        /// and brings the results panel to the front.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event arguments; not used.</param>
        private void buttonForDataSplitNext_Click(object sender, EventArgs e)
        {
            trainingSetPercentage = (double)numericUpDownForTrainingSetPercent.Value / 100.0;
            numFolds = (int)numericUpDownForNumFolds.Value;

            var data = IndicatorService.GetData(code, targetDate, new string[] { "Tarih", "Kapanis" }, numberOfData + 1);

            // An indicator series stays null unless its flag is set.
            double[] smaOut = isSMAChecked
                ? IndicatorDataPreprocessor.GetSMAOut(MovingAverage.Simple(code, targetDate, smaPeriod, numberOfData))
                : null;
            double[] wmaOut = isWMAChecked
                ? IndicatorDataPreprocessor.GetWMAOut(MovingAverage.Weighted(code, targetDate, wmaPeriod, numberOfData))
                : null;
            double[] emaOut = isEMAChecked
                ? IndicatorDataPreprocessor.GetEMAOut(MovingAverage.Exponential(code, targetDate, emaPeriod, numberOfData))
                : null;
            double[] macdOut = isMACDChecked
                ? IndicatorDataPreprocessor.GetMACDOut(new MovingAverageConvergenceDivergence(code, targetDate, firstPeriod, secondPeriod, triggerPeriod, numberOfData))
                : null;
            double[] stochasticsOut = isStochasticsChecked
                ? IndicatorDataPreprocessor.GetStochasticsOut(new Stochastics(code, targetDate, fastKPeriod, fastDPeriod, slowDPeriod, numberOfData))
                : null;
            double[] williamsROut = isWilliamsRChecked
                ? IndicatorDataPreprocessor.GetWilliamsROut(WilliamsR.Wsr(code, targetDate, williamsRPeriod, numberOfData))
                : null;
            double[] rsiOut = isRSIChecked
                ? IndicatorDataPreprocessor.GetRSIOut(RelativeStrengthIndex.Rsi(code, targetDate, rsiPeriod, numberOfData))
                : null;
            double[] closesOut = IndicatorDataPreprocessor.GetClosesOut(numberOfData, data);

            // Every column is truncated to the length of the shortest available series.
            int minRowCount = int.MaxValue;
            foreach (double[] series in new[] { smaOut, wmaOut, emaOut, macdOut, rsiOut, williamsROut, stochasticsOut, closesOut })
            {
                if (series != null)
                {
                    minRowCount = Math.Min(minRowCount, series.Length);
                }
            }

            // Converts a series to a column of culture-invariant strings, truncated to minRowCount.
            Func<double[], object[]> toColumn = values =>
                values.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray();

            var fv = new FeatureVector();

            if (isSMAChecked)
            {
                fv.AddColumn("SMA", toColumn(smaOut));
            }
            if (isWMAChecked)
            {
                fv.AddColumn("WMA", toColumn(wmaOut));
            }
            if (isEMAChecked)
            {
                fv.AddColumn("EMA", toColumn(emaOut));
            }
            if (isMACDChecked)
            {
                fv.AddColumn("MACD", toColumn(macdOut));
            }
            if (isRSIChecked)
            {
                fv.AddColumn("RSI", toColumn(rsiOut));
            }
            if (isWilliamsRChecked)
            {
                fv.AddColumn("WilliamsR", toColumn(williamsROut));
            }
            if (isStochasticsChecked)
            {
                fv.AddColumn("Stochastics", toColumn(stochasticsOut));
            }

            // Format the label with the invariant culture, like the feature columns.
            // string.Format("{0:0.0}", p) uses the current culture, and calling
            // ToString(IFormatProvider) on the resulting string does not re-format it.
            fv.AddColumn("label", closesOut.Select(p => (object)p.ToString("0.0", CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());

            // Split each column: the first trainingCount rows for training, the rest for testing.
            var training      = new FeatureVector();
            var test          = new FeatureVector();
            int count         = fv.Values[0].Length;
            int trainingCount = (int)(count * trainingSetPercentage);

            for (int i = 0; i < fv.ColumnName.Count; i++)
            {
                training.AddColumn(fv.ColumnName[i], fv.Values[i].Take(trainingCount).ToArray());
            }

            for (int i = 0; i < fv.ColumnName.Count; i++)
            {
                test.AddColumn(fv.ColumnName[i], fv.Values[i].Skip(trainingCount).ToArray());
            }

            bool useCrossValidation = numFolds > 0;

            // Linear regression.
            var linRegEvaluator  = new BinaryClassificationEvaluator();
            var linearRegression = new LinearRegression();
            FeatureVector linRegPredictions;
            if (useCrossValidation)
            {
                var crossValidator = new CrossValidator(linearRegression, linRegEvaluator, numFolds);
                var crossValidated = (CrossValidatorModel)crossValidator.Fit(training);
                linRegPredictions  = crossValidated.transform(test);
            }
            else
            {
                var linearRegressionModel = (LinearRegressionModel)linearRegression.Fit(training);
                linRegPredictions         = linearRegressionModel.transform(test);
            }
            linRegEvaluator.evaluate(linRegPredictions);
            linRegAcc = linRegEvaluator.Accuracy;

            // Logistic regression.
            var logRegEvaluator    = new BinaryClassificationEvaluator();
            var logisticRegression = new LogisticRegression();
            FeatureVector logRegPredictions;
            if (useCrossValidation)
            {
                var crossValidator = new CrossValidator(logisticRegression, logRegEvaluator, numFolds);
                var crossValidated = (CrossValidatorModel)crossValidator.Fit(training);
                logRegPredictions  = crossValidated.transform(test);
            }
            else
            {
                var logisticRegressionModel = (LogisticRegressionModel)logisticRegression.Fit(training);
                logRegPredictions           = logisticRegressionModel.transform(test);
            }
            logRegEvaluator.evaluate(logRegPredictions);
            logRegAcc = logRegEvaluator.Accuracy;

            // Naive Bayes.
            var naiBayEvaluator = new BinaryClassificationEvaluator();
            var naiveBayes      = new NaiveBayes();
            FeatureVector naiBayPredictions;
            if (useCrossValidation)
            {
                var crossValidator = new CrossValidator(naiveBayes, naiBayEvaluator, numFolds);
                var crossValidated = (CrossValidatorModel)crossValidator.Fit(training);
                naiBayPredictions  = crossValidated.transform(test);
            }
            else
            {
                var naiveBayesModel = (NaiveBayesModel)naiveBayes.Fit(training);
                naiBayPredictions   = naiveBayesModel.transform(test);
            }
            naiBayEvaluator.evaluate(naiBayPredictions);
            naiBayAcc = naiBayEvaluator.Accuracy;

            labelForLinRegAcc.Text = linRegAcc.ToString();
            labelForLogRegAcc.Text = logRegAcc.ToString();
            labelForNaiBayAcc.Text = naiBayAcc.ToString();

            panelForResults.BringToFront();
        }
        /// <summary>
        /// Builds a feature vector from the selected indicator series, exports it to CSV, splits
        /// it into training and test parts and returns the accuracy of the chosen algorithm.
        /// </summary>
        /// <param name="indicators">Indicator identifiers (compared against the <c>IndicatorService</c> constants) selecting which series become columns.</param>
        /// <param name="mlAlgorithm">Algorithm identifier, compared against <c>MLAService.LIN_REG</c>, <c>LOG_REG</c> and <c>NAI_BAY</c>.</param>
        /// <param name="isCrossValidationEnabled">When true, the estimator is wrapped in a 10-fold <c>CrossValidator</c>.</param>
        /// <param name="minRowCount">Maximum number of rows taken from every series.</param>
        /// <param name="trainingSetPercentage">Fraction of rows used for training; the remaining rows form the test set.</param>
        /// <param name="smaOut">SMA series; only read when SMA is selected.</param>
        /// <param name="wmaOut">WMA series; only read when WMA is selected.</param>
        /// <param name="emaOut">EMA series; only read when EMA is selected.</param>
        /// <param name="macdOut">MACD series; only read when MACD is selected.</param>
        /// <param name="rsiOut">RSI series; only read when RSI is selected.</param>
        /// <param name="williamsROut">Williams %R series; only read when WilliamsR is selected.</param>
        /// <param name="stochasticsOut">Stochastics series; only read when Stochastics is selected.</param>
        /// <param name="closesOut">Series the "label" column is built from; always read.</param>
        /// <returns>The evaluator's accuracy, or 0 when <paramref name="mlAlgorithm"/> matches none of the known algorithms.</returns>
        private static double CalculateAccuracy(List <int> indicators, int mlAlgorithm, bool isCrossValidationEnabled, int minRowCount, double trainingSetPercentage, double[] smaOut, double[] wmaOut, double[] emaOut, double[] macdOut, double[] rsiOut, double[] williamsROut, double[] stochasticsOut, double[] closesOut)
        {
            // Number of folds used whenever cross-validation is enabled.
            const int foldCount = 10;

            FeatureVector vector = new FeatureVector();

            if (indicators.Contains(IndicatorService.SMA))
            {
                vector.AddColumn("SMA", ToInvariantTextColumn(smaOut, minRowCount));
            }
            if (indicators.Contains(IndicatorService.WMA))
            {
                vector.AddColumn("WMA", ToInvariantTextColumn(wmaOut, minRowCount));
            }
            if (indicators.Contains(IndicatorService.EMA))
            {
                vector.AddColumn("EMA", ToInvariantTextColumn(emaOut, minRowCount));
            }
            if (indicators.Contains(IndicatorService.MACD))
            {
                vector.AddColumn("MACD", ToInvariantTextColumn(macdOut, minRowCount));
            }
            if (indicators.Contains(IndicatorService.RSI))
            {
                vector.AddColumn("RSI", ToInvariantTextColumn(rsiOut, minRowCount));
            }
            if (indicators.Contains(IndicatorService.WilliamsR))
            {
                vector.AddColumn("WilliamsR", ToInvariantTextColumn(williamsROut, minRowCount));
            }
            if (indicators.Contains(IndicatorService.Stochastics))
            {
                vector.AddColumn("Stochastics", ToInvariantTextColumn(stochasticsOut, minRowCount));
            }

            // Format the label with the invariant culture, like the feature columns.
            // string.Format("{0:0.0}", p) uses the current culture, and calling
            // ToString(IFormatProvider) on the resulting string does not re-format it.
            vector.AddColumn("label", closesOut.Select(p => (object)p.ToString("0.0", CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());

            // NOTE(review): hard-coded, user-specific export path; kept unchanged because callers
            // may rely on the file being written — consider making it configurable.
            new CSVExporter(vector).Export("c:\\users\\yasin\\indicatorOutput.csv");

            // Split each column: the first trainingCount rows for training, the rest for testing.
            int count         = vector.Values[0].Length;
            int trainingCount = (int)(count * trainingSetPercentage);

            FeatureVector training = new FeatureVector();
            for (int i = 0; i < vector.ColumnName.Count; i++)
            {
                training.AddColumn(vector.ColumnName[i], vector.Values[i].Take(trainingCount).ToArray());
            }

            FeatureVector test = new FeatureVector();
            for (int i = 0; i < vector.ColumnName.Count; i++)
            {
                test.AddColumn(vector.ColumnName[i], vector.Values[i].Skip(trainingCount).ToArray());
            }

            if (mlAlgorithm == MLAService.LIN_REG)
            {
                var linReg = new LinearRegression();
                var bce    = new BinaryClassificationEvaluator();
                FeatureVector predictions;
                if (isCrossValidationEnabled)
                {
                    var cv      = new CrossValidator(linReg, bce, foldCount);
                    var cvModel = (CrossValidatorModel)cv.Fit(training);
                    predictions = cvModel.transform(test);
                }
                else
                {
                    var linRegModel = (LinearRegressionModel)linReg.Fit(training);
                    predictions     = linRegModel.transform(test);
                }
                bce.evaluate(predictions);
                return bce.Accuracy;
            }

            if (mlAlgorithm == MLAService.LOG_REG)
            {
                var logReg = new LogisticRegression();
                var bce    = new BinaryClassificationEvaluator();
                FeatureVector predictions;
                if (isCrossValidationEnabled)
                {
                    var cv      = new CrossValidator(logReg, bce, foldCount);
                    var cvModel = (CrossValidatorModel)cv.Fit(training);
                    predictions = cvModel.transform(test);
                }
                else
                {
                    var logRegModel = (LogisticRegressionModel)logReg.Fit(training);
                    predictions     = logRegModel.transform(test);
                }
                bce.evaluate(predictions);
                return bce.Accuracy;
            }

            if (mlAlgorithm == MLAService.NAI_BAY)
            {
                var naiBay = new NaiveBayes();
                var bce    = new BinaryClassificationEvaluator();
                FeatureVector predictions;
                if (isCrossValidationEnabled)
                {
                    var cv      = new CrossValidator(naiBay, bce, foldCount);
                    var cvModel = (CrossValidatorModel)cv.Fit(training);
                    predictions = cvModel.transform(test);
                }
                else
                {
                    var naiBayModel = (NaiveBayesModel)naiBay.Fit(training);
                    predictions     = naiBayModel.transform(test);
                }
                bce.evaluate(predictions);
                return bce.Accuracy;
            }

            // Unknown algorithm identifier: nothing was evaluated.
            return 0;
        }

        /// <summary>
        /// Converts the first <paramref name="rowCount"/> values of a series to culture-invariant strings boxed as objects.
        /// </summary>
        private static object[] ToInvariantTextColumn(double[] values, int rowCount)
        {
            return values.Select(v => (object)v.ToString(CultureInfo.InvariantCulture)).Take(rowCount).ToArray();
        }