private static LogisticRegression<bool> GetLogisticRegression(double[,] samples, bool[] knownOutput)
{
    var clf = new LogisticRegression<bool>();
    clf.Fit(DenseMatrix.OfArray(samples), knownOutput);
    return clf;
}
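A minimal usage sketch for the helper above, assuming the MathNet-backed LogisticRegression<TLabel> API used throughout these examples; the feature values and labels below are invented for illustration.

// Hypothetical usage of GetLogisticRegression; the data is made up for illustration only.
double[,] samples =
{
    { 0.0, 1.0 },
    { 1.0, 1.0 },
    { 2.0, 0.0 },
    { 3.0, 0.0 },
};
bool[] labels = { false, false, true, true };

var classifier = GetLogisticRegression(samples, labels);

// Assumes a Predict(Matrix<double>) overload returning one label per row,
// as used in the test methods in this section.
bool[] predictions = classifier.Predict(DenseMatrix.OfArray(samples));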
private static void TestLogisticRegression(FeatureVector training, FeatureVector test)
{
    LogisticRegression lr = new LogisticRegression(0.1, 3000);
    LogisticRegressionModel lrModel = (LogisticRegressionModel)lr.Fit(training);
    FeatureVector predictions = lrModel.transform(test);
    PrintPredictionsAndEvaluate(predictions);
}
public void TestWriteParameters()
{
    var clf = new LogisticRegression<int>(random: new Random(0));
    clf.Fit(X, Y1);
    clf.Coef.MapInplace(v => 0);
    clf.Intercept.MapInplace(v => 0);
    Assert.IsTrue(clf.DecisionFunction(X).Column(0).AlmostEquals(new DenseVector(new double[] { 0, 0, 0 })));
}
public void TestLiblinearRandomState()
{
    var classification = SampleGenerator.MakeClassification(nSamples: 20);
    var lr1 = new LogisticRegression<int>(random: new Random(0));
    lr1.Fit(classification.X, classification.Y);
    var lr2 = new LogisticRegression<int>(random: new Random(0));
    lr2.Fit(classification.X, classification.Y);
    Assert.IsTrue(lr1.Coef.AlmostEquals(lr2.Coef));
}
public void TestEstimatorLogisticRegression()
{
    (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
    var trainer = new LogisticRegression(Env, "Label", "Features");
    var pipeWithTrainer = pipe.Append(trainer);
    TestEstimatorCore(pipeWithTrainer, dataView);

    var transformedDataView = pipe.Fit(dataView).Transform(dataView);
    var model = trainer.Fit(transformedDataView);
    trainer.Train(transformedDataView, model.Model);
    Done();
}
/// <summary>
/// Check that the model is able to fit the classification data.
/// </summary>
private void check_predictions(LogisticRegression<int> clf, Matrix<double> X, int[] y)
{
    int nSamples = y.Length;
    int[] classes = y.Distinct().OrderBy(v => v).ToArray();
    int nClasses = classes.Length;

    clf.Fit(X, y);
    var predicted = clf.Predict(X);
    Assert.IsTrue(classes.SequenceEqual(clf.Classes));
    Assert.AreEqual(nSamples, predicted.Length);
    Assert.IsTrue(y.SequenceEqual(predicted));

    Matrix<double> probabilities = clf.PredictProba(X);
    Assert.AreEqual(Tuple.Create(nSamples, nClasses), probabilities.Shape());
    Assert.IsTrue(probabilities.SumOfEveryRow().AlmostEquals(DenseVector.Create(probabilities.RowCount, i => 1.0)));
    Assert.IsTrue(y.SequenceEqual(probabilities.ArgmaxColumns()));
}
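A hypothetical call site for check_predictions, using a small made-up two-feature dataset built with MathNet's DenseMatrix; the test name and data are not from the original source.

[TestMethod]
public void TestCheckPredictionsOnToyData()
{
    // Made-up, linearly separable data: the sign of the features separates class 0 from class 1.
    Matrix<double> X = DenseMatrix.OfArray(new[,]
    {
        { -2.0, -1.0 },
        { -1.0, -1.0 },
        {  1.0,  1.0 },
        {  2.0,  1.0 },
    });
    int[] y = { 0, 0, 1, 1 };

    check_predictions(new LogisticRegression<int>(random: new Random(0)), X, y);
}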
private async Task DrawRoc()
{
    CanInteract = false;
    Status = "Training classifier...";
    await Task.Factory.StartNew(() => logistic.Fit(train, trainAns.Select(x => isAimClass(x)).ToArray()));

    var series = new LineSeries { Smooth = false, LineStyle = LineStyle.Solid };
    Status = "Iterating over thresholds...";
    // Sweep the decision threshold and record one (FPR, TPR) point per threshold.
    for (double tr = -10; tr <= 10; tr += 0.005)
    {
        logistic.Threshold = tr;
        var ans = await Task.Factory.StartNew(() => logistic.Classify(test));
        series.Points.Add(new DataPoint(CalcFPR(ans), CalcTPR(ans)));
    }

    var newModel = new PlotModel { Title = $"ROC for class {CurrentClass}" };
    newModel.Series.Add(series);
    newModel.Series.Add(new FunctionSeries(x => x, 0, 1, 0.01)); // diagonal reference line
    Model = newModel;
    NotifyOfPropertyChange(() => Model);

    var plot = series.Points.Distinct().OrderBy(x => x.X * 10 + x.Y).ToList();
    Status = $"Area under curve: {CalcArea(plot)}";
    CanInteract = true;
}
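CalcFPR and CalcTPR are referenced above but not shown in the source. A plausible sketch follows, under the assumptions that the view model holds the true test labels in a field such as testAns (hypothetical name) and that Classify returns one bool per test sample indicating membership in the aim class.

// Hypothetical implementations of the rate helpers used in DrawRoc.
// Assumes a field like `bool[] testAns` holding the true class of each test sample.
private double CalcTPR(bool[] predicted)
{
    int truePositives = predicted.Where((p, i) => p && testAns[i]).Count();
    int positives = testAns.Count(t => t);
    return positives == 0 ? 0 : (double)truePositives / positives;
}

private double CalcFPR(bool[] predicted)
{
    int falsePositives = predicted.Where((p, i) => p && !testAns[i]).Count();
    int negatives = testAns.Count(t => !t);
    return negatives == 0 ? 0 : (double)falsePositives / negatives;
}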
public void TestSparsify()
{
    string[] target = iris.Target.Select(t => iris.TargetNames[t]).ToArray();
    var clf = new LogisticRegression<string>(random: new Random(0));
    clf.Fit(iris.Data, target);

    Matrix<double> predDD = clf.DecisionFunction(iris.Data);

    clf.Sparsify();
    Assert.IsInstanceOfType(clf.Coef, typeof(SparseMatrix));
    Matrix<double> predSD = clf.DecisionFunction(iris.Data);

    Matrix spData = SparseMatrix.OfMatrix(iris.Data);
    Matrix<double> predSS = clf.DecisionFunction(spData);

    clf.Densify();
    Matrix<double> predDS = clf.DecisionFunction(spData);

    Assert.IsTrue(predDD.AlmostEquals(predSD));
    Assert.IsTrue(predDD.AlmostEquals(predSS));
    Assert.IsTrue(predDD.AlmostEquals(predDS));
}
public void TestPredictIris()
{
    int nSamples = iris.Data.RowCount;
    string[] target = iris.Target.Select(v => iris.TargetNames[v]).ToArray();
    var clf = new LogisticRegression<string>(c: iris.Data.RowCount);
    clf.Fit(iris.Data, target);
    Assert.IsTrue(target.Distinct().OrderBy(t => t).SequenceEqual(clf.Classes));

    var pred = clf.Predict(iris.Data);
    var matchingN = pred.Zip(target, Tuple.Create).Where(t => t.Item1 == t.Item2).Count();
    Assert.IsTrue(1.0 * matchingN / pred.Length > 0.95);

    var probabilities = clf.PredictProba(iris.Data);
    Assert.IsTrue(probabilities.SumOfEveryRow().AlmostEquals(DenseVector.Create(nSamples, i => 1.0)));

    pred = probabilities.RowEnumerator().Select(r => iris.TargetNames[r.Item2.MaximumIndex()]).ToArray();
    matchingN = pred.Zip(target, Tuple.Create).Where(t => t.Item1 == t.Item2).Count();
    Assert.IsTrue(1.0 * matchingN / pred.Length > 0.95);
}
private void buttonForDataSplitNext_Click(object sender, EventArgs e)
{
    trainingSetPercentage = (double)numericUpDownForTrainingSetPercent.Value / 100.0;
    numFolds = (int)numericUpDownForNumFolds.Value;

    double[] smaOut = null;
    double[] wmaOut = null;
    double[] emaOut = null;
    double[] macdOut = null;
    double[] stochasticsOut = null;
    double[] williamsROut = null;
    double[] rsiOut = null;
    double[] closesOut = null;

    var data = IndicatorService.GetData(code, targetDate, new string[] { "Tarih", "Kapanis" }, numberOfData + 1);

    // Compute only the indicators the user selected.
    if (isSMAChecked) { smaOut = IndicatorDataPreprocessor.GetSMAOut(MovingAverage.Simple(code, targetDate, smaPeriod, numberOfData)); }
    if (isWMAChecked) { wmaOut = IndicatorDataPreprocessor.GetWMAOut(MovingAverage.Weighted(code, targetDate, wmaPeriod, numberOfData)); }
    if (isEMAChecked) { emaOut = IndicatorDataPreprocessor.GetEMAOut(MovingAverage.Exponential(code, targetDate, emaPeriod, numberOfData)); }
    if (isMACDChecked) { macdOut = IndicatorDataPreprocessor.GetMACDOut(new MovingAverageConvergenceDivergence(code, targetDate, firstPeriod, secondPeriod, triggerPeriod, numberOfData)); }
    if (isStochasticsChecked) { stochasticsOut = IndicatorDataPreprocessor.GetStochasticsOut(new Stochastics(code, targetDate, fastKPeriod, fastDPeriod, slowDPeriod, numberOfData)); }
    if (isWilliamsRChecked) { williamsROut = IndicatorDataPreprocessor.GetWilliamsROut(WilliamsR.Wsr(code, targetDate, williamsRPeriod, numberOfData)); }
    if (isRSIChecked) { rsiOut = IndicatorDataPreprocessor.GetRSIOut(RelativeStrengthIndex.Rsi(code, targetDate, rsiPeriod, numberOfData)); }
    closesOut = IndicatorDataPreprocessor.GetClosesOut(numberOfData, data);

    // All columns must have equal length, so truncate to the shortest indicator series.
    int minRowCount = 1000000;
    if (smaOut != null) { minRowCount = smaOut.Length; }
    if (wmaOut != null) { minRowCount = minRowCount < wmaOut.Length ? minRowCount : wmaOut.Length; }
    if (emaOut != null) { minRowCount = minRowCount < emaOut.Length ? minRowCount : emaOut.Length; }
    if (macdOut != null) { minRowCount = minRowCount < macdOut.Length ? minRowCount : macdOut.Length; }
    if (rsiOut != null) { minRowCount = minRowCount < rsiOut.Length ? minRowCount : rsiOut.Length; }
    if (williamsROut != null) { minRowCount = minRowCount < williamsROut.Length ? minRowCount : williamsROut.Length; }
    if (stochasticsOut != null) { minRowCount = minRowCount < stochasticsOut.Length ? minRowCount : stochasticsOut.Length; }
    if (closesOut != null) { minRowCount = minRowCount < closesOut.Length ? minRowCount : closesOut.Length; }

    // Assemble the feature vector: one column per selected indicator plus the label column.
    var fv = new FeatureVector();
    if (isSMAChecked) { fv.AddColumn("SMA", smaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (isWMAChecked) { fv.AddColumn("WMA", wmaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (isEMAChecked) { fv.AddColumn("EMA", emaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (isMACDChecked) { fv.AddColumn("MACD", macdOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (isRSIChecked) { fv.AddColumn("RSI", rsiOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (isWilliamsRChecked) { fv.AddColumn("WilliamsR", williamsROut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (isStochasticsChecked) { fv.AddColumn("Stochastics", stochasticsOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    fv.AddColumn("label", closesOut.Select(p => (object)string.Format("{0:0.0}", p).ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());

    // Split the columns into training and test sets by trainingSetPercentage.
    var training = new FeatureVector();
    var test = new FeatureVector();
    int count = fv.Values[0].Length;
    for (int i = 0; i < fv.ColumnName.Count; i++)
    {
        training.AddColumn(fv.ColumnName[i], fv.Values[i].Take((int)(count * trainingSetPercentage)).ToArray());
    }
    for (int i = 0; i < fv.ColumnName.Count; i++)
    {
        // Take(count) simply takes the remaining elements; fewer than count remain after the Skip.
        test.AddColumn(fv.ColumnName[i], fv.Values[i].Skip((int)(count * trainingSetPercentage)).Take(count).ToArray());
    }

    if (numFolds > 0)
    {
        // Evaluate each model with k-fold cross-validation.
        BinaryClassificationEvaluator bce1 = new BinaryClassificationEvaluator();
        LinearRegression linearRegression = new LinearRegression();
        CrossValidator cvLinReg = new CrossValidator(linearRegression, bce1, numFolds);
        CrossValidatorModel cvLinRegModel = (CrossValidatorModel)cvLinReg.Fit(training);
        FeatureVector linRegPredictions = cvLinRegModel.transform(test);
        bce1.evaluate(linRegPredictions);
        linRegAcc = bce1.Accuracy;

        BinaryClassificationEvaluator bce2 = new BinaryClassificationEvaluator();
        LogisticRegression logisticRegression = new LogisticRegression();
        CrossValidator cvLogReg = new CrossValidator(logisticRegression, bce2, numFolds);
        CrossValidatorModel cvLogRegModel = (CrossValidatorModel)cvLogReg.Fit(training);
        FeatureVector logRegPredictions = cvLogRegModel.transform(test);
        bce2.evaluate(logRegPredictions);
        logRegAcc = bce2.Accuracy;

        BinaryClassificationEvaluator bce3 = new BinaryClassificationEvaluator();
        NaiveBayes naiveBayes = new NaiveBayes();
        CrossValidator cvNaiBay = new CrossValidator(naiveBayes, bce3, numFolds);
        CrossValidatorModel cvNaiBayModel = (CrossValidatorModel)cvNaiBay.Fit(training);
        FeatureVector naiBayPredictions = cvNaiBayModel.transform(test);
        bce3.evaluate(naiBayPredictions);
        naiBayAcc = bce3.Accuracy;
    }
    else
    {
        // Train each model once on the training split and evaluate on the test split.
        BinaryClassificationEvaluator bce1 = new BinaryClassificationEvaluator();
        LinearRegression linearRegression = new LinearRegression();
        LinearRegressionModel linearRegressionModel = (LinearRegressionModel)linearRegression.Fit(training);
        FeatureVector linRegPredictions = linearRegressionModel.transform(test);
        bce1.evaluate(linRegPredictions);
        linRegAcc = bce1.Accuracy;

        BinaryClassificationEvaluator bce2 = new BinaryClassificationEvaluator();
        LogisticRegression logisticRegression = new LogisticRegression();
        LogisticRegressionModel logisticRegressionModel = (LogisticRegressionModel)logisticRegression.Fit(training);
        FeatureVector logRegPredictions = logisticRegressionModel.transform(test);
        bce2.evaluate(logRegPredictions);
        logRegAcc = bce2.Accuracy;

        BinaryClassificationEvaluator bce3 = new BinaryClassificationEvaluator();
        NaiveBayes naiveBayes = new NaiveBayes();
        NaiveBayesModel naiveBayesModel = (NaiveBayesModel)naiveBayes.Fit(training);
        FeatureVector naiBayPredictions = naiveBayesModel.transform(test);
        bce3.evaluate(naiBayPredictions);
        naiBayAcc = bce3.Accuracy;
    }

    labelForLinRegAcc.Text = linRegAcc.ToString();
    labelForLogRegAcc.Text = logRegAcc.ToString();
    labelForNaiBayAcc.Text = naiBayAcc.ToString();
    panelForResults.BringToFront();
}
private static double CalculateAccuracy(List<int> indicators, int mlAlgorithm, bool isCrossValidationEnabled, int minRowCount, double trainingSetPercentage,
    double[] smaOut, double[] wmaOut, double[] emaOut, double[] macdOut, double[] rsiOut, double[] williamsROut, double[] stochasticsOut, double[] closesOut)
{
    // Build the feature vector: one column per requested indicator plus the label column.
    FeatureVector vector = new FeatureVector();
    if (indicators.Contains(IndicatorService.SMA)) { vector.AddColumn("SMA", smaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (indicators.Contains(IndicatorService.WMA)) { vector.AddColumn("WMA", wmaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (indicators.Contains(IndicatorService.EMA)) { vector.AddColumn("EMA", emaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (indicators.Contains(IndicatorService.MACD)) { vector.AddColumn("MACD", macdOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (indicators.Contains(IndicatorService.RSI)) { vector.AddColumn("RSI", rsiOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (indicators.Contains(IndicatorService.WilliamsR)) { vector.AddColumn("WilliamsR", williamsROut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    if (indicators.Contains(IndicatorService.Stochastics)) { vector.AddColumn("Stochastics", stochasticsOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray()); }
    vector.AddColumn("label", closesOut.Select(p => (object)string.Format("{0:0.0}", p).ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());

    new CSVExporter(vector).Export("c:\\users\\yasin\\indicatorOutput.csv");

    // Split the columns into training and test sets by trainingSetPercentage.
    int count = vector.Values[0].Length;
    FeatureVector training = new FeatureVector();
    for (int i = 0; i < vector.ColumnName.Count; i++)
    {
        training.AddColumn(vector.ColumnName[i], vector.Values[i].Take((int)(count * trainingSetPercentage)).ToArray());
    }
    FeatureVector test = new FeatureVector();
    for (int i = 0; i < vector.ColumnName.Count; i++)
    {
        test.AddColumn(vector.ColumnName[i], vector.Values[i].Skip((int)(count * trainingSetPercentage)).Take(count).ToArray());
    }

    // Train and evaluate the requested model, optionally with 10-fold cross-validation.
    double accuracy = 0;
    if (mlAlgorithm == MLAService.LIN_REG)
    {
        var linReg = new LinearRegression();
        var bce = new BinaryClassificationEvaluator();
        if (isCrossValidationEnabled)
        {
            var cv = new CrossValidator(linReg, bce, 10);
            var cvModel = (CrossValidatorModel)cv.Fit(training);
            var predictions = cvModel.transform(test);
            bce.evaluate(predictions);
            accuracy = bce.Accuracy;
        }
        else
        {
            var linRegModel = (LinearRegressionModel)linReg.Fit(training);
            var predictions = linRegModel.transform(test);
            bce.evaluate(predictions);
            accuracy = bce.Accuracy;
        }
    }
    else if (mlAlgorithm == MLAService.LOG_REG)
    {
        var logReg = new LogisticRegression();
        var bce = new BinaryClassificationEvaluator();
        if (isCrossValidationEnabled)
        {
            var cv = new CrossValidator(logReg, bce, 10);
            var cvModel = (CrossValidatorModel)cv.Fit(training);
            var predictions = cvModel.transform(test);
            bce.evaluate(predictions);
            accuracy = bce.Accuracy;
        }
        else
        {
            var logRegModel = (LogisticRegressionModel)logReg.Fit(training);
            var predictions = logRegModel.transform(test);
            bce.evaluate(predictions);
            accuracy = bce.Accuracy;
        }
    }
    else if (mlAlgorithm == MLAService.NAI_BAY)
    {
        var naiBay = new NaiveBayes();
        var bce = new BinaryClassificationEvaluator();
        if (isCrossValidationEnabled)
        {
            var cv = new CrossValidator(naiBay, bce, 10);
            var cvModel = (CrossValidatorModel)cv.Fit(training);
            var predictions = cvModel.transform(test);
            bce.evaluate(predictions);
            accuracy = bce.Accuracy;
        }
        else
        {
            var naiBayModel = (NaiveBayesModel)naiBay.Fit(training);
            var predictions = naiBayModel.transform(test);
            bce.evaluate(predictions);
            accuracy = bce.Accuracy;
        }
    }
    return accuracy;
}
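A hypothetical call of CalculateAccuracy, assuming the indicator arrays have already been computed as in the button handler above; the selected indicators, split ratio, and variable names are illustrative only.

// Hypothetical call site; smaOut, rsiOut, and closesOut are assumed to be precomputed
// as in buttonForDataSplitNext_Click, and unused indicator arrays are passed as null.
var selectedIndicators = new List<int> { IndicatorService.SMA, IndicatorService.RSI };
int minRowCount = Math.Min(smaOut.Length, Math.Min(rsiOut.Length, closesOut.Length));

double accuracy = CalculateAccuracy(
    selectedIndicators,
    MLAService.LOG_REG,   // evaluate the logistic regression model
    true,                 // use 10-fold cross-validation
    minRowCount,
    0.8,                  // training set percentage
    smaOut, null, null, null, rsiOut, null, null, closesOut);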