/// <summary>
        /// Expresses the originally analyzed data in terms of the principal components.
        /// </summary>
        /// <returns>A multivariate sample in which each column holds the weights of one principal component
        /// across the entries of the originally analyzed sample.</returns>
        public MultivariateSample TransformedSample()
        {
            int width = Dimension;
            MultivariateSample transformed = new MultivariateSample(width);

            // One scratch buffer is refilled for every entry.
            // NOTE(review): this relies on Add copying the values rather than keeping the array reference.
            double[] buffer = new double[width];

            int row = 0;
            while (row < rows)
            {
                // The values for entry 'row' start at offset rows * row in utStore.
                Array.Copy(utStore, rows * row, buffer, 0, width);
                transformed.Add(buffer);
                row++;
            }

            return transformed;
        }
        /// <summary>
        /// Computes the centroid of a set of points together with the direction of their first principal component.
        /// </summary>
        /// <param name="points">The points to analyze.</param>
        /// <returns>A tuple whose first item is the point of average X and Y, and whose second item is built from
        /// the first two entries of the first principal component's normalized vector.</returns>
        public static Tuple<Point, Vector> Compute(Point[] points)
        {
            var meanX = points.Average(pt => pt.X);
            var meanY = points.Average(pt => pt.Y);

            // Shift every point by the centroid before handing the coordinates to the analysis.
            var centroidOffset = new Vector(meanX, meanY);
            var centered = new MultivariateSample(2);
            foreach (var pt in points)
            {
                var moved = pt - centroidOffset;
                centered.Add(moved.X, moved.Y);
            }

            var direction = centered.PrincipalComponentAnalysis().Component(0).NormalizedVector();

            var centroid = new Point(meanX, meanY);
            return Tuple.Create(centroid, new Vector(direction[0], direction[1]));
        }
Esempio n. 3
0
        /// <summary>
        /// Beta calculation: estimates beta by linear regression and derives the cost of equity and WACC from it.
        /// </summary>
        /// <remarks>
        /// Every 22nd price record is visited, up to 36 records. For each pair of consecutive visited records
        /// with usable values, the pair ((previous price / current price) - 1, (previous index / current index) - 1)
        /// is recorded. If more than 10 pairs were recorded, column 0 is regressed on column 1 and the slope is
        /// stored as <c>Beta</c>; <c>CostOfEquity</c> is then RiskFreeRate + Beta * MarketRiskPremium.
        /// <c>Wacc</c> is only written when the latest financial record has a shareholders' equity value and the
        /// total of equity and debt is non-zero. With 10 or fewer pairs nothing is written.
        /// </remarks>
        /// <param name="finDataAdapter">Adapter for retrieving all financial data.</param>
        /// <param name="dcfInput">Holder of the DCF calculation assumptions; receives the results.</param>
        public static void CalculateBeta(FinDataAdapter finDataAdapter, DcfInput dcfInput)
        {
            const int samplingStep = 22;
            const int maxSampledRecords = 36;
            const int minPairsExclusive = 10;

            var priceDao = finDataAdapter.PriceDataDao;
            var priceDatas = priceDao.PriceDatas;

            MultivariateSample mv = new MultivariateSample(2);
            decimal prevPrice = 0;      // 0 doubles as "no previous price yet"
            double? prevIndex = null;

            // Stop as soon as the record cap is reached instead of walking the rest of the list doing nothing.
            for (int i = 0, sampled = 0;
                 i < priceDatas.Count && sampled < maxSampledRecords;
                 i += samplingStep, sampled++)
            {
                PriceData pd = priceDatas[i];
                decimal curPrice = pd.AdjClose;
                double? curIndex = priceDao.GetClosePrice(pd.PriceDate, priceDao.IndexDatas)[0];

                // A zero divisor would throw for the decimal price and yield Infinity for the index,
                // so both are excluded.
                bool pairUsable = curPrice != 0 && prevPrice != 0
                                  && curIndex != null && prevIndex != null
                                  && curIndex != 0;
                if (pairUsable)
                {
                    mv.Add(new double[]
                    {
                        (double)(prevPrice / curPrice) - 1,
                        (double)(prevIndex / curIndex) - 1
                    });
                }

                prevPrice = curPrice;
                prevIndex = curIndex;
            }

            if (mv.Count <= minPairsExclusive)
            {
                return;
            }

            // Column 0 is the regression output, so parameter 0 is the intercept and parameter 1 the slope.
            FitResult fitResult = mv.LinearRegression(0);
            dcfInput.Beta = fitResult.Parameter(1).Value;
            dcfInput.CostOfEquity = dcfInput.RiskFreeRate + dcfInput.Beta * dcfInput.MarketRiskPremium;

            List<FinData> finDatas = finDataAdapter.FinDataDao.FinDatas;
            if (finDatas.Count == 0)
            {
                // No balance sheet available: WACC cannot be derived.
                return;
            }

            FinData latest = finDatas[finDatas.Count - 1];

            double debt = 0;
            if (latest.BsCurrentPortionOfLongTermDebt != null)
            {
                debt += (double)latest.BsCurrentPortionOfLongTermDebt;
            }
            if (latest.BsTotalLongTermDebt != null)
            {
                debt += (double)latest.BsTotalLongTermDebt;
            }

            // Explicit null check instead of catching the InvalidOperationException thrown by casting null.
            if (latest.BsShareholdersEquity1 == null)
            {
                return;
            }
            double equity = (double)latest.BsShareholdersEquity1;

            double total = equity + debt;
            if (total == 0)
            {
                // The capital weights are undefined; leave Wacc untouched rather than storing NaN/Infinity.
                return;
            }

            dcfInput.Wacc = dcfInput.CostOfEquity * (equity / total) +
                            dcfInput.CostOfDebt * (debt / total) * (1 - dcfInput.TaxRate);
        }
Esempio n. 4
0
        /// <summary>
        /// Correlation calculation for selected indicators (Total Assets, Total Liabilities, Total Current Assets,
        /// Total Current Liabilities) versus Revenue.
        /// </summary>
        /// <remarks>
        /// The 20 most recent financial records (walking from the end of the list) are used. A missing value is
        /// treated as 0, and a (item, revenue) pair is skipped when both values are 0. Each balance-sheet item is
        /// regressed on revenue; the intercept is stored in the matching <c>...Alpha</c> property and the slope in
        /// the matching <c>...Beta</c> property. Each regression runs only when its own sample has at least
        /// 3 observations, so a short sample for one item no longer makes the whole method throw.
        /// </remarks>
        /// <param name="finDataAdapter">Adapter for retrieving all financial data.</param>
        /// <param name="dcfInput">Holder of the DCF calculation assumptions; receives the fitted parameters.</param>
        public static void CalculateCorrelation(FinDataAdapter finDataAdapter, DcfInput dcfInput)
        {
            const int maxPeriods = 20;
            const int minObservations = 3;

            MultivariateSample mvTA = new MultivariateSample(2);
            MultivariateSample mvTL = new MultivariateSample(2);
            MultivariateSample mvCA = new MultivariateSample(2);
            MultivariateSample mvCL = new MultivariateSample(2);

            List<FinData> finDatas = finDataAdapter.FinDataDao.FinDatas;

            // Walk backwards from the newest record and stop once the period cap is reached.
            for (int i = finDatas.Count - 1, used = 0; i >= 0 && used < maxPeriods; i--, used++)
            {
                FinData fd = finDatas[i];

                // Explicit null checks replace the former cast-and-catch of InvalidOperationException.
                double revenue = fd.IsRevenue != null ? (double)fd.IsRevenue : 0.0;
                double currentAssets = fd.BsTotalCurrentAssets != null ? (double)fd.BsTotalCurrentAssets : 0.0;
                double currentLiabilities = fd.BsTotalCurrentLiabilities != null ? (double)fd.BsTotalCurrentLiabilities : 0.0;
                double totalAssets = fd.BsTotalAssets != null ? (double)fd.BsTotalAssets : 0.0;
                double totalLiabilities = fd.BsTotalLiabilities != null ? (double)fd.BsTotalLiabilities : 0.0;

                AddUnlessBothZero(mvCA, currentAssets, revenue);
                AddUnlessBothZero(mvCL, currentLiabilities, revenue);
                AddUnlessBothZero(mvTA, totalAssets, revenue);
                AddUnlessBothZero(mvTL, totalLiabilities, revenue);
            }

            // There must be at least 3 observations for a fit. Column 0 (the balance-sheet item) is the
            // regression output, so parameter 0 is the intercept and parameter 1 the slope on revenue.
            if (mvCA.Count >= minObservations)
            {
                FitResult fit = mvCA.LinearRegression(0);
                dcfInput.TotalCurrentAssetsBeta = fit.Parameter(1).Value;
                dcfInput.TotalCurrentAssetsAlpha = fit.Parameter(0).Value;
            }

            if (mvCL.Count >= minObservations)
            {
                FitResult fit = mvCL.LinearRegression(0);
                dcfInput.TotalCurrentLiabilitiesBeta = fit.Parameter(1).Value;
                dcfInput.TotalCurrentLiabilitiesAlpha = fit.Parameter(0).Value;
            }

            if (mvTA.Count >= minObservations)
            {
                FitResult fit = mvTA.LinearRegression(0);
                dcfInput.TotalAssetsBeta = fit.Parameter(1).Value;
                dcfInput.TotalAssetsAlpha = fit.Parameter(0).Value;
            }

            if (mvTL.Count >= minObservations)
            {
                FitResult fit = mvTL.LinearRegression(0);
                dcfInput.TotalLiabilitiesBeta = fit.Parameter(1).Value;
                dcfInput.TotalLiabilitiesAlpha = fit.Parameter(0).Value;
            }
        }

        /// <summary>
        /// Adds the pair (<paramref name="item"/>, <paramref name="revenue"/>) to <paramref name="sample"/>
        /// unless both values are zero.
        /// </summary>
        private static void AddUnlessBothZero(MultivariateSample sample, double item, double revenue)
        {
            if (item != 0 || revenue != 0)
            {
                sample.Add(new double[] { item, revenue });
            }
        }
        // Regresses pure noise many times and checks that the collected goodness-of-fit statistics
        // are compatible with the expected Fisher distribution.
        public void MultivariateLinearRegressionNullDistribution()
        {
            const int columns = 4;
            const int trials = 64;
            const int rowsPerTrial = 8;

            Random random = new Random(1);
            NormalDistribution standardNormal = new NormalDistribution();
            Sample statistics = new Sample();

            for (int trial = 0; trial < trials; trial++) {
                // fill a fresh sample with independent normal deviates
                MultivariateSample noise = new MultivariateSample(columns);
                for (int r = 0; r < rowsPerTrial; r++) {
                    double[] row = new double[columns];
                    for (int c = 0; c < row.Length; c++) {
                        row[c] = standardNormal.GetRandomValue(random);
                    }
                    noise.Add(row);
                }

                FitResult fit = noise.LinearRegression(0);
                statistics.Add(fit.GoodnessOfFit.Statistic);
            }

            // conduct a KS test against F(3, 4), i.e. F(columns - 1, rowsPerTrial - columns)
            FisherDistribution expected = new FisherDistribution(columns - 1, rowsPerTrial - columns);
            TestResult ks = statistics.KolmogorovSmirnovTest(expected);
            Assert.IsTrue(ks.LeftProbability < 0.95);
        }
Esempio n. 6
0
        // Draws binary outcomes from a logistic model and prints the parameters fitted through the
        // bivariate and the multivariate logistic regression entry points side by side.
        public void BivariateLogisticRegression()
        {
            double[] coefficients = { -0.1, 1.0 };

            Random random = new Random(1);
            UniformDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 4.0));

            BivariateSample bivariate = new BivariateSample();
            MultivariateSample multivariate = new MultivariateSample(2);
            for (int draw = 0; draw < 1000; draw++) {
                double x = xDistribution.GetRandomValue(random);

                // logistic probability of a positive outcome at x
                double logit = coefficients[0] * x + coefficients[1];
                double odds = Math.Exp(logit);
                double probability = odds / (1.0 + odds);

                double y = 0.0;
                if (random.NextDouble() < probability) {
                    y = 1.0;
                }

                bivariate.Add(x, y);
                multivariate.Add(x, y);
            }

            double covarianceOverVarianceX = bivariate.Covariance / bivariate.X.Variance;
            double meanY = bivariate.Y.Mean;
            Console.WriteLine(covarianceOverVarianceX / meanY / (1.0 - meanY));
            Console.WriteLine(covarianceOverVarianceX / bivariate.Y.Variance);

            FitResult fromBivariate = bivariate.LinearLogisticRegression();
            // computed but not printed; the call itself still exercises the two-column path
            FitResult fromTwoColumns = multivariate.TwoColumns(0, 1).LinearLogisticRegression();
            FitResult fromMultivariate = multivariate.LogisticLinearRegression(1);

            int parameterCount = fromBivariate.Dimension;
            for (int index = 0; index < parameterCount; index++) {
                Console.WriteLine("{0} {1} {2}", index, fromBivariate.Parameter(index), fromMultivariate.Parameter(index));
            }
        }
Esempio n. 7
0
        // Checks that the uncertainty reported for each population moment estimate agrees with the
        // spread actually observed across many independent samples.
        public void SamplePopulationMomentEstimateVariances()
        {
            Distribution population = new LognormalDistribution();

            // for various sample sizes...
            foreach (int size in TestUtilities.GenerateIntegerValues(4, 32, 8)) {

                Console.WriteLine("n={0}", size);

                // estimator values in one sample, their claimed variances in the other
                MultivariateSample estimates = new MultivariateSample("M1", "C2", "C3", "C4");
                MultivariateSample variances = new MultivariateSample("M1", "C2", "C3", "C4");

                for (int trial = 0; trial < 256; trial++) {

                    Sample drawn = TestUtilities.CreateSample(population, size, 512 * size + trial + 1);

                    UncertainValue[] moments = {
                        drawn.PopulationMean,
                        drawn.PopulationVariance,
                        drawn.PopulationMomentAboutMean(3),
                        drawn.PopulationMomentAboutMean(4)
                    };

                    double[] values = new double[moments.Length];
                    double[] claimedVariances = new double[moments.Length];
                    for (int m = 0; m < moments.Length; m++) {
                        values[m] = moments[m].Value;
                        claimedVariances[m] = MoreMath.Sqr(moments[m].Uncertainty);
                    }
                    estimates.Add(values);
                    variances.Add(claimedVariances);

                }

                // the claimed variance should agree with the measured variance of the estimators
                for (int column = 0; column < estimates.Dimension; column++) {
                    var estimateColumn = estimates.Column(column);
                    var measuredVariance = estimateColumn.PopulationVariance;
                    var meanClaimedVariance = variances.Column(column).Mean;

                    Console.WriteLine("{0} {1} {2}", estimateColumn.Name, measuredVariance, meanClaimedVariance);
                    Assert.IsTrue(measuredVariance.ConfidenceInterval(0.95).ClosedContains(meanClaimedVariance));
                }

            }
        }
        // Builds a sample of n vectors of the form M + A * V, where A is the square-root matrix of the
        // Cholesky decomposition of C and V is a vector of standard normal deviates. The random
        // generator is seeded with 1, so repeated calls produce the same sample.
        public MultivariateSample CreateMultivariateNormalSample(ColumnVector M, SymmetricMatrix C, int n)
        {
            int dimension = M.Dimension;

            MultivariateSample result = new MultivariateSample(dimension);

            SquareMatrix root = C.CholeskyDecomposition().SquareRootMatrix();

            Random random = new Random(1);
            Distribution standardNormal = new NormalDistribution();

            int produced = 0;
            while (produced < n) {

                // turn uniform deviates into normal deviates via the inverse CDF
                ColumnVector deviates = new ColumnVector(dimension);
                for (int component = 0; component < dimension; component++) {
                    deviates[component] = standardNormal.InverseLeftProbability(random.NextDouble());
                }

                // shift and correlate the deviates, then store the resulting vector
                result.Add(M + root * deviates);

                produced++;
            }

            return result;
        }
        // The same two-column regression is performed three ways (original column order, swapped
        // column order, and as a bivariate sample); all three must report the same fit.
        public void MultivariateLinearRegressionAgreement()
        {
            Random random = new Random(1);

            MultivariateSample original = new MultivariateSample(2);
            for (int row = 0; row < 10; row++) {
                double first = random.NextDouble();
                double second = random.NextDouble();
                original.Add(first, second);
            }
            FitResult originalFit = original.LinearRegression(0);
            ColumnVector originalParameters = originalFit.Parameters;
            SymmetricMatrix originalCovariance = originalFit.CovarianceMatrix;

            // swapped columns: parameter indices 0 and 1 trade places
            MultivariateSample swapped = original.Columns(1, 0);
            FitResult swappedFit = swapped.LinearRegression(1);
            ColumnVector swappedParameters = swappedFit.Parameters;
            SymmetricMatrix swappedCovariance = swappedFit.CovarianceMatrix;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalParameters[0], swappedParameters[1]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalParameters[1], swappedParameters[0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance[0, 0], swappedCovariance[1, 1]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance[0, 1], swappedCovariance[1, 0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance[1, 1], swappedCovariance[0, 0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalFit.GoodnessOfFit.Statistic, swappedFit.GoodnessOfFit.Statistic));

            // bivariate view: parameters line up with the original fit directly
            BivariateSample pair = original.TwoColumns(1, 0);
            FitResult pairFit = pair.LinearRegression();
            ColumnVector pairParameters = pairFit.Parameters;
            SymmetricMatrix pairCovariance = pairFit.CovarianceMatrix;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalParameters, pairParameters));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance, pairCovariance));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(originalFit.GoodnessOfFit.Statistic, pairFit.GoodnessOfFit.Statistic));
        }
        // Smoke test: loads a fixed three-column data set, prints its size and runs a linear
        // regression with column 0 as the output. Nothing is asserted; the fit just has to complete.
        public void OldMultivariateLinearRegressionTest()
        {
            double[][] observations = {
                new double[] { 98322, 81449, 269465 },
                new double[] { 65060, 31749, 121900 },
                new double[] { 36052, 14631, 37004 },
                new double[] { 31829, 27732, 91400 },
                new double[] { 7101, 9693, 54900 },
                new double[] { 41294, 4268, 16160 },
                new double[] { 16614, 4697, 21500 },
                new double[] { 3449, 4233, 9306 },
                new double[] { 3386, 5293, 38300 },
                new double[] { 6242, 2039, 13369 },
                new double[] { 14036, 7893, 29901 },
                new double[] { 2636, 3345, 10930 },
                new double[] { 869, 1135, 5100 },
                new double[] { 452, 727, 7653 }
            };

            MultivariateSample sample = new MultivariateSample(3);
            foreach (double[] observation in observations) {
                sample.Add(observation);
            }

            Console.WriteLine(sample.Count);

            sample.LinearRegression(0);
        }
        // Runs a principal component analysis on a small random sample and verifies the reported
        // component properties, including that rebuilding the data from the leading components
        // reproduces the claimed cumulative variance fractions.
        public void PrincipalComponentAnalysis()
        {
            const int dimension = 3;
            const int entryCount = 10;

            // build a random sample whose columns have different spreads
            Random random = new Random(1);
            MultivariateSample sample = new MultivariateSample(dimension);
            for (int n = 0; n < entryCount; n++) {
                double x = 1.0 * random.NextDouble() - 1.0;
                double y = 4.0 * random.NextDouble() - 2.0;
                double z = 9.0 * random.NextDouble() - 3.0;
                sample.Add(x, y, z);
            }

            // remember the mean of every column
            RowVector means = new RowVector(dimension);
            for (int column = 0; column < dimension; column++) {
                means[column] = sample.Column(column).Mean;
            }

            // total variance of the untouched sample
            double totalVariance = GetTotalVariance(sample);
            Console.WriteLine(totalVariance);

            // run the analysis; it must report the same shape as the sample
            PrincipalComponentAnalysis analysis = sample.PrincipalComponentAnalysis();
            Assert.IsTrue(analysis.Dimension == sample.Dimension);
            Assert.IsTrue(analysis.Count == sample.Count);

            // per-component consistency checks
            for (int index = 0; index < analysis.Dimension; index++) {
                PrincipalComponent current = analysis.Component(index);
                Assert.IsTrue(current.Index == index);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(current.Weight * current.NormalizedVector(), current.ScaledVector()));
                Assert.IsTrue((0.0 <= current.VarianceFraction) && (current.VarianceFraction <= 1.0));

                if (index == 0) {
                    // for the first component the cumulative fraction is its own fraction
                    Assert.IsTrue(current.VarianceFraction == current.CumulativeVarianceFraction);
                    continue;
                }

                // later components explain no more than their predecessor, and fractions accumulate
                PrincipalComponent previous = analysis.Component(index - 1);
                Assert.IsTrue(current.VarianceFraction <= previous.VarianceFraction);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(previous.CumulativeVarianceFraction + current.VarianceFraction, current.CumulativeVarianceFraction));
            }

            // the sample expressed in terms of principal components
            MultivariateSample scores = analysis.TransformedSample();

            // rebuild the data from the first 'kept' components and compare explained variance
            for (int kept = 1; kept <= dimension; kept++) {
                MultivariateSample rebuilt = new MultivariateSample(dimension);
                foreach (double[] score in scores) {
                    RowVector entry = means.Copy();
                    for (int c = 0; c < kept; c++) {
                        PrincipalComponent component = analysis.Component(c);
                        entry += (score[c] * component.Weight) * component.NormalizedVector();
                    }
                    rebuilt.Add(entry);
                }

                double rebuiltVariance = GetTotalVariance(rebuilt);
                Console.WriteLine("{0} {1}", kept, rebuiltVariance);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(rebuiltVariance / totalVariance, analysis.Component(kept - 1).CumulativeVarianceFraction));
            }
        }
        // Verifies that the multivariate moment functions agree with the per-column mean and
        // variance and with the pairwise covariance.
        public void MultivariateMoments()
        {
            const int columns = 3;
            const int entries = 10;

            // create a random sample, one distribution per column
            MultivariateSample sample = new MultivariateSample(columns);
            Distribution[] sources = {
                new NormalDistribution(),
                new ExponentialDistribution(),
                new UniformDistribution()
            };
            Random random = new Random(1);
            for (int n = 0; n < entries; n++) {
                double[] row = new double[columns];
                for (int c = 0; c < columns; c++) {
                    row[c] = sources[c].GetRandomValue(random);
                }
                sample.Add(row);
            }

            // test that moments agree
            for (int i = 0; i < columns; i++) {
                // power 1 in column i only: the raw moment is the column mean
                int[] firstPower = new int[columns];
                firstPower[i] = 1;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(i).Mean, sample.Moment(firstPower)));

                // power 2 in column i only: the central moment is the column variance
                int[] secondPower = new int[columns];
                secondPower[i] = 2;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(i).Variance, sample.MomentAboutMean(secondPower)));

                // power 1 in columns i and j: the central moment is their covariance
                for (int j = 0; j < i; j++) {
                    int[] mixed = new int[columns];
                    mixed[i] = 1;
                    mixed[j] = 1;
                    Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.TwoColumns(i, j).Covariance, sample.MomentAboutMean(mixed)));
                }
            }
        }
        // Exercises the basic collection operations of a multivariate sample:
        // Add, Contains, Remove, Clear, Count and Dimension.
        public void MultivariateManipulations()
        {
            double[] firstEntry = { 1.1, 1.2, 1.3 };
            double[] secondEntry = { 2.1, 2.2, 2.3 };

            MultivariateSample sample = new MultivariateSample(3);

            // a new sample has the requested dimension and no entries
            Assert.IsTrue(sample.Dimension == 3);
            Assert.IsTrue(sample.Count == 0);

            sample.Add(firstEntry);
            sample.Add(secondEntry);
            Assert.IsTrue(sample.Count == 2);

            // an added entry is found, can be removed, and is gone afterwards
            Assert.IsTrue(sample.Contains(firstEntry));
            Assert.IsTrue(sample.Remove(firstEntry));
            Assert.IsFalse(sample.Contains(firstEntry));

            // clearing drops everything that is left
            sample.Clear();
            Assert.IsTrue(sample.Count == 0);
        }
        // Draws data from the model y = a + b0 * x0 + b1 * x1 + noise and checks that a linear
        // regression on the y column is significant and recovers the model parameters.
        public void MultivariateLinearRegressionTest()
        {
            // model parameters
            double intercept = 1.0;
            double slope0 = -2.0;
            double slope1 = 3.0;
            Distribution noise = new NormalDistribution(0.0, 10.0);

            // draw a sample from the model; columns are (x0, x1, y)
            Random random = new Random(1);
            MultivariateSample sample = new MultivariateSample(3);
            for (int n = 0; n < 100; n++) {
                double x0 = -10.0 + 20.0 * random.NextDouble();
                double x1 = -10.0 + 20.0 * random.NextDouble();
                double error = noise.InverseLeftProbability(random.NextDouble());
                sample.Add(x0, x1, intercept + slope0 * x0 + slope1 * x1 + error);
            }

            // fit with the y column (index 2) as the output
            FitResult result = sample.LinearRegression(2);

            // the result should have the appropriate dimension
            Assert.IsTrue(result.Dimension == 3);

            // the result should be significant
            Console.WriteLine("{0} {1}", result.GoodnessOfFit.Statistic, result.GoodnessOfFit.LeftProbability);
            Assert.IsTrue(result.GoodnessOfFit.LeftProbability > 0.95);

            // parameters 0 and 1 are the slopes; the parameter at the output index is the intercept
            double[] expected = { slope0, slope1, intercept };
            for (int index = 0; index < expected.Length; index++) {
                Console.WriteLine(result.Parameter(index));
                Assert.IsTrue(result.Parameter(index).ConfidenceInterval(0.90).ClosedContains(expected[index]));
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Prints a prediction for every upcoming game and logs a predicted winner when all indicators agree.
        /// </summary>
        /// <remarks>
        /// All stat lines are loaded into a multivariate sample (one column per key stat) and passed to the
        /// predictor service to obtain correlations. The "Significant" correlations are used when at least one
        /// exists; otherwise the "Borderline" ones are used. For each upcoming game every selected correlation
        /// yields one home-or-away vote by comparing the two teams' values of the correlation's column B.
        /// A winner is logged only when there is at least one vote and all votes agree.
        /// </remarks>
        private static void PredictNextGames()
        {
            var statlines = _statlineRepository.GetStatlines();
            var variables = statlines.First().KeyStats; // get mapping

            var sample = new MultivariateSample(variables.Count);
            var statsOfVariables = new double[variables.Count];

            foreach (var statline in statlines)
            {
                for (var i = 0; i < variables.Count; i++)
                {
                    statsOfVariables[i] = statline.GetByName(variables[i].Name);
                }
                sample.Add(statsOfVariables);
            }

            var correlations = _predictorService.FindCorrelationsViaLinearRegression(sample, variables.Count);

            var nextGames = _gameRepository.GetNextGames();

            // Which correlations to use does not depend on the game, so decide once instead of per game.
            var bestCorrelationTypeAvailable =
                correlations.Any(c => c.CorrelationType == CorrelationType.Significant)
                    ? CorrelationType.Significant
                    : CorrelationType.Borderline;
            var usableCorrelations = correlations
                .Where(c => c.CorrelationType == bestCorrelationTypeAvailable)
                .ToList();

            foreach (var game in nextGames)
            {
                Console.WriteLine("{0} at {1}", game.AwayTeam, game.HomeTeam);

                var gameStatlines = _statlineRepository.GetStatlinesForGame(game);

                // Copy of the loop variable for the lambdas below (kept from the original code).
                Game game1 = game;
                var home = gameStatlines.Single(s => s.Team == game1.HomeTeam);
                var away = gameStatlines.Single(s => s.Team == game1.AwayTeam);

                var predictions = new List<bool>();
                foreach (var correlation in usableCorrelations)
                {
                    var independentVariable = home.GetName(correlation.ColumnA, variables);
                    var dependentVariable = home.GetName(correlation.ColumnB, variables);

                    var homeValue = home.GetById(correlation.ColumnB, variables);
                    var awayValue = away.GetById(correlation.ColumnB, variables);

                    // Negative coefficient: the lower column-B value wins; otherwise the higher one does.
                    bool negative = correlation.Coefficient < 0;
                    bool homeWins = negative ? homeValue < awayValue : homeValue > awayValue;

                    Console.WriteLine("{0} to have {1} {2} based on {3} {4}",
                        homeWins ? home.Team : away.Team, "more", independentVariable, negative ? "lower" : "higher", dependentVariable);

                    predictions.Add(homeWins);
                }

                // If all predictions reach the same conclusion
                bool unanimous = predictions.Count > 0 && predictions.All(p => p == predictions[0]);
                if (unanimous)
                {
                    var winner = predictions[0] ? home : away;
                    Console.WriteLine("----> Our predicted winner is {0} <----", winner.Team);
                    _predictionRepository.Log(game1, winner);
                }
                else
                {
                    Console.WriteLine("Too close to call {0} at {1}", away.Team, home.Team);
                }
            }
        }
 /// <summary>
 /// Expresses the originally analyzed data in terms of the principal components.
 /// </summary>
 /// <returns>A multivariate sample in which each column holds the weights of one principal component
 /// across the entries of the originally analyzed sample.</returns>
 public MultivariateSample TransformedSample()
 {
     int width = Dimension;
     MultivariateSample transformed = new MultivariateSample(width);

     // One scratch buffer is refilled for every entry.
     // NOTE(review): this relies on Add copying the values rather than keeping the array reference.
     double[] buffer = new double[width];

     int row = 0;
     while (row < rows) {
         // The values for entry 'row' start at offset rows * row in utStore.
         Array.Copy(utStore, rows * row, buffer, 0, width);
         transformed.Add(buffer);
         row++;
     }

     return transformed;
 }
Esempio n. 17
0
        // Checks that the uncertainties reported by the Weibull fit are meaningful: averaged over many
        // fits they should match the standard deviation of the fitted parameter values themselves.
        public void WeibullFitUncertainties()
        {
            const int fitCount = 50;
            const int sampleSize = 10;

            // the population every sample is drawn from
            Distribution population = new WeibullDistribution(2.5, 1.5);

            // fitted values of both parameters, and what each fit reported about their errors
            BivariateSample fittedValues = new BivariateSample();
            MultivariateSample reportedErrors = new MultivariateSample(3);

            int seed = 0;
            while (seed < fitCount) {
                FitResult fit = WeibullDistribution.FitToSample(CreateSample(population, sampleSize, seed));
                UncertainValue first = fit.Parameter(0);
                UncertainValue second = fit.Parameter(1);

                fittedValues.Add(first.Value, second.Value);
                reportedErrors.Add(first.Uncertainty, second.Uncertainty, fit.Covariance(0,1));
                seed++;
            }

            // the reported errors should agree with the standard deviation of the reported parameters
            double meanFirstError = reportedErrors.Column(0).Mean;
            double meanSecondError = reportedErrors.Column(1).Mean;
            Assert.IsTrue(fittedValues.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(meanFirstError));
            Assert.IsTrue(fittedValues.Y.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(meanSecondError));

            // the covariance column (index 2) is recorded but not currently checked
        }
Esempio n. 18
0
 public void Bug6391()
 {
     // Regression check: a principal component analysis of this tiny two-entry sample
     // used to fail with a NonConvergenceException. The test passes if the call completes.
     MultivariateSample twoPoints = new MultivariateSample(2);
     twoPoints.Add(0, 1);
     twoPoints.Add(0, -1);

     twoPoints.PrincipalComponentAnalysis();
 }
Esempio n. 19
0
        public void MultivariateLinearRegression()
        {
            // Despite the method name, this exercises LogisticLinearRegression: binary outcomes are
            // generated from known coefficients, and each fitted parameter is checked against them.

            // Column that holds the binary outcome; its coefficient acts as the constant term of z.
            int outputIndex = 2;

            double[] c = new double[] { -1.0, 2.0, -3.0, 4.0 };

            Random rng = new Random(1001110000);
            UniformDistribution inputDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 4.0));

            MultivariateSample sample = new MultivariateSample(c.Length);

            int generated = 0;
            while (generated < 1000) {
                double[] row = new double[sample.Dimension];

                // Accumulate the linear predictor, drawing a random input for every non-output column.
                double z = 0.0;
                for (int i = 0; i < row.Length; i++) {
                    if (i != outputIndex) {
                        row[i] = inputDistribution.GetRandomValue(rng);
                        z += row[i] * c[i];
                    } else {
                        z += c[i];
                    }
                }

                // Convert the predictor into a probability and draw the 0/1 outcome from it.
                double ez = Math.Exp(z);
                double p = ez / (1.0 + ez);
                if (rng.NextDouble() < p) {
                    row[outputIndex] = 1.0;
                } else {
                    row[outputIndex] = 0.0;
                }

                sample.Add(row);
                generated++;
            }

            FitResult result = sample.LogisticLinearRegression(outputIndex);

            // Each fitted parameter must be consistent with its generating coefficient at 99% confidence.
            for (int i = 0; i < result.Dimension; i++) {
                var fitted = result.Parameter(i);
                Console.WriteLine(fitted);
                Assert.IsTrue(fitted.ConfidenceInterval(0.99).ClosedContains(c[i]));
            }
        }
        /// <summary>
        /// Exploratory comparison of a singular value decomposition and a principal component analysis
        /// computed on the same 1000 two-dimensional points. The method makes no assertions; it prints
        /// both results to the console so they can be compared by eye.
        /// </summary>
        public void PC()
        {
            Random rng = new Random(1);
            double s = 1.0 / Math.Sqrt(2.0);

            // Build the same data twice: as a sample (for the PCA) and as a matrix (for the SVD).
            // r1 and r2 are uniform on [-1, 1); x and y mix them with weights 4/sqrt(2) and 9/sqrt(2).
            MultivariateSample MS = new MultivariateSample(2);
            RectangularMatrix R = new RectangularMatrix(1000, 2);
            for (int i = 0; i < 1000; i++) {
                double r1 = 2.0 * rng.NextDouble() - 1.0;
                double r2 = 2.0 * rng.NextDouble() - 1.0;
                double x = r1 * 4.0 * s - r2 * 9.0 * s;
                double y = r1 * 4.0 * s + r2 * 9.0 * s;
                R[i, 0] = x; R[i, 1] = y;
                MS.Add(x, y);
            }

            Console.WriteLine("x {0} {1}", MS.Column(0).Mean, MS.Column(0).Variance);
            Console.WriteLine("y {0} {1}", MS.Column(1).Mean, MS.Column(1).Variance);

            Console.WriteLine("SVD");

            // Print each singular value with its right singular vector.
            SingularValueDecomposition SVD = R.SingularValueDecomposition();
            for (int i = 0; i < SVD.Dimension; i++) {
                Console.WriteLine("{0} {1}", i, SVD.SingularValue(i));
                ColumnVector v = SVD.RightSingularVector(i);
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            Console.WriteLine("PCA");

            // Print each principal component's weight, variance fractions and normalized direction.
            PrincipalComponentAnalysis PCA = MS.PrincipalComponentAnalysis();
            Console.WriteLine("Dimension = {0} Count = {1}", PCA.Dimension, PCA.Count);
            for (int i = 0; i < PCA.Dimension; i++) {
                PrincipalComponent component = PCA.Component(i);
                Console.WriteLine("  {0} {1} {2} {3}", component.Index, component.Weight, component.VarianceFraction, component.CumulativeVarianceFraction);
                RowVector v = component.NormalizedVector();
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            // reconstruct: rebuild entries of R from the SVD factors, R[i, j] = sum_k U[i, k] * s_k * V[j, k],
            // and print each next to the stored value (rows 0 and 100).
            SquareMatrix U = SVD.LeftTransformMatrix();
            SquareMatrix V = SVD.RightTransformMatrix();
            double x1 = U[0, 0] * SVD.SingularValue(0) * V[0, 0] + U[0, 1] * SVD.SingularValue(1) * V[0, 1];
            Console.WriteLine("x1 = {0} {1}", x1, R[0, 0]);
            double y1 = U[0, 0] * SVD.SingularValue(0) * V[1, 0] + U[0, 1] * SVD.SingularValue(1) * V[1, 1];
            Console.WriteLine("y1 = {0} {1}", y1, R[0, 1]);
            double x100 = U[100, 0] * SVD.SingularValue(0) * V[0, 0] + U[100, 1] * SVD.SingularValue(1) * V[0, 1];
            Console.WriteLine("x100 = {0} {1}", x100, R[100, 0]);
            double y100 = U[100, 0] * SVD.SingularValue(0) * V[1, 0] + U[100, 1] * SVD.SingularValue(1) * V[1, 1];
            Console.WriteLine("y100 = {0} {1}", y100, R[100, 1]);

            // The same reconstruction expressed with whole right singular vectors.
            ColumnVector d1 = U[0,0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) +
                U[0, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);
            Console.WriteLine("d1 = ({0} {1})", d1[0], d1[1]);
            ColumnVector d100 = U[100, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) +
                U[100, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);
            Console.WriteLine("d100 = ({0} {1})", d100[0], d100[1]);

            // Print the first entry of the PCA-transformed sample next to the first row of U.
            Console.WriteLine("compare");
            MultivariateSample RS = PCA.TransformedSample();
            // The using statement disposes the enumerator even if reading or printing throws;
            // the original disposed it manually only on the success path.
            using (IEnumerator<double[]> RSE = RS.GetEnumerator()) {
                RSE.MoveNext();
                double[] dv1 = RSE.Current;
                Console.WriteLine("{0} {1}", dv1[0], dv1[1]);
                Console.WriteLine("{0} {1}", U[0, 0], U[0, 1]);
            }
        }
        public void BivariatePolynomialRegression()
        {
            // Runs many polynomial regression fits on synthetic data and checks two things:
            // the fitted parameters agree with the generating coefficients, and the scatter of the
            // fitted parameters agrees with the covariances that the fits themselves report.

            Random rng = new Random(271828);

            // Coefficients of the generating polynomial: y = sum_i coefficients[i] * x^i + noise.
            double[] coefficients = new double[] { 0.0, -1.0, 2.0, -3.0 };

            // Best-fit parameters collected from every fit.
            MultivariateSample fittedParameters = new MultivariateSample(coefficients.Length);

            // Running sum of the reported covariance matrices; averaged after the loop because
            // the estimates vary slightly from fit to fit.
            SymmetricMatrix C = new SymmetricMatrix(coefficients.Length);

            // Goodness-of-fit statistics of every fit (collected, but not asserted on below).
            Sample fitStatistics = new Sample();

            int fitIndex = 0;
            while (fitIndex < 100) {

                // x values may come from any distribution; the noise is drawn from a normal distribution.
                Distribution xDistribution = new CauchyDistribution();
                Distribution noiseDistribution = new NormalDistribution(0.0, 4.0);

                // Generate a synthetic data set of 20 points.
                BivariateSample data = new BivariateSample();
                for (int point = 0; point < 20; point++) {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = noiseDistribution.GetRandomValue(rng);
                    int power = 0;
                    while (power < coefficients.Length) {
                        y += coefficients[power] * MoreMath.Pow(x, power);
                        power++;
                    }
                    data.Add(x, y);
                }

                // Fit a polynomial of the generating degree.
                FitResult fit = data.PolynomialRegression(coefficients.Length - 1);

                // Record parameters, reported covariances and the fit statistic, in that order.
                fittedParameters.Add(fit.Parameters);
                C = C + fit.CovarianceMatrix;
                fitStatistics.Add(fit.GoodnessOfFit.Statistic);

                fitIndex++;
            }

            // Turn the summed covariance matrices into their average.
            C = (1.0 / fittedParameters.Count) * C;

            // The mean of each fitted parameter must be consistent with its generating coefficient.
            for (int i = 0; i < fittedParameters.Dimension; i++) {
                var meanEstimate = fittedParameters.Column(i).PopulationMean;
                Console.WriteLine("{0} {1}", meanEstimate, coefficients[i]);
                Assert.IsTrue(meanEstimate.ConfidenceInterval(0.95).ClosedContains(coefficients[i]));
            }

            // The observed covariance of each parameter pair must be consistent with the averaged
            // covariance reported by the fits.
            for (int i = 0; i < fittedParameters.Dimension; i++) {
                for (int j = i; j < fittedParameters.Dimension; j++) {
                    var observedCovariance = fittedParameters.TwoColumns(i, j).PopulationCovariance;
                    Console.WriteLine("{0} {1} {2} {3}", i, j, C[i, j], observedCovariance);
                    Assert.IsTrue(observedCovariance.ConfidenceInterval(0.95).ClosedContains(C[i, j]));
                }
            }

            // TODO: check that the collected fit statistics follow the expected distribution.
        }
        public void MultivariateLinearRegressionBadInputTest()
        {
            // Verifies that LinearRegression rejects bad input with the expected exception types.

            // Start with a three-column sample holding only two entries.
            MultivariateSample sample = new MultivariateSample(3);
            sample.Add(1, 2, 3);
            sample.Add(2, 3, 4);

            // With too little data the regression must throw InvalidOperationException.
            bool rejectedSmallSample = false;
            try {
                sample.LinearRegression(2);
            } catch (InvalidOperationException) {
                rejectedSmallSample = true;
            }
            Assert.IsTrue(rejectedSmallSample);

            // Add enough data so that only the column index can be at fault from here on.
            sample.Add(3, 4, 5);
            sample.Add(4, 5, 6);

            // Column indexes outside the sample (below zero, then equal to the column count)
            // must throw ArgumentOutOfRangeException.
            foreach (int badIndex in new int[] { -1, 3 }) {
                bool rejectedIndex = false;
                try {
                    sample.LinearRegression(badIndex);
                } catch (ArgumentOutOfRangeException) {
                    rejectedIndex = true;
                }
                Assert.IsTrue(rejectedIndex);
            }
        }