/// <summary>
///     Pins the cumulative binomial probabilities used when judging whether a
///     low-frequency variant call is plausible (100 reads, 20% expected alt frequency).
/// </summary>
public void TestDistributionFxn()
        {
            //trickier case, when we barely see it but we dont have enough reads...
            var binomialHetAltExpected = new MathNet.Numerics.Distributions.Binomial(0.20, 100);

            //suppose you saw a variant at {15%,20%,25%}. is that real? given diploid expectations?
            double chanceYouGetUpTo15 = binomialHetAltExpected.CumulativeDistribution(15); //lower tail: roughly 13% of the time
            double chanceYouGetUpTo20 = binomialHetAltExpected.CumulativeDistribution(20); //at the mean: a bit over half the time
            double chanceYouGetUpTo25 = binomialHetAltExpected.CumulativeDistribution(25); //upper side: roughly 91% of the time

            //xUnit's signature is Assert.Equal(expected, actual, precision); keeping that order
            //makes a failure report the right value as "Expected".
            Assert.Equal(0.129, chanceYouGetUpTo15, 3);
            Assert.Equal(0.559, chanceYouGetUpTo20, 3);
            Assert.Equal(0.913, chanceYouGetUpTo25, 3);
        }
Example #2
0
        /// <summary>
        ///     Assign a q-score for a hemizygous genotype call.
        /// </summary>
        /// <param name="allele">The called allele; its TotalCoverage, AlleleSupport and Genotype are read.</param>
        /// <param name="minQScore">Lower clamp for the result; also returned for zero coverage and for genotypes this model does not handle.</param>
        /// <param name="maxQScore">Upper clamp for the result.</param>
        /// <returns>
        ///     10 * log10 of the likelihood ratio P(H0)/P(H1), floored and clamped to
        ///     [<paramref name="minQScore"/>, <paramref name="maxQScore"/>].
        /// </returns>
        public static int Compute(CalledAllele allele, int minQScore, int maxQScore)
        {
            if (allele.TotalCoverage == 0)
            {
                return(minQScore);
            }

            Genotype calledGT = allele.Genotype;

            //parameters
            float noiseHomRef     = 0.05f;
            float noiseHomAlt     = 0.075f;
            float expectedHetFreq = 0.40f;              //a real 50% typically shows up at <50%, more like 40% or 45%
            float depth           = (float)allele.TotalCoverage;

            //distributions
            var poissonHomRefNoise     = new MathNet.Numerics.Distributions.Poisson(noiseHomRef * depth);
            var poissonHomAltNoise     = new MathNet.Numerics.Distributions.Poisson(noiseHomAlt * depth);
            var binomialHomAltExpected = new MathNet.Numerics.Distributions.Binomial(expectedHetFreq, allele.TotalCoverage);
            var nonAlleleCalls         = Math.Max(allele.TotalCoverage - allele.AlleleSupport, 0); //sanitize for funny insertion cases

            double LnPofH0GT = 0;   //H0 is the null hypothesis. The working assumption that the GT given to the allele is correct
            double LnPofH1GT = 0;   //H1 is the alternate hypothesis. The possibility that H0 is wrong, and the second-best GT was actually the right one

            //the GT Q model measures how much *more* likely H0 is than H1, given the observations.

            switch (calledGT)
            {
            case Genotype.HemizygousRef:
                LnPofH0GT = poissonHomRefNoise.ProbabilityLn(nonAlleleCalls);
                LnPofH1GT = binomialHomAltExpected.ProbabilityLn(nonAlleleCalls);
                break;

            case Genotype.HemizygousAlt:
                LnPofH0GT = poissonHomAltNoise.ProbabilityLn(nonAlleleCalls);
                LnPofH1GT = binomialHomAltExpected.ProbabilityLn(allele.AlleleSupport);
                break;

            default:
                return(minQScore);
            }

            //Log10(A/B) = Log10(e) * (ln(A) - ln(B))
            double rawQScore = 10.0 * Math.Log10(Math.E) * (LnPofH0GT - LnPofH1GT);

            //Both hypotheses impossible (-inf minus -inf): there is no evidence for the call.
            if (double.IsNaN(rawQScore))
            {
                return(minQScore);
            }

            //Clamp while still a double. A log-probability of -Infinity (e.g. support larger than
            //coverage) makes the ratio infinite, and casting an infinite or out-of-range double
            //to int is unspecified, so the cast must only ever see a value already inside the clamp.
            double clampedQScore = Math.Max(Math.Min(Math.Floor(rawQScore), maxQScore), minQScore);

            return((int)clampedQScore);
        }
        /// <summary>
        ///     Draws <c>SamplesNumber</c> samples from the distribution selected by
        ///     <c>DistributionName</c> and measures how long that takes.
        ///     Also sets <c>fullName</c> to a label describing the run.
        /// </summary>
        /// <returns>
        ///     Elapsed time in milliseconds. When <c>DistributionName</c> matches none of the
        ///     known names nothing is sampled and only the overhead is reported.
        /// </returns>
        public double TakeSamples()
        {
            //Stopwatch is monotonic and high resolution; DateTime.Now is neither
            //(it follows wall-clock adjustments and ticks coarsely).
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            if (this.DistributionName == "Binomial")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}-Trials:{TrialsNumber}";

                var binomial = new MathNet.Numerics.Distributions.Binomial(0.5, this.TrialsNumber);
                //ToArray forces the lazy sample stream to actually be generated.
                binomial.Samples().Take(SamplesNumber).ToArray();
            }
            else if (this.DistributionName == "Geometric")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}";

                var geometric = new MathNet.Numerics.Distributions.Geometric(0.5);
                geometric.Samples().Take(SamplesNumber).ToArray();
            }
            else if (this.DistributionName == "Poisson")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}";

                var poisson = new MathNet.Numerics.Distributions.Poisson(0.5);
                poisson.Samples().Take(SamplesNumber).ToArray();
            }
            else if (this.DistributionName == "Normal")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}";

                var normal = new MathNet.Numerics.Distributions.Normal(0.5, 2);
                normal.Samples().Take(SamplesNumber).ToArray();
            }

            stopwatch.Stop();
            return(stopwatch.Elapsed.TotalMilliseconds);
        }
Example #4
0
        /// <summary>
        ///     Fills in the false-negative / false-positive chances on <paramref name="stats"/>
        ///     for a variant observed on one strand, under diploid expectations.
        /// </summary>
        /// <param name="stats">
        ///     Reads Frequency, Coverage and Support; writes ChanceFalseNeg, ChanceFalsePos
        ///     and ChanceVarFreqGreaterThanZero.
        /// </param>
        /// <param name="noiseFreq">
        ///     Not referenced by this method; the noise rate used below is the literal 0.1.
        ///     NOTE(review): possibly meant to replace that literal - confirm before changing.
        /// </param>
        /// <param name="minDetectableSNP">
        ///     Frequency at or above which the variant is treated as plainly detected; also the
        ///     per-read success probability of the binomial model.
        /// </param>
        public static void PopulateDiploidStats(StrandBiasStats stats, double noiseFreq, double minDetectableSNP)
        {
            //Easy case first: the observed frequency already reaches the detection threshold,
            //so no distribution needs to be evaluated.
            if (stats.Frequency >= minDetectableSNP)
            {
                stats.ChanceFalseNeg = 1;
                stats.ChanceFalsePos = 0;
                stats.ChanceVarFreqGreaterThanZero = 1;
                return;
            }

            //Harder case: the variant is barely visible and the read count is low.
            //Model the support a real variant at the threshold frequency would produce.
            var supportIfRealVariant = new MathNet.Numerics.Distributions.Binomial(minDetectableSNP, (int)stats.Coverage);

            //If the variant were real, how often would its support come out this low?
            //(Filtering it would then be a false negative.)
            var chanceSupportThisLow = supportIfRealVariant.CumulativeDistribution(stats.Support);
            stats.ChanceFalseNeg = Math.Max(chanceSupportThisLow, 0);

            //Chance the variant is just noise (keeping it would then be a false positive):
            //one minus the Poisson CDF at the observed support, floored at zero.
            var noiseCdfAtSupport = Poisson.Cdf(stats.Support, stats.Coverage * 0.1);
            stats.ChanceFalsePos = Math.Max(0.0, 1 - noiseCdfAtSupport);

            stats.ChanceVarFreqGreaterThanZero = stats.ChanceFalseNeg;
        }
        /// <summary>
        ///     Assign a q-score for a genotype call.
        /// </summary>
        /// <param name="allele">The called allele; its TotalCoverage, AlleleSupport, Frequency and Genotype are read.</param>
        /// <param name="minQScore">Lower clamp for the result; also returned for zero coverage and for genotypes this model does not handle.</param>
        /// <param name="maxQScore">Upper clamp for the result.</param>
        /// <returns>
        ///     10 * log10 of the likelihood ratio P(H0)/P(H1), floored and clamped to
        ///     [<paramref name="minQScore"/>, <paramref name="maxQScore"/>].
        /// </returns>
        public static int Compute(CalledAllele allele, int minQScore, int maxQScore)
        {
            if (allele.TotalCoverage == 0)
            {
                return(minQScore);
            }

            Genotype calledGT = allele.Genotype;

            //parameters
            float noiseHomRef     = 0.05f;
            float noiseHomAlt     = 0.075f;
            float noiseHetAlt     = 0.10f;
            float expectedHetFreq = 0.40f;  //a real 50% typically shows up at <50%, more like 40% or 45%
            float depth           = (float)allele.TotalCoverage;

            //distributions
            var poissonHomRefNoise     = new MathNet.Numerics.Distributions.Poisson(noiseHomRef * depth);
            var poissonHomAltNoise     = new MathNet.Numerics.Distributions.Poisson(noiseHomAlt * depth);
            var binomialHomAltExpected = new MathNet.Numerics.Distributions.Binomial(expectedHetFreq, allele.TotalCoverage);
            var binomialHomRefNoise    = new MathNet.Numerics.Distributions.Binomial(noiseHetAlt, allele.TotalCoverage);
            var binomialHomAltNoise    = new MathNet.Numerics.Distributions.Binomial((1 - noiseHetAlt), allele.TotalCoverage);
            var nonAlleleCalls         = Math.Max(allele.TotalCoverage - allele.AlleleSupport, 0); //sanitize for funny insertion cases

            double LnPofH0GT = 0;   //H0 is the null hypothesis. The working assumption that the GT given to the allele is correct
            double LnPofH1GT = 0;   //H1 is the alternate hypothesis. The possibility that H0 is wrong, and the second-best GT was actually the right one

            //the GT Q model measures how much *more* likely H0 is than H1, given the observations.

            switch (calledGT)
            {
            case Genotype.HomozygousRef:
                LnPofH0GT = poissonHomRefNoise.ProbabilityLn(nonAlleleCalls);
                LnPofH1GT = binomialHomAltExpected.ProbabilityLn(nonAlleleCalls);
                break;

            case Genotype.HomozygousAlt:
                LnPofH0GT = poissonHomAltNoise.ProbabilityLn(nonAlleleCalls);
                LnPofH1GT = binomialHomAltExpected.ProbabilityLn(allele.AlleleSupport);
                break;

            case Genotype.HeterozygousAlt1Alt2:
            case Genotype.HeterozygousAltRef:
            {
                //NOTE(review): the count is re-derived from depth * Frequency and truncated;
                //presumably this equals AlleleSupport, but float rounding could land one below - confirm.
                int observedCount = (int)(depth * allele.Frequency);

                LnPofH0GT = binomialHomAltExpected.ProbabilityLn(observedCount);
                if (allele.Frequency >= 0.50)
                {
                    //test alternate GT as being homAlt
                    LnPofH1GT = binomialHomAltNoise.ProbabilityLn(observedCount);
                }
                else
                {
                    //test alternate GT as being homRef
                    LnPofH1GT = binomialHomRefNoise.ProbabilityLn(observedCount);
                }
                break;
            }

            default:
                return(minQScore);
            }

            //H1 is (effectively) impossible while H0 is not: H0 is infinitely more likely.
            if ((LnPofH1GT <= int.MinValue) && (LnPofH0GT > LnPofH1GT))
            {
                return(maxQScore);
            }

            //H0 is (effectively) impossible while H1 is not: H1 is infinitely more likely.
            if ((LnPofH0GT <= int.MinValue) && (LnPofH0GT < LnPofH1GT))
            {
                return(minQScore);
            }

            //note, Ln(X) = Log10(X) / Log10(e)
            // ->
            //Log10(A) - Log10(B) = Log10(e) * (ln(A) - ln(B)) = Log10(A/B)
            double rawQScore = 10.0 * Math.Log10(Math.E) * (LnPofH0GT - LnPofH1GT);

            //Both hypotheses impossible (-inf minus -inf): there is no evidence for the call.
            if (double.IsNaN(rawQScore))
            {
                return(minQScore);
            }

            //Clamp while still a double so the int cast never sees an out-of-range value
            //(casting such a double to int is unspecified).
            double clampedQScore = Math.Max(Math.Min(Math.Floor(rawQScore), maxQScore), minQScore);

            return((int)clampedQScore);
        }
        /// <summary>
        ///     Draws <c>SamplesNumber</c> samples from the distribution selected by
        ///     <c>DistributionName</c> and measures how long that takes.
        ///     Also sets <c>fullName</c> to a label describing the run.
        /// </summary>
        /// <returns>
        ///     Elapsed time in milliseconds. When <c>DistributionName</c> matches none of the
        ///     known names nothing is sampled and only the overhead is reported.
        /// </returns>
        public double TakeSamples()
        {
            //Stopwatch is monotonic and high resolution; DateTime.Now is neither
            //(it follows wall-clock adjustments and ticks coarsely).
            var timer = System.Diagnostics.Stopwatch.StartNew();

            if (this.DistributionName == "Binomial")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}-Trials:{TrialsNumber}";

                var binomial = new MathNet.Numerics.Distributions.Binomial(0.5, this.TrialsNumber);
                //ToArray forces the lazy sample stream to actually be generated.
                binomial.Samples().Take(SamplesNumber).ToArray();
            }
            else if (this.DistributionName == "Geometric")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}";

                var geometric = new MathNet.Numerics.Distributions.Geometric(0.5);
                geometric.Samples().Take(SamplesNumber).ToArray();
            }
            else if (this.DistributionName == "Poisson")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}";

                var poisson = new MathNet.Numerics.Distributions.Poisson(0.5);
                poisson.Samples().Take(SamplesNumber).ToArray();
            }
            else if (this.DistributionName == "Normal")
            {
                fullName = $"{DistributionName}-Samples:{SamplesNumber}";

                var normal = new MathNet.Numerics.Distributions.Normal(0.5, 2);
                normal.Samples().Take(SamplesNumber).ToArray();
            }

            timer.Stop();
            return(timer.Elapsed.TotalMilliseconds);
        }
        /// <summary>
        ///     Builds the UI for task 1 on <paramref name="form"/>: an info button, and for each
        ///     of three distributions (geometric, binomial, wedge-shaped) a list box, a text box
        ///     and a button. Each button runs on a background task: it builds a table of
        ///     probabilities, sorts it by descending probability, lists it, draws 100000 values
        ///     and writes the average position of those values in the sorted table to the text box.
        /// </summary>
        /// <param name="form">Form that receives the title, default size and controls.</param>
        /// <param name="console">Not used by this method.</param>
        public void LocateControls(Form form, ConsoleHandler console)
        {
            form.Text = "Задание № 1";
            form.SetDefaultVals(new System.Drawing.Size(800, 500));

            form.Controls.Add(BeautyfyForms.AddButton(" Суть ", new Point(370, 10), (o, k) =>
            {
                MessageBox.Show("Задача № 1. Игра с Геометрическим, Биномиальным и Клиновидным распределениями!");
            }));

            logGeometric = BeautyfyForms.AddListBox(new Point(10, 100), new Size(200, 450));
            form.Controls.Add(logGeometric);

            _geometric = BeautyfyForms.CreateTextBox(new Point(40, 420), true);
            form.Controls.Add(_geometric);

            logBinominal = BeautyfyForms.AddListBox(new Point(300, 100), new Size(200, 450));
            form.Controls.Add(logBinominal);

            _binominal = BeautyfyForms.CreateTextBox(new Point(340, 420), true);
            form.Controls.Add(_binominal);

            logWedgeShaped = BeautyfyForms.AddListBox(new Point(590, 100), new Size(200, 450));
            form.Controls.Add(logWedgeShaped);

            _wedgeShaped = BeautyfyForms.CreateTextBox(new Point(625, 420), true);
            form.Controls.Add(_wedgeShaped);

            // Geometric distribution (p = 0.6).
            form.Controls.Add(BeautyfyForms.AddButton("Геометрическое", new Point(30, 70), (o, k) =>
            {
                logGeometric.Items.Clear();
                Task.Run(() =>
                {
                    var listOfElements = new List <KeyValueItem>();
                    var geom           = new MathNet.Numerics.Distributions.Geometric(0.6);
                    for (int i = 0; i <= 1000; i++)
                    {
                        listOfElements.Add(new KeyValueItem(i, geom.Probability(i)));
                    }
                    listOfElements = (from element in listOfElements.AsParallel() orderby element.Probability descending select element).ToList();
                    foreach (var item in listOfElements)
                    {
                        // UI controls may only be touched on the UI thread, hence BeginInvoke.
                        logGeometric.BeginInvoke(new MethodInvoker(() => logGeometric.Items.Add(item)));
                    }

                    // Sum of the positions (in the sorted table) of every sampled value;
                    // samples that are not in the table contribute nothing.
                    double average = 0;
                    for (int i = 0; i < 100000; i++)
                    {
                        var search = geom.Sample();
                        for (int j = 0; j < listOfElements.Count; j++)
                        {
                            if (listOfElements[j].Key == search)
                            {
                                average += j;
                            }
                        }
                    }

                    average /= 100000;
                    _geometric.BeginInvoke(new MethodInvoker(() => _geometric.Text = Math.Round(average).ToString()));
                });
            }));

            // Binomial distribution (p = 0.4, 1000 trials).
            form.Controls.Add(BeautyfyForms.AddButton("Биноминальное", new Point(335, 70), (o, k) =>
            {
                logBinominal.Items.Clear();
                Task.Run(() =>
                {
                    var listOfElements = new List <KeyValueItem>();
                    var binom          = new MathNet.Numerics.Distributions.Binomial(0.4, 1000);
                    for (int i = 0; i <= 1000; i++)
                    {
                        listOfElements.Add(new KeyValueItem(i, binom.Probability(i)));
                    }
                    listOfElements = (from element in listOfElements.AsParallel() orderby element.Probability descending select element).ToList();
                    foreach (var item in listOfElements)
                    {
                        logBinominal.BeginInvoke(new MethodInvoker(() => logBinominal.Items.Add(item)));
                    }

                    // A double accumulator, like the other two handlers: with an integer type the
                    // division below truncates, so the value would be cut off rather than rounded.
                    double average = 0;

                    for (int i = 0; i < 100000; i++)
                    {
                        var search = binom.Sample();
                        for (int j = 0; j < listOfElements.Count; j++)
                        {
                            if (listOfElements[j].Key == search)
                            {
                                average += j;
                            }
                        }
                    }

                    average /= 100000;
                    _binominal.BeginInvoke(new MethodInvoker(() => _binominal.Text = Math.Round(average).ToString()));
                });
            }));

            // Wedge-shaped distribution over 0..N-1 with P(i) = (N - i) * 2 / (N * (N + 1)).
            form.Controls.Add(BeautyfyForms.AddButton("Клиновидное", new Point(630, 70), (o, k) =>
            {
                logWedgeShaped.Items.Clear();
                Task.Run(() =>
                {
                    var listOfElements = new List <KeyValueItem>();
                    Random random      = new Random((int)DateTime.Now.ToBinary());
                    int N = 1000;
                    for (int i = 0; i < 1000; i++)
                    {
                        listOfElements.Add(new KeyValueItem(i, (N - i) * (2.0 / (N * (N + 1)))));
                    }
                    listOfElements = (from element in listOfElements.AsParallel() orderby element.Probability descending select element).ToList();
                    foreach (var item in listOfElements)
                    {
                        logWedgeShaped.BeginInvoke(new MethodInvoker(() => logWedgeShaped.Items.Add(item)));
                    }

                    double average = 0;

                    for (int i = 0; i < 100000; i++)
                    {
                        // An index is drawn uniformly, mapped to its probability p, and p is then
                        // mapped back to an index by inverting the formula above.
                        // NOTE(review): the round trip is floating point, so `search` may not be
                        // exactly integral and then matches no key; the draw itself is uniform,
                        // not wedge-distributed - confirm this is intended.
                        var p      = (N - random.Next(0, 1000)) * (2.0 / (N * (N + 1)));
                        var search = -(1 / 2.0) * N * (N * p + p - 2);
                        for (int j = 0; j < listOfElements.Count; j++)
                        {
                            if (listOfElements[j].Key == search)
                            {
                                average += j;
                            }
                        }
                    }
                    average /= 100000;
                    _wedgeShaped.BeginInvoke(new MethodInvoker(() => _wedgeShaped.Text = Math.Round(average).ToString()));
                });
            }));
        }
Example #8
0
        /// <summary>
        ///     Statistical check of <c>BlockRandomisation.NextAllocation</c>: for every ratio from
        ///     <c>GetRatio()</c>, at three repeat increments, 1000 complete blocks are generated.
        ///     Each block must contain exactly the arms of the ratio, and the number of times each
        ///     distinct block ordering occurs is compared with a binomial model in which every
        ///     ordering is equally likely.
        /// </summary>
        /// <exception cref="AssertFailedException">
        ///     Thrown when some ordering occurs with a tail probability below 0.00001.
        /// </exception>
        public void TestRandomisingAlgorithm()
        {
            var moqRand = new Mock <IRandom>(MockBehavior.Strict);
            var rnd     = new Random();

            moqRand.Setup(m => m.NextDouble()).Returns(rnd.NextDouble);

            foreach (var r in GetRatio())
            {
                const int maxRptIncr   = 3;
                const int sameBlockRpt = 1000;
                for (int incr = 0; incr < maxRptIncr; incr++)
                {
                    var allArms     = new List <List <RandomisationArm> >(sameBlockRpt);
                    var rClone      = r.Clone((byte)(r.Repeats + incr));
                    int allocations = rClone.TotalBlockSize();

                    for (int rpt = 0; rpt < sameBlockRpt; rpt++)
                    {
                        var arms = new List <RandomisationArm>(allocations);
                        while (arms.Count < allocations)
                        {
                            var newArm = BlockRandomisation.NextAllocation(arms, rClone, moqRand.Object);
                            arms.Add(newArm);
                        }
                        CollectionAssert.AreEquivalent(rClone.ToList(), arms);

                        allArms.Add(arms);
                    }

                    var ocurrences = new Counter <IList <RandomisationArm> >(new EnumListEqualityComparer <RandomisationArm>());
                    foreach (var block in allArms)
                    {
                        ocurrences[block]++;
                    }

                    //distinct orderings of a block = allocations! / product over arms of (ratio * repeats)!
                    //The product is accumulated as a double: an int accumulator overflows from 13! upwards.
                    double sameArmOrderings = rClone.Ratios.Values.Aggregate(1.0, (prev, cur) => prev * SpecialFunctions.Factorial(cur * rClone.Repeats));
                    double permutations     = SpecialFunctions.Factorial(allocations) / sameArmOrderings;

                    double       expectedOccurence = sameBlockRpt / permutations;
                    string       assertMessage     = null;
                    const double accceptableP      = 0.00001;
                    var          bin = new MathNet.Numerics.Distributions.Binomial(1 / permutations, allArms.Count);
                    foreach (var kv in ocurrences)
                    {
                        string msg;
                        double p;
                        if (kv.Value > expectedOccurence)
                        {
                            //upper tail: P(X >= observed)
                            p   = 1 - bin.CumulativeDistribution(kv.Value - 1);
                            msg = "(as or more extreme)";
                        }
                        else
                        {
                            //lower tail: P(X <= observed)
                            p   = bin.CumulativeDistribution(kv.Value);
                            msg = "(as or less extreme)";
                        }
                        msg = String.Format("{{{0}}} occured {1} times: p {2:N4} " + msg,
                                            string.Join(",", kv.Key.Select(a => ((int)a).ToString())),
                                            kv.Value,
                                            p);
                        Console.WriteLine(msg);
                        if (p < accceptableP)
                        {
                            //remembered rather than thrown so every ordering is still printed
                            assertMessage = msg;
                        }
                    }

                    //orderings that never came up count as zero occurrences
                    int unusedPermutations = (int)permutations - ocurrences.Count;
                    var cc = ocurrences.CountOfCounts();
                    cc[0] = unusedPermutations;
                    var mean     = (double)cc.Sum(kv => kv.Key * kv.Value) / permutations;
                    var variance = (double)cc.Sum(kv => Math.Pow(kv.Key - mean, 2) * kv.Value) / permutations;
                    Console.WriteLine("mean: {0} variance: {1}", mean, variance);
                    if (unusedPermutations > 0)
                    {
                        //Poisson probability that one given ordering is never drawn (reported only, not asserted)
                        double lambda = sameBlockRpt / permutations;
                        double p0     = MathNet.Numerics.Distributions.Poisson.PMF(lambda, 0);
                        string msg    = String.Format("{0} other permutations were never used, p {1:N4}",
                                                      unusedPermutations,
                                                      p0);
                        Console.WriteLine(msg);
                    }

                    //TODO: get poisson deviance and pearson statistic for binomial in order to validate goodness of fit

                    if (assertMessage != null)
                    {
                        throw new AssertFailedException("unacceptably low probability " + assertMessage);
                    }
                }
            }
        }