Exemple #1
0
        /// <summary>
        /// Approximates the two-sided tail probability of the circular binary
        /// segmentation statistic, following Siegmund (1988) / Yao (1989).
        /// The value is built as a sum over <paramref name="nGrid"/> equally wide grid cells.
        /// </summary>
        /// <param name="b">Statistic value; used as b / sqrt(m) inside the sum and in the
        /// b^3 * exp(-b^2 / 2) factor applied to the sum.</param>
        /// <param name="delta">Determines the grid cell width, (0.5 - delta) / nGrid.</param>
        /// <param name="m">Divisor under the square root that rescales <paramref name="b"/>
        /// (presumably the number of data points; confirm against callers).</param>
        /// <param name="nGrid">Number of grid cells, i.e. loop iterations.</param>
        /// <param name="tol">Tolerance handed unchanged to <c>TailProbability.Nu</c>.</param>
        /// <returns>
        /// The grid sum multiplied by 9.973557E-2 * b^3 * exp(-b^2 / 2), then doubled
        /// because the test is two-sided.
        /// </returns>
        public static double TailP(double b, double delta, int m, int nGrid, double tol)
        {
            double cellWidth = (0.5 - delta) / nGrid;
            double scaledB   = b / Math.Sqrt(m);

            // Both positions are advanced by one cell width at the top of every
            // iteration, so each starts one cell width below its first used value
            // (0.5 for the lower edge, 0.5 + cellWidth / 2 for the midpoint).
            double lowerEdge = 0.5 - cellWidth;
            double midpoint  = 0.5 - 0.5 * cellWidth;
            double gridSum   = 0.0;

            for (int cell = 0; cell < nGrid; cell++)
            {
                lowerEdge += cellWidth;
                midpoint  += cellWidth;

                double nuArgument   = scaledB / Math.Sqrt(midpoint * (1 - midpoint));
                double nuValue      = TailProbability.Nu(nuArgument, tol);
                double cellIntegral = TailProbability.IntegralInvT1tSq(lowerEdge, cellWidth);

                gridSum += Math.Pow(nuValue, 2) * cellIntegral;
            }

            double oneSided = 9.973557E-2 * Math.Pow(b, 3) * Math.Exp(-Math.Pow(b, 2) / 2) * gridSum;

            // The test is two-sided, so the one-sided probability is doubled.
            return 2.0 * oneSided;
        }
Exemple #2
0
        /// <summary>
        /// Port of the Fortran subroutine <c>fndcpt</c>: ternary segmentation with a
        /// permutation reference distribution. Reports zero, one or two change points
        /// for <paramref name="genomeData"/>.
        /// </summary>
        /// <param name="genomeData">Data values to be segmented.</param>
        /// <param name="totalSumOfSquares">Passed through to the <c>CBSTStatistic</c> max-statistic routines.</param>
        /// <param name="nPerm">Upper limit on the number of permutations in the sequential test;
        /// also forwarded to <c>CBSTStatistic.TPermP</c>.</param>
        /// <param name="cutoffPValue">P-value threshold used for every accept/reject decision here.</param>
        /// <param name="nChangePoints">Receives the number of change points reported (0, 1 or 2).</param>
        /// <param name="iChangePoint">Receives a newly allocated two-element array; its first
        /// <paramref name="nChangePoints"/> entries hold the reported change-point positions.</param>
        /// <param name="isBinary">Passed through to the <c>CBSTStatistic</c> max-statistic routines.</param>
        /// <param name="hybrid">When true, <c>TailProbability.TailP</c> supplies a lower bound on the
        /// p-value and permutations are scored with <c>HTMaxP</c>; otherwise <c>TMaxP</c> is used.</param>
        /// <param name="al0">Passed through to the <c>CBSTStatistic</c> max-statistic routines.</param>
        /// <param name="hk">Passed to <c>HTMaxP</c>; only used when <paramref name="hybrid"/> is true.</param>
        /// <param name="delta">Passed to <c>TailP</c>; only used when <paramref name="hybrid"/> is true.</param>
        /// <param name="nGrid">Passed to <c>TailP</c>; only used when <paramref name="hybrid"/> is true.</param>
        /// <param name="sbdry">Sequential stopping boundary: the permutation loop stops early, in favour
        /// of splitting, once the permutation count reaches the current boundary entry.</param>
        /// <param name="tol">Passed to <c>TailP</c>; only used when <paramref name="hybrid"/> is true.</param>
        /// <param name="rnd">Random source handed to <c>XPerm</c> and <c>CBSTStatistic.TPermP</c>.</param>
        private static void FindChangePoints(double[] genomeData, double totalSumOfSquares, uint nPerm,
                                             double cutoffPValue, out int nChangePoints, out int[] iChangePoint, bool isBinary,
                                             bool hybrid, int al0, int hk, double delta, int nGrid, uint[] sbdry, double tol, Random rnd)
        {
            int dataLength = genomeData.Length;

            double[] permuted = new double[dataLength]; // receives each permutation of genomeData
            double[] workspace = new double[dataLength]; // work array shared by the statistic routines

            iChangePoint  = new int[2];                  // room for at most two change points
            nChangePoints = 0;

            // Candidate arc: the implied segment lengths are
            // arc[0], arc[1] - arc[0] and dataLength - arc[1].
            int[] arc = new int[2];
            double observed;
            CBSTStatistic.TMaxO(genomeData, totalSumOfSquares, workspace, arc, out observed, al0, isBinary);

            double observedRoot = Math.Sqrt(observed);
            observed *= 0.99999;

            // A maximal t-statistic this small (threshold 0.1 for now) is never split.
            if (observedRoot <= 0.1)
            {
                return;
            }

            int arcStart = arc[0];
            int arcEnd   = arc[1];

            // A very large statistic (threshold 7.0 for now) is split without a
            // permutation test, provided neither side of the arc is so short
            // (fewer than 10 points) that outliers could be driving it.
            int shorterSide = Math.Min(arcEnd - arcStart, dataLength - arcEnd + arcStart);
            bool splitWithoutTest = (observedRoot >= 7.0) && (shorterSide >= 10);

            if (!splitWithoutTest)
            {
                // Level available to the permutation test. In hybrid mode the tail
                // approximation is a lower bound on the p-value and is spent first.
                double permutationLevel = cutoffPValue;
                if (hybrid)
                {
                    double tailBound = TailProbability.TailP(observedRoot, delta, dataLength, nGrid, tol);
                    if (tailBound > cutoffPValue)
                    {
                        return;
                    }
                    permutationLevel = cutoffPValue - tailBound;
                }

                // exceedLimit: largest tolerated number of permutations whose statistic
                // reaches the (slightly shrunk) observed one.
                int exceedLimit   = (int)(permutationLevel * nPerm);
                int boundarySlot  = exceedLimit * (exceedLimit + 1) / 2 + 1; // 1-based slot into sbdry
                int exceedCount   = 0;

                for (int perm = 1; perm <= nPerm; perm++)
                {
                    XPerm(genomeData, permuted, rnd);

                    double permutedStat;
                    if (hybrid)
                    {
                        permutedStat = CBSTStatistic.HTMaxP(hk, totalSumOfSquares, permuted, workspace, al0, isBinary);
                    }
                    else
                    {
                        permutedStat = CBSTStatistic.TMaxP(totalSumOfSquares, permuted, workspace, al0, isBinary);
                    }

                    if (observed <= permutedStat)
                    {
                        exceedCount++;
                        boundarySlot++;
                    }

                    // Too many exceedances: no change point is reported.
                    if (exceedCount > exceedLimit)
                    {
                        return;
                    }

                    // Stopping boundary reached: stop permuting and go on to split.
                    if (perm >= sbdry[boundarySlot - 1])
                    {
                        break;
                    }
                }
            }

            // Fortran label 200: decide which ends of the arc become change points.
            if (arcEnd == dataLength)
            {
                // The arc ends at the right boundary, so only its start is a change point.
                nChangePoints   = 1;
                iChangePoint[0] = arcStart;
                return;
            }

            if (arcStart == 0)
            {
                // The arc starts at the left boundary, so only its end is a change point.
                nChangePoints   = 1;
                iChangePoint[0] = arcEnd;
                return;
            }

            // Interior arc: test each end separately.
            // |-- firstLength = arcStart --|-- secondLength = arcEnd - arcStart --|
            // |--                   combinedLength = arcEnd                     --|
            int offset         = 0;
            int firstLength    = arcStart;
            int combinedLength = arcEnd;
            int secondLength   = combinedLength - firstLength;

            double startPValue = CBSTStatistic.TPermP(firstLength, secondLength, combinedLength,
                                                      genomeData, offset, permuted, nPerm, rnd);
            if (startPValue <= cutoffPValue)
            {
                nChangePoints   = 1;
                iChangePoint[0] = arcStart;
            }

            // |-- firstLength = arcEnd - arcStart --|-- secondLength = dataLength - arcEnd --|
            // |--                   combinedLength = dataLength - arcStart                 --|
            offset         = arcStart;
            combinedLength = dataLength - arcStart;
            secondLength   = dataLength - arcEnd;
            firstLength    = combinedLength - secondLength;

            double endPValue = CBSTStatistic.TPermP(firstLength, secondLength, combinedLength,
                                                    genomeData, offset, permuted, nPerm, rnd);
            if (endPValue <= cutoffPValue)
            {
                nChangePoints++;
                iChangePoint[nChangePoints - 1] = arcEnd;
            }
            // Fortran label 500: end of routine.
        }