/// <summary>
/// Tail probability of the circular binary segmentation statistic,
/// from the Siegmund (1988) or Yao (1989) paper.
/// The interval [0.5, 1 - delta] is cut into <paramref name="nGrid"/> equal cells and a
/// per-cell contribution is summed; the sum is then scaled and doubled (two-sided test).
/// </summary>
/// <param name="b">Value of the statistic at which the tail probability is evaluated.</param>
/// <param name="delta">Sets the upper end of the grid: the cells cover 0.5 up to 1 - delta.</param>
/// <param name="m">Scale for <paramref name="b"/>: b is divided by sqrt(m) (presumably the number of observations - confirm with callers).</param>
/// <param name="nGrid">Number of grid cells to sum over.</param>
/// <param name="tol">Tolerance forwarded unchanged to <c>TailProbability.Nu</c>.</param>
/// <returns>The two-sided tail probability approximation.</returns>
public static double TailP(double b, double delta, int m, int nGrid, double tol)
{
    double step = (0.5 - delta) / nGrid;
    double scaledB = b / Math.Sqrt(m);

    // Both cursors are advanced at the top of every pass, so they start one step early:
    // on the first pass cellStart becomes 0.5 and cellMid becomes the middle of the first cell.
    double cellStart = 0.5 - step;
    double cellMid = 0.5 - 0.5 * step;

    double sum = 0.0;
    for (int cell = 0; cell < nGrid; cell++)
    {
        cellStart += step;
        cellMid += step;

        // Nu is evaluated at the cell midpoint; the weight comes from the cell's left edge and width.
        double x = scaledB / Math.Sqrt(cellMid * (1 - cellMid));
        double nu = TailProbability.Nu(x, tol);
        sum += Math.Pow(nu, 2) * TailProbability.IntegralInvT1tSq(cellStart, step);
    }

    double oneSided = 9.973557E-2 * Math.Pow(b, 3) * Math.Exp(-Math.Pow(b, 2) / 2) * sum;

    // The test is two-sided, so the one-sided value is multiplied by 2.
    return 2.0 * oneSided;
}
/// <summary>
/// Fortran subroutine fndcpt.
/// Ternary segmentation with permutation reference distribution.
/// Finds the maximal statistic over the data, decides (by thresholds and, if needed, a
/// permutation test) whether to split at all, and then reports up to two change points.
/// </summary>
/// <param name="genomeData">Data to be segmented.</param>
/// <param name="totalSumOfSquares">Forwarded unchanged to the CBSTStatistic routines.</param>
/// <param name="nPerm">Maximum number of permutations to draw.</param>
/// <param name="cutoffPValue">P-value cutoff used for every accept/reject decision here.</param>
/// <param name="nChangePoints">Receives the number of change points found (0, 1 or 2).</param>
/// <param name="iChangePoint">Receives a new 2-element array; the first <paramref name="nChangePoints"/> entries are set.</param>
/// <param name="isBinary">Forwarded unchanged to the CBSTStatistic routines.</param>
/// <param name="hybrid">When true, <c>TailProbability.TailP</c> supplies part of the p-value and permutations use <c>HTMaxP</c>; otherwise permutations use <c>TMaxP</c>.</param>
/// <param name="al0">Forwarded unchanged to the CBSTStatistic routines.</param>
/// <param name="hk">Forwarded to <c>HTMaxP</c> (hybrid mode only).</param>
/// <param name="delta">Forwarded to <c>TailProbability.TailP</c> (hybrid mode only).</param>
/// <param name="nGrid">Forwarded to <c>TailProbability.TailP</c> (hybrid mode only).</param>
/// <param name="sbdry">Early-stopping table: the permutation loop ends once the permutation number reaches the entry selected by the current exceedance count.</param>
/// <param name="tol">Forwarded to <c>TailProbability.TailP</c> (hybrid mode only).</param>
/// <param name="rnd">Random source forwarded to <c>XPerm</c> and <c>TPermP</c>.</param>
private static void FindChangePoints(double[] genomeData, double totalSumOfSquares, uint nPerm,
    double cutoffPValue, out int nChangePoints, out int[] iChangePoint, bool isBinary, bool hybrid,
    int al0, int hk, double delta, int nGrid, uint[] sbdry, double tol, Random rnd)
{
    int n = genomeData.Length;
    double[] permuted = new double[n]; // permuted genomeData
    double[] scratch = new double[n];  // work array handed to the statistic routines
    int[] arc = new int[2];            // segment lengths: arc[0], arc[1] - arc[0], n - arc[1]

    iChangePoint = new int[2];         // up to 2 change points
    nChangePoints = 0;

    double observed;
    CBSTStatistic.TMaxO(genomeData, totalSumOfSquares, scratch, arc, out observed, al0, isBinary);
    double observedRoot = Math.Sqrt(observed);
    observed *= 0.99999;

    // If the maximal t-statistic is too small (for now use 0.1), don't split.
    if (observedRoot <= 0.1)
    {
        return; // call rndend() before return?
    }

    // If the maximal t-statistic is too large (for now use 7.0), split without testing,
    // but only when it is not affected by outliers, i.e. a small segment length.
    int shorterArc = Math.Min(arc[1] - arc[0], n - arc[1] + arc[0]);
    bool splitWithoutTesting = (observedRoot >= 7.0) && (shorterArc >= 10);

    if (!splitWithoutTesting)
    {
        // Otherwise calculate a p-value and decide if and how the data are segmented.
        double permutationBudget = cutoffPValue;
        if (hybrid)
        {
            // The analytic tail probability is the lower bound; only the remainder of the
            // cutoff is left for the permutation part.
            double analyticP = TailProbability.TailP(observedRoot, delta, n, nGrid, tol);
            if (analyticP > cutoffPValue)
            {
                return;
            }
            permutationBudget = cutoffPValue - analyticP;
        }

        // maxExceedances: how many permuted statistics may reach the observed one before giving up.
        int maxExceedances = (int)(permutationBudget * nPerm);
        int boundarySlot = maxExceedances * (maxExceedances + 1) / 2 + 1;
        int exceedances = 0;

        for (int perm = 1; perm <= nPerm; perm++)
        {
            XPerm(genomeData, permuted, rnd);
            double permutedStat = hybrid
                ? CBSTStatistic.HTMaxP(hk, totalSumOfSquares, permuted, scratch, al0, isBinary)
                : CBSTStatistic.TMaxP(totalSumOfSquares, permuted, scratch, al0, isBinary);

            if (observed <= permutedStat)
            {
                exceedances++;
                boundarySlot++;
            }
            if (exceedances > maxExceedances)
            {
                return; // too many exceedances: no change point
            }
            if (perm >= sbdry[boundarySlot - 1])
            {
                break; // stopping boundary reached: accept the split early
            }
        }
    }

    // 200
    if (arc[1] == n)
    {
        // The second change point is the right boundary.
        nChangePoints = 1;
        iChangePoint[0] = arc[0];
        return;
    }

    if (arc[0] == 0)
    {
        // The first change point is the left boundary.
        nChangePoints = 1;
        iChangePoint[0] = arc[1];
        return;
    }

    // Both candidates are interior: test each one separately.
    // |-- n1 = arc[0] --|-- n2 = n12 - n1 --|
    // |--          n12 = arc[1]           --|
    int offset = 0;
    int n1 = arc[0];
    int n12 = arc[1];
    int n2 = n12 - n1;
    double splitPValue = CBSTStatistic.TPermP(n1, n2, n12, genomeData, offset, permuted, nPerm, rnd);
    if (splitPValue <= cutoffPValue)
    {
        nChangePoints = 1;
        iChangePoint[0] = arc[0];
    }

    // |-- n1 = n12 - n2 --|-- n2 = n - arc[1] --|
    // |--           n12 = n - arc[0]          --|
    offset = arc[0];
    n12 = n - arc[0];
    n2 = n - arc[1];
    n1 = n12 - n2;
    splitPValue = CBSTStatistic.TPermP(n1, n2, n12, genomeData, offset, permuted, nPerm, rnd);
    if (splitPValue <= cutoffPValue)
    {
        nChangePoints++;
        iChangePoint[nChangePoints - 1] = arc[1];
    }
    // 500
}