예제 #1
0
        /// <summary>
        /// Estimates a variance from the absolute first differences of all scores, treating the
        /// per-chromosome arrays as one sequence concatenated in the dictionary's enumeration order.
        /// </summary>
        /// <remarks>
        /// Ports the R expression
        /// <c>inflfact(trim)*sum((sort(abs(diff(genomdat)))[1:n.keep])^2 / (2*n.keep))</c>.
        /// </remarks>
        /// <param name="scoresByChr">Scores keyed by chromosome; empty arrays are skipped.</param>
        /// <param name="trim">
        /// Trimming parameter: round((1 - 2 * trim) * (n - 1)) of the smallest absolute differences
        /// are kept, where n is the total number of scores.
        /// </param>
        /// <returns>
        /// The inflation-corrected trimmed variance, or NaN when there are fewer than two scores in
        /// total (no difference can be formed).
        /// </returns>
        public static double TrimmedVariance(IDictionary <string, double[]> scoresByChr, double trim = 0.025)
        {
            int n = 0;

            foreach (double[] scores in scoresByChr.Values)
            {
                n += scores.Length;
            }
            if (n < 2)
            {
                // No first difference exists: n == 0 would request a negative array size and
                // n == 1 would divide by 2 * nKeep == 0 below.
                return(Double.NaN);
            }

            double[] diff        = new double[n - 1];
            int      i           = 0;
            bool     hasPrevious = false; // true once a non-empty chromosome has been consumed
            double   last        = Double.NaN;

            foreach (double[] scores in scoresByChr.Values)
            {
                if (scores.Length <= 0)
                {
                    continue;
                }
                if (hasPrevious)
                {
                    // Difference across the chromosome boundary. An explicit flag is required here:
                    // testing i > 0 misses the boundary when the first chromosome holds one score.
                    diff[i] = scores[0] - last;
                    i++;
                }
                Array.Copy(Helper.Diff(scores), 0, diff, i, scores.Length - 1);
                i          += scores.Length - 1;
                last        = scores[scores.Length - 1];
                hasPrevious = true;
            }
            int nKeep = Convert.ToInt32(Math.Round((1 - 2 * trim) * (n - 1)));

            // R code: inflfact(trim)*sum((sort(abs(diff(genomdat)))[1:n.keep])^2 / (2*n.keep))

            Helper.InplaceAbs(diff);
            Array.Sort(diff);

            // NOTE(review): PartialSumOfPowers is assumed to sum diff[k]^2 over the first nKeep
            // (smallest) entries - confirm against Helper.
            return(ChangePoint.InflationFactor(trim) * Helper.PartialSumOfPowers(diff, 2, 0, nKeep) / (2 * nKeep));
        }
예제 #2
0
        /// <summary>
        /// CBS: circular binary segmentation porting the R function segment in DNAcopy
        /// </summary>
        /// <remarks>
        /// The significance level and the split-undo method are not parameters; they are read from
        /// this._alpha and this._undoMethod. Invalid minWidth / nMin values write a message to
        /// standard error and terminate the process with exit code 1.
        /// </remarks>
        /// <param name="segmentation">Provides ScoreByChr, StartByChr and EndByChr, all keyed by chromosome.</param>
        /// <param name="nPerm">Forwarded to the boundary computation and to ChangePoint.ChangePoints.</param>
        /// <param name="pMethod">"hybrid" or "perm"</param>
        /// <param name="minWidth">Minimum segment width; must be between 2 and 5.</param>
        /// <param name="kMax">Forwarded to ChangePoint.ChangePoints; nMin must be at least 4 * kMax.</param>
        /// <param name="nMin">Forwarded to ChangePoint.ChangePoints; must be at least 4 * kMax.</param>
        /// <param name="eta">Forwarded to GetBoundary.ComputeBoundary when sbdry is null.</param>
        /// <param name="sbdry">Precomputed boundary; computed from nPerm, alpha and eta when null.</param>
        /// <param name="trim">Trimming parameter forwarded to ChangePoint.TrimmedVariance.</param>
        /// <param name="undoPrune">Forwarded to ChangePoint.ChangePoints.</param>
        /// <param name="undoSD">Forwarded to ChangePoint.ChangePoints.</param>
        /// <param name="verbose">Forwarded to ChangePoint.ChangePoints.</param>
        /// <returns>
        /// Segments keyed by chromosome. An empty dictionary is returned when no chromosome has any
        /// finite score; a chromosome without finite scores maps to an empty array.
        /// </returns>
        public Dictionary <string, Segmentation.Segment[]> Run(Segmentation segmentation, uint nPerm = 10000, string pMethod = "hybrid", int minWidth = 2, int kMax = 25,
                                                               uint nMin        = 200, double eta     = 0.05, uint[] sbdry = null, double trim = 0.025,
                                                               double undoPrune = 0.05, double undoSD = 3, int verbose     = 1)
        {
            if (minWidth < 2 || minWidth > 5)
            {
                Console.Error.WriteLine("Minimum segment width should be between 2 and 5");
                Environment.Exit(1);
            }
            if (nMin < 4 * kMax)
            {
                Console.Error.WriteLine("nMin should be >= 4 * kMax");
                Environment.Exit(1);
            }
            if (sbdry == null)
            {
                GetBoundary.ComputeBoundary(nPerm, this._alpha, eta, out sbdry);
            }

            // Per chromosome: indices of the finite scores (ina) and the finite scores themselves.
            Dictionary <string, int[]>    inaByChr          = new Dictionary <string, int[]>();
            Dictionary <string, double[]> finiteScoresByChr = new Dictionary <string, double[]>();

            List <ThreadStart> tasks = new List <ThreadStart>();

            foreach (KeyValuePair <string, double[]> scoreByChrKVP in segmentation.ScoreByChr)
            {
                tasks.Add(new ThreadStart(() =>
                {
                    string chr = scoreByChrKVP.Key;
                    int[] ina;
                    Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf

                    double[] scores;
                    if (ina.Length == scoreByChrKVP.Value.Length)
                    {
                        // Everything is finite: reuse the original array instead of copying.
                        scores = scoreByChrKVP.Value;
                    }
                    else
                    {
                        Helper.ExtractValues <double>(scoreByChrKVP.Value, ina, out scores);
                    }

                    // Both dictionaries are guarded by the same lock object.
                    lock (finiteScoresByChr)
                    {
                        finiteScoresByChr[chr] = scores;
                        inaByChr[chr]          = ina;
                    }
                }));
            }
            Parallel.ForEach(tasks, task => task.Invoke());

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = 0;

            foreach (var list in finiteScoresByChr.Values)
            {
                n += list.Length;
            }
            if (n == 0)
            {
                return(new Dictionary <string, Segmentation.Segment[]>());
            }

            double trimmedSD = Math.Sqrt(ChangePoint.TrimmedVariance(finiteScoresByChr, trim: trim));

            Dictionary <string, Segmentation.Segment[]> segmentByChr = new Dictionary <string, Segmentation.Segment[]>();

            // when parallelizing we need an RNG for each chromosome to get deterministic results
            Random seedGenerator = new MersenneTwister(0);
            Dictionary <string, Random> perChromosomeRandom = new Dictionary <string, Random>();

            foreach (string chr in segmentation.ScoreByChr.Keys)
            {
                perChromosomeRandom[chr] = new MersenneTwister(seedGenerator.NextFullRangeInt32(), true);
            }

            tasks = new List <ThreadStart>();
            foreach (string chr in segmentation.ScoreByChr.Keys)
            {
                tasks.Add(new ThreadStart(() =>
                {
                    int[] ina = inaByChr[chr];
                    if (ina.Length == 0)
                    {
                        // No finite score on this chromosome: nothing to segment, and ina cannot
                        // be indexed below.
                        lock (segmentByChr)
                        {
                            segmentByChr[chr] = new Segmentation.Segment[0];
                        }
                        return;
                    }

                    int[] lengthSeg;
                    double[] segmentMeans;
                    // Segment the finite scores only: the segment lengths returned here are mapped
                    // back to original positions through ina, so both must use the same index space.
                    ChangePoint.ChangePoints(finiteScoresByChr[chr], sbdry, out lengthSeg, out segmentMeans, perChromosomeRandom[chr],
                                             dataType: "logratio", alpha: this._alpha, nPerm: nPerm,
                                             pMethod: pMethod, minWidth: minWidth, kMax: kMax, nMin: nMin, trimmedSD: trimmedSD,
                                             undoSplits: this._undoMethod, undoPrune: undoPrune, undoSD: undoSD, verbose: verbose);

                    Segmentation.Segment[] segments = new Segmentation.Segment[lengthSeg.Length];
                    int cs1 = 0, cs2 = -1; // cumulative sum: first / last finite index of the segment
                    for (int i = 0; i < lengthSeg.Length; i++)
                    {
                        cs2              += lengthSeg[i];
                        int start         = ina[cs1];
                        int end           = ina[cs2];
                        segments[i]       = new Segmentation.Segment();
                        segments[i].start = segmentation.StartByChr[chr][start]; // Genomic start
                        segments[i].end   = segmentation.EndByChr[chr][end];     // Genomic end
                        cs1              += lengthSeg[i];
                    }

                    lock (segmentByChr)
                    {
                        segmentByChr[chr] = segments;
                    }
                }));
            }

            Parallel.ForEach(tasks, task => task.Invoke());
            // segmentation.SegmentationResults = new Segmentation.GenomeSegmentationResults(segmentByChr);
            Console.WriteLine("{0} Completed CBS tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return(segmentByChr);
        }
예제 #3
0
        /// <summary>
        /// Splits genomeData into segments by repeatedly testing the right-most pending segment for
        /// change points, then reports the length and mean of every final segment.
        /// Outputs:
        ///     lengthSeg
        ///     segmentMeans
        /// </summary>
        /// <param name="genomeData">Values to segment, in positional order.</param>
        /// <param name="sbdry">Boundary forwarded to ChangePoint.FindChangePoints.</param>
        /// <param name="lengthSeg">segment lengths</param>
        /// <param name="segmentMeans">segment means</param>
        /// <param name="rnd">Random number generator forwarded to ChangePoint.FindChangePoints.</param>
        /// <param name="dataType">"logratio" or "binary"</param>
        /// <param name="alpha">Forwarded to ChangePoint.FindChangePoints.</param>
        /// <param name="nPerm">Forwarded to ChangePoint.FindChangePoints.</param>
        /// <param name="pMethod">"hybrid" enables the hybrid method for segments longer than nMin.</param>
        /// <param name="minWidth">Segments shorter than 2 * minWidth are never split.</param>
        /// <param name="kMax">Used for the hybrid delta, (kMax + 1) / segment length, and forwarded.</param>
        /// <param name="nMin">Segment length above which the hybrid method is used.</param>
        /// <param name="trimmedSD">
        /// Standard deviation used by the SD undo step; when not positive it is estimated as
        /// MAD(diff(genomeData)) / sqrt(2).
        /// </param>
        /// <param name="undoSplits">"none" or "prune" or "sdundo"</param>
        /// <param name="undoPrune">Cutoff passed to ChangePointsPrune.</param>
        /// <param name="undoSD">SD threshold passed to ChangePointsSDUndo.</param>
        /// <param name="verbose">Values of 3 or more print progress to standard output.</param>
        /// <param name="nGrid">Forwarded to ChangePoint.FindChangePoints.</param>
        /// <param name="tol">Forwarded to ChangePoint.FindChangePoints.</param>
        /// <exception cref="InvalidOperationException">
        /// FindChangePoints reported a number of change points other than 0, 1 or 2.
        /// </exception>
        public static void ChangePoints(double[] genomeData, uint[] sbdry,
                                        out int[] lengthSeg, out double[] segmentMeans, Random rnd,
                                        string dataType  = "logratio", double alpha = 0.01,
                                        uint nPerm       = 10000, string pMethod    = "hybrid", int minWidth          = 2, int kMax = 25,
                                        uint nMin        = 200, double trimmedSD    = -1, SegmentSplitUndo undoSplits = SegmentSplitUndo.None,
                                        double undoPrune = 0.05, double undoSD      = 3, int verbose                  = 1,
                                        int nGrid        = 100, double tol = 1E-6)
        {
            if (trimmedSD <= 0)
            {
                trimmedSD = Helper.MedianAbsoluteDeviation(Helper.Diff(genomeData)) / Math.Sqrt(2);
            }
            // start with the whole
            // segEnd is a stack of pending boundaries; its last two entries delimit the segment
            // examined next.
            List <int> segEnd = new List <int>();

            segEnd.Add(0);                 // inclusive
            segEnd.Add(genomeData.Length); // exclusive
            int        k = segEnd.Count;
            List <int> changeLocations = new List <int>();
            int        nChangePoints   = 0;

            int[] iChangePoint = null;
            while (k > 1)
            {
                int segStart = segEnd[k - 2];
                int currentN = segEnd[k - 1] - segStart;
                if (verbose >= 3)
                {
                    Console.Write(".... current segment: {0} - {1} \n", segStart + 1, segEnd[k - 1]);
                }
                if (currentN >= 2 * minWidth)
                {
                    double[] currentGenomeData = new double[currentN];
                    Array.Copy(genomeData, segStart, currentGenomeData, 0, currentN);
                    // check whether hybrid method needs to be used
                    bool   hybrid = false;
                    double delta  = 0.0;
                    if (pMethod.Equals("hybrid") && nMin < currentN)
                    {
                        hybrid = true;
                        delta  = (kMax + 1.0) / currentN;
                    }

                    // if all values of current.genomdat are the same don't segment
                    if (currentGenomeData.Max() == currentGenomeData.Min())
                    {
                        nChangePoints = 0;
                    }
                    else
                    {
                        // centering the current data will save a lot of computations later
                        double currentAverage = currentGenomeData.Average();
                        Helper.InplaceSub(currentGenomeData, currentAverage);
                        // need total sum of squares too
                        double currentTSS = Helper.WeightedSumOfSquares(currentGenomeData, null);
                        ChangePoint.FindChangePoints(currentGenomeData, currentTSS, nPerm, alpha,
                                                     out nChangePoints, out iChangePoint, dataType.Equals("binary"),
                                                     hybrid, minWidth, kMax, delta, nGrid, sbdry, tol, rnd);
                    }
                }
                else
                {
                    // Too short to hold two segments of the minimum width.
                    nChangePoints = 0;
                }
                // Save the change location
                // segEnd[k - 1] will be removed when nChangePoints == 0
                if (nChangePoints == 0)
                {
                    changeLocations.Add(segEnd[k - 1]);
                }
                // Offset iChangePoint by segEnd[k - 2]
                // (the positions reported for the copied sub-array become positions in genomeData)
                for (int i = 0; i < nChangePoints; i++)
                {
                    iChangePoint[i] += segStart;
                }
                switch (nChangePoints)      // switch by the number of change points
                {
                case 0:                     // no change point
                    segEnd.RemoveAt(k - 1); // Remove the last element
                    break;

                case 1:     // one change point
                    segEnd.Insert(k - 1, iChangePoint[0]);
                    break;

                case 2:     // two change points
                    segEnd.InsertRange(k - 1, iChangePoint);
                    break;

                default:
                    // segEnd would stay unchanged here, so continuing would re-process the same
                    // segment, possibly forever. Fail instead of only logging.
                    Console.Error.WriteLine("There should be 0, 1, or 2 change points");
                    throw new InvalidOperationException("There should be 0, 1, or 2 change points");
                }
                k = segEnd.Count;
                if (verbose >= 3)
                {
                    Console.Write(".... segments to go: {0} \n", String.Join(" ", segEnd));
                }
            }
            // Boundaries were collected from the right end backwards; put them in ascending order.
            // The same list is reused as the list of segment ends.
            changeLocations.Reverse();
            List <int> segEnds = changeLocations;
            int        nSeg    = segEnds.Count; // number of segments, taken before the leading 0 is added

            segEnds.Insert(0, 0);
            lengthSeg = Helper.Diff(segEnds.ToArray());
            if (nSeg > 1)
            {
                if (undoSplits == SegmentSplitUndo.Prune)
                {
                    lengthSeg = ChangePointsPrune(genomeData, lengthSeg, changeCutoff: undoPrune);
                }
                if (undoSplits == SegmentSplitUndo.SDUndo)
                {
                    lengthSeg = ChangePointsSDUndo(genomeData, lengthSeg, trimmedSD, changeSD: undoSD);
                }
            }
            segmentMeans = new double[lengthSeg.Length];
            int ll = 0, uu = 0; // bounds of the current segment, passed as iStart / iEnd

            for (int i = 0; i < lengthSeg.Length; i++)
            {
                uu += lengthSeg[i];
                // Works even if weights == null
                segmentMeans[i] = Helper.WeightedAverage(genomeData, null, iStart: ll, iEnd: uu);
                ll = uu;
            }
        }