Exemple #1
0
        public static SegmentationInput.Segment[] DeriveSegments(List <int> breakpoints, int segmentsLength, uint[] startByChr, uint[] endByChr)
        {
            var startBreakpointsPos = new List <int>();
            var endBreakpointPos    = new List <int>();
            var lengthSeg           = new List <int>();

            if (breakpoints.Count >= 2 && segmentsLength > 10)
            {
                if (breakpoints[0] != 0)
                {
                    breakpoints.Insert(0, 0);
                }
                startBreakpointsPos.Add(breakpoints[0]);
                endBreakpointPos.Add(breakpoints[1] - 1);
                lengthSeg.Add(breakpoints[1] - 1);
                for (int i = 1; i < breakpoints.Count - 1; i++)
                {
                    startBreakpointsPos.Add(breakpoints[i]);
                    endBreakpointPos.Add(breakpoints[i + 1] - 1);
                    lengthSeg.Add(breakpoints[i + 1] - 1 - breakpoints[i]);
                }
                startBreakpointsPos.Add(breakpoints[breakpoints.Count - 1]);
                endBreakpointPos.Add(segmentsLength - 1);
                lengthSeg.Add(segmentsLength - breakpoints[breakpoints.Count - 1] - 1);
            }
            else
            {
                startBreakpointsPos.Add(0);
                endBreakpointPos.Add(segmentsLength - 1);
                lengthSeg.Add(segmentsLength - 1);
            }


            var segments = new SegmentationInput.Segment[startBreakpointsPos.Count];

            for (int i = 0; i < startBreakpointsPos.Count; i++)
            {
                int start = startBreakpointsPos[i];
                int end   = endBreakpointPos[i];
                segments[i] = new SegmentationInput.Segment
                {
                    start = startByChr[start],
                    end   = endByChr[end]
                };
            }
            return(segments);
        }
Exemple #2
0
        /// <summary>
        /// CBS: circular binary segmentation porting the R function segment in DNAcopy
        /// </summary>
        /// <param name="alpha">Now in this.Alpha</param>
        /// <param name="nPerm"></param>
        /// <param name="pMethod">"hybrid" or "perm"</param>
        /// <param name="minWidth"></param>
        /// <param name="kMax"></param>
        /// <param name="nMin"></param>
        /// <param name="eta"></param>
        /// <param name="sbdry"></param>
        /// <param name="trim"></param>
        /// <param name="undoSplit">"none" or "prune" or "sdundo"; now in this.UndoMethod</param>
        /// <param name="undoPrune"></param>
        /// <param name="undoSD"></param>
        /// <param name="verbose"></param>
        public Dictionary <string, SegmentationInput.Segment[]> Run(SegmentationInput segmentation, uint nPerm = 10000, string pMethod = "hybrid", int minWidth = 2, int kMax = 25,
                                                                    uint nMin        = 200, double eta     = 0.05, uint[] sbdry = null, double trim = 0.025,
                                                                    double undoPrune = 0.05, double undoSD = 3, int verbose     = 1)
        {
            if (minWidth < 2 || minWidth > 5)
            {
                Console.Error.WriteLine("Minimum segment width should be between 2 and 5");
                Environment.Exit(1);
            }
            if (nMin < 4 * kMax)
            {
                Console.Error.WriteLine("nMin should be >= 4 * kMax");
                Environment.Exit(1);
            }
            if (sbdry == null)
            {
                GetBoundary.ComputeBoundary(nPerm, this._alpha, eta, out sbdry);
            }

            Dictionary <string, int[]>    inaByChr          = new Dictionary <string, int[]>();
            Dictionary <string, double[]> finiteScoresByChr = new Dictionary <string, double[]>();

            List <ThreadStart> tasks = new List <ThreadStart>();

            foreach (KeyValuePair <string, double[]> scoreByChrKVP in segmentation.CoverageInfo.CoverageByChr)
            {
                tasks.Add(new ThreadStart(() =>
                {
                    string chr = scoreByChrKVP.Key;
                    int[] ina;
                    Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf

                    double[] scores;
                    if (ina.Length == scoreByChrKVP.Value.Length)
                    {
                        scores = scoreByChrKVP.Value;
                    }
                    else
                    {
                        Helper.ExtractValues <double>(scoreByChrKVP.Value, ina, out scores);
                    }

                    lock (finiteScoresByChr)
                    {
                        finiteScoresByChr[chr] = scores;
                        inaByChr[chr]          = ina;
                    }
                }));
            }
            Parallel.ForEach(tasks, task => task.Invoke());

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = 0;

            foreach (var list in finiteScoresByChr.Values)
            {
                n += list.Length;
            }
            if (n == 0)
            {
                return(new Dictionary <string, SegmentationInput.Segment[]>());
            }

            double trimmedSD = Math.Sqrt(ChangePoint.TrimmedVariance(finiteScoresByChr, trim: trim));

            Dictionary <string, SegmentationInput.Segment[]> segmentByChr = new Dictionary <string, SegmentationInput.Segment[]>();

            // when parallelizing we need an RNG for each chromosome to get deterministic results
            Random seedGenerator = new MersenneTwister(0);
            Dictionary <string, Random> perChromosomeRandom = new Dictionary <string, Random>();

            foreach (string chr in segmentation.CoverageInfo.CoverageByChr.Keys)
            {
                perChromosomeRandom[chr] = new MersenneTwister(seedGenerator.NextFullRangeInt32(), true);
            }

            tasks = new List <ThreadStart>();
            foreach (string chr in segmentation.CoverageInfo.CoverageByChr.Keys)
            {
                tasks.Add(new ThreadStart(() =>
                {
                    int[] ina = inaByChr[chr];
                    int[] lengthSeg;
                    double[] segmentMeans;
                    ChangePoint.ChangePoints(segmentation.CoverageInfo.CoverageByChr[chr], sbdry, out lengthSeg, out segmentMeans, perChromosomeRandom[chr],
                                             dataType: "logratio", alpha: this._alpha, nPerm: nPerm,
                                             pMethod: pMethod, minWidth: minWidth, kMax: kMax, nMin: nMin, trimmedSD: trimmedSD,
                                             undoSplits: this._undoMethod, undoPrune: undoPrune, undoSD: undoSD, verbose: verbose);

                    SegmentationInput.Segment[] segments = new SegmentationInput.Segment[lengthSeg.Length];
                    int cs1 = 0, cs2 = -1; // cumulative sum
                    for (int i = 0; i < lengthSeg.Length; i++)
                    {
                        cs2              += lengthSeg[i];
                        int start         = ina[cs1];
                        int end           = ina[cs2];
                        segments[i]       = new SegmentationInput.Segment();
                        segments[i].start = segmentation.CoverageInfo.StartByChr[chr][start]; // Genomic start
                        segments[i].end   = segmentation.CoverageInfo.EndByChr[chr][end];     // Genomic end
                        cs1              += lengthSeg[i];
                    }

                    lock (segmentByChr)
                    {
                        segmentByChr[chr] = segments;
                    }
                }));
            }

            Parallel.ForEach(tasks, task => task.Invoke());
            Console.WriteLine("{0} Completed CBS tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return(segmentByChr);
        }