コード例 #1
0
ファイル: Segmentation.cs プロジェクト: weizhiting/canvas
        /// <summary>
        /// Writes one tab-separated line per bin (chromosome, start, end, score, segment number)
        /// to a gzip-compressed file.
        /// </summary>
        /// <param name="outPath">Path handed to <c>GzipWriter</c> for the output file.</param>
        /// <param name="segmentationResults">
        /// Per-chromosome segments; their start positions are the lookup passed to <c>IsNewSegment</c>.
        /// </param>
        public void WriteCanvasPartitionResults(string outPath, GenomeSegmentationResults segmentationResults)
        {
            // Lookup of "chr:start" keys, one per segment start. Kept as Dictionary<string, bool>
            // because that is the type IsNewSegment is called with.
            Dictionary<string, bool> starts = new Dictionary<string, bool>();

            foreach (string chr in segmentationResults.SegmentByChr.Keys)
            {
                foreach (Segmentation.Segment segment in segmentationResults.SegmentByChr[chr])
                {
                    starts[chr + ":" + segment.start] = true;
                }
            }

            // Optional forbidden intervals, keyed by chromosome; stays empty when no BED path is configured.
            Dictionary<string, List<SampleGenomicBin>> excludedIntervals = new Dictionary<string, List<SampleGenomicBin>>();

            if (!string.IsNullOrEmpty(ForbiddenIntervalBedPath))
            {
                excludedIntervals = CanvasCommon.Utilities.LoadBedFile(ForbiddenIntervalBedPath);
            }

            using (GzipWriter writer = new GzipWriter(outPath))
            {
                // Running segment counter across all chromosomes; incremented before the
                // first write whenever IsNewSegment reports a new segment.
                int segmentNum = -1;

                foreach (string chr in StartByChr.Keys)
                {
                    // TryGetValue leaves this null when the chromosome has no excluded intervals.
                    List<SampleGenomicBin> excludeIntervals;
                    excludedIntervals.TryGetValue(chr, out excludeIntervals);

                    var binStarts = StartByChr[chr];
                    var binEnds   = EndByChr[chr];
                    var binScores = ScoreByChr[chr];

                    int  excludeIndex   = 0; // Points to the first interval which *doesn't* end before our current position
                    uint previousBinEnd = 0;

                    for (int pos = 0; pos < binStarts.Length; pos++)
                    {
                        uint   start = binStarts[pos];
                        uint   end   = binEnds[pos];
                        string key   = chr + ":" + start;

                        if (IsNewSegment(starts, key, excludeIntervals, previousBinEnd, end, start, ref excludeIndex))
                        {
                            segmentNum++;
                        }

                        // Write the interpolated string directly: routing it through string.Format
                        // would re-parse it as a format string and throw on any brace in the text.
                        writer.WriteLine($"{chr}\t{start}\t{end}\t{binScores[pos]}\t{segmentNum}");
                        previousBinEnd = end;
                    }
                }
            }
        }
コード例 #2
0
ファイル: Segmentation.cs プロジェクト: weizhiting/canvas
        /// <summary>
        /// Converts breakpoint bin indices into segments expressed in genomic coordinates.
        /// Segment <c>i</c> spans bins <c>breakpoints[i]</c> through <c>breakpoints[i + 1] - 1</c>;
        /// the last segment runs through bin <c>sizeScoreByChr - 1</c>, so consecutive segments
        /// never share a bin.
        /// </summary>
        /// <param name="breakpoints">Bin indices at which segments begin.</param>
        /// <param name="sizeScoreByChr">Number of bins on the chromosome.</param>
        /// <param name="startByChr">Genomic start coordinate of each bin.</param>
        /// <param name="endByChr">Genomic end coordinate of each bin.</param>
        /// <returns>
        /// One segment per breakpoint when there are at least two breakpoints and more than ten bins;
        /// otherwise a single segment covering every bin. Empty when there are no bins.
        /// </returns>
        public static Segmentation.Segment[] DeriveSegments(List<int> breakpoints, int sizeScoreByChr, uint[] startByChr, uint[] endByChr)
        {
            if (breakpoints == null)
            {
                throw new System.ArgumentNullException(nameof(breakpoints));
            }

            // With no bins there is nothing to index into startByChr/endByChr.
            if (sizeScoreByChr < 1)
            {
                return new Segmentation.Segment[0];
            }

            List<int> startBins = new List<int>();
            List<int> endBins   = new List<int>();

            if (breakpoints.Count >= 2 && sizeScoreByChr > 10)
            {
                int lastIndex = breakpoints.Count - 1;
                for (int i = 0; i <= lastIndex; i++)
                {
                    startBins.Add(breakpoints[i]);
                    // Each segment stops one bin before the next breakpoint; the final one
                    // extends to the last bin of the chromosome.
                    endBins.Add(i == lastIndex ? sizeScoreByChr - 1 : breakpoints[i + 1] - 1);
                }
            }
            else
            {
                // Too few breakpoints or bins: report the whole chromosome as one segment.
                startBins.Add(0);
                endBins.Add(sizeScoreByChr - 1);
            }

            Segmentation.Segment[] segments = new Segmentation.Segment[startBins.Count];
            for (int i = 0; i < segments.Length; i++)
            {
                segments[i] = new Segmentation.Segment
                {
                    start = startByChr[startBins[i]], // Genomic start
                    end   = endByChr[endBins[i]]      // Genomic end
                };
            }
            return segments;
        }
コード例 #3
0
ファイル: CBSRunner.cs プロジェクト: weizhiting/canvas
        /// <summary>
        /// CBS: circular binary segmentation, porting the R function <c>segment</c> in DNAcopy.
        /// Each chromosome's scores are segmented in parallel and the resulting segments are
        /// mapped back to genomic coordinates. The significance level and the undo method are
        /// read from this instance (<c>_alpha</c>, <c>_undoMethod</c>) rather than passed in.
        /// </summary>
        /// <param name="segmentation">Supplies <c>ScoreByChr</c>, <c>StartByChr</c> and <c>EndByChr</c>.</param>
        /// <param name="nPerm">Forwarded to the boundary computation and to <c>ChangePoint.ChangePoints</c>.</param>
        /// <param name="pMethod">"hybrid" or "perm"</param>
        /// <param name="minWidth">Minimum segment width; must be between 2 and 5.</param>
        /// <param name="kMax">Forwarded to <c>ChangePoint.ChangePoints</c>.</param>
        /// <param name="nMin">Forwarded to <c>ChangePoint.ChangePoints</c>; must be at least 4 * <paramref name="kMax"/>.</param>
        /// <param name="eta">Forwarded to <c>GetBoundary.ComputeBoundary</c> when <paramref name="sbdry"/> is null.</param>
        /// <param name="sbdry">Precomputed boundary; computed with <c>GetBoundary.ComputeBoundary</c> when null.</param>
        /// <param name="trim">Forwarded to <c>ChangePoint.TrimmedVariance</c>.</param>
        /// <param name="undoPrune">Forwarded to <c>ChangePoint.ChangePoints</c>.</param>
        /// <param name="undoSD">Forwarded to <c>ChangePoint.ChangePoints</c>.</param>
        /// <param name="verbose">Forwarded to <c>ChangePoint.ChangePoints</c>.</param>
        /// <returns>
        /// Segments keyed by chromosome. Empty when no chromosome has any finite score.
        /// </returns>
        public Dictionary<string, Segmentation.Segment[]> Run(Segmentation segmentation, uint nPerm = 10000, string pMethod = "hybrid", int minWidth = 2, int kMax = 25,
                                                              uint nMin        = 200, double eta     = 0.05, uint[] sbdry = null, double trim = 0.025,
                                                              double undoPrune = 0.05, double undoSD = 3, int verbose     = 1)
        {
            if (minWidth < 2 || minWidth > 5)
            {
                Console.Error.WriteLine("Minimum segment width should be between 2 and 5");
                Environment.Exit(1);
            }
            if (nMin < 4 * kMax)
            {
                Console.Error.WriteLine("nMin should be >= 4 * kMax");
                Environment.Exit(1);
            }
            if (sbdry == null)
            {
                GetBoundary.ComputeBoundary(nPerm, this._alpha, eta, out sbdry);
            }

            // Per chromosome: indices of the finite scores, and the finite scores themselves.
            Dictionary<string, int[]>    inaByChr          = new Dictionary<string, int[]>();
            Dictionary<string, double[]> finiteScoresByChr = new Dictionary<string, double[]>();

            Parallel.ForEach(segmentation.ScoreByChr, scoreByChrKVP =>
            {
                int[] ina;
                Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf

                double[] scores;
                if (ina.Length == scoreByChrKVP.Value.Length)
                {
                    // Every score is finite, so the original array can be used as is.
                    scores = scoreByChrKVP.Value;
                }
                else
                {
                    Helper.ExtractValues<double>(scoreByChrKVP.Value, ina, out scores);
                }

                // One lock guards both dictionaries; Dictionary is not safe for concurrent writes.
                lock (finiteScoresByChr)
                {
                    finiteScoresByChr[scoreByChrKVP.Key] = scores;
                    inaByChr[scoreByChrKVP.Key]          = ina;
                }
            });

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = 0;
            foreach (double[] scores in finiteScoresByChr.Values)
            {
                n += scores.Length;
            }
            if (n == 0)
            {
                return new Dictionary<string, Segmentation.Segment[]>();
            }

            double trimmedSD = Math.Sqrt(ChangePoint.TrimmedVariance(finiteScoresByChr, trim: trim));

            Dictionary<string, Segmentation.Segment[]> segmentByChr = new Dictionary<string, Segmentation.Segment[]>();

            // when parallelizing we need an RNG for each chromosome to get deterministic results
            Random seedGenerator = new MersenneTwister(0);
            Dictionary<string, Random> perChromosomeRandom = new Dictionary<string, Random>();

            foreach (string chr in segmentation.ScoreByChr.Keys)
            {
                perChromosomeRandom[chr] = new MersenneTwister(seedGenerator.NextFullRangeInt32(), true);
            }

            Parallel.ForEach(segmentation.ScoreByChr.Keys, chr =>
            {
                int[]    ina          = inaByChr[chr];
                double[] finiteScores = finiteScoresByChr[chr];
                Segmentation.Segment[] segments;

                if (finiteScores.Length == 0)
                {
                    // No finite score on this chromosome: there is no bin a segment could be mapped to.
                    segments = new Segmentation.Segment[0];
                }
                else
                {
                    int[]    lengthSeg;
                    double[] segmentMeans;

                    // Segment the finite scores only. The cumulative segment lengths below are used
                    // as offsets into ina, which indexes finite entries, so the lengths must be
                    // measured over the same finite-only array.
                    ChangePoint.ChangePoints(finiteScores, sbdry, out lengthSeg, out segmentMeans, perChromosomeRandom[chr],
                                             dataType: "logratio", alpha: this._alpha, nPerm: nPerm,
                                             pMethod: pMethod, minWidth: minWidth, kMax: kMax, nMin: nMin, trimmedSD: trimmedSD,
                                             undoSplits: this._undoMethod, undoPrune: undoPrune, undoSD: undoSD, verbose: verbose);

                    segments = new Segmentation.Segment[lengthSeg.Length];
                    int firstIndex = 0; // offset into ina of the current segment's first score
                    for (int i = 0; i < lengthSeg.Length; i++)
                    {
                        int lastIndex = firstIndex + lengthSeg[i] - 1;
                        segments[i] = new Segmentation.Segment
                        {
                            start = segmentation.StartByChr[chr][ina[firstIndex]], // Genomic start
                            end   = segmentation.EndByChr[chr][ina[lastIndex]]     // Genomic end
                        };
                        firstIndex += lengthSeg[i];
                    }
                }

                lock (segmentByChr)
                {
                    segmentByChr[chr] = segments;
                }
            });

            Console.WriteLine("{0} Completed CBS tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return segmentByChr;
        }