/// <summary>
/// Writes one tab-separated line per bin (chromosome, bin start, bin end, score, segment number)
/// to <paramref name="outPath"/> through a <c>GzipWriter</c>.
/// </summary>
/// <param name="outPath">Path of the output file handed to <c>GzipWriter</c>.</param>
/// <param name="segmentationResults">
/// Segments per chromosome; each segment start is registered as a candidate segment boundary.
/// </param>
/// <remarks>
/// Bins are read from <c>StartByChr</c>, <c>EndByChr</c> and <c>ScoreByChr</c>. The segment number
/// starts at -1, is incremented whenever <c>IsNewSegment</c> reports a boundary, and is not reset
/// between chromosomes. When <c>ForbiddenIntervalBedPath</c> is set, the intervals loaded from it
/// are passed to <c>IsNewSegment</c> as well.
/// </remarks>
public void WriteCanvasPartitionResults(string outPath, GenomeSegmentationResults segmentationResults)
{
    // Index every segment start under the key "chr:position" so a bin can be matched by key.
    Dictionary<string, bool> starts = new Dictionary<string, bool>();
    foreach (string chr in segmentationResults.SegmentByChr.Keys)
    {
        var segments = segmentationResults.SegmentByChr[chr];
        for (int segmentIndex = 0; segmentIndex < segments.Length; segmentIndex++)
        {
            Segmentation.Segment segment = segments[segmentIndex];
            starts[chr + ":" + segment.start] = true;
        }
    }

    Dictionary<string, List<SampleGenomicBin>> excludedIntervals = new Dictionary<string, List<SampleGenomicBin>>();
    if (!string.IsNullOrEmpty(ForbiddenIntervalBedPath))
    {
        excludedIntervals = CanvasCommon.Utilities.LoadBedFile(ForbiddenIntervalBedPath);
    }

    using (GzipWriter writer = new GzipWriter(outPath))
    {
        int segmentNum = -1;
        foreach (string chr in StartByChr.Keys)
        {
            // Stays null when the chromosome has no excluded intervals (TryGetValue yields the default).
            List<SampleGenomicBin> excludeIntervals;
            excludedIntervals.TryGetValue(chr, out excludeIntervals);

            int excludeIndex = 0; // Points to the first interval which *doesn't* end before our current position
            uint previousBinEnd = 0;
            var binStarts = StartByChr[chr];
            for (int pos = 0; pos < binStarts.Length; pos++)
            {
                uint start = binStarts[pos];
                uint end = EndByChr[chr][pos];
                string key = chr + ":" + start;
                bool newSegment = IsNewSegment(starts, key, excludeIntervals, previousBinEnd, end, start, ref excludeIndex);
                if (newSegment)
                {
                    segmentNum++;
                }

                // The interpolated string is already the final text; it must not be re-parsed as a format string.
                string line = $"{chr}\t{start}\t{end}\t{ScoreByChr[chr][pos]}\t{segmentNum}";
                writer.WriteLine(line);
                previousBinEnd = end;
            }
        }
    }
}
/// <summary>
/// Converts a list of breakpoint indices into segments expressed in genomic coordinates.
/// </summary>
/// <param name="breakpoints">Breakpoint positions, used as indices into the bin arrays.</param>
/// <param name="sizeScoreByChr">Number of bins; <c>sizeScoreByChr - 1</c> is used as the last bin index.</param>
/// <param name="startByChr">Genomic start of each bin.</param>
/// <param name="endByChr">Genomic end of each bin.</param>
/// <returns>
/// One segment per derived interval. Breakpoints are only honoured when at least two are supplied
/// and <paramref name="sizeScoreByChr"/> is greater than 10; otherwise a single segment spanning
/// bin 0 to bin <c>sizeScoreByChr - 1</c> is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="breakpoints"/> is null.</exception>
public static Segmentation.Segment[] DeriveSegments(List<int> breakpoints, int sizeScoreByChr, uint[] startByChr, uint[] endByChr)
{
    if (breakpoints == null)
    {
        throw new ArgumentNullException(nameof(breakpoints));
    }

    List<int> startBreakpointsPos = new List<int>();
    List<int> endBreakpointPos = new List<int>();

    if (breakpoints.Count >= 2 && sizeScoreByChr > 10)
    {
        // First segment: from the first breakpoint up to the bin before the second one.
        startBreakpointsPos.Add(breakpoints[0]);
        endBreakpointPos.Add(breakpoints[1] - 1);

        // Interior segments run from breakpoints[i] - 1 to breakpoints[i + 1], so neighbouring
        // segments share their boundary bins.
        // NOTE(review): this overlap is kept exactly as it was; confirm it is intended and not an off-by-one.
        for (int i = 1; i < breakpoints.Count - 1; i++)
        {
            startBreakpointsPos.Add(breakpoints[i] - 1);
            endBreakpointPos.Add(breakpoints[i + 1]);
        }

        // Last segment: from the final breakpoint to the last bin.
        startBreakpointsPos.Add(breakpoints[breakpoints.Count - 1]);
        endBreakpointPos.Add(sizeScoreByChr - 1);
    }
    else
    {
        startBreakpointsPos.Add(0);
        endBreakpointPos.Add(sizeScoreByChr - 1);
    }

    Segmentation.Segment[] segments = new Segmentation.Segment[startBreakpointsPos.Count];
    for (int i = 0; i < startBreakpointsPos.Count; i++)
    {
        int start = startBreakpointsPos[i];
        int end = endBreakpointPos[i];
        segments[i] = new Segmentation.Segment
        {
            start = startByChr[start], // Genomic start
            end = endByChr[end]        // Genomic end
        };
    }

    return segments;
}
/// <summary>
/// CBS: circular binary segmentation, a port of the R function <c>segment</c> in DNAcopy.
/// </summary>
/// <remarks>
/// The significance level and the undo method are not parameters; they are read from
/// <c>this._alpha</c> and <c>this._undoMethod</c>. Invalid <paramref name="minWidth"/> or
/// <paramref name="nMin"/> values are reported on standard error and terminate the process
/// with exit code 1.
/// </remarks>
/// <param name="segmentation">
/// Supplies the per-chromosome scores (<c>ScoreByChr</c>) and bin coordinates
/// (<c>StartByChr</c>, <c>EndByChr</c>).
/// </param>
/// <param name="nPerm">Passed to the boundary computation and to <c>ChangePoint.ChangePoints</c>.</param>
/// <param name="pMethod">"hybrid" or "perm"</param>
/// <param name="minWidth">Minimum segment width; must be between 2 and 5.</param>
/// <param name="kMax">Passed to <c>ChangePoint.ChangePoints</c>; <paramref name="nMin"/> must be at least four times this value.</param>
/// <param name="nMin">Passed to <c>ChangePoint.ChangePoints</c>.</param>
/// <param name="eta">Passed to <c>GetBoundary.ComputeBoundary</c> when <paramref name="sbdry"/> is null.</param>
/// <param name="sbdry">Precomputed boundary; computed via <c>GetBoundary.ComputeBoundary</c> when null.</param>
/// <param name="trim">Passed to <c>ChangePoint.TrimmedVariance</c>.</param>
/// <param name="undoPrune">Passed to <c>ChangePoint.ChangePoints</c>.</param>
/// <param name="undoSD">Passed to <c>ChangePoint.ChangePoints</c>.</param>
/// <param name="verbose">Passed to <c>ChangePoint.ChangePoints</c>.</param>
/// <returns>
/// Segments keyed by chromosome, or an empty dictionary when no chromosome has any finite score.
/// </returns>
public Dictionary<string, Segmentation.Segment[]> Run(Segmentation segmentation, uint nPerm = 10000, string pMethod = "hybrid", int minWidth = 2, int kMax = 25, uint nMin = 200, double eta = 0.05, uint[] sbdry = null, double trim = 0.025, double undoPrune = 0.05, double undoSD = 3, int verbose = 1)
{
    bool widthInRange = minWidth >= 2 && minWidth <= 5;
    if (!widthInRange)
    {
        Console.Error.WriteLine("Minimum segment width should be between 2 and 5");
        Environment.Exit(1);
    }

    if (nMin < 4 * kMax)
    {
        Console.Error.WriteLine("nMin should be >= 4 * kMax");
        Environment.Exit(1);
    }

    if (sbdry == null)
    {
        GetBoundary.ComputeBoundary(nPerm, this._alpha, eta, out sbdry);
    }

    // Phase 1: for every chromosome, find the finite scores and remember their original indices.
    var finiteIndicesByChr = new Dictionary<string, int[]>();
    var finiteScoresByChr = new Dictionary<string, double[]>();
    var extractionJobs = new List<Action>();
    foreach (KeyValuePair<string, double[]> entry in segmentation.ScoreByChr)
    {
        extractionJobs.Add(() =>
        {
            double[] rawScores = entry.Value;

            int[] finiteIndices;
            Helper.GetFiniteIndices(rawScores, out finiteIndices); // not NaN, -Inf, Inf

            // Reuse the input array when every score is finite; otherwise extract the finite subset.
            double[] finiteScores = rawScores;
            if (finiteIndices.Length != rawScores.Length)
            {
                Helper.ExtractValues<double>(rawScores, finiteIndices, out finiteScores);
            }

            // A single lock guards both result dictionaries.
            lock (finiteScoresByChr)
            {
                finiteScoresByChr[entry.Key] = finiteScores;
                finiteIndicesByChr[entry.Key] = finiteIndices;
            }
        });
    }

    Parallel.ForEach(extractionJobs, job => job());

    // Quick sanity-check: without any finite score there is nothing to segment, so return a dummy result.
    int finiteCount = 0;
    foreach (double[] scores in finiteScoresByChr.Values)
    {
        finiteCount += scores.Length;
    }

    if (finiteCount == 0)
    {
        return new Dictionary<string, Segmentation.Segment[]>();
    }

    double trimmedSD = Math.Sqrt(ChangePoint.TrimmedVariance(finiteScoresByChr, trim: trim));

    // When parallelizing, each chromosome needs its own RNG to get deterministic results;
    // the seeds are drawn sequentially from one fixed-seed generator.
    Random seedGenerator = new MersenneTwister(0);
    var perChromosomeRandom = new Dictionary<string, Random>();
    foreach (string chr in segmentation.ScoreByChr.Keys)
    {
        perChromosomeRandom[chr] = new MersenneTwister(seedGenerator.NextFullRangeInt32(), true);
    }

    // Phase 2: run change-point detection per chromosome and map the segments back to genomic coordinates.
    var segmentByChr = new Dictionary<string, Segmentation.Segment[]>();
    var segmentationJobs = new List<Action>();
    foreach (string chr in segmentation.ScoreByChr.Keys)
    {
        segmentationJobs.Add(() =>
        {
            int[] finiteIndices = finiteIndicesByChr[chr];
            int[] lengthSeg;
            double[] segmentMeans;
            ChangePoint.ChangePoints(segmentation.ScoreByChr[chr], sbdry, out lengthSeg, out segmentMeans,
                perChromosomeRandom[chr], dataType: "logratio", alpha: this._alpha, nPerm: nPerm,
                pMethod: pMethod, minWidth: minWidth, kMax: kMax, nMin: nMin, trimmedSD: trimmedSD,
                undoSplits: this._undoMethod, undoPrune: undoPrune, undoSD: undoSD, verbose: verbose);

            var segments = new Segmentation.Segment[lengthSeg.Length];
            int firstFinite = 0; // running sum of the preceding segment lengths
            for (int i = 0; i < lengthSeg.Length; i++)
            {
                int lastFinite = firstFinite + lengthSeg[i] - 1;
                int startBin = finiteIndices[firstFinite];
                int endBin = finiteIndices[lastFinite];
                segments[i] = new Segmentation.Segment
                {
                    start = segmentation.StartByChr[chr][startBin], // Genomic start
                    end = segmentation.EndByChr[chr][endBin]        // Genomic end
                };
                firstFinite += lengthSeg[i];
            }

            lock (segmentByChr)
            {
                segmentByChr[chr] = segments;
            }
        });
    }

    Parallel.ForEach(segmentationJobs, job => job());

    Console.WriteLine("{0} Completed CBS tasks", DateTime.Now);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return segmentByChr;
}