/// <summary>
/// Wavelets: unbalanced HAAR wavelets segmentation.
/// Computes per-chromosome breakpoints from binned coverage, adjusts them, and derives segments.
/// </summary>
/// <param name="segmentationInput">Coverage, VAF and metric-file inputs for a single sample.</param>
/// <param name="windowSize">Window size used for the coverage-variability and evenness estimates.</param>
/// <returns>Segments keyed by chromosome name.</returns>
public Dictionary<string, SegmentationInput.Segment[]> Run(SegmentationInput segmentationInput, int windowSize)
{
    double? coverageCV = segmentationInput.GetCoverageVariability(windowSize);
    var factorOfThreeCMADs = segmentationInput.FactorOfThreeCoverageVariabilities();
    try
    {
        double evennessScore = segmentationInput.GetEvennessScore(windowSize);
        if (!segmentationInput.EvennessMetricFile.IsNullOrEmpty())
        {
            CanvasIO.WriteEvennessMetricToTextFile(segmentationInput.EvennessMetricFile, evennessScore);
        }
    }
    catch (Exception e)
    {
        // The evenness metric is best-effort: segmentation proceeds on coverage alone.
        // Include the exception message so the failure cause is not silently lost.
        Console.Error.WriteLine($"Unable to calculate an evenness score, using coverage for segmentation: {e.Message}");
    }

    var breakpoints = LaunchWavelets(
        segmentationInput.CoverageInfo.CoverageByChr,
        segmentationInput.CoverageInfo.StartByChr,
        segmentationInput.CoverageInfo.EndByChr,
        coverageCV,
        factorOfThreeCMADs);
    Dictionary<string, List<int>> adjustedBreakpoints =
        AdjustBreakpoints(segmentationInput.CoverageInfo.CoverageByChr, breakpoints, vafContainingBinsByChr: null);

    var segments = new Dictionary<string, SegmentationInput.Segment[]>();
    // NOTE(review): iterates VafByChr keys but indexes the coverage dictionaries — a chromosome
    // present in VafByChr but absent from CoverageByChr would throw KeyNotFoundException; confirm
    // upstream guarantees the key sets agree.
    foreach (string chr in segmentationInput.VafByChr.Keys)
    {
        segments[chr] = SegmentationInput.DeriveSegments(
            adjustedBreakpoints[chr],
            segmentationInput.CoverageInfo.CoverageByChr[chr].Length,
            segmentationInput.CoverageInfo.StartByChr[chr],
            segmentationInput.CoverageInfo.EndByChr[chr]);
    }
    return segments;
}
/// <summary>
/// Post-processes raw segmentation results against the reference ploidy and writes the
/// partitioned segments to the given output file.
/// </summary>
private static void PostProcessAndWriteResults(SegmentationInput segmentationInput, string outPartitionedFile, PloidyInfo referencePloidy, GenomeSegmentationResults segmentationResults)
{
    segmentationInput.WriteCanvasPartitionResults(
        outPartitionedFile,
        segmentationInput.PostProcessSegments(segmentationResults, referencePloidy));
}
/// <summary>
/// HMM segmentation: fits a negative-binomial hidden Markov model per chromosome (in parallel)
/// and derives segments wherever the Viterbi best path changes state.
/// </summary>
/// <param name="segmentation">Per-sample inputs; all samples are assumed to share the same bin layout
/// (the first sample's bins define each chromosome's length, starts and ends).</param>
/// <param name="isPerSample">When true, per-sample genome-wide medians and pseudo-variances
/// parameterize the emission distributions.</param>
/// <returns>Segments keyed by chromosome; chromosomes with length &lt;= _minSize are omitted.</returns>
public Dictionary<string, SegmentationInput.Segment[]> Run(List<SegmentationInput> segmentation, bool isPerSample)
{
    var segmentByChr = new Dictionary<string, SegmentationInput.Segment[]>();
    // Dedicated lock target — avoid locking the shared result dictionary itself.
    var resultsGate = new object();
    var cts = new CancellationTokenSource();

    // Compute whole-genome median and inter-quartile-range-based pseudo-variance for each sample;
    // it would be better to exclude regions that are not diploid, and we should really be
    // using a different variance for each copy number, but using these values is better than
    // using the per-chromosome mean and variance, which have the following problems:
    // - chromosomes with a lot of outliers can get a very high variance
    // - chromosomes that have a whole-chromosome CNV or a CNV that affects a lot of the chromosome
    //   can have problematic estimates
    var medians = new List<double>();
    var pseudoVariances = new List<double>();
    foreach (var singleSampleSegmentation in segmentation)
    {
        var cvgVals = new List<float>();
        foreach (var chr in singleSampleSegmentation.CoverageInfo.CoverageByChr.Keys)
        {
            cvgVals.AddRange(singleSampleSegmentation.CoverageInfo.CoverageByChr[chr].Select(x => (float)x));
        }
        // Quartiles: Item1 = Q1, Item2 = median, Item3 = Q3.
        var quartiles = CanvasCommon.Utilities.Quartiles(cvgVals);
        medians.Add(quartiles.Item2);
        var iqr = quartiles.Item3 - quartiles.Item1;
        pseudoVariances.Add(iqr * iqr);
    }

    Parallel.ForEach(
        segmentation.First().CoverageInfo.CoverageByChr.Keys,
        new ParallelOptions
        {
            CancellationToken = cts.Token,
            MaxDegreeOfParallelism = Environment.ProcessorCount,
            TaskScheduler = TaskScheduler.Default
        },
        chr =>
        {
            var breakpoints = new List<int>();
            int length = segmentation.First().CoverageInfo.CoverageByChr[chr].Length;
            var startByChr = segmentation.First().CoverageInfo.StartByChr[chr];
            var endByChr = segmentation.First().CoverageInfo.EndByChr[chr];
            // Transpose coverage into per-bin vectors across samples for the multi-sample HMM.
            var multiSampleCoverage = new List<List<double>>(length);
            for (int i = 0; i < length; i++)
            {
                multiSampleCoverage.Add(segmentation.Select(x => x.CoverageInfo.CoverageByChr[chr][i]).ToList());
            }

            // NOTE(review): chromosomes at or below _minSize bins are silently absent from the
            // returned dictionary — confirm callers tolerate missing keys.
            if (length > _minSize)
            {
                var haploidMeans = new List<double>(_nHiddenStates);
                var negativeBinomialDistributions = isPerSample
                    ? InitializeNegativeBinomialEmission(multiSampleCoverage, _nHiddenStates, haploidMeans, medians, pseudoVariances)
                    : InitializeNegativeBinomialEmission(multiSampleCoverage, _nHiddenStates, haploidMeans, null, null);
                var hmm = new HiddenMarkovModel(multiSampleCoverage, negativeBinomialDistributions, haploidMeans, isPerSample);
                Console.WriteLine($"{DateTime.Now} Launching HMM task for chromosome {chr}");
                var bestPathViterbi = hmm.BestPathViterbi(multiSampleCoverage, startByChr, haploidMeans);
                Console.WriteLine($"{DateTime.Now} Completed HMM task for chromosome {chr}");

                // A breakpoint wherever the Viterbi state changes; bin 0 always opens a segment.
                breakpoints.Add(0);
                for (int i = 1; i < length; i++)
                {
                    if (bestPathViterbi[i] - bestPathViterbi[i - 1] != 0)
                    {
                        breakpoints.Add(i);
                    }
                }
                var segments = SegmentationInput.DeriveSegments(breakpoints, length, startByChr, endByChr);
                lock (resultsGate)
                {
                    segmentByChr[chr] = segments;
                }
            }
        });

    Console.WriteLine($"{DateTime.Now} Completed HMM tasks");
    Console.WriteLine($"{DateTime.Now} Segmentation results complete");
    return segmentByChr;
}
/// <summary>
/// CBS: circular binary segmentation porting the R function segment in DNAcopy.
/// Runs per-chromosome change-point detection in parallel with deterministic per-chromosome RNGs.
/// </summary>
/// <param name="segmentation">Coverage inputs (scores, starts, ends) per chromosome.</param>
/// <param name="nPerm">Number of permutations for the p-value computation.</param>
/// <param name="pMethod">"hybrid" or "perm"</param>
/// <param name="minWidth">Minimum segment width; must be between 2 and 5.</param>
/// <param name="kMax"></param>
/// <param name="nMin">Must be >= 4 * kMax.</param>
/// <param name="eta"></param>
/// <param name="sbdry">Sequential boundary; computed from nPerm/alpha/eta when null.</param>
/// <param name="trim">Fraction trimmed when estimating the variance.</param>
/// <param name="undoPrune"></param>
/// <param name="undoSD"></param>
/// <param name="verbose"></param>
/// <returns>Segments keyed by chromosome; empty dictionary if no finite scores exist.</returns>
/// <remarks>
/// Alpha is taken from this._alpha and the undo method from this._undoMethod (see original
/// parameter docs). NOTE(review): argument validation calls Environment.Exit(1), which kills
/// the whole process — hostile if this is ever used as a library; consider throwing instead.
/// </remarks>
public Dictionary<string, SegmentationInput.Segment[]> Run(SegmentationInput segmentation, uint nPerm = 10000, string pMethod = "hybrid", int minWidth = 2, int kMax = 25, uint nMin = 200, double eta = 0.05, uint[] sbdry = null, double trim = 0.025, double undoPrune = 0.05, double undoSD = 3, int verbose = 1)
{
    if (minWidth < 2 || minWidth > 5)
    {
        Console.Error.WriteLine("Minimum segment width should be between 2 and 5");
        Environment.Exit(1);
    }
    if (nMin < 4 * kMax)
    {
        Console.Error.WriteLine("nMin should be >= 4 * kMax");
        Environment.Exit(1);
    }
    if (sbdry == null)
    {
        // Derive the sequential stopping boundary from the permutation count and alpha/eta.
        GetBoundary.ComputeBoundary(nPerm, this._alpha, eta, out sbdry);
    }

    // Phase 1 (parallel): per chromosome, keep only finite scores and remember the
    // indices of the finite entries so segment boundaries can be mapped back later.
    Dictionary<string, int[]> inaByChr = new Dictionary<string, int[]>();
    Dictionary<string, double[]> finiteScoresByChr = new Dictionary<string, double[]>();
    List<ThreadStart> tasks = new List<ThreadStart>();
    foreach (KeyValuePair<string, double[]> scoreByChrKVP in segmentation.CoverageInfo.CoverageByChr)
    {
        tasks.Add(new ThreadStart(() =>
        {
            string chr = scoreByChrKVP.Key;
            int[] ina;
            Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf
            double[] scores;
            if (ina.Length == scoreByChrKVP.Value.Length)
            {
                // All values finite — reuse the original array without copying.
                scores = scoreByChrKVP.Value;
            }
            else
            {
                Helper.ExtractValues<double>(scoreByChrKVP.Value, ina, out scores);
            }
            lock (finiteScoresByChr)
            {
                finiteScoresByChr[chr] = scores;
                inaByChr[chr] = ina;
            }
        }));
    }
    Parallel.ForEach(tasks, task => task.Invoke());

    // Quick sanity-check: If we don't have any segments, then return a dummy result.
    int n = 0;
    foreach (var list in finiteScoresByChr.Values)
    {
        n += list.Length;
    }
    if (n == 0)
    {
        return (new Dictionary<string, SegmentationInput.Segment[]>());
    }

    // Genome-wide trimmed standard deviation used by the change-point statistic.
    double trimmedSD = Math.Sqrt(ChangePoint.TrimmedVariance(finiteScoresByChr, trim: trim));
    Dictionary<string, SegmentationInput.Segment[]> segmentByChr = new Dictionary<string, SegmentationInput.Segment[]>();

    // when parallelizing we need an RNG for each chromosome to get deterministic results
    // (all per-chromosome seeds are drawn from a single fixed-seed generator, in key order,
    // before any parallel work starts).
    Random seedGenerator = new MersenneTwister(0);
    Dictionary<string, Random> perChromosomeRandom = new Dictionary<string, Random>();
    foreach (string chr in segmentation.CoverageInfo.CoverageByChr.Keys)
    {
        perChromosomeRandom[chr] = new MersenneTwister(seedGenerator.NextFullRangeInt32(), true);
    }

    // Phase 2 (parallel): run CBS change-point detection per chromosome and convert the
    // resulting segment lengths into genomic coordinates.
    tasks = new List<ThreadStart>();
    foreach (string chr in segmentation.CoverageInfo.CoverageByChr.Keys)
    {
        tasks.Add(new ThreadStart(() =>
        {
            int[] ina = inaByChr[chr];
            int[] lengthSeg;
            double[] segmentMeans;
            ChangePoint.ChangePoints(segmentation.CoverageInfo.CoverageByChr[chr], sbdry, out lengthSeg, out segmentMeans, perChromosomeRandom[chr], dataType: "logratio", alpha: this._alpha, nPerm: nPerm, pMethod: pMethod, minWidth: minWidth, kMax: kMax, nMin: nMin, trimmedSD: trimmedSD, undoSplits: this._undoMethod, undoPrune: undoPrune, undoSD: undoSD, verbose: verbose);
            SegmentationInput.Segment[] segments = new SegmentationInput.Segment[lengthSeg.Length];
            // cs1/cs2 are cumulative sums over segment lengths: cs1 indexes the first bin of
            // the current segment, cs2 its last bin; ina maps finite-score indices back to
            // original bin positions.
            int cs1 = 0, cs2 = -1; // cumulative sum
            for (int i = 0; i < lengthSeg.Length; i++)
            {
                cs2 += lengthSeg[i];
                int start = ina[cs1];
                int end = ina[cs2];
                segments[i] = new SegmentationInput.Segment();
                segments[i].start = segmentation.CoverageInfo.StartByChr[chr][start]; // Genomic start
                segments[i].end = segmentation.CoverageInfo.EndByChr[chr][end]; // Genomic end
                cs1 += lengthSeg[i];
            }
            lock (segmentByChr)
            {
                segmentByChr[chr] = segments;
            }
        }));
    }
    Parallel.ForEach(tasks, task => task.Invoke());
    Console.WriteLine("{0} Completed CBS tasks", DateTime.Now);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return (segmentByChr);
}