public static void HaarWavelets(double[] ratio, double thresholdlower, double thresholdupper, List<int> breakpoints, bool isGermline)

| Parameter | Type | Description |
| --- | --- | --- |
| ratio | double[] | |
| thresholdlower | double | |
| thresholdupper | double | |
| breakpoints | List<int> | |
| isGermline | bool | |
| return | void | |
/// <summary>
/// Runs <c>WaveletSegmentation.HaarWavelets</c> once per chromosome, in parallel, and gathers
/// the breakpoints each run produced.
/// </summary>
/// <param name="coverageByChr">Coverage values keyed by chromosome name.</param>
/// <param name="startByChr">Start coordinates keyed by chromosome name; not read by this method.</param>
/// <param name="endByChr">End coordinates keyed by chromosome name; not read by this method.</param>
/// <param name="CV">Passed through unchanged to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <param name="factorOfThreeCMADs">Passed through unchanged to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <returns>
/// One breakpoint list per chromosome, or an empty dictionary when no chromosome
/// contains a single finite coverage value.
/// </returns>
public Dictionary<string, List<int>> LaunchWavelets(Dictionary<string, double[]> coverageByChr, Dictionary<string, uint[]> startByChr, Dictionary<string, uint[]> endByChr, double? CV, List<double> factorOfThreeCMADs)
{
    var finiteIndicesByChr = new Dictionary<string, int[]>();
    var finiteValuesByChr = new Dictionary<string, double[]>();

    // Pass 1: per chromosome, keep only the finite coverage values (not NaN, -Inf, Inf).
    var filterJobs = new List<ThreadStart>();
    foreach (KeyValuePair<string, double[]> entry in coverageByChr)
    {
        filterJobs.Add(() =>
        {
            double[] raw = entry.Value;
            Helper.GetFiniteIndices(raw, out int[] finiteIndices);

            // When every value is finite the original array is reused instead of copied.
            double[] finiteValues = raw;
            if (finiteIndices.Length != raw.Length)
            {
                Helper.ExtractValues<double>(raw, finiteIndices, out finiteValues);
            }

            // The dictionaries are shared between jobs, so writes are serialized.
            lock (finiteValuesByChr)
            {
                finiteValuesByChr[entry.Key] = finiteValues;
                finiteIndicesByChr[entry.Key] = finiteIndices;
            }
        });
    }
    Parallel.ForEach(filterJobs, job => job.Invoke());

    // Quick sanity-check: with no finite values anywhere there is nothing to segment.
    int finiteTotal = finiteValuesByChr.Values.Sum(values => values.Length);
    if (finiteTotal == 0)
    {
        return new Dictionary<string, List<int>>();
    }

    // Pass 2: wavelet segmentation of the unfiltered coverage, one job per chromosome.
    var breakpointsByChr = new Dictionary<string, List<int>>();
    var waveletJobs = new List<ThreadStart>();
    foreach (string chr in coverageByChr.Keys)
    {
        waveletJobs.Add(() =>
        {
            var found = new List<int>();
            double[] coverage = coverageByChr[chr];

            // To cover cases of no SNVs present (i.e. chrY) the length is floored at 1;
            // a chromosome that is not segmented keeps an empty breakpoint list.
            int effectiveLength = Math.Max(coverage.Length, 1);
            if (effectiveLength > _parameters.MinSize)
            {
                WaveletSegmentation.HaarWavelets(
                    coverage,
                    _parameters.ThresholdLower,
                    _parameters.ThresholdUpper,
                    found,
                    _parameters.IsGermline,
                    _parameters.MadFactor,
                    CV,
                    factorOfThreeCMADs,
                    chr);
            }

            lock (breakpointsByChr)
            {
                breakpointsByChr[chr] = found;
            }
        });
    }

    Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
    Parallel.ForEach(waveletJobs, job => job.Invoke());
    Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return breakpointsByChr;
}
/// <summary>
/// Wavelets: unbalanced HAAR wavelets segmentation.
/// Segments every chromosome in <c>ScoreByChr</c> in parallel and stores the outcome in
/// <c>SegmentationResults</c>. When no chromosome holds a finite score, the dummy result
/// from <c>GetDummySegmentationResults</c> is stored instead.
/// </summary>
/// <param name="isGermline">Passed through to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <param name="thresholdLower">Lower wavelets coefficient threshold, passed through to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <param name="thresholdUpper">Upper wavelets coefficient threshold, passed through to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <param name="minSize">A chromosome is handed to the wavelet segmenter only when it has more than this many scores.</param>
/// <param name="verbose">Not read by this method.</param>
private void Wavelets(bool isGermline, double thresholdLower = 5, double thresholdUpper = 80, int minSize = 10, int verbose = 1)
{
    // Pass 1: per chromosome, extract the finite scores. They are only used for the
    // sanity check below; segmentation itself runs on the unfiltered scores.
    Dictionary<string, double[]> finiteScoresByChr = new Dictionary<string, double[]>();
    List<ThreadStart> tasks = new List<ThreadStart>();
    foreach (KeyValuePair<string, double[]> scoreByChrKVP in ScoreByChr)
    {
        tasks.Add(new ThreadStart(() =>
        {
            string chr = scoreByChrKVP.Key;
            int[] ina;
            Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf
            double[] scores;
            if (ina.Length == scoreByChrKVP.Value.Length)
            {
                scores = scoreByChrKVP.Value;
            }
            else
            {
                Helper.ExtractValues<double>(scoreByChrKVP.Value, ina, out scores);
            }
            // The dictionary is shared between tasks, so writes are serialized.
            lock (finiteScoresByChr)
            {
                finiteScoresByChr[chr] = scores;
            }
        }));
    }
    Isas.Shared.Utilities.DoWorkParallelThreads(tasks);

    // Quick sanity-check: If we don't have any segments, then return a dummy result.
    int n = 0;
    foreach (double[] list in finiteScoresByChr.Values)
    {
        n += list.Length;
    }
    if (n == 0)
    {
        this.SegmentationResults = this.GetDummySegmentationResults();
        return;
    }

    // Pass 2: segment each chromosome and turn its breakpoints into Segment records.
    Dictionary<string, Segment[]> segmentByChr = new Dictionary<string, Segment[]>();
    tasks = new List<ThreadStart>();
    foreach (string chr in ScoreByChr.Keys)
    {
        tasks.Add(new ThreadStart(() =>
        {
            List<int> breakpoints = new List<int>();
            int sizeScoreByChr = this.ScoreByChr[chr].Length;
            if (sizeScoreByChr > minSize)
            {
                // A copy of the scores is handed over, so the segmenter never sees the stored array.
                WaveletSegmentation.HaarWavelets(this.ScoreByChr[chr].ToArray(), thresholdLower, thresholdUpper, breakpoints, isGermline);
            }

            // Index ranges (into the score array) of each segment.
            // NOTE(review): every length below is (end - start), i.e. one less than the number
            // of indices in [start, end] (for the first segment this holds when breakpoints[0]
            // is 0). nMarkers and the windows used for the means inherit that. Kept as-is;
            // confirm the intended convention before changing.
            List<int> startBreakpointsPos = new List<int>();
            List<int> endBreakpointPos = new List<int>();
            List<int> lengthSeg = new List<int>();
            // NOTE(review): the literal 10 is independent of minSize — confirm whether intended.
            if (breakpoints.Count >= 2 && sizeScoreByChr > 10)
            {
                startBreakpointsPos.Add(breakpoints[0]);
                endBreakpointPos.Add(breakpoints[1] - 1);
                lengthSeg.Add(breakpoints[1] - 1);
                for (int i = 1; i < breakpoints.Count - 1; i++)
                {
                    startBreakpointsPos.Add(breakpoints[i]);
                    endBreakpointPos.Add(breakpoints[i + 1] - 1);
                    lengthSeg.Add(breakpoints[i + 1] - 1 - breakpoints[i]);
                }
                // The last segment runs from the final breakpoint to the end of the chromosome.
                startBreakpointsPos.Add(breakpoints[breakpoints.Count - 1]);
                endBreakpointPos.Add(sizeScoreByChr - 1);
                lengthSeg.Add(sizeScoreByChr - breakpoints[breakpoints.Count - 1] - 1);
            }
            else
            {
                // Fewer than two breakpoints: the whole chromosome becomes one segment.
                startBreakpointsPos.Add(0);
                endBreakpointPos.Add(sizeScoreByChr - 1);
                lengthSeg.Add(sizeScoreByChr - 1);
            }

            // estimate segment means over consecutive windows whose widths come from lengthSeg
            double[] segmentMeans = new double[lengthSeg.Count];
            int ss = 0, ee = 0;
            for (int i = 0; i < lengthSeg.Count; i++)
            {
                ee += lengthSeg[i];
                // Works even if weights == null
                segmentMeans[i] = Helper.WeightedAverage(this.ScoreByChr[chr], null, iStart: ss, iEnd: ee);
                ss = ee;
            }

            Segment[] segments = new Segment[startBreakpointsPos.Count];
            for (int i = 0; i < startBreakpointsPos.Count; i++)
            {
                int start = startBreakpointsPos[i];
                int end = endBreakpointPos[i];
                segments[i] = new Segment();
                segments[i].start = this.StartByChr[chr][start]; // Genomic start
                segments[i].end = this.EndByChr[chr][end]; // Genomic end
                segments[i].nMarkers = lengthSeg[i];
                segments[i].mean = segmentMeans[i];
            }

            lock (segmentByChr)
            {
                segmentByChr[chr] = segments;
            }
        }));
    }

    Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
    Isas.Shared.Utilities.DoWorkParallelThreads(tasks);
    Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
    this.SegmentationResults = new GenomeSegmentationResults(segmentByChr);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
}
/// <summary>
/// Wavelets: unbalanced HAAR wavelets segmentation.
/// Segments every chromosome of <paramref name="segmentation"/> in parallel. When a common-CNV
/// BED file is configured in <c>_parameters.CommonCnVs</c>, the breakpoints of chromosomes
/// present in that file are additionally passed through <c>Segmentation.OverlapCommonRegions</c>.
/// </summary>
/// <param name="segmentation">Supplies the per-chromosome scores (<c>ScoreByChr</c>) and the
/// start and end coordinates (<c>StartByChr</c>, <c>EndByChr</c>) to segment.</param>
/// <returns>
/// Segments keyed by chromosome name, or an empty dictionary when no chromosome
/// contains a single finite score.
/// </returns>
public Dictionary<string, Segmentation.Segment[]> Run(Segmentation segmentation)
{
    // Pass 1: per chromosome, extract the finite scores. They are only used for the
    // sanity check below; segmentation itself runs on the unfiltered scores.
    Dictionary<string, double[]> finiteScoresByChr = new Dictionary<string, double[]>();
    List<ThreadStart> tasks = new List<ThreadStart>();
    foreach (KeyValuePair<string, double[]> scoreByChrKVP in segmentation.ScoreByChr)
    {
        tasks.Add(new ThreadStart(() =>
        {
            string chr = scoreByChrKVP.Key;
            int[] ina;
            Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf
            double[] scores;
            if (ina.Length == scoreByChrKVP.Value.Length)
            {
                scores = scoreByChrKVP.Value;
            }
            else
            {
                Helper.ExtractValues<double>(scoreByChrKVP.Value, ina, out scores);
            }
            // The dictionary is shared between tasks, so writes are serialized.
            lock (finiteScoresByChr)
            {
                finiteScoresByChr[chr] = scores;
            }
        }));
    }
    Parallel.ForEach(tasks, task => task.Invoke());

    // Quick sanity-check: If we don't have any segments, then return a dummy result.
    int n = finiteScoresByChr.Values.Sum(list => list.Length);
    if (n == 0)
    {
        return new Dictionary<string, Segmentation.Segment[]>();
    }

    Dictionary<string, Segmentation.Segment[]> segmentByChr = new Dictionary<string, Segmentation.Segment[]>();

    // load common CNV segments
    Dictionary<string, List<SampleGenomicBin>> commonCNVintervals = null;
    if (_parameters.CommonCnVs != null)
    {
        commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_parameters.CommonCnVs);
        CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _parameters.CommonCnVs);
    }

    // Pass 2: segment each chromosome and derive its Segment records.
    tasks = new List<ThreadStart>();
    foreach (string chr in segmentation.ScoreByChr.Keys)
    {
        tasks.Add(new ThreadStart(() =>
        {
            List<int> breakpoints = new List<int>();
            int sizeScoreByChr = segmentation.ScoreByChr[chr].Length;
            if (sizeScoreByChr > _parameters.MinSize)
            {
                WaveletSegmentation.HaarWavelets(segmentation.ScoreByChr[chr], _parameters.ThresholdLower, _parameters.ThresholdUpper, breakpoints, _parameters.IsGermline, madFactor: _parameters.MadFactor);
            }

            // Single lookup: the intervals are fetched and tested for presence in one call.
            List<SampleGenomicBin> chrCommonIntervals;
            if (_parameters.CommonCnVs != null && commonCNVintervals.TryGetValue(chr, out chrCommonIntervals))
            {
                List<SampleGenomicBin> remappedCommonCNVintervals = Segmentation.RemapCommonRegions(chrCommonIntervals, segmentation.StartByChr[chr], segmentation.EndByChr[chr]);
                breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
            }

            Segmentation.Segment[] segments = Segmentation.DeriveSegments(breakpoints, sizeScoreByChr, segmentation.StartByChr[chr], segmentation.EndByChr[chr]);

            lock (segmentByChr)
            {
                segmentByChr[chr] = segments;
            }
        }));
    }

    Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
    Parallel.ForEach(tasks, task => task.Invoke());
    Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return segmentByChr;
}