/// <summary>
/// Multi-sample HMM segmentation: for every chromosome of the first sample, decodes a
/// Viterbi path over the per-bin scores of all samples and turns state changes into segments.
/// </summary>
/// <param name="segmentation">
/// One entry per sample. The first entry supplies the chromosome names, bin starts and bin
/// ends; every entry supplies its own per-bin scores for each of those chromosomes.
/// </param>
/// <returns>
/// Segments keyed by chromosome. Chromosomes whose bin count does not exceed
/// <c>_minSize</c> are not segmented and are absent from the result.
/// </returns>
/// <exception cref="InvalidOperationException"><paramref name="segmentation"/> is empty.</exception>
public Dictionary<string, Segmentation.Segment[]> Run(List<Segmentation> segmentation)
{
    // Optional common-CNV intervals, loaded once and shared read-only by all chromosome tasks.
    Dictionary<string, List<SampleGenomicBin>> commonCNVintervals = null;
    if (_commonCnVs != null)
    {
        commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_commonCnVs);
        CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _commonCnVs);
    }

    var segmentByChr = new Dictionary<string, Segmentation.Segment[]>();

    // The first sample defines the chromosome set and the bin coordinates for all samples.
    var referenceSample = segmentation.First();

    // CancellationTokenSource is IDisposable; Parallel.ForEach blocks until every task has
    // finished, so disposing right after the loop is safe.
    using (var cts = new CancellationTokenSource())
    {
        var options = new ParallelOptions
        {
            CancellationToken = cts.Token,
            MaxDegreeOfParallelism = Environment.ProcessorCount,
            TaskScheduler = TaskScheduler.Default
        };

        Parallel.ForEach(
            referenceSample.ScoreByChr.Keys,
            options,
            chr =>
            {
                var breakpoints = new List<int>();
                int length = referenceSample.ScoreByChr[chr].Length;
                var startByChr = referenceSample.StartByChr[chr];
                var endByChr = referenceSample.EndByChr[chr];

                // Transpose to bin-major layout: one list per bin holding that bin's score in each sample.
                var multiSampleCoverage = new List<List<double>>(length);
                for (int bin = 0; bin < length; bin++)
                {
                    int binIndex = bin;
                    multiSampleCoverage.Add(segmentation.Select(x => x.ScoreByChr[chr][binIndex]).ToList());
                }

                // Chromosomes that are too short are skipped entirely (no entry in the result).
                if (length <= _minSize)
                {
                    return;
                }

                var haploidMeans = new List<double>(_nHiddenStates);
                var negativeBinomialDistributions = InitializeNegativeBinomialEmission(multiSampleCoverage, _nHiddenStates, haploidMeans);
                var hmm = new HiddenMarkovModel(multiSampleCoverage, negativeBinomialDistributions, haploidMeans);
                Console.WriteLine($"{DateTime.Now} Launching HMM task for chromosome {chr}");

                // The likelihood-maximisation step is only run for small cohorts (at most three samples).
                if (_nSamples <= 3)
                {
                    hmm.FindMaximalLikelihood(multiSampleCoverage);
                }

                var bestPathViterbi = hmm.BestPathViterbi(multiSampleCoverage, startByChr, haploidMeans);
                Console.WriteLine($"{DateTime.Now} Completed HMM task for chromosome {chr}");

                // A breakpoint is placed at bin 0 and wherever the decoded state changes.
                breakpoints.Add(0);
                for (int i = 1; i < length; i++)
                {
                    if (bestPathViterbi[i] - bestPathViterbi[i - 1] != 0)
                    {
                        breakpoints.Add(i);
                    }
                }

                // Merge in common-CNV regions when a BED file was configured and covers this chromosome.
                List<SampleGenomicBin> commonRegions;
                if (_commonCnVs != null && commonCNVintervals.TryGetValue(chr, out commonRegions))
                {
                    var remappedCommonCNVintervals = Segmentation.RemapCommonRegions(commonRegions, startByChr, endByChr);
                    breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
                }

                var segments = Segmentation.DeriveSegments(breakpoints, length, startByChr, endByChr);

                // Dictionary<TKey, TValue> is not safe for concurrent writes.
                lock (segmentByChr)
                {
                    segmentByChr[chr] = segments;
                }
            });
    }

    Console.WriteLine("{0} Completed HMM tasks", DateTime.Now);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return segmentByChr;
}
/// <summary>
/// Wavelets: unbalanced HAAR wavelets segmentation
/// </summary>
/// <param name="segmentation">
/// Per-chromosome scores together with the bin start and end coordinates to segment.
/// </param>
/// <returns>
/// Segments keyed by chromosome, or an empty dictionary when no chromosome contains a
/// single finite score.
/// </returns>
public Dictionary<string, Segmentation.Segment[]> Run(Segmentation segmentation)
{
    // Collect the finite scores of every chromosome in parallel; only their total count is
    // used below, to detect input without any usable data.
    var finiteScoresByChr = new Dictionary<string, double[]>();
    Parallel.ForEach(segmentation.ScoreByChr, scoreByChrKVP =>
    {
        double[] allScores = scoreByChrKVP.Value;
        int[] ina;
        Helper.GetFiniteIndices(allScores, out ina); // not NaN, -Inf, Inf

        double[] scores;
        if (ina.Length == allScores.Length)
        {
            // Every value is finite: reuse the original array instead of copying it.
            scores = allScores;
        }
        else
        {
            Helper.ExtractValues<double>(allScores, ina, out scores);
        }

        // Dictionary<TKey, TValue> is not safe for concurrent writes.
        lock (finiteScoresByChr)
        {
            finiteScoresByChr[scoreByChrKVP.Key] = scores;
        }
    });

    // Quick sanity-check: if there is not a single finite score, return a dummy (empty) result.
    int n = finiteScoresByChr.Values.Sum(list => list.Length);
    if (n == 0)
    {
        return new Dictionary<string, Segmentation.Segment[]>();
    }

    var segmentByChr = new Dictionary<string, Segmentation.Segment[]>();

    // load common CNV segments
    Dictionary<string, List<SampleGenomicBin>> commonCNVintervals = null;
    if (_parameters.CommonCnVs != null)
    {
        commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_parameters.CommonCnVs);
        CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _parameters.CommonCnVs);
    }

    Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
    Parallel.ForEach(segmentation.ScoreByChr.Keys, chr =>
    {
        var breakpoints = new List<int>();
        int sizeScoreByChr = segmentation.ScoreByChr[chr].Length;

        // Chromosomes that are too short skip wavelet breakpoint detection; they still get
        // segments derived from an empty breakpoint list (plus any common-CNV regions).
        if (sizeScoreByChr > _parameters.MinSize)
        {
            WaveletSegmentation.HaarWavelets(segmentation.ScoreByChr[chr], _parameters.ThresholdLower,
                _parameters.ThresholdUpper, breakpoints, _parameters.IsGermline,
                madFactor: _parameters.MadFactor);
        }

        // Merge in common-CNV regions when a BED file was configured and covers this chromosome.
        List<SampleGenomicBin> commonRegions;
        if (_parameters.CommonCnVs != null && commonCNVintervals.TryGetValue(chr, out commonRegions))
        {
            List<SampleGenomicBin> remappedCommonCNVintervals = Segmentation.RemapCommonRegions(
                commonRegions, segmentation.StartByChr[chr], segmentation.EndByChr[chr]);
            breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
        }

        var segments = Segmentation.DeriveSegments(breakpoints, sizeScoreByChr,
            segmentation.StartByChr[chr], segmentation.EndByChr[chr]);

        lock (segmentByChr)
        {
            segmentByChr[chr] = segments;
        }
    });
    Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);

    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return segmentByChr;
}