/// <summary>
/// Command-line entry point: parses the options, checks that the input paths exist,
/// and runs genome segmentation, writing the result to the requested output file.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 when segmentation completes or when the usage text is shown (on request, or because
/// the input or output path was not supplied); 1 when the input file or the bed file
/// does not exist.
/// </returns>
static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);

    string inFile = null;
    string outFile = null;
    bool needHelp = false;
    bool isGermline = false;
    string bedPath = null;
    double alpha = Segmentation.DefaultAlpha;
    SegmentSplitUndo undoMethod = SegmentSplitUndo.None;
    SegmentationMethod partitionMethod = SegmentationMethod.Wavelets;
    int maxInterBinDistInSegment = 1000000;

    OptionSet p = new OptionSet()
    {
        { "i|infile=", "input file - usually generated by CanvasClean", v => inFile = v },
        { "o|outfile=", "text file to output", v => outFile = v },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        // Parse straight to double: going through float first rounds the value to single
        // precision (e.g. "0.01" would become 0.00999999977648258) before it is widened.
        { "a|alpha=", "alpha parameter to CBS. Default: " + alpha, v => alpha = double.Parse(v) },
        { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (SegmentationMethod)Enum.Parse(typeof(SegmentationMethod), v) },
        { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
        { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => bedPath = v },
        { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
        { "d|maxInterBinDistInSegment=", "the maximum distance between adjacent bins in a segment (negative numbers turn off splitting segments after segmentation). Default: " + maxInterBinDistInSegment, v => maxInterBinDistInSegment = int.Parse(v) },
    };

    // The list of unrecognised arguments returned by Parse is not used.
    p.Parse(args);

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    // NOTE(review): missing required arguments still exit with 0, as before; callers that
    // check the exit code cannot tell this apart from success.
    if (inFile == null || outFile == null)
    {
        ShowHelp(p);
        return 0;
    }

    if (!File.Exists(inFile))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", inFile);
        return 1;
    }

    // The bed file is optional; only validate it when a path was given.
    if (!string.IsNullOrEmpty(bedPath) && !File.Exists(bedPath))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", bedPath);
        return 1;
    }

    Segmentation segmentationEngine = new Segmentation(inFile, bedPath, maxInterBinDistInSegment: maxInterBinDistInSegment);
    segmentationEngine.Alpha = alpha;
    segmentationEngine.UndoMethod = undoMethod;
    segmentationEngine.SegmentGenome(outFile, partitionMethod, isGermline);
    return 0;
}
/// <summary>
/// Segments each chromosome with a hidden Markov model built from the coverage of all
/// supplied samples. Breakpoints are placed wherever the Viterbi path changes state and,
/// when a common-CNV bed file is configured (_commonCnVs), are additionally combined with
/// the common regions for that chromosome. Chromosomes are processed in parallel.
/// </summary>
/// <param name="segmentation">
/// One entry per sample. The first entry supplies the chromosome list and the bin
/// start/end coordinates, so the list must contain at least one element
/// (otherwise <c>First()</c> throws).
/// </param>
/// <returns>
/// Segments keyed by chromosome. Chromosomes whose bin count does not exceed _minSize
/// are skipped and get no entry.
/// </returns>
public Dictionary<string, Segmentation.Segment[]> Run(List<Segmentation> segmentation)
{
    Dictionary<string, List<SampleGenomicBin>> commonCNVintervals = null;
    if (_commonCnVs != null)
    {
        commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_commonCnVs);
        CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _commonCnVs);
    }

    var segmentByChr = new Dictionary<string, Segmentation.Segment[]>();

    // Resolve the reference sample once instead of calling First() for every lookup.
    var referenceSample = segmentation.First();

    // The token source is never cancelled here, but it is disposable and must be released.
    using (var cts = new CancellationTokenSource())
    {
        var parallelOptions = new ParallelOptions
        {
            CancellationToken = cts.Token,
            MaxDegreeOfParallelism = Environment.ProcessorCount,
            TaskScheduler = TaskScheduler.Default
        };

        Parallel.ForEach(referenceSample.ScoreByChr.Keys, parallelOptions, chr =>
        {
            var breakpoints = new List<int>();
            int length = referenceSample.ScoreByChr[chr].Length;
            var startByChr = referenceSample.StartByChr[chr];
            var endByChr = referenceSample.EndByChr[chr];

            // One row per bin, one column per sample.
            var multiSampleCoverage = new List<List<double>>(length);
            for (int i = 0; i < length; i++)
            {
                multiSampleCoverage.Add(segmentation.Select(x => x.ScoreByChr[chr][i]).ToList());
            }

            if (length > _minSize)
            {
                var haploidMeans = new List<double>(_nHiddenStates);
                var negativeBinomialDistributions = InitializeNegativeBinomialEmission(multiSampleCoverage, _nHiddenStates, haploidMeans);
                var hmm = new HiddenMarkovModel(multiSampleCoverage, negativeBinomialDistributions, haploidMeans);
                Console.WriteLine($"{DateTime.Now} Launching HMM task for chromosome {chr}");

                // The likelihood-maximisation step only runs for three samples or fewer.
                if (_nSamples <= 3)
                {
                    hmm.FindMaximalLikelihood(multiSampleCoverage);
                }

                var bestPathViterbi = hmm.BestPathViterbi(multiSampleCoverage, startByChr, haploidMeans);
                Console.WriteLine($"{DateTime.Now} Completed HMM task for chromosome {chr}");

                // Bin 0 always opens a segment; every later change of state opens another.
                breakpoints.Add(0);
                for (int i = 1; i < length; i++)
                {
                    if (bestPathViterbi[i] - bestPathViterbi[i - 1] != 0)
                    {
                        breakpoints.Add(i);
                    }
                }

                if (_commonCnVs != null)
                {
                    List<SampleGenomicBin> chrCommonIntervals;
                    if (commonCNVintervals.TryGetValue(chr, out chrCommonIntervals))
                    {
                        var remappedCommonCNVintervals = Segmentation.RemapCommonRegions(chrCommonIntervals, startByChr, endByChr);
                        breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
                    }
                }

                var segments = Segmentation.DeriveSegments(breakpoints, length, startByChr, endByChr);

                // Dictionary is not safe for concurrent writes.
                lock (segmentByChr)
                {
                    segmentByChr[chr] = segments;
                }
            }
        });
    }

    Console.WriteLine("{0} Completed HMM tasks", DateTime.Now);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return segmentByChr;
}
/// <summary>
/// Wavelets: unbalanced HAAR wavelets segmentation
/// </summary>
/// <param name="segmentation">
/// Source of the per-chromosome scores (ScoreByChr) and bin coordinates
/// (StartByChr / EndByChr) to segment.
/// </param>
/// <returns>
/// Segments keyed by chromosome, or an empty dictionary when no chromosome has any
/// finite score.
/// </returns>
public Dictionary<string, Segmentation.Segment[]> Run(Segmentation segmentation)
{
    // Pass 1: collect the finite scores per chromosome. They are used only for the
    // sanity check below; the wavelet pass itself runs on the unfiltered scores.
    var finiteScoresByChr = new Dictionary<string, double[]>();
    Parallel.ForEach(segmentation.ScoreByChr, scoreByChrKVP =>
    {
        double[] allScores = scoreByChrKVP.Value;
        int[] finiteIndices;
        Helper.GetFiniteIndices(allScores, out finiteIndices); // not NaN, -Inf, Inf

        double[] finiteScores;
        if (finiteIndices.Length == allScores.Length)
        {
            // Everything is finite, so the original array can be reused as is.
            finiteScores = allScores;
        }
        else
        {
            Helper.ExtractValues<double>(allScores, finiteIndices, out finiteScores);
        }

        // Dictionary is not safe for concurrent writes.
        lock (finiteScoresByChr)
        {
            finiteScoresByChr[scoreByChrKVP.Key] = finiteScores;
        }
    });

    // Quick sanity-check: If we don't have any segments, then return a dummy result.
    int n = finiteScoresByChr.Values.Sum(list => list.Length);
    if (n == 0)
    {
        return new Dictionary<string, Segmentation.Segment[]>();
    }

    var segmentByChr = new Dictionary<string, Segmentation.Segment[]>();

    // load common CNV segments
    Dictionary<string, List<SampleGenomicBin>> commonCNVintervals = null;
    if (_parameters.CommonCnVs != null)
    {
        commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_parameters.CommonCnVs);
        CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _parameters.CommonCnVs);
    }

    // Pass 2: one wavelet segmentation per chromosome, run in parallel.
    Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
    Parallel.ForEach(segmentation.ScoreByChr.Keys, chr =>
    {
        List<int> breakpoints = new List<int>();
        double[] chrScores = segmentation.ScoreByChr[chr];
        int sizeScoreByChr = chrScores.Length;
        var startByChr = segmentation.StartByChr[chr];
        var endByChr = segmentation.EndByChr[chr];

        // Chromosomes at or below the minimum size skip the wavelet step and go on
        // with an empty breakpoint list.
        if (sizeScoreByChr > _parameters.MinSize)
        {
            WaveletSegmentation.HaarWavelets(chrScores, _parameters.ThresholdLower, _parameters.ThresholdUpper,
                breakpoints, _parameters.IsGermline, madFactor: _parameters.MadFactor);
        }

        if (_parameters.CommonCnVs != null)
        {
            List<SampleGenomicBin> chrCommonIntervals;
            if (commonCNVintervals.TryGetValue(chr, out chrCommonIntervals))
            {
                List<SampleGenomicBin> remappedCommonCNVintervals =
                    Segmentation.RemapCommonRegions(chrCommonIntervals, startByChr, endByChr);
                breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
            }
        }

        var segments = Segmentation.DeriveSegments(breakpoints, sizeScoreByChr, startByChr, endByChr);

        // Dictionary is not safe for concurrent writes.
        lock (segmentByChr)
        {
            segmentByChr[chr] = segments;
        }
    });
    Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);

    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
    return segmentByChr;
}