Esempio n. 1
0
        /// <summary>
        /// Command-line entry point: parses the options, validates the input paths and
        /// runs the genome segmentation, writing the result to the requested output file.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>
        /// 0 when segmentation ran or when the help text was shown (this includes the case
        /// where the input or output file option is missing); 1 when the input file or the
        /// bed file does not exist.
        /// </returns>
        static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);
            string inFile = null;
            string outFile = null;
            bool needHelp = false;
            bool isGermline = false;
            string bedPath = null;
            double alpha = Segmentation.DefaultAlpha;
            SegmentSplitUndo undoMethod = SegmentSplitUndo.None;
            SegmentationMethod partitionMethod = SegmentationMethod.Wavelets;
            int maxInterBinDistInSegment = 1000000;

            // Numeric options are machine-readable, so they are parsed independently of the
            // current locale (otherwise "0.01" is misread under comma-decimal cultures).
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            OptionSet p = new OptionSet()
            {
                { "i|infile=", "input file - usually generated by CanvasClean", v => inFile = v },
                { "o|outfile=", "text file to output", v => outFile = v },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                // Parsed as double: going through float first would round the value to
                // single precision before it is widened back to double.
                { "a|alpha=", "alpha parameter to CBS. Default: " + alpha, v => alpha = double.Parse(v, invariantCulture) },
                { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (SegmentationMethod)Enum.Parse(typeof(SegmentationMethod), v) },
                { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
                { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => bedPath = v },
                { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
                { "d|maxInterBinDistInSegment=", "the maximum distance between adjacent bins in a segment (negative numbers turn off splitting segments after segmentation). Default: " + maxInterBinDistInSegment, v => maxInterBinDistInSegment = int.Parse(v, invariantCulture) },
            };

            // The unrecognised arguments returned by Parse are intentionally ignored.
            p.Parse(args);

            if (needHelp)
            {
                ShowHelp(p);
                return 0;
            }

            // NOTE(review): missing required options exit with 0, same as an explicit help
            // request. Kept for backward compatibility with existing callers - confirm
            // whether a non-zero exit code would be more appropriate.
            if (inFile == null || outFile == null)
            {
                ShowHelp(p);
                return 0;
            }

            if (!File.Exists(inFile))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", inFile);
                return 1;
            }

            if (!string.IsNullOrEmpty(bedPath) && !File.Exists(bedPath))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", bedPath);
                return 1;
            }

            // The segmentation method (-m) is passed to SegmentGenome, not to the constructor.
            Segmentation segmentationEngine = new Segmentation(inFile, bedPath, maxInterBinDistInSegment: maxInterBinDistInSegment);
            segmentationEngine.Alpha = alpha;
            segmentationEngine.UndoMethod = undoMethod;
            segmentationEngine.SegmentGenome(outFile, partitionMethod, isGermline);
            return 0;
        }
        /// <summary>
        /// Segments every chromosome with a hidden Markov model built from the scores of all
        /// supplied samples. A breakpoint is placed wherever the Viterbi best path changes
        /// value between two adjacent positions; when a common-CNV bed file is configured,
        /// the breakpoints are additionally overlapped with the common regions of that chromosome.
        /// </summary>
        /// <param name="segmentation">
        /// One entry per sample. The first entry supplies the chromosome names and the
        /// start/end coordinate arrays; every entry is indexed with the same chromosome name
        /// and position, so all samples are presumably expected to share the same layout.
        /// </param>
        /// <returns>
        /// Segments keyed by chromosome. Chromosomes whose score array length does not
        /// exceed <c>_minSize</c> get no entry.
        /// </returns>
        public Dictionary <string, Segmentation.Segment[]> Run(List <Segmentation> segmentation)
        {
            Dictionary <string, List <SampleGenomicBin> > commonCNVintervals = null;

            if (_commonCnVs != null)
            {
                commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_commonCnVs);
                CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _commonCnVs);
            }
            var segmentByChr = new Dictionary <string, Segmentation.Segment[]>();

            // Resolved once instead of calling First() repeatedly inside the parallel body.
            var referenceSample = segmentation.First();

            // The token source is disposable; scope it to the (synchronous) parallel loop.
            using (var cts = new CancellationTokenSource())
            {
                var parallelOptions = new ParallelOptions
                {
                    CancellationToken      = cts.Token,
                    MaxDegreeOfParallelism = Environment.ProcessorCount,
                    TaskScheduler          = TaskScheduler.Default
                };

                Parallel.ForEach(
                    referenceSample.ScoreByChr.Keys,
                    parallelOptions,
                    chr =>
                {
                    int length     = referenceSample.ScoreByChr[chr].Length;
                    var startByChr = referenceSample.StartByChr[chr];
                    var endByChr   = referenceSample.EndByChr[chr];

                    // One inner list per position, holding that position's score from every sample.
                    var multiSampleCoverage = new List <List <double> >(length);
                    for (int i = 0; i < length; i++)
                    {
                        int position = i;
                        multiSampleCoverage.Add(segmentation.Select(x => x.ScoreByChr[chr][position]).ToList());
                    }

                    // Chromosomes that are too short are skipped and get no entry in the result.
                    if (length <= _minSize)
                    {
                        return;
                    }

                    var haploidMeans = new List <double>(_nHiddenStates);
                    var negativeBinomialDistributions = InitializeNegativeBinomialEmission(multiSampleCoverage, _nHiddenStates, haploidMeans);
                    var hmm = new HiddenMarkovModel(multiSampleCoverage, negativeBinomialDistributions, haploidMeans);
                    Console.WriteLine($"{DateTime.Now} Launching HMM task for chromosome {chr}");
                    if (_nSamples <= 3)
                    {
                        hmm.FindMaximalLikelihood(multiSampleCoverage);
                    }
                    var bestPathViterbi = hmm.BestPathViterbi(multiSampleCoverage, startByChr, haploidMeans);
                    Console.WriteLine($"{DateTime.Now} Completed HMM task for chromosome {chr}");

                    // Position 0 always opens a segment; every change of value along the
                    // best path opens another one.
                    var breakpoints = new List <int>();
                    breakpoints.Add(0);
                    for (int i = 1; i < length; i++)
                    {
                        if (bestPathViterbi[i] - bestPathViterbi[i - 1] != 0)
                        {
                            breakpoints.Add(i);
                        }
                    }

                    // Single lookup instead of ContainsKey followed by the indexer.
                    List <SampleGenomicBin> commonIntervalsForChr;
                    if (commonCNVintervals != null && commonCNVintervals.TryGetValue(chr, out commonIntervalsForChr))
                    {
                        var remappedCommonCNVintervals = Segmentation.RemapCommonRegions(commonIntervalsForChr, startByChr, endByChr);
                        breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
                    }

                    var segments = Segmentation.DeriveSegments(breakpoints, length, startByChr, endByChr);

                    // The result dictionary is shared between worker threads.
                    lock (segmentByChr)
                    {
                        segmentByChr[chr] = segments;
                    }
                });
            }

            Console.WriteLine("{0} Completed HMM tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return segmentByChr;
        }
Esempio n. 3
0
        /// <summary>
        /// Wavelets: unbalanced HAAR wavelets segmentation
        /// </summary>
        /// <param name="segmentation">
        /// Source of the per-chromosome score arrays and of the matching start/end
        /// coordinate arrays.
        /// </param>
        /// <returns>
        /// Segments keyed by chromosome, or an empty dictionary when no chromosome contains
        /// a single finite score.
        /// </returns>
        public Dictionary <string, Segmentation.Segment[]> Run(Segmentation segmentation)
        {
            // Pass 1: collect the finite scores per chromosome. They are only used for the
            // sanity check below; the wavelet pass itself works on the unfiltered score arrays.
            var finiteScoresByChr = new Dictionary <string, double[]>();

            Parallel.ForEach(segmentation.ScoreByChr, (KeyValuePair <string, double[]> scoreByChrKVP) =>
            {
                double[] allScores = scoreByChrKVP.Value;
                int[] finiteIndices;
                Helper.GetFiniteIndices(allScores, out finiteIndices); // not NaN, -Inf, Inf

                double[] finiteScores;
                if (finiteIndices.Length == allScores.Length)
                {
                    // Everything is finite: reuse the original array instead of copying it.
                    finiteScores = allScores;
                }
                else
                {
                    Helper.ExtractValues <double>(allScores, finiteIndices, out finiteScores);
                }

                // The dictionary is shared between worker threads.
                lock (finiteScoresByChr)
                {
                    finiteScoresByChr[scoreByChrKVP.Key] = finiteScores;
                }
            });

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = finiteScoresByChr.Values.Sum(list => list.Length);
            if (n == 0)
            {
                return new Dictionary <string, Segmentation.Segment[]>();
            }

            var segmentByChr = new Dictionary <string, Segmentation.Segment[]>();

            // load common CNV segments
            Dictionary <string, List <SampleGenomicBin> > commonCNVintervals = null;
            if (_parameters.CommonCnVs != null)
            {
                commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_parameters.CommonCnVs);
                CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _parameters.CommonCnVs);
            }

            // Pass 2: segment each chromosome independently.
            Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
            Parallel.ForEach(segmentation.ScoreByChr.Keys, chr =>
            {
                var chrScores = segmentation.ScoreByChr[chr];
                var chrStarts = segmentation.StartByChr[chr];
                var chrEnds   = segmentation.EndByChr[chr];
                int sizeScoreByChr = chrScores.Length;

                // Chromosomes that are too short skip the wavelet step and keep an empty
                // breakpoint list; they still get an entry in the result.
                List <int> breakpoints = new List <int>();
                if (sizeScoreByChr > _parameters.MinSize)
                {
                    WaveletSegmentation.HaarWavelets(chrScores, _parameters.ThresholdLower, _parameters.ThresholdUpper,
                                                     breakpoints, _parameters.IsGermline, madFactor: _parameters.MadFactor);
                }

                // Single lookup instead of ContainsKey followed by the indexer.
                List <SampleGenomicBin> commonIntervalsForChr;
                if (commonCNVintervals != null && commonCNVintervals.TryGetValue(chr, out commonIntervalsForChr))
                {
                    List <SampleGenomicBin> remappedCommonCNVintervals = Segmentation.RemapCommonRegions(commonIntervalsForChr, chrStarts, chrEnds);
                    breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
                }

                var segments = Segmentation.DeriveSegments(breakpoints, sizeScoreByChr, chrStarts, chrEnds);

                // The result dictionary is shared between worker threads.
                lock (segmentByChr)
                {
                    segmentByChr[chr] = segments;
                }
            });
            Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return segmentByChr;
        }