Пример #1
0
        /// <summary>
        /// Wavelets: segments the genome using unbalanced HAAR wavelets.
        /// </summary>
        /// <param name="segmentationInput">Input whose coverage bins (and VAF chromosome keys) drive the segmentation.</param>
        /// <param name="windowSize">Window size forwarded to the coverage-variability and evenness-score calculations.</param>
        /// <returns>Segments derived from the adjusted wavelet breakpoints, keyed by chromosome.</returns>
        public Dictionary <string, SegmentationInput.Segment[]> Run(SegmentationInput segmentationInput, int windowSize)
        {
            double? variability          = segmentationInput.GetCoverageVariability(windowSize);
            var     tripledVariabilities = segmentationInput.FactorOfThreeCoverageVariabilities();

            // The evenness score is written out only when a metric file is configured;
            // any failure here is logged and segmentation carries on.
            try
            {
                double score         = segmentationInput.GetEvennessScore(windowSize);
                bool   hasMetricFile = !segmentationInput.EvennessMetricFile.IsNullOrEmpty();
                if (hasMetricFile)
                {
                    CanvasIO.WriteEvennessMetricToTextFile(segmentationInput.EvennessMetricFile, score);
                }
            }
            catch (Exception)
            {
                Console.Error.WriteLine("Unable to calculate an evenness score, using coverage for segmentation");
            }

            var coverage = segmentationInput.CoverageInfo;

            var rawBreakpoints = LaunchWavelets(coverage.CoverageByChr, coverage.StartByChr, coverage.EndByChr,
                                                variability, tripledVariabilities);

            Dictionary<string, List<int>> adjusted =
                AdjustBreakpoints(coverage.CoverageByChr, rawBreakpoints, vafContainingBinsByChr: null);

            // NOTE(review): the loop walks the VAF chromosome keys but indexes the coverage
            // dictionaries - presumably both share the same key set; confirm against callers.
            var result = new Dictionary<string, SegmentationInput.Segment[]>();
            foreach (string chromosome in segmentationInput.VafByChr.Keys)
            {
                result[chromosome] = SegmentationInput.DeriveSegments(
                    adjusted[chromosome],
                    coverage.CoverageByChr[chromosome].Length,
                    coverage.StartByChr[chromosome],
                    coverage.EndByChr[chromosome]);
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Post-processes the raw segmentation results and writes the resulting segments
        /// to the partitioned output file.
        /// </summary>
        /// <param name="segmentationInput">Input that performs both the post-processing and the writing.</param>
        /// <param name="outPartitionedFile">Path handed to the partition-results writer.</param>
        /// <param name="referencePloidy">Reference ploidy passed to the post-processing step.</param>
        /// <param name="segmentationResults">Segmentation results to post-process.</param>
        private static void PostProcessAndWriteResults(SegmentationInput segmentationInput, string outPartitionedFile,
                                                       PloidyInfo referencePloidy, GenomeSegmentationResults segmentationResults)
        {
            segmentationInput.WriteCanvasPartitionResults(
                outPartitionedFile,
                segmentationInput.PostProcessSegments(segmentationResults, referencePloidy));
        }
Пример #3
0
        /// <summary>
        /// HMM: segments each chromosome from the Viterbi best path of a hidden Markov model with
        /// negative binomial emissions, built from the coverage of all supplied samples.
        /// </summary>
        /// <param name="segmentation">
        /// One input per sample. The first entry determines the chromosomes processed, the number of bins
        /// per chromosome and the bin start/end coordinates; every sample is indexed with those bin indices.
        /// </param>
        /// <param name="isPerSample">
        /// When true, the whole-genome median and pseudo-variance of each sample are passed to the emission
        /// initialization; otherwise null is passed for both. The flag is also forwarded to the HMM constructor.
        /// </param>
        /// <returns>
        /// Segments keyed by chromosome. Chromosomes whose bin count does not exceed _minSize get no entry.
        /// </returns>
        public Dictionary <string, SegmentationInput.Segment[]> Run(List <SegmentationInput> segmentation, bool isPerSample)
        {
            var segmentByChr = new Dictionary<string, SegmentationInput.Segment[]>();

            // Compute whole-genome median and inter-quartile-range-based pseudo-variance for each sample;
            // it would be better to exclude regions that are not diploid, and we should really be
            // using a different variance for each copy number, but using these values is better than
            // using the per-chromosome mean and variance, which have the following problems:
            // - chromosomes with a lot of outliers can get a very high variance
            // - chromosomes that have a whole-chromosome CNV or a CNV that affects a lot of the chromosome
            //   can have problematic estimates
            var medians         = new List<double>();
            var pseudoVariances = new List<double>();

            foreach (var singleSampleSegmentation in segmentation)
            {
                var cvgVals = new List<float>();
                foreach (var chrCoverage in singleSampleSegmentation.CoverageInfo.CoverageByChr.Values)
                {
                    cvgVals.AddRange(chrCoverage.Select(x => (float)x));
                }
                var quartiles = CanvasCommon.Utilities.Quartiles(cvgVals);
                medians.Add(quartiles.Item2);
                var iqr = quartiles.Item3 - quartiles.Item1;
                pseudoVariances.Add(iqr * iqr);
            }

            // The first sample defines the chromosome set and bin layout used for every sample;
            // resolve it once instead of on every access inside the parallel body.
            var referenceCoverage = segmentation.First().CoverageInfo;

            // CancellationTokenSource is IDisposable; scope it so it is released even if a task throws.
            using (var cts = new CancellationTokenSource())
            {
                Parallel.ForEach(
                    referenceCoverage.CoverageByChr.Keys,
                    new ParallelOptions
                    {
                        CancellationToken      = cts.Token,
                        MaxDegreeOfParallelism = Environment.ProcessorCount,
                        TaskScheduler          = TaskScheduler.Default
                    },
                    chr =>
                    {
                        int length     = referenceCoverage.CoverageByChr[chr].Length;
                        var startByChr = referenceCoverage.StartByChr[chr];
                        var endByChr   = referenceCoverage.EndByChr[chr];

                        // Transpose to bin-major layout: one list per bin holding that bin's coverage in every sample.
                        var multiSampleCoverage = new List<List<double>>(length);
                        for (int i = 0; i < length; i++)
                        {
                            multiSampleCoverage.Add(segmentation.Select(x => x.CoverageInfo.CoverageByChr[chr][i]).ToList());
                        }

                        // Chromosomes with too few bins are skipped and get no entry in the result.
                        if (length <= _minSize)
                        {
                            return;
                        }

                        var haploidMeans = new List<double>(_nHiddenStates);
                        var negativeBinomialDistributions = isPerSample
                            ? InitializeNegativeBinomialEmission(multiSampleCoverage, _nHiddenStates, haploidMeans, medians, pseudoVariances)
                            : InitializeNegativeBinomialEmission(multiSampleCoverage, _nHiddenStates, haploidMeans, null, null);

                        var hmm = new HiddenMarkovModel(multiSampleCoverage, negativeBinomialDistributions, haploidMeans, isPerSample);
                        Console.WriteLine($"{DateTime.Now} Launching HMM task for chromosome {chr}");
                        var bestPathViterbi = hmm.BestPathViterbi(multiSampleCoverage, startByChr, haploidMeans);
                        Console.WriteLine($"{DateTime.Now} Completed HMM task for chromosome {chr}");

                        // A breakpoint is placed at bin 0 and wherever the decoded state changes between adjacent bins.
                        var breakpoints = new List<int> { 0 };
                        for (int i = 1; i < length; i++)
                        {
                            if (bestPathViterbi[i] - bestPathViterbi[i - 1] != 0)
                            {
                                breakpoints.Add(i);
                            }
                        }

                        var segments = SegmentationInput.DeriveSegments(breakpoints, length, startByChr, endByChr);

                        // The result dictionary is shared across worker threads.
                        lock (segmentByChr)
                        {
                            segmentByChr[chr] = segments;
                        }
                    });
            }

            Console.WriteLine("{0} Completed HMM tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return segmentByChr;
        }
Пример #4
0
        /// <summary>
        /// CBS: circular binary segmentation porting the R function segment in DNAcopy
        /// </summary>
        /// <remarks>
        /// The significance level and the undo method ("none", "prune" or "sdundo") are not parameters;
        /// they are read from this._alpha and this._undoMethod. An invalid minWidth or nMin writes a
        /// message to standard error and terminates the process with exit code 1.
        /// </remarks>
        /// <param name="segmentation">Input providing per-chromosome coverage values and bin start/end coordinates.</param>
        /// <param name="nPerm">Forwarded to the boundary computation and to the change-point search.</param>
        /// <param name="pMethod">"hybrid" or "perm"</param>
        /// <param name="minWidth">Minimum segment width; must be between 2 and 5.</param>
        /// <param name="kMax">Forwarded to the change-point search; nMin must be at least 4 * kMax.</param>
        /// <param name="nMin">Forwarded to the change-point search; must be at least 4 * kMax.</param>
        /// <param name="eta">Used only to compute the boundary when sbdry is null.</param>
        /// <param name="sbdry">Precomputed boundary; computed from nPerm, this._alpha and eta when null.</param>
        /// <param name="trim">Trim value passed to the trimmed-variance estimate over all finite scores.</param>
        /// <param name="undoPrune">Forwarded to the change-point search.</param>
        /// <param name="undoSD">Forwarded to the change-point search.</param>
        /// <param name="verbose">Forwarded to the change-point search.</param>
        /// <returns>
        /// Segments keyed by chromosome, or an empty dictionary when no chromosome has any finite score.
        /// </returns>
        public Dictionary <string, SegmentationInput.Segment[]> Run(SegmentationInput segmentation, uint nPerm = 10000, string pMethod = "hybrid", int minWidth = 2, int kMax = 25,
                                                                    uint nMin        = 200, double eta     = 0.05, uint[] sbdry = null, double trim = 0.025,
                                                                    double undoPrune = 0.05, double undoSD = 3, int verbose     = 1)
        {
            if (minWidth < 2 || minWidth > 5)
            {
                Console.Error.WriteLine("Minimum segment width should be between 2 and 5");
                Environment.Exit(1);
            }
            if (nMin < 4 * kMax)
            {
                Console.Error.WriteLine("nMin should be >= 4 * kMax");
                Environment.Exit(1);
            }
            if (sbdry == null)
            {
                GetBoundary.ComputeBoundary(nPerm, this._alpha, eta, out sbdry);
            }

            var inaByChr          = new Dictionary<string, int[]>();
            var finiteScoresByChr = new Dictionary<string, double[]>();

            // Pass 1: per chromosome, record the indices of finite scores (not NaN, -Inf, Inf)
            // and the scores restricted to those indices.
            Parallel.ForEach(segmentation.CoverageInfo.CoverageByChr, scoreByChrKVP =>
            {
                int[] ina;
                Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina);

                double[] scores;
                if (ina.Length == scoreByChrKVP.Value.Length)
                {
                    // Everything is finite: reuse the original array instead of copying.
                    scores = scoreByChrKVP.Value;
                }
                else
                {
                    Helper.ExtractValues<double>(scoreByChrKVP.Value, ina, out scores);
                }

                // One lock guards both dictionaries; they are always updated together.
                lock (finiteScoresByChr)
                {
                    finiteScoresByChr[scoreByChrKVP.Key] = scores;
                    inaByChr[scoreByChrKVP.Key]          = ina;
                }
            });

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = 0;
            foreach (var list in finiteScoresByChr.Values)
            {
                n += list.Length;
            }
            if (n == 0)
            {
                return new Dictionary<string, SegmentationInput.Segment[]>();
            }

            double trimmedSD = Math.Sqrt(ChangePoint.TrimmedVariance(finiteScoresByChr, trim: trim));

            var segmentByChr = new Dictionary<string, SegmentationInput.Segment[]>();

            // when parallelizing we need an RNG for each chromosome to get deterministic results
            Random seedGenerator    = new MersenneTwister(0);
            var perChromosomeRandom = new Dictionary<string, Random>();
            foreach (string chr in segmentation.CoverageInfo.CoverageByChr.Keys)
            {
                perChromosomeRandom[chr] = new MersenneTwister(seedGenerator.NextFullRangeInt32(), true);
            }

            // Pass 2: segment every chromosome independently.
            Parallel.ForEach(segmentation.CoverageInfo.CoverageByChr.Keys, chr =>
            {
                int[]    ina          = inaByChr[chr];
                double[] finiteScores = finiteScoresByChr[chr];

                SegmentationInput.Segment[] segments;
                if (finiteScores.Length == 0)
                {
                    // No finite bins on this chromosome: there is nothing to segment and no index to map back.
                    segments = new SegmentationInput.Segment[0];
                }
                else
                {
                    int[]    lengthSeg;
                    double[] segmentMeans;

                    // Segment the finite scores only. Segment lengths are mapped back to bins through
                    // `ina` below, so they must count finite values; feeding the unfiltered coverage
                    // would misalign (or overrun) that mapping whenever a non-finite bin is present.
                    ChangePoint.ChangePoints(finiteScores, sbdry, out lengthSeg, out segmentMeans, perChromosomeRandom[chr],
                                             dataType: "logratio", alpha: this._alpha, nPerm: nPerm,
                                             pMethod: pMethod, minWidth: minWidth, kMax: kMax, nMin: nMin, trimmedSD: trimmedSD,
                                             undoSplits: this._undoMethod, undoPrune: undoPrune, undoSD: undoSD, verbose: verbose);

                    var binStarts = segmentation.CoverageInfo.StartByChr[chr];
                    var binEnds   = segmentation.CoverageInfo.EndByChr[chr];

                    segments = new SegmentationInput.Segment[lengthSeg.Length];
                    int cs1 = 0, cs2 = -1; // cumulative sums: first / last finite-score index of the current segment
                    for (int i = 0; i < lengthSeg.Length; i++)
                    {
                        cs2 += lengthSeg[i];
                        segments[i] = new SegmentationInput.Segment
                        {
                            start = binStarts[ina[cs1]], // Genomic start
                            end   = binEnds[ina[cs2]]    // Genomic end
                        };
                        cs1 += lengthSeg[i];
                    }
                }

                // The result dictionary is shared across worker threads.
                lock (segmentByChr)
                {
                    segmentByChr[chr] = segments;
                }
            });

            Console.WriteLine("{0} Completed CBS tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return segmentByChr;
        }