WaveletSegmentation: unbalanced Haar wavelet segmentation. C# implementation of "Unbalanced Haar Technique for Nonparametric Function Estimation", Piotr Fryzlewicz, Journal of the American Statistical Association, Vol. 102, No. 480 (Dec. 2007), pp. 1318-1327.
Exemple #1
0
        /// <summary>
        /// Runs <c>WaveletSegmentation.HaarWavelets</c> on every chromosome in parallel and
        /// collects the breakpoints it reports, keyed by chromosome.
        /// </summary>
        /// <param name="coverageByChr">Coverage values per chromosome, keyed by chromosome name.</param>
        /// <param name="startByChr">Not read by this method.</param>
        /// <param name="endByChr">Not read by this method.</param>
        /// <param name="CV">Forwarded unchanged to <c>WaveletSegmentation.HaarWavelets</c>.</param>
        /// <param name="factorOfThreeCMADs">Forwarded unchanged to <c>WaveletSegmentation.HaarWavelets</c>.</param>
        /// <returns>
        /// One breakpoint list per chromosome in <paramref name="coverageByChr"/> (empty for chromosomes
        /// that are not longer than <c>_parameters.MinSize</c>), or an empty dictionary when no
        /// chromosome contains a finite coverage value.
        /// </returns>
        public Dictionary<string, List<int>> LaunchWavelets(Dictionary<string, double[]> coverageByChr, Dictionary<string, uint[]> startByChr,
                                                            Dictionary<string, uint[]> endByChr, double? CV, List<double> factorOfThreeCMADs)
        {
            var finiteScoresByChr = new Dictionary<string, double[]>();

            // First pass: collect the finite values of each chromosome so we can tell whether there is anything to segment.
            Parallel.ForEach(coverageByChr, scoreByChrKVP =>
            {
                double[] rawScores = scoreByChrKVP.Value;
                Helper.GetFiniteIndices(rawScores, out int[] ina); // not NaN, -Inf, Inf

                double[] scores;
                if (ina.Length == rawScores.Length)
                {
                    // Every value is finite; no need to copy.
                    scores = rawScores;
                }
                else
                {
                    Helper.ExtractValues<double>(rawScores, ina, out scores);
                }

                // Dictionary is not safe for concurrent writes.
                lock (finiteScoresByChr)
                {
                    finiteScoresByChr[scoreByChrKVP.Key] = scores;
                }
            });

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = finiteScoresByChr.Values.Sum(list => list.Length);
            if (n == 0)
            {
                return new Dictionary<string, List<int>>();
            }

            var breakpointsByChr = new Dictionary<string, List<int>>();

            Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);

            // Second pass: segment each chromosome independently.
            Parallel.ForEach(coverageByChr.Keys, chr =>
            {
                var breakpoints = new List<int>();
                // to cover cases of no SNVs present (i.e. chrY) => chromosome becomes one segment
                int segmentLengthByChr = Math.Max(coverageByChr[chr].Length, 1);
                if (segmentLengthByChr > _parameters.MinSize)
                {
                    WaveletSegmentation.HaarWavelets(coverageByChr[chr], _parameters.ThresholdLower,
                                                     _parameters.ThresholdUpper,
                                                     breakpoints, _parameters.IsGermline, _parameters.MadFactor,
                                                     CV, factorOfThreeCMADs, chr);
                }

                lock (breakpointsByChr)
                {
                    breakpointsByChr[chr] = breakpoints;
                }
            });

            Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return breakpointsByChr;
        }
Exemple #2
0
        /// <summary>
        /// Wavelets: unbalanced HAAR wavelets segmentation.
        /// Segments every chromosome in <c>ScoreByChr</c> on parallel threads and stores the
        /// outcome in <c>SegmentationResults</c>.
        /// </summary>
        /// <param name="isGermline">Forwarded to <c>WaveletSegmentation.HaarWavelets</c>.</param>
        /// <param name="thresholdLower">Lower wavelet coefficient threshold, forwarded to <c>WaveletSegmentation.HaarWavelets</c>.</param>
        /// <param name="thresholdUpper">Upper wavelet coefficient threshold, forwarded to <c>WaveletSegmentation.HaarWavelets</c>.</param>
        /// <param name="minSize">A chromosome is only passed to the wavelet routine when it has more than this many scores.</param>
        /// <param name="verbose">Not read by this method.</param>
        private void Wavelets(bool isGermline, double thresholdLower = 5, double thresholdUpper = 80, int minSize = 10, int verbose = 1)
        {
            Dictionary<string, double[]> finiteScoresByChr = new Dictionary<string, double[]>();

            // First pass: collect the finite values of each chromosome so we can tell whether there is anything to segment.
            List<ThreadStart> tasks = new List<ThreadStart>();
            foreach (KeyValuePair<string, double[]> scoreByChrKVP in ScoreByChr)
            {
                tasks.Add(new ThreadStart(() =>
                {
                    int[] ina;
                    Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf

                    double[] scores;
                    if (ina.Length == scoreByChrKVP.Value.Length)
                    {
                        // Every value is finite; no need to copy.
                        scores = scoreByChrKVP.Value;
                    }
                    else
                    {
                        Helper.ExtractValues<double>(scoreByChrKVP.Value, ina, out scores);
                    }

                    // Dictionary is not safe for concurrent writes.
                    lock (finiteScoresByChr)
                    {
                        finiteScoresByChr[scoreByChrKVP.Key] = scores;
                    }
                }));
            }
            Isas.Shared.Utilities.DoWorkParallelThreads(tasks);

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = finiteScoresByChr.Values.Sum(list => list.Length);
            if (n == 0)
            {
                this.SegmentationResults = this.GetDummySegmentationResults();
                return;
            }

            Dictionary<string, Segment[]> segmentByChr = new Dictionary<string, Segment[]>();

            // Second pass: one task per chromosome.
            tasks = new List<ThreadStart>();
            foreach (string chr in ScoreByChr.Keys)
            {
                tasks.Add(new ThreadStart(() =>
                {
                    double[] chrScores = this.ScoreByChr[chr];
                    int sizeScoreByChr = chrScores.Length;

                    List<int> breakpoints = new List<int>();
                    if (sizeScoreByChr > minSize)
                    {
                        // The wavelet routine receives a copy of the scores, as in the original call.
                        WaveletSegmentation.HaarWavelets(chrScores.ToArray(), thresholdLower, thresholdUpper, breakpoints, isGermline);
                    }

                    List<int> startBreakpointsPos = new List<int>();
                    List<int> endBreakpointPos = new List<int>();
                    List<int> lengthSeg = new List<int>();

                    // NOTE(review): the literal 10 matches the default of minSize; confirm whether minSize was intended here.
                    // NOTE(review): each length below is (end index - start index), assuming the first breakpoint is 0,
                    // i.e. one less than an inclusive marker count - confirm against Helper.WeightedAverage's iEnd convention.
                    if (breakpoints.Count >= 2 && sizeScoreByChr > 10)
                    {
                        // First segment.
                        startBreakpointsPos.Add(breakpoints[0]);
                        endBreakpointPos.Add(breakpoints[1] - 1);
                        lengthSeg.Add(breakpoints[1] - 1);

                        // Interior segments: each runs from one breakpoint to just before the next.
                        for (int i = 1; i < breakpoints.Count - 1; i++)
                        {
                            startBreakpointsPos.Add(breakpoints[i]);
                            endBreakpointPos.Add(breakpoints[i + 1] - 1);
                            lengthSeg.Add(breakpoints[i + 1] - 1 - breakpoints[i]);
                        }

                        // Last segment runs to the end of the chromosome.
                        int lastBreakpoint = breakpoints[breakpoints.Count - 1];
                        startBreakpointsPos.Add(lastBreakpoint);
                        endBreakpointPos.Add(sizeScoreByChr - 1);
                        lengthSeg.Add(sizeScoreByChr - lastBreakpoint - 1);
                    }
                    else
                    {
                        // Fewer than two breakpoints: the whole chromosome is a single segment.
                        startBreakpointsPos.Add(0);
                        endBreakpointPos.Add(sizeScoreByChr - 1);
                        lengthSeg.Add(sizeScoreByChr - 1);
                    }

                    // estimate segment means
                    double[] segmentMeans = new double[lengthSeg.Count];
                    int ss = 0, ee = 0;
                    for (int i = 0; i < lengthSeg.Count; i++)
                    {
                        ee += lengthSeg[i];
                        // Works even if weights == null
                        segmentMeans[i] = Helper.WeightedAverage(chrScores, null, iStart: ss, iEnd: ee);
                        ss = ee;
                    }

                    // Translate bin indices into genomic coordinates.
                    Segment[] segments = new Segment[startBreakpointsPos.Count];
                    for (int i = 0; i < startBreakpointsPos.Count; i++)
                    {
                        int start = startBreakpointsPos[i];
                        int end = endBreakpointPos[i];
                        segments[i] = new Segment();
                        segments[i].start = this.StartByChr[chr][start]; // Genomic start
                        segments[i].end = this.EndByChr[chr][end];       // Genomic end
                        segments[i].nMarkers = lengthSeg[i];
                        segments[i].mean = segmentMeans[i];
                    }

                    lock (segmentByChr)
                    {
                        segmentByChr[chr] = segments;
                    }
                }));
            }
            Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
            Isas.Shared.Utilities.DoWorkParallelThreads(tasks);
            Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
            this.SegmentationResults = new GenomeSegmentationResults(segmentByChr);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
        }
Exemple #3
0
        /// <summary>
        /// Wavelets: unbalanced HAAR wavelets segmentation.
        /// Segments every chromosome of <paramref name="segmentation"/> in parallel and, when
        /// <c>_parameters.CommonCnVs</c> is set, merges the breakpoints with the common CNV intervals
        /// loaded from that file.
        /// </summary>
        /// <param name="segmentation">Supplies <c>ScoreByChr</c>, <c>StartByChr</c> and <c>EndByChr</c> for each chromosome.</param>
        /// <returns>
        /// The segments derived for each chromosome, or an empty dictionary when no chromosome
        /// contains a finite score.
        /// </returns>
        public Dictionary<string, Segmentation.Segment[]> Run(Segmentation segmentation)
        {
            Dictionary<string, double[]> finiteScoresByChr = new Dictionary<string, double[]>();

            // First pass: collect the finite values of each chromosome so we can tell whether there is anything to segment.
            Parallel.ForEach(segmentation.ScoreByChr, scoreByChrKVP =>
            {
                int[] ina;
                Helper.GetFiniteIndices(scoreByChrKVP.Value, out ina); // not NaN, -Inf, Inf

                double[] scores;
                if (ina.Length == scoreByChrKVP.Value.Length)
                {
                    // Every value is finite; no need to copy.
                    scores = scoreByChrKVP.Value;
                }
                else
                {
                    Helper.ExtractValues<double>(scoreByChrKVP.Value, ina, out scores);
                }

                // Dictionary is not safe for concurrent writes.
                lock (finiteScoresByChr)
                {
                    finiteScoresByChr[scoreByChrKVP.Key] = scores;
                }
            });

            // Quick sanity-check: If we don't have any segments, then return a dummy result.
            int n = finiteScoresByChr.Values.Sum(list => list.Length);
            if (n == 0)
            {
                return new Dictionary<string, Segmentation.Segment[]>();
            }

            Dictionary<string, Segmentation.Segment[]> segmentByChr = new Dictionary<string, Segmentation.Segment[]>();

            // load common CNV segments
            Dictionary<string, List<SampleGenomicBin>> commonCNVintervals = null;
            if (_parameters.CommonCnVs != null)
            {
                commonCNVintervals = CanvasCommon.Utilities.LoadBedFile(_parameters.CommonCnVs);
                CanvasCommon.Utilities.SortAndOverlapCheck(commonCNVintervals, _parameters.CommonCnVs);
            }

            Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);

            // Second pass: segment each chromosome independently.
            Parallel.ForEach(segmentation.ScoreByChr.Keys, chr =>
            {
                List<int> breakpoints = new List<int>();
                int sizeScoreByChr = segmentation.ScoreByChr[chr].Length;
                if (sizeScoreByChr > _parameters.MinSize)
                {
                    WaveletSegmentation.HaarWavelets(segmentation.ScoreByChr[chr], _parameters.ThresholdLower, _parameters.ThresholdUpper,
                                                     breakpoints, _parameters.IsGermline, madFactor: _parameters.MadFactor);
                }

                // Fold in common CNV intervals for this chromosome, if any were loaded.
                List<SampleGenomicBin> chrCommonIntervals;
                if (commonCNVintervals != null && commonCNVintervals.TryGetValue(chr, out chrCommonIntervals))
                {
                    List<SampleGenomicBin> remappedCommonCNVintervals = Segmentation.RemapCommonRegions(chrCommonIntervals, segmentation.StartByChr[chr], segmentation.EndByChr[chr]);
                    breakpoints = Segmentation.OverlapCommonRegions(breakpoints, remappedCommonCNVintervals);
                }

                var segments = Segmentation.DeriveSegments(breakpoints, sizeScoreByChr, segmentation.StartByChr[chr], segmentation.EndByChr[chr]);

                lock (segmentByChr)
                {
                    segmentByChr[chr] = segments;
                }
            });

            Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
            Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
            return segmentByChr;
        }