GetOnTargetBins() public static method

Gets the on-target bins by intersecting the given bins with the manifest.
public static GetOnTargetBins ( IEnumerable<SampleGenomicBin> bins, NexteraManifest manifest ) : IEnumerable<SampleGenomicBin>
bins IEnumerable<SampleGenomicBin>
manifest NexteraManifest
return IEnumerable<SampleGenomicBin>
Ejemplo n.º 1
0
        /// <summary>
        /// Collects the counts of autosomal bins, both grouped by GC content and as one flat list.
        /// When a manifest is supplied, only the bins returned by
        /// EnrichmentUtilities.GetOnTargetBins are visited; otherwise every bin is visited.
        /// The bins are assumed to be sorted by genomic coordinates (this method does not check it).
        /// </summary>
        /// <param name="bins">Bins whose counts are to be collected</param>
        /// <param name="manifest">Manifest used to restrict the bins to on-target ones; pass null to use all of 'bins'</param>
        /// <param name="countsByGC">Receives an array of numberOfGCbins lists, indexed by GC content; each list holds the counts of the visited autosomal bins with that GC content</param>
        /// <param name="counts">Receives the counts of all visited autosomal bins</param>
        public static void GetCountsByGC(List <SampleGenomicBin> bins, NexteraManifest manifest, out List <float>[] countsByGC, out List <float> counts)
        {
            countsByGC = new List<float>[numberOfGCbins];
            counts     = new List<float>(bins.Count);

            // Every GC slot gets its own empty list before any count is recorded.
            for (int gc = 0; gc < countsByGC.Length; gc++)
            {
                countsByGC[gc] = new List<float>();
            }

            // Without a manifest all bins are candidates; with one, only the on-target bins are.
            IEnumerable<SampleGenomicBin> candidates = bins;
            if (manifest != null)
            {
                candidates = EnrichmentUtilities.GetOnTargetBins(bins, manifest);
            }

            foreach (SampleGenomicBin candidate in candidates)
            {
                // Non-autosomal bins contribute to neither output.
                if (GenomeMetadata.SequenceMetadata.IsAutosome(candidate.GenomicBin.Chromosome))
                {
                    // Record the observed count under its GC value and in the flat list.
                    countsByGC[candidate.GenomicBin.GC].Add(candidate.Count);
                    counts.Add(candidate.Count);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Fills the parallel arrays 'gcs' and 'counts' from the bins (restricted to the on-target
        /// bins when a manifest is set) and records in 'withoutChrY' the indices, into those arrays,
        /// of the entries that are not on chromosome Y.
        /// Bins whose transformed count is infinite are left out entirely.
        /// </summary>
        private void initialize()
        {
            IEnumerable <SampleGenomicBin> onTargetBins = manifest == null ? bins : EnrichmentUtilities.GetOnTargetBins(bins, manifest);

            List <double> x = new List <double>();
            List <double> y = new List <double>();

            withoutChrY = new List <int>();

            foreach (var bin in onTargetBins)
            {
                double count = countTransformer(bin.Count); // Variance stabilization
                if (double.IsInfinity(count))
                {
                    // Skipped bins get no slot in x / y, so they never shift the recorded indices.
                    continue;
                }

                // x and y grow in lock-step, so the current length is the index of the entry about to be added.
                int index = x.Count;
                x.Add(bin.GenomicBin.GC);
                y.Add(count);

                // Ordinal, case-insensitive comparison: chromosome names are identifiers, so the
                // match must not depend on the current culture (and no lower-cased copy is allocated).
                string chrom  = bin.GenomicBin.Chromosome;
                bool   isChrY = chrom.Equals("chry", System.StringComparison.OrdinalIgnoreCase) ||
                                chrom.Equals("y", System.StringComparison.OrdinalIgnoreCase);
                if (!isChrY)
                {
                    withoutChrY.Add(index);
                }
            }

            gcs    = x.ToArray();
            counts = y.ToArray();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Remove bins with extreme GC content.
        /// </summary>
        /// <param name="bins">Genomic bins from which GC content outliers are filtered out.</param>
        /// <param name="threshold">Minimum number of bins with the same GC content required to keep a bin.</param>
        /// <param name="manifest">Optional manifest; when it is not null, only the on-target bins are tallied per GC value.</param>
        /// <returns>A new list holding, in their original order, the bins that pass the filter.</returns>
        /// <remarks>
        /// The rationale of this function is that a GC normalization is performed by computing the median count
        /// for each possible GC value. If the number of bins behind that median is small, the corresponding
        /// normalization constant is unstable and those data should not be used.
        /// The effective threshold never exceeds the larger of minNumberOfBinsPerGCForWeightedMedian and the
        /// average number of tallied bins per GC value. The filter itself is applied to every bin in 'bins',
        /// not only to the tallied ones.
        /// </remarks>
        static List <SampleGenomicBin> RemoveBinsWithExtremeGC(List <SampleGenomicBin> bins, int threshold, NexteraManifest manifest = null)
        {
            // Number of tallied bins for each possible GC content value.
            int[]  binsPerGC      = new int[EnrichmentUtilities.numberOfGCbins];
            double autosomalTotal = 0;

            IEnumerable<SampleGenomicBin> tallied = bins;
            if (manifest != null)
            {
                tallied = EnrichmentUtilities.GetOnTargetBins(bins, manifest);
            }

            foreach (SampleGenomicBin candidate in tallied)
            {
                // Only autosomal bins are tallied because the normalization factors are computed on them.
                if (GenomeMetadata.SequenceMetadata.IsAutosome(candidate.GenomicBin.Chromosome))
                {
                    binsPerGC[candidate.GenomicBin.GC]++;
                    autosomalTotal++;
                }
            }

            // Cap the requested threshold so that a sparse data set does not lose every bin.
            int meanPerGC          = (int)(autosomalTotal / binsPerGC.Length);
            int cap                = Math.Max(minNumberOfBinsPerGCForWeightedMedian, meanPerGC);
            int effectiveThreshold = Math.Min(threshold, cap);

            // Keep each bin whose GC value is backed by enough tallied bins.
            List<SampleGenomicBin> kept = new List<SampleGenomicBin>();
            foreach (SampleGenomicBin bin in bins)
            {
                if (binsPerGC[bin.GenomicBin.GC] >= effectiveThreshold)
                {
                    kept.Add(bin);
                }
            }

            return kept;
        }