Beispiel #1
0
        /// <summary>
        /// Populate the list of GenomicBin objects for this chromosome.
        /// </summary>
        /// <remarks>
        /// When <c>arguments.Bins</c> is empty, bins are created on the fly: a bin is closed and appended to the
        /// list as soon as it has accumulated <c>arguments.BinSize</c> positions flagged in
        /// <c>arguments.PossibleAlignments</c>. When <c>arguments.Bins</c> already contains entries, those
        /// predefined bins are visited in list order and only their <c>GC</c> and <c>Count</c> values are overwritten.
        /// </remarks>
        /// <param name="arguments">
        /// Supplies the reference sequence, the possible/observed alignment arrays, the coverage mode and
        /// the bin list that is filled (or updated in place).
        /// </param>
        static void BinCountsForChromosome(BinTaskArguments arguments)
        {
            // Upper bound applied to each position's contribution in the
            // TruncatedDynamicRange and GCContentWeighted coverage modes.
            const int MaxCountPerPosition = 10;

            List<GenomicBin> bins = arguments.Bins;
            bool usePredefinedBins = bins.Any();
            int predefinedBinIndex = 0;
            GenericRead fastaEntry = arguments.FastaEntry;
            BinState currentBin = new BinState();
            string chr = arguments.Chromosome;
            BitArray possibleAlignments = arguments.PossibleAlignments;
            HitArray observedAlignments = arguments.ObservedAlignments;
            CanvasCoverageMode coverageMode = arguments.CoverageMode;
            int pos = usePredefinedBins ? bins[predefinedBinIndex].Start : 0;

            // Skip past leading Ns. The length check keeps an all-N sequence from
            // running off the end of the bases.
            // NOTE(review): with predefined bins this can move pos beyond the first bin's Stop - 1,
            // in which case the equality test that closes a bin never fires for it — confirm callers
            // never supply bins that lie entirely inside the leading N run.
            while (pos < fastaEntry.Bases.Length && fastaEntry.Bases[pos] == 'n')
            {
                pos++;
            }

            // Per-position values collected for the bin currently being built.
            List<float> binPositions = new List<float>();
            List<int> binObservations = new List<int>();

            for (; pos < fastaEntry.Bases.Length; pos++)
            {
                // Sets the start of the bin
                if (currentBin.StartPosition == -1)
                {
                    currentBin.StartPosition = pos;
                }

                // Compare against the char 'n': char.Equals("n") is always false, which made
                // every position (Ns included) count as a nucleotide.
                if (fastaEntry.Bases[pos] != 'n')
                {
                    currentBin.NucleotideCount++;
                }

                switch (fastaEntry.Bases[pos])
                {
                    case 'C':
                    case 'c':
                    case 'G':
                    case 'g':
                        currentBin.GCCount++;
                        break;
                }

                if (possibleAlignments[pos])
                {
                    currentBin.PossibleCount++;
                    currentBin.ObservedCount += observedAlignments.Data[pos];
                    binObservations.Add(observedAlignments.Data[pos]);
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        // Weight looked up by the read GC content recorded at this position.
                        binPositions.Add(arguments.ObservedVsExpectedGC[arguments.ReadGCContent[pos]]);
                    }
                }

                // A bin is complete when we've seen the desired number of possible alignment
                // positions, or when we reach the last position of the current predefined bin.
                bool binComplete = usePredefinedBins
                    ? pos == bins[predefinedBinIndex].Stop - 1
                    : currentBin.PossibleCount == arguments.BinSize;
                if (!binComplete)
                {
                    continue;
                }

                if (coverageMode == CanvasCoverageMode.TruncatedDynamicRange)
                {
                    // Truncated dynamic range: recompute the count with each position capped.
                    currentBin.ObservedCount = 0;
                    foreach (int observation in binObservations)
                    {
                        currentBin.ObservedCount += Math.Min(MaxCountPerPosition, observation);
                    }
                }
                if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                {
                    // Read GC content weighted: divide each observation by its weight, cap, then sum.
                    float weightedCount = 0;
                    for (int i = 0; i < binObservations.Count; i++)
                    {
                        weightedCount += Math.Min(MaxCountPerPosition, (float)binObservations[i] / binPositions[i]);
                    }
                    currentBin.ObservedCount = (int)Math.Round(weightedCount);
                }

                // A bin made up solely of Ns has no nucleotides; report 0% GC instead of dividing by zero.
                int gc = currentBin.NucleotideCount == 0
                    ? 0
                    : (int)(100 * currentBin.GCCount / currentBin.NucleotideCount);

                if (usePredefinedBins)
                {
                    bins[predefinedBinIndex].GC = gc;
                    bins[predefinedBinIndex].Count = currentBin.ObservedCount;
                    predefinedBinIndex++;
                    if (predefinedBinIndex >= bins.Count)
                    {
                        break; // we have processed all the bins
                    }
                    // Jump to right before the next predefined bin; the loop's pos++ lands on its Start.
                    pos = bins[predefinedBinIndex].Start - 1;
                }
                else
                {
                    // pos + 1 gives an exclusive stop so the first three fields conform to the BED specification.
                    GenomicBin bin = new GenomicBin(chr, currentBin.StartPosition, pos + 1, gc, currentBin.ObservedCount);
                    bins.Add(bin);
                }

                // Reset all per-bin state before starting the next bin.
                currentBin.Reset();
                binObservations.Clear();
                binPositions.Clear();
            }
        }
Beispiel #2
0
        /// <summary>
        /// Populate the list of GenomicBin objects for this chromosome.
        /// </summary>
        /// <remarks>
        /// When <c>arguments.Bins</c> is empty, bins are created on the fly: a bin is closed and appended to the
        /// list as soon as it has accumulated <c>arguments.BinSize</c> positions flagged in
        /// <c>arguments.PossibleAlignments</c>. When <c>arguments.Bins</c> already contains entries, those
        /// predefined bins are visited in list order and only their <c>GC</c> and <c>Count</c> values are overwritten.
        /// </remarks>
        /// <param name="arguments">
        /// Supplies the reference sequence, the possible/observed alignment arrays, the coverage mode and
        /// the bin list that is filled (or updated in place).
        /// </param>
        static void BinCountsForChromosome(BinTaskArguments arguments)
        {
            // Upper bound applied to each position's contribution in the
            // TruncatedDynamicRange and GCContentWeighted coverage modes.
            const int MaxCountPerPosition = 10;

            List<GenomicBin> bins = arguments.Bins;
            bool usePredefinedBins = bins.Any();
            int predefinedBinIndex = 0;
            GenericRead fastaEntry = arguments.FastaEntry;
            BinState currentBin = new BinState();
            string chr = arguments.Chromosome;
            BitArray possibleAlignments = arguments.PossibleAlignments;
            HitArray observedAlignments = arguments.ObservedAlignments;
            CanvasCoverageMode coverageMode = arguments.CoverageMode;
            int pos = usePredefinedBins ? bins[predefinedBinIndex].Start : 0;

            // Skip past leading Ns. The length check keeps an all-N sequence from
            // running off the end of the bases.
            // NOTE(review): with predefined bins this can move pos beyond the first bin's Stop - 1,
            // in which case the equality test that closes a bin never fires for it — confirm callers
            // never supply bins that lie entirely inside the leading N run.
            while (pos < fastaEntry.Bases.Length && fastaEntry.Bases[pos] == 'n')
            {
                pos++;
            }

            // Per-position values collected for the bin currently being built.
            List<float> binPositions = new List<float>();
            List<int> binObservations = new List<int>();

            for (; pos < fastaEntry.Bases.Length; pos++)
            {
                // Sets the start of the bin
                if (currentBin.StartPosition == -1)
                {
                    currentBin.StartPosition = pos;
                }

                // Compare against the char 'n': char.Equals("n") is always false, which made
                // every position (Ns included) count as a nucleotide.
                if (fastaEntry.Bases[pos] != 'n')
                {
                    currentBin.NucleotideCount++;
                }

                switch (fastaEntry.Bases[pos])
                {
                    case 'C':
                    case 'c':
                    case 'G':
                    case 'g':
                        currentBin.GCCount++;
                        break;
                }

                if (possibleAlignments[pos])
                {
                    currentBin.PossibleCount++;
                    currentBin.ObservedCount += observedAlignments.Data[pos];
                    binObservations.Add(observedAlignments.Data[pos]);
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        // Weight looked up by the read GC content recorded at this position.
                        binPositions.Add(arguments.ObservedVsExpectedGC[arguments.ReadGCContent[pos]]);
                    }
                }

                // A bin is complete when we've seen the desired number of possible alignment
                // positions, or when we reach the last position of the current predefined bin.
                bool binComplete = usePredefinedBins
                    ? pos == bins[predefinedBinIndex].Stop - 1
                    : currentBin.PossibleCount == arguments.BinSize;
                if (!binComplete)
                {
                    continue;
                }

                if (coverageMode == CanvasCoverageMode.TruncatedDynamicRange)
                {
                    // Truncated dynamic range: recompute the count with each position capped.
                    currentBin.ObservedCount = 0;
                    foreach (int observation in binObservations)
                    {
                        currentBin.ObservedCount += Math.Min(MaxCountPerPosition, observation);
                    }
                }
                if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                {
                    // Read GC content weighted: divide each observation by its weight, cap, then sum.
                    float weightedCount = 0;
                    for (int i = 0; i < binObservations.Count; i++)
                    {
                        weightedCount += Math.Min(MaxCountPerPosition, (float)binObservations[i] / binPositions[i]);
                    }
                    currentBin.ObservedCount = (int)Math.Round(weightedCount);
                }

                // A bin made up solely of Ns has no nucleotides; report 0% GC instead of dividing by zero.
                int gc = currentBin.NucleotideCount == 0
                    ? 0
                    : (int)(100 * currentBin.GCCount / currentBin.NucleotideCount);

                if (usePredefinedBins)
                {
                    bins[predefinedBinIndex].GC = gc;
                    bins[predefinedBinIndex].Count = currentBin.ObservedCount;
                    predefinedBinIndex++;
                    if (predefinedBinIndex >= bins.Count)
                    {
                        break; // we have processed all the bins
                    }
                    // Jump to right before the next predefined bin; the loop's pos++ lands on its Start.
                    pos = bins[predefinedBinIndex].Start - 1;
                }
                else
                {
                    // pos + 1 gives an exclusive stop so the first three fields conform to the BED specification.
                    GenomicBin bin = new GenomicBin(chr, currentBin.StartPosition, pos + 1, gc, currentBin.ObservedCount);
                    bins.Add(bin);
                }

                // Reset all per-bin state before starting the next bin.
                currentBin.Reset();
                binObservations.Clear();
                binPositions.Clear();
            }
        }