Exemplo n.º 1
0
        /// <summary>
        /// Decides whether a read should be skipped, recording the reason in the debug status counter.
        /// </summary>
        /// <param name="alignment">The alignment to evaluate.</param>
        /// <returns>
        /// true when the read is unmapped, supplementary, not a proper pair (only checked when proper-pair
        /// filtering is enabled) or below the minimum mapping quality; false otherwise.
        /// </returns>
        protected override bool ShouldSkipRead(BamAlignment alignment)
        {
            // The checks run in a fixed order; the first one that fires supplies the skip reason.
            string skipReason = null;

            if (!alignment.IsMapped())
            {
                skipReason = "Skipped not mapped";
            }
            else if (alignment.IsSupplementaryAlignment())
            {
                skipReason = "Skipped supplementary";
            }
            else if (_filterForProperPairs && !alignment.IsProperPair())
            {
                skipReason = "Skipped improper pair";
            }
            else if (alignment.MapQuality < _minMapQuality)
            {
                skipReason = "Skipped low map quality";
            }

            if (skipReason == null)
            {
                return false;
            }

            _statusCounter.AddDebugStatusCount(skipReason);
            return true;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Decides whether a read should be blacklisted: low mapping quality, an unmapped read or mate
        /// (each only when the corresponding filter is enabled), or a duplicate.
        /// </summary>
        /// <param name="alignment">The alignment to evaluate.</param>
        /// <returns>true when the read should be blacklisted; false otherwise.</returns>
        protected override bool ShouldBlacklistReadIndexer(BamAlignment alignment)
        {
            // Mapping quality above zero but under the minimum: blacklist (no status count is recorded here).
            if (_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
            {
                return true;
            }

            if (_filterPairUnmapped)
            {
                string unmappedReason = null;
                if (!alignment.IsMapped())
                {
                    unmappedReason = "Skipped not mapped";
                }
                else if (!alignment.IsMateMapped())
                {
                    unmappedReason = "Skipped mate not mapped";
                }

                if (unmappedReason != null)
                {
                    _statusCounter.AddDebugStatusCount(unmappedReason);
                    return true;
                }
            }

            // Ask for the duplicate status exactly once: a de novo duplicate finder would otherwise flag the
            // read as a duplicate simply because it has already seen it.
            // Duplicates are blacklisted rather than skipped because when one mate is a duplicate the other
            // is presumed to be one too.
            // Known gap: for a fusion read whose first-seen mate is not a duplicate but whose second mate is,
            // the first mate is flushed to the bam (when fusions are not being mated) before the second
            // mate's duplicate status is known. This is a highly unlikely degenerate case.
            if (!ReadIsDuplicate(alignment))
            {
                return false;
            }

            _statusCounter.AddStatusCount("Blacklisted Duplicates");
            return true;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds a Read from a BamAlignment and checks that the Read mirrors the alignment's fields.
        /// </summary>
        public void FromBam()
        {
            var alignment = new BamAlignment
            {
                Bases        = "ATCTTA",
                Position     = 100,
                MatePosition = 500,
                Name         = "test",
                CigarData    = new CigarAlignment("5M1S"),
                MapQuality   = 10,
                Qualities    = new[] { (byte)10, (byte)20, (byte)30 }
            };

            alignment.SetIsDuplicate(true);
            alignment.SetIsProperPair(true);
            alignment.SetIsSecondaryAlignment(true);
            alignment.SetIsUnmapped(true);

            var read = new Read("chr1", alignment);

            // xUnit's contract is Assert.Equal(expected, actual): the alignment (the input) supplies the
            // expected values and the Read under test supplies the actual ones, so failure messages are
            // labelled correctly.
            Assert.Equal("chr1", read.Chromosome);
            Assert.Equal(alignment.Bases, read.Sequence);

            // The Read is expected to report positions one greater than the alignment's.
            Assert.Equal(alignment.Position + 1, read.Position);
            Assert.Equal(alignment.MatePosition + 1, read.MatePosition);

            Assert.Equal(alignment.Name, read.Name);
            Assert.Equal(alignment.CigarData, read.CigarData);
            Assert.Equal(alignment.IsMapped(), read.IsMapped);
            Assert.Equal(alignment.IsProperPair(), read.IsProperPair);
            Assert.Equal(alignment.IsPrimaryAlignment(), read.IsPrimaryAlignment);
            Assert.Equal(alignment.IsDuplicate(), read.IsPcrDuplicate);

            // Every entry of the direction map must be Forward.
            foreach (var direction in read.SequencedBaseDirectionMap)
            {
                Assert.Equal(DirectionType.Forward, direction);
            }

            // Qualities are compared element by element over the Read's quality array.
            for (var i = 0; i < read.Qualities.Length; i++)
            {
                Assert.Equal(alignment.Qualities[i], read.Qualities[i]);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Tells whether a read could overlap its mate: both reads are mapped, they share a reference, and
        /// their start positions are no more than the maximum pair gap apart.
        /// </summary>
        /// <param name="alignment">The alignment to evaluate.</param>
        /// <returns>true when all of the conditions hold; false otherwise.</returns>
        private bool MayOverlapMate(BamAlignment alignment)
        {
            bool bothMapped = alignment.IsMateMapped() && alignment.IsMapped();
            if (!bothMapped)
            {
                return false;
            }

            bool sameReference = alignment.RefID == alignment.MateRefID;
            if (!sameReference)
            {
                return false;
            }

            // Distance between the two start positions.
            var pairGap = Math.Abs(alignment.Position - alignment.MatePosition);
            return !(pairGap > _maxPairGap);
        }
        /// <summary>
        /// Classifies a read that is being handled on its own (without its mate).
        /// </summary>
        /// <param name="alignment">The alignment to classify.</param>
        /// <returns>
        /// The first matching status in this order: SplitChromosomes, MateUnmapped, Duplicate, LongFragment
        /// (only when insert size is considered); Unknown when nothing matches.
        /// </returns>
        private PairStatus SingleReadStatus(BamAlignment alignment)
        {
            // Stitched reads will have split ref ids too, but that is not the same thing.
            bool onDifferentReferences = alignment.RefID != alignment.MateRefID;
            if (onDifferentReferences && alignment.IsPaired())
            {
                return PairStatus.SplitChromosomes;
            }

            // NOTE(review): each "unmapped" flag is paired with the *other* read's reference id here
            // (mate flag with RefID, own flag with MateRefID) — confirm this pairing is intentional.
            if (!alignment.IsMateMapped() && alignment.RefID == -1)
            {
                return PairStatus.MateUnmapped;
            }
            if (!alignment.IsMapped() && alignment.MateRefID == -1)
            {
                return PairStatus.MateUnmapped;
            }

            if (alignment.IsDuplicate())
            {
                return PairStatus.Duplicate;
            }

            if (_considerInsertSize && alignment.IsPaired() && !OverlapsMate(alignment))
            {
                return PairStatus.LongFragment;
            }

            return PairStatus.Unknown;
        }
        /// <summary>
        /// Decides whether a read should be blacklisted: low mapping quality, a truly unmapped read or mate
        /// (each only when the corresponding filter is enabled), or a duplicate.
        /// </summary>
        /// <param name="alignment">The alignment to evaluate.</param>
        /// <returns>true when the read should be blacklisted; false otherwise.</returns>
        protected override bool ShouldBlacklistReadIndexer(BamAlignment alignment)
        {
            // Mapping quality above zero but under the minimum: blacklist (no status count is recorded here).
            if (_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
            {
                return true;
            }

            if (_filterPairUnmapped)
            {
                // The mapped flag alone is not enough: some pairs have one mate mapped and the other placed
                // right next to it with mapq 0 but with (chr: pos) information. Requiring a reference id of
                // -1 as well separates those from truly unmapped reads.
                string unmappedReason = null;
                if (!alignment.IsMapped() && alignment.RefID == -1)
                {
                    unmappedReason = "Skipped not mapped";
                }
                else if (!alignment.IsMateMapped() && alignment.MateRefID == -1)
                {
                    unmappedReason = "Skipped mate not mapped";
                }

                if (unmappedReason != null)
                {
                    _statusCounter.AddDebugStatusCount(unmappedReason);
                    return true;
                }
            }

            // Ask for the duplicate status exactly once: a de novo duplicate finder would otherwise flag the
            // read as a duplicate simply because it has already seen it.
            // Duplicates are blacklisted rather than skipped because when one mate is a duplicate the other
            // is presumed to be one too.
            // Known gap: for a fusion read whose first-seen mate is not a duplicate but whose second mate is,
            // the first mate is flushed to the bam (when fusions are not being mated) before the second
            // mate's duplicate status is known. This is a highly unlikely degenerate case.
            if (!ReadIsDuplicate(alignment))
            {
                return false;
            }

            _statusCounter.AddStatusCount("Blacklisted Duplicates");
            return true;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
        /// Streams the records of this.Chromosome and hands every usable read that may touch a variant to
        /// ProcessReadBases.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to scan.</param>
        /// <exception cref="ArgumentException">this.Chromosome is not a reference name known to the bam file.</exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int recordsSeen = 0;
            int variantCursor = 0;
            using (BamReader reader = new BamReader(bamPath))
            {
                BamAlignment read = new BamAlignment();
                int refID = reader.GetReferenceIndex(this.Chromosome);
                if (refID < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
                }
                Console.WriteLine("Jump to refid {0} {1}", refID, this.Chromosome);
                reader.Jump(refID, 0);

                while (reader.GetNextAlignment(ref read, false))
                {
                    // We're past our chromosome of interest.
                    if (!read.HasPosition() || read.RefID > refID)
                    {
                        break;
                    }

                    // We're not yet on our chromosome of interest.
                    if (read.RefID < refID)
                    {
                        continue;
                    }

                    recordsSeen++;
                    if (recordsSeen % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", recordsSeen, read.Position);
                    }

                    // Skip over unaligned or other non-count-worthy reads.
                    bool countWorthy = read.IsPrimaryAlignment()
                                       && read.IsMapped()
                                       && !read.IsDuplicate()
                                       && !(read.MapQuality <= MinimumMapQ);
                    if (!countWorthy)
                    {
                        continue;
                    }

                    // Advance the cursor past variants that lie before this read's start.
                    while (variantCursor < this.Variants.Count && this.Variants[variantCursor].ReferencePosition < read.Position)
                    {
                        variantCursor++;
                    }

                    // No variants left: nothing more to count.
                    if (variantCursor >= this.Variants.Count)
                    {
                        break;
                    }

                    // Only reads starting within 1000 positions of the next variant are worth processing;
                    // such a read potentially overlaps that variant (and further ones).
                    if (!(read.Position + 1000 < this.Variants[variantCursor].ReferencePosition))
                    {
                        ProcessReadBases(read, variantCursor);
                    }
                }
            }
            Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
        /// Streams the records of this.Chromosome and hands every usable read that may touch a variant to
        /// ProcessReadBases.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to scan.</param>
        /// <exception cref="ArgumentException">this.Chromosome is not a reference name known to the bam file.</exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int recordsSeen = 0;
            int variantCursor = 0;

            using (BamReader reader = new BamReader(bamPath))
            {
                BamAlignment read = new BamAlignment();
                int refID = reader.GetReferenceIndex(this.Chromosome);
                if (refID < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
                }
                Console.WriteLine("Jump to refid {0} {1}", refID, this.Chromosome);
                reader.Jump(refID, 0);

                while (reader.GetNextAlignment(ref read, false))
                {
                    // We're past our chromosome of interest.
                    if (!read.HasPosition() || read.RefID > refID)
                    {
                        break;
                    }

                    // We're not yet on our chromosome of interest.
                    if (read.RefID < refID)
                    {
                        continue;
                    }

                    recordsSeen++;
                    if (recordsSeen % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", recordsSeen, read.Position);
                    }

                    // Skip over unaligned or other non-count-worthy reads.
                    bool countWorthy = read.IsPrimaryAlignment()
                                       && read.IsMapped()
                                       && !read.IsDuplicate()
                                       && !(read.MapQuality <= MinimumMapQ);
                    if (!countWorthy)
                    {
                        continue;
                    }

                    // Advance the cursor past variants that lie before this read's start.
                    while (variantCursor < this.Variants.Count && this.Variants[variantCursor].ReferencePosition < read.Position)
                    {
                        variantCursor++;
                    }

                    // No variants left: nothing more to count.
                    if (variantCursor >= this.Variants.Count)
                    {
                        break;
                    }

                    // Only reads starting within 1000 positions of the next variant are worth processing;
                    // such a read potentially overlaps that variant (and further ones).
                    if (!(read.Position + 1000 < this.Variants[variantCursor].ReferencePosition))
                    {
                        ProcessReadBases(read, variantCursor);
                    }
                }
            }
            Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
        }
Exemplo n.º 9
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <param name="alignment">alignment to bin; ignored unless it is a mapped, primary, properly paired read with a mapped mate</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when its mate is seen</param>
            /// <param name="samePositionReadNames">names of reads seen at the same position as their mate, used so that only one read of such a pair is binned</param>
            /// <param name="usableFragmentCount">number of usable fragments</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past bins that end at or before the fragment start</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary <string, int> readNameToBinIndex,
                                               HashSet <string> samePositionReadNames, ref long usableFragmentCount, List <SampleGenomicBin> bins, ref int binIndexStart)
            {
                if (!alignment.IsMapped())
                {
                    return;
                }
                if (!alignment.IsMateMapped())
                {
                    return;
                }
                if (!alignment.IsPrimaryAlignment())
                {
                    return;
                }
                if (!(alignment.IsPaired() && alignment.IsProperPair()))
                {
                    return;
                }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

                // Check whether we have binned the fragment using the mate (single lookup instead of
                // ContainsKey followed by the indexer).
                int mateBinIndex;
                if (readNameToBinIndex.TryGetValue(alignment.Name, out mateBinIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[mateBinIndex].Count--;
                    }
                    readNameToBinIndex.Remove(alignment.Name); // clean up
                    return;
                }
                if (duplicateFailedQCLowQuality)
                {
                    return;
                }

                // does this ever happen?
                if (alignment.RefID != alignment.MateRefID)
                {
                    return;
                }

                // look at only one read of the pair
                if (IsRightMostInPair(alignment))
                {
                    return;
                }

                // handle the case where alignment.Position == alignment.MatePosition
                if (alignment.Position == alignment.MatePosition)
                {
                    // Remove returns true when the name was present, i.e. the mate has already been
                    // handled; otherwise remember this read so its mate is skipped later.
                    if (samePositionReadNames.Remove(alignment.Name))
                    {
                        return;
                    }
                    samePositionReadNames.Add(alignment.Name);
                }

                // Janus-SRS-190: 0 when the information is unavailable
                if (alignment.FragmentLength == 0)
                {
                    return;
                }

                // Try to bin the fragment
                int fragmentStart = alignment.Position;                            // 0-based, inclusive
                int fragmentStop  = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

                // Skip bins that lie entirely on the left of the fragment
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
                {
                    binIndexStart++;
                }

                // all the remaining fragments are on the right of the last bin
                if (binIndexStart >= bins.Count)
                {
                    return;
                }

                // now bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);

                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    readNameToBinIndex[alignment.Name] = bestBinIndex;
                }
            }
Exemplo n.º 10
0
        /// <summary>
        ///     Folds one alignment into the index that is being built.
        /// </summary>
        /// <param name="alignment">alignment that was just read (forwarded by ref to AddOffset)</param>
        /// <param name="offset">offset associated with this alignment; must be greater than the previously recorded offset</param>
        /// <returns>false when a bin change lands on an alignment whose reference ID is negative; true otherwise</returns>
        /// <exception cref="ApplicationException">
        ///     the alignment's position precedes the previous one on the same reference, or the offset did not increase
        /// </exception>
        public bool UpdateReferenceIndex(ref BamAlignment alignment, ulong offset)
        {
            // keep count of the reads that carry no reference
            if (alignment.RefID < 0)
            {
                _numUnalignedWithoutCoordinates++;
            }

            // a new reference resets the bin tracker; within one reference the positions must not decrease
            if (alignment.RefID != _lastRefID)
            {
                _lastRefID = alignment.RefID;
                _lastBin = uint.MaxValue;
            }
            else if (alignment.Position < _lastPosition)
            {
                string unsortedMessage = string.Format(
                    "ERROR: The BAM file is not sorted. An alignment ({0}) occurred before the preceding alignment ({1}).",
                    alignment.Position, _lastPosition);
                throw new ApplicationException(unsortedMessage);
            }

            if (alignment.RefID >= 0)
            {
                AddOffset(ref _index[alignment.RefID].OffsetList, ref alignment, _lastOffset);
            }

            bool binChanged = alignment.Bin != _lastBin;
            if (binChanged)
            {
                // close the region of the bin that was being accumulated (if any)
                if (_saveBin != uint.MaxValue)
                {
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, _saveBin, _saveOffset, _lastOffset);
                }

                // the bin tracker was reset above and a reference had been saved:
                // store the offset range and the aligned/unaligned counts under BamMaxBin
                bool binTrackerWasReset = _lastBin == uint.MaxValue;
                if (binTrackerWasReset && _saveRefID != int.MinValue)
                {
                    _endOffset = _lastOffset;
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, BamMaxBin, _beginOffset, _endOffset);
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, BamMaxBin, _numAligned, _numUnaligned);
                    _numUnaligned = 0;
                    _numAligned = 0;
                    _beginOffset = _endOffset;
                }

                _saveOffset = _lastOffset;
                _lastBin = alignment.Bin;
                _saveBin = _lastBin;
                _saveRefID = alignment.RefID;

                if (_saveRefID < 0)
                {
                    _hasUnalignedReads = true;
                    return false;
                }
            }

            if (offset <= _lastOffset)
            {
                throw new ApplicationException(
                    "ERROR: While updating the BAM index, the offset did not increase after processing the last alignment.");
            }

            if (alignment.IsMapped())
            {
                _numAligned++;
            }
            else
            {
                _numUnaligned++;
            }

            _lastOffset = offset;
            _lastPosition = alignment.Position;

            return true;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Reads the alignments of one chromosome from a bam file and marks, in <paramref name="observed"/>,
        /// the start position of every read that passes the filters.
        /// </summary>
        /// <param name="bamFile">bam file to read alignments from; its index is expected at bamFile + ".bai".</param>
        /// <param name="isPairedEnd">when true, reads that are not flagged as a proper pair are discarded.</param>
        /// <param name="chromosome">name of the reference sequence to process.</param>
        /// <param name="coverageMode">Binary stores 1 at the read position; any other mode calls observed.Set.
        /// GCContentWeighted additionally stores the fragment length.</param>
        /// <param name="observed">hit array that receives the observed read start positions.</param>
        /// <param name="fragmentLengths">per-position fragment lengths, written only in GCContentWeighted mode.</param>
        /// <exception cref="FileNotFoundException">the bam index file does not exist.</exception>
        /// <exception cref="ApplicationException">the chromosome is not a reference name known to the bam file.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";

            if (!File.Exists(indexPath))
            {
                throw new FileNotFoundException(string.Format("Fatal error: Bam index not found at {0}", indexPath), indexPath);
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                              string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                                            bamFile));
                }
                bool result = reader.Jump(desiredRefIndex, 0);
                if (!result)
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }
                int          readCount     = 0;
                int          keptReadCount = 0;
                BamAlignment alignment     = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    readCount++;

                    int refID = alignment.RefID;

                    // Quit as soon as we are past the desired reference. Doing this before the flag checks
                    // keeps filtered-out reads on later references from prolonging the scan.
                    if (refID > desiredRefIndex)
                    {
                        break;
                    }

                    // Not (yet) on the desired reference - this also covers reads without a reference (-1).
                    if (refID != desiredRefIndex)
                    {
                        continue;
                    }

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate:
                    if (!alignment.IsMapped())
                    {
                        continue;
                    }
                    if (alignment.IsFailedQC())
                    {
                        continue;
                    }
                    if (alignment.IsDuplicate())
                    {
                        continue;
                    }
                    if (alignment.IsReverseStrand())
                    {
                        continue;
                    }
                    if (!alignment.IsMainAlignment())
                    {
                        continue;
                    }

                    // Require the alignment to start with 35 bases of non-indel:
                    // NOTE(review): assumes CigarData holds at least one operation for a mapped read - confirm.
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
                    {
                        continue;
                    }

                    if (isPairedEnd && !alignment.IsProperPair())
                    {
                        continue;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }
                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }
Exemplo n.º 12
0
        /// <summary>
        ///     Updates the index with respect to the current alignment
        /// </summary>
        /// <param name="alignment">alignment that was just read (forwarded by ref to AddOffset)</param>
        /// <param name="offset">offset associated with this alignment; must be greater than the previously recorded offset</param>
        /// <returns>false when a bin change lands on an alignment whose reference ID is negative; true otherwise</returns>
        /// <exception cref="InvalidDataException">
        ///     the alignment's position precedes the previous one on the same reference, or the offset did not increase
        /// </exception>
        public bool UpdateReferenceIndex(ref BamAlignment alignment, ulong offset)
        {
            // record the number of unaligned reads
            if (alignment.RefID < 0)
            {
                ++_numUnalignedWithoutCoordinates;
            }

            // update the reference IDs and check that the alignment is sorted
            if (alignment.RefID != _lastRefID)
            {
                _lastRefID = alignment.RefID;

                // Sentinel for "no bin seen yet on this reference". It has to be the same value that is
                // tested below (uint.MaxValue); with int.MaxValue the BamMaxBin bookkeeping branch could
                // never run on a reference change.
                _lastBin = uint.MaxValue;
            }
            else if (alignment.Position < _lastPosition)
            {
                throw new InvalidDataException(
                          string.Format(
                              "ERROR: The BAM file is not sorted. An alignment ({0}:{1}) occurred before the preceding alignment ({2}:{3}).",
                              alignment.RefID, alignment.Position, _lastRefID, _lastPosition));
            }

            if (alignment.RefID >= 0)
            {
                AddOffset(ref _index[alignment.RefID].OffsetList, ref alignment, _lastOffset);
            }

            if (alignment.Bin != _lastBin)
            {
                // close the region of the bin that was being accumulated (if any)
                if (_saveBin != uint.MaxValue)
                {
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, _saveBin, _saveOffset, _lastOffset);
                }

                // the reference changed and a previous reference had been saved:
                // store its offset range and its aligned/unaligned counts under BamMaxBin
                if ((_lastBin == uint.MaxValue) && (_saveRefID != int.MinValue))
                {
                    _endOffset = _lastOffset;
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, BamMaxBin, _beginOffset, _endOffset);
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, BamMaxBin, _numAligned, _numUnaligned);
                    _numAligned  = _numUnaligned = 0;
                    _beginOffset = _endOffset;
                }

                _saveOffset = _lastOffset;
                _saveBin    = _lastBin = alignment.Bin;
                _saveRefID  = alignment.RefID;

                if (_saveRefID < 0)
                {
                    _hasUnalignedReads = true;
                    return(false);
                }
            }

            if (offset <= _lastOffset)
            {
                throw new InvalidDataException(
                          "ERROR: While updating the BAM index, the offset did not increase after processing the last alignment.");
            }

            if (alignment.IsMapped())
            {
                ++_numAligned;
            }
            else
            {
                ++_numUnaligned;
            }

            _lastOffset   = offset;
            _lastPosition = alignment.Position;

            return(true);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Reads the alignments of one chromosome from a bam file and marks, in <paramref name="observed"/>,
        /// the start position of every read that passes the filters.
        /// </summary>
        /// <param name="bamFile">bam file to read alignments from; its index is expected at bamFile + ".bai".</param>
        /// <param name="isPairedEnd">when true, reads that are not flagged as a proper pair are discarded.</param>
        /// <param name="chromosome">name of the reference sequence to process.</param>
        /// <param name="coverageMode">Binary stores 1 at the read position; any other mode calls observed.Set.
        /// GCContentWeighted additionally stores the fragment length.</param>
        /// <param name="observed">hit array that receives the observed read start positions.</param>
        /// <param name="fragmentLengths">per-position fragment lengths, written only in GCContentWeighted mode.</param>
        /// <exception cref="FileNotFoundException">the bam index file does not exist.</exception>
        /// <exception cref="ApplicationException">the chromosome is not a reference name known to the bam file.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";
            if (!File.Exists(indexPath))
            {
                throw new FileNotFoundException(string.Format("Fatal error: Bam index not found at {0}", indexPath), indexPath);
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                        string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                        bamFile));
                }
                bool result = reader.Jump(desiredRefIndex, 0);
                if (!result)
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }
                int readCount = 0;
                int keptReadCount = 0;
                BamAlignment alignment = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    readCount++;

                    int refID = alignment.RefID;

                    // Quit as soon as we are past the desired reference. Doing this before the flag checks
                    // keeps filtered-out reads on later references from prolonging the scan.
                    if (refID > desiredRefIndex)
                    {
                        break;
                    }

                    // Not (yet) on the desired reference - this also covers reads without a reference (-1).
                    if (refID != desiredRefIndex)
                    {
                        continue;
                    }

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate:
                    if (!alignment.IsMapped())
                    {
                        continue;
                    }
                    if (alignment.IsFailedQC())
                    {
                        continue;
                    }
                    if (alignment.IsDuplicate())
                    {
                        continue;
                    }
                    if (alignment.IsReverseStrand())
                    {
                        continue;
                    }
                    if (!alignment.IsMainAlignment())
                    {
                        continue;
                    }

                    // Require the alignment to start with 35 bases of non-indel:
                    // NOTE(review): assumes CigarData holds at least one operation for a mapped read - confirm.
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
                    {
                        continue;
                    }

                    if (isPairedEnd && !alignment.IsProperPair())
                    {
                        continue;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }
                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }
Exemplo n.º 14
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <remarks>
            /// Reads that are unmapped, have an unmapped mate, are not primary alignments, or are not part of a
            /// proper pair are ignored outright and never touch any of the bookkeeping collections.
            /// </remarks>
            /// <param name="alignment">the read to process</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index. An entry is added when a fragment is
            /// binned and removed again when a later read with the same name is processed.</param>
            /// <param name="samePositionReadNames">names of reads whose position equals their mate's position and for which
            /// only one read of the pair has reached the binning step so far</param>
            /// <param name="usableFragmentCount">number of usable fragments</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin. It is advanced past
            /// every bin that ends at or before the start of the current fragment (presumably reads arrive in
            /// position-sorted order -- confirm against the caller).</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
                HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
            {
                if (!alignment.IsMapped()) { return; }
                if (!alignment.IsMateMapped()) { return; }
                if (!alignment.IsPrimaryAlignment()) { return; }
                if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
                string readName = alignment.Name;

                // Check whether we have binned the fragment using the mate.
                // A single TryGetValue replaces the ContainsKey + indexer double lookup.
                int binnedIndex;
                if (readNameToBinIndex.TryGetValue(readName, out binnedIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[binnedIndex].Count--;
                    }
                    readNameToBinIndex.Remove(readName); // clean up
                    return;
                }
                if (duplicateFailedQCLowQuality) { return; }

                if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?

                if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

                // Handle the case where alignment.Position == alignment.MatePosition: the first read of such a
                // pair to get here is recorded and processed; the second one is recognised by name and dropped.
                if (alignment.Position == alignment.MatePosition)
                {
                    // HashSet.Remove returns true only if the name was present, i.e. the mate was already seen.
                    if (samePositionReadNames.Remove(readName))
                    {
                        return;
                    }
                    samePositionReadNames.Add(readName);
                }
                if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

                // Try to bin the fragment
                int fragmentStart = alignment.Position; // 0-based, inclusive
                int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

                // Skip every bin that lies entirely on the left of the fragment
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

                // now bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    // Remember the bin so the mate can undo the count if it fails the filters
                    readNameToBinIndex[readName] = bestBinIndex;
                }
            }