/// <summary>
/// Derives a pair status from the flags and reference ids of a single read.
/// The checks run in a fixed order and the first one that matches wins:
/// split chromosomes, unmapped mate, duplicate, and finally (only when
/// insert size is being considered) long fragment.
/// </summary>
/// <param name="alignment">The read to classify.</param>
/// <returns>
/// The first matching status, or <see cref="PairStatus.Unknown"/> when no check matches.
/// </returns>
private PairStatus SingleReadStatus(BamAlignment alignment)
{
    // Stitched reads will have split ref ids too but not the same thing
    bool refIdsDiffer = alignment.RefID != alignment.MateRefID;
    if (refIdsDiffer && alignment.IsPaired())
    {
        return PairStatus.SplitChromosomes;
    }

    // Mate flagged unmapped while this read carries no reference id.
    if (!alignment.IsMateMapped() && alignment.RefID == -1)
    {
        return PairStatus.MateUnmapped;
    }

    // This read flagged unmapped while the mate carries no reference id.
    if (!alignment.IsMapped() && alignment.MateRefID == -1)
    {
        return PairStatus.MateUnmapped;
    }

    if (alignment.IsDuplicate())
    {
        return PairStatus.Duplicate;
    }

    // The overlap test is only consulted when insert size matters.
    if (_considerInsertSize && alignment.IsPaired() && !OverlapsMate(alignment))
    {
        return PairStatus.LongFragment;
    }

    return PairStatus.Unknown;
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">the read to process; ignored unless it is mapped, primary, paired, a proper pair and has a mapped mate</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when its mate is seen</param>
/// <param name="samePositionReadNames">names of reads seen so far whose position equals their mate's position; used so that only one read of such a pair is considered for binning</param>
/// <param name="usableFragmentCount">number of usable fragments; incremented when a fragment is binned, decremented when a binning is undone</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past bins that end at or before the fragment start</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<SampleGenomicBin> bins, ref int binIndexStart)
{
    if (!alignment.IsMapped()) { return; }
    if (!alignment.IsMateMapped()) { return; }
    if (!alignment.IsPrimaryAlignment()) { return; }
    if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
    string readName = alignment.Name;

    // Check whether we have binned the fragment using the mate.
    // TryGetValue fetches the bin index in the same lookup that tests for presence.
    int binnedIndex;
    if (readNameToBinIndex.TryGetValue(readName, out binnedIndex))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[binnedIndex].Count--;
        }
        readNameToBinIndex.Remove(readName); // clean up
        return;
    }

    if (duplicateFailedQCLowQuality) { return; }
    if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?
    if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

    // handle the case where alignment.Position == alignment.MatePosition
    if (alignment.Position == alignment.MatePosition)
    {
        // Remove returns true only when the name was present, i.e. the other read
        // of this pair has already been through here; skip this one.
        if (samePositionReadNames.Remove(readName)) { return; }
        // NOTE(review): if this read gets binned below, its mate returns through the
        // readNameToBinIndex branch above and this name is never removed from the set.
        samePositionReadNames.Add(readName);
    }

    if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely on the left of the fragment
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }
    if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

    // now bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[readName] = bestBinIndex;
    }
}
/// <summary>
/// Bins fragments. Resets usableFragmentCount, jumps to the start of Chromosome in the BAM file
/// and passes every alignment on that reference to BinOneAlignment until the reference changes
/// or the reader runs out of alignments.
/// </summary>
/// <exception cref="Exception">The BAM index file does not exist.</exception>
/// <exception cref="ApplicationException">
/// The reference index for Chromosome cannot be retrieved, the alignments are not sorted by
/// position, or no paired alignment was seen for Chromosome.
/// </exception>
private void binFragments()
{
    // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
    if (!Bam.Index.Exists)
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
    }

    long pairedAlignmentCount = 0; // keep track of paired alignments
    usableFragmentCount = 0;

    using (BamReader reader = new BamReader(Bam.BamFile.FullName))
    {
        int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
        }

        if (!reader.Jump(desiredRefIndex, 0))
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file. That is not uncommon e.g. for truseq amplicon.
            // Returning here also skips the "no paired alignments" check below.
            return;
        }

        Dictionary<string, int> readNameToBinIndex = new Dictionary<string, int>();
        HashSet<string> samePositionReadNames = new HashSet<string>();
        int binIndexStart = 0;
        int prevPosition = -1;
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            // quit if the current reference index is different from the desired reference index.
            // desiredRefIndex is never -1 at this point, so a RefID of -1 also ends the loop here
            // and needs no separate check.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
            {
                throw new ApplicationException(
                    string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
            }
            prevPosition = alignment.Position;

            if (alignment.IsPaired())
            {
                pairedAlignmentCount++;
            }

            BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
        }
    }

    if (pairedAlignmentCount == 0)
    {
        throw new ApplicationException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
    }
}
/// <summary>
/// Bins fragments. Resets usableFragmentCount, jumps to the start of Chromosome in the BAM file
/// and passes every alignment on that reference to BinOneAlignment until the reference changes
/// or the reader runs out of alignments.
/// </summary>
/// <exception cref="Exception">The BAM index file does not exist.</exception>
/// <exception cref="Illumina.Common.IlluminaException">
/// The reference index for Chromosome cannot be retrieved, the alignments are not sorted by
/// position, or no paired alignment was seen for Chromosome.
/// </exception>
private void binFragments()
{
    // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
    if (!Bam.Index.Exists)
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
    }

    long pairedAlignmentCount = 0; // keep track of paired alignments
    usableFragmentCount = 0;

    using (BamReader reader = new BamReader(Bam.BamFile.FullName))
    {
        int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
        if (desiredRefIndex == -1)
        {
            throw new Illumina.Common.IlluminaException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
        }

        if (!reader.Jump(desiredRefIndex, 0))
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file. That is not uncommon e.g. for truseq amplicon.
            // Returning here also skips the "no paired alignments" check below.
            return;
        }

        Dictionary<string, int> readNameToBinIndex = new Dictionary<string, int>();
        HashSet<string> samePositionReadNames = new HashSet<string>();
        int binIndexStart = 0;
        int prevPosition = -1;
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            // quit if the current reference index is different from the desired reference index.
            // desiredRefIndex is never -1 at this point, so a RefID of -1 also ends the loop here
            // and needs no separate check.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
            {
                throw new Illumina.Common.IlluminaException(
                    string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
            }
            prevPosition = alignment.Position;

            if (alignment.IsPaired())
            {
                pairedAlignmentCount++;
            }

            BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
        }
    }

    if (pairedAlignmentCount == 0)
    {
        throw new Illumina.Common.IlluminaException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
    }
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">the read to process; ignored unless it is mapped, primary, paired, a proper pair and has a mapped mate</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when its mate is seen</param>
/// <param name="samePositionReadNames">names of reads seen so far whose position equals their mate's position; used so that only one read of such a pair is considered for binning</param>
/// <param name="usableFragmentCount">number of usable fragments; incremented when a fragment is binned, decremented when a binning is undone</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past bins that end at or before the fragment start</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
{
    if (!alignment.IsMapped()) { return; }
    if (!alignment.IsMateMapped()) { return; }
    if (!alignment.IsPrimaryAlignment()) { return; }
    if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
    string readName = alignment.Name;

    // Check whether we have binned the fragment using the mate.
    // TryGetValue fetches the bin index in the same lookup that tests for presence.
    int binnedIndex;
    if (readNameToBinIndex.TryGetValue(readName, out binnedIndex))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[binnedIndex].Count--;
        }
        readNameToBinIndex.Remove(readName); // clean up
        return;
    }

    if (duplicateFailedQCLowQuality) { return; }
    if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?
    if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

    // handle the case where alignment.Position == alignment.MatePosition
    if (alignment.Position == alignment.MatePosition)
    {
        // Remove returns true only when the name was present, i.e. the other read
        // of this pair has already been through here; skip this one.
        if (samePositionReadNames.Remove(readName)) { return; }
        // NOTE(review): if this read gets binned below, its mate returns through the
        // readNameToBinIndex branch above and this name is never removed from the set.
        samePositionReadNames.Add(readName);
    }

    if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely on the left of the fragment
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }
    if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

    // now bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[readName] = bestBinIndex;
    }
}