/// <summary>
/// Classifies a single read by a fixed sequence of checks: split chromosomes, unmapped mate,
/// duplicate and, only when insert size is considered, long fragment.
/// </summary>
/// <param name="alignment">the read to classify</param>
/// <returns>the status of the first check that matches, or PairStatus.Unknown when none does</returns>
private PairStatus SingleReadStatus(BamAlignment alignment)
        {
            // Stitched reads will have split ref ids too but not the same thing
            bool refIdsDiffer = alignment.RefID != alignment.MateRefID;
            if (refIdsDiffer && alignment.IsPaired())
            {
                return PairStatus.SplitChromosomes;
            }

            // Either of the two flag/reference-id combinations below reports an unmapped mate
            if (!alignment.IsMateMapped() && alignment.RefID == -1)
            {
                return PairStatus.MateUnmapped;
            }
            if (!alignment.IsMapped() && alignment.MateRefID == -1)
            {
                return PairStatus.MateUnmapped;
            }

            if (alignment.IsDuplicate())
            {
                return PairStatus.Duplicate;
            }

            // The mate-overlap test only runs when insert size is taken into account
            if (_considerInsertSize && alignment.IsPaired() && !OverlapsMate(alignment))
            {
                return PairStatus.LongFragment;
            }

            return PairStatus.Unknown;
        }
Ejemplo n.º 2
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <param name="alignment">alignment to process; ignored unless it is mapped, mate-mapped, primary, paired and a proper pair</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when its mate is seen</param>
            /// <param name="samePositionReadNames">names of reads seen with Position == MatePosition, used to let only one read of such a pair through</param>
            /// <param name="usableFragmentCount">number of usable fragments</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past bins whose Stop is at or before the fragment start</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary <string, int> readNameToBinIndex,
                                               HashSet <string> samePositionReadNames, ref long usableFragmentCount, List <SampleGenomicBin> bins, ref int binIndexStart)
            {
                // Flag-based filters, applied in the same order as before
                if (!alignment.IsMapped())
                {
                    return;
                }
                if (!alignment.IsMateMapped())
                {
                    return;
                }
                if (!alignment.IsPrimaryAlignment())
                {
                    return;
                }
                if (!(alignment.IsPaired() && alignment.IsProperPair()))
                {
                    return;
                }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
                string readName = alignment.Name;

                // Check whether we have binned the fragment using the mate (single lookup instead of ContainsKey + indexer)
                int mateBinIndex;
                if (readNameToBinIndex.TryGetValue(readName, out mateBinIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[mateBinIndex].Count--;
                    }
                    readNameToBinIndex.Remove(readName); // clean up
                    return;
                }
                if (duplicateFailedQCLowQuality)
                {
                    return;
                }

                if (alignment.RefID != alignment.MateRefID)
                {
                    return;
                } // does this ever happen?

                if (IsRightMostInPair(alignment))
                {
                    return;
                } // look at only one read of the pair

                // handle the case where alignment.Position == alignment.MatePosition
                // NOTE(review): if the first read gets binned, its mate returns in the readNameToBinIndex branch above
                // and never reaches this point, so the name appears to stay in samePositionReadNames - confirm this is intended.
                if (alignment.Position == alignment.MatePosition)
                {
                    // Remove returns true only when the name was present, i.e. the other read of the pair was already seen
                    if (samePositionReadNames.Remove(readName))
                    {
                        return;
                    }
                    samePositionReadNames.Add(readName);
                }
                if (alignment.FragmentLength == 0)
                {
                    return;
                } // Janus-SRS-190: 0 when the information is unavailable

                // Try to bin the fragment
                int fragmentStart = alignment.Position;                            // 0-based, inclusive
                int fragmentStop  = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

                // Skip every bin that lies entirely on the left of the fragment
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count)
                {
                    return;
                } // all the remaining fragments are on the right of the last bin

                // now Bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);

                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    readNameToBinIndex[readName] = bestBinIndex; // remembered so the mate can undo the binning
                }
            }
Ejemplo n.º 3
0
            /// <summary>
            /// Bins fragments. Resets usableFragmentCount, reads the alignments of Chromosome from the BAM file
            /// and passes each one to BinOneAlignment together with Bins.
            /// Returns without binning anything (and without the paired-alignment check) when the reader
            /// cannot jump to the chromosome.
            /// </summary>
            /// <exception cref="Exception">the BAM index file does not exist</exception>
            /// <exception cref="ApplicationException">
            /// the reference index of Chromosome cannot be retrieved, the alignments are not sorted by position,
            /// or no paired alignment was found
            /// </exception>
            private void binFragments()
            {
                // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
                // NOTE(review): this throws the base Exception type, unlike the failures below; kept as-is so
                // existing catch blocks behave the same.
                if (!Bam.Index.Exists)
                {
                    throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
                }

                long pairedAlignmentCount = 0; // keep track of paired alignments
                usableFragmentCount = 0;
                using (BamReader reader = new BamReader(Bam.BamFile.FullName))
                {
                    int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
                    if (desiredRefIndex == -1)
                    {
                        throw new ApplicationException(
                            string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
                    }

                    if (!reader.Jump(desiredRefIndex, 0))
                    {
                        // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                        // .bam file.  That is not uncommon e.g. for truseq amplicon.
                        return;
                    }

                    Dictionary<string, int> readNameToBinIndex = new Dictionary<string, int>();
                    HashSet<string> samePositionReadNames = new HashSet<string>();
                    int binIndexStart = 0;
                    int prevPosition = -1;
                    BamAlignment alignment = new BamAlignment();
                    while (reader.GetNextAlignment(ref alignment, true))
                    {
                        // quit if the current reference index is different from the desired reference index.
                        // desiredRefIndex is never -1 at this point, so a RefID of -1 also ends the loop here;
                        // the former separate "RefID == -1" check could never be reached and was removed.
                        if (alignment.RefID != desiredRefIndex)
                        {
                            break;
                        }

                        if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
                        {
                            throw new ApplicationException(
                                string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
                        }
                        prevPosition = alignment.Position;

                        if (alignment.IsPaired())
                        {
                            pairedAlignmentCount++;
                        }

                        BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                            samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
                    }
                }

                // Reached only when the reader could jump to the chromosome
                if (pairedAlignmentCount == 0)
                {
                    throw new ApplicationException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
                }
            }
Ejemplo n.º 4
0
            /// <summary>
            /// Bins fragments. Resets usableFragmentCount, reads the alignments of Chromosome from the BAM file
            /// and passes each one to BinOneAlignment together with Bins.
            /// Returns without binning anything (and without the paired-alignment check) when the reader
            /// cannot jump to the chromosome.
            /// </summary>
            /// <exception cref="Exception">the BAM index file does not exist</exception>
            /// <exception cref="Illumina.Common.IlluminaException">
            /// the reference index of Chromosome cannot be retrieved, the alignments are not sorted by position,
            /// or no paired alignment was found
            /// </exception>
            private void binFragments()
            {
                // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
                // NOTE(review): this throws the base Exception type, unlike the failures below; kept as-is so
                // existing catch blocks behave the same.
                if (!Bam.Index.Exists)
                {
                    throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
                }

                long pairedAlignmentCount = 0; // keep track of paired alignments

                usableFragmentCount = 0;
                using (BamReader reader = new BamReader(Bam.BamFile.FullName))
                {
                    int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
                    if (desiredRefIndex == -1)
                    {
                        throw new Illumina.Common.IlluminaException(
                                  string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
                    }

                    if (!reader.Jump(desiredRefIndex, 0))
                    {
                        // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                        // .bam file.  That is not uncommon e.g. for truseq amplicon.
                        return;
                    }

                    Dictionary <string, int> readNameToBinIndex    = new Dictionary <string, int>();
                    HashSet <string>         samePositionReadNames = new HashSet <string>();
                    int          binIndexStart = 0;
                    int          prevPosition  = -1;
                    BamAlignment alignment     = new BamAlignment();
                    while (reader.GetNextAlignment(ref alignment, true))
                    {
                        // quit if the current reference index is different from the desired reference index.
                        // desiredRefIndex is never -1 at this point, so a RefID of -1 also ends the loop here;
                        // the former separate "RefID == -1" check could never be reached and was removed.
                        if (alignment.RefID != desiredRefIndex)
                        {
                            break;
                        }

                        if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
                        {
                            throw new Illumina.Common.IlluminaException(
                                      string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
                        }
                        prevPosition = alignment.Position;

                        if (alignment.IsPaired())
                        {
                            pairedAlignmentCount++;
                        }

                        BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                                        samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
                    }
                }

                // Reached only when the reader could jump to the chromosome
                if (pairedAlignmentCount == 0)
                {
                    throw new Illumina.Common.IlluminaException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
                }
            }
Ejemplo n.º 5
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <param name="alignment">alignment to process; ignored unless it is mapped, mate-mapped, primary, paired and a proper pair</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when its mate is seen</param>
            /// <param name="samePositionReadNames">names of reads seen with Position == MatePosition, used to let only one read of such a pair through</param>
            /// <param name="usableFragmentCount">number of usable fragments</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past bins whose Stop is at or before the fragment start</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
                HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
            {
                // Flag-based filters, applied in the same order as before
                if (!alignment.IsMapped()) { return; }
                if (!alignment.IsMateMapped()) { return; }
                if (!alignment.IsPrimaryAlignment()) { return; }
                if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
                string readName = alignment.Name;

                // Check whether we have binned the fragment using the mate (single lookup instead of ContainsKey + indexer)
                int mateBinIndex;
                if (readNameToBinIndex.TryGetValue(readName, out mateBinIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[mateBinIndex].Count--;
                    }
                    readNameToBinIndex.Remove(readName); // clean up
                    return;
                }
                if (duplicateFailedQCLowQuality) { return; }

                if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?

                if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

                // handle the case where alignment.Position == alignment.MatePosition
                // NOTE(review): if the first read gets binned, its mate returns in the readNameToBinIndex branch above
                // and never reaches this point, so the name appears to stay in samePositionReadNames - confirm this is intended.
                if (alignment.Position == alignment.MatePosition)
                {
                    // Remove returns true only when the name was present, i.e. the other read of the pair was already seen
                    if (samePositionReadNames.Remove(readName)) { return; }
                    samePositionReadNames.Add(readName);
                }
                if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

                // Try to bin the fragment
                int fragmentStart = alignment.Position; // 0-based, inclusive
                int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

                // Skip every bin that lies entirely on the left of the fragment
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

                // now Bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    readNameToBinIndex[readName] = bestBinIndex; // remembered so the mate can undo the binning
                }
            }