Пример #1
0
        /// <summary>
        /// Decides whether the read indexer should blacklist the given alignment.
        /// </summary>
        /// <param name="alignment">Alignment to evaluate.</param>
        /// <returns>
        /// True when low-MapQ filtering is on and the read has a non-zero mapping quality below the minimum,
        /// when unmapped filtering is on and the read or its mate is not mapped, or when the read is a duplicate.
        /// </returns>
        protected override bool ShouldBlacklistReadIndexer(BamAlignment alignment)
        {
            // A mapping quality of exactly 0 does not trigger this filter; no status is counted here.
            if (_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
            {
                return true;
            }

            if (_filterPairUnmapped)
            {
                // The read itself is checked first; the mate is only consulted when the read is mapped.
                string skipReason = null;
                if (!alignment.IsMapped())
                {
                    skipReason = "Skipped not mapped";
                }
                else if (!alignment.IsMateMapped())
                {
                    skipReason = "Skipped mate not mapped";
                }

                if (skipReason != null)
                {
                    _statusCounter.AddDebugStatusCount(skipReason);
                    return true;
                }
            }

            // The duplicate check must run only once per read: the de novo duplicate finder would otherwise
            // flag the read as a duplicate simply because it has already seen it.
            // We blacklist (rather than just skip) because if one mate is a duplicate, the other is presumed to be too.
            // Known gap: for a fusion read where the first mate seen is not a duplicate but the second is, the first
            // mate is flushed to the bam (when fusions are not being mated) before the second is known to be a dup.
            // This is a highly unlikely degenerate case.
            if (!ReadIsDuplicate(alignment))
            {
                return false;
            }

            _statusCounter.AddStatusCount("Blacklisted Duplicates");
            return true;
        }
Пример #2
0
        /// <summary>
        /// Updates the per-indel evidence counters for one supporting read.
        /// </summary>
        /// <param name="bamAlignment">Read supporting the indel.</param>
        /// <param name="isReputable">When true, ReputableSupport is incremented.</param>
        /// <param name="stitched">When true, the read is counted as Stitched instead of by strand.</param>
        /// <param name="indelMetrics">Evidence object whose counters are incremented in place.</param>
        /// <param name="isRepeat">When true, IsRepeat is incremented.</param>
        private static void AddReadLevelIndelMetrics(BamAlignment bamAlignment, bool isReputable, bool stitched, IndelEvidence indelMetrics,
                                                     bool isRepeat)
        {
            // Every call counts as one observation.
            indelMetrics.Observations++;

            // Directional tally: stitched reads get their own bucket, the rest are split by strand.
            if (stitched)
            {
                indelMetrics.Stitched++;
            }
            else if (bamAlignment.IsReverseStrand())
            {
                indelMetrics.Reverse++;
            }
            else
            {
                indelMetrics.Forward++;
            }

            if (isReputable)
            {
                indelMetrics.ReputableSupport++;
            }

            if (isRepeat)
            {
                indelMetrics.IsRepeat++;
            }

            // A read counts as "split" unless its mate is mapped to the same reference as the read itself.
            var mateOnSameReference = bamAlignment.IsMateMapped() && bamAlignment.MateRefID == bamAlignment.RefID;
            if (!mateOnSameReference)
            {
                indelMetrics.IsSplit++;
            }
        }
Пример #3
0
        /// <summary>
        /// Second pass: reads every alignment from the temporary file and writes it out, first updating the
        /// mate-related fields of any read whose mate has an entry in the remappings table.
        /// </summary>
        /// <param name="tmpFile">Path of the BAM file to read alignments from.</param>
        /// <param name="writer">Writer that receives every alignment, adjusted or not.</param>
        private void AdjustMates(string tmpFile, BamWriter writer)
        {
            Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);
            var read = new BamAlignment();

            using (var reader = new BamReader(tmpFile))
            {
                while (reader.GetNextAlignment(ref read, false))
                {
                    // The lookup key identifies the MATE of this read: "<name>-2" for a first mate, "<name>-1" otherwise.
                    var mateNumber = read.IsFirstMate() ? 2 : 1;
                    var mateKey = string.Format("{0}-{1}", read.Name, mateNumber);

                    RemapInfo mateInfo;
                    if (_remappings.TryGetValue(mateKey, out mateInfo))
                    {
                        if (mateInfo.Start == -1)
                        {
                            // A start of -1 marks the mate as unmapped: clear the pairing information.
                            read.SetIsMateUnmapped(true);
                            read.SetIsProperPair(false);
                            read.FragmentLength = 0;
                        }
                        else
                        {
                            read.MatePosition = mateInfo.Start;
                        }

                        if (read.IsMateMapped() && read.IsProperPair())
                        {
                            int readEnd = read.Position + (int)read.CigarData.GetReferenceSpan() - 1;

                            // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                            if (read.Position < mateInfo.Start)
                            {
                                // Read starts before the mate: positive length spanning read start to mate end.
                                read.FragmentLength = mateInfo.End - read.Position + 1;
                            }
                            else
                            {
                                // Otherwise: length measured from the mate start back to this read's end.
                                read.FragmentLength = mateInfo.Start - readEnd - 1;
                            }
                        }
                    }

                    // Reads without a remapped mate are written through untouched.
                    writer.WriteAlignment(read);
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Cheap pre-check for whether a read could overlap its mate.
        /// </summary>
        /// <param name="alignment">Read to test.</param>
        /// <returns>
        /// False if the mate or the read is unmapped, if they are on different references, or if their start
        /// positions are further apart than the configured maximum pair gap; true otherwise.
        /// </returns>
        private bool MayOverlapMate(BamAlignment alignment)
        {
            // Mate is checked before the read itself.
            var bothMapped = alignment.IsMateMapped() && alignment.IsMapped();
            if (!bothMapped || alignment.RefID != alignment.MateRefID)
            {
                return false;
            }

            // Within range unless the distance between the two start positions exceeds the maximum gap.
            var startDistance = Math.Abs(alignment.Position - alignment.MatePosition);
            return !(startDistance > _maxPairGap);
        }
        /// <summary>
        /// Classifies a read that is being handled on its own rather than together with its mate.
        /// </summary>
        /// <param name="alignment">Read to classify.</param>
        /// <returns>
        /// The first matching status, tested in this order: SplitChromosomes, MateUnmapped, Duplicate,
        /// LongFragment (only when insert size is considered); Unknown when none applies.
        /// </returns>
        private PairStatus SingleReadStatus(BamAlignment alignment)
        {
            // Note: stitched reads will also have differing ref ids, but that is not the same thing as a split pair.
            var pairedOnDifferentRefs = alignment.RefID != alignment.MateRefID && alignment.IsPaired();
            if (pairedOnDifferentRefs)
            {
                return PairStatus.SplitChromosomes;
            }

            // Each mapped flag is combined with the OTHER read's ref id: the mate's flag with this read's RefID,
            // and this read's flag with MateRefID.
            var mateUnmappedAndNoRef = !alignment.IsMateMapped() && alignment.RefID == -1;
            if (mateUnmappedAndNoRef || (!alignment.IsMapped() && alignment.MateRefID == -1))
            {
                return PairStatus.MateUnmapped;
            }

            if (alignment.IsDuplicate())
            {
                return PairStatus.Duplicate;
            }

            // Paired reads that do not overlap their mate are reported as long fragments, but only when enabled.
            if (_considerInsertSize && alignment.IsPaired() && !OverlapsMate(alignment))
            {
                return PairStatus.LongFragment;
            }

            return PairStatus.Unknown;
        }
        /// <summary>
        /// Decides whether the read indexer should blacklist the given alignment.
        /// </summary>
        /// <param name="alignment">Alignment to evaluate.</param>
        /// <returns>
        /// True when low-MapQ filtering is on and the read has a non-zero mapping quality below the minimum,
        /// when unmapped filtering is on and the read or its mate is unmapped with a ref id of -1,
        /// or when the read is a duplicate.
        /// </returns>
        protected override bool ShouldBlacklistReadIndexer(BamAlignment alignment)
        {
            // A mapping quality of exactly 0 does not trigger this filter; no status is counted here.
            if (_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
            {
                return true;
            }

            if (_filterPairUnmapped)
            {
                // The mapped flag alone is not enough: some pairs have one mate placed right next to the other with
                // mapq 0 but with real (chr: pos) information. Requiring a ref id of -1 as well separates those from
                // truly unmapped reads.
                string skipReason = null;
                if (!alignment.IsMapped() && alignment.RefID == -1)
                {
                    skipReason = "Skipped not mapped";
                }
                else if (!alignment.IsMateMapped() && alignment.MateRefID == -1)
                {
                    skipReason = "Skipped mate not mapped";
                }

                if (skipReason != null)
                {
                    _statusCounter.AddDebugStatusCount(skipReason);
                    return true;
                }
            }

            // The duplicate check must run only once per read: the de novo duplicate finder would otherwise
            // flag the read as a duplicate simply because it has already seen it.
            // We blacklist (rather than just skip) because if one mate is a duplicate, the other is presumed to be too.
            // Known gap: for a fusion read where the first mate seen is not a duplicate but the second is, the first
            // mate is flushed to the bam (when fusions are not being mated) before the second is known to be a dup.
            // This is a highly unlikely degenerate case.
            if (!ReadIsDuplicate(alignment))
            {
                return false;
            }

            _statusCounter.AddStatusCount("Blacklisted Duplicates");
            return true;
        }
Пример #7
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <param name="alignment">Read to process; ignored unless it is mapped, mate-mapped, primary, paired and in a proper pair.</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when a later read with the same name is seen</param>
            /// <param name="samePositionReadNames">Names of reads seen with Position == MatePosition; a name is added on first sight and removed (skipping the read) on the next</param>
            /// <param name="usableFragmentCount">number of usable fragments; incremented when a fragment is binned, decremented when that binning is undone</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past every bin whose Stop is at or before the fragment start</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary <string, int> readNameToBinIndex,
                                               HashSet <string> samePositionReadNames, ref long usableFragmentCount, List <SampleGenomicBin> bins, ref int binIndexStart)
            {
                if (!alignment.IsMapped())
                {
                    return;
                }
                if (!alignment.IsMateMapped())
                {
                    return;
                }
                if (!alignment.IsPrimaryAlignment())
                {
                    return;
                }
                if (!(alignment.IsPaired() && alignment.IsProperPair()))
                {
                    return;
                }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

                // Check whether we have binned the fragment using the mate.
                // A single TryGetValue replaces the ContainsKey + indexer double lookup.
                int binnedIndex;
                if (readNameToBinIndex.TryGetValue(alignment.Name, out binnedIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[binnedIndex].Count--;
                    }
                    readNameToBinIndex.Remove(alignment.Name); // clean up
                    // NOTE(review): this early return does not touch samePositionReadNames, so a name added there
                    // for a fragment that was binned appears to stay in the set - confirm whether that is intended.
                    return;
                }
                if (duplicateFailedQCLowQuality)
                {
                    return;
                }

                if (alignment.RefID != alignment.MateRefID)
                {
                    return;
                }                                                       // does this ever happen?

                if (IsRightMostInPair(alignment))
                {
                    return;
                }                                             // look at only one read of the pair

                // Handle the case where alignment.Position == alignment.MatePosition:
                // the first read seen with a given name is remembered and processed, the second one is skipped.
                if (alignment.Position == alignment.MatePosition)
                {
                    // HashSet.Remove returns true only when the name was present.
                    if (samePositionReadNames.Remove(alignment.Name))
                    {
                        return;
                    }
                    samePositionReadNames.Add(alignment.Name);
                }
                if (alignment.FragmentLength == 0)
                {
                    return;
                }                                              // Janus-SRS-190: 0 when the information is unavailable

                // Try to bin the fragment
                int fragmentStart = alignment.Position;                                         // 0-based, inclusive
                int fragmentStop  = alignment.Position + alignment.FragmentLength;              // 0-based, exclusive

                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart) // Bins[binIndexStart] on the left of the fragment
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count)
                {
                    return;
                }                                            // all the remaining fragments are on the right of the last bin

                // now Bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);

                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    // Remember where the fragment went so a later read with the same name can undo the binning.
                    readNameToBinIndex[alignment.Name] = bestBinIndex;
                }
            }
Пример #8
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <param name="alignment">Read to process; ignored unless it is mapped, mate-mapped, primary, paired and in a proper pair.</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when a later read with the same name is seen</param>
            /// <param name="samePositionReadNames">Names of reads seen with Position == MatePosition; a name is added on first sight and removed (skipping the read) on the next</param>
            /// <param name="usableFragmentCount">number of usable fragments; incremented when a fragment is binned, decremented when that binning is undone</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past every bin whose Stop is at or before the fragment start</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
                HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
            {
                if (!alignment.IsMapped()) { return; }
                if (!alignment.IsMateMapped()) { return; }
                if (!alignment.IsPrimaryAlignment()) { return; }
                if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

                // Check whether we have binned the fragment using the mate.
                // A single TryGetValue replaces the ContainsKey + indexer double lookup.
                int binnedIndex;
                if (readNameToBinIndex.TryGetValue(alignment.Name, out binnedIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[binnedIndex].Count--;
                    }
                    readNameToBinIndex.Remove(alignment.Name); // clean up
                    // NOTE(review): this early return does not touch samePositionReadNames, so a name added there
                    // for a fragment that was binned appears to stay in the set - confirm whether that is intended.
                    return;
                }
                if (duplicateFailedQCLowQuality) { return; }

                if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?

                if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

                // Handle the case where alignment.Position == alignment.MatePosition:
                // the first read seen with a given name is remembered and processed, the second one is skipped.
                if (alignment.Position == alignment.MatePosition)
                {
                    // HashSet.Remove returns true only when the name was present.
                    if (samePositionReadNames.Remove(alignment.Name)) { return; }
                    samePositionReadNames.Add(alignment.Name);
                }
                if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

                // Try to bin the fragment
                int fragmentStart = alignment.Position; // 0-based, inclusive
                int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart) // Bins[binIndexStart] on the left of the fragment
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

                // now Bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    // Remember where the fragment went so a later read with the same name can undo the binning.
                    readNameToBinIndex[alignment.Name] = bestBinIndex;
                }
            }