/// <summary>
        /// Decides whether an alignment should be excluded from processing.
        /// </summary>
        /// <param name="alignment">The alignment record to inspect.</param>
        /// <returns>
        /// <c>true</c> for supplementary or non-primary alignments, and for alignments that are not
        /// flagged as a proper pair while <c>_filterForProperPairs</c> is enabled; otherwise <c>false</c>.
        /// </returns>
        private bool ShouldSkipRead(BamAlignment alignment)
        {
            // Supplementary records are always skipped.
            if (alignment.IsSupplementaryAlignment())
            {
                return true;
            }

            // So is anything that is not the primary alignment.
            if (!alignment.IsPrimaryAlignment())
            {
                return true;
            }

            // The only remaining reason to skip is the optional proper-pair filter.
            return _filterForProperPairs && !alignment.IsProperPair();
        }
Exemplo n.º 2
0
        // Verifies that a Read built from a BamAlignment mirrors the alignment's fields.
        // Every assertion uses xUnit's (expected, actual) argument order so that a failure
        // message labels the two values correctly.
        public void FromBam()
        {
            var alignment = new BamAlignment
            {
                Bases        = "ATCTTA",
                Position     = 100,
                MatePosition = 500,
                Name         = "test",
                CigarData    = new CigarAlignment("5M1S"),
                MapQuality   = 10,
                Qualities    = new[] { (byte)10, (byte)20, (byte)30 }
            };

            alignment.SetIsDuplicate(true);
            alignment.SetIsProperPair(true);
            alignment.SetIsSecondaryAlignment(true);
            alignment.SetIsUnmapped(true);

            var read = new Read("chr1", alignment);

            Assert.Equal("chr1", read.Chromosome);
            Assert.Equal(alignment.Bases, read.Sequence);

            // The read's positions are expected to be one greater than the alignment's
            // (presumably a 0-based to 1-based conversion -- confirm against Read).
            Assert.Equal(alignment.Position + 1, read.Position);
            Assert.Equal(alignment.MatePosition + 1, read.MatePosition);

            Assert.Equal(alignment.Name, read.Name);
            Assert.Equal(alignment.CigarData, read.CigarData);

            // Flag-derived properties must agree with the alignment's own flag accessors.
            Assert.Equal(alignment.IsMapped(), read.IsMapped);
            Assert.Equal(alignment.IsProperPair(), read.IsProperPair);
            Assert.Equal(alignment.IsPrimaryAlignment(), read.IsPrimaryAlignment);
            Assert.Equal(alignment.IsDuplicate(), read.IsPcrDuplicate);

            // Every entry of the direction map is expected to be Forward.
            foreach (var direction in read.SequencedBaseDirectionMap)
            {
                Assert.Equal(DirectionType.Forward, direction);
            }

            // Qualities are compared element by element over the read's quality array.
            for (var i = 0; i < read.Qualities.Length; i++)
            {
                Assert.Equal(alignment.Qualities[i], read.Qualities[i]);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
        /// Scans the records of <c>Chromosome</c> in order and passes every usable read that may
        /// overlap an upcoming variant to <c>ProcessReadBases</c>.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to scan.</param>
        /// <exception cref="ArgumentException">
        /// The reader returns a negative reference index for <c>Chromosome</c>.
        /// </exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int recordsSeen = 0;
            int variantCursor = 0;
            using (var bamReader = new BamReader(bamPath))
            {
                var record = new BamAlignment();
                int targetRefId = bamReader.GetReferenceIndex(this.Chromosome);
                if (targetRefId < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
                }
                Console.WriteLine("Jump to refid {0} {1}", targetRefId, this.Chromosome);
                bamReader.Jump(targetRefId, 0);

                // Keep reading until the reader runs out of records or we decide to stop.
                while (bamReader.GetNextAlignment(ref record, false))
                {
                    // A record without a position, or on a later reference: we're past our chromosome of interest.
                    if (!record.HasPosition() || record.RefID > targetRefId)
                    {
                        break;
                    }

                    // We're not yet on our chromosome of interest.
                    if (record.RefID < targetRefId)
                    {
                        continue;
                    }

                    recordsSeen++;
                    if (recordsSeen % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", recordsSeen, record.Position);
                    }

                    // Skip over unaligned or other non-count-worthy reads.
                    if (!record.IsPrimaryAlignment() ||
                        !record.IsMapped() ||
                        record.IsDuplicate() ||
                        record.MapQuality <= MinimumMapQ)
                    {
                        continue;
                    }

                    // Advance the variant cursor past every variant that lies before this read.
                    while (variantCursor < this.Variants.Count &&
                           this.Variants[variantCursor].ReferencePosition < record.Position)
                    {
                        variantCursor++;
                    }

                    // No variants left: nothing more to count.
                    if (variantCursor >= this.Variants.Count)
                    {
                        break;
                    }

                    // If the read starts more than 1000 positions before the next variant, skip it.
                    var upcomingVariant = this.Variants[variantCursor];
                    if (record.Position + 1000 < upcomingVariant.ReferencePosition)
                    {
                        continue;
                    }

                    // This read potentially overlaps the next variant (and further variants). Count bases!
                    ProcessReadBases(record, variantCursor);
                }
            }
            Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Stores a copy of <paramref name="alignment"/> in the slot that matches its kind
        /// (primary, supplementary or secondary) and its read number.
        /// </summary>
        /// <param name="alignment">Alignment to record; a copy is stored, not the instance itself.</param>
        /// <param name="readNumber">
        /// Which read of the pair the alignment belongs to. With <c>ReadNumber.NA</c>, primary alignments
        /// fill <c>Read1</c> first and then <c>Read2</c>, while supplementary and secondary alignments
        /// go to the read 1 lists.
        /// </param>
        /// <exception cref="InvalidDataException">The targeted primary slot is already occupied.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A supplementary or secondary alignment is given an unrecognized <paramref name="readNumber"/>.
        /// </exception>
        public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
        {
            var copy = new BamAlignment(alignment);

            var isPrimary = alignment.IsPrimaryAlignment();
            var isSupplementary = alignment.IsSupplementaryAlignment();

            if (isPrimary && !isSupplementary)
            {
                // Primary alignment: goes into Read1 or Read2. Unrecognized read numbers are ignored here.
                switch (readNumber)
                {
                    case ReadNumber.NA:
                        if (Read1 != null && Read2 != null)
                        {
                            throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
                        }

                        // First come, first served: Read1 is filled before Read2.
                        if (Read1 == null)
                        {
                            Read1 = copy;
                        }
                        else
                        {
                            Read2 = copy;
                        }
                        break;

                    case ReadNumber.Read1:
                        if (Read1 != null)
                        {
                            throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
                        }
                        Read1 = copy;
                        break;

                    case ReadNumber.Read2:
                        if (Read2 != null)
                        {
                            throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
                        }
                        Read2 = copy;
                        break;
                }
            }
            else if (isSupplementary)
            {
                // Supplementary alignment: appended to a per-read list that is created on first use.
                switch (readNumber)
                {
                    case ReadNumber.Read1:
                    case ReadNumber.NA:
                        if (Read1SupplementaryAlignments == null)
                        {
                            Read1SupplementaryAlignments = new List<BamAlignment>();
                        }
                        Read1SupplementaryAlignments.Add(copy);
                        break;

                    case ReadNumber.Read2:
                        if (Read2SupplementaryAlignments == null)
                        {
                            Read2SupplementaryAlignments = new List<BamAlignment>();
                        }
                        Read2SupplementaryAlignments.Add(copy);
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
                }
            }
            else
            {
                // Everything else is treated as a secondary alignment.
                switch (readNumber)
                {
                    case ReadNumber.Read1:
                    case ReadNumber.NA:
                        if (Read1SecondaryAlignments == null)
                        {
                            Read1SecondaryAlignments = new List<BamAlignment>();
                        }
                        Read1SecondaryAlignments.Add(copy);
                        break;

                    case ReadNumber.Read2:
                        if (Read2SecondaryAlignments == null)
                        {
                            Read2SecondaryAlignments = new List<BamAlignment>();
                        }
                        Read2SecondaryAlignments.Add(copy);
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
                }
            }

            // One improper alignment is enough to mark the whole set as improper.
            if (!alignment.IsProperPair())
            {
                IsImproper = true;
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
        /// Reads the records of <c>Chromosome</c> sequentially and forwards each usable read that could
        /// touch the next pending variant to <c>ProcessReadBases</c>.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to read.</param>
        /// <exception cref="ArgumentException">
        /// The reader returns a negative reference index for <c>Chromosome</c>.
        /// </exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int totalRecords = 0;
            int pendingVariant = 0;

            using (var bam = new BamReader(bamPath))
            {
                var current = new BamAlignment();
                int chromosomeId = bam.GetReferenceIndex(Chromosome);
                if (chromosomeId < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", Chromosome, bamPath));
                }
                Console.WriteLine("Jump to refid {0} {1}", chromosomeId, Chromosome);
                bam.Jump(chromosomeId, 0);

                // The loop ends when the reader has no more records, or on one of the breaks below.
                while (bam.GetNextAlignment(ref current, false))
                {
                    // We're past our chromosome of interest.
                    if (!current.HasPosition() || current.RefID > chromosomeId) { break; }

                    // We're not yet on our chromosome of interest.
                    if (current.RefID < chromosomeId) { continue; }

                    totalRecords++;
                    if (totalRecords % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", totalRecords, current.Position);
                    }

                    // Skip over unaligned or other non-count-worthy reads.
                    bool notCountWorthy = !current.IsPrimaryAlignment()
                                          || !current.IsMapped()
                                          || current.IsDuplicate()
                                          || current.MapQuality <= MinimumMapQ;
                    if (notCountWorthy) { continue; }

                    // Scan forward through the variants list, to keep up with our reads.
                    while (pendingVariant < Variants.Count
                           && Variants[pendingVariant].ReferencePosition < current.Position)
                    {
                        pendingVariant++;
                    }

                    // Every variant has been passed; stop reading.
                    if (pendingVariant >= Variants.Count) { break; }

                    // A read starting more than 1000 positions before the next variant is skipped.
                    if (current.Position + 1000 < Variants[pendingVariant].ReferencePosition) { continue; }

                    // This read potentially overlaps the next variant (and further variants). Count bases!
                    ProcessReadBases(current, pendingVariant);
                }
            }

            Console.WriteLine("Looped over {0} bam records in all", totalRecords);
        }
Exemplo n.º 6
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <param name="alignment">alignment record to bin; must be mapped, primary, paired and in a proper pair with a mapped mate to be considered</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when its mate is seen</param>
            /// <param name="samePositionReadNames">names of reads seen with Position == MatePosition, used so that only one read of such a pair is binned</param>
            /// <param name="usableFragmentCount">number of usable fragments</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past bins that end at or before the fragment start</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary <string, int> readNameToBinIndex,
                                               HashSet <string> samePositionReadNames, ref long usableFragmentCount, List <SampleGenomicBin> bins, ref int binIndexStart)
            {
                if (!alignment.IsMapped())
                {
                    return;
                }
                if (!alignment.IsMateMapped())
                {
                    return;
                }
                if (!alignment.IsPrimaryAlignment())
                {
                    return;
                }
                if (!(alignment.IsPaired() && alignment.IsProperPair()))
                {
                    return;
                }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

                // Check whether we have binned the fragment using the mate.
                // TryGetValue fetches the bin index in the same lookup that tests for presence.
                int binnedIndex;
                if (readNameToBinIndex.TryGetValue(alignment.Name, out binnedIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[binnedIndex].Count--;
                    }
                    readNameToBinIndex.Remove(alignment.Name); // clean up
                    return;
                }
                if (duplicateFailedQCLowQuality)
                {
                    return;
                }

                if (alignment.RefID != alignment.MateRefID)
                {
                    return; // does this ever happen?
                }

                if (IsRightMostInPair(alignment))
                {
                    return; // look at only one read of the pair
                }

                // handle the case where alignment.Position == alignment.MatePosition
                if (alignment.Position == alignment.MatePosition)
                {
                    // Remove returns true when the name was present, i.e. the other read of this
                    // pair has already been processed, so this one is skipped.
                    if (samePositionReadNames.Remove(alignment.Name))
                    {
                        return;
                    }
                    samePositionReadNames.Add(alignment.Name);
                }
                if (alignment.FragmentLength == 0)
                {
                    return; // Janus-SRS-190: 0 when the information is unavailable
                }

                // Try to bin the fragment
                int fragmentStart = alignment.Position;                            // 0-based, inclusive
                int fragmentStop  = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

                // Skip bins that lie entirely on the left of the fragment
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count)
                {
                    return; // all the remaining fragments are on the right of the last bin
                }

                // now Bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);

                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    // Remember the bin so the mate can undo the binning if it fails the filters
                    readNameToBinIndex[alignment.Name] = bestBinIndex;
                }
            }
Exemplo n.º 7
0
        /// <summary>
        /// Finds the indels carried by an alignment and records evidence for them in <paramref name="lookup"/>.
        /// </summary>
        /// <param name="bamAlignment">Alignment to inspect.</param>
        /// <param name="targetFinder">Finder used to extract the indels from the alignment.</param>
        /// <param name="lookup">Evidence keyed by indel string; multi-indel keys join the individual keys with "|".</param>
        /// <param name="isReputable">Passed through to the metric updates.</param>
        /// <param name="chrom">Chromosome name passed to the finder.</param>
        /// <param name="minMapQuality">The alignment's map quality must be strictly greater than this value.</param>
        /// <param name="stitched">Passed through to the metric updates.</param>
        /// <returns>
        /// The indels reported by the finder, or <c>null</c> when the alignment is not examined
        /// (map quality too low, a single cigar op, or not a primary alignment).
        /// </returns>
        public static List <PreIndel> FindIndelsAndRecordEvidence(BamAlignment bamAlignment, IndelTargetFinder targetFinder, Dictionary <string, IndelEvidence> lookup,
                                                                  bool isReputable, string chrom, int minMapQuality, bool stitched = false)
        {
            // TODO define whether we want to collect indels from supplementaries. I think we probably do...
            // TODO do we want to collect indels from duplicates?
            // Requiring more than one cigar op is meant to be faster than checking all the ops on all the reads.
            // It makes an important assumption that no reads are full I or full D.
            var worthExamining = bamAlignment.MapQuality > minMapQuality
                                 && bamAlignment.CigarData.Count > 1
                                 && bamAlignment.IsPrimaryAlignment();
            if (!worthExamining)
            {
                return null;
            }

            var indels = targetFinder.FindIndels(bamAlignment, chrom);
            if (!indels.Any())
            {
                return indels;
            }

            // TODO this doesn't support nm from stitched, which is not in a tag. Need to pass it in!!
            var totalNm = bamAlignment.GetIntTag("NM") ?? 0;

            var isMulti = indels.Count > 1;

            // Reference span from the first indel to the end of the last one; only meaningful for multis.
            int readSpanNeededToCoverBoth = 0;
            if (isMulti)
            {
                var lastIndel = indels[indels.Count - 1];
                var lastPosOfVariation = lastIndel.Type == AlleleCategory.Deletion
                    ? lastIndel.ReferencePosition + 1
                    : lastIndel.ReferencePosition + lastIndel.Length;
                readSpanNeededToCoverBoth = lastPosOfVariation - indels[0].ReferencePosition;
            }

            // TODO do we want to collect info here for individual indels if they are only seen in multis?
            // Currently trying to solve this by only collecting for individuals if it seems likely that
            // we're going to see reads that don't span both.
            var recordIndividually = !isMulti || readSpanNeededToCoverBoth > 25; // TODO magic number
            if (recordIndividually)
            {
                foreach (var indel in indels)
                {
                    // TODO less gnarly
                    var singleMetrics = IndelMetrics(lookup, indel.ToString());
                    UpdateIndelMetrics(bamAlignment, isReputable, stitched, singleMetrics, indel, totalNm);
                }
            }

            if (isMulti)
            {
                // TODO less gnarly
                var multiKey = string.Join("|", indels.Select(x => x.ToString()));
                var multiMetrics = IndelMetrics(lookup, multiKey);

                // TODO - are read-level repeats that informative? Because this is kind of a perf burden.
                // Read-level repeat detection was removed for that reason, so the flag is always false.
                bool isRepeat = false;

                AddReadLevelIndelMetrics(bamAlignment, isReputable, stitched, multiMetrics, isRepeat);
                AddMultiIndelMetrics(multiMetrics, indels, totalNm);
            }

            return indels;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Stores a copy of <paramref name="alignment"/> in the slot that matches its kind
        /// (primary, supplementary or secondary) and its read number. Primary alignments also update
        /// the fragment size, pair orientation, primary read count and min/max position.
        /// </summary>
        /// <param name="alignment">Alignment to record; a copy is stored, not the instance itself.</param>
        /// <param name="readNumber">
        /// Which read of the pair the alignment belongs to. With <c>ReadNumber.NA</c>, primary alignments
        /// fill <c>Read1</c> first and then <c>Read2</c>, while supplementary and secondary alignments
        /// go to the read 1 lists.
        /// </param>
        /// <exception cref="InvalidDataException">The targeted primary slot is already occupied.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A supplementary or secondary alignment is given an unrecognized <paramref name="readNumber"/>.
        /// </exception>
        public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
        {
            var alignmentCopy = new BamAlignment(alignment);

            if (alignmentCopy.IsPrimaryAlignment() && !alignmentCopy.IsSupplementaryAlignment())
            {
                // FragmentSize == 0 is used as the "not set yet" marker.
                if (FragmentSize == 0)
                {
                    FragmentSize = Math.Abs(alignmentCopy.FragmentLength);

                    var isReverse     = alignmentCopy.IsReverseStrand();
                    var isMateReverse = alignmentCopy.IsMateReverseStrand();

                    // Can be either F1R2 or F2R1: the two mates must be on opposite strands.
                    NormalPairOrientation = isReverse != isMateReverse;

                    if (NormalPairOrientation && alignmentCopy.RefID == alignmentCopy.MateRefID)
                    {
                        // On the same reference, the forward mate must not start after the reverse mate.
                        var forwardStart = isReverse ? alignmentCopy.MatePosition : alignmentCopy.Position;
                        var reverseStart = isReverse ? alignmentCopy.Position : alignmentCopy.MatePosition;
                        if (forwardStart > reverseStart)
                        {
                            // RF
                            NormalPairOrientation = false;
                        }
                    }
                }

                NumPrimaryReads++;

                // Track the extremes of the primary alignment positions.
                if (alignmentCopy.Position > MaxPosition)
                {
                    MaxPosition = alignmentCopy.Position;
                }

                if (alignmentCopy.Position < MinPosition)
                {
                    MinPosition = alignmentCopy.Position;
                }

                if (readNumber == ReadNumber.NA)
                {
                    if (Read1 != null && Read2 != null)
                    {
                        throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
                    }

                    // First come, first served: Read1 is filled before Read2.
                    if (Read1 == null)
                    {
                        Read1 = alignmentCopy;
                    }
                    else
                    {
                        Read2 = alignmentCopy;
                    }
                }
                else if (readNumber == ReadNumber.Read1)
                {
                    if (Read1 != null)
                    {
                        throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
                    }
                    Read1 = alignmentCopy;
                }
                else if (readNumber == ReadNumber.Read2)
                {
                    if (Read2 != null)
                    {
                        throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
                    }
                    Read2 = alignmentCopy;
                }
            }
            else if (alignmentCopy.IsSupplementaryAlignment())
            {
                // Supplementary alignment: appended to a per-read list that is created on first use.
                switch (readNumber)
                {
                case ReadNumber.Read1:
                case ReadNumber.NA:
                    if (Read1SupplementaryAlignments == null)
                    {
                        Read1SupplementaryAlignments = new List<BamAlignment>();
                    }
                    Read1SupplementaryAlignments.Add(alignmentCopy);
                    break;

                case ReadNumber.Read2:
                    if (Read2SupplementaryAlignments == null)
                    {
                        Read2SupplementaryAlignments = new List<BamAlignment>();
                    }
                    Read2SupplementaryAlignments.Add(alignmentCopy);
                    break;

                default:
                    throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
                }
            }
            else
            {
                // Everything else is treated as a secondary alignment.
                switch (readNumber)
                {
                case ReadNumber.Read1:
                case ReadNumber.NA:
                    if (Read1SecondaryAlignments == null)
                    {
                        Read1SecondaryAlignments = new List<BamAlignment>();
                    }
                    Read1SecondaryAlignments.Add(alignmentCopy);
                    break;

                case ReadNumber.Read2:
                    if (Read2SecondaryAlignments == null)
                    {
                        Read2SecondaryAlignments = new List<BamAlignment>();
                    }
                    Read2SecondaryAlignments.Add(alignmentCopy);
                    break;

                default:
                    throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
                }
            }

            // Set as improper once we add any alignment that is flagged as improper
            if (!alignmentCopy.IsProperPair())
            {
                IsImproper = true;
            }
        }
Exemplo n.º 9
0
            /// <summary>
            /// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
            /// Decreases bin count if the second read of a pair does not pass all the filters.
            /// </summary>
            /// <param name="alignment">alignment record to bin; must be mapped, primary, paired and in a proper pair with a mapped mate to be considered</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index; an entry is added when a fragment is binned and removed when its mate is seen</param>
            /// <param name="samePositionReadNames">names of reads seen with Position == MatePosition, used so that only one read of such a pair is binned</param>
            /// <param name="usableFragmentCount">number of usable fragments</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin; advanced past bins that end at or before the fragment start</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
                HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
            {
                if (!alignment.IsMapped()) { return; }
                if (!alignment.IsMateMapped()) { return; }
                if (!alignment.IsPrimaryAlignment()) { return; }
                if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

                bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

                // Check whether we have binned the fragment using the mate.
                // A single TryGetValue replaces the ContainsKey + indexer double lookup.
                int previousBinIndex;
                if (readNameToBinIndex.TryGetValue(alignment.Name, out previousBinIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (duplicateFailedQCLowQuality)
                    {
                        usableFragmentCount--;
                        bins[previousBinIndex].Count--;
                    }
                    readNameToBinIndex.Remove(alignment.Name); // clean up
                    return;
                }
                if (duplicateFailedQCLowQuality) { return; }

                if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?

                if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

                // handle the case where alignment.Position == alignment.MatePosition
                if (alignment.Position == alignment.MatePosition)
                {
                    // Remove reports whether the name was present: if it was, the other read of this
                    // pair has already been processed and this one is skipped.
                    if (samePositionReadNames.Remove(alignment.Name)) { return; }
                    samePositionReadNames.Add(alignment.Name);
                }
                if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

                // Try to bin the fragment
                int fragmentStart = alignment.Position; // 0-based, inclusive
                int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart) // Bins[binIndexStart] on the left of the fragment
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

                // now Bins[binIndexStart].Stop > fragmentStart
                int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
                if (bestBinIndex >= 0) // Bin the fragment
                {
                    usableFragmentCount++;
                    bins[bestBinIndex].Count++;
                    // Remember the bin so the mate can undo the binning if it fails the filters
                    readNameToBinIndex[alignment.Name] = bestBinIndex;
                }
            }