예제 #1
0
        /// <summary>
        /// Searches the indel sites of one read for the first site whose two flanking mapped positions
        /// (widened by <paramref name="anchorSize"/>) are both reported as contained in the mate.
        /// </summary>
        /// <param name="readIndelPositions">Indel sites of <paramref name="readWithIndels"/>; may be null or empty.</param>
        /// <param name="readWithoutIndels">The mate that is tested for containing the flanking positions.</param>
        /// <param name="readWithIndels">The read the indel sites belong to; supplies the reference id and strand.</param>
        /// <param name="anchorSize">Extra bases required on each side of the indel.</param>
        /// <returns>
        /// The <c>PreviousMappedPosition</c> of the first covered site, or null when there are no sites
        /// or none of them is covered.
        /// </returns>
        public static int? AnyIndelCoveredInMate(IEnumerable<IndelSite> readIndelPositions,
                                                 BamAlignment readWithoutIndels, BamAlignment readWithIndels, int anchorSize = 0)
        {
            if (readIndelPositions == null || !readIndelPositions.Any())
            {
                return null;
            }

            // For a reverse-strand read the sites are visited in the opposite order.
            var sitesInScanOrder = readWithIndels.IsReverseStrand()
                ? readIndelPositions.Reverse()
                : readIndelPositions;

            foreach (var site in sitesInScanOrder)
            {
                var leftAnchor = site.PreviousMappedPosition - anchorSize;
                if (!readWithoutIndels.ContainsPosition(leftAnchor, readWithIndels.RefID))
                {
                    continue;
                }

                var rightAnchor = site.NextMappedPosition + anchorSize;
                if (readWithoutIndels.ContainsPosition(rightAnchor, readWithIndels.RefID))
                {
                    return site.PreviousMappedPosition;
                }
            }

            return null;
        }
예제 #2
0
        /// <summary>
        /// Updates the per-indel evidence counters for one supporting read.
        /// </summary>
        /// <param name="bamAlignment">The supporting read; its strand and mate flags are inspected.</param>
        /// <param name="isReputable">When true, <c>ReputableSupport</c> is incremented.</param>
        /// <param name="stitched">When true, <c>Stitched</c> is incremented instead of a strand counter.</param>
        /// <param name="indelMetrics">The evidence record whose counters are incremented.</param>
        /// <param name="isRepeat">When true, <c>IsRepeat</c> is incremented.</param>
        private static void AddReadLevelIndelMetrics(BamAlignment bamAlignment, bool isReputable, bool stitched, IndelEvidence indelMetrics,
                                                     bool isRepeat)
        {
            indelMetrics.Observations++;

            // Exactly one of Stitched / Reverse / Forward is bumped per read.
            if (stitched)
            {
                indelMetrics.Stitched++;
            }
            else if (bamAlignment.IsReverseStrand())
            {
                indelMetrics.Reverse++;
            }
            else
            {
                indelMetrics.Forward++;
            }

            if (isReputable)
            {
                indelMetrics.ReputableSupport++;
            }

            if (isRepeat)
            {
                indelMetrics.IsRepeat++;
            }

            // A read counts as split when its mate is unmapped or sits on a different reference.
            var mateOnSameReference = bamAlignment.IsMateMapped() && bamAlignment.MateRefID == bamAlignment.RefID;
            if (!mateOnSameReference)
            {
                indelMetrics.IsSplit++;
            }
        }
예제 #3
0
            /// <summary>
            /// Advances the underlying bam reader by one record and mirrors the new record's core fields
            /// into <c>_currentSerializedAlignment</c>. Sets <c>_isEnd</c> when no further record is available.
            /// </summary>
            public override void MoveToNextRecord()
            {
                bool gotRecord = _bamReader.GetNextAlignment(ref _currentBamAlignment, false);
                _isEnd = !gotRecord;

                if (!_isEnd)
                {
                    // The existing serialized-alignment instance is overwritten field by field
                    // (noted by the original author as avoiding a memory allocation).
                    _currentSerializedAlignment.RefID = _currentBamAlignment.RefID;
                    _currentSerializedAlignment.Position = _currentBamAlignment.Position;
                    _currentSerializedAlignment.AlignmentFlag = _currentBamAlignment.AlignmentFlag;
                    _currentSerializedAlignment.FragmentLength = _currentBamAlignment.FragmentLength;
                    _currentSerializedAlignment.MapQuality = _currentBamAlignment.MapQuality;
                    _currentSerializedAlignment.MatePosition = _currentBamAlignment.MatePosition;
                    _currentSerializedAlignment.MateRefID = _currentBamAlignment.MateRefID;
                    _currentSerializedAlignment.IsReverseStrand = _currentBamAlignment.IsReverseStrand();
                    _currentSerializedAlignment.Name = _currentBamAlignment.Name;
                }
            }
예제 #4
0
        /// <summary>
        /// Decides whether the two reads of a pair disagree about an indel, judged against whichever read
        /// looks trustworthy, and optionally softclips the untrustworthy read after the disputed indel.
        /// </summary>
        /// <remarks>
        /// A read is considered "good" when its NM minus its indel adjustment is within
        /// <paramref name="mismatchesAllowed"/> and, in addition, either its raw NM is within that limit or it
        /// has at most one indel. Only indel sites unique to one read (when both lists are supplied) are compared.
        /// </remarks>
        /// <param name="r1IndelPositions">Indel sites of read 1; may be null.</param>
        /// <param name="r2IndelPositions">Indel sites of read 2; may be null.</param>
        /// <param name="read1">First read of the pair.</param>
        /// <param name="read2">Second read of the pair.</param>
        /// <param name="disagree">Set to true when an indel of one read is covered by its mate.</param>
        /// <param name="mismatchesAllowed">Largest NM value for which a read still counts as clean.</param>
        /// <param name="r1IndelAdjustment">Amount subtracted from read 1's NM before the adjusted-clean test.</param>
        /// <param name="r2IndelAdjustment">Amount subtracted from read 2's NM before the adjusted-clean test.</param>
        /// <param name="softclipWeakOne">When true, a read that is not good is softclipped after the disputed indel.</param>
        /// <param name="r1Nm">Pre-computed NM of read 1; when null the "NM" tag is read from the alignment.</param>
        /// <param name="r2Nm">Pre-computed NM of read 2; when null the "NM" tag is read from the alignment.</param>
        /// <returns>A new list holding <paramref name="read1"/> and <paramref name="read2"/>, in that order.</returns>
        private static List<BamAlignment> IndelsDisagreeWithStrongMate(List<IndelSite> r1IndelPositions,
                                                                       List<IndelSite> r2IndelPositions, BamAlignment read1,
                                                                       BamAlignment read2, out bool disagree, int mismatchesAllowed = 1, int r1IndelAdjustment = 0,
                                                                       int r2IndelAdjustment = 0, bool softclipWeakOne = true, int? r1Nm = null, int? r2Nm = null)
        {
            var checkBoth = true;
            // TODO maybe also check if one of the reads has ins AND del
            // TODO if we've grabbed this info here, propagate it out so we don't do it twice
            // TODO indel adjustment should only actually remove insertions, no??
            var read1Nm = r1Nm ?? read1.GetIntTag("NM");
            var read2Nm = r2Nm ?? read2.GetIntTag("NM");
            var read1AdjustedNm = read1Nm - r1IndelAdjustment;
            var read2AdjustedNm = read2Nm - r2IndelAdjustment;

            disagree = false;

            // Uniqueness can only be computed when both lists exist; otherwise each list is used as-is
            // (and may therefore still be null).
            var bothListsPresent = r1IndelPositions != null && r2IndelPositions != null;
            var r1IndelPositionsUnique = bothListsPresent ? GetUniqueIndelSites(r1IndelPositions, r2IndelPositions) : r1IndelPositions;
            var r2IndelPositionsUnique = bothListsPresent ? GetUniqueIndelSites(r2IndelPositions, r1IndelPositions) : r2IndelPositions;

            // Null-safe emptiness checks: calling Any() directly on a null list would throw.
            var r1HasUniqueIndels = r1IndelPositionsUnique != null && r1IndelPositionsUnique.Any();
            var r2HasUniqueIndels = r2IndelPositionsUnique != null && r2IndelPositionsUnique.Any();

            // No sense doing further checks if there's nothing to disagree over...
            if (r1HasUniqueIndels || r2HasUniqueIndels)
            {
                var r1AdjustedClean = read1AdjustedNm <= mismatchesAllowed;
                var r2AdjustedClean = read2AdjustedNm <= mismatchesAllowed;
                var r1Clean = read1Nm <= mismatchesAllowed;
                var r2Clean = read2Nm <= mismatchesAllowed;
                var r1NumIndels = r1IndelPositions?.Count;
                var r2NumIndels = r2IndelPositions?.Count;
                var r1IsGood = r1AdjustedClean && (r1Clean || r1NumIndels <= 1);
                var r2IsGood = r2AdjustedClean && (r2Clean || r2NumIndels <= 1);

                if ((read1Nm != null && read2Nm != null) && (r1IsGood || r2IsGood))
                {
                    if (r1IsGood)
                    {
                        // Read 1 is trusted: does it span an indel that only read 2 reports?
                        var disagreeingPos = AnyIndelCoveredInMate(r2IndelPositionsUnique, read1, read2);

                        if (disagreeingPos != null)
                        {
                            disagree = true;
                            if (softclipWeakOne && !r2IsGood)
                            {
                                SoftclipAfterIndel(read2, read2.IsReverseStrand(), disagreeingPos.Value);
                            }
                        }
                        else
                        {
                            if (checkBoth)
                            {
                                // Check the other direction too, but never softclip the trusted read.
                                disagreeingPos = AnyIndelCoveredInMate(r1IndelPositionsUnique, read2, read1);
                                if (disagreeingPos != null)
                                {
                                    disagree = true;
                                }
                            }
                        }
                    }
                    else
                    {
                        // Only read 2 is trusted: does it span an indel that only read 1 reports?
                        var disagreeingPos = AnyIndelCoveredInMate(r1IndelPositionsUnique, read2, read1);
                        if (disagreeingPos != null)
                        {
                            disagree = true;
                            if (softclipWeakOne && !r1IsGood)
                            {
                                SoftclipAfterIndel(read1, read1.IsReverseStrand(), disagreeingPos.Value);
                            }
                        }
                        else
                        {
                            if (checkBoth)
                            {
                                disagreeingPos = AnyIndelCoveredInMate(r2IndelPositionsUnique, read1, read2);
                                if (disagreeingPos != null)
                                {
                                    disagree = true;
                                }
                            }
                        }
                    }
                }
            }

            // If both are good, and they disagree, should still say they disagree?

            return new List<BamAlignment>
            {
                read1, read2
            };
        }
예제 #5
0
            /// <summary>
            /// Serializes <paramref name="al"/> into the backing byte storage at the current offset, records an
            /// index entry for it in <c>_bamAlignmentList</c> and advances the offset by the serialized size.
            /// </summary>
            /// <param name="al">The alignment to serialize.</param>
            /// <returns>True when the alignment was stored; false when the backing storage has no room left.</returns>
            /// <exception cref="InvalidOperationException">
            /// Serialization still fails after moving to the start of the next sub-array.
            /// </exception>
            public bool AddSerializedAlignment(ref BamAlignment al)
            {
                byte[] buffer = null;
                int cursor = 0;

                if (!_serializedAlignments.GetByteArrayAndOffset(_offset, ref buffer, ref cursor))
                {
                    // Out of space
                    return false;
                }

                int cursorAtStart = cursor;

                if (!SerializeAlignment(ref al, ref buffer, ref cursor))
                {
                    // The record did not fit in the remainder of this sub-array, so skip ahead to the first
                    // element of the next one. Example with a sub-array size of 1000 and an offset of 1987:
                    // 1987 % 1000 = 987, 1000 - 987 = 13, 1987 + 13 = 2000 (start of array number 2, zero-based).
                    var subArraySize = _serializedAlignments.GetMaxArraySize();
                    _offset += subArraySize - (_offset % subArraySize);

                    if (!_serializedAlignments.GetByteArrayAndOffset(_offset, ref buffer, ref cursor))
                    {
                        // Out of space
                        return false;
                    }

                    cursorAtStart = cursor;

                    if (!SerializeAlignment(ref al, ref buffer, ref cursor))
                    {
                        // We just checked that we have space. This should never fail.
                        throw new InvalidOperationException("Error: Check available memory. Serialization of alignment failed.");
                    }
                }

                int alignmentSize = cursor - cursorAtStart;

                if (_bamAlignmentList.Count == 1000)
                {
                    // Assume the first 1000 records are representative of the typical size and
                    // add 20% so that a later reallocation of the list is unlikely.
                    var typicalRecordSize = _offset / 1000;
                    _bamAlignmentList.Capacity = (int)(1.2 * _serializedAlignments.SizeInBytes / typicalRecordSize);
                }

                var indexEntry = new SerializedBamAlignment(
                    _offset,
                    alignmentSize,
                    al.RefID,
                    al.Position,
                    al.AlignmentFlag,
                    al.FragmentLength,
                    al.MapQuality,
                    al.MatePosition,
                    al.MateRefID,
                    al.IsReverseStrand());
                _bamAlignmentList.Add(indexEntry);

                _offset += (UInt64)alignmentSize;

                return true;
            }
예제 #6
0
        /// <summary>
        /// Stores a copy of <paramref name="alignment"/> in the slot matching its kind (primary, supplementary
        /// or secondary) and read number, updating the fragment size, pair orientation, position range,
        /// primary-read count and improper-pair flag along the way.
        /// </summary>
        /// <param name="alignment">The alignment to add; it is copied, not stored directly.</param>
        /// <param name="readNumber">
        /// Which read of the pair this is. With <c>NA</c>, a primary alignment fills Read1 first and then Read2,
        /// while supplementary and secondary alignments go to the Read1 lists.
        /// </param>
        /// <exception cref="InvalidDataException">The primary slot for this read is already occupied.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A supplementary or secondary alignment is added with an unrecognised <paramref name="readNumber"/>.
        /// </exception>
        public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
        {
            var alignmentCopy = new BamAlignment(alignment);

            if (alignmentCopy.IsSupplementaryAlignment())
            {
                switch (readNumber)
                {
                    // Without a read number, supplementary alignments are filed under read 1.
                    case ReadNumber.Read1:
                    case ReadNumber.NA:
                        if (Read1SupplementaryAlignments == null)
                        {
                            Read1SupplementaryAlignments = new List<BamAlignment>();
                        }
                        Read1SupplementaryAlignments.Add(alignmentCopy);
                        break;

                    case ReadNumber.Read2:
                        if (Read2SupplementaryAlignments == null)
                        {
                            Read2SupplementaryAlignments = new List<BamAlignment>();
                        }
                        Read2SupplementaryAlignments.Add(alignmentCopy);
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
                }
            }
            else if (alignmentCopy.IsPrimaryAlignment())
            {
                if (FragmentSize == 0)
                {
                    FragmentSize = Math.Abs(alignmentCopy.FragmentLength);

                    // Can be either F1R2 or F2R1: the two mates must lie on opposite strands.
                    var isReverse = alignmentCopy.IsReverseStrand();
                    var normalOrientation = isReverse != alignmentCopy.IsMateReverseStrand();

                    if (normalOrientation && alignmentCopy.RefID == alignmentCopy.MateRefID)
                    {
                        // RF: the forward mate starts after the reverse mate.
                        var forwardMateIsDownstream = isReverse
                            ? alignmentCopy.MatePosition > alignmentCopy.Position
                            : alignmentCopy.Position > alignmentCopy.MatePosition;

                        if (forwardMateIsDownstream)
                        {
                            normalOrientation = false;
                        }
                    }

                    NormalPairOrientation = normalOrientation;
                }

                NumPrimaryReads++;

                if (alignmentCopy.Position > MaxPosition)
                {
                    MaxPosition = alignment.Position;
                }

                if (alignmentCopy.Position < MinPosition)
                {
                    MinPosition = alignment.Position;
                }

                switch (readNumber)
                {
                    case ReadNumber.NA:
                        if (Read1 != null && Read2 != null)
                        {
                            throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
                        }

                        // Fill the first free slot.
                        if (Read1 == null)
                        {
                            Read1 = alignmentCopy;
                        }
                        else
                        {
                            Read2 = alignmentCopy;
                        }
                        break;

                    case ReadNumber.Read1:
                        if (Read1 != null)
                        {
                            throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
                        }
                        Read1 = alignmentCopy;
                        break;

                    case ReadNumber.Read2:
                        if (Read2 != null)
                        {
                            throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
                        }
                        Read2 = alignmentCopy;
                        break;
                }
            }
            else
            {
                switch (readNumber)
                {
                    // Without a read number, secondary alignments are filed under read 1.
                    case ReadNumber.Read1:
                    case ReadNumber.NA:
                        if (Read1SecondaryAlignments == null)
                        {
                            Read1SecondaryAlignments = new List<BamAlignment>();
                        }
                        Read1SecondaryAlignments.Add(alignmentCopy);
                        break;

                    case ReadNumber.Read2:
                        if (Read2SecondaryAlignments == null)
                        {
                            Read2SecondaryAlignments = new List<BamAlignment>();
                        }
                        Read2SecondaryAlignments.Add(alignmentCopy);
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
                }
            }

            // Set as improper once we add any alignment that is flagged as improper
            if (!alignmentCopy.IsProperPair())
            {
                IsImproper = true;
            }
        }
예제 #7
0
        /// <summary>
        /// Reads the alignments of one chromosome from an indexed bam file and marks, in
        /// <paramref name="observed"/>, the start position of every read that passes the filters.
        /// </summary>
        /// <param name="bamFile">Path of the bam file to read alignments from; "<c>bamFile</c>.bai" must exist.</param>
        /// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
        /// <param name="chromosome">Name of the reference sequence whose reads are loaded.</param>
        /// <param name="coverageMode">
        /// <c>Binary</c> writes 1 into <c>observed.Data</c>; every other mode calls <c>observed.Set</c>.
        /// <c>GCContentWeighted</c> additionally records fragment lengths.
        /// </param>
        /// <param name="observed">Hit array that is updated at each kept read's position.</param>
        /// <param name="fragmentLengths">
        /// Receives, per kept read position, the fragment length clamped to [0, Int16.MaxValue];
        /// only written in <c>GCContentWeighted</c> mode.
        /// </param>
        /// <exception cref="FileNotFoundException">The .bai index file does not exist.</exception>
        /// <exception cref="ApplicationException">The bam file has no reference sequence named <paramref name="chromosome"/>.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";

            if (!File.Exists(indexPath))
            {
                throw new FileNotFoundException(string.Format("Fatal error: Bam index not found at {0}", indexPath), indexPath);
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                              string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                                            bamFile));
                }

                if (!reader.Jump(desiredRefIndex, 0))
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }

                int readCount = 0;
                int keptReadCount = 0;
                BamAlignment alignment = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    readCount++;

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate:
                    if (!alignment.IsMapped())
                    {
                        continue;
                    }
                    if (alignment.IsFailedQC())
                    {
                        continue;
                    }
                    if (alignment.IsDuplicate())
                    {
                        continue;
                    }
                    // Only forward-strand, main alignments are counted.
                    if (alignment.IsReverseStrand())
                    {
                        continue;
                    }
                    if (!alignment.IsMainAlignment())
                    {
                        continue;
                    }

                    // Require the alignment to start with 35 bases of non-indel:
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
                    {
                        continue;
                    }

                    if (isPairedEnd && !alignment.IsProperPair())
                    {
                        continue;
                    }

                    // Quit if the current reference index is different from the desired reference index.
                    // (desiredRefIndex is never -1 here, so a RefID of -1 also ends the loop.)
                    if (alignment.RefID != desiredRefIndex)
                    {
                        break;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }

                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }
예제 #8
0
파일: CanvasBin.cs 프로젝트: abladon/canvas
        /// <summary>
        /// Reads the alignments of one chromosome from an indexed bam file and marks, in
        /// <paramref name="observed"/>, the start position of every read that passes the filters.
        /// </summary>
        /// <param name="bamFile">Path of the bam file to read alignments from; "<c>bamFile</c>.bai" must exist.</param>
        /// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
        /// <param name="chromosome">Name of the reference sequence whose reads are loaded.</param>
        /// <param name="coverageMode">
        /// <c>Binary</c> writes 1 into <c>observed.Data</c>; every other mode calls <c>observed.Set</c>.
        /// <c>GCContentWeighted</c> additionally records fragment lengths.
        /// </param>
        /// <param name="observed">Hit array that is updated at each kept read's position.</param>
        /// <param name="fragmentLengths">
        /// Receives, per kept read position, the fragment length clamped to [0, Int16.MaxValue];
        /// only written in <c>GCContentWeighted</c> mode.
        /// </param>
        /// <exception cref="FileNotFoundException">The .bai index file does not exist.</exception>
        /// <exception cref="ApplicationException">The bam file has no reference sequence named <paramref name="chromosome"/>.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";
            if (!File.Exists(indexPath))
            {
                throw new FileNotFoundException(string.Format("Fatal error: Bam index not found at {0}", indexPath), indexPath);
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                        string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                        bamFile));
                }

                if (!reader.Jump(desiredRefIndex, 0))
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this 
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }

                int readCount = 0;
                int keptReadCount = 0;
                BamAlignment alignment = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    readCount++;

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate:
                    if (!alignment.IsMapped()) { continue; }
                    if (alignment.IsFailedQC()) { continue; }
                    if (alignment.IsDuplicate()) { continue; }
                    // Only forward-strand, main alignments are counted.
                    if (alignment.IsReverseStrand()) { continue; }
                    if (!alignment.IsMainAlignment()) { continue; }

                    // Require the alignment to start with 35 bases of non-indel:
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35) { continue; }

                    if (isPairedEnd && !alignment.IsProperPair()) { continue; }

                    // Quit if the current reference index is different from the desired reference index.
                    // (desiredRefIndex is never -1 here, so a RefID of -1 also ends the loop.)
                    if (alignment.RefID != desiredRefIndex)
                    {
                        break;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }

                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }