Example #1
0
        /// <summary>
        /// Builds an index by scanning a sorted BAM file from its first alignment onwards.
        /// </summary>
        /// <param name="bamFilePath">Path to BAM file</param>
        public BamIndex(string bamFilePath)
        {
            _numUnalignedWithoutCoordinates = 0;

            using (var bamReader = new BamReader(bamFilePath))
            {
                // size the reference index from the reference list before reading any alignment
                var referenceSequences = bamReader.GetReferences();
                Initialize(referenceSequences.Count, bamReader.Tell());

                var current      = new BamAlignment();
                var keepIndexing = true;

                // feed alignments to the index until the reader runs dry or the updater asks to stop
                while (keepIndexing && bamReader.GetNextAlignment(ref current, true))
                {
                    keepIndexing = UpdateReferenceIndex(ref current, bamReader.Tell());
                }

                // finish the index using the reader offset at which the scan stopped
                PostProcessing(bamReader.Tell());

                if (_hasUnalignedReads)
                {
                    // every record still left in the file is tallied as unaligned without coordinates
                    while (bamReader.GetNextAlignment(ref current, true))
                    {
                        _numUnalignedWithoutCoordinates++;
                    }
                }
            }
        }
Example #2
0
 /// <summary>
 /// Sets every base quality of the selected reads to zero, skipping any read that was realigned.
 /// </summary>
 /// <param name="read1">First read; modified in place when selected and not realigned.</param>
 /// <param name="read2">Second read; modified in place when selected and not realigned.</param>
 /// <param name="readsToSilence">1 selects read1, 2 selects read2, 3 selects both; any other value selects neither.</param>
 /// <param name="realignedR1">When true, read1 is left untouched.</param>
 /// <param name="realignedR2">When true, read2 is left untouched.</param>
 private void SilenceReads(BamAlignment read1, BamAlignment read2, int readsToSilence, bool realignedR1, bool realignedR2)
 {
     var read1Selected = readsToSilence == 1 || readsToSilence == 3;
     var read2Selected = readsToSilence == 2 || readsToSilence == 3;

     if (read1Selected && !realignedR1)
     {
         var index = 0;
         while (index < read1.Qualities.Length)
         {
             read1.Qualities[index] = 0;
             index++;
         }
     }

     if (read2Selected && !realignedR2)
     {
         var index = 0;
         while (index < read2.Qualities.Length)
         {
             read2.Qualities[index] = 0;
             index++;
         }
     }
 }
Example #3
0
        public void AddCombinedStatusStringTags()
        {
            // Arrange: both mates and the output alignment each carry their own "HI" tag value
            var statusCounter = new ReadStatusCounter();
            var statusHandler = new DebugSummaryStatusHandler(statusCounter);
            var readPair      = TestHelpers.GetPair("10M", "10M");

            readPair.Read1.ReplaceOrAddStringTag("HI", "read1_hi");
            readPair.Read2.ReplaceOrAddStringTag("HI", "read2_hi");

            var combined = new BamAlignment(readPair.Read1);
            combined.ReplaceOrAddStringTag("HI", "nothing");

            // Act
            statusHandler.AddCombinedStatusStringTags("HI", readPair.Read1, readPair.Read2, combined);

            // Assert: the tag on the output alignment is expected to be left as it was
            Assert.Equal("nothing", combined.GetStringTag("HI"));
        }
Example #4
0
        public void IsReadCollapsed()
        {
            // Arrange: a read wrapping an alignment that has no tag data assigned yet
            var bamAlignment = new BamAlignment
            {
                Bases        = "ACTC",
                Position     = 5,
                MapQuality   = 343,
                MatePosition = 12312,
                Qualities    = new byte[] { 20, 21, 30, 40 },
                CigarData    = new CigarAlignment("1S3M")
            };
            var collapsibleRead = new Read("chr1", bamAlignment);

            // Without tag data the read is not reported as collapsed
            Assert.False(collapsibleRead.IsCollapsedRead());

            // Once the read-count tags (XV and XW) are set on the alignment, it is
            bamAlignment.TagData = ReadTestHelper.GetReadCountsTagData(1, 10);
            Assert.True(collapsibleRead.IsCollapsedRead());
        }
Example #5
0
        /// <summary>
        /// Serialize alignment to a byte array, for later flushing to output file.
        /// The array holds, in order: the block-size prefix, the core alignment fields, the
        /// null-terminated read name, the packed CIGAR, the packed bases, the base qualities
        /// and the raw tag data.
        /// </summary>
        /// <param name="al">Alignment to serialize. Name, Bases, CigarData, Qualities and TagData are all dereferenced, so none of them may be null.</param>
        /// <returns>A newly allocated buffer containing the serialized record.</returns>
        public byte[] SerializeAlignment(ref BamAlignment al)
        {
            // initialize
            // nameLen counts one extra byte for the terminator written by AddNullTerminatedString below
            uint nameLen            = (uint)al.Name.Length + 1;
            uint numBases           = (uint)al.Bases.Length;
            uint numCigarOperations = (uint)al.CigarData.Count;
            // four bytes are reserved per CIGAR operation
            uint packedCigarLen     = numCigarOperations * 4;
            // two bases per byte, rounded up for an odd base count
            uint numEncodedBases    = (uint)((numBases / 2.0) + 0.5);
            uint tagDataLen         = (uint)al.TagData.Length;
            // NOTE(review): the quality section is sized by numBases here, but the copy further down
            // writes al.Qualities.Length bytes - this assumes both lengths are equal; confirm with callers
            uint dataBlockSize      = nameLen + packedCigarLen + numEncodedBases + numBases + tagDataLen;
            uint alignBlockSize     = BamConstants.CoreAlignmentDataLen + dataBlockSize;
            // the extra 4 bytes presumably hold the block-size prefix written first - TODO confirm AddUIntBytes writes 4 bytes
            uint blockSize          = alignBlockSize + 4;

            byte[] buffer = new byte[blockSize];
            int    offset = 0;

            // store the block size
            BinaryIO.AddUIntBytes(ref buffer, ref offset, alignBlockSize);

            // store the BAM core data
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.RefID);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.Position);
            // bin, mapping quality and name length are packed into a single unsigned value
            BinaryIO.AddUIntBytes(ref buffer, ref offset, (al.Bin << 16) | (al.MapQuality << 8) | nameLen);
            // alignment flag and CIGAR operation count are packed into a single unsigned value
            BinaryIO.AddUIntBytes(ref buffer, ref offset, (al.AlignmentFlag << 16) | numCigarOperations);
            BinaryIO.AddUIntBytes(ref buffer, ref offset, numBases);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.MateRefID);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.MatePosition);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.FragmentLength);

            // store the alignment name
            BinaryIO.AddNullTerminatedString(ref buffer, ref offset, al.Name);

            // store the packed CIGAR string and packed bases
            PackCigar(ref offset, ref buffer, al.CigarData);
            PackBases(ref offset, ref buffer, numEncodedBases, al.Bases);

            // store the base qualities
            Buffer.BlockCopy(al.Qualities, 0, buffer, offset, al.Qualities.Length);
            offset += al.Qualities.Length;

            // store the tag data
            Buffer.BlockCopy(al.TagData, 0, buffer, offset, al.TagData.Length);
            offset += al.TagData.Length;

            return(buffer);
        }
        /// <summary>
        /// Creates a <see cref="BamAlignment"/> with empty qualities and the requested name, position,
        /// CIGAR, mapping quality and pairing/mapping flags.
        /// </summary>
        /// <param name="name">Read name.</param>
        /// <param name="isProperPair">Value passed to SetIsProperPair.</param>
        /// <param name="position">Alignment position.</param>
        /// <param name="cigarData">CIGAR string used to build the CigarAlignment.</param>
        /// <param name="isUnMapped">Value passed to SetIsUnmapped.</param>
        /// <param name="mateIsUnMapped">Value passed to SetIsMateUnmapped.</param>
        /// <param name="mapQ">Mapping quality.</param>
        /// <returns>The configured alignment.</returns>
        public static BamAlignment CreateAlignment(string name, bool isProperPair = true, int position = 0, string cigarData = "3M", bool isUnMapped = false, bool mateIsUnMapped = false, uint mapQ = 30)
        {
            var created = new BamAlignment
            {
                Name       = name,
                Qualities  = new byte[0],
                CigarData  = new CigarAlignment(cigarData),
                Position   = position,
                MapQuality = mapQ
            };

            // flag bits are applied through the alignment's own setters
            created.SetIsProperPair(isProperPair);
            created.SetIsUnmapped(isUnMapped);
            created.SetIsMateUnmapped(mateIsUnMapped);

            return created;
        }
Example #7
0
        /// <summary>
        /// Advances to the next alignment that passes the reference filter and lies before the end of the
        /// current interval (when interval jumping applies), and loads it into <paramref name="read"/>.
        /// </summary>
        /// <param name="read">Read instance that is reset with the next alignment.</param>
        /// <returns>True when <paramref name="read"/> was populated; false when no further alignment is available, in which case this instance has been disposed.</returns>
        /// <exception cref="Exception">Thrown when the underlying reader is null.</exception>
        public bool GetNextAlignment(Read read)
        {
            if (_bamReader == null)
            {
                throw new Exception("Already disposed.");
            }

            for (;;)
            {
                Region activeInterval = null;

                if (_rawAlignment == null)
                {
                    // first call: nothing has been read yet, so there is no position to jump from
                    _rawAlignment = new BamAlignment();
                }
                else
                {
                    // a null interval list signals that interval jumping is not applied for this chromosome
                    var chrIntervals = GetIntervalsForChr(_rawAlignment.RefID);
                    if (chrIntervals != null && !JumpIfNeeded(chrIntervals, out activeInterval))
                    {
                        Dispose();
                        return false;
                    }
                }

                var gotAlignment = _bamReader.GetNextAlignment(ref _rawAlignment, false);
                if (!gotAlignment || (_bamIndexFilter > -1 && _rawAlignment.RefID != _bamIndexFilter))
                {
                    Dispose();
                    return false;
                }

                if (activeInterval != null && _rawAlignment.Position >= activeInterval.EndPosition)
                {
                    // read off the end of the interval - loop again to jump to the next one or scan to the end
                    continue;
                }

                var matchingReference = _references.FirstOrDefault(r => r.Index == _rawAlignment.RefID);
                read.Reset(matchingReference?.Name, _rawAlignment);
                return true;
            }
        }
Example #8
0
        public void FindVariantMNVResults()
        {
            // Scenario: a soft-clipped read is checked against two VCF sites - an MNV at position 4
            // and a deletion at position 10.
            var read = new BamAlignment();

            read.Bases = "AA" + "ACGTACGT" + "GGGG";
            //vcf coords  12-345678910-11,12,13,14
            read.CigarData = new CigarAlignment("2S8M4S");
            read.Position  = 3 - 1;
            read.Qualities = new byte[read.Bases.Length];

            var vs1 = new VariantSite();

            vs1.VcfReferencePosition = 4;
            vs1.VcfReferenceAllele   = "TA";
            vs1.VcfAlternateAllele   = "CG"; //read should match ALT for this test

            var vs2 = new VariantSite();

            vs2.VcfReferencePosition = 10;
            vs2.VcfReferenceAllele   = "TTT";
            vs2.VcfAlternateAllele   = "T";

            var vsFromVcf = new List <VariantSite>()
            {
                vs1, vs2
            };

            //given a variant site, is it in the read?
            // Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
            ExecuteTest(read, 0, vsFromVcf, (foundVariants) =>
            {
                Assert.Equal(1, foundVariants[SubsequenceType.MatchOrMismatchSequence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.InsertionSquence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.DeletionSequence].Count);
            }, (matchedVariants) =>
            {
                Assert.Equal(4, matchedVariants[0].VcfReferencePosition);
                Assert.Equal("TA", matchedVariants[0].VcfReferenceAllele);
                Assert.Equal("CG", matchedVariants[0].VcfAlternateAllele);

                // the deletion is not supported by the read,
                // so we just return T>T, a reference call at this locus.
                Assert.Equal(10, matchedVariants[1].VcfReferencePosition);
                Assert.Equal("T", matchedVariants[1].VcfReferenceAllele);
                Assert.Equal("T", matchedVariants[1].VcfAlternateAllele);
            });
        }
Example #9
0
            /// <summary>
            /// Reports whether a read should be rejected for any of these reasons:
            /// 1. it is flagged as a duplicate,
            /// 2. it is flagged as having failed QC,
            /// 3. its mapping quality equals the "not available" constant or is below the threshold.
            /// </summary>
            /// <param name="alignment">Alignment to inspect.</param>
            /// <param name="qualityThreshold">Mapping qualities strictly below this value count as low.</param>
            /// <returns>True when at least one of the conditions holds; otherwise false.</returns>
            public static bool IsDuplicateFailedQCLowQuality(BamAlignment alignment, uint qualityThreshold)
            {
                // checks are evaluated in the listed order and stop at the first one that holds
                return alignment.IsDuplicate()
                       || alignment.IsFailedQC()
                       || alignment.MapQuality == FragmentBinnerConstants.MappingQualityNotAvailable
                       || alignment.MapQuality < qualityThreshold;
            }
        /// <summary>
        /// Queues an alignment in the buffer selected by <paramref name="bufferNumber"/> and, once that
        /// buffer holds at least MAX_BUFFER_SIZE entries, writes all of them out and empties the buffer.
        /// </summary>
        /// <param name="al">Alignment to queue.</param>
        /// <param name="bufferNumber">Index into the alignment buffer collection.</param>
        private void WriteAlignment(BamAlignment al, int bufferNumber)
        {
            var pending = _alignmentBuffer[bufferNumber];
            pending.Add(al);

            if (pending.Count < MAX_BUFFER_SIZE)
            {
                return;
            }

            // the flush itself runs while holding the lock on the buffer collection
            lock (_alignmentBuffer)
            {
                foreach (var queued in pending)
                {
                    WriteAlignment(queued);
                }
            }

            pending.Clear();
        }
Example #11
0
        public void GetEndPosition_Tests()
        {
            // CIGAR mixing matches, an insertion and a deletion, starting at 500
            var mixedOperations = new BamAlignment { Position = 500, CigarData = new CigarAlignment("5M7I19M3D") };
            Assert.Equal(527, mixedOperations.GetEndPosition());

            // CIGAR consisting solely of an insertion: the end position stays at the start position
            var insertionOnly = new BamAlignment { Position = 500, CigarData = new CigarAlignment("3I") };
            Assert.Equal(500, insertionOnly.GetEndPosition());
        }
Example #12
0
        /// <summary>
        /// Builds a <see cref="Read"/> on the given chromosome from a freshly created alignment
        /// configured with the supplied sequence, position, name, mapping quality and flags.
        /// </summary>
        /// <param name="addCigarData">When true, the alignment gets a CIGAR of the sequence length followed by "M".</param>
        /// <returns>The read wrapping the configured alignment.</returns>
        private Read CreateRead(string chr, string sequence, int position, string name, bool isMapped = true,
                                bool isPrimaryAlignment = true, bool isProperPair = true, bool isDuplicate = false, int mapQuality = 10, bool addCigarData = true)
        {
            var bamAlignment = new BamAlignment
            {
                Bases      = sequence,
                Position   = position,
                Name       = name,
                MapQuality = (uint)mapQuality
            };

            // the alignment setters take the negated form for the mapped and primary flags
            bamAlignment.SetIsUnmapped(!isMapped);
            bamAlignment.SetIsSecondaryAlignment(!isPrimaryAlignment);
            bamAlignment.SetIsDuplicate(isDuplicate);
            bamAlignment.SetIsProperPair(isProperPair);

            if (addCigarData)
            {
                var allMatchCigar = sequence.Length + "M";
                bamAlignment.CigarData = new CigarAlignment(allMatchCigar);
            }

            return new Read(chr, bamAlignment);
        }
Example #13
0
        /// <summary>
        /// Runs the stitcher processor on <paramref name="inBam"/> and checks that the produced BAM matches
        /// the expected BAM read-by-read on bases, position and qualities.
        /// </summary>
        /// <param name="inBam">Input BAM path handed to the processor.</param>
        /// <param name="outBam">Path where the processor output is expected; deleted up front if it already exists.</param>
        /// <param name="expBam">BAM holding the expected reads.</param>
        /// <param name="outFolder">Output folder handed to the processor.</param>
        /// <param name="threadbyChr">Selects GenomeProcessor when true, BamProcessor otherwise.</param>
        /// <param name="stitcherOptions">Options handed to the processor.</param>
        private static void RunProcessorTest(string inBam, string outBam, string expBam, string outFolder, bool threadbyChr, StitcherOptions stitcherOptions)
        {
            if (File.Exists(outBam))
            {
                File.Delete(outBam);
            }

            Logger.OpenLog(TestPaths.LocalScratchDirectory, "StitcherTestLog.txt", true);
            try
            {
                var processor = threadbyChr ? (IStitcherProcessor) new GenomeProcessor(inBam) : new BamProcessor();

                processor.Process(inBam, outFolder, stitcherOptions);
            }
            finally
            {
                // close the log even when processing throws, so a failing run does not leave it open
                Logger.CloseLog();
            }

            Assert.True(File.Exists(outBam));

            var observedAlignment = new BamAlignment();
            var expectedAlignment = new BamAlignment();

            using (var outReader = new BamReader(outBam))
                using (var expReader = new BamReader(expBam))
                {
                    while (true)
                    {
                        var nextObservation = outReader.GetNextAlignment(ref observedAlignment, true);

                        var nextExpected = expReader.GetNextAlignment(ref expectedAlignment, true);

                        if ((nextExpected == false) || (expectedAlignment == null))
                        {
                            break;
                        }

                        // without this check a truncated output would be compared against whatever the
                        // previous iteration left in observedAlignment
                        Assert.True(nextObservation, "Output BAM has fewer reads than the expected BAM.");

                        Assert.Equal(expectedAlignment.Bases, observedAlignment.Bases);
                        Assert.Equal(expectedAlignment.Position, observedAlignment.Position);
                        Assert.Equal(expectedAlignment.Qualities, observedAlignment.Qualities);
                    }

                    outReader.Close();
                    expReader.Close();
                }
        }
        /// <summary>
        /// Builds a synthetic read for the given alignment description: an all-'A' sequence as long as the
        /// CIGAR read span, optionally carrying a run of 'G' bases, with uniform base qualities and an
        /// "XD" tag holding the alignment's directions.
        /// </summary>
        /// <param name="alignment">Source of the CIGAR, position and directions.</param>
        /// <param name="qualityForAll">Quality value assigned to every base.</param>
        /// <param name="MNVdata">Item1 is the 1-based offset of the 'G' run within the read, Item2 its length; a length of 0 or less leaves the sequence all 'A'.</param>
        /// <returns>The constructed read, or null when any step of the construction throws.</returns>
        private static BamAlignment BuildRead(AbstractAlignment alignment,
                                              byte qualityForAll, Tuple <int, int> MNVdata)
        {
            int mnvStart  = MNVdata.Item1;
            int mnvLength = MNVdata.Item2;

            try
            {
                var cigar    = new CigarAlignment(alignment.Cigar);
                var readSpan = (int)cigar.GetReadSpan();

                string bases;
                if (mnvLength > 0)
                {
                    // 'A' padding on both sides of the 'G' run, keeping the total at readSpan bases
                    var leftPad  = new string('A', mnvStart - 1);
                    var mnvRun   = new string('G', mnvLength);
                    var rightPad = new string('A', readSpan - leftPad.Length - mnvRun.Length);
                    bases = leftPad + mnvRun + rightPad;
                }
                else
                {
                    bases = new string('A', readSpan);
                }

                var tagBuilder = new TagUtils();
                tagBuilder.AddStringTag("XD", alignment.Directions);

                return new BamAlignment()
                {
                    RefID      = 1,
                    Position   = alignment.Position - 1,
                    CigarData  = cigar,
                    Bases      = bases,
                    TagData    = tagBuilder.ToBytes(),
                    Qualities  = Enumerable.Repeat(qualityForAll, readSpan).ToArray(),
                    MapQuality = 50
                };
            }
            catch
            {
                // any failure while building the read is reported to the caller as null
                return null;
            }
        }
Example #15
0
        public void GetTag_Tests()
        {
            // create tag data holding an int tag, a string tag and a char tag
            TagUtils tagUtils = new TagUtils();

            tagUtils.AddIntTag("NM", 5);
            tagUtils.AddStringTag("XU", "ABCD");
            tagUtils.AddCharTag("XP", '?');
            byte[] tagData   = tagUtils.ToBytes();
            var    alignment = new BamAlignment()
            {
                TagData = tagData
            };

            // string tag scenarios
            Assert.Equal("ABCD", alignment.GetStringTag("XU"));
            Assert.Equal("?", alignment.GetStringTag("XP"));

            // asking for an int tag as a string throws
            Assert.Throws <ApplicationException>(() => alignment.GetStringTag("NM"));

            // a tag that is not present yields null; Assert.Null states that intent directly
            Assert.Null(alignment.GetStringTag("AB"));
        }
Example #16
0
        /// <summary>
        /// Decides whether two reads should be treated as non-overlapping.
        /// </summary>
        /// <param name="read1">First read.</param>
        /// <param name="read2">Second read.</param>
        /// <returns>
        /// False when the alignments overlap. Otherwise true when half-anchored reads are treated as
        /// unanchored, or when neither read's anchored region contains the other's unanchored ends.
        /// </returns>
        private bool ReadsDoNotOverlap(BamAlignment read1, BamAlignment read2)
        {
            if (read1.OverlapsAlignment(read2))
            {
                return false;
            }

            if (_treatHalfAnchoredAsUnanchored)
            {
                return true;
            }

            // half-anchoring is allowed: check for S/M overlap in both directions
            var read1CoversRead2Ends = AnchoredRegionContainsUnanchoredEnds(read1, read2);
            var read2CoversRead1Ends = AnchoredRegionContainsUnanchoredEnds(read2, read1);

            return !read1CoversRead2Ends && !read2CoversRead1Ends;
        }
Example #17
0
        /// <summary>
        /// Computes the sum of the alignment's mismatches against the genome snippet and the number of
        /// indel bases in its CIGAR.
        /// </summary>
        /// <param name="alignment">Alignment whose bases, position and CIGAR are evaluated.</param>
        /// <returns>Mismatch count plus indel base count.</returns>
        /// <exception cref="Exception">Thrown when the mismatch count cannot be determined (the helper returned null).</exception>
        public int GetNm(BamAlignment alignment)
        {
            // map each base to its reference position; the map is seeded with Position + 1
            var basePositions = new PositionMap(alignment.Bases.Length);
            Read.UpdatePositionMap(alignment.Position + 1, alignment.CigarData, basePositions);

            var genomeSnippet = _genomeSnippetSource.GetGenomeSnippet(alignment.Position);
            var mismatchCount = Helper.GetNumMismatches(
                alignment.Bases,
                basePositions,
                genomeSnippet.Sequence,
                genomeSnippet.StartPosition);

            if (mismatchCount != null)
            {
                return mismatchCount.Value + alignment.CigarData.NumIndelBases();
            }

            throw new Exception("Num mismatches is null");
        }
Example #18
0
        public void ProcessInsertionReadTest()
        {
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-1478/2_fwd	121432113	72M3S	CGGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTACCTA
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-662/2_fwd	121432113	72M3S	CGGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTACCTA
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-1308/2_rev	121432114	71M3I1M	GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTAC-CTA-C
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-64/2_rev	121432114	    71M3I1M	GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTAC-TTA-C
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-1322/2_rev	121432114	75M	GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTGC-CCTC

            // Scenario: a read with a 3-base insertion (71M3I1M) is checked against one insertion site from the VCF.
            var read = new BamAlignment();

            read.Bases = "GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTACCTAC";
            //vcf coords  12-345678910-11,12,13,14
            read.CigarData = new CigarAlignment("71M3I1M");
            read.Position  = 121432114;
            read.Qualities = new byte[read.Bases.Length];

            var vs1 = new VariantSite();

            vs1.VcfReferencePosition = 121432185;
            vs1.VcfReferenceAllele   = "C";
            vs1.VcfAlternateAllele   = "CCTA"; //read should match ALT for this test


            var vsFromVcf = new List <VariantSite>()
            {
                vs1
            };

            //given a variant site, is it in the read?
            // Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
            ExecuteTest(read, 0, vsFromVcf, (foundVariants) =>
            {
                Assert.Equal(2, foundVariants[SubsequenceType.MatchOrMismatchSequence].Count);
                Assert.Equal(1, foundVariants[SubsequenceType.InsertionSquence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.DeletionSequence].Count);
            }, (matchedVariants) =>
            {
                Assert.Equal(121432185, matchedVariants[0].VcfReferencePosition);
                Assert.Equal("C", matchedVariants[0].VcfReferenceAllele);
                Assert.Equal("CCTA", matchedVariants[0].VcfAlternateAllele);
            });
        }
Example #19
0
        public void ProcessOneDeletionReadTest()
        {
            //reads with deletions, S102
            //       16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1014/2_rev_121416520	121416520	75M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGCCAGCTG
            //16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1484/2_fwd_121416520	121416520	68M18D7M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG
            //16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1320/2_rev_121416520	121416520	68M18D7M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG
            //16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1076/2_rev_121416520	121416520	68M18D7M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG
            //416187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-850/2_rev_121416520	121416520	75M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGCCAGCTG

            // Scenario: a read with an 18-base deletion (68M18D7M) is checked against one deletion site from the VCF.
            var read = new BamAlignment();

            read.Bases     = "AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG";
            read.CigarData = new CigarAlignment("68M18D7M");
            read.Position  = 121416520;
            read.Qualities = new byte[read.Bases.Length];

            var vs1 = new VariantSite();

            vs1.VcfReferencePosition = 121416588;
            vs1.VcfReferenceAllele   = "GCCAGCTGCAGACGGAGCT";
            vs1.VcfAlternateAllele   = "G"; //read should match ALT for this test


            var vsFromVcf = new List <VariantSite>()
            {
                vs1
            };

            // Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
            ExecuteTest(read, 0, vsFromVcf, (foundVariants) =>
            {
                Assert.Equal(2, foundVariants[SubsequenceType.MatchOrMismatchSequence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.InsertionSquence].Count);
                Assert.Equal(1, foundVariants[SubsequenceType.DeletionSequence].Count);
            },
                        (matchedVariants) =>
            {
                Assert.Equal(121416588, matchedVariants[0].VcfReferencePosition);
                Assert.Equal("GCCAGCTGCAGACGGAGCT", matchedVariants[0].VcfReferenceAllele);
                Assert.Equal("G", matchedVariants[0].VcfAlternateAllele);
            });
        }
Example #20
0
        /// <summary>
        /// Records that a realignment result was rejected and, when configured to do so, softclips the read
        /// after each poorly anchored pre-existing indel that is not among the existing matches.
        /// </summary>
        /// <param name="origBamAlignment">Original alignment; receives status tags and may be softclipped in place.</param>
        /// <param name="forcedSoftclip">Set to true when at least one indel is softclipped out; otherwise left unchanged.</param>
        /// <param name="existingIndels">Indels already present in the read.</param>
        /// <param name="realignResult">The rejected realignment result; a null value is tolerated and logged with empty indel text.</param>
        /// <param name="hasExistingUnsanctionedIndels">Whether the read carries indels that were not sanctioned.</param>
        /// <param name="existingMatches">Existing indels that are considered sanctioned.</param>
        private void HandleFailedRealignment(BamAlignment origBamAlignment, ref bool forcedSoftclip, List <PreIndel> existingIndels,
                                             RealignmentResult realignResult, bool hasExistingUnsanctionedIndels,
                                             List <PreIndel> existingMatches)
        {
            // read the indels through a null-conditional once, so that every use below treats a null
            // result the same way (the softclip message further down already guarded against it)
            var rejectedIndels = realignResult?.Indels;

            _statusCounter.AddStatusCount("INDEL STATUS\tRejected\t" + rejectedIndels);
            _statusCounter.AppendStatusStringTag("RX", "Did not accept: " + rejectedIndels, origBamAlignment);

            // TODO could this be happening because of a low-ranked indel? Maybe we should be allowing to realign against all indels...
            // TODO STILL should this actually be happening also to reads that had no indels to realign around (i.e. started with weak indel, and couldn't go anywhere), not just the ones that were changed?
            if (_softclipUnknownIndels && hasExistingUnsanctionedIndels)
            {
                var unsanctioned = existingIndels.Where(x => !existingMatches.Contains(x));

                foreach (var preIndel in unsanctioned.OrderBy(x => x.ReferencePosition))
                {
                    // clip towards the shorter anchor: the left one when it is strictly shorter, otherwise the right one
                    var reverseClip = false;
                    var clipLength  = preIndel.RightAnchor;
                    if (preIndel.LeftAnchor < preIndel.RightAnchor)
                    {
                        reverseClip = true;
                        clipLength  = preIndel.LeftAnchor;
                    }

                    // TODO arbitrary number here...
                    // If it's pretty well-anchored, don't remove the indel
                    if (clipLength > 20)
                    {
                        continue;
                    }

                    forcedSoftclip = true;
                    _statusCounter.AddStatusCount("Softclipped out bad indel");
                    _statusCounter.AppendStatusStringTag("RX",
                                                         $"Softclipped out bad indel({origBamAlignment.CigarData},{string.Join(",", existingIndels)}...{rejectedIndels}",
                                                         origBamAlignment);
                    _statusCounter.AddStatusCount("INDEL STATUS\tRemoved\t" + string.Join("|", existingIndels));
                    OverlappingIndelHelpers.SoftclipAfterIndel(origBamAlignment,
                                                               reverseClip, preIndel.ReferencePosition);
                }
            }
        }
Example #21
0
        public void ReadCollapsedCounts()
        {
            // Each case assigns new read-count tag data and wraps the alignment in a fresh Read.
            var bamAlignment = new BamAlignment
            {
                Bases        = "ACTC",
                Position     = 5,
                MapQuality   = 343,
                MatePosition = 12312,
                Qualities    = new byte[] { 20, 21, 30, 40 },
                CigarData    = new CigarAlignment("1S3M")
            };

            // both counts present and non-zero: duplex
            bamAlignment.TagData = DomainTestHelper.GetReadCountsTagData(5, 10);
            Assert.True(new Read("chr1", bamAlignment).IsDuplex);

            // first tag is 0
            bamAlignment.TagData = DomainTestHelper.GetReadCountsTagData(0, 5);
            Assert.False(new Read("chr1", bamAlignment).IsDuplex);

            // first tag is missing
            bamAlignment.TagData = DomainTestHelper.GetReadCountsTagData(null, 5);
            Assert.False(new Read("chr1", bamAlignment).IsDuplex);

            // second tag is 0
            bamAlignment.TagData = DomainTestHelper.GetReadCountsTagData(5, 0);
            Assert.False(new Read("chr1", bamAlignment).IsDuplex);

            // second tag is missing
            bamAlignment.TagData = DomainTestHelper.GetReadCountsTagData(5, null);
            Assert.False(new Read("chr1", bamAlignment).IsDuplex);

            // both tags 0
            bamAlignment.TagData = DomainTestHelper.GetReadCountsTagData(0, 0);
            Assert.False(new Read("chr1", bamAlignment).IsDuplex);

            // both tags missing
            bamAlignment.TagData = DomainTestHelper.GetReadCountsTagData(null, null);
            Assert.False(new Read("chr1", bamAlignment).IsDuplex);
        }
Example #22
0
        public void FromBam()
        {
            // Scenario: a Read built from a BamAlignment mirrors the alignment's fields, with
            // Position and MatePosition shifted by one.
            var alignment = new BamAlignment
            {
                Bases        = "ATCTTA",
                Position     = 100,
                MatePosition = 500,
                Name         = "test",
                CigarData    = new CigarAlignment("5M1S"),
                MapQuality   = 10,
                Qualities    = new[] { (byte)10, (byte)20, (byte)30 }
            };

            alignment.SetIsDuplicate(true);
            alignment.SetIsProperPair(true);
            alignment.SetIsSecondaryAlignment(true);
            alignment.SetIsUnmapped(true);

            var read = new Read("chr1", alignment);

            // Assert.Equal takes (expected, actual): the alignment is the source of truth, the read is under test.
            Assert.Equal("chr1", read.Chromosome);
            Assert.Equal(alignment.Bases, read.Sequence);
            Assert.Equal(alignment.Position + 1, read.Position);
            Assert.Equal(alignment.MatePosition + 1, read.MatePosition);
            Assert.Equal(alignment.Name, read.Name);
            Assert.Equal(alignment.CigarData, read.CigarData);
            Assert.Equal(alignment.IsMapped(), read.IsMapped);
            Assert.Equal(alignment.IsProperPair(), read.IsProperPair);
            Assert.Equal(alignment.IsPrimaryAlignment(), read.IsPrimaryAlignment);
            Assert.Equal(alignment.IsDuplicate(), read.IsPcrDuplicate);

            foreach (var direction in read.SequencedBaseDirectionMap)
            {
                Assert.Equal(DirectionType.Forward, direction);
            }

            for (var i = 0; i < read.Qualities.Length; i++)
            {
                Assert.Equal(alignment.Qualities[i], read.Qualities[i]);
            }
        }
Example #23
0
        /// <summary>
        /// Reports whether a read could overlap its mate: both must be mapped, share a reference id,
        /// and start no further apart than the maximum pair gap.
        /// </summary>
        /// <param name="alignment">Alignment to inspect.</param>
        /// <returns>True when every condition holds; otherwise false.</returns>
        private bool MayOverlapMate(BamAlignment alignment)
        {
            // conditions are evaluated left to right and stop at the first failure
            var readAndMateMapped = alignment.IsMateMapped() && alignment.IsMapped();

            return readAndMateMapped
                   && alignment.RefID == alignment.MateRefID
                   && !(Math.Abs(alignment.Position - alignment.MatePosition) > _maxPairGap);
        }
Example #24
0
        /// <summary>
        /// Decides whether a read is skipped, recording a debug status count for the first reason that applies.
        /// </summary>
        /// <param name="alignment">Alignment to inspect.</param>
        /// <returns>True when the read is skipped; otherwise false.</returns>
        protected override bool ShouldSkipRead(BamAlignment alignment)
        {
            string skipReason = null;

            if (!_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
            {
                // non-zero mapping quality under the minimum, applied only when pair-level filtering is off
                skipReason = "Skipped read below mapQ";
            }
            else if (alignment.IsSupplementaryAlignment())
            {
                skipReason = "Skipped supplementary";
            }
            else if (_filterForProperPairs && !alignment.IsProperPair())
            {
                skipReason = "Skipped improper pair";
            }

            if (skipReason == null)
            {
                return false;
            }

            _statusCounter.AddDebugStatusCount(skipReason);
            return true;
        }
Example #25
0
        /// <summary>
        /// Walks the read's CIGAR and records one <see cref="IndelSite"/> per insertion or deletion operation.
        /// </summary>
        /// <param name="read">Alignment whose CIGAR is scanned; its Position seeds the reference coordinate.</param>
        /// <param name="totalIndelBases">Receives the summed length of all 'I' and 'D' operations.</param>
        /// <returns>The indel sites in CIGAR order; an empty list when the CIGAR has no 'I' or 'D' operation.</returns>
        public static List <IndelSite> GetIndelPositions(BamAlignment read, out int totalIndelBases)
        {
            totalIndelBases = 0;
            int startIndexInReference = read.Position;
            var positions             = new List <IndelSite>();

            var numOperations = read.CigarData.Count;

            for (var cigarOpIndex = 0; cigarOpIndex < numOperations; cigarOpIndex++)
            {
                var operation       = read.CigarData[cigarOpIndex];
                var operationLength = (int)operation.Length;

                // true for the first and last CIGAR operation; passed as the final IndelSite argument
                var isEdgeOperation = cigarOpIndex == 0 || cigarOpIndex == numOperations - 1;

                switch (operation.Type)
                {
                case 'I':
                    // an insertion consumes no reference bases, so the site ends at the current reference coordinate
                    positions.Add(new IndelSite(startIndexInReference - 1, startIndexInReference, operation.Type, operationLength, isEdgeOperation));
                    totalIndelBases += operationLength;
                    break;

                case 'D':
                    // a deletion is recorded with a negative length and ends past the deleted reference bases
                    positions.Add(new IndelSite(startIndexInReference - 1,
                                                startIndexInReference + operationLength, operation.Type, -operationLength, isEdgeOperation));
                    totalIndelBases += operationLength;
                    break;
                }

                // only the reference coordinate is tracked; the read-side index was never consumed
                if (operation.IsReferenceSpan())
                {
                    startIndexInReference += operationLength;
                }
            }

            return positions;
        }
Example #26
0
        /// <summary>
        /// Verifies that <c>Read.Reset</c> re-reads chromosome, mate position, duplicate flag and
        /// (when an XC tag is present) the stitched CIGAR from the supplied alignment.
        /// </summary>
        public void Reset()
        {
            var bamRecord = new BamAlignment
            {
                Bases        = "ACTC",
                Position     = 5,
                MapQuality   = 343,
                MatePosition = 12312,
                Qualities    = new byte[] { 20, 21, 30, 40 },
                CigarData    = new CigarAlignment("1S3M")
            };

            bamRecord.SetIsUnmapped(false);
            bamRecord.SetIsSecondaryAlignment(false);
            bamRecord.SetIsDuplicate(true);
            bamRecord.SetIsProperPair(true);

            var subject = new Read("chr1", bamRecord)
            {
                StitchedCigar             = new CigarAlignment("7M"),
                SequencedBaseDirectionMap = new[] { DirectionType.Forward, DirectionType.Reverse, DirectionType.Stitched, DirectionType.Reverse }
            };

            // change the underlying record, then reset onto a different chromosome
            bamRecord.MatePosition = 555;
            bamRecord.SetIsDuplicate(false);

            subject.Reset("chr2", bamRecord);
            // the read reports the mate position one higher than the raw alignment value
            Assert.Equal(556, subject.MatePosition);
            Assert.False(subject.IsPcrDuplicate);
            Assert.Equal("chr2", subject.Chromosome);

            // second reset: this time the record carries a stitched CIGAR in its tag data
            const string expectedStitchedCigar = "1S3M1S";
            bamRecord.TagData = DomainTestHelper.GetXCTagData(expectedStitchedCigar);

            subject.Reset("chr3", bamRecord);
            Assert.Equal(556, subject.MatePosition);
            Assert.False(subject.IsPcrDuplicate);
            Assert.Equal("chr3", subject.Chromosome);
            Assert.Equal(expectedStitchedCigar, subject.StitchedCigar.ToString());
        }
Example #27
0
 /// <summary>
 /// Opens the input BAM, asks the reader to jump to the unaligned reads, and copies every
 /// subsequent read that has no reference ID (RefID == -1) to the output file.
 /// </summary>
 /// <param name="writer">Destination the unaligned reads are written to.</param>
 private void WriteUnalignedReads(BamWriter writer)
 {
     Logger.WriteToLog("Writing unaligned reads");
     using (var bamReader = new BamReader(_inputFile))
     {
         // NOTE(review): the result of JumpToUnaligned() is ignored; whatever position the reader
         // ends up at, the filter below still only lets RefID == -1 reads through.
         bamReader.JumpToUnaligned();

         var candidate = new BamAlignment();
         while (bamReader.GetNextAlignment(ref candidate, false))
         {
             // reads that still carry a reference ID are skipped, not written
             if (candidate.RefID == -1)
             {
                 writer.WriteAlignment(candidate);
             }
         }
     }
 }
Example #28
0
        /// <summary>
        /// Test helper that assembles a <see cref="BamAlignment"/> from a sequence and a handful of optional overrides.
        /// </summary>
        /// <param name="sequence">Read bases; also determines the default CIGAR and quality array length.</param>
        /// <param name="position">Read position; stored on the alignment as <c>position - 1</c>.</param>
        /// <param name="matePosition">Mate position; stored on the alignment as <c>matePosition - 1</c>.</param>
        /// <param name="qualityForAll">Quality assigned to every base when <paramref name="qualities"/> is null.</param>
        /// <param name="isReverseMapped">Sets the reverse-strand flag; the mate flag is set to the opposite.</param>
        /// <param name="mapQ">Mapping quality.</param>
        /// <param name="qualities">Explicit per-base qualities, or null to use <paramref name="qualityForAll"/>.</param>
        /// <param name="cigar">Explicit CIGAR, or null for a single match operation covering the whole sequence.</param>
        /// <param name="name">Read name, or null for "Alignment".</param>
        /// <param name="isFirstMate">Value for the first-mate flag.</param>
        /// <returns>The populated alignment (RefID and MateRefID are both 1, tag data is empty).</returns>
        public static BamAlignment CreateBamAlignment(string sequence, int position,
                                                      int matePosition, byte qualityForAll, bool isReverseMapped, uint mapQ = 30, byte[] qualities = null, CigarAlignment cigar = null, string name = null, bool isFirstMate = true)
        {
            // fill in defaults for the optional pieces first
            if (cigar == null)
            {
                cigar = new CigarAlignment(sequence.Length + "M");
            }

            if (qualities == null)
            {
                qualities = Enumerable.Repeat(qualityForAll, sequence.Length).ToArray();
            }

            var result = new BamAlignment();
            result.Bases        = sequence;
            result.Position     = position - 1;
            result.CigarData    = cigar;
            result.Qualities    = qualities;
            result.MatePosition = matePosition - 1;
            result.TagData      = new byte[0];
            result.RefID        = 1;
            result.MateRefID    = 1;
            result.Name         = name ?? "Alignment";

            result.SetIsFirstMate(isFirstMate);
            result.MapQuality = mapQ;

            // the mate is always placed on the opposite strand
            result.SetIsReverseStrand(isReverseMapped);
            result.SetIsMateReverseStrand(!isReverseMapped);
            return(result);
        }
Example #29
0
        /// <summary>
        /// Exercises <c>UpdateIntTagData</c> for the three cases: tag absent and not added,
        /// tag absent and added on request, and tag already present.
        /// </summary>
        public void UpdateIntTagData_Tests()
        {
            TagUtils tagUtils = new TagUtils();

            // start from tag data produced by a TagUtils that had nothing added to it
            byte[] tagData   = tagUtils.ToBytes();
            var    alignment = new BamAlignment()
            {
                TagData = tagData
            };

            // when there was not an NM tag to begin with
            // do not add if not found
            alignment.UpdateIntTagData("NM", 4);
            // Assert.Null states the intent directly and gives a clearer failure message than Equal(null, ...)
            Assert.Null(alignment.GetIntTag("NM"));
            // add if not found
            alignment.UpdateIntTagData("NM", 4, true);
            Assert.Equal(4, alignment.GetIntTag("NM"));

            // when there was an NM tag to begin with
            alignment.UpdateIntTagData("NM", 3);
            Assert.Equal(3, alignment.GetIntTag("NM"));
        }
        /// <summary>
        /// Loads a single read and checks the resulting loading and candidate direction against the
        /// expectations stored in <paramref name="scenario"/>.
        /// </summary>
        /// <param name="read">The read to load.</param>
        /// <param name="options">Application options forwarded to <c>LoadReads</c>.</param>
        /// <param name="chrInfo">Reference information forwarded to <c>LoadReads</c>.</param>
        /// <param name="isVariant">Selects the variant expectations of the scenario instead of the reference ones.</param>
        /// <param name="scenario">Source of the expected loading and candidate direction.</param>
        /// <returns>
        /// A two-element array (loading result, direction result), or a one-element array holding a
        /// failure message when any step returns null.
        /// </returns>
        public static string[] CheckReadLoading(BamAlignment read, PiscesApplicationOptions options, ChrReference chrInfo, bool isVariant, StitchingScenario scenario)
        {
            // reference reads expect the scenario's ref loading and a direction of "0"
            string expectedLoading   = isVariant ? scenario.VarLoading : scenario.RefLoading;
            string expectedDirection = isVariant ? scenario.CandidateDirection : "0";

            var singleRead = new List <BamAlignment> { read };
            var loaded     = LoadReads(singleRead, options, chrInfo, isVariant, expectedLoading, expectedDirection);

            if (loaded == null)
            {
                return(new string[] { "total fail to parse variant reads" });
            }

            // both checks run before either result is inspected
            var loadingOutcome   = CheckLoading(scenario, 1, loaded.Item1, isVariant);
            var directionOutcome = CheckCandidateDirection(isVariant, loaded.Item2, expectedDirection);

            if (loadingOutcome == null)
            {
                return(new string[] { "total fail to check loading" });
            }

            if (directionOutcome == null)
            {
                return(new string[] { "total fail to check direction" });
            }

            return(new string[] { loadingOutcome, directionOutcome });
        }
        /// <summary>
        /// Builds a very basic read from this abstract alignment: the CIGAR and position come from this
        /// object, every base gets quality 30, and the sequence is a run of 'A' when the direction
        /// string ends with "F" and a run of 'T' otherwise.
        /// </summary>
        /// <returns>A read on "chr1" whose direction map is derived from <c>Directions</c>.</returns>
        public Read ToRead()
        {
            const byte qualityForAll = 30;

            var cigar      = new CigarAlignment(Cigar);
            var readLength = (int)cigar.GetReadSpan();

            // placeholder bases only; the letter is chosen from the last direction character
            var fillBase = Directions.EndsWith("F") ? 'A' : 'T';

            var alignment = new BamAlignment
            {
                CigarData  = cigar,
                Position   = Position - 1,
                RefID      = 1,
                Bases      = new string(fillBase, readLength),
                Qualities  = Enumerable.Repeat(qualityForAll, readLength).ToArray(),
                MapQuality = 30
            };

            return(new Read("chr1", alignment)
            {
                SequencedBaseDirectionMap = new DirectionInfo(Directions).ToDirectionMap()
            });
        }
Example #32
0
		/// <summary>
		///     Jumps to the specified position in the BAM file: uses the index to find candidate chunks,
		///     scans them for the first alignment overlapping the iterator's interval, and leaves the
		///     file positioned just before that alignment.
		/// </summary>
		/// <param name="refID">index of the reference sequence in the reference list</param>
		/// <param name="position">position on that reference to jump to</param>
		/// <returns>true if we were successfully able to jump to the requested position</returns>
		/// <exception cref="ApplicationException">the chunk offsets reported by the index are inconsistent</exception>
		public bool Jump(int refID, int position)
		{
			// sanity checks
			if (!_hasIndex) return false;

			// refID indexes _referenceIndex on the next line, so refID == Count and negative
			// values are both out of range
			if ((refID < 0) || (refID >= _referenceIndex.Count)) return false;
			if (position > _referenceIndex[refID].Length) return false;

			// calculate the candidate index regions
			BamIterator bamIterator;
			bool foundOffset = _index.GetOffsets(refID, position, out bamIterator);

			if (!foundOffset || (bamIterator.Offsets == null) || (bamIterator.Offsets.Length == 0)) return false;

			int currentOffsetIndex = -1;
			int lastOffsetIndex = bamIterator.Offsets.Length - 1;
			BamAlignment alignment = new BamAlignment();

			while (true)
			{
				// jump to the next chunk; the index guard keeps Offsets[-1] from being evaluated
				// before the first chunk has been entered
				if ((currentOffsetIndex < 0) ||
					(bamIterator.CurrentOffset == 0) ||
					(bamIterator.CurrentOffset >= bamIterator.Offsets[currentOffsetIndex].End))
				{
					// no more chunks
					if (currentOffsetIndex == lastOffsetIndex) return false;

					// sanity check
					if ((currentOffsetIndex >= 0) &&
						(bamIterator.CurrentOffset != bamIterator.Offsets[currentOffsetIndex].End))
					{
						throw new ApplicationException(
							string.Format(
								"Found a potential bug in the BAM index routines. CurrentOffset ({0}) != Offsets[currentOffsetIndex].End ({1}",
								bamIterator.CurrentOffset, bamIterator.Offsets[currentOffsetIndex].End));
					}

					// not adjacent chunks; then seek
					if ((currentOffsetIndex < 0) || (bamIterator.Offsets[currentOffsetIndex].End != bamIterator.Offsets[currentOffsetIndex + 1].Begin))
					{
						Seek(bamIterator.Offsets[currentOffsetIndex + 1].Begin);
						bamIterator.CurrentOffset = Tell();
					}

					currentOffsetIndex++;
				}

				// look for the desired alignment
				if (GetNextAlignment(ref alignment, false))
				{
					// no need to proceed
					if ((alignment.RefID != bamIterator.RefID) || (alignment.Position >= bamIterator.End))
					{
						return false;
					}
					if (IsOverlap(bamIterator.Begin, bamIterator.End, alignment))
					{
						// this is the read we're looking for
						break;
					}

					// remember where the next alignment starts so we can seek back to it
					bamIterator.CurrentOffset = Tell();
				}
				else
				{
					//  end of file or error
					return false;
				}
			}

			// reset the file position (reading the matching alignment already moved us past it)
			Seek(bamIterator.CurrentOffset);

			return true;
		}
Example #33
0
		/// <summary>
		/// Positions the reader at the first read that has no reference ID (RefID == -1),
		/// starting the scan from the largest offset recorded in the index.
		/// </summary>
		/// <returns>
		/// true when such a read was found and the file is positioned just before it;
		/// false when the index reports no such reads or the scan hits end of file or an error.
		/// </returns>
		public bool JumpToUnaligned()
		{
			// sanity check: make sure we have unaligned reads
			if (_index.NumUnalignedWithoutCoordinates == 0) return false;

			// start from the last indexed BAM offset, or from the first alignment when there is none
			ulong resumeOffset = _index.GetLargestBamOffset();
			if (resumeOffset == 0)
			{
				Rewind();
				resumeOffset = Tell();
			}
			else
			{
				Seek(resumeOffset);
			}

			// read forward past every alignment that still has a reference ID
			BamAlignment candidate = new BamAlignment();
			while (GetNextAlignment(ref candidate, false))
			{
				if (candidate.RefID == -1)
				{
					// reading the record moved us past it, so go back to where it started
					Seek(resumeOffset);
					return true;
				}

				resumeOffset = Tell();
			}

			//  end of file or error
			return false;
		}
Example #34
0
		/// <summary>
		///     Tests whether an alignment overlaps an interval: its end position must be at or after
		///     <paramref name="begin"/> and its start position must be before <paramref name="end"/>.
		/// </summary>
		/// <param name="begin">start of the interval</param>
		/// <param name="end">end of the interval; an alignment starting exactly here does not overlap</param>
		/// <param name="alignment">the alignment to test</param>
		/// <returns>true if the alignment overlaps with the specified interval</returns>
		private bool IsOverlap(int begin, int end, BamAlignment alignment)
		{
			// the alignment finishes before the interval starts
			if (alignment.GetEndPosition() < begin) return false;

			return alignment.Position < end;
		}
Example #35
0
        /// <summary>
        /// Serializes one alignment into the output buffer (block size, core fields, name, packed
        /// CIGAR, packed bases, qualities, tag data) and writes it out.
        /// </summary>
        /// <param name="al">The alignment to write.</param>
        /// <exception cref="ApplicationException">The writer has not been opened.</exception>
        public void WriteAlignment(BamAlignment al)
        {
            if (!IsOpen)
            {
                throw new ApplicationException("ERROR: Tried to write an alignment but the file has not been opened yet.");
            }

            // sizes of the variable-length sections
            uint readNameLength  = (uint)al.Name.Length + 1;    // name plus its null terminator
            uint baseCount       = (uint)al.Bases.Length;
            uint cigarOpCount    = (uint)al.CigarData.Count;
            uint cigarByteCount  = cigarOpCount * 4;            // four bytes per CIGAR operation
            uint packedBaseCount = (baseCount + 1) / 2;         // two bases per byte, rounded up
            uint tagByteCount    = (uint)al.TagData.Length;

            // NOTE(review): the qualities section is sized by the base count but copied using
            // al.Qualities.Length below - the two are assumed to be equal; confirm against callers.
            uint variableSize = readNameLength + cigarByteCount + packedBaseCount + baseCount + tagByteCount;
            uint recordSize   = BamConstants.CoreAlignmentDataLen + variableSize;
            uint totalSize    = recordSize + 4;                 // record plus its 4-byte length prefix
            int cursor = 0;

            // test if we should flush the block
            if ((BlockOffset + totalSize) > MaxBlockSize)
            {
                FlushBlock();
            }

            // redimension the buffer if needed
            if (totalSize > _outputBuffer.Length)
            {
                _outputBuffer = new byte[totalSize + 1024];
            }

            // length prefix: size of the record that follows (the prefix itself is not counted)
            BinaryIO.AddUIntBytes(ref _outputBuffer, ref cursor, recordSize);

            // core data; bin/mapping quality/name length and flag/CIGAR count are each packed into one uint
            uint binMapQualityNameLength = (al.Bin << 16) | (al.MapQuality << 8) | readNameLength;
            uint flagCigarOpCount        = (al.AlignmentFlag << 16) | cigarOpCount;

            BinaryIO.AddIntBytes(ref _outputBuffer, ref cursor, al.RefID);
            BinaryIO.AddIntBytes(ref _outputBuffer, ref cursor, al.Position);
            BinaryIO.AddUIntBytes(ref _outputBuffer, ref cursor, binMapQualityNameLength);
            BinaryIO.AddUIntBytes(ref _outputBuffer, ref cursor, flagCigarOpCount);
            BinaryIO.AddUIntBytes(ref _outputBuffer, ref cursor, baseCount);
            BinaryIO.AddIntBytes(ref _outputBuffer, ref cursor, al.MateRefID);
            BinaryIO.AddIntBytes(ref _outputBuffer, ref cursor, al.MatePosition);
            BinaryIO.AddIntBytes(ref _outputBuffer, ref cursor, al.FragmentLength);

            // read name
            BinaryIO.AddNullTerminatedString(ref _outputBuffer, ref cursor, al.Name);

            // packed CIGAR string and packed bases
            PackCigar(ref cursor, ref _outputBuffer, al.CigarData);
            PackBases(ref cursor, packedBaseCount, al.Bases);

            // base qualities
            Buffer.BlockCopy(al.Qualities, 0, _outputBuffer, cursor, al.Qualities.Length);
            cursor += al.Qualities.Length;

            // tag data
            Buffer.BlockCopy(al.TagData, 0, _outputBuffer, cursor, al.TagData.Length);
            cursor += al.TagData.Length;

            // write the alignment
            Write(_outputBuffer, totalSize);
        }
Example #36
0
 /// <summary>
 /// Walks the read's CIGAR to pair each matched base with its reference position, and for every
 /// variant of interest at that position counts whether the read shows the reference base or the
 /// first variant allele. Processing stops at the first CIGAR operation other than M, S, I or D.
 /// </summary>
 /// <param name="read">The alignment to process.</param>
 /// <param name="nextVariantIndex">Index of the first variant that may still lie at or after the read start.</param>
 private void ProcessReadBases(BamAlignment read, int nextVariantIndex)
 {
     int referencePosition = read.Position;
     int readOffset = 0;
     int operationCount = read.CigarData.Count;
     for (int opIndex = 0; opIndex < operationCount; opIndex++)
     {
         CigarOp operation = read.CigarData[opIndex];
         switch (operation.Type)
         {
             case 'S':
             case 'I':
                 // soft clips and insertions use up read bases only
                 readOffset += (int)operation.Length;
                 break;
             case 'D':
                 // deletions use up reference positions only
                 referencePosition += (int)operation.Length;
                 break;
             case 'M':
                 // Loop over matches/mismatches:
                 for (int consumed = 0; consumed < operation.Length; consumed++)
                 {
                     int varIndex = nextVariantIndex;
                     while (varIndex < this.Variants.Count)
                     {
                         VcfVariant variant = this.Variants[varIndex];
                         // Subtract 1: Vcf positions are 1-based, bam file positions are 0-based:
                         var variantPosition = variant.ReferencePosition - 1;
                         if (variantPosition > referencePosition) break;

                         if (variantPosition < referencePosition)
                         {
                             // already behind us: later bases never need to look at it again
                             nextVariantIndex++;
                         }
                         else if (read.Qualities[readOffset] >= MinimumBaseQScore)
                         {
                             // low-quality base calls never reach this branch
                             char observedBase = read.Bases[readOffset];
                             if (observedBase == variant.ReferenceAllele[0]) this.ReferenceCounts[varIndex]++;
                             if (observedBase == variant.VariantAlleles[0][0]) this.VariantCounts[varIndex]++;
                         }

                         varIndex++;
                     }

                     referencePosition++;
                     readOffset++;
                 }
                 break;
             default:
                 // We don't know how to cope with this CIGAR operation; bail out!
                 return;
         }
     }
 }
Example #37
0
        /// <summary>
        ///     Records a file offset for every window the alignment touches, where the window of a
        ///     position is that position shifted right by <c>BamLidxShift</c>. A window that already
        ///     holds a non-zero offset is left unchanged.
        /// </summary>
        /// <param name="offsets">per-window offsets for one reference sequence; grown with zeros as needed</param>
        /// <param name="al">alignment whose start and end positions select the windows</param>
        /// <param name="offset">file offset to store in windows that are still zero</param>
        private static void AddOffset(ref List<ulong> offsets, ref BamAlignment al, ulong offset)
        {
            int firstWindow = al.Position >> BamLidxShift;
            int lastWindow = (al.GetEndPosition() - 1) >> BamLidxShift;

            // pad the list so that lastWindow is a valid index
            for (int missing = (lastWindow + 1) - offsets.Count; missing > 0; missing--)
            {
                offsets.Add(0);
            }

            // a single-window alignment is simply the firstWindow == lastWindow case of this loop
            for (int window = firstWindow; window <= lastWindow; window++)
            {
                if (offsets[window] != 0) continue;
                offsets[window] = offset;
            }
        }
Example #38
0
        /// <summary>
        /// Reads the alignments of one chromosome from an indexed BAM file and marks the start position
        /// of every read that passes the filters in <paramref name="observed"/>.
        /// </summary>
        /// <param name="bamFile">BAM file to read alignments from; its index is expected at <c>bamFile + ".bai"</c>.</param>
        /// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
        /// <param name="chromosome">Name of the reference sequence whose reads are loaded.</param>
        /// <param name="coverageMode">Selects how hits are stored and whether fragment lengths are recorded.</param>
        /// <param name="observed">Hit store for the chromosome, addressed by alignment start position.</param>
        /// <param name="fragmentLengths">Per-position fragment lengths; written only in GCContentWeighted mode.</param>
        /// <exception cref="FileNotFoundException">The BAM index file does not exist.</exception>
        /// <exception cref="ApplicationException">The reference index for <paramref name="chromosome"/> could not be retrieved.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";
            if (!File.Exists(indexPath))
            {
                throw new FileNotFoundException(string.Format("Fatal error: Bam index not found at {0}", indexPath), indexPath);
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                        string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                        bamFile));
                }

                if (!reader.Jump(desiredRefIndex, 0))
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this 
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }

                int readCount = 0;
                int keptReadCount = 0;
                BamAlignment alignment = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    // quit as soon as we leave the desired reference; checking this before the
                    // filters avoids scanning the rest of the file when later reads fail them
                    if (alignment.RefID != desiredRefIndex) break;

                    readCount++;

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate:
                    if (!alignment.IsMapped()) continue;
                    if (alignment.IsFailedQC()) continue;
                    if (alignment.IsDuplicate()) continue;
                    if (alignment.IsReverseStrand()) continue;
                    if (!alignment.IsMainAlignment()) continue;

                    // Require the alignment to start with 35 bases of non-indel
                    // (a read without any CIGAR operation cannot satisfy that):
                    if (alignment.CigarData.Count == 0) continue;
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35) continue;

                    if (isPairedEnd && !alignment.IsProperPair()) continue;

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }

                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }
Example #39
0
		/// <summary>
		///     Retrieves the next available alignment: reads a 4-byte length prefix, then that many
		///     bytes of record data, and decodes the record into <paramref name="alignment"/>.
		/// </summary>
		/// <param name="alignment">
		///     Object that receives the decoded fields. It is updated in place; its CigarData list is
		///     cleared and refilled.
		/// </param>
		/// <param name="skipAdditionalParsing">
		///     When true, only the core fields, the name and the CIGAR are decoded; Bases, Qualities and
		///     TagData are not assigned by this call.
		/// </param>
		/// <returns>
		///     true when a record was decoded; false when the file is not open, the length prefix or
		///     the record could not be read in full, or the length prefix is zero.
		/// </returns>
		public bool GetNextAlignment(ref BamAlignment alignment, bool skipAdditionalParsing)
		{
			// check that our file is open
			if (!IsOpen) return false;

			// retrieve the alignment data length
			if (Read(ref _byteBuffer, 4) != 4) return false;
			uint alignmentDataLen = BitConverter.ToUInt32(_byteBuffer, 0);
			if (alignmentDataLen == 0) return false;

			// retrieve the alignment data
			if (Read(ref _byteBuffer, alignmentDataLen) != alignmentDataLen) return false;

			// retrieve the core alignment data
			// bytes 8-11: bin (upper 16 bits), mapping quality (bits 8-15), read name length (low 8 bits)
			uint compositeData1 = BitConverter.ToUInt32(_byteBuffer, 8);
			// bytes 12-15: alignment flag (upper 16 bits), number of CIGAR operations (lower 16 bits)
			uint flagAndNumCigarOps = BitConverter.ToUInt32(_byteBuffer, 12);
			uint numBases = BitConverter.ToUInt32(_byteBuffer, 16);
			if (numBases > _sequenceBuffer.Length)
			{
				// For very long reads, re-allocate this buffer to twice the data length
				_sequenceBuffer = new char[numBases * 2];
			}
			uint readNameLen = compositeData1 & 0xff;
			uint numCigarOps = flagAndNumCigarOps & 0xffff;

			alignment.RefID = BitConverter.ToInt32(_byteBuffer, 0);
			alignment.Position = BitConverter.ToInt32(_byteBuffer, 4);
			alignment.Bin = (compositeData1 >> 16);
			alignment.MapQuality = ((compositeData1 >> 8) & 0xff);
			alignment.AlignmentFlag = flagAndNumCigarOps >> 16;
			alignment.MateRefID = BitConverter.ToInt32(_byteBuffer, 20);
			alignment.MatePosition = BitConverter.ToInt32(_byteBuffer, 24);
			alignment.FragmentLength = BitConverter.ToInt32(_byteBuffer, 28);

			// retrieve the read name
			// the stored length covers one more byte than the name itself (presumably a terminator -
			// TODO confirm), so the string is built from readNameLen - 1 bytes but the offset skips all of them
			int offset = (int)BamConstants.CoreAlignmentDataLen;
			alignment.Name = Encoding.ASCII.GetString(_byteBuffer, offset, (int)(readNameLen - 1));
			offset += (int)readNameLen;

			// retrieve the CIGAR operations
			// each operation is one 32-bit value: the masked low bits select the operation type,
			// the bits above CigarShift hold its length
			alignment.CigarData.Clear();
			for (uint i = 0; i < numCigarOps; ++i, offset += 4)
			{
				uint cigarData = BitConverter.ToUInt32(_byteBuffer, offset);
				alignment.CigarData.Add(new CigarOp(BamConstants.CigarTypes[cigarData & BamConstants.CigarMask],
													cigarData >> BamConstants.CigarShift));
			}

			// here we provide a mechanism for skipping the processing of
			// bases, base qualities, and tags
			if (!skipAdditionalParsing)
			{
				// retrieve the bases
				// two bases per byte, high nibble first: shift alternates 4, 0, 4, 0, ... and the
				// byte offset advances after the low nibble has been consumed
				byte shift = 4;
				for (int i = 0; i < numBases; ++i, shift ^= 4)
				{
					_sequenceBuffer[i] = _baseLookupTable[(_byteBuffer[offset] >> shift) & 15];
					if (shift == 0) offset++;
				}

				// an odd number of bases leaves the last byte half used; step over it
				if (shift == 0) offset++;

				alignment.Bases = new string(_sequenceBuffer, 0, (int)numBases);

				// retrieve the qualities
				// the existing array is reused when it already has the right length
				if ((alignment.Qualities == null) || (alignment.Qualities.Length != numBases))
				{
					alignment.Qualities = new byte[numBases];
				}

				Buffer.BlockCopy(_byteBuffer, offset, alignment.Qualities, 0, (int)numBases);
				offset += (int)numBases;

				// retrieve the tags
				// everything left in the record after the qualities is kept as raw tag bytes
				int numTagBytes = (int)alignmentDataLen - offset;
				alignment.TagData = new byte[numTagBytes];
				Array.Copy(_byteBuffer, offset, alignment.TagData, 0, numTagBytes);
			}

			return true;
		}
Example #40
0
            /// <summary>
            /// Bins the fragment identified by alignment. The bin count goes up when the first read seen
            /// for a fragment passes all the filters and a bin is found for it; it goes back down when
            /// the mate shows up later and turns out to be a duplicate, to have failed QC or to have low
            /// mapping quality.
            /// </summary>
            /// <param name="alignment">the read to process</param>
            /// <param name="qualityThreshold">minimum mapping quality</param>
            /// <param name="readNameToBinIndex">Dictionary of read name to bin index for fragments binned but not yet confirmed by their mate</param>
            /// <param name="samePositionReadNames">names of reads seen once whose position equals their mate position</param>
            /// <param name="usableFragmentCount">number of usable fragments</param>
            /// <param name="bins">predefined bins</param>
            /// <param name="binIndexStart">bin index from which to start searching for the best bin</param>
            public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
                HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
            {
                // only mapped primary alignments with a mapped mate, in a proper pair, are considered
                bool isCandidate = alignment.IsMapped()
                    && alignment.IsMateMapped()
                    && alignment.IsPrimaryAlignment()
                    && alignment.IsPaired()
                    && alignment.IsProperPair();
                if (!isCandidate) { return; }

                bool rejected = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
                string readName = alignment.Name;

                // Check whether we have binned the fragment using the mate
                int mateBinIndex;
                if (readNameToBinIndex.TryGetValue(readName, out mateBinIndex))
                {
                    // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
                    if (rejected)
                    {
                        usableFragmentCount--;
                        bins[mateBinIndex].Count--;
                    }
                    readNameToBinIndex.Remove(readName); // clean up
                    return;
                }

                // from here on we are looking at the first read seen for this fragment
                if (rejected) { return; }
                if (alignment.RefID != alignment.MateRefID) { return; }
                if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

                // when both reads share a position, the first one seen goes on and the second one stops here
                if (alignment.Position == alignment.MatePosition && !samePositionReadNames.Add(readName))
                {
                    samePositionReadNames.Remove(readName);
                    return;
                }

                if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

                // Try to bin the fragment
                int fragmentStart = alignment.Position; // 0-based, inclusive
                int fragmentStop = fragmentStart + alignment.FragmentLength; // 0-based, exclusive

                // skip the bins that end at or before the fragment start
                while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
                {
                    binIndexStart++;
                }
                if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

                // now bins[binIndexStart].Stop > fragmentStart
                int chosenBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
                if (chosenBinIndex < 0) { return; }

                // Bin the fragment and remember where, so the mate can undo it if needed
                usableFragmentCount++;
                bins[chosenBinIndex].Count++;
                readNameToBinIndex[readName] = chosenBinIndex;
            }
Example #41
0
            /// <summary>
            /// Tells whether a read should be rejected because at least one of these holds:
            /// 1. The read is a duplicate,
            /// 2. The read failed QC,
            /// 3. The mapping quality is unavailable or below the threshold.
            /// </summary>
            /// <param name="alignment">the read to check</param>
            /// <param name="qualityThreshold">minimum acceptable mapping quality</param>
            /// <returns>true when any of the conditions applies</returns>
            public static bool IsDuplicateFailedQCLowQuality(BamAlignment alignment, uint qualityThreshold)
            {
                return alignment.IsDuplicate()
                    || alignment.IsFailedQC()
                    || alignment.MapQuality == FragmentBinnerConstants.MappingQualityNotAvailable
                    || alignment.MapQuality < qualityThreshold;
            }
Example #42
0
        /// <summary>
        /// Runs a read pair through <c>BinOneAlignment</c> for the four combinations of each read
        /// passing or failing the mapping-quality filter and checks the resulting count of the single bin.
        /// </summary>
        /// <param name="pos1">position of the first read (and mate position of the second)</param>
        /// <param name="pos2">position of the second read (and mate position of the first)</param>
        public void TestBinOneAlignment(int pos1, int pos2)
        {
            uint qualityThreshold = 3;
            Dictionary<string, int> readNameToBinIndex = new Dictionary<string, int>();
            HashSet<string> samePositionReadNames = new HashSet<string>();
            long usableFragmentCount = 0;
            List<GenomicBin> bins = new List<GenomicBin>()
            {
                new GenomicBin("chr1", 100, 200, 50, 0)
            };
            int binIndexStart = 0;

            // two reads of one pair: same name, both flagged paired (0x1) and proper pair (0x2)
            BamAlignment alignment1 = new BamAlignment();
            BamAlignment alignment2 = new BamAlignment();
            alignment1.Name = alignment2.Name = "ReadName";
            alignment1.AlignmentFlag = 0x1 | 0x2;
            alignment2.AlignmentFlag = 0x1 | 0x2;
            alignment1.Position = pos1;
            alignment1.MatePosition = pos2;
            alignment1.FragmentLength = 100;
            alignment2.Position = pos2;
            alignment2.MatePosition = pos1;
            alignment2.FragmentLength = -100;
            alignment1.MapQuality = 10;
            alignment2.MapQuality = 10;

            // Both reads pass filters
            FragmentBinner.BinTask.BinOneAlignment(alignment1, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            FragmentBinner.BinTask.BinOneAlignment(alignment2, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            // expected value goes first so that a failure message reports the two values the right way round
            Assert.AreEqual(1, bins[0].Count);

            // First read passes filters
            bins[0].Count = 0; // reset bin count
            alignment2.MapQuality = 2; // below quality threshold of 3
            FragmentBinner.BinTask.BinOneAlignment(alignment1, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            FragmentBinner.BinTask.BinOneAlignment(alignment2, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            Assert.AreEqual(0, bins[0].Count);

            // Second read passes filters
            bins[0].Count = 0; // reset bin count
            alignment1.MapQuality = 2; // below quality threshold of 3
            alignment2.MapQuality = 10;
            FragmentBinner.BinTask.BinOneAlignment(alignment1, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            FragmentBinner.BinTask.BinOneAlignment(alignment2, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            Assert.AreEqual(0, bins[0].Count);

            // Both fail filters
            bins[0].Count = 0; // reset bin count
            alignment1.MapQuality = 2; // below quality threshold of 3
            alignment2.MapQuality = 2; // below quality threshold of 3
            FragmentBinner.BinTask.BinOneAlignment(alignment1, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            FragmentBinner.BinTask.BinOneAlignment(alignment2, qualityThreshold, readNameToBinIndex, samePositionReadNames,
                ref usableFragmentCount, bins, ref binIndexStart);
            Assert.AreEqual(0, bins[0].Count);
        }
Example #43
0
        /// <summary>
        ///     Updates the index with respect to the current alignment: validates sort order, records
        ///     the alignment in the per-reference offset list, and closes the previous bin / reference
        ///     region whenever the bin changes.
        /// </summary>
        /// <param name="alignment">The alignment that was just read; its RefID, Position and Bin drive the update.</param>
        /// <param name="offset">
        ///     Offset supplied by the caller for this alignment; it must be strictly greater than the
        ///     offset stored for the previous alignment. (Presumably the reader position after the
        ///     alignment was read -- confirm against the caller.)
        /// </param>
        /// <returns>
        ///     false when the alignment starts a new bin and has a negative RefID (no reference); in that
        ///     case _hasUnalignedReads is set and the method returns before the offset, position and
        ///     aligned/unaligned counters are updated. true otherwise.
        /// </returns>
        /// <exception cref="ApplicationException">
        ///     Thrown when an alignment on the same reference has a smaller Position than the preceding
        ///     alignment, or when <paramref name="offset" /> is not greater than the previous offset.
        /// </exception>
        public bool UpdateReferenceIndex(ref BamAlignment alignment, ulong offset)
        {
            // count every alignment that has no reference (negative RefID)
            if (alignment.RefID < 0) ++_numUnalignedWithoutCoordinates;

            // update the reference IDs and check that the alignment is sorted
            if (alignment.RefID != _lastRefID)
            {
                // new reference: uint.MaxValue is the "no bin yet" sentinel that the bin-change
                // branch below tests to detect that a reference has just ended
                _lastRefID = alignment.RefID;
                _lastBin = uint.MaxValue;
            }
            else if (alignment.Position < _lastPosition)
            {
                // same reference but the position went backwards
                throw new ApplicationException(
                    string.Format(
                        "ERROR: The BAM file is not sorted. An alignment ({0}) occurred before the preceding alignment ({1}).",
                        alignment.Position, _lastPosition));
            }

            // only alignments with a reference are added to that reference's offset list; the offset
            // passed is the one stored for the previous alignment, not the incoming 'offset'
            if (alignment.RefID >= 0) AddOffset(ref _index[alignment.RefID].OffsetList, ref alignment, _lastOffset);

            if (alignment.Bin != _lastBin)
            {
                // close the bin that was being accumulated (if any): it spans _saveOffset.._lastOffset
                if (_saveBin != uint.MaxValue)
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, _saveBin, _saveOffset, _lastOffset);

                // the reference just changed and a previous reference exists (int.MinValue means
                // "nothing saved yet"): write its two BamMaxBin entries
                if ((_lastBin == uint.MaxValue) && (_saveRefID != int.MinValue))
                {
                    _endOffset = _lastOffset;
                    // first entry: the offset range covered by the previous reference
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, BamMaxBin, _beginOffset, _endOffset);
                    // second entry: the aligned / unaligned read counts, passed in the offset arguments
                    AddBamRegion(ref _index[_saveRefID].RegionsDictionary, BamMaxBin, _numAligned, _numUnaligned);
                    _numAligned = _numUnaligned = 0;
                    // the next reference starts where this one ended
                    _beginOffset = _endOffset;
                }

                // start accumulating the new bin
                _saveOffset = _lastOffset;
                _saveBin = _lastBin = alignment.Bin;
                _saveRefID = alignment.RefID;

                // a negative RefID means the reads without a reference have been reached; report it
                // and return early, leaving _lastOffset, _lastPosition and the counters untouched
                if (_saveRefID < 0)
                {
                    _hasUnalignedReads = true;
                    return false;
                }
            }

            // the offset must strictly increase from one alignment to the next
            if (offset <= _lastOffset)
            {
                throw new ApplicationException(
                    "ERROR: While updating the BAM index, the offset did not increase after processing the last alignment.");
            }

            // per-reference counters; they are written out and reset when the reference changes
            if (alignment.IsMapped()) ++_numAligned;
            else ++_numUnaligned;

            // remember this alignment for the sort / offset checks of the next call
            _lastOffset = offset;
            _lastPosition = alignment.Position;

            return true;
        }
Example #44
0
        /// <summary>
        /// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
        /// Reads on this.Chromosome are streamed in file order; each read that passes the filters and
        /// may overlap the next pending variant is handed to ProcessReadBases.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to scan.</param>
        /// <exception cref="ArgumentException">this.Chromosome is not a reference name known to the bam file.</exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int recordsSeen = 0;
            int variantCursor = 0;
            using (var bam = new BamReader(bamPath))
            {
                var record = new BamAlignment();
                int targetRefID = bam.GetReferenceIndex(this.Chromosome);
                if (targetRefID < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
                }
                Console.WriteLine("Jump to refid {0} {1}", targetRefID, this.Chromosome);

                // The jump result is not checked: the loop below skips reads on earlier references itself.
                bam.Jump(targetRefID, 0);

                while (bam.GetNextAlignment(ref record, false))
                {
                    // Stop once we have run past the chromosome of interest (or hit reads without a position).
                    if (!record.HasPosition() || record.RefID > targetRefID) break;

                    // Not there yet: keep reading until we reach the chromosome of interest.
                    if (record.RefID < targetRefID) continue;

                    recordsSeen++;
                    if (recordsSeen % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", recordsSeen, record.Position);
                    }

                    // Only primary, mapped, non-duplicate reads above the mapping quality cutoff are counted.
                    bool countable = record.IsPrimaryAlignment()
                                     && record.IsMapped()
                                     && !record.IsDuplicate()
                                     && record.MapQuality > MinimumMapQ;
                    if (!countable) continue;

                    // Advance the variant cursor past every variant that lies before this read's start.
                    // NOTE(review): this assumes Variants is ordered by ReferencePosition -- confirm.
                    while (variantCursor < this.Variants.Count)
                    {
                        if (this.Variants[variantCursor].ReferencePosition >= record.Position) break;
                        variantCursor++;
                    }

                    // No variants left to count: nothing more to do in this file.
                    if (variantCursor >= this.Variants.Count) break;

                    // Count bases only when the read starts within 1000 positions of the next variant;
                    // the read potentially overlaps that variant (and further variants).
                    if (record.Position + 1000 >= this.Variants[variantCursor].ReferencePosition)
                    {
                        ProcessReadBases(record, variantCursor);
                    }
                }
            }
            Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
        }
Example #45
0
            /// <summary>
            /// Bins fragments.
            /// Seeks to Chromosome in the BAM file and passes every alignment on that reference sequence
            /// to BinOneAlignment, stopping at the first alignment that belongs to another reference.
            /// Returns without binning anything when the reader cannot jump to the chromosome.
            /// </summary>
            /// <exception cref="Exception">The BAM index file does not exist.</exception>
            /// <exception cref="ApplicationException">
            /// The reference index for Chromosome cannot be retrieved, the alignments are not in ascending
            /// position order, or no paired alignment was seen for the chromosome.
            /// </exception>
            private void binFragments()
            {
                // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
                if (!Bam.Index.Exists)
                {
                    throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
                }

                long pairedAlignmentCount = 0; // keep track of paired alignments
                usableFragmentCount = 0;
                using (BamReader reader = new BamReader(Bam.BamFile.FullName))
                {
                    int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
                    if (desiredRefIndex == -1)
                    {
                        throw new ApplicationException(
                            string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
                    }

                    if (!reader.Jump(desiredRefIndex, 0))
                    {
                        // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                        // .bam file.  That is not uncommon e.g. for truseq amplicon.
                        return;
                    }

                    Dictionary<string, int> readNameToBinIndex = new Dictionary<string, int>();
                    HashSet<string> samePositionReadNames = new HashSet<string>();
                    int binIndexStart = 0;
                    int prevPosition = -1;
                    BamAlignment alignment = new BamAlignment();
                    while (reader.GetNextAlignment(ref alignment, true))
                    {
                        // Quit if the current reference index is different from the desired reference index.
                        // desiredRefIndex cannot be -1 at this point, so every alignment that gets past this
                        // check has a real reference; no separate "RefID == -1" test is needed.
                        if (alignment.RefID != desiredRefIndex)
                            break;

                        if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
                        {
                            throw new ApplicationException(
                                string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
                        }
                        prevPosition = alignment.Position;

                        if (alignment.IsPaired()) { pairedAlignmentCount++; }

                        BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                            samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
                    }
                }

                // Reached only when the jump succeeded: at least one paired alignment is required.
                if (pairedAlignmentCount == 0)
                {
                    throw new ApplicationException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
                }
            }
Example #46
0
 /// <summary>
 /// Tells whether a read lies to the right of its mate, judged by start position only.
 /// </summary>
 /// <param name="alignment">Read whose Position is compared with its MatePosition.</param>
 /// <returns>
 /// true when the read's Position is strictly greater than its MatePosition;
 /// false otherwise, including when both positions are equal.
 /// </returns>
 public static bool IsRightMostInPair(BamAlignment alignment)
 {
     var readStart = alignment.Position;
     var mateStart = alignment.MatePosition;
     return mateStart < readStart;
 }
Example #47
0
        /// <summary>
        /// Serialize alignment to a byte array, for later flushing to output file.
        /// The buffer holds, in order: the 4-byte alignment block size, the core alignment fields,
        /// the null-terminated read name, the packed CIGAR operations, the packed bases, one quality
        /// byte per base, and the raw tag data.
        /// </summary>
        /// <param name="al">Alignment to serialize; it is only read, never modified.</param>
        /// <returns>A newly allocated buffer containing the block-size prefix followed by the alignment block.</returns>
        /// <exception cref="ArgumentException">
        /// The number of base qualities differs from the number of bases. The block is sized with one
        /// quality byte per base, so a mismatch would misplace the tag data or overrun the buffer.
        /// </exception>
        public static byte[] SerializeAlignment(ref BamAlignment al)
        {
            // The size computation below reserves exactly numBases bytes for the qualities, while the
            // copy further down writes al.Qualities.Length bytes. Reject inconsistent input up front
            // instead of emitting a silently corrupted record.
            if (al.Qualities.Length != al.Bases.Length)
            {
                throw new ArgumentException(string.Format(
                    "Unable to serialize alignment {0}: found {1} base qualities but {2} bases.",
                    al.Name, al.Qualities.Length, al.Bases.Length));
            }

            // initialize
            uint nameLen = (uint)al.Name.Length + 1; // +1 for the terminating null byte
            uint numBases = (uint)al.Bases.Length;
            uint numCigarOperations = (uint)al.CigarData.Count;
            uint packedCigarLen = numCigarOperations * 4; // each CIGAR operation occupies 4 bytes
            uint numEncodedBases = (numBases + 1) / 2; // two bases per byte, rounded up
            uint tagDataLen = (uint)al.TagData.Length;
            uint dataBlockSize = nameLen + packedCigarLen + numEncodedBases + numBases + tagDataLen;
            uint alignBlockSize = BamConstants.CoreAlignmentDataLen + dataBlockSize;
            uint blockSize = alignBlockSize + 4; // +4 for the block size field itself
            byte[] buffer = new byte[blockSize];
            int offset = 0;

            // store the block size
            BinaryIO.AddUIntBytes(ref buffer, ref offset, alignBlockSize);

            // store the BAM core data
            // NOTE(review): nameLen and numCigarOperations are OR-ed into the low 8 and 16 bits of
            // their words without range checks -- presumably callers keep them within range; confirm.
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.RefID);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.Position);
            BinaryIO.AddUIntBytes(ref buffer, ref offset, (al.Bin << 16) | (al.MapQuality << 8) | nameLen);
            BinaryIO.AddUIntBytes(ref buffer, ref offset, (al.AlignmentFlag << 16) | numCigarOperations);
            BinaryIO.AddUIntBytes(ref buffer, ref offset, numBases);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.MateRefID);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.MatePosition);
            BinaryIO.AddIntBytes(ref buffer, ref offset, al.FragmentLength);

            // store the alignment name
            BinaryIO.AddNullTerminatedString(ref buffer, ref offset, al.Name);

            // store the packed CIGAR string and packed bases
            PackCigar(ref offset, ref buffer, al.CigarData);
            PackBases(ref offset, ref buffer, numEncodedBases, al.Bases);

            // store the base qualities
            Buffer.BlockCopy(al.Qualities, 0, buffer, offset, al.Qualities.Length);
            offset += al.Qualities.Length;

            // store the tag data
            Buffer.BlockCopy(al.TagData, 0, buffer, offset, al.TagData.Length);
            offset += al.TagData.Length;

            return buffer;
        }