Ejemplo n.º 1
0
        /// <summary>
        /// Checks <c>Jump</c> and <c>JumpForward</c> on the "bwaXC.bam" test file: after each jump the next
        /// alignment must lie in the expected coordinate range, and a <c>JumpForward</c> to an earlier
        /// coordinate must leave the reader offset reported by <c>Tell</c> unchanged.
        /// </summary>
        public void TestJump()
        {
            const string chromosome = "chr1";
            var bamPath = Path.Combine(TestPaths.LocalTestDataDirectory, "bwaXC.bam");

            using (var reader = new BamReader(bamPath))
            {
                var alignment = new BamAlignment();

                // Plain jump to a late coordinate...
                Assert.True(reader.Jump(reader.GetReferenceIndex(chromosome), 20200));
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position > 18000);

                // ...and then back to an early one.
                Assert.True(reader.Jump(reader.GetReferenceIndex(chromosome), 200));
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position < 250);

                // Forward-only jumping: moving ahead behaves like a plain jump.
                Assert.True(reader.JumpForward(reader.GetReferenceIndex(chromosome), 20200));
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position > 18000);

                // Forward-only jumping: a target behind us still reports success, but the offset must not move.
                var offsetBeforeBackwardRequest = reader.Tell();
                Assert.True(reader.JumpForward(reader.GetReferenceIndex(chromosome), 200));
                Assert.Equal(offsetBeforeBackwardRequest, reader.Tell());
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position > 18000);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
        /// Reads the records on <c>this.Chromosome</c> in order, drops the ones that fail the flag / mapping
        /// quality filters, and passes every read that could reach the next pending variant to
        /// <c>ProcessReadBases</c>.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to open.</param>
        /// <exception cref="ArgumentException">
        /// <c>this.Chromosome</c> is not a reference name known to the bam file.
        /// </exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int recordsOnChromosome = 0;
            int variantCursor = 0;

            using (BamReader reader = new BamReader(bamPath))
            {
                BamAlignment read = new BamAlignment();
                int targetRefId = reader.GetReferenceIndex(this.Chromosome);
                if (targetRefId < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
                }
                Console.WriteLine("Jump to refid {0} {1}", targetRefId, this.Chromosome);

                // The jump result is not checked; the RefID comparisons below skip any record that
                // precedes the chromosome of interest.
                reader.Jump(targetRefId, 0);

                while (reader.GetNextAlignment(ref read, false))
                {
                    // A read without a position, or on a later reference: we're past our chromosome of interest.
                    if (!read.HasPosition() || read.RefID > targetRefId)
                    {
                        break;
                    }

                    // We're not yet on our chromosome of interest.
                    if (read.RefID < targetRefId)
                    {
                        continue;
                    }

                    // Progress report every million records seen on the chromosome.
                    recordsOnChromosome++;
                    if (recordsOnChromosome % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", recordsOnChromosome, read.Position);
                    }

                    // Skip over unaligned or other non-count-worthy reads
                    // (checks are evaluated left to right and stop at the first hit):
                    if (!read.IsPrimaryAlignment() || !read.IsMapped() || read.IsDuplicate() || read.MapQuality <= MinimumMapQ)
                    {
                        continue;
                    }

                    // Advance past variants that lie before this read's position, to keep up with our reads:
                    while (variantCursor < this.Variants.Count && this.Variants[variantCursor].ReferencePosition < read.Position)
                    {
                        variantCursor++;
                    }

                    // No variants left: nothing more to count.
                    if (variantCursor >= this.Variants.Count)
                    {
                        break;
                    }

                    // A read starting more than 1000 positions before the next variant is treated as unable to touch it:
                    if (read.Position + 1000 < this.Variants[variantCursor].ReferencePosition)
                    {
                        continue;
                    }

                    // This read potentially overlaps the next variant (and further variants).  Count bases!
                    ProcessReadBases(read, variantCursor);
                }
            }
            Console.WriteLine("Looped over {0} bam records in all", recordsOnChromosome);
        }
Ejemplo n.º 3
0
            /// <summary>
            /// Bins fragments: seeks to <c>Chromosome</c> in the BAM file and passes every alignment on that
            /// chromosome to <c>BinOneAlignment</c>, resetting <c>usableFragmentCount</c> to 0 first.
            /// Returns without binning anything when the reader cannot jump to the chromosome.
            /// </summary>
            /// <exception cref="Exception">The BAM index file does not exist.</exception>
            /// <exception cref="Illumina.Common.IlluminaException">
            /// The chromosome is unknown to the BAM file, the alignments are not in non-decreasing position
            /// order, or no paired alignment was seen on the chromosome.
            /// </exception>
            private void binFragments()
            {
                // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
                if (!Bam.Index.Exists)
                {
                    throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
                }

                long pairedAlignmentCount = 0; // keep track of paired alignments

                usableFragmentCount = 0;
                using (BamReader reader = new BamReader(Bam.BamFile.FullName))
                {
                    int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
                    if (desiredRefIndex == -1)
                    {
                        throw new Illumina.Common.IlluminaException(
                                  string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
                    }

                    if (!reader.Jump(desiredRefIndex, 0))
                    {
                        // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                        // .bam file.  That is not uncommon e.g. for truseq amplicon.
                        return;
                    }

                    Dictionary <string, int> readNameToBinIndex    = new Dictionary <string, int>();
                    HashSet <string>         samePositionReadNames = new HashSet <string>();
                    int          binIndexStart = 0;
                    int          prevPosition  = -1;
                    BamAlignment alignment     = new BamAlignment();
                    while (reader.GetNextAlignment(ref alignment, true))
                    {
                        // Quit at the first alignment whose reference index differs from the desired one.
                        // desiredRefIndex is never -1 at this point, so a RefID of -1 ends the loop here as
                        // well; no separate check for it is needed.
                        if (alignment.RefID != desiredRefIndex)
                        {
                            break;
                        }

                        if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
                        {
                            throw new Illumina.Common.IlluminaException(
                                      string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
                        }
                        prevPosition = alignment.Position;

                        if (alignment.IsPaired())
                        {
                            pairedAlignmentCount++;
                        }

                        BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                                        samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
                    }
                }

                // Reached only when the jump succeeded: a chromosome with reads but no paired ones is an error.
                if (pairedAlignmentCount == 0)
                {
                    throw new Illumina.Common.IlluminaException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
                }
            }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the alignments of one chromosome from a bam file and marks their start positions in
        /// <paramref name="observed"/>. Only alignments that are mapped, pass QC, are not duplicates, are on
        /// the forward strand, are main alignments and start with at least 35 'M' bases are kept.
        /// Returns without touching the outputs when the reader cannot jump to the chromosome.
        /// </summary>
        /// <param name="bamFile">Bam file to read alignments from; its index is expected at <c>bamFile + ".bai"</c>.</param>
        /// <param name="isPairedEnd">When true, alignments that are not flagged as a proper pair are skipped.</param>
        /// <param name="chromosome">Name of the reference sequence whose alignments are loaded.</param>
        /// <param name="coverageMode">
        /// <c>Binary</c> writes 1 into <c>observed.Data</c>; every other mode calls <c>observed.Set</c>.
        /// <c>GCContentWeighted</c> additionally records fragment lengths.
        /// </param>
        /// <param name="observed">Hit array, indexed by alignment position, that receives the observations.</param>
        /// <param name="fragmentLengths">
        /// Indexed by alignment position; written only in <c>GCContentWeighted</c> mode, with the fragment length
        /// clamped to the range 0..<see cref="Int16.MaxValue"/>.
        /// </param>
        /// <exception cref="Exception">The .bai index file does not exist.</exception>
        /// <exception cref="ApplicationException">The chromosome is unknown to the bam file.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";

            if (!File.Exists(indexPath))
            {
                throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                              string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                                            bamFile));
                }

                if (!reader.Jump(desiredRefIndex, 0))
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }

                int          readCount     = 0;
                int          keptReadCount = 0;
                BamAlignment alignment     = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    // Quit as soon as we leave the desired chromosome. This must come before the flag
                    // filters: otherwise filtered reads beyond the chromosome are skipped rather than
                    // ending the loop, so the scan runs on through the file and readCount is inflated.
                    // desiredRefIndex is never -1 here, so a RefID of -1 also stops the loop.
                    if (alignment.RefID != desiredRefIndex)
                    {
                        break;
                    }

                    readCount++;

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate,
                    // forward-strand main alignments:
                    if (!alignment.IsMapped())
                    {
                        continue;
                    }
                    if (alignment.IsFailedQC())
                    {
                        continue;
                    }
                    if (alignment.IsDuplicate())
                    {
                        continue;
                    }
                    if (alignment.IsReverseStrand())
                    {
                        continue;
                    }
                    if (!alignment.IsMainAlignment())
                    {
                        continue;
                    }

                    // Require the alignment to start with 35 bases of non-indel:
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
                    {
                        continue;
                    }

                    if (isPairedEnd && !alignment.IsProperPair())
                    {
                        continue;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }
                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Looks up the reference index of <paramref name="chromosomeName"/>, stores it in
 /// <c>_bamIndexFilter</c> and asks the reader to jump to position 0 of that reference.
 /// The result of the jump is not checked.
 /// </summary>
 /// <param name="chromosomeName">Name of the chromosome to move the reader to.</param>
 public void JumpToChromosome(string chromosomeName)
 {
     var referenceIndex = _bamReader.GetReferenceIndex(chromosomeName);

     // Record the index before jumping, as the field assignment precedes the jump.
     _bamIndexFilter = referenceIndex;
     _bamReader.Jump(referenceIndex, 0);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Jumps the underlying reader to <paramref name="positionIndex"/> on the reference whose
        /// <c>Name</c> equals <paramref name="chromosomeName"/>.
        /// </summary>
        /// <param name="chromosomeName">Name of the reference to look up in <c>_references</c>.</param>
        /// <param name="positionIndex">Position to jump to on that reference; defaults to 0.</param>
        /// <returns>Whatever the underlying reader's <c>Jump</c> returns.</returns>
        public bool Jump(string chromosomeName, int positionIndex = 0)
        {
            // NOTE(review): presumably LINQ's First, which throws when no reference matches - confirm.
            var matchingReference = _references.First(candidate => candidate.Name == chromosomeName);

            return _bamReader.Jump(matchingReference.Index, positionIndex);
        }