Example #1
0
        /// <summary>
        /// Second pass: copies every read from <paramref name="tmpFile"/> to <paramref name="writer"/>,
        /// updating the mate-related fields of any read whose mate has an entry in <c>_remappings</c>.
        /// </summary>
        /// <param name="tmpFile">Path of the BAM file to read back.</param>
        /// <param name="writer">Writer that receives every read, adjusted or not.</param>
        private void AdjustMates(string tmpFile, BamWriter writer)
        {
            Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);

            var current = new BamAlignment();

            using (var bamReader = new BamReader(tmpFile))
            {
                while (bamReader.GetNextAlignment(ref current, false))
                {
                    // Look up the *mate* of this read: first-in-pair reads use suffix 2, all others suffix 1.
                    var mateKey = string.Format("{0}-{1}", current.Name, current.IsFirstMate() ? 2 : 1);

                    RemapInfo mateInfo;
                    if (_remappings.TryGetValue(mateKey, out mateInfo))
                    {
                        if (mateInfo.Start == -1)
                        {
                            // A start of -1 is treated as "mate has no position": clear the pairing information.
                            current.SetIsMateUnmapped(true);
                            current.SetIsProperPair(false);
                            current.FragmentLength = 0;
                        }
                        else
                        {
                            current.MatePosition = mateInfo.Start;
                        }

                        if (current.IsMateMapped() && current.IsProperPair())
                        {
                            // Last reference position covered by this read.
                            int lastRefBase = current.Position + (int)current.CigarData.GetReferenceSpan() - 1;

                            // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                            if (current.Position < mateInfo.Start)
                            {
                                current.FragmentLength = mateInfo.End - current.Position + 1;
                            }
                            else
                            {
                                current.FragmentLength = mateInfo.Start - lastRefBase - 1;
                            }
                        }
                    }

                    // Reads without a remapped mate are written through unchanged.
                    writer.WriteAlignment(current);
                }
            }
        }
Example #2
0
    /// <summary>
    /// Opens the BAM file named by the first command-line argument, requests ten alignments in a row
    /// and prints the name and length held by the alignment object after each request.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is passed to <c>BamReader.Open</c>.</param>
    public static void Main(String[] args)
    {
        var alignment = new BamAlignment();
        var bamReader = new BamReader();

        bamReader.Open(args[0]);

        int remaining = 10;
        while (remaining > 0)
        {
            bamReader.GetNextAlignment(alignment);
            Console.WriteLine("{0} {1}", alignment.Name, alignment.Length);

            // Index the first CIGAR element; the value itself is not used afterwards.
            var firstCigarElement = alignment.CigarData[0];

            remaining--;
        }
    }
Example #3
0
        /// <summary>
        /// Stitches <paramref name="inputBam"/> one chromosome at a time (one job per entry of
        /// <c>_chroms</c>), then concatenates the per-chromosome outputs into a single
        /// <c>*.final.stitched.bam</c> in <paramref name="outFolder"/>, deleting each intermediate
        /// file once it has been copied.
        /// </summary>
        /// <param name="inputBam">Path of the BAM file to stitch.</param>
        /// <param name="outFolder">Folder receiving the intermediate and final BAM files.</param>
        /// <param name="stitcherOptions">Options forwarded to every <c>BamStitcher</c>.</param>
        public void Process(string inputBam, string outFolder, StitcherOptions stitcherOptions)
        {
            var manager       = new JobManager(10);
            var stitchJobs    = new List<IJob>();
            var chromBamPaths = new List<string>();
            var bamBaseName   = Path.GetFileNameWithoutExtension(inputBam);

            // One stitching job, and one intermediate output file, per chromosome.
            foreach (var chrom in _chroms)
            {
                var chromOutput = Path.Combine(outFolder, bamBaseName + "." + chrom + ".stitched.bam");
                chromBamPaths.Add(chromOutput);

                var stitcher = new BamStitcher(inputBam, chromOutput, stitcherOptions, chrFilter: chrom);
                stitchJobs.Add(new GenericJob(() => stitcher.Execute(), "Stitcher_" + chrom));
            }

            manager.Process(stitchJobs);

            // Concatenate the per-chromosome results in the order the chromosomes were listed.
            Logger.WriteToLog("Writing final bam.");

            var outputBam = Path.Combine(outFolder, bamBaseName + ".final.stitched.bam");

            using (var mergedWriter = new BamWriter(outputBam, _header, _references))
            {
                foreach (var chromBam in chromBamPaths)
                {
                    Logger.WriteToLog("Adding " + chromBam + " to final bam.");
                    var record = new BamAlignment();

                    using (var chromReader = new BamReader(chromBam))
                    {
                        while (chromReader.GetNextAlignment(ref record, false))
                        {
                            mergedWriter.WriteAlignment(record);
                        }
                    }

                    // The intermediate file is no longer needed once its reads have been copied.
                    File.Delete(chromBam);
                }
            }

            Logger.WriteToLog("Finished combining per-chromosome bams into final bam at " + outputBam);
        }
Example #4
0
        /// <summary>
        /// Loads the next alignment into <paramref name="read"/>, skipping alignments that start at
        /// or beyond the end of the interval selected by <c>JumpIfNeeded</c>.
        /// </summary>
        /// <param name="read">Object that is reset with the reference name and the raw alignment.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="read"/> was populated; <c>false</c> (after calling
        /// <c>Dispose</c>) when no jump target remains, the underlying reader has no more alignments,
        /// or the alignment's reference differs from the active <c>_bamIndexFilter</c>.
        /// </returns>
        /// <exception cref="Exception">Thrown when <c>_bamReader</c> is null.</exception>
        public bool GetNextAlignment(Read read)
        {
            if (_bamReader == null)
            {
                throw new Exception("Already disposed.");
            }

            for (;;)
            {
                Region activeInterval = null;

                if (_rawAlignment == null)
                {
                    // First call: nothing has been read yet, so there is nothing to jump from.
                    _rawAlignment = new BamAlignment();
                }
                else
                {
                    var chrIntervals = GetIntervalsForChr(_rawAlignment.RefID);

                    // A null interval list signals that interval jumping is not applied.
                    if (chrIntervals != null && !JumpIfNeeded(chrIntervals, out activeInterval))
                    {
                        Dispose();
                        return false;
                    }
                }

                bool gotAlignment   = _bamReader.GetNextAlignment(ref _rawAlignment, false);
                bool wrongReference = gotAlignment && _bamIndexFilter > -1 && _rawAlignment.RefID != _bamIndexFilter;

                if (!gotAlignment || wrongReference)
                {
                    Dispose();
                    return false;
                }

                bool pastInterval = activeInterval != null && _rawAlignment.Position >= activeInterval.EndPosition;
                if (pastInterval)
                {
                    // Read is off the end of the interval: loop again to jump to the next one or scan to the end.
                    continue;
                }

                var reference = _references.FirstOrDefault(r => r.Index == _rawAlignment.RefID);
                read.Reset(reference?.Name, _rawAlignment);
                return true;
            }
        }
        /// <summary>
        /// Loads the next alignment from the underlying reader into <paramref name="read"/>.
        /// </summary>
        /// <param name="read">Object that is reset with the reference name and the raw alignment.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="read"/> was populated; <c>false</c> (after calling
        /// <c>Dispose</c>) when the reader has no more alignments or the alignment's reference
        /// differs from the active <c>_bamIndexFilter</c>.
        /// </returns>
        /// <exception cref="Exception">Thrown when <c>_bamReader</c> is null.</exception>
        public bool GetNextAlignment(Read read)
        {
            if (_bamReader == null)
            {
                throw new Exception("Already disposed.");
            }

            bool gotAlignment = _bamReader.GetNextAlignment(ref _rawAlignment, false);

            // A filter value of -1 or below means "no reference filter".
            bool accepted = gotAlignment && (_bamIndexFilter <= -1 || _rawAlignment.RefID == _bamIndexFilter);

            if (!accepted)
            {
                Dispose();
                return false;
            }

            var referenceName = _bamReader.GetReferenceNameByID(_rawAlignment.RefID);
            read.Reset(referenceName, _rawAlignment, _stitchReads);

            return true;
        }
Example #6
0
        /// <summary>
        /// Runs a stitcher processor on <paramref name="inBam"/> and checks that the BAM it produces
        /// at <paramref name="outBam"/> matches <paramref name="expBam"/> record by record
        /// (bases, position and qualities), including the number of records.
        /// </summary>
        /// <param name="inBam">Input BAM handed to the processor.</param>
        /// <param name="outBam">Path where the processor is expected to write its result; deleted up front if present.</param>
        /// <param name="expBam">BAM holding the expected records.</param>
        /// <param name="outFolder">Output folder passed to the processor.</param>
        /// <param name="threadbyChr">When true a <c>GenomeProcessor</c> is used, otherwise a <c>BamProcessor</c>.</param>
        /// <param name="stitcherOptions">Options passed to the processor.</param>
        private static void RunProcessorTest(string inBam, string outBam, string expBam, string outFolder, bool threadbyChr, StitcherOptions stitcherOptions)
        {
            if (File.Exists(outBam))
            {
                File.Delete(outBam);
            }

            Logger.OpenLog(TestPaths.LocalScratchDirectory, "StitcherTestLog.txt", true);
            try
            {
                var processor = threadbyChr ? (IStitcherProcessor) new GenomeProcessor(inBam) : new BamProcessor();
                processor.Process(inBam, outFolder, stitcherOptions);
            }
            finally
            {
                // Close the log even when processing throws, so a failure does not leave it open.
                Logger.CloseLog();
            }

            Assert.True(File.Exists(outBam));

            var observedAlignment = new BamAlignment();
            var expectedAlignment = new BamAlignment();

            using (var outReader = new BamReader(outBam))
            using (var expReader = new BamReader(expBam))
            {
                while (true)
                {
                    var nextObservation = outReader.GetNextAlignment(ref observedAlignment, true);
                    var nextExpected    = expReader.GetNextAlignment(ref expectedAlignment, true);

                    // Both files must run out of records at the same time; otherwise the output has
                    // missing or extra reads and the comparisons below would use a stale record.
                    Assert.Equal(nextExpected, nextObservation);

                    if (!nextExpected || expectedAlignment == null)
                    {
                        break;
                    }

                    Assert.Equal(expectedAlignment.Bases, observedAlignment.Bases);
                    Assert.Equal(expectedAlignment.Position, observedAlignment.Position);
                    Assert.Equal(expectedAlignment.Qualities, observedAlignment.Qualities);
                }

                outReader.Close();
                expReader.Close();
            }
        }
Example #7
0
 /// <summary>
 /// Jumps to the unaligned section of the input file and copies every read whose reference id
 /// is -1 from there to the end of the file into <paramref name="writer"/>.
 /// </summary>
 /// <param name="writer">Writer receiving the unaligned reads.</param>
 private void WriteUnalignedReads(BamWriter writer)
 {
     Logger.WriteToLog("Writing unaligned reads");

     using (var bamReader = new BamReader(_inputFile))
     {
         bamReader.JumpToUnaligned();

         var record = new BamAlignment();
         while (bamReader.GetNextAlignment(ref record, false))
         {
             // Reads that still carry a reference id are skipped ("last reads" before the unaligned tail).
             if (record.RefID == -1)
             {
                 writer.WriteAlignment(record);
             }
         }
     }
 }
Example #8
0
        /// <summary>
        /// Step 2: walks the reads of <c>this.Chromosome</c> in the given bam file and hands every
        /// countable read that may overlap an upcoming variant to <c>ProcessReadBases</c>.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to scan.</param>
        /// <exception cref="ArgumentException">The chromosome name is not present in the bam file.</exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int recordsSeen   = 0;
            int variantCursor = 0;

            using (BamReader bamReader = new BamReader(bamPath))
            {
                BamAlignment record      = new BamAlignment();
                int          targetRefId = bamReader.GetReferenceIndex(this.Chromosome);
                if (targetRefId < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
                }

                Console.WriteLine("Jump to refid {0} {1}", targetRefId, this.Chromosome);
                bamReader.Jump(targetRefId, 0);

                while (bamReader.GetNextAlignment(ref record, false))
                {
                    // Stop once we are past the chromosome of interest.
                    if (!record.HasPosition() || record.RefID > targetRefId)
                    {
                        break;
                    }

                    // Ignore anything located before the chromosome of interest.
                    if (record.RefID < targetRefId)
                    {
                        continue;
                    }

                    recordsSeen++;
                    if (recordsSeen % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", recordsSeen, record.Position);
                    }

                    // Skip over unaligned or other non-count-worthy reads.
                    bool notCountable = !record.IsPrimaryAlignment()
                                        || !record.IsMapped()
                                        || record.IsDuplicate()
                                        || record.MapQuality <= MinimumMapQ;
                    if (notCountable)
                    {
                        continue;
                    }

                    // Advance the variant cursor so it keeps up with the read position.
                    while (variantCursor < this.Variants.Count && this.Variants[variantCursor].ReferencePosition < record.Position)
                    {
                        variantCursor++;
                    }

                    // No variants left: nothing more to count.
                    if (variantCursor >= this.Variants.Count)
                    {
                        break;
                    }

                    // Only reads starting within 1000 positions of the next variant are examined;
                    // such a read potentially overlaps that variant (and further variants).
                    bool tooFarUpstream = record.Position + 1000 < this.Variants[variantCursor].ReferencePosition;
                    if (!tooFarUpstream)
                    {
                        ProcessReadBases(record, variantCursor);
                    }
                }
            }

            Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
        }
Example #9
0
            /// <summary>
            /// Bins fragments: reads every alignment of <c>Chromosome</c> from the BAM file and passes
            /// it to <c>BinOneAlignment</c>, resetting and accumulating <c>usableFragmentCount</c>.
            /// Returns without binning when the reader cannot jump to the chromosome (no reads for it).
            /// </summary>
            /// <exception cref="Exception">The BAM index file does not exist.</exception>
            /// <exception cref="Illumina.Common.IlluminaException">
            /// The chromosome is not in the BAM header, the alignments are not sorted by position,
            /// or no paired alignment was seen on the chromosome.
            /// </exception>
            private void binFragments()
            {
                // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
                if (!Bam.Index.Exists)
                {
                    throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
                }

                long pairedAlignmentCount = 0; // keep track of paired alignments

                usableFragmentCount = 0;
                using (BamReader reader = new BamReader(Bam.BamFile.FullName))
                {
                    int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
                    if (desiredRefIndex == -1)
                    {
                        throw new Illumina.Common.IlluminaException(
                                  string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
                    }
                    bool result = reader.Jump(desiredRefIndex, 0);
                    if (!result)
                    {
                        // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                        // .bam file.  That is not uncommon e.g. for truseq amplicon.
                        return;
                    }

                    Dictionary <string, int> readNameToBinIndex    = new Dictionary <string, int>();
                    HashSet <string>         samePositionReadNames = new HashSet <string>();
                    int          binIndexStart = 0;
                    int          prevPosition  = -1;
                    BamAlignment alignment     = new BamAlignment();
                    while (reader.GetNextAlignment(ref alignment, true))
                    {
                        // Quit as soon as the current reference index differs from the desired one.
                        // desiredRefIndex is never -1 here, so this also stops at reads without a reference.
                        if (alignment.RefID != desiredRefIndex)
                        {
                            break;
                        }

                        if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
                        {
                            throw new Illumina.Common.IlluminaException(
                                      string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
                        }
                        prevPosition = alignment.Position;

                        if (alignment.IsPaired())
                        {
                            pairedAlignmentCount++;
                        }

                        BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                                        samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
                    }
                }
                if (pairedAlignmentCount == 0)
                {
                    throw new Illumina.Common.IlluminaException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
                }
            }
Example #10
0
        /// <summary>
        /// Reads the alignments of one chromosome from a bam file and records the start position of
        /// every alignment that passes the filters in <paramref name="observed"/>.
        /// Returns without recording anything when the reader cannot jump to the chromosome.
        /// </summary>
        /// <param name="bamFile">Bam file to read alignments from; <c>bamFile + ".bai"</c> must exist.</param>
        /// <param name="isPairedEnd">When true, alignments that are not flagged as a proper pair are skipped.</param>
        /// <param name="chromosome">Name of the reference sequence to load.</param>
        /// <param name="coverageMode">
        /// <c>Binary</c> stores 1 at the position in <c>observed.Data</c>; any other mode calls
        /// <c>observed.Set</c>. <c>GCContentWeighted</c> additionally stores the fragment length.
        /// </param>
        /// <param name="observed">Hit array indexed by alignment position.</param>
        /// <param name="fragmentLengths">
        /// Per-position fragment lengths, clamped to the range 0..<see cref="Int16.MaxValue"/>;
        /// only written in <c>GCContentWeighted</c> mode.
        /// </param>
        /// <exception cref="Exception">The .bai index file does not exist.</exception>
        /// <exception cref="ApplicationException">The chromosome is not present in the bam file.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";

            if (!File.Exists(indexPath))
            {
                throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                              string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                                            bamFile));
                }
                bool result = reader.Jump(desiredRefIndex, 0);
                if (!result)
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }
                int          readCount     = 0;
                int          keptReadCount = 0;
                BamAlignment alignment     = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    readCount++;

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate,
                    // forward-strand main alignments:
                    if (!alignment.IsMapped())
                    {
                        continue;
                    }
                    if (alignment.IsFailedQC())
                    {
                        continue;
                    }
                    if (alignment.IsDuplicate())
                    {
                        continue;
                    }
                    if (alignment.IsReverseStrand())
                    {
                        continue;
                    }
                    if (!alignment.IsMainAlignment())
                    {
                        continue;
                    }

                    // Require the alignment to start with 35 bases of non-indel:
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
                    {
                        continue;
                    }

                    if (isPairedEnd && !alignment.IsProperPair())
                    {
                        continue;
                    }

                    // Quit if the current reference index is different from the desired reference index.
                    // desiredRefIndex is never -1 here, so no separate check for RefID == -1 is needed.
                    if (alignment.RefID != desiredRefIndex)
                    {
                        break;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }
                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }
        /// <summary>
        /// Writes four alignments through two handles of a <c>BamWriterMultithreaded</c> into a
        /// mocked stream, reads the captured bytes back with a <c>BamReader</c>, and checks that the
        /// four positions written are the four positions read.
        /// </summary>
        public void TestMultithreaded()
        {
            BamAlignment bamAlignment = new BamAlignment()
            {
                Bases     = "ACGT",
                Bin       = 0,
                CigarData = new CigarAlignment("4M"),
                Name      = "Should have a constructor which initializes the members",
                Position  = 1,
                Qualities = new byte[4],
                TagData   = new byte[4]
            };

            // Four copies of the template, at positions 1, 2, 10 and 11.
            List <BamAlignment> bamAlignments = new List <BamAlignment>();

            bamAlignments.Add(new BamAlignment(bamAlignment));

            bamAlignment.Position = 2;
            bamAlignments.Add(new BamAlignment(bamAlignment));

            bamAlignment.Position = 10;
            bamAlignments.Add(new BamAlignment(bamAlignment));

            bamAlignment.Position = 11;
            bamAlignments.Add(new BamAlignment(bamAlignment));

            MemoryStream memoryBuffer = new MemoryStream();

            // The mocked stream forwards every Write into memoryBuffer so the bytes can be read back later.
            var str = new Mock <MemoryStream>();

            str.Setup(x => x.Write(It.IsAny <byte[]>(), It.IsAny <int>(), It.IsAny <int>())).Callback <byte[], int, int>((buffer, offset, count) =>
                                                                                                                         { memoryBuffer.Write(buffer, offset, count); });
            str.SetupGet(x => x.CanWrite).Returns(true);

            using (var bamWriter = new BamWriterMultithreaded(
                       str.Object,
                       "",
                       new System.Collections.Generic.List <GenomeMetadata.SequenceMetadata>(),
                       2)) // 2 threads
            {
                var handles = bamWriter.GenerateHandles();

                // Write 2 alignments on the first handle
                // The positions are 1 and 10
                handles[0].WriteAlignment(bamAlignments[0]);
                handles[0].WriteAlignment(bamAlignments[2]);

                // Write 2 alignments on the second handle
                // The positions are 2 and 11
                handles[1].WriteAlignment(bamAlignments[1]);
                handles[1].WriteAlignment(bamAlignments[3]);

                // This will sort and merge the alignments, and write the results to the stream
                bamWriter.Flush();
            }

            memoryBuffer.Position = 0;

            var bamAlignmentsWritten = new List <BamAlignment>();

            // The using block disposes the reader even when one of the assertions below fails.
            using (BamReader bamReader = new BamReader())
            {
                bamReader.Open(memoryBuffer);

                // Verify that all four BamAlignment objects can be read back.
                for (int i = 0; i < 4; ++i)
                {
                    BamAlignment al = new BamAlignment();
                    Assert.True(bamReader.GetNextAlignment(ref al, false));

                    bamAlignmentsWritten.Add(new BamAlignment(al));
                }

                bamReader.Close();
            }

            // Compare positions after sorting what was read back, so the check covers the set of
            // positions rather than the order in which the reader returned them.
            bamAlignmentsWritten.Sort((al1, al2) => (al1.Position.CompareTo(al2.Position)));
            for (int i = 0; i < 4; ++i)
            {
                // xUnit convention: expected value first, actual value second.
                Assert.Equal(bamAlignments[i].Position, bamAlignmentsWritten[i].Position);
            }
        }