Exemplo n.º 1
0
        /// <summary>
        /// Exercises <c>Jump</c> and <c>JumpForward</c> on the small "bwaXC.bam" test file:
        /// ordinary jumps may move in either direction, while a forward-only jump to an
        /// earlier position is expected to leave the reader where it is.
        /// </summary>
        public void TestJump()
        {
            const string chromosome = "chr1";
            const int farPosition = 20200;
            const int nearPosition = 200;

            var bamPath = Path.Combine(TestPaths.LocalTestDataDirectory, "bwaXC.bam");

            using (var reader = new BamReader(bamPath))
            {
                var alignment = new BamAlignment();

                // Regular jump to the far position, then read the next alignment.
                Assert.True(reader.Jump(reader.GetReferenceIndex(chromosome), farPosition));
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position > 18000);

                // Regular jump back towards the start of the chromosome.
                Assert.True(reader.Jump(reader.GetReferenceIndex(chromosome), nearPosition));
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position < 250);

                // Forward-only jumping: moving ahead behaves like a regular jump.
                Assert.True(reader.JumpForward(reader.GetReferenceIndex(chromosome), farPosition));
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position > 18000);

                // Forward-only jumping: a request that lies behind us must not move the reader.
                var offsetBeforeBackwardRequest = reader.Tell();
                Assert.True(reader.JumpForward(reader.GetReferenceIndex(chromosome), nearPosition));
                Assert.Equal(offsetBeforeBackwardRequest, reader.Tell());
                Assert.True(reader.GetNextAlignment(ref alignment, true));
                Assert.True(alignment.Position > 18000);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the subprocess command line and the reference-name-to-index map for the
        /// input BAM, then hands both to <c>GeminiMultiProcessor.Execute</c>.
        /// </summary>
        /// <remarks>
        /// No exception handling is done here: the wrapper is expected to handle all
        /// throwing and catching.
        /// </remarks>
        protected override void ProgramExecution()
        {
            var optionsUsed = _appOptionParser.ParsingResult.OptionsUsed;

            // Option names that are stripped from the command line forwarded to subprocesses.
            var doNotPassToSubprocess = new List<string>
            {
                "outFolder", "numProcesses", "exePath", "intermediateDir", "multiprocess", "chromosomes"
            };

            var cmdLineList = MultiProcessHelpers.GetCommandLineWithoutIgnoredArguments(optionsUsed, doNotPassToSubprocess);

            // Build the chromosome filter once instead of re-materialising and linearly
            // scanning a list for every reference name. A null filter means "keep everything".
            var requestedChromosomes = _options.Chromosomes == null
                ? null
                : new HashSet<string>(_options.Chromosomes);

            var refNameMapping = new Dictionary<string, int>();

            using (var bamReader = new BamReader(_options.InputBam))
            {
                foreach (var referenceName in bamReader.GetReferenceNames())
                {
                    if (requestedChromosomes != null && !requestedChromosomes.Contains(referenceName))
                    {
                        continue;
                    }

                    refNameMapping.Add(referenceName, bamReader.GetReferenceIndex(referenceName));
                }
            }

            var taskManager     = new CliTaskManager(_options.NumProcesses);
            var geminiProcessor = new GeminiMultiProcessor(_options, new CliTaskCreator());

            var samtoolsWrapper = new SamtoolsWrapper(_options.GeminiOptions.SamtoolsPath, _options.GeminiOptions.IsWeirdSamtools);

            geminiProcessor.Execute(taskManager, refNameMapping, cmdLineList, samtoolsWrapper);
        }
        /// <summary>
        /// Opens the given BAM file and returns the reference sequence metadata reported by
        /// the reader.
        /// </summary>
        /// <param name="inBam">Path of the BAM file to open.</param>
        /// <returns>The list returned by <c>BamReader.GetReferences()</c>.</returns>
        private List<GenomeMetadata.SequenceMetadata> GetReferences(string inBam)
        {
            // The previous version also built an index-to-name dictionary here that was
            // never read; that dead work has been dropped.
            using (var reader = new BamReader(inBam))
            {
                return reader.GetReferences();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates a <c>PairHandler</c> backed by a <c>BasicStitcher</c> configured from the
        /// current options, together with a reference-index-to-name map read from the input BAM.
        /// </summary>
        /// <param name="readStatuses">Status counter passed through to the pair handler.</param>
        /// <returns>The newly constructed pair handler.</returns>
        private IReadPairHandler CreatePairHandler(ReadStatusCounter readStatuses)
        {
            var basicStitcher = new BasicStitcher(
                _options.MinBaseCallQuality,
                useSoftclippedBases: _options.UseSoftClippedBases,
                nifyDisagreements: _options.NifyDisagreements,
                debug: _options.Debug,
                nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
                ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
                maxReadLength: _options.MaxReadLength);

            // Map each reference index in the input BAM to its reference name.
            var namesByRefIndex = new Dictionary<int, string>();
            using (var bamReader = new BamReader(_inBam))
            {
                foreach (var name in bamReader.GetReferenceNames())
                {
                    var refIndex = bamReader.GetReferenceIndex(name);
                    namesByRefIndex.Add(refIndex, name);
                }
            }

            return new PairHandler(namesByRefIndex, basicStitcher, _options.FilterUnstitchablePairs, readStatuses);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Creates the output BAM writer, reusing the reference list of the input BAM and a
        /// header derived from the input header via <c>UpdateBamHeader</c>.
        /// </summary>
        /// <returns>A <c>BamWriterWrapper</c> around a <c>BamWriter</c> for the output path.</returns>
        private IBamWriter CreateBamWriter()
        {
            string bamHeader;
            List<GenomeMetadata.SequenceMetadata> bamReferences;

            // Only the references and the header are needed from the input file. The
            // index-to-name dictionary that used to be built here was never read and has
            // been removed.
            using (var reader = new BamReader(_inBam))
            {
                bamReferences = reader.GetReferences();
                var oldBamHeader = reader.GetHeader();
                bamHeader = UpdateBamHeader(oldBamHeader);
            }

            return new BamWriterWrapper(new BamWriter(_outBam, bamHeader, bamReferences));
        }
Exemplo n.º 6
0
        /// <summary>
        /// Creates the multithreaded output BAM writer, reusing the reference list of the
        /// input BAM and a header derived from the input header via <c>UpdateBamHeader</c>.
        /// </summary>
        /// <returns>
        /// A <c>BamWriterMultithreaded</c> when <c>SortMemoryGB</c> is zero or negative;
        /// otherwise a <c>BamWriterInMem</c> constructed with that memory setting.
        /// </returns>
        private IBamWriterMultithreaded CreateBamWriter()
        {
            string bamHeader;
            List<GenomeMetadata.SequenceMetadata> bamReferences;

            // Only the references and the header are needed from the input file. The
            // index-to-name dictionary that used to be built here was never read and has
            // been removed.
            using (var reader = new BamReader(_inBam))
            {
                bamReferences = reader.GetReferences();
                var oldBamHeader = reader.GetHeader();
                bamHeader = UpdateBamHeader(oldBamHeader);
            }

            if (_options.SortMemoryGB <= 0)
            {
                return new BamWriterMultithreaded(_outBam, bamHeader, bamReferences, _options.NumThreads, 1);
            }

            return new BamWriterInMem(_outBam, bamHeader, bamReferences, _options.SortMemoryGB, _options.NumThreads, 1);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates one <c>PairHandler</c> per thread. Every handler gets its own
        /// <c>BasicStitcher</c>, while all of them share the same reference-index-to-name map
        /// and the same status counter.
        /// </summary>
        /// <param name="readStatuses">Status counter handed to every pair handler.</param>
        /// <param name="numThreads">Number of handlers to create.</param>
        /// <returns>The list of created handlers.</returns>
        private List<IReadPairHandler> CreatePairHandlers(ReadStatusCounter readStatuses, int numThreads)
        {
            var pairHandlers = new List<IReadPairHandler>(numThreads);

            // Map each reference index in the input BAM to its reference name.
            var namesByRefIndex = new Dictionary<int, string>();
            using (var bamReader = new BamReader(_inBam))
            {
                foreach (var name in bamReader.GetReferenceNames())
                {
                    var refIndex = bamReader.GetReferenceIndex(name);
                    namesByRefIndex.Add(refIndex, name);
                }
            }

            var remaining = numThreads;
            while (remaining > 0)
            {
                var basicStitcher = new BasicStitcher(
                    _options.MinBaseCallQuality,
                    useSoftclippedBases: _options.UseSoftClippedBases,
                    nifyDisagreements: _options.NifyDisagreements,
                    debug: _options.Debug,
                    nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
                    ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
                    maxReadLength: _options.MaxReadLength,
                    ignoreReadsAboveMaxLength: _options.IgnoreReadsAboveMaxLength,
                    minMapQuality: _options.FilterMinMapQuality,
                    dontStitchHomopolymerBridge: _options.DontStitchHomopolymerBridge);

                pairHandlers.Add(new PairHandler(namesByRefIndex, basicStitcher, readStatuses, _options.FilterUnstitchablePairs, true));
                remaining--;
            }

            return pairHandlers;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Step 2: Walks the reads of the configured chromosome in the given bam file and
        /// passes every read that may overlap an upcoming variant to <c>ProcessReadBases</c>,
        /// which gathers the ref and variant allele counts for the variants of interest.
        /// </summary>
        /// <param name="bamPath">Path of the bam file to scan.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when the configured chromosome name is not found in the bam file.
        /// </exception>
        protected void ProcessBamFile(string bamPath)
        {
            Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
            int recordsSeen  = 0;
            int variantCursor = 0;

            using (BamReader bam = new BamReader(bamPath))
            {
                int targetRefId = bam.GetReferenceIndex(this.Chromosome);
                if (targetRefId < 0)
                {
                    throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
                }

                Console.WriteLine("Jump to refid {0} {1}", targetRefId, this.Chromosome);
                bam.Jump(targetRefId, 0);

                BamAlignment read = new BamAlignment();
                while (bam.GetNextAlignment(ref read, false))
                {
                    // Stop once we are past our chromosome of interest (or hit a read without a position).
                    if (!read.HasPosition() || read.RefID > targetRefId)
                    {
                        break;
                    }

                    // Not yet on our chromosome of interest: keep reading.
                    if (read.RefID < targetRefId)
                    {
                        continue;
                    }

                    recordsSeen++;
                    if (recordsSeen % 1000000 == 0)
                    {
                        Console.WriteLine("Record {0} at {1}...", recordsSeen, read.Position);
                    }

                    // Only primary, mapped, non-duplicate reads above the map quality threshold are counted.
                    bool countWorthy = read.IsPrimaryAlignment()
                                       && read.IsMapped()
                                       && !read.IsDuplicate()
                                       && !(read.MapQuality <= MinimumMapQ);
                    if (!countWorthy)
                    {
                        continue;
                    }

                    // Advance the variant cursor past variants that lie before this read.
                    while (variantCursor < this.Variants.Count
                           && this.Variants[variantCursor].ReferencePosition < read.Position)
                    {
                        variantCursor++;
                    }

                    // No variants left: nothing further to count.
                    if (variantCursor >= this.Variants.Count)
                    {
                        break;
                    }

                    // Only hand over reads that start within 1000 positions of the next variant;
                    // such a read potentially overlaps that variant (and further variants).
                    if (!(read.Position + 1000 < this.Variants[variantCursor].ReferencePosition))
                    {
                        ProcessReadBases(read, variantCursor);
                    }
                }
            }

            Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
        }
Exemplo n.º 9
0
            /// <summary>
            /// Bins the fragments of <c>Chromosome</c>: jumps to the chromosome in the BAM file
            /// and passes every alignment on it to <c>BinOneAlignment</c>, resetting and updating
            /// <c>usableFragmentCount</c> along the way.
            /// </summary>
            /// <remarks>
            /// Throws <see cref="Exception"/> when the BAM index file is missing, and
            /// <c>Illumina.Common.IlluminaException</c> when the chromosome is not in the BAM,
            /// when alignments are not sorted by position, or when no paired alignment was seen.
            /// If the jump to the chromosome fails, the method returns without raising the
            /// "no paired alignments" error.
            /// </remarks>
            private void binFragments()
            {
                // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
                if (!Bam.Index.Exists)
                {
                    throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
                }

                long pairedAlignmentCount = 0; // keep track of paired alignments

                usableFragmentCount = 0;
                using (BamReader reader = new BamReader(Bam.BamFile.FullName))
                {
                    int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
                    if (desiredRefIndex == -1)
                    {
                        throw new Illumina.Common.IlluminaException(
                                  string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
                    }

                    if (!reader.Jump(desiredRefIndex, 0))
                    {
                        // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                        // .bam file.  That is not uncommon e.g. for truseq amplicon.
                        return;
                    }

                    Dictionary<string, int> readNameToBinIndex    = new Dictionary<string, int>();
                    HashSet<string>         samePositionReadNames = new HashSet<string>();
                    int          binIndexStart = 0;
                    int          prevPosition  = -1;
                    BamAlignment alignment     = new BamAlignment();
                    while (reader.GetNextAlignment(ref alignment, true))
                    {
                        // Quit as soon as we leave the desired reference. Because desiredRefIndex
                        // cannot be -1 at this point, this also covers alignments whose RefID is -1,
                        // so the former separate "refID == -1" check was unreachable and is gone.
                        if (alignment.RefID != desiredRefIndex)
                        {
                            break;
                        }

                        if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
                        {
                            throw new Illumina.Common.IlluminaException(
                                      string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
                        }
                        prevPosition = alignment.Position;

                        if (alignment.IsPaired())
                        {
                            pairedAlignmentCount++;
                        }

                        BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                                        samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
                    }
                }

                if (pairedAlignmentCount == 0)
                {
                    throw new Illumina.Common.IlluminaException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
                }
            }
Exemplo n.º 10
0
        /// <summary>
        /// Reads the alignments of one chromosome from a bam file and records the start
        /// position of every alignment that passes the filters in <paramref name="observed"/>.
        /// </summary>
        /// <param name="bamFile">Bam file to read alignments from; "<c>bamFile</c>.bai" must exist.</param>
        /// <param name="isPairedEnd">When true, alignments that are not part of a proper pair are skipped.</param>
        /// <param name="chromosome">Name of the reference sequence to process.</param>
        /// <param name="coverageMode">
        /// <c>Binary</c> writes 1 to <c>observed.Data</c> at the alignment position; any other mode
        /// calls <c>observed.Set</c>. <c>GCContentWeighted</c> additionally stores the fragment length.
        /// </param>
        /// <param name="observed">Hit array updated at the position of each kept alignment.</param>
        /// <param name="fragmentLengths">
        /// Per-position fragment lengths, clamped to the range 0..<see cref="Int16.MaxValue"/>;
        /// only written in <c>GCContentWeighted</c> mode.
        /// </param>
        /// <exception cref="Exception">The .bai index file does not exist.</exception>
        /// <exception cref="ApplicationException">The chromosome is not present in the bam file.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";

            if (!File.Exists(indexPath))
            {
                throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                              string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                                            bamFile));
                }

                if (!reader.Jump(desiredRefIndex, 0))
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }

                int          readCount     = 0;
                int          keptReadCount = 0;
                BamAlignment alignment     = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    readCount++;

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate,
                    // on the forward strand and a main alignment:
                    if (!alignment.IsMapped())
                    {
                        continue;
                    }
                    if (alignment.IsFailedQC())
                    {
                        continue;
                    }
                    if (alignment.IsDuplicate())
                    {
                        continue;
                    }
                    if (alignment.IsReverseStrand())
                    {
                        continue;
                    }
                    if (!alignment.IsMainAlignment())
                    {
                        continue;
                    }

                    // Require the alignment to start with 35 bases of non-indel:
                    // NOTE(review): this indexes CigarData[0] without an emptiness check; it
                    // presumably relies on mapped reads always carrying a CIGAR - confirm.
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
                    {
                        continue;
                    }

                    if (isPairedEnd && !alignment.IsProperPair())
                    {
                        continue;
                    }

                    // Quit if the current reference index is different from the desired reference
                    // index. Because desiredRefIndex cannot be -1 at this point, this also covers
                    // alignments whose RefID is -1, so the former separate "refID == -1" check was
                    // unreachable and has been removed.
                    if (alignment.RefID != desiredRefIndex)
                    {
                        break;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }

                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }
Exemplo n.º 11
0
 /// <summary>
 /// Looks up the reference index of the given chromosome, stores it in
 /// <c>_bamIndexFilter</c>, and jumps the reader to position 0 on that reference.
 /// </summary>
 /// <param name="chromosomeName">Name of the chromosome to jump to.</param>
 public void JumpToChromosome(string chromosomeName)
 {
     // NOTE(review): neither the looked-up index nor the result of Jump is checked here;
     // confirm that callers only pass chromosome names that exist in the bam file.
     var referenceIndex = _bamReader.GetReferenceIndex(chromosomeName);
     _bamIndexFilter = referenceIndex;

     _bamReader.Jump(_bamIndexFilter, 0);
 }