Example #1
0
        /// <summary>
        /// Opens a reader on <c>_bamFilePath</c>, caches the bam's references ordered by index,
        /// refreshes the stitched/collapsed flags from the header and jumps to the starting chromosome.
        /// </summary>
        /// <param name="chromosomeFilter">
        /// Optional reference name. When supplied, <c>_bamIndexFilter</c> is set to that reference's index
        /// and reading starts there; otherwise reading starts at the first reference.
        /// </param>
        /// <exception cref="InvalidDataException">The requested chromosome is not among the bam's references.</exception>
        private void InitializeReader(string chromosomeFilter = null)
        {
            _bamReader = new BamReader(_bamFilePath);
            _references = _bamReader.GetReferences().OrderBy(r => r.Index).ToList();

            // The header is only consulted for stitching while the flag is still unset.
            if (!_bamIsStitched)
            {
                _bamIsStitched = CheckBamHeaderIfBamHasBeenStitched(_bamReader.GetHeader());
            }

            SourceIsCollapsed = CheckIfBamHasBeenCollapsed(_bamReader.GetHeader());

            string startChromosome;

            if (string.IsNullOrEmpty(chromosomeFilter))
            {
                // No filter requested: begin at the lowest-index reference.
                startChromosome = _references.First().Name;
            }
            else
            {
                var matchingReference = _references.FirstOrDefault(r => r.Name == chromosomeFilter);
                if (matchingReference == null)
                {
                    throw new InvalidDataException(string.Format("Cannot set chr filter to '{0}'.  This chr is not in the bam.", chromosomeFilter));
                }

                _bamIndexFilter = matchingReference.Index;
                startChromosome = chromosomeFilter;
            }

            Jump(startChromosome);
        }
Example #2
0
        /// <summary>
        /// Opens a reader on <c>_bamFilePath</c>, caches the bam's references ordered by index,
        /// records whether the header marks the bam as stitched, and jumps to the starting location.
        /// </summary>
        /// <param name="chromosomeFilter">
        /// Optional reference name. When supplied, <c>_bamIndexFilter</c> is set to that reference's index
        /// and reading starts there; otherwise reading starts at the first reference.
        /// </param>
        /// <exception cref="Exception">The requested chromosome is not among the bam's references.</exception>
        private void InitializeReader(string chromosomeFilter = null)
        {
            _bamReader = new BamReader(_bamFilePath);
            _references = _bamReader.GetReferences().OrderBy(r => r.Index).ToList();
            _bamIsStitched = CheckIfBamHasBeenStitched(_bamReader.GetHeader());

            string startChromosome;

            if (string.IsNullOrEmpty(chromosomeFilter))
            {
                // No filter requested: begin at the lowest-index reference.
                startChromosome = _references.First().Name;
            }
            else
            {
                var matchingReference = _references.FirstOrDefault(r => r.Name == chromosomeFilter);
                if (matchingReference == null)
                {
                    throw new Exception(string.Format("Cannot set chr filter to '{0}'.  This chr is not in the bam.", chromosomeFilter));
                }

                _bamIndexFilter = matchingReference.Index;
                startChromosome = chromosomeFilter;
            }

            // Start one before the first remaining interval of the chromosome when one is registered, else at 0.
            var hasIntervals = _remainingIntervals != null && _remainingIntervals.ContainsKey(startChromosome);
            int startPosition = hasIntervals
                ? _remainingIntervals[startChromosome][0].StartPosition - 1
                : 0;

            Jump(startChromosome, startPosition);
        }
 /// <summary>
 /// Reads the header of <paramref name="inBam"/> and returns the result of passing it through UpdateBamHeader.
 /// </summary>
 /// <param name="inBam">Path of the bam whose header is read.</param>
 /// <returns>The header as returned by UpdateBamHeader.</returns>
 private string GetHeader(string inBam)
 {
     using (var bamReader = new BamReader(inBam))
     {
         return UpdateBamHeader(bamReader.GetHeader());
     }
 }
Example #4
0
        /// <summary>
        /// Caches the reference names, header and references of the input bam in
        /// <c>_chroms</c>, <c>_header</c> and <c>_references</c>.
        /// </summary>
        private void Initialize()
        {
            // The reader is only needed while the metadata is copied out; dispose it
            // afterwards so its underlying resources are not leaked.
            using (var baseReader = new BamReader(_inputBam))
            {
                _chroms     = baseReader.GetReferenceNames();
                _header     = baseReader.GetHeader();
                _references = baseReader.GetReferences();
            }
        }
Example #5
0
 /// <summary>
 /// Creates a BamWriter for <paramref name="outputFile"/> from the input bam's references and
 /// its header after the header has been passed through UpdateBamHeader.
 /// </summary>
 /// <param name="outputFile">Path handed to the new BamWriter.</param>
 /// <returns>The newly constructed writer.</returns>
 private BamWriter GetWriter(string outputFile)
 {
     using (var inputReader = new BamReader(_inputFile))
     {
         var references = inputReader.GetReferences();
         string rawHeader = inputReader.GetHeader();
         var rewrittenHeader = UpdateBamHeader(rawHeader);

         // The writer is constructed while the input reader is still open.
         return new BamWriter(outputFile, rewrittenHeader, references);
     }
 }
Example #6
0
        /// <summary>
        /// Makes sure the directory of <c>_outputFile</c> exists and opens <c>_bamWriter</c> on
        /// <c>_temp1File</c>, reusing the header and references of the input bam.
        /// </summary>
        public void Initialize()
        {
            var outputDirectory = Path.GetDirectoryName(_outputFile);

            // GetDirectoryName returns null or "" when the path has no directory part (e.g. a bare
            // file name); CreateDirectory would throw on those, so only create a real directory.
            // CreateDirectory does nothing when the directory already exists.
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                Directory.CreateDirectory(outputDirectory);
            }

            using (var reader = new BamReader(_inputFile))
            {
                var genome = reader.GetReferences();

                _bamWriter = new BamWriter(_temp1File, reader.GetHeader(), genome);
            }
        }
Example #7
0
        /// <summary>
        /// Creates the output bam writer for <c>_outBam</c>, using the references of <c>_inBam</c> and
        /// its header after the header has been passed through UpdateBamHeader.
        /// </summary>
        /// <returns>A BamWriterWrapper around the new BamWriter.</returns>
        private IBamWriter CreateBamWriter()
        {
            string bamHeader;
            List<GenomeMetadata.SequenceMetadata> bamReferences;

            // The input reader is only needed to copy out the references and header.
            // (A reference-index-to-name dictionary used to be built here but was never read.)
            using (var reader = new BamReader(_inBam))
            {
                bamReferences = reader.GetReferences();
                bamHeader = UpdateBamHeader(reader.GetHeader());
            }

            return new BamWriterWrapper(new BamWriter(_outBam, bamHeader, bamReferences));
        }
Example #8
0
        /// <summary>
        /// Writes a small template bam, creates a writer for a second bam through BamWriterFactory,
        /// and verifies that the second bam's header contains the Gemini program entries.
        /// </summary>
        public void HappyPath()
        {
            //var bamFilePath = Path.Combine(TestPaths.SharedBamDirectory, "Chr17Chr19.bam");
            //Assert.True(File.Exists(bamFilePath));
            // TODO figure out how to access the shared bams

            // GUID-based names are unique per run, so no pre-existing file needs to be removed.
            var tempPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
            var tempPath2 = $"TemporaryBamFile_{Guid.NewGuid()}.bam";

            try
            {
                using (var bamWriter = new BamWriter(tempPath, "header", new List<GenomeMetadata.SequenceMetadata>()))
                {
                    bamWriter.WriteAlignment(TestHelpers.CreateBamAlignment("ATCG", 1, 10, 30, true));
                }

                var bamWriterFactory = new BamWriterFactory(1, tempPath);
                var bamWriterHandle = bamWriterFactory.CreateSingleBamWriter(tempPath2);

                bamWriterHandle.WriteAlignment(TestHelpers.CreateBamAlignment("ATCAG", 1, 10, 30, true));
                // NOTE(review): presumably a null alignment tells the handle to finish the file - confirm.
                bamWriterHandle.WriteAlignment(null);

                using (var reader = new BamReader(tempPath2))
                {
                    // TODO more specific?
                    var header = reader.GetHeader();
                    Assert.Contains("ID:Gemini", header);
                    Assert.Contains("PN:Gemini", header);
                }
            }
            finally
            {
                // Clean up even when a write or an assertion fails, so failed runs do not
                // leave temporary bams behind. File.Delete is a no-op for a missing file.
                File.Delete(tempPath);
                File.Delete(tempPath2);
            }
        }
Example #9
0
        /// <summary>
        /// Creates the multithreaded output writer for <c>_outBam</c>, using the references of <c>_inBam</c>
        /// and its header after the header has been passed through UpdateBamHeader.
        /// </summary>
        /// <returns>
        /// A BamWriterMultithreaded when <c>_options.SortMemoryGB</c> is zero or negative,
        /// otherwise a BamWriterInMem that is additionally given <c>_options.SortMemoryGB</c>.
        /// </returns>
        private IBamWriterMultithreaded CreateBamWriter()
        {
            string bamHeader;
            List<GenomeMetadata.SequenceMetadata> bamReferences;

            // The input reader is only needed to copy out the references and header.
            // (A reference-index-to-name dictionary used to be built here but was never read.)
            using (var reader = new BamReader(_inBam))
            {
                bamReferences = reader.GetReferences();
                bamHeader = UpdateBamHeader(reader.GetHeader());
            }

            if (_options.SortMemoryGB <= 0)
            {
                return new BamWriterMultithreaded(_outBam, bamHeader, bamReferences, _options.NumThreads, 1);
            }

            return new BamWriterInMem(_outBam, bamHeader, bamReferences, _options.SortMemoryGB, _options.NumThreads, 1);
        }
Example #10
0
        /// <summary>
        /// Reads the alignments of one chromosome from an indexed bam file and records the position of
        /// every alignment that passes the filters in <paramref name="observed"/>.
        /// </summary>
        /// <param name="bamFile">Bam file to read alignments from; a matching ".bai" file must exist next to it.</param>
        /// <param name="isPairedEnd">When true, alignments that are not part of a proper pair are skipped.</param>
        /// <param name="chromosome">Name of the reference sequence whose alignments are loaded.</param>
        /// <param name="coverageMode">
        /// Binary writes 1 into <c>observed.Data</c>; every other mode calls <c>observed.Set</c>.
        /// GCContentWeighted additionally fills <paramref name="fragmentLengths"/>.
        /// </param>
        /// <param name="observed">Hit array, indexed by alignment position, that receives the observations.</param>
        /// <param name="fragmentLengths">
        /// Array indexed by alignment position that receives the fragment length clamped to
        /// [0, Int16.MaxValue]; only written in GCContentWeighted mode.
        /// </param>
        /// <exception cref="Exception">The bam index file does not exist.</exception>
        /// <exception cref="ApplicationException">The reader reports -1 as the reference index of <paramref name="chromosome"/>.</exception>
        static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
        {
            // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
            string indexPath = bamFile + ".bai";

            if (!File.Exists(indexPath))
            {
                throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
            }

            using (BamReader reader = new BamReader(bamFile))
            {
                int desiredRefIndex = reader.GetReferenceIndex(chromosome);
                if (desiredRefIndex == -1)
                {
                    throw new ApplicationException(
                              string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome,
                                            bamFile));
                }

                if (!reader.Jump(desiredRefIndex, 0))
                {
                    // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
                    // .bam file.  That is not uncommon e.g. for truseq amplicon.
                    return;
                }

                int          readCount     = 0;
                int          keptReadCount = 0;
                BamAlignment alignment     = new BamAlignment();
                while (reader.GetNextAlignment(ref alignment, true))
                {
                    readCount++;

                    // Flag check - Require reads to be aligned, passing filter, non-duplicate,
                    // on the forward strand and a main alignment:
                    if (!alignment.IsMapped() ||
                        alignment.IsFailedQC() ||
                        alignment.IsDuplicate() ||
                        alignment.IsReverseStrand() ||
                        !alignment.IsMainAlignment())
                    {
                        continue;
                    }

                    // Require the alignment to start with 35 bases of non-indel:
                    if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
                    {
                        continue;
                    }

                    if (isPairedEnd && !alignment.IsProperPair())
                    {
                        continue;
                    }

                    // Quit if the current reference index is different from the desired reference index.
                    // desiredRefIndex is never -1 here, so no separate check for an unplaced read (-1) is needed.
                    // NOTE(review): this check runs after the filters above, so filtered-out reads of a
                    // following reference are still counted in readCount before the loop stops.
                    if (alignment.RefID != desiredRefIndex)
                    {
                        break;
                    }

                    keptReadCount++;
                    if (coverageMode == CanvasCoverageMode.Binary)
                    {
                        observed.Data[alignment.Position] = 1;
                    }
                    else
                    {
                        observed.Set(alignment.Position);
                    }

                    // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
                    if (coverageMode == CanvasCoverageMode.GCContentWeighted)
                    {
                        fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
                    }
                }
                Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
            }
        }