/// <summary>
/// Initializes the parser for one reference sequence of a BAM file.
/// Chains to the single-argument constructor, resolves <paramref name="refSeqName"/>
/// to its position in RefSeqNames, reads the BAM index file and stores the chunks
/// of that reference sequence in RefSeqChunks.
/// </summary>
/// <param name="bamFileName">BAM file name; the index file name is derived from it via GetBAMIndexFileName.</param>
/// <param name="refSeqName">Name of the reference sequence to select.</param>
/// <exception cref="ArgumentException">
/// <paramref name="refSeqName"/> is not present in RefSeqNames.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The index file holds no entry at the position of the reference sequence.
/// </exception>
protected AbstractBAMParser(string bamFileName, string refSeqName)
    : this(bamFileName)
{
    // verify whether there is any reads related to chromosome.
    var refSeqIndex = RefSeqNames.IndexOf(refSeqName);
    if (refSeqIndex < 0)
    {
        var message = string.Format("reference sequence not found : {0}", refSeqName);
        throw new ArgumentException(message, "refSeqName");
    }

    var bamIndexFileName = GetBAMIndexFileName(bamFileName);
    using (var bamIndexFile = new BAMIndexFile(bamIndexFileName, FileMode.Open, FileAccess.Read))
    {
        var bamIndexInfo = bamIndexFile.Read();

        // Valid positions are 0..Count-1, so refSeqIndex == Count is already out of range
        // (the previous "<" comparison let that value reach the indexer below).
        if (bamIndexInfo.RefIndexes.Count <= refSeqIndex)
        {
            throw new ArgumentOutOfRangeException("refSeqIndex");
        }

        var refIndex = bamIndexInfo.RefIndexes[refSeqIndex];
        RefSeqChunks = GetChunks(refIndex);
    }

    RefSeqName = refSeqName;
    ResetChunks();
}
/// <summary>
/// Reads the alignments of one reference sequence, selected by index, from a BAM file
/// with the help of its index file.
/// The index file is expected next to the BAM file under the name "filename".bai;
/// e.g. for D:\BAMdata\sample.bam the index file is D:\BAMdata\sample.bam.bai.
/// An exception is thrown when the index file is not available.
/// </summary>
/// <param name="fileName">BAM file name.</param>
/// <param name="refSeqIndex">Index of reference sequence.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <returns>SequenceAlignmentMap holding the alignments that overlap the given start and end
/// co-ordinates of the given reference sequence.</returns>
public SequenceAlignmentMap ParseRange(string fileName, int refSeqIndex, int start, int end)
{
    // The BAM stream is opened first, then the index file; both are disposed on exit.
    using (FileStream bamStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (BAMIndexFile indexFile = new BAMIndexFile(getBAMIndexFileName(fileName), FileMode.Open, FileAccess.Read))
    {
        SequenceAlignmentMap alignments = GetAlignment(bamStream, indexFile, refSeqIndex, start, end);
        return alignments;
    }
}
/// <summary>
/// Reads the alignments described by a SequenceRange from a BAM file using its index file.
/// </summary>
/// <param name="fileName">BAM file name.</param>
/// <param name="range">SequenceRange carrying the reference sequence name (ID) and the
/// start and end co-ordinates.</param>
/// <returns>SequenceAlignmentMap holding the alignments of the named reference sequence
/// within the requested range.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileName"/> is null, empty or white space, or <paramref name="range"/> is null.
/// </exception>
/// <exception cref="ArgumentException">range.ID is null or empty.</exception>
public SequenceAlignmentMap ParseRange(string fileName, SequenceRange range)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        throw new ArgumentNullException("fileName");
    }

    if (range == null)
    {
        throw new ArgumentNullException("range");
    }

    if (string.IsNullOrEmpty(range.ID))
    {
        throw new ArgumentException("Reference sequence name (range.ID) can't empty or null.");
    }

    // Clamp the range co-ordinates to what fits into an int.
    int rangeStart = range.Start >= int.MaxValue ? int.MaxValue : (int)range.Start;
    int rangeEnd = range.End >= int.MaxValue ? int.MaxValue : (int)range.End;

    // 0..int.MaxValue means "the whole reference sequence": use the overload without co-ordinates.
    bool wholeSequence = rangeStart == 0 && rangeEnd == int.MaxValue;

    using (FileStream bamStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (BAMIndexFile indexFile = new BAMIndexFile(getBAMIndexFileName(fileName), FileMode.Open, FileAccess.Read))
    {
        if (wholeSequence)
        {
            return GetAlignment(bamStream, indexFile, range.ID);
        }

        return GetAlignment(bamStream, indexFile, range.ID, rangeStart, rangeEnd);
    }
}
/// <summary>
/// Lazily yields every aligned sequence produced by GetAlignmentMapIterator for the
/// reference sequence at <paramref name="refSeqIndex"/>, restricted to the given range.
/// No reference sequence name is passed on (null).
/// </summary>
/// <param name="bamStream">Stream over the BAM data.</param>
/// <param name="bamIndexFile">Index file belonging to the BAM data.</param>
/// <param name="refSeqIndex">Index of reference sequence.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <returns>The aligned sequences, one at a time.</returns>
private IEnumerable<SAMAlignedSequence> GetAlignmentYield(Stream bamStream, BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end)
{
    IEnumerable<SAMAlignedSequence> source =
        GetAlignmentMapIterator(bamStream, bamIndexFile, null, refSeqIndex, start, end);

    foreach (SAMAlignedSequence aligned in source)
    {
        yield return aligned;
    }
}
/// <summary>
/// Reads the alignments of one reference sequence, selected by name, from a BAM file
/// with the help of its index file.
/// The index file is expected next to the BAM file under the name "filename".bai;
/// e.g. for D:\BAMdata\sample.bam the index file is D:\BAMdata\sample.bam.bai.
/// An exception is thrown when the index file is not available.
/// </summary>
/// <param name="fileName">BAM file name.</param>
/// <param name="refSeqName">Name of reference sequence.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <returns>SequenceAlignmentMap holding the alignments that overlap the given start and end
/// co-ordinates of the given reference sequence.</returns>
/// <exception cref="ArgumentNullException"><paramref name="refSeqName"/> is null.</exception>
public SequenceAlignmentMap ParseRange(string fileName, string refSeqName, int start, int end)
{
    if (refSeqName == null)
    {
        throw new ArgumentNullException("refSeqName");
    }

    // The BAM stream is opened first, then the index file; both are disposed on exit.
    using (FileStream bamStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (BAMIndexFile indexFile = new BAMIndexFile(getBAMIndexFileName(fileName), FileMode.Open, FileAccess.Read))
    {
        SequenceAlignmentMap alignments = GetAlignment(bamStream, indexFile, refSeqName, start, end);
        return alignments;
    }
}
/// <summary>
/// Lazily yields the aligned sequences of one reference sequence, located through the BAM index.
/// Reads the index, picks the chunks of the reference sequence (all of them when the range is
/// 0..int.MaxValue, otherwise only those for the range), fetches the aligned sequences via
/// GetAlignedSequences and yields them in order. Once all items have been yielded,
/// readStream is reset to null.
/// </summary>
/// <param name="bamIndexFile">Index file to read the reference indexes from.</param>
/// <param name="refSeqIndex">Index of reference sequence.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <param name="header">Alignment header; not used by this method's visible code.</param>
/// <returns>The aligned sequences, one at a time.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="refSeqIndex"/> is not -1 and is beyond the last entry of the index.
/// </exception>
private IEnumerable<SAMAlignedSequence> GetAlignmentWithIndexYield(BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end, SAMAlignmentHeader header)
{
    BAMIndex bamIndexInfo = bamIndexFile.Read();

    // NOTE(review): -1 is exempted from this check but is still used as an index below — confirm intent.
    if (refSeqIndex != -1 && bamIndexInfo.RefIndexes.Count <= refSeqIndex)
    {
        throw new ArgumentOutOfRangeException("refSeqIndex");
    }

    BAMReferenceIndexes refIndex = bamIndexInfo.RefIndexes[refSeqIndex];

    IList<Chunk> chunks;
    if (start == 0 && end == int.MaxValue)
    {
        // Full range requested: take every chunk of the reference sequence.
        chunks = GetChunks(refIndex);
    }
    else
    {
        chunks = GetChunks(refIndex, start, end);
    }

    IList<SAMAlignedSequence> alignedSeqs = GetAlignedSequences(chunks, start, end);
    foreach (SAMAlignedSequence alignedSeq in alignedSeqs)
    {
        // Yield the current element; the previous code yielded alignedSeqs[0] on every
        // iteration, repeating the first alignment and dropping all others.
        yield return alignedSeq;
    }

    readStream = null;
}
/// <summary>
/// Lazily yields every aligned sequence produced by GetAlignmentMapIterator for the
/// reference sequence named <paramref name="refSeqName"/>.
/// </summary>
/// <param name="bamStream">Stream over the BAM data.</param>
/// <param name="bamIndexFile">Index file belonging to the BAM data.</param>
/// <param name="refSeqName">Name of reference sequence.</param>
/// <returns>The aligned sequences, one at a time.</returns>
private IEnumerable<SAMAlignedSequence> GetAlignmentYield(Stream bamStream, BAMIndexFile bamIndexFile, string refSeqName)
{
    IEnumerable<SAMAlignedSequence> source =
        GetAlignmentMapIterator(bamStream, bamIndexFile, refSeqName);

    foreach (SAMAlignedSequence aligned in source)
    {
        yield return aligned;
    }
}
/// <summary>
/// Builds a SequenceAlignmentMap for one reference sequence, located through the BAM index.
/// A new map is created from <paramref name="header"/> and assigned to
/// <paramref name="seqMap"/>; the index is read, the chunks of the reference sequence are
/// selected (all of them when the range is 0..int.MaxValue, otherwise only those for the
/// range) and every aligned sequence returned by GetAlignedSequences is added to the map's
/// QuerySequences. Finally readStream is reset to null.
/// </summary>
/// <param name="bamIndexFile">Index file to read the reference indexes from.</param>
/// <param name="refSeqIndex">Index of reference sequence.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <param name="header">Header used to create the alignment map.</param>
/// <param name="seqMap">Receives the newly created alignment map; any incoming value is replaced.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="refSeqIndex"/> is not -1 and is beyond the last entry of the index.
/// </exception>
private void GetAlignmentWithIndex(BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end, SAMAlignmentHeader header, ref SequenceAlignmentMap seqMap)
{
    seqMap = new SequenceAlignmentMap(header);

    BAMIndex indexInfo = bamIndexFile.Read();
    if (refSeqIndex != -1 && indexInfo.RefIndexes.Count <= refSeqIndex)
    {
        throw new ArgumentOutOfRangeException("refSeqIndex");
    }

    BAMReferenceIndexes referenceIndex = indexInfo.RefIndexes[refSeqIndex];

    IList<Chunk> selectedChunks;
    if (start != 0 || end != int.MaxValue)
    {
        // A narrower range was requested: restrict the chunks to it.
        selectedChunks = GetChunks(referenceIndex, start, end);
    }
    else
    {
        selectedChunks = GetChunks(referenceIndex);
    }

    IList<SAMAlignedSequence> found = GetAlignedSequences(selectedChunks, start, end);
    foreach (SAMAlignedSequence sequence in found)
    {
        seqMap.QuerySequences.Add(sequence);
    }

    readStream = null;
}
/// <summary>
/// Lazily yields aligned sequences from a BAM stream.
/// Stores <paramref name="reader"/> in readStream, validates it and reads the header.
/// When a reference sequence is selected (by <paramref name="refSeq"/> or
/// <paramref name="refSeqName"/>, never both) the sequences are fetched through the index
/// file; when neither is given they are fetched without the index.
/// Because this is an iterator, all validation runs on first enumeration, not at call time.
/// </summary>
/// <param name="reader">Stream over the BAM data.</param>
/// <param name="bamIndexFile">Index file; required whenever a reference sequence is selected.</param>
/// <param name="refSeqName">Name of the reference sequence, looked up in refSeqNames.</param>
/// <param name="refSeq">Index of the reference sequence, checked against the header.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <returns>The aligned sequences, one at a time.</returns>
/// <exception cref="FileFormatException"><paramref name="reader"/> is null or has length 0.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="refSeq"/> is outside the header's reference sequences.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="refSeqName"/> is not found, or both <paramref name="refSeq"/> and
/// <paramref name="refSeqName"/> were supplied.
/// </exception>
/// <exception cref="ArgumentNullException">
/// A reference sequence was selected but <paramref name="bamIndexFile"/> is null.
/// </exception>
private IEnumerable<SAMAlignedSequence> GetAlignmentMapIterator(Stream reader, BAMIndexFile bamIndexFile = null, string refSeqName = null, int? refSeq = null, int start = 0, int end = int.MaxValue)
{
    if (reader == null || reader.Length == 0)
    {
        throw new FileFormatException(Properties.Resource.BAM_InvalidBAMFile);
    }

    readStream = reader;
    ValidateReader();
    SAMAlignmentHeader header = GetHeader();

    if (refSeq.HasValue && refSeqName == null)
    {
        // verify whether the chromosome index is there in the header or not.
        if (refSeq < 0 || refSeq >= header.ReferenceSequences.Count)
        {
            throw new ArgumentOutOfRangeException("refSeq");
        }
    }
    else if (refSeqName != null && !refSeq.HasValue)
    {
        // Resolve the name to its index; IndexOf always yields a value, negative when not found.
        refSeq = refSeqNames.IndexOf(refSeqName);
        if (refSeq < 0)
        {
            string message = string.Format(CultureInfo.InvariantCulture, Properties.Resource.BAM_RefSeqNotFound, refSeqName);
            throw new ArgumentException(message, "refSeqName");
        }
    }
    else if (refSeq.HasValue && refSeqName != null)
    {
        throw new ArgumentException("Received values for params refSeq and refSeqName. Only one parameter can have a value, not both.");
    }

    if (refSeq.HasValue)
    {
        if (bamIndexFile == null)
        {
            // The missing argument is the index file, so report that parameter.
            throw new ArgumentNullException("bamIndexFile");
        }

        foreach (SAMAlignedSequence seq in GetAlignmentWithIndexYield(bamIndexFile, refSeq.Value, start, end, header))
        {
            yield return seq;
        }
    }
    else
    {
        foreach (SAMAlignedSequence seq in GetAlignmentWithoutIndexYield(header))
        {
            yield return seq;
        }
    }
}
/// <summary>
/// Returns the SequenceAlignmentMap obtained by parsing the given BAM stream and BAMIndexFile
/// for the reference sequence at <paramref name="refSeqIndex"/>.
/// This method uses linear index information also.
/// Forwards to GetAlignmentMap with no reference sequence name (null).
/// </summary>
/// <param name="bamStream">Stream over the BAM data.</param>
/// <param name="bamIndexFile">Index file belonging to the BAM data.</param>
/// <param name="refSeqIndex">Index of reference sequence.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <returns>The alignment map produced by GetAlignmentMap.</returns>
private SequenceAlignmentMap GetAlignment(Stream bamStream, BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end)
{
    SequenceAlignmentMap alignmentMap = GetAlignmentMap(bamStream, bamIndexFile, null, refSeqIndex, start, end);
    return alignmentMap;
}
/// <summary>
/// Returns the SequenceAlignmentMap obtained by parsing the given BAM stream and BAMIndexFile
/// for the reference sequence named <paramref name="refSeqName"/>.
/// This method uses linear index information also.
/// Forwards to GetAlignmentMap, passing -1 as the reference sequence index.
/// </summary>
/// <param name="bamStream">Stream over the BAM data.</param>
/// <param name="bamIndexFile">Index file belonging to the BAM data.</param>
/// <param name="refSeqName">Name of reference sequence.</param>
/// <param name="start">Start index.</param>
/// <param name="end">End index.</param>
/// <returns>The alignment map produced by GetAlignmentMap.</returns>
private SequenceAlignmentMap GetAlignment(Stream bamStream, BAMIndexFile bamIndexFile, string refSeqName, int start, int end)
{
    // NOTE(review): -1 presumably tells GetAlignmentMap "no index supplied" — confirm there.
    SequenceAlignmentMap alignmentMap = GetAlignmentMap(bamStream, bamIndexFile, refSeqName, -1, start, end);
    return alignmentMap;
}
/// <summary>
/// Returns the SequenceAlignmentMap obtained by parsing the given BAM stream and BAMIndexFile
/// for the reference sequence named <paramref name="refSeqName"/>.
/// Forwards to GetAlignmentMap without start or end co-ordinates.
/// </summary>
/// <param name="bamStream">Stream over the BAM data.</param>
/// <param name="bamIndexFile">Index file belonging to the BAM data.</param>
/// <param name="refSeqName">Name of reference sequence.</param>
/// <returns>The alignment map produced by GetAlignmentMap.</returns>
private SequenceAlignmentMap GetAlignment(Stream bamStream, BAMIndexFile bamIndexFile, string refSeqName)
{
    SequenceAlignmentMap alignmentMap = GetAlignmentMap(bamStream, bamIndexFile, refSeqName);
    return alignmentMap;
}