/// <summary>
/// Formats a sequence alignment as SAM text: the alignment header first (when the
/// metadata entry stored under Helper.SAMAlignmentHeaderKey is a SAMAlignmentHeader),
/// followed by every aligned sequence.
/// </summary>
/// <param name="sequenceAlignment">The sequence alignment to format.</param>
/// <param name="writer">The TextWriter that receives the formatted text; it is flushed before returning.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
{
    if (sequenceAlignment == null)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameSequenceAlignment);
    }

    if (writer == null)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameWriter);
    }

    // A metadata value of any other type is treated as "no header" and skipped.
    object headerEntry = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey];
    SAMAlignmentHeader samHeader = headerEntry as SAMAlignmentHeader;
    if (samHeader != null)
    {
        WriteHeader(samHeader, writer);
    }

    foreach (IAlignedSequence entry in sequenceAlignment.AlignedSequences)
    {
        WriteSAMAlignedSequence(entry, writer);
    }

    writer.Flush();
}
/// <summary>
/// Formats a sequence alignment as SAM text and writes it to the given stream:
/// the alignment header first (when the metadata entry stored under
/// Helper.SAMAlignmentHeaderKey is a SAMAlignmentHeader), then every aligned sequence.
/// </summary>
/// <param name="stream">The Stream that receives the formatted sequence alignment text.</param>
/// <param name="sequenceAlignment">The sequence alignment to format.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public void Format(Stream stream, ISequenceAlignment sequenceAlignment)
{
    if (sequenceAlignment == null)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameSequenceAlignment);
    }

    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // The writer opened over the stream is disposed when this block exits.
    using (var output = stream.OpenWrite())
    {
        object headerEntry = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey];
        SAMAlignmentHeader samHeader = headerEntry as SAMAlignmentHeader;
        if (samHeader != null)
        {
            WriteHeader(output, samHeader);
        }

        foreach (IAlignedSequence entry in sequenceAlignment.AlignedSequences)
        {
            WriteSAMAlignedSequence(output, entry);
        }
    }
}
/// <summary>
/// Reads SAM text from a reader and builds a SequenceAlignmentMap from it.
/// Leading lines that start with "@" form the header; the lines that follow are
/// parsed as aligned sequences until the input ends or another "@" line appears.
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
protected SequenceAlignmentMap ParseOneWithSpecificFormat(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    // Header lines are buffered as strings first; after this loop "current"
    // holds the first non-header line (or null at end of input).
    List<string> headerLines = new List<string>();
    string current;
    while ((current = ReadNextLine(reader)) != null
        && current.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
    {
        headerLines.Add(current);
    }

    SAMAlignmentHeader parsedHeader = ParseSamHeader(headerLines);
    SequenceAlignmentMap result = new SequenceAlignmentMap(parsedHeader);

    // Every remaining line is one aligned sequence.
    for (;
        current != null && !current.StartsWith(@"@", StringComparison.OrdinalIgnoreCase);
        current = ReadNextLine(reader))
    {
        result.QuerySequences.Add(ParseSequence(current, Alphabet, RefSequences));
    }

    return result;
}
/// <summary>
/// Parses alignments in SAM format from a reader into a SequenceAlignmentMap object.
/// Leading lines that start with "@" form the header; the lines that follow are
/// parsed as aligned sequences until the input ends or another "@" line appears.
/// When the parsed header lists no reference sequences, the distinct reference names
/// found on the reads (ignoring "*", compared case-insensitively) are added to the
/// header with a length of 0, in order of first appearance.
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
protected SequenceAlignmentMap ParseOneWithSpecificFormat(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    // Header lines are buffered as strings first; after this loop "line"
    // holds the first non-header line (or null at end of input).
    List<string> headerStrings = new List<string>();
    string line = ReadNextLine(reader);
    while (line != null && line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
    {
        headerStrings.Add(line);
        line = ReadNextLine(reader);
    }

    SAMAlignmentHeader header = ParseSamHeader(headerStrings);
    SequenceAlignmentMap sequenceAlignmentMap = new SequenceAlignmentMap(header);

    bool hasSQHeader = header.ReferenceSequences.Count > 0;

    // refSeqNames keeps first-seen order; seenRefSeqNames gives a constant-time
    // membership test so the check does not scan the list for every read.
    List<string> refSeqNames = new List<string>();
    HashSet<string> seenRefSeqNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    while (line != null && !line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
    {
        SAMAlignedSequence alignedSeq = ParseSequence(line, this.Alphabet);

        if (!hasSQHeader
            && !alignedSeq.RName.Equals("*", StringComparison.OrdinalIgnoreCase)
            && seenRefSeqNames.Add(alignedSeq.RName))
        {
            refSeqNames.Add(alignedSeq.RName);
        }

        sequenceAlignmentMap.QuerySequences.Add(alignedSeq);
        line = ReadNextLine(reader);
    }

    // Empty when the header already carried reference sequences.
    foreach (string refname in refSeqNames)
    {
        header.ReferenceSequences.Add(new ReferenceSequenceInfo(refname, 0));
    }

    return sequenceAlignmentMap;
}
/// <summary>
/// Creates a SequenceAlignmentMap for the given header. The header is also stored in
/// Metadata under Helper.SAMAlignmentHeaderKey, and the query sequence list starts empty.
/// </summary>
/// <param name="header">SAM header.</param>
/// <exception cref="ArgumentNullException"><paramref name="header"/> is null.</exception>
public SequenceAlignmentMap(SAMAlignmentHeader header)
{
    if (header == null)
    {
        throw new ArgumentNullException("header");
    }

    this.header = header;

    Dictionary<string, object> headerMetadata = new Dictionary<string, object>();
    headerMetadata.Add(Helper.SAMAlignmentHeaderKey, header);
    Metadata = headerMetadata;

    querySequences = new List<SAMAlignedSequence>();
}
/// <summary>
/// Lazily parses the aligned sequences of a SAM file, one per alignment line.
/// Leading lines that start with "@" are read as the header and handed to
/// ParseSamHeader; the lines that follow are parsed and yielded one at a time
/// until the file ends or another "@" line appears.
/// </summary>
/// <param name="fileName">Path of the SAM file to read.</param>
/// <returns>
/// A sequence of parsed SAMAlignedSequence objects. Because this is an iterator, the file
/// is opened on first enumeration and closed when enumeration finishes or the enumerator
/// is disposed.
/// </returns>
public IEnumerable<SAMAlignedSequence> ParseSequencesAsEnumerable(string fileName)
{
    using (StreamReader reader = new StreamReader(fileName))
    {
        // Header lines are buffered as strings first; after this loop "line"
        // holds the first non-header line (or null at end of file).
        List<string> headerStrings = new List<string>();
        string line = ReadNextLine(reader);
        while (line != null && line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
        {
            headerStrings.Add(line);
            line = ReadNextLine(reader);
        }

        // The header itself is not returned; it is still parsed so that whatever
        // validation ParseSamHeader performs is applied before reads are yielded.
        ParseSamHeader(headerStrings);

        while (line != null && !line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
        {
            yield return ParseSequence(line, this.Alphabet);
            line = ReadNextLine(reader);
        }
    }
}
/// <summary>
/// Creates a SequenceAlignmentMap for the given header. The header is also stored in the
/// metadata dictionary under Helper.SAMAlignmentHeaderKey, and the query sequence list
/// starts empty.
/// </summary>
/// <param name="header">SAM header.</param>
/// <exception cref="ArgumentNullException"><paramref name="header"/> is null.</exception>
public SequenceAlignmentMap(SAMAlignmentHeader header)
{
    if (header == null)
    {
        throw new ArgumentNullException("header");
    }

    this.header = header;
    metadata = new Dictionary<string, object>
    {
        { Helper.SAMAlignmentHeaderKey, header }
    };
    querySequences = new List<SAMAlignedSequence>();
}
/// <summary>
/// Writes a SAM alignment header to a text writer: one "@"-prefixed line per record
/// field with its tab-separated TAG:VALUE pairs, then one "@CO" line per comment.
/// A null header is ignored (nothing is written and the writer is not checked).
/// </summary>
/// <param name="writer">Text writer that receives the header lines; flushed before returning.</param>
/// <param name="header">Header to write.</param>
/// <exception cref="ArgumentNullException"><paramref name="writer"/> is null while <paramref name="header"/> is not.</exception>
/// <exception cref="ArgumentException">header.IsValid() returned a non-empty message.</exception>
public static void WriteHeader(TextWriter writer, SAMAlignmentHeader header)
{
    if (header == null)
    {
        return;
    }

    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    string validationError = header.IsValid();
    if (!string.IsNullOrEmpty(validationError))
    {
        throw new ArgumentException(validationError);
    }

    foreach (SAMRecordField field in header.RecordFields)
    {
        StringBuilder text = new StringBuilder("@").Append(field.Typecode);
        foreach (SAMRecordFieldTag fieldTag in field.Tags)
        {
            text.Append("\t").Append(fieldTag.Tag).Append(":").Append(fieldTag.Value);
        }

        writer.WriteLine(text.ToString());
    }

    foreach (string comment in header.Comments)
    {
        writer.WriteLine("@CO" + "\t" + comment);
    }

    writer.Flush();
}
/// <summary>
/// Builds a SAMAlignmentHeader from raw header lines. Each line is split on tabDelim;
/// the text after the first character of the first token is the record type code.
/// "CO" lines become comments, every other line becomes a record field with one tag per
/// remaining token. Reference sequences are then filled from the SQ records and the
/// finished header is validated.
/// </summary>
/// <param name="headerStrings">Header lines, one per entry.</param>
/// <returns>The parsed header.</returns>
/// <exception cref="FormatException">The assembled header fails its IsValid() check.</exception>
private static SAMAlignmentHeader ParseSamHeader(List<string> headerStrings)
{
    SAMAlignmentHeader samHeader = new SAMAlignmentHeader();

    foreach (string headerString in headerStrings)
    {
        string[] tokens = headerString.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);
        string recordTypecode = tokens[0].Substring(1);

        // Validate the header format.
        ValidateHeaderLineFormat(headerString);

        if (string.Compare(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase) != 0)
        {
            SAMRecordField headerLine = new SAMRecordField(recordTypecode);
            for (int i = 1; i < tokens.Length; i++)
            {
                // A tag token is read as a two-character name, one separator
                // character, then the value from index 3 onwards.
                string tagToken = tokens[i];
                string tagName = tagToken.Substring(0, 2);
                headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
            }

            samHeader.RecordFields.Add(headerLine);
        }
        else
        {
            // Drop the first four characters (the "@CO" code plus its separator).
            samHeader.Comments.Add(headerString.Substring(4));
        }
    }

    IList<ReferenceSequenceInfo> referenceSeqsInfo = samHeader.GetReferenceSequencesInfoFromSQHeader();
    foreach (var item in referenceSeqsInfo)
    {
        samHeader.ReferenceSequences.Add(item);
    }

    string message = samHeader.IsValid();
    if (!string.IsNullOrEmpty(message))
    {
        throw new FormatException(message);
    }

    return samHeader;
}
/// <summary>
/// Writes a SAM alignment header to a text writer: one "@"-prefixed line per record
/// field with its tab-separated TAG:VALUE pairs, then one "@CO" line per comment.
/// A null header is ignored (nothing is written and the writer is not checked).
/// </summary>
/// <param name="header">Header to write.</param>
/// <param name="writer">Text writer that receives the header lines; flushed before returning.</param>
/// <exception cref="ArgumentNullException"><paramref name="writer"/> is null while <paramref name="header"/> is not.</exception>
/// <exception cref="ArgumentException">header.IsValid() returned a non-empty message.</exception>
public static void WriteHeader(SAMAlignmentHeader header, TextWriter writer)
{
    if (header == null)
    {
        return;
    }

    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    string validationError = header.IsValid();
    if (!string.IsNullOrEmpty(validationError))
    {
        throw new ArgumentException(validationError);
    }

    foreach (SAMRecordField field in header.RecordFields)
    {
        StringBuilder text = new StringBuilder();
        text.Append("@");
        text.Append(field.Typecode);

        foreach (SAMRecordFieldTag fieldTag in field.Tags)
        {
            text.Append("\t");
            text.Append(fieldTag.Tag);
            text.Append(":");
            text.Append(fieldTag.Value);
        }

        writer.WriteLine(text.ToString());
    }

    foreach (string comment in header.Comments)
    {
        StringBuilder commentText = new StringBuilder("@CO");
        commentText.Append("\t");
        commentText.Append(comment);
        writer.WriteLine(commentText.ToString());
    }

    writer.Flush();
}
/// <summary>
/// Deserialization constructor. Restores the header from the "header" entry and the
/// query sequences from the "sequences" entry; a null sequence list is replaced by an
/// empty one. The header is also stored in the metadata dictionary under
/// Helper.SAMAlignmentHeaderKey.
/// </summary>
/// <param name="info">Serialization Info.</param>
/// <param name="context">Streaming context.</param>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
protected SequenceAlignmentMap(SerializationInfo info, StreamingContext context)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    header = (SAMAlignmentHeader)info.GetValue("header", typeof(SAMAlignmentHeader));
    metadata = new Dictionary<string, object>
    {
        { Helper.SAMAlignmentHeaderKey, header }
    };

    IList<SAMAlignedSequence> storedSequences =
        (IList<SAMAlignedSequence>)info.GetValue("sequences", typeof(IList<SAMAlignedSequence>));
    querySequences = storedSequences ?? new List<SAMAlignedSequence>();
}
/// <summary>
/// Parses the SAM alignment header from the current position of a BioTextReader.
/// Consecutive lines that start with "@" are consumed: "CO" lines become comments,
/// every other line becomes a record field with one tag per remaining tab-delimited
/// token. When at least one header line was present, the assembled header is validated.
/// </summary>
/// <param name="bioReader">Bio text reader.</param>
/// <returns>The parsed header; an empty header when the current line is not a header line.</returns>
/// <exception cref="FormatException">The assembled header fails its IsValid() check.</exception>
private static SAMAlignmentHeader ParserSAMHeader(BioTextReader bioReader)
{
    SAMAlignmentHeader samHeader = new SAMAlignmentHeader();

    if (bioReader.HasLines && bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
    {
        while (bioReader.HasLines && bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
        {
            string[] tokens = bioReader.Line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);
            string recordTypecode = tokens[0].Substring(1);

            // Validate the header format.
            ValidateHeaderLineFormat(bioReader.Line);

            if (string.Compare(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase) != 0)
            {
                SAMRecordField headerLine = new SAMRecordField(recordTypecode);
                for (int i = 1; i < tokens.Length; i++)
                {
                    // A tag token is read as a two-character name, one separator
                    // character, then the value from index 3 onwards.
                    string tagToken = tokens[i];
                    string tagName = tagToken.Substring(0, 2);
                    headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
                }

                samHeader.RecordFields.Add(headerLine);
            }
            else
            {
                // Drop the first four characters (the "@CO" code plus its separator).
                samHeader.Comments.Add(bioReader.Line.Substring(4));
            }

            bioReader.GoToNextLine();
        }

        string message = samHeader.IsValid();
        if (!string.IsNullOrEmpty(message))
        {
            throw new FormatException(message);
        }
    }

    return samHeader;
}
/// <summary>
/// Parses one SAM alignment from a BioTextReader: the header first, then the aligned
/// sequences, into a new SequenceAlignmentMap.
/// </summary>
/// <param name="bioReader">A reader for a biological sequence alignment text.</param>
/// <param name="isReadOnly">
/// Read-only flag that is passed through to ParseSequences for the sequences it creates.
/// </param>
/// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bioReader"/> is null.</exception>
/// <exception cref="FormatException">The reader has no lines.</exception>
protected SequenceAlignmentMap ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
{
    if (bioReader == null)
    {
        throw new ArgumentNullException("bioReader");
    }

    // Empty input is rejected rather than returned as an empty map.
    if (!bioReader.HasLines)
    {
        throw new FormatException(Resource.Parser_NoTextErrorMessage);
    }

    SequenceAlignmentMap alignmentMap = new SequenceAlignmentMap(ParserSAMHeader(bioReader));
    ParseSequences(alignmentMap, bioReader, isReadOnly);
    return alignmentMap;
}
/// <summary>
/// Reads the leading "@" lines from a text reader and parses them as a SAM header.
/// Reading stops at the first line that does not start with "@" (that line is consumed
/// from the reader) or at the end of input.
/// </summary>
/// <param name="reader">Text reader.</param>
/// <returns>The parsed header, or null when the first line is not a header line.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public static SAMAlignmentHeader ParseSAMHeader(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    List<string> headerLines = new List<string>();
    for (string current = ReadNextLine(reader);
        current != null && current.StartsWith(@"@", StringComparison.OrdinalIgnoreCase);
        current = ReadNextLine(reader))
    {
        headerLines.Add(current);
    }

    if (headerLines.Count == 0)
    {
        return null;
    }

    return ParseSamHeader(headerLines);
}
/// <summary>
/// Adds to the header one reference sequence (with length 0) for every distinct
/// reference name found on the reads of the input file. Names are compared
/// case-insensitively, "*" is ignored, and first-appearance order is kept.
/// </summary>
/// <param name="header">SAM alignment header.</param>
private void UpdateReferenceInformationFromReads(SAMAlignmentHeader header)
{
    // refSeqNames keeps first-seen order; seenRefSeqNames gives a constant-time
    // membership test so the check does not scan the list for every read.
    List<string> refSeqNames = new List<string>();
    HashSet<string> seenRefSeqNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    using (StreamReader textReader = new StreamReader(InputFilePath))
    {
        foreach (SAMAlignedSequence alignedSeq in GetAlignedSequence(textReader))
        {
            if (!alignedSeq.RName.Equals("*", StringComparison.OrdinalIgnoreCase)
                && seenRefSeqNames.Add(alignedSeq.RName))
            {
                refSeqNames.Add(alignedSeq.RName);
            }
        }
    }

    foreach (string refname in refSeqNames)
    {
        header.ReferenceSequences.Add(new ReferenceSequenceInfo(refname, 0));
    }
}
/// <summary>
/// Converts the input SAM file to the configured output format.
/// The header is parsed first; with HeaderOnly set only the header (if any) is written.
/// Otherwise reference information is taken from the ReferenceNamesAndLength file when
/// one is given, or from the reads when the header lists no reference sequences, and
/// then the header and every aligned sequence are written. Finally the uncompressed
/// stream is flushed and, for compressed BAM output, compressed into the output stream.
/// </summary>
/// <exception cref="InvalidOperationException">The SAM header could not be read or parsed.</exception>
private void ConvertFromSAMTOBAM()
{
    SAMAlignmentHeader header = null;

    try
    {
        using (var reader = new StreamReader(InputFilePath))
        {
            header = SAMParser.ParseSAMHeader(reader);
        }
    }
    catch (Exception ex)
    {
        throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
    }

    if (header == null)
    {
        Console.Error.WriteLine("Warning: SAM file doesn't contain header");
    }

    if (HeaderOnly)
    {
        if (header != null)
        {
            WriteHeader(header);
        }
    }
    else
    {
        if (header == null)
        {
            header = new SAMAlignmentHeader();
        }

        if (!string.IsNullOrEmpty(Library))
        {
            // Ordinal, case-insensitive match: the type code is an identifier,
            // so the comparison must not depend on the current culture.
            rgRecFields = header.RecordFields
                .Where(R => string.Equals(R.Typecode, "RG", StringComparison.OrdinalIgnoreCase))
                .ToList();
        }

        if (!string.IsNullOrEmpty(ReferenceNamesAndLength))
        {
            this.UpdateReferenceInformationFromFile(header);
        }
        else if (header.ReferenceSequences.Count == 0)
        {
            this.UpdateReferenceInformationFromReads(header);
        }

        WriteHeader(header);

        // The input is re-opened and read from the start for the alignment pass.
        using (StreamReader textReader = new StreamReader(InputFilePath))
        {
            foreach (SAMAlignedSequence alignedSeq in GetAlignedSequence(textReader))
            {
                WriteAlignedSequence(header, alignedSeq);
            }
        }
    }

    if (UnCompressedBAM)
    {
        bamUncompressedOutStream.Flush();
        if (writer != null)
        {
            DisplayBAMContent(bamUncompressedOutStream);
        }
    }

    if (BAMOutput && !UnCompressedBAM)
    {
        // Rewind the uncompressed data so it can be compressed from its beginning.
        bamUncompressedOutStream.Flush();
        bamUncompressedOutStream.Seek(0, SeekOrigin.Begin);
        bamformatter.CompressBAMFile(bamUncompressedOutStream, bamCompressedOutStream);
        bamCompressedOutStream.Flush();
        if (writer != null)
        {
            DisplayBAMContent(bamCompressedOutStream);
        }
    }
}
/// <summary>
/// Merge multiple sorted alignments.
/// SAMUtil.exe out.bam in1.bam in2.bam
/// Every input file is parsed and sorted in parallel. The output header comes from
/// HeaderFile when one is given, otherwise from the first input file. The merged
/// records are written to a temporary file, which is then compressed into
/// OutputFilename ("out.bam" when none was specified).
/// </summary>
/// <exception cref="InvalidOperationException">
/// FilePaths is null or has fewer than two entries. The same exception type is thrown
/// inside the parallel loop for an unreadable or empty input, an unreadable header
/// file, or a missing header.
/// </exception>
public void DoMerge()
{
    if (FilePaths == null)
    {
        throw new InvalidOperationException("FilePath");
    }

    if (FilePaths.Length < 2)
    {
        throw new InvalidOperationException(Resources.MergeHelp);
    }

    IList<IList<BAMSortedIndex>> sortedIndexes = new List<IList<BAMSortedIndex>>();
    IList<SequenceAlignmentMap> sequenceAlignmentMaps = new List<SequenceAlignmentMap>();

    Parallel.For(0, FilePaths.Length, (int index) =>
    {
        IList<BAMSortedIndex> sortedIndex;
        BAMParser parser = new BAMParser();
        SequenceAlignmentMap map;

        try
        {
            map = parser.ParseOne<SequenceAlignmentMap>(FilePaths[index]);
        }
        catch (Exception ex)
        {
            // Keep the original failure as the inner exception for diagnosis.
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }

        if (map == null)
        {
            throw new InvalidOperationException(Resources.EmptyFile);
        }

        // Only the first input file takes part in choosing the output header.
        if (index == 0)
        {
            if (string.IsNullOrEmpty(HeaderFile) && map.Header.RecordFields.Count == 0)
            {
                throw new InvalidOperationException(Resources.HeaderMissing);
            }

            if (!string.IsNullOrEmpty(HeaderFile))
            {
                SAMParser parse = new SAMParser();
                SequenceAlignmentMap head;
                try
                {
                    head = parse.ParseOne<SequenceAlignmentMap>(HeaderFile);
                }
                catch (Exception ex)
                {
                    throw new InvalidOperationException(Resources.IncorrectHeaderFile, ex);
                }

                if (head == null)
                {
                    throw new InvalidOperationException(Resources.EmptyFile);
                }

                header = head.Header;
            }
            else
            {
                header = map.Header;
            }
        }

        sortedIndex = Sort(map, SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);

        // Both lists are appended under one lock so that entry N of each list
        // always belongs to the same input file.
        lock (sortedIndexes)
        {
            sortedIndexes.Add(sortedIndex);
            sequenceAlignmentMaps.Add(map);
        }
    });

    if (string.IsNullOrEmpty(OutputFilename))
    {
        OutputFilename = "out.bam";
        autoGeneratedOutputFilename = true;
    }

    string filePath = Path.GetTempFileName();
    try
    {
        using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
        {
            BAMFormatter formatter = new BAMFormatter();
            formatter.WriteHeader(header, fstemp);

            if (SortByReadName)
            {
                IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
            }
            else
            {
                WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
            }

            using (FileStream fsoutput = new FileStream(OutputFilename, FileMode.Create, FileAccess.Write))
            {
                // Rewind the temporary file so it is compressed from its beginning.
                fstemp.Seek(0, SeekOrigin.Begin);
                formatter.CompressBAMFile(fstemp, fsoutput);
            }
        }
    }
    finally
    {
        // Remove the temporary file even when writing or compressing fails.
        File.Delete(filePath);
    }

    if (autoGeneratedOutputFilename)
    {
        Console.WriteLine(Properties.Resources.SuccessMessageWithOutputFileName, OutputFilename);
    }
}
/// <summary>
/// Lazily returns the aligned sequences of one reference sequence, located through the
/// BAM index. The whole reference is read when start is 0 and end is int.MaxValue;
/// otherwise only the chunks selected for the start/end range are read.
/// </summary>
/// <param name="bamIndexFile">Index file that is read to locate the chunks.</param>
/// <param name="refSeqIndex">Index of the reference sequence within the BAM index.</param>
/// <param name="start">Start of the requested range.</param>
/// <param name="end">End of the requested range.</param>
/// <param name="header">Alignment header (not used by this method).</param>
/// <returns>The aligned sequences, yielded one at a time.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// refSeqIndex is not -1 and is not smaller than the number of reference indexes.
/// </exception>
private IEnumerable<SAMAlignedSequence> GetAlignmentWithIndexYield(BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end, SAMAlignmentHeader header)
{
    BAMIndex bamIndexInfo;
    BAMReferenceIndexes refIndex;
    IList<Chunk> chunks;

    bamIndexInfo = bamIndexFile.Read();

    if (refSeqIndex != -1 && bamIndexInfo.RefIndexes.Count <= refSeqIndex)
    {
        throw new ArgumentOutOfRangeException("refSeqIndex");
    }

    refIndex = bamIndexInfo.RefIndexes[refSeqIndex];

    if (start == 0 && end == int.MaxValue)
    {
        chunks = GetChunks(refIndex);
    }
    else
    {
        chunks = GetChunks(refIndex, start, end);
    }

    IList<SAMAlignedSequence> alignedSeqs = GetAlignedSequences(chunks, start, end);
    foreach (SAMAlignedSequence alignedSeq in alignedSeqs)
    {
        // Yield the current element; indexing alignedSeqs[0] here would return
        // the first read once per element.
        yield return alignedSeq;
    }

    // Reached only when the caller enumerates to the end.
    readStream = null;
}
/// <summary>
/// Reads every remaining aligned sequence from the BAM stream without using an index.
/// When createBamIndex is false the non-null reads are added to seqMap (created from
/// the header if the caller passed null). When createBamIndex is true a new bamIndex
/// is built while reading and no reads are collected.
/// </summary>
/// <param name="header">Alignment header used to create seqMap when needed.</param>
/// <param name="seqMap">Map that receives the reads when no index is being created.</param>
private void GetAlignmentWithoutIndex(SAMAlignmentHeader header, ref SequenceAlignmentMap seqMap)
{
    Chunk lastChunk = null;
    ulong lastcOffset = 0;
    ushort lastuOffset = 0;
    BAMReferenceIndexes refIndices = null;

    if (createBamIndex)
    {
        // One index entry per reference sequence name.
        bamIndex = new BAMIndex();
        for (int i = 0; i < refSeqNames.Count; i++)
        {
            bamIndex.RefIndexes.Add(new BAMReferenceIndexes());
        }

        refIndices = bamIndex.RefIndexes[0];
    }

    if (!createBamIndex && seqMap == null)
    {
        seqMap = new SequenceAlignmentMap(header);
    }

    while (!IsEOF())
    {
        if (createBamIndex)
        {
            // Capture the stream offsets before the read is consumed; they are
            // handed to BamIndexing together with the read.
            lastcOffset = (ulong)currentCompressedBlockStartPos;
            lastuOffset = (ushort)deCompressedStream.Position;
        }

        SAMAlignedSequence alignedSeq = GetAlignedSequence(0, int.MaxValue);
        alignedSeq = BamIndexing(alignedSeq, refIndices, bamIndex, lastcOffset, lastuOffset, ref lastChunk);

        if (!createBamIndex && alignedSeq != null)
        {
            seqMap.QuerySequences.Add(alignedSeq);
        }
    }

    // Close the final chunk at the current stream position. lastChunk is still
    // null when no chunk was produced (for example, no reads at all), in which
    // case there is nothing to close.
    if (createBamIndex && lastChunk != null)
    {
        lastChunk.ChunkEnd.CompressedBlockOffset = (ulong)readStream.Position;
        if (deCompressedStream != null)
        {
            lastChunk.ChunkEnd.UncompressedBlockOffset = (ushort)deCompressedStream.Position;
        }
        else
        {
            lastChunk.ChunkEnd.UncompressedBlockOffset = 0;
        }
    }
}
/// <summary>
/// Writes one aligned sequence in the configured output format: through the BAM
/// formatter into the uncompressed BAM stream when UnCompressedBAM or BAMOutput is set,
/// otherwise as SAM text to the writer.
/// </summary>
/// <param name="header">Alignment header.</param>
/// <param name="alignedSequence">Aligned sequence to write.</param>
private void WriteAlignedSequence(SAMAlignmentHeader header, SAMAlignedSequence alignedSequence)
{
    bool writeAsBam = UnCompressedBAM || BAMOutput;
    if (!writeAsBam)
    {
        SAMFormatter.WriteSAMAlignedSequence(writer, alignedSequence);
        return;
    }

    // For compressed BAM output the uncompressed stream is compressed later,
    // before it is sent to the output stream.
    bamformatter.WriteAlignedSequence(header, alignedSequence, bamUncompressedOutStream);
}
/// <summary>
/// Fills a new SequenceAlignmentMap with the aligned sequences of one reference
/// sequence, located through the BAM index. The whole reference is read when start is 0
/// and end is int.MaxValue; otherwise only the chunks selected for the range are read.
/// </summary>
/// <param name="bamIndexFile">Index file that is read to locate the chunks.</param>
/// <param name="refSeqIndex">Index of the reference sequence within the BAM index.</param>
/// <param name="start">Start of the requested range.</param>
/// <param name="end">End of the requested range.</param>
/// <param name="header">Header used to create the map.</param>
/// <param name="seqMap">Always replaced by a new map that receives the reads.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// refSeqIndex is not -1 and is not smaller than the number of reference indexes.
/// </exception>
private void GetAlignmentWithIndex(BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end, SAMAlignmentHeader header, ref SequenceAlignmentMap seqMap)
{
    seqMap = new SequenceAlignmentMap(header);

    BAMIndex indexData = bamIndexFile.Read();
    if (refSeqIndex != -1 && indexData.RefIndexes.Count <= refSeqIndex)
    {
        throw new ArgumentOutOfRangeException("refSeqIndex");
    }

    BAMReferenceIndexes referenceIndex = indexData.RefIndexes[refSeqIndex];

    IList<Chunk> selectedChunks;
    if (start != 0 || end != int.MaxValue)
    {
        selectedChunks = GetChunks(referenceIndex, start, end);
    }
    else
    {
        selectedChunks = GetChunks(referenceIndex);
    }

    foreach (SAMAlignedSequence read in GetAlignedSequences(selectedChunks, start, end))
    {
        seqMap.QuerySequences.Add(read);
    }

    readStream = null;
}
// Throws ArgumentException carrying the message returned by header.IsValid()
// when that message is non-empty; otherwise does nothing.
private static void ValidateAlignmentHeader(SAMAlignmentHeader header)
{
    string validationError = header.IsValid();
    if (string.IsNullOrEmpty(validationError))
    {
        return;
    }

    throw new ArgumentException(validationError);
}
/// <summary>
/// Writes a SAMAlignedSequence to the specified stream. On first use the reference
/// sequence ranges are taken from the header and cached in refSequences; later calls
/// reuse the cached ranges.
/// </summary>
/// <param name="header">Header from SAM object.</param>
/// <param name="alignedSeq">SAMAlignedSequence object.</param>
/// <param name="writer">Stream to write.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public void WriteAlignedSequence(SAMAlignmentHeader header, SAMAlignedSequence alignedSeq, Stream writer)
{
    if (header == null)
    {
        throw new ArgumentNullException("header");
    }

    if (alignedSeq == null)
    {
        throw new ArgumentNullException("alignedSeq");
    }

    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    bool rangesNotCached = this.refSequences == null;
    if (rangesNotCached)
    {
        this.refSequences = header.GetReferenceSequenceRanges();
    }

    WriteAlignedSequence(alignedSeq, writer);
}
/// <summary>
/// Writes the BAM header to the specified stream: the magic bytes "BAM\1", the length
/// of the SAM header text, the header text itself, the number of reference sequences,
/// and for each reference its name length (including the terminating NUL), the
/// NUL-terminated name and its length. All length fields are byte counts of the UTF-8
/// data that is written.
/// </summary>
/// <param name="header">SAMAlignmentHeader object</param>
/// <param name="writer">Stream to write.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public void WriteHeader(SAMAlignmentHeader header, Stream writer)
{
    if (header == null)
    {
        throw new ArgumentNullException("header");
    }

    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    // The reference ranges are taken from the first header seen and then reused.
    if (this.refSequences == null)
    {
        this.refSequences = header.GetReferenceSequenceRanges();
    }

    string samHeader;
    using (StringWriter strwriter = new StringWriter(CultureInfo.InvariantCulture))
    {
        SAMFormatter.WriteHeader(strwriter, header);
        samHeader = strwriter.ToString();
    }

    byte[] bytes = Encoding.UTF8.GetBytes(samHeader);
    byte[] bamMagicNumber = { 66, 65, 77, 1 };

    // write BAM magic number
    writer.Write(bamMagicNumber, 0, 4);

    // Length of the header text, counted in encoded bytes (not characters) so the
    // field matches the payload even when the text contains non-ASCII characters.
    writer.Write(Helper.GetLittleEndianByteArray(bytes.Length), 0, 4);

    // Plain header text in SAM
    writer.Write(bytes, 0, bytes.Length);

    // number of reference sequences
    writer.Write(Helper.GetLittleEndianByteArray(this.refSequences.Count), 0, 4);

    foreach (SequenceRange range in this.refSequences)
    {
        byte[] array = Encoding.UTF8.GetBytes(range.ID);

        // Name length includes the terminating NUL byte.
        writer.Write(Helper.GetLittleEndianByteArray(array.Length + 1), 0, 4);
        writer.Write(array, 0, array.Length);
        writer.WriteByte((byte)'\0');
        writer.Write(Helper.GetLittleEndianByteArray((int)range.End), 0, 4);
    }
}
// Gets new header with sorted SQ Fields.
// If SQ fields are already sorted then returns the same header.
// With canChangeOtherTagPos set, all non-SQ fields keep their relative order and the
// sorted SQ fields are appended after them. Otherwise every non-SQ field stays at its
// original position and only the SQ positions are refilled in sorted order.
// Comments are copied to the new header in both cases.
private SAMAlignmentHeader GetHeaderWithSortedSQFields(SAMAlignmentHeader header, bool canChangeOtherTagPos)
{
    if (IsSortedByChromosomeNames(GetSQHeaders(header.RecordFields)))
    {
        return header;
    }

    SAMAlignmentHeader newHeader = new SAMAlignmentHeader();

    if (canChangeOtherTagPos)
    {
        List<SAMRecordField> sqHeaders = new List<SAMRecordField>();
        for (int i = 0; i < header.RecordFields.Count; i++)
        {
            SAMRecordField field = header.RecordFields[i];
            if (field.Typecode.Equals("SQ"))
            {
                sqHeaders.Add(field);
            }
            else
            {
                newHeader.RecordFields.Add(field);
            }
        }

        // Sort and append once, after every field has been classified; doing this
        // inside the loop would append the SQ fields again on each iteration.
        sqHeaders.Sort(CompareByChromosomeName);
        foreach (SAMRecordField sqfield in sqHeaders)
        {
            newHeader.RecordFields.Add(sqfield);
        }
    }
    else
    {
        // Maps each SQ field (kept sorted by chromosome name) to its original position.
        Bio.Util.SortedList<SAMRecordField, int> map =
            new Bio.Util.SortedList<SAMRecordField, int>(new ComparisonWrapper<SAMRecordField>(CompareByChromosomeName));

        for (int i = 0; i < header.RecordFields.Count; i++)
        {
            SAMRecordField field = header.RecordFields[i];
            if (field.Typecode.Equals("SQ"))
            {
                map.Add(field, i);
            }

            newHeader.RecordFields.Add(field);
        }

        // Walk the SQ positions in ascending order and drop the sorted fields into them.
        int sortedPosition = 0;
        foreach (int index in map.Values.OrderBy(I => I))
        {
            newHeader.RecordFields[index] = map.Keys[sortedPosition++];
        }
    }

    foreach (string str in header.Comments)
    {
        newHeader.Comments.Add(str);
    }

    return newHeader;
}
/// <summary>
/// Replaces the header's reference sequences with the names and lengths listed in the
/// ReferenceNamesAndLength file. Each line holds a name and a length separated by a
/// tab; further columns are ignored. Reading stops at the first empty line or at the
/// end of the file.
/// </summary>
/// <param name="header">SAM alignment header.</param>
/// <exception cref="InvalidOperationException">A line has fewer than two tab-separated values.</exception>
private void UpdateReferenceInformationFromFile(SAMAlignmentHeader header)
{
    string[] tabSeparator = { "\t" };

    header.ReferenceSequences.Clear();

    using (StreamReader reader = new StreamReader(ReferenceNamesAndLength))
    {
        string read = reader.ReadLine();
        while (!string.IsNullOrEmpty(read))
        {
            string[] splitRegion = read.Split(tabSeparator, StringSplitOptions.RemoveEmptyEntries);
            if (splitRegion.Length > 1)
            {
                string name = splitRegion[0];
                long len = long.Parse(splitRegion[1], CultureInfo.InvariantCulture);
                header.ReferenceSequences.Add(new ReferenceSequenceInfo(name, len));
            }
            else
            {
                throw new InvalidOperationException("Invalid file for reference name and length");
            }

            read = reader.ReadLine();
        }
    }
}
/// <summary>
/// Writes the header in the configured output format. Nothing is written unless Header
/// or HeaderOnly is set. The BAM formatter and the uncompressed BAM stream are used when
/// UnCompressedBAM or BAMOutput is set; otherwise the header is written as SAM text.
/// </summary>
/// <param name="header">Header to write.</param>
private void WriteHeader(SAMAlignmentHeader header)
{
    bool headerRequested = Header || HeaderOnly;
    if (!headerRequested)
    {
        return;
    }

    bool writeAsBam = UnCompressedBAM || BAMOutput;
    if (!writeAsBam)
    {
        SAMFormatter.WriteHeader(writer, header);
        return;
    }

    // For compressed BAM output the uncompressed stream is compressed later,
    // before it is sent to the output stream.
    bamformatter.WriteHeader(header, bamUncompressedOutStream);
}
/// <summary>
/// Lazily reads every remaining aligned sequence from the BAM stream without using an
/// index, yielding each result of BamIndexing as it is produced. When createBamIndex is
/// true a new bamIndex is built while reading.
/// </summary>
/// <param name="header">Alignment header (not used by this method).</param>
/// <returns>The aligned sequences, yielded one at a time.</returns>
private IEnumerable<SAMAlignedSequence> GetAlignmentWithoutIndexYield(SAMAlignmentHeader header)
{
    Chunk lastChunk = null;
    ulong lastcOffset = 0;
    ushort lastuOffset = 0;
    BAMReferenceIndexes refIndices = null;

    if (createBamIndex)
    {
        // One index entry per reference sequence name.
        bamIndex = new BAMIndex();
        for (int i = 0; i < refSeqNames.Count; i++)
        {
            bamIndex.RefIndexes.Add(new BAMReferenceIndexes());
        }

        refIndices = bamIndex.RefIndexes[0];
    }

    while (!IsEOF())
    {
        if (createBamIndex)
        {
            // Capture the stream offsets before the read is consumed; they are
            // handed to BamIndexing together with the read.
            lastcOffset = (ulong)currentCompressedBlockStartPos;
            lastuOffset = (ushort)deCompressedStream.Position;
        }

        SAMAlignedSequence alignedSeq = GetAlignedSequence(0, int.MaxValue);
        alignedSeq = BamIndexing(alignedSeq, refIndices, bamIndex, lastcOffset, lastuOffset, ref lastChunk);
        yield return alignedSeq;
    }

    // Close the final chunk at the current stream position. lastChunk is still
    // null when no chunk was produced (for example, no reads at all), in which
    // case there is nothing to close. Reached only when the caller enumerates
    // to the end.
    if (createBamIndex && lastChunk != null)
    {
        lastChunk.ChunkEnd.CompressedBlockOffset = (ulong)readStream.Position;
        if (deCompressedStream != null)
        {
            lastChunk.ChunkEnd.UncompressedBlockOffset = (ushort)deCompressedStream.Position;
        }
        else
        {
            lastChunk.ChunkEnd.UncompressedBlockOffset = 0;
        }
    }
}
/// <summary>
/// Initialise bam output file, if required and if not already initialised.
/// Runs only when writeToFilteredBam is set and neither bamStream nor bamFormatter
/// exists yet: creates the formatter, a new header and the output queue, then creates
/// (replacing any existing file) "sequences.bam" under the path held in fileName.
/// </summary>
private void InitBamOutputFiles()
{
    if (writeToFilteredBam && bamStream == null && bamFormatter == null)
    {
        bamFormatter = new BAMFormatter();
        newHeader = new SAMAlignmentHeader();
        bamOutputQueue = new Queue<Collection<SAMAlignedSequence>>();

        // Create the output file for filtered sequences. Path.Combine supplies the
        // platform's directory separator instead of a hard-coded backslash.
        string file = Path.Combine(fileName, "sequences.bam");
        if (File.Exists(file))
        {
            File.Delete(file);
        }

        bamStream = File.Create(file);
    }
}
/// <summary>
/// Verifies that SAMFormatter.WriteHeader throws ArgumentNullException when it is called
/// with null as its first argument and a valid header. The test fails when no exception
/// is thrown.
/// </summary>
public void InvalidateSAMWriteTextWriter()
{
    SAMAlignmentHeader header = new SAMAlignmentHeader();
    bool rejectedNullArgument = false;

    try
    {
        SAMFormatter.WriteHeader(null, header);
    }
    catch (ArgumentNullException)
    {
        rejectedNullArgument = true;
        ApplicationLog.WriteLine(
            "SAM Formatter P2 : Successfully validated the exception");
    }

    if (!rejectedNullArgument)
    {
        Assert.Fail();
    }
}
/// <summary>
/// Parses the BAM file and returns the Header.
/// Rewinds the stream, then reads an 8-byte prefix (the header text length is the
/// integer at offset 4), the SAM header text, and the reference sequence table
/// (name length, name, sequence length for each entry). The reference names and lengths
/// are stored in refSeqNames and refSeqLengths, and the returned header's reference
/// sequences are replaced with them.
/// </summary>
/// <returns>
/// The parsed header, or an empty header when the file has no header text or the text
/// contains no header lines.
/// </returns>
private SAMAlignmentHeader GetHeader()
{
    SAMAlignmentHeader header = new SAMAlignmentHeader();
    refSeqNames = new List<string>();
    refSeqLengths = new List<int>();

    readStream.Seek(0, SeekOrigin.Begin);
    this.deCompressedStream = null;

    // The first four bytes of the prefix are read but not inspected here
    // (presumably the BAM magic number; confirm against the format specification).
    byte[] array = new byte[8];
    ReadUnCompressedData(array, 0, 8);
    int l_text = Helper.GetInt32(array, 4);

    byte[] samHeaderData = new byte[l_text];
    if (l_text != 0)
    {
        ReadUnCompressedData(samHeaderData, 0, l_text);
    }

    ReadUnCompressedData(array, 0, 4);
    int noofRefSeqs = Helper.GetInt32(array, 0);

    for (int i = 0; i < noofRefSeqs; i++)
    {
        ReadUnCompressedData(array, 0, 4);
        int len = Helper.GetInt32(array, 0);
        byte[] refName = new byte[len];
        ReadUnCompressedData(refName, 0, len);

        ReadUnCompressedData(array, 0, 4);
        int refLen = Helper.GetInt32(array, 0);

        // The last byte of the stored name is dropped when decoding.
        refSeqNames.Add(System.Text.ASCIIEncoding.ASCII.GetString(refName, 0, refName.Length - 1));
        refSeqLengths.Add(refLen);
    }

    if (samHeaderData.Length != 0)
    {
        string str = System.Text.ASCIIEncoding.ASCII.GetString(samHeaderData);
        using (StringReader reader = new StringReader(str))
        {
            // ParseSAMHeader can return null when the text has no header lines;
            // keep the empty header in that case so the code below does not
            // dereference null.
            SAMAlignmentHeader parsedHeader = SAMParser.ParseSAMHeader(reader);
            if (parsedHeader != null)
            {
                header = parsedHeader;
            }
        }
    }

    // The binary reference table replaces whatever the text header listed.
    header.ReferenceSequences.Clear();
    for (int i = 0; i < refSeqNames.Count; i++)
    {
        string refname = refSeqNames[i];
        int length = refSeqLengths[i];
        header.ReferenceSequences.Add(new ReferenceSequenceInfo(refname, length));
    }

    return header;
}
/// <summary>
/// Writes an ISequenceAlignment to the location specified by the writer.
/// </summary>
/// <remarks>
/// The alignment header (if present in the alignment metadata) is written
/// first via WriteHeader, followed by one tab-separated line per aligned
/// sequence. The writer is flushed before returning.
/// </remarks>
/// <param name="sequenceAlignment">The sequence alignment to format.</param>
/// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="sequenceAlignment"/> or <paramref name="writer"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when an aligned sequence has no SAMAlignedSequenceHeader in its metadata,
/// when that header's IsValid() returns a non-empty message, or when the first
/// sequence's alphabet is not Alphabets.DNA.
/// </exception>
public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
{
    if (sequenceAlignment == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
    }

    if (writer == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameWriter);
    }

    // Write alignment header
    SAMAlignmentHeader header = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
    if (header != null)
    {
        WriteHeader(header, writer);
    }

    // Write aligned sequences
    foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
    {
        SAMAlignedSequenceHeader alignedHeader = alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
        if (alignedHeader == null)
        {
            throw new ArgumentException(Resource.SAM_AlignedSequenceHeaderMissing);
        }

        string message = alignedHeader.IsValid();
        if (!string.IsNullOrEmpty(message))
        {
            throw new ArgumentException(message);
        }

        // Fixed leading columns, each followed by a tab.
        StringBuilder alignmentLine = new StringBuilder();
        alignmentLine.Append(alignedHeader.QName);
        alignmentLine.Append("\t");
        alignmentLine.Append((int)alignedHeader.Flag);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.RName);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.Pos);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.MapQ);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.CIGAR);
        alignmentLine.Append("\t");

        // A mate reference name equal to the reference name is written as "=".
        if (string.Compare(alignedHeader.MRNM, alignedHeader.RName, StringComparison.InvariantCultureIgnoreCase) == 0)
        {
            alignmentLine.Append("=");
        }
        else
        {
            alignmentLine.Append(alignedHeader.MRNM);
        }

        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.MPos);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.ISize);
        alignmentLine.Append("\t");
        writer.Write(alignmentLine.ToString());

        if (alignedSequence.Sequences.Count > 0 && alignedSequence.Sequences[0] != null)
        {
            ISequence seq = alignedSequence.Sequences[0];
            if (seq.Alphabet != Alphabets.DNA)
            {
                throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
            }

            // Hash sets give constant-time membership tests per base; the
            // previous List Contains/Remove pair made this loop quadratic
            // in the number of marked positions.
            HashSet<int> dotSymbolIndices = new HashSet<int>(alignedHeader.DotSymbolIndices);
            HashSet<int> equalSymbolIndices = new HashSet<int>(alignedHeader.EqualSymbolIndices);

            for (int i = 0; i < seq.Count; i++)
            {
                char symbol = seq[i].Symbol;

                if (dotSymbolIndices.Contains(i))
                {
                    symbol = '.';
                }

                // Checked last so '=' wins when an index is in both sets,
                // matching the original precedence.
                if (equalSymbolIndices.Contains(i))
                {
                    symbol = '=';
                }

                writer.Write(symbol);
            }

            writer.Write("\t");

            // Quality column: the raw scores as ASCII text, or "*" when the
            // sequence carries no quality scores.
            IQualitativeSequence qualSeq = seq as IQualitativeSequence;
            if (qualSeq != null)
            {
                writer.Write(ASCIIEncoding.ASCII.GetString(qualSeq.Scores));
            }
            else
            {
                writer.Write("*");
            }
        }
        else
        {
            // No sequence available: both sequence and quality columns are "*".
            writer.Write("*");
            writer.Write("\t");
            writer.Write("*");
        }

        // Optional fields are appended as TAG:VTYPE:VALUE, each preceded by a tab.
        foreach (SAMOptionalField field in alignedHeader.OptionalFields)
        {
            writer.Write("\t");
            writer.Write(field.Tag);
            writer.Write(":");
            writer.Write(field.VType);
            writer.Write(":");
            writer.Write(field.Value);
        }

        writer.WriteLine();
    }

    writer.Flush();
}
/// <summary>
/// Writes a list of SAM aligned sequences, optionally preceded by an
/// alignment header, to the given stream.
/// </summary>
/// <remarks>
/// Output goes through the writer returned by <c>stream.OpenWrite()</c>,
/// which is disposed once all sequences have been written.
/// </remarks>
/// <param name="stream">The stream to write the formatted alignments to.</param>
/// <param name="sequenceAlignments">The aligned sequences to write.</param>
/// <param name="Header">The alignment header to write first; nothing is written for it when null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="stream"/> or <paramref name="sequenceAlignments"/> is null.
/// </exception>
public void Format(Stream stream, List<SAMAlignedSequence> sequenceAlignments, SAMAlignmentHeader Header)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    if (sequenceAlignments == null)
    {
        throw new ArgumentNullException("sequenceAlignments");
    }

    using (var output = stream.OpenWrite())
    {
        bool hasHeader = Header != null;
        if (hasHeader)
        {
            WriteHeader(output, Header);
        }

        foreach (IAlignedSequence current in sequenceAlignments)
        {
            WriteSAMAlignedSequence(output, current);
        }
    }
}