/// <summary>
/// Reads SAM-formatted alignment text from the supplied reader and builds a
/// SequenceAlignmentMap from it.
/// </summary>
/// <param name="mbfReader">A reader over the biological sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag indicating whether sequences in the resulting alignment should be read-only.
/// The value is stored on the parser and forwarded to the sequence-parsing step.
/// </param>
/// <returns>A new SequenceAlignmentMap instance containing the parsed data.</returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
/// <exception cref="FormatException">Thrown when the reader has no lines.</exception>
protected SequenceAlignmentMap ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
{
    // The flag is recorded first, so it is updated even if validation below fails.
    _isReadOnly = isReadOnly;

    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    if (!mbfReader.HasLines)
    {
        // Empty input is not accepted.
        throw new FormatException(Resource.Parser_NoTextErrorMessage);
    }

    // The header is consumed first; the alignment records follow it.
    SequenceAlignmentMap alignmentMap = new SequenceAlignmentMap(ParserSAMHeader(mbfReader));
    ParseSequences(alignmentMap, mbfReader, isReadOnly);

    return alignmentMap;
}
/// <summary>
/// Writes the given sequence alignment to the supplied writer: the SAM header
/// (when one is stored in the alignment metadata) followed by every aligned sequence.
/// </summary>
/// <param name="sequenceAlignment">The sequence alignment to format.</param>
/// <param name="writer">The TextWriter that receives the formatted alignment text.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when sequenceAlignment or writer is null.
/// </exception>
public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
{
    if (sequenceAlignment == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
    }

    if (writer == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameWriter);
    }

    // Header section: written only when the metadata entry is a SAMAlignmentHeader.
    SAMAlignmentHeader samHeader =
        sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
    if (samHeader != null)
    {
        WriteHeader(samHeader, writer);
    }

    // Alignment section: one record per aligned sequence.
    foreach (IAlignedSequence current in sequenceAlignment.AlignedSequences)
    {
        WriteSAMAlignedSequence(current, writer);
    }

    writer.Flush();
}
/// <summary>
/// Creates a SequenceAlignmentMap for the given SAM header with an empty list of
/// query sequences. The header is also registered in the metadata dictionary under
/// Helper.SAMAlignmentHeaderKey.
/// </summary>
/// <param name="header">SAM header.</param>
/// <exception cref="ArgumentNullException">Thrown when header is null.</exception>
public SequenceAlignmentMap(SAMAlignmentHeader header)
{
    if (header == null)
    {
        throw new ArgumentNullException("header");
    }

    _header = header;
    _querySequences = new List<SAMAlignedSequence>();

    // Expose the header through the metadata dictionary as well.
    _metadata = new Dictionary<string, object>
    {
        { Helper.SAMAlignmentHeaderKey, _header }
    };
}
/// <summary>
/// Parses sequence alignment text from a file.
/// When data virtualization is enabled and a valid sidecar is available, the returned
/// map is backed by a virtual sequence list; otherwise the file is parsed through
/// the reader-based Parse overload.
/// </summary>
/// <param name="fileName">Name of the file to parse.</param>
/// <param name="isReadOnly">
/// Flag indicating whether the resulting sequences in the alignment should be
/// read-only. It is forwarded to the reader-based Parse overload.
/// </param>
/// <returns>SequenceAlignmentMap object.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when fileName is null, empty or whitespace.
/// </exception>
public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        throw new ArgumentNullException("fileName");
    }

    _fileName = fileName;

    // Decide whether data virtualization has to be switched on for this file.
    FileInfo sourceFile = new FileInfo(_fileName);
    _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

    // A threshold of zero disables the size check; the file length is only read otherwise.
    bool sizeLimitReached = _enforceDataVirtualizationByFileSize != 0
        && sourceFile.Length >= _enforceDataVirtualizationByFileSize;
    if (sizeLimitReached || _isDataVirtualizationEnforced)
    {
        EnforceDataVirtualization = true;
    }

    if (IsDataVirtualizationEnabled)
    {
        using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
        {
            SAMAlignmentHeader samHeader = ParserSAMHeader(streamReader);

            _sidecarFileProvider = new SidecarFileProvider(fileName);

            // A sidecar file that exists but is not valid is rebuilt by parsing the sequences.
            if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
            {
                ParseSequences(streamReader);
            }

            if (_sidecarFileProvider.IsSidecarValid)
            {
                return new SequenceAlignmentMap(
                    samHeader,
                    new VirtualAlignedSequenceList<SAMAlignedSequence>(
                        _sidecarFileProvider,
                        this,
                        _sidecarFileProvider.Count));
            }
        }
    }

    // No usable sidecar (or virtualization is off): parse the whole file.
    using (MBFTextReader textReader = new MBFTextReader(fileName))
    {
        return Parse(textReader, isReadOnly);
    }
}
/// <summary>
/// Parses the SAM alignment header from the specified MBFTextReader.
/// Consecutive lines starting with "@" are consumed; "@CO" lines are stored as
/// comments and all other lines as record fields with their tags. The reader is
/// left on the first line that does not start with "@".
/// </summary>
/// <param name="mbfReader">MBF text reader.</param>
/// <returns>
/// The parsed header. It is empty when the current line of the reader is not a header line.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
/// <exception cref="FormatException">
/// Thrown when header lines were read and SAMAlignmentHeader.IsValid reports a problem.
/// </exception>
public static SAMAlignmentHeader ParseSAMHeader(MBFTextReader mbfReader)
{
    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    _headerLength = 0;
    SAMAlignmentHeader samHeader = new SAMAlignmentHeader();
    bool headerLineSeen = false;

    while (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
    {
        headerLineSeen = true;

        string line = mbfReader.Line;
        _headerLength += line.Length;

        string[] tokens = line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

        // The record type code is the first token without its leading '@'.
        string recordTypecode = tokens[0].Substring(1);

        // Validate the header format before the tags are taken apart.
        ValidateHeaderLineFormat(line);

        if (string.Compare(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase) != 0)
        {
            SAMRecordField headerLine = new SAMRecordField(recordTypecode);

            // Each remaining token is read as a two-character tag name, a one-character
            // separator and the value.
            for (int i = 1; i < tokens.Length; i++)
            {
                string tagToken = tokens[i];
                string tagName = tagToken.Substring(0, 2);
                headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
            }

            samHeader.RecordFields.Add(headerLine);
        }
        else
        {
            // The comment text starts after the four-character "@CO" + separator prefix.
            // A line too short to carry any text yields an empty comment instead of
            // failing with an ArgumentOutOfRangeException.
            samHeader.Comments.Add(line.Length > 4 ? line.Substring(4) : string.Empty);
        }

        mbfReader.GoToNextLine();
    }

    // The header is validated only when at least one header line was present.
    if (headerLineSeen)
    {
        string message = samHeader.IsValid();
        if (!string.IsNullOrEmpty(message))
        {
            throw new FormatException(message);
        }
    }

    return samHeader;
}
/// <summary>
/// Writes the specified SAM alignment header to the specified text writer.
/// Each record field becomes one "@" + type code line with tab-separated TAG:VALUE
/// pairs, followed by one "@CO" line per comment. Nothing is written when the
/// header is null.
/// </summary>
/// <param name="header">Header to write. A null header is silently ignored.</param>
/// <param name="writer">Text writer.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when writer is null and header is not null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when SAMAlignmentHeader.IsValid returns a non-empty message.
/// </exception>
public static void WriteHeader(SAMAlignmentHeader header, TextWriter writer)
{
    if (header == null)
    {
        return;
    }

    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    string validationMessage = header.IsValid();
    if (!string.IsNullOrEmpty(validationMessage))
    {
        throw new ArgumentException(validationMessage);
    }

    // One builder is reused for every line; it is cleared before each use.
    StringBuilder line = new StringBuilder();

    for (int fieldIndex = 0; fieldIndex < header.RecordFields.Count; fieldIndex++)
    {
        var field = header.RecordFields[fieldIndex];

        line.Length = 0;
        line.Append("@").Append(field.Typecode);

        for (int tagIndex = 0; tagIndex < field.Tags.Count; tagIndex++)
        {
            var tag = field.Tags[tagIndex];
            line.Append("\t").Append(tag.Tag).Append(":").Append(tag.Value);
        }

        writer.WriteLine(line.ToString());
    }

    foreach (string comment in header.Comments)
    {
        line.Length = 0;
        line.Append("@CO").Append("\t").Append(comment);
        writer.WriteLine(line.ToString());
    }

    writer.Flush();
}
/// <summary>
/// Deserialization constructor. Restores the header from the "header" entry and the
/// query sequences from the "sequences" entry of the serialization info; a null
/// sequence list is replaced by an empty list.
/// </summary>
/// <param name="info">Serialization info.</param>
/// <param name="context">Streaming context (not used by this constructor).</param>
/// <exception cref="ArgumentNullException">Thrown when info is null.</exception>
protected SequenceAlignmentMap(SerializationInfo info, StreamingContext context)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    _header = (SAMAlignmentHeader)info.GetValue("header", typeof(SAMAlignmentHeader));

    // The restored header is also registered in the metadata dictionary.
    _metadata = new Dictionary<string, object>
    {
        { Helper.SAMAlignmentHeaderKey, _header }
    };

    // Fall back to an empty list when no sequences were stored.
    _querySequences =
        (IList<SAMAlignedSequence>)info.GetValue("sequences", typeof(IList<SAMAlignedSequence>))
        ?? new List<SAMAlignedSequence>();
}
/// <summary>
/// Creates a SequenceAlignmentMap for the given SAM header whose query sequences are
/// served by the supplied virtual sequence list.
/// </summary>
/// <param name="header">SAM header; handled by the single-argument constructor.</param>
/// <param name="querySequences">
/// A list of virtual sequences. It is stored as-is; no null check is performed here.
/// </param>
public SequenceAlignmentMap(
    SAMAlignmentHeader header,
    IVirtualAlignedSequenceList<SAMAlignedSequence> querySequences)
    : this(header)
{
    // Overrides the sequence list assigned by the chained constructor.
    this._querySequences = querySequences;
}
/// <summary>
/// Writes an ISequenceAlignment to the location specified by the writer: the SAM
/// header (when one is stored in the alignment metadata) followed by one
/// tab-separated line per aligned sequence.
/// </summary>
/// <param name="sequenceAlignment">The sequence alignment to format.</param>
/// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when sequenceAlignment or writer is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when an aligned sequence has no SAMAlignedSequenceHeader in its metadata,
/// or when its first sequence does not use the DNA alphabet.
/// </exception>
public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
{
    if (sequenceAlignment == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
    }

    if (writer == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameWriter);
    }

    #region Write alignment header
    SAMAlignmentHeader header = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
    if (header != null)
    {
        WriteHeader(header, writer);
    }

    #endregion

    #region Write aligned sequences
    foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
    {
        SAMAlignedSequenceHeader alignedHeader = alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
        if (alignedHeader == null)
        {
            throw new ArgumentException(Resource.SAM_AlignedSequenceHeaderMissing);
        }

        // Leading columns, each followed by a tab:
        // QName, Flag, RName, Pos, MapQ, CIGAR, MRNM, MPos, ISize.
        StringBuilder alignmentLine = new StringBuilder();
        alignmentLine.Append(alignedHeader.QName);
        alignmentLine.Append("\t");
        alignmentLine.Append((int)alignedHeader.Flag);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.RName);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.Pos);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.MapQ);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.CIGAR);
        alignmentLine.Append("\t");

        // A mate reference name equal to the reference name is abbreviated as "=".
        if (string.Compare(alignedHeader.MRNM, alignedHeader.RName, StringComparison.InvariantCultureIgnoreCase) == 0)
        {
            alignmentLine.Append("=");
        }
        else
        {
            alignmentLine.Append(alignedHeader.MRNM);
        }

        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.MPos);
        alignmentLine.Append("\t");
        alignmentLine.Append(alignedHeader.ISize);
        alignmentLine.Append("\t");

        writer.Write(alignmentLine.ToString());

        // Positions to be written as '.' or '=' instead of the stored symbol.
        // Hash sets give constant-time membership tests, so the per-symbol lookup
        // below does not rescan the index collections for every position.
        HashSet<int> dotSymbolIndices = new HashSet<int>(alignedHeader.DotSymbolIndices);
        HashSet<int> equalSymbolIndices = new HashSet<int>(alignedHeader.EqualSymbolIndices);

        if (alignedSequence.Sequences.Count > 0 && alignedSequence.Sequences[0] != null)
        {
            ISequence seq = alignedSequence.Sequences[0];

            if (seq.Alphabet != Alphabets.DNA)
            {
                throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
            }

            for (int i = 0; i < seq.Count; i++)
            {
                char symbol = seq[i].Symbol;

                if (dotSymbolIndices.Contains(i))
                {
                    symbol = '.';
                }

                // Checked second, so '=' wins when an index is listed in both sets.
                if (equalSymbolIndices.Contains(i))
                {
                    symbol = '=';
                }

                writer.Write(symbol);
            }

            writer.Write("\t");

            // Quality column: the raw scores decoded as ASCII, or "*" when the
            // sequence carries no quality information.
            IQualitativeSequence qualSeq = seq as IQualitativeSequence;
            if (qualSeq != null)
            {
                writer.Write(ASCIIEncoding.ASCII.GetString(qualSeq.Scores));
            }
            else
            {
                writer.Write("*");
            }
        }
        else
        {
            // No sequence available: both the sequence and quality columns are "*".
            writer.Write("*");
            writer.Write("\t");
            writer.Write("*");
        }

        // Optional fields are appended as TAG:VTYPE:VALUE.
        foreach (SAMOptionalField field in alignedHeader.OptionalFields)
        {
            writer.Write("\t");
            writer.Write(field.Tag);
            writer.Write(":");
            writer.Write(field.VType);
            writer.Write(":");
            writer.Write(field.Value);
        }

        writer.WriteLine();
    }

    #endregion

    writer.Flush();
}
/// <summary>
/// Parses sequence alignment text from a file.
/// When data virtualization is enabled and a valid sidecar is available, the returned
/// map is backed by a virtual sequence list; otherwise the file is parsed through
/// the reader-based Parse overload.
/// </summary>
/// <param name="fileName">Name of the file to parse.</param>
/// <param name="isReadOnly">
/// Flag indicating whether the resulting sequences in the alignment should be
/// read-only. It is forwarded to the reader-based Parse overload.
/// </param>
/// <returns>SequenceAlignmentMap object.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when fileName is null, empty or whitespace.
/// </exception>
/// <exception cref="FileFormatException">
/// Thrown on the virtualized path when the file has no header and parsing its
/// current line as a sequence fails with an IndexOutOfRangeException.
/// </exception>
public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        throw new ArgumentNullException("fileName");
    }

    _fileName = fileName;

    // Decide whether data virtualization has to be switched on for this file.
    FileInfo inputInfo = new FileInfo(_fileName);
    _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

    // A threshold of zero disables the size check; the file length is only read otherwise.
    bool overThreshold = _enforceDataVirtualizationByFileSize != 0
        && inputInfo.Length >= _enforceDataVirtualizationByFileSize;
    if (overThreshold || _isDataVirtualizationEnforced)
    {
        EnforceDataVirtualization = true;
    }

    if (IsDataVirtualizationEnabled)
    {
        using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
        {
            SAMAlignmentHeader parsedHeader = ParseSAMHeader(streamReader);

            bool headerIsEmpty = parsedHeader.Comments.Count == 0
                && parsedHeader.RecordFields.Count == 0;
            if (headerIsEmpty)
            {
                try
                {
                    // Without a header, check that this is a valid SAM file by
                    // parsing a single sequence from the current line.
                    ParseSequence(streamReader.Line, true, Alphabet, Encoding, RefSequences);
                }
                catch (IndexOutOfRangeException)
                {
                    throw new FileFormatException(Resource.SAM_InvalidInputFile);
                }
            }

            _sidecarFileProvider = new SidecarFileProvider(fileName);

            // A sidecar file that exists but is not valid is rebuilt by parsing the sequences.
            if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
            {
                ParseSequences(streamReader);
            }

            if (_sidecarFileProvider.IsSidecarValid)
            {
                VirtualAlignedSequenceList<SAMAlignedSequence> virtualQueries =
                    new VirtualAlignedSequenceList<SAMAlignedSequence>(
                        _sidecarFileProvider,
                        this,
                        _sidecarFileProvider.Count);

                return new SequenceAlignmentMap(parsedHeader, virtualQueries);
            }
        }
    }

    // No usable sidecar (or virtualization is off): parse the whole file.
    using (MBFTextReader textReader = new MBFTextReader(fileName))
    {
        return Parse(textReader, isReadOnly);
    }
}