/// <summary>
/// Initializes a new SAMAlignedSequence that wraps the given SAMAlignedSequenceHeader.
/// The header is stored in the metadata dictionary under
/// Helper.SAMAlignedSequenceHeaderKey, and the sequence list starts with a single
/// null placeholder entry.
/// </summary>
/// <param name="seqHeader">Header to associate with this aligned sequence.</param>
public SAMAlignedSequence(SAMAlignedSequenceHeader seqHeader)
{
    _seqHeader = seqHeader;

    // Metadata keys are compared case-insensitively.
    _metadata = new Dictionary<string, object>(StringComparer.OrdinalIgnoreCase)
    {
        { Helper.SAMAlignedSequenceHeaderKey, _seqHeader }
    };

    // One null slot is reserved up front so that index 0 always exists.
    _sequences = new List<ISequence> { null };
}
/// <summary>
/// Constructor for deserialization.
/// Restores the header from the "header" entry and the query sequence from the
/// "sequence" entry of the serialization info.
/// </summary>
/// <param name="info">Serialization Info.</param>
/// <param name="context">Streaming context.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
protected SAMAlignedSequence(SerializationInfo info, StreamingContext context)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    _seqHeader = (SAMAlignedSequenceHeader)info.GetValue("header", typeof(SAMAlignedSequenceHeader));

    _metadata = new Dictionary<string, object>(StringComparer.OrdinalIgnoreCase);
    _metadata.Add(Helper.SAMAlignedSequenceHeaderKey, _seqHeader);

    // Start from the same state as the header-taking constructor: a sequence list
    // holding one placeholder slot. Without this the list is never created on the
    // deserialization path.
    // NOTE(review): the QuerySequence setter is not visible here; presumably it
    // stores into _sequences[0] - confirm.
    _sequences = new List<ISequence>();
    _sequences.Add(null);

    QuerySequence = (ISequence)info.GetValue("sequence", typeof(ISequence));
}
/// <summary>
/// Parses a single tab-delimited alignment line into a SAMAlignedSequence.
/// The first eleven tokens are the mandatory fields (QNAME, FLAG, RNAME, POS, MAPQ,
/// CIGAR, MRNM, MPOS, ISIZE, SEQ, QUAL); every token after that is treated as an
/// optional field of the form TAG:VTYPE:VALUE.
/// </summary>
/// <param name="bioText">sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the sequences in the resulting sequence alignment should be in readonly mode or not.
/// If this flag is set to true then the resulting sequences's isReadOnly property
/// will be set to true, otherwise it will be set to false.
/// </param>
/// <param name="alphabet">Alphabet of the sequences.</param>
/// <param name="encoding">Required encoding.</param>
/// <param name="referenceSequences">Reference sequences.</param>
/// <returns>The aligned sequence populated from <paramref name="bioText"/>.</returns>
/// <exception cref="FormatException">
/// Thrown when an optional field does not match OptionalFieldRegex.
/// </exception>
private static SAMAlignedSequence ParseSequence(string bioText, bool isReadOnly, IAlphabet alphabet, IEncoding encoding, IList<ISequence> referenceSequences)
{
    const int optionalTokenStartingIndex = 11;

    // An optional field has exactly three parts: TAG, VTYPE and VALUE.
    const int optionalFieldPartCount = 3;

    // NOTE(review): the number of tokens is not validated; a line with fewer than
    // eleven fields fails with an index-out-of-range error below.
    string[] tokens = bioText.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

    SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
    alignedSeq.QName = tokens[0];
    alignedSeq.Flag = SAMAlignedSequenceHeader.GetFlag(tokens[1]);
    alignedSeq.RName = tokens[2];
    alignedSeq.Pos = int.Parse(tokens[3], CultureInfo.InvariantCulture);
    alignedSeq.MapQ = int.Parse(tokens[4], CultureInfo.InvariantCulture);
    alignedSeq.CIGAR = tokens[5];

    // "=" is shorthand for "same reference name as RNAME".
    alignedSeq.MRNM = tokens[6].Equals("=") ? alignedSeq.RName : tokens[6];
    alignedSeq.MPos = int.Parse(tokens[7], CultureInfo.InvariantCulture);
    alignedSeq.ISize = int.Parse(tokens[8], CultureInfo.InvariantCulture);

    // Look up the reference sequence whose ID matches RNAME (case-insensitive), if any.
    ISequence refSeq = null;
    if (referenceSequences != null && referenceSequences.Count > 0)
    {
        refSeq = referenceSequences.FirstOrDefault(
            r => string.Compare(r.ID, alignedSeq.RName, StringComparison.OrdinalIgnoreCase) == 0);
    }

    ParseQualityNSequence(alignedSeq, alphabet, encoding, tokens[9], tokens[10], refSeq, isReadOnly);

    for (int i = optionalTokenStartingIndex; i < tokens.Length; i++)
    {
        if (!Helper.IsValidRegexValue(OptionalFieldRegex, tokens[i]))
        {
            string message = string.Format(CultureInfo.CurrentCulture, Resource.InvalidOptionalField, tokens[i]);
            throw new FormatException(message);
        }

        // Split into at most three parts so that a VALUE which itself contains ':'
        // (for example "XX:Z:chr1:100") is kept whole instead of being cut at its
        // first colon.
        string[] opttokens = tokens[i].Split(colonDelim, optionalFieldPartCount, StringSplitOptions.RemoveEmptyEntries);

        SAMOptionalField optField = new SAMOptionalField();
        optField.Tag = opttokens[0];
        optField.VType = opttokens[1];
        optField.Value = opttokens[2];
        alignedSeq.OptionalFields.Add(optField);
    }

    return alignedSeq;
}
/// <summary>
/// Writes SAMAlignedSequence to specified text writer.
/// One record line is produced: the eleven mandatory fields formatted through
/// AlignedSequenceFormat, followed by each optional field formatted through
/// OptionalFieldFormat, followed by a line terminator.
/// A missing (null) or empty query sequence is written as "*" for both the
/// sequence and the quality columns.
/// </summary>
/// <param name="alignedSequence">SAM aligned sequence to write</param>
/// <param name="writer">Text writer.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="writer"/> or <paramref name="alignedSequence"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when the metadata does not hold a SAMAlignedSequenceHeader, or when the
/// query sequence does not use the DNA alphabet.
/// </exception>
public static void WriteSAMAlignedSequence(IAlignedSequence alignedSequence, TextWriter writer)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    if (alignedSequence == null)
    {
        throw new ArgumentNullException("alignedSequence");
    }

    SAMAlignedSequenceHeader alignedHeader = alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
    if (alignedHeader == null)
    {
        throw new ArgumentException(Resource.SAM_AlignedSequenceHeaderMissing);
    }

    // The sequence slot may be absent or hold a null placeholder; treat both as
    // "no sequence" instead of dereferencing null.
    ISequence sequence = alignedSequence.Sequences.Count > 0 ? alignedSequence.Sequences[0] : null;
    if (sequence != null && sequence.Alphabet != Alphabets.DNA)
    {
        throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
    }

    string seq = "*";
    string qualValues = "*";

    if (sequence != null)
    {
        if (sequence.Count > 0)
        {
            // Hash sets give constant-time membership tests per symbol position.
            HashSet<int> dotSymbolIndices = new HashSet<int>(alignedHeader.DotSymbolIndices);
            HashSet<int> equalSymbolIndices = new HashSet<int>(alignedHeader.EqualSymbolIndices);

            char[] symbols = new char[sequence.Count];
            for (int i = 0; i < symbols.Length; i++)
            {
                char symbol = sequence[i].Symbol;

                if (dotSymbolIndices.Contains(i))
                {
                    symbol = '.';
                }

                // Checked second so that '=' wins when an index is in both sets.
                if (equalSymbolIndices.Contains(i))
                {
                    symbol = '=';
                }

                symbols[i] = symbol;
            }

            seq = new string(symbols);
        }

        QualitativeSequence qualSeq = sequence as QualitativeSequence;
        if (qualSeq != null)
        {
            byte[] bytes = qualSeq.Scores;

            // Keep the "*" placeholder when there are no scores, so the column is never empty.
            if (bytes != null && bytes.Length > 0)
            {
                qualValues = System.Text.Encoding.ASCII.GetString(bytes);
            }
        }
    }

    // Null-safe ordinal comparison; "=" is shorthand for "same as RNAME".
    string mateReferenceName = string.Equals(alignedHeader.MRNM, alignedHeader.RName, StringComparison.Ordinal)
        ? "="
        : alignedHeader.MRNM;

    // Format with the invariant culture so numeric fields (notably a negative
    // ISIZE) do not depend on the current culture's number formatting.
    string record = string.Format(
        System.Globalization.CultureInfo.InvariantCulture,
        AlignedSequenceFormat,
        alignedHeader.QName,
        (int)alignedHeader.Flag,
        alignedHeader.RName,
        alignedHeader.Pos,
        alignedHeader.MapQ,
        alignedHeader.CIGAR,
        mateReferenceName,
        alignedHeader.MPos,
        alignedHeader.ISize,
        seq,
        qualValues);
    writer.Write(record);

    for (int j = 0; j < alignedHeader.OptionalFields.Count; j++)
    {
        writer.Write(
            OptionalFieldFormat,
            alignedHeader.OptionalFields[j].Tag,
            alignedHeader.OptionalFields[j].VType,
            alignedHeader.OptionalFields[j].Value);
    }

    writer.WriteLine();
}
/// <summary>
/// Writes an ISequenceAlignment to the location specified by the writer.
/// The alignment header is written first (when the metadata holds a
/// SAMAlignmentHeader), then one tab-separated record line per aligned sequence.
/// A missing (null) or empty query sequence is written as "*", and a sequence
/// without quality scores gets "*" in the quality column.
/// </summary>
/// <param name="sequenceAlignment">The sequence alignment to format.</param>
/// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="sequenceAlignment"/> or <paramref name="writer"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when an aligned sequence has no SAMAlignedSequenceHeader in its metadata,
/// or when its query sequence does not use the DNA alphabet.
/// </exception>
public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
{
    if (sequenceAlignment == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
    }

    if (writer == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameWriter);
    }

    // Numeric fields are formatted with the invariant culture so the output does
    // not depend on the current culture (for example its negative sign for ISIZE).
    IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Write alignment header.
    SAMAlignmentHeader header = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
    if (header != null)
    {
        WriteHeader(header, writer);
    }

    // Write aligned sequences, one record per line. Each record is assembled in
    // full before it is written, so a validation failure leaves no partial line.
    foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
    {
        SAMAlignedSequenceHeader alignedHeader = alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
        if (alignedHeader == null)
        {
            throw new ArgumentException(Resource.SAM_AlignedSequenceHeaderMissing);
        }

        StringBuilder alignmentLine = new StringBuilder();
        alignmentLine.Append(alignedHeader.QName);
        alignmentLine.Append('\t');
        alignmentLine.AppendFormat(invariant, "{0}\t", (int)alignedHeader.Flag);
        alignmentLine.Append(alignedHeader.RName);
        alignmentLine.Append('\t');
        alignmentLine.AppendFormat(invariant, "{0}\t", alignedHeader.Pos);
        alignmentLine.AppendFormat(invariant, "{0}\t", alignedHeader.MapQ);
        alignmentLine.Append(alignedHeader.CIGAR);
        alignmentLine.Append('\t');

        // "=" is shorthand for "same reference name as RNAME".
        if (string.Compare(alignedHeader.MRNM, alignedHeader.RName, StringComparison.InvariantCultureIgnoreCase) == 0)
        {
            alignmentLine.Append('=');
        }
        else
        {
            alignmentLine.Append(alignedHeader.MRNM);
        }

        alignmentLine.Append('\t');
        alignmentLine.AppendFormat(invariant, "{0}\t", alignedHeader.MPos);
        alignmentLine.AppendFormat(invariant, "{0}\t", alignedHeader.ISize);

        ISequence seq = alignedSequence.Sequences.Count > 0 ? alignedSequence.Sequences[0] : null;
        if (seq == null)
        {
            // No sequence stored: placeholder for both the sequence and quality columns.
            alignmentLine.Append("*\t*");
        }
        else
        {
            if (seq.Alphabet != Alphabets.DNA)
            {
                throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
            }

            if (seq.Count > 0)
            {
                // Hash sets give constant-time membership tests per symbol position.
                HashSet<int> dotSymbolIndices = new HashSet<int>(alignedHeader.DotSymbolIndices);
                HashSet<int> equalSymbolIndices = new HashSet<int>(alignedHeader.EqualSymbolIndices);

                for (int i = 0; i < seq.Count; i++)
                {
                    char symbol = seq[i].Symbol;

                    if (dotSymbolIndices.Contains(i))
                    {
                        symbol = '.';
                    }

                    // Checked second so that '=' wins when an index is in both sets.
                    if (equalSymbolIndices.Contains(i))
                    {
                        symbol = '=';
                    }

                    alignmentLine.Append(symbol);
                }
            }
            else
            {
                // An empty sequence must not leave the column blank.
                alignmentLine.Append('*');
            }

            alignmentLine.Append('\t');

            IQualitativeSequence qualSeq = seq as IQualitativeSequence;
            byte[] scores = qualSeq != null ? qualSeq.Scores : null;
            if (scores != null && scores.Length > 0)
            {
                alignmentLine.Append(Encoding.ASCII.GetString(scores));
            }
            else
            {
                alignmentLine.Append('*');
            }
        }

        foreach (SAMOptionalField field in alignedHeader.OptionalFields)
        {
            alignmentLine.Append('\t');
            alignmentLine.Append(field.Tag);
            alignmentLine.Append(':');
            alignmentLine.Append(field.VType);
            alignmentLine.Append(':');
            alignmentLine.Append(field.Value);
        }

        writer.WriteLine(alignmentLine.ToString());
    }

    writer.Flush();
}