/// <summary>
/// Parses aligned-sequence lines from the reader and adds each parsed
/// alignment to the QuerySequences collection of <paramref name="seqAlignment"/>.
/// Parsing stops when the reader has no more lines or when the current
/// line starts with "@".
/// </summary>
/// <param name="seqAlignment">Alignment map that receives the parsed sequences.</param>
/// <param name="bioReader">Reader supplying the lines; advanced by one line per parsed sequence.</param>
/// <param name="isReadOnly">Passed through unchanged to ParseQualityNSequence.</param>
/// <exception cref="FormatException">
/// Thrown when the header validation returns a message, when an optional
/// field does not match OptionalFieldLinePattern, or when the optional
/// field validation returns a message.
/// </exception>
private void ParseSequences(SequenceAlignmentMap seqAlignment, BioTextReader bioReader, bool isReadOnly)
{
    // Tokens 0-10 are the fixed columns; everything from this index on is an optional field.
    const int optionalTokenStartingIndex = 11;

    // An optional field has exactly three parts: tag, value type and value.
    const int optionalFieldPartCount = 3;

    while (bioReader.HasLines && !bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
    {
        string[] tokens = bioReader.Line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

        SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
        alignedSeq.QName = tokens[0];
        alignedSeq.Flag = SAMAlignedSequenceHeader.GetFlag(tokens[1]);
        alignedSeq.RName = tokens[2];
        alignedSeq.Pos = int.Parse(tokens[3], CultureInfo.InvariantCulture);
        alignedSeq.MapQ = int.Parse(tokens[4], CultureInfo.InvariantCulture);
        alignedSeq.CIGAR = tokens[5];

        // "=" in the mate reference column means "same as RName".
        alignedSeq.MRNM = tokens[6].Equals("=") ? alignedSeq.RName : tokens[6];
        alignedSeq.MPos = int.Parse(tokens[7], CultureInfo.InvariantCulture);
        alignedSeq.ISize = int.Parse(tokens[8], CultureInfo.InvariantCulture);

        // A non-empty message from the validator describes what is wrong with the header.
        string message = alignedSeq.IsValidHeader();
        if (!string.IsNullOrEmpty(message))
        {
            throw new FormatException(message);
        }

        // Look up the reference sequence whose ID matches RName (case-insensitive);
        // stays null when there are no reference sequences or none matches.
        ISequence refSeq = null;
        if (RefSequences != null && RefSequences.Count > 0)
        {
            refSeq = RefSequences.FirstOrDefault(
                R => string.Equals(R.ID, alignedSeq.RName, StringComparison.OrdinalIgnoreCase));
        }

        ParseQualityNSequence(alignedSeq, Alphabet, Encoding, tokens[9], tokens[10], refSeq, isReadOnly);

        for (int i = optionalTokenStartingIndex; i < tokens.Length; i++)
        {
            if (!Helper.IsValidRegexValue(OptionalFieldLinePattern, tokens[i]))
            {
                message = string.Format(CultureInfo.CurrentCulture, Resource.InvalidOptionalField, tokens[i]);
                throw new FormatException(message);
            }

            // Cap the split at three parts so that a value which itself contains
            // the delimiter is kept whole instead of being cut at its first colon.
            string[] opttokens = tokens[i].Split(
                colonDelim,
                optionalFieldPartCount,
                StringSplitOptions.RemoveEmptyEntries);

            SAMOptionalField optField = new SAMOptionalField();
            optField.Tag = opttokens[0];
            optField.VType = opttokens[1];
            optField.Value = opttokens[2];

            message = optField.IsValid();
            if (!string.IsNullOrEmpty(message))
            {
                throw new FormatException(message);
            }

            alignedSeq.OptionalFields.Add(optField);
        }

        seqAlignment.QuerySequences.Add(alignedSeq);
        bioReader.GoToNextLine();
    }
}
/// <summary>
/// Parses a single tab-delimited alignment line into a SAMAlignedSequence.
/// </summary>
/// <param name="bioText">Sequence alignment text (one line, tab-delimited).</param>
/// <param name="alphabet">Alphabet of the sequences; passed to ParseQualityNSequence.</param>
/// <param name="referenceSequences">
/// Reference sequences. The first one whose ID matches the parsed RName
/// (case-insensitive) is passed to ParseQualityNSequence; may be null or empty.
/// </param>
/// <returns>The aligned sequence populated from the tokens of <paramref name="bioText"/>.</returns>
/// <exception cref="FormatException">
/// Thrown when an optional field does not match OptionalFieldRegex.
/// </exception>
private static SAMAlignedSequence ParseSequence(string bioText, IAlphabet alphabet, IList<ISequence> referenceSequences)
{
    // Tokens 0-10 are the fixed columns; everything from this index on is an optional field.
    const int optionalTokenStartingIndex = 11;

    // An optional field has exactly three parts: tag, value type and value.
    const int optionalFieldPartCount = 3;

    string[] tokens = bioText.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

    SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
    alignedSeq.QName = tokens[0];
    alignedSeq.Flag = SAMAlignedSequenceHeader.GetFlag(tokens[1]);
    alignedSeq.RName = tokens[2];
    alignedSeq.Pos = int.Parse(tokens[3], CultureInfo.InvariantCulture);
    alignedSeq.MapQ = int.Parse(tokens[4], CultureInfo.InvariantCulture);
    alignedSeq.CIGAR = tokens[5];

    // "=" in the mate reference column means "same as RName".
    alignedSeq.MRNM = tokens[6].Equals("=") ? alignedSeq.RName : tokens[6];
    alignedSeq.MPos = int.Parse(tokens[7], CultureInfo.InvariantCulture);
    alignedSeq.ISize = int.Parse(tokens[8], CultureInfo.InvariantCulture);

    // Stays null when no reference sequences were supplied or none matches RName.
    ISequence refSeq = null;
    if (referenceSequences != null && referenceSequences.Count > 0)
    {
        refSeq = referenceSequences.FirstOrDefault(
            R => string.Equals(R.ID, alignedSeq.RName, StringComparison.OrdinalIgnoreCase));
    }

    ParseQualityNSequence(alignedSeq, alphabet, tokens[9], tokens[10], refSeq);

    for (int i = optionalTokenStartingIndex; i < tokens.Length; i++)
    {
        if (!Helper.IsValidRegexValue(OptionalFieldRegex, tokens[i]))
        {
            string message = string.Format(
                CultureInfo.CurrentCulture,
                Properties.Resource.InvalidOptionalField,
                tokens[i]);
            throw new FormatException(message);
        }

        // Cap the split at three parts so that a value which itself contains
        // the delimiter is kept whole instead of being cut at its first colon.
        string[] opttokens = tokens[i].Split(
            colonDelim,
            optionalFieldPartCount,
            StringSplitOptions.RemoveEmptyEntries);

        SAMOptionalField optField = new SAMOptionalField();
        optField.Tag = opttokens[0];
        optField.VType = opttokens[1];
        optField.Value = opttokens[2];
        alignedSeq.OptionalFields.Add(optField);
    }

    return alignedSeq;
}
/// <summary>
/// Parses a single tab-delimited alignment line into a SAMAlignedSequence.
/// </summary>
/// <param name="bioText">Sequence alignment text (one line, tab-delimited).</param>
/// <param name="alphabet">Alphabet of the sequences; passed to ParseQualityNSequence.</param>
/// <returns>The aligned sequence populated from the tokens of <paramref name="bioText"/>.</returns>
/// <exception cref="FormatException">
/// Thrown when an optional field does not match OptionalFieldRegex.
/// </exception>
public static SAMAlignedSequence ParseSequence(string bioText, IAlphabet alphabet)
{
    // Tokens 0-10 are the fixed columns; everything from this index on is an optional field.
    const int optionalTokenStartingIndex = 11;

    // An optional field has exactly three parts: tag, value type and value.
    const int optionalFieldPartCount = 3;

    string[] tokens = bioText.Split(TabDelim, StringSplitOptions.RemoveEmptyEntries);

    // Numeric columns are machine-written, so they are parsed with the
    // invariant culture rather than whatever culture the current thread has.
    SAMAlignedSequence alignedSeq = new SAMAlignedSequence
    {
        QName = tokens[0],
        Flag = SAMAlignedSequenceHeader.GetFlag(tokens[1]),
        RName = tokens[2],
        Pos = int.Parse(tokens[3], CultureInfo.InvariantCulture),
        MapQ = int.Parse(tokens[4], CultureInfo.InvariantCulture),
        CIGAR = tokens[5]
    };

    // "=" in the mate reference column means "same as RName".
    alignedSeq.MRNM = tokens[6].Equals("=") ? alignedSeq.RName : tokens[6];
    alignedSeq.MPos = int.Parse(tokens[7], CultureInfo.InvariantCulture);
    alignedSeq.ISize = int.Parse(tokens[8], CultureInfo.InvariantCulture);

    ParseQualityNSequence(alignedSeq, alphabet, tokens[9], tokens[10]);

    for (int i = optionalTokenStartingIndex; i < tokens.Length; i++)
    {
        if (!Helper.IsValidRegexValue(OptionalFieldRegex, tokens[i]))
        {
            throw new FormatException(
                string.Format(
                    CultureInfo.CurrentCulture,
                    Properties.Resource.InvalidOptionalField,
                    tokens[i]));
        }

        // Cap the split at three parts so that a value which itself contains
        // the delimiter is kept whole instead of being cut at its first colon.
        string[] opttokens = tokens[i].Split(
            ColonDelim,
            optionalFieldPartCount,
            StringSplitOptions.RemoveEmptyEntries);

        SAMOptionalField optField = new SAMOptionalField
        {
            Tag = opttokens[0],
            VType = opttokens[1],
            Value = opttokens[2]
        };
        alignedSeq.OptionalFields.Add(optField);
    }

    return alignedSeq;
}