/// <summary>
/// Reads FASTA-style records from <paramref name="reader"/>.
/// A line starting with '>' opens a new record: the text after the '>' is trimmed and
/// handed to <c>NamedSequence.Parse</c>. Every following non-header line is trimmed and
/// concatenated to form that record's <c>Sequence</c>.
/// </summary>
/// <param name="reader">Text source, consumed line by line until <c>ReadLine</c> returns null.</param>
/// <returns>
/// The records in the order their header lines appear. The list is empty when the input
/// contains no header line at all (including completely empty input).
/// </returns>
public override List<NamedSequence> Parse(TextReader reader)
{
    List<NamedSequence> seqs = new List<NamedSequence>();
    NamedSequence current = null;
    StringBuilder sequence = new StringBuilder();
    string line;

    while ((line = reader.ReadLine()) != null)
    {
        // Ordinal comparison: '>' is a format marker, not linguistic text.
        if (line.StartsWith(">", System.StringComparison.Ordinal))
        {
            // A new header terminates the record collected so far, if there is one.
            if (current != null)
            {
                current.Sequence = sequence.ToString();
                seqs.Add(current);
            }

            current = NamedSequence.Parse(line.Substring(1).Trim());

            // Start a fresh buffer, pre-sized to the length of the previous sequence.
            // Any lines seen before the very first header are dropped here.
            sequence = new StringBuilder(sequence.Length);
        }
        else
        {
            sequence.Append(line.Trim());
        }
    }

    // Flush the final record. With no header in the input there is nothing to flush;
    // dereferencing unconditionally here used to throw NullReferenceException.
    if (current != null)
    {
        current.Sequence = sequence.ToString();
        seqs.Add(current);
    }

    return seqs;
}
/// <summary>
/// Reads named sequences from two-column, tab-delimited text.
/// When <c>NoHeader</c> is set, the whole input is read up front and <c>OUTPUT_HEADER</c>
/// plus a newline is prepended before parsing.
/// </summary>
/// <param name="reader">Source of the tab-delimited text.</param>
/// <returns>One <c>NamedSequence</c> per data row, in the order rows are produced.</returns>
public override List<NamedSequence> Parse(TextReader reader)
{
    // Supply the header line ourselves when the input is declared to have none.
    TextReader source = NoHeader
        ? new StringReader(OUTPUT_HEADER + "\n" + reader.ReadToEnd())
        : reader;

    List<NamedSequence> result = new List<NamedSequence>();
    string[] columns = null;

    // NOTE(review): the meaning of the "" / true / false / true arguments is defined by
    // SpecialFunctions.TabFileTable and is not visible here - confirm before changing them.
    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(source, "", true, false, true))
    {
        if (columns != null)
        {
            // Data row: first header column keys the name, second keys the sequence.
            string rawName = row[columns[0]];
            string rawSequence = row[columns[1]];

            NamedSequence entry = NamedSequence.Parse(rawName);
            entry.Sequence = RemoveWhiteSpace(rawSequence);
            result.Add(entry);
        }
        else
        {
            // The first row yielded is treated as the header; its "" entry holds the raw line.
            string headerLine = row[""];
            columns = headerLine.Split('\t');
            Helper.CheckCondition(columns.Length == 2, "Header does not conform to tab file format: " + headerLine);
        }
    }

    return result;
}
/// <summary>
/// Reads sequences in Phylip layout. The first line must hold two integers: the number
/// of sequences and the length of each sequence. Every later non-blank line is one record.
/// The name runs to the first space or tab found within the first 50 characters, but is
/// never cut before column 10. The rest of the line, with whitespace removed, is the sequence.
/// </summary>
/// <param name="reader">Text source, consumed line by line until <c>ReadLine</c> returns null.</param>
/// <returns>The parsed records, in file order.</returns>
/// <exception cref="FormatException">
/// The input is empty, the first line does not conform to the expected header, a sequence's
/// length differs from the declared length, or the number of records differs from the
/// declared count.
/// </exception>
public override List<NamedSequence> Parse(TextReader reader)
{
    const string badHeaderMessage = "File does not conform to Phylip format. First line must be two integers specifying number of sequences and sequence length.";

    // An empty input has no header line; report it as a format problem rather than
    // letting Trim() fail on null.
    string line = reader.ReadLine();
    if (line == null)
    {
        throw new FormatException(badHeaderMessage);
    }

    line = line.Trim();
    if (!ConformsToFileFormat(line))
    {
        throw new FormatException(badHeaderMessage);
    }

    string[] fields = Regex.Split(line, @"\s+");
    int lineCount, lineLength;
    try
    {
        lineCount = int.Parse(fields[0]);
        lineLength = int.Parse(fields[1]);
    }
    catch (FormatException)
    {
        Console.WriteLine("Error parsing Phylip header: {0} split to {1} and {2}", line, fields[0], fields[1]);
        // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
        throw;
    }

    List<NamedSequence> seqs = new List<NamedSequence>(lineCount);
    char[] nameDelimiters = new char[] { ' ', '\t' };

    while ((line = reader.ReadLine()) != null)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // Name ends at the first delimiter within the first 50 chars, but not before column 10.
        // Clamp to the line length so a line shorter than 10 chars is handled as
        // "all name, empty sequence" instead of making Substring throw.
        int delimiterIndex = line.IndexOfAny(nameDelimiters, 0, Math.Min(50, line.Length));
        int endOfName = Math.Min(line.Length, Math.Max(10, delimiterIndex));

        string name = line.Substring(0, endOfName).Trim();
        string seq = RemoveWhiteSpace(line.Substring(endOfName));

        NamedSequence sequence = NamedSequence.Parse(name);
        sequence.Sequence = seq;

        if (sequence.Sequence.Length != lineLength)
        {
            throw new FormatException(string.Format("Expected {0} to be {1} chars long, but its {2} long.", sequence.Name, lineLength, sequence.Sequence.Length));
        }

        seqs.Add(sequence);
    }

    if (seqs.Count != lineCount)
    {
        throw new FormatException(string.Format("Expected {0} sequences. Read {1}.", lineCount, seqs.Count));
    }

    return seqs;
}