/// <summary>
/// Runs the string-based <c>Translate</c> overload on this instance's <c>Sequence</c> and wraps the
/// result in a new <see cref="NamedSequence"/> that carries over this instance's <c>Name</c> and <c>Protein</c>.
/// </summary>
/// <param name="dashAsMissing">Passed through unchanged to the string-based <c>Translate</c> overload.</param>
/// <returns>A new <see cref="NamedSequence"/>; this instance is not modified by this method.</returns>
public NamedSequence Translate(bool dashAsMissing)
{
    string translated = Translate(Sequence, dashAsMissing);
    return new NamedSequence(Name, Protein, translated);
}
/// <summary>
/// Reads two-column, tab-separated records (name, sequence) from <paramref name="reader"/>.
/// </summary>
/// <remarks>
/// When <c>NoHeader</c> is set, the whole input is read up front and <c>OUTPUT_HEADER</c> plus a newline
/// is prepended so that the first row can always be treated as the header.
/// The first row yielded by <c>SpecialFunctions.TabFileTable</c> is taken as the header: its <c>""</c>
/// entry is split on tabs and must produce exactly two column names. Every later row is looked up by
/// those two names.
/// </remarks>
/// <param name="reader">Source of the tab-separated text.</param>
/// <returns>One <see cref="NamedSequence"/> per data row, in input order.</returns>
public override List<NamedSequence> Parse(TextReader reader)
{
    if (NoHeader)
    {
        reader = new StringReader(OUTPUT_HEADER + "\n" + reader.ReadToEnd());
    }

    List<NamedSequence> parsed = new List<NamedSequence>();
    string[] columnNames = null;

    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(reader, "", true, false, true))
    {
        if (columnNames != null)
        {
            // Data row: fetch both cells before building the entry.
            string rawName = row[columnNames[0]];
            string rawSequence = row[columnNames[1]];

            NamedSequence entry = NamedSequence.Parse(rawName);
            entry.Sequence = RemoveWhiteSpace(rawSequence);
            parsed.Add(entry);
            continue;
        }

        // First row: establishes the two column names used for every following row.
        columnNames = row[""].Split('\t');
        Helper.CheckCondition(columnNames.Length == 2, "Header does not conform to tab file format: " + row[""]);
    }

    return parsed;
}
/// <summary>
/// Reads records in which a line starting with '&gt;' is a header and the lines that follow it
/// (up to the next header or end of input) are concatenated into that record's sequence.
/// </summary>
/// <remarks>
/// The header text after '&gt;' is trimmed and handed to <c>NamedSequence.Parse</c>. Each sequence line is
/// trimmed before being appended. Lines that appear before the first header are discarded.
/// Input that contains no header line at all (including empty input) yields an empty list.
/// </remarks>
/// <param name="reader">Source of the text to parse.</param>
/// <returns>The parsed records, in input order; empty when no header line was found.</returns>
public override List<NamedSequence> Parse(TextReader reader)
{
    List<NamedSequence> seqs = new List<NamedSequence>();
    NamedSequence current = null;
    StringBuilder sequence = new StringBuilder();

    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // Ordinal first-character test; avoids the culture-sensitive string.StartsWith(string) overload.
        bool isHeader = line.Length > 0 && line[0] == '>';
        if (!isHeader)
        {
            sequence.Append(line.Trim());
            continue;
        }

        // A new header closes the record that was being accumulated, if any.
        if (current != null)
        {
            current.Sequence = sequence.ToString();
            seqs.Add(current);
        }

        current = NamedSequence.Parse(line.Substring(1).Trim());
        // Start a fresh buffer, using the previous record's length as the capacity hint.
        sequence = new StringBuilder(sequence.Length);
    }

    // Flush the final record. Guarded so that input without any header no longer dereferences null.
    if (current != null)
    {
        current.Sequence = sequence.ToString();
        seqs.Add(current);
    }

    return seqs;
}
/// <summary>
/// Builds a <see cref="NamedSequence"/> from a textual argument list.
/// </summary>
/// <remarks>
/// The text is wrapped in parentheses and given to <c>ConstructorArguments</c>. The optional
/// <c>Protein</c> argument (default <c>null</c>) is extracted first, then the next argument is taken
/// as the name. <c>Sequence</c> is not set here.
/// </remarks>
/// <param name="constructorArgs">Argument text, without the surrounding parentheses.</param>
/// <returns>A new instance with <c>Protein</c> and <c>Name</c> populated.</returns>
public static NamedSequence Parse(string constructorArgs)
{
    string wrappedArgs = "(" + constructorArgs + ")";
    ConstructorArguments parsedArgs = new ConstructorArguments(wrappedArgs);

    // Initializer members are evaluated top to bottom, so Protein is extracted before Name.
    return new NamedSequence
    {
        Protein = parsedArgs.ExtractOptional<string>("Protein", null),
        Name = parsedArgs.ExtractNext<string>("name")
    };
}
/// <summary>
/// Reads Phylip-style text: a header line holding the sequence count and sequence length, followed by
/// one non-blank line per sequence.
/// </summary>
/// <remarks>
/// On each data line the name ends at the first space or tab found within the first 50 characters,
/// but never before column 10; if no such whitespace is found the name is the first 10 characters.
/// A line shorter than 10 characters is treated as all name with an empty sequence, so it is reported
/// through the normal length check instead of failing inside <c>Substring</c>.
/// Blank lines are skipped.
/// </remarks>
/// <param name="reader">Source of the text to parse.</param>
/// <returns>The parsed sequences, in input order.</returns>
/// <exception cref="FormatException">
/// The input is empty, the header line is not accepted by <c>ConformsToFileFormat</c> or cannot be parsed
/// as integers, a sequence's length differs from the header's length, or the number of sequences differs
/// from the header's count.
/// </exception>
public override List<NamedSequence> Parse(TextReader reader)
{
    const string badHeaderMessage = "File does not conform to Phylip format. First line must be two integers specifying number of sequences and sequence length.";

    string line = reader.ReadLine();
    if (line == null)
    {
        // Empty input has no header line at all; report it as a format problem rather than a null dereference.
        throw new FormatException(badHeaderMessage);
    }

    line = line.Trim();
    if (!ConformsToFileFormat(line))
    {
        throw new FormatException(badHeaderMessage);
    }

    string[] fields = Regex.Split(line, @"\s+");
    int lineCount, lineLength;
    try
    {
        lineCount = int.Parse(fields[0]);
        lineLength = int.Parse(fields[1]);
    }
    catch (FormatException)
    {
        Console.WriteLine("Error parsing Phylip header: {0} split to {1} and {2}", line, fields[0], fields[1]);
        // Bare rethrow keeps the original stack trace.
        throw;
    }

    List<NamedSequence> seqs = new List<NamedSequence>(lineCount);
    char[] nameDelimiters = new char[] { ' ', '\t' };

    while ((line = reader.ReadLine()) != null)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        int firstBreak = line.IndexOfAny(nameDelimiters, 0, Math.Min(50, line.Length));
        // Names occupy at least 10 columns; clamp so that short lines cannot index past the end.
        int endOfName = Math.Min(line.Length, Math.Max(10, firstBreak));

        string name = line.Substring(0, endOfName).Trim();
        string seq = RemoveWhiteSpace(line.Substring(endOfName));

        NamedSequence sequence = NamedSequence.Parse(name);
        sequence.Sequence = seq;

        if (sequence.Sequence.Length != lineLength)
        {
            throw new FormatException(string.Format("Expected {0} to be {1} chars long, but its {2} long.", sequence.Name, lineLength, sequence.Sequence.Length));
        }

        seqs.Add(sequence);
    }

    if (seqs.Count != lineCount)
    {
        throw new FormatException(string.Format("Expected {0} sequences. Read {1}.", lineCount, seqs.Count));
    }

    return seqs;
}
/// <summary>
/// Two instances are equal when both <c>Name</c> and <c>Sequence</c> match.
/// </summary>
/// <param name="obj">Object to compare with; anything that is not a <see cref="NamedSequence"/> compares unequal.</param>
/// <returns><c>true</c> when <paramref name="obj"/> is a <see cref="NamedSequence"/> with the same name and sequence.</returns>
public override bool Equals(object obj)
{
    NamedSequence that = obj as NamedSequence;
    if (that == null)
    {
        return false;
    }

    // Name is compared first; Sequence is only looked at when the names agree.
    if (that.Name != this.Name)
    {
        return false;
    }

    return that.Sequence == this.Sequence;
}
/// <summary>
/// Builds a consensus sequence for <paramref name="seqs"/> by converting each entry to an
/// <c>AASeq</c> and delegating to <c>GetAaSeqConsensus</c>.
/// </summary>
/// <remarks>
/// Only the first entry is asked whether it is DNA; that answer selects <c>DnaSeq.GetInstance</c> or
/// <c>AASeq.GetInstance</c> for every entry. All conversions use <c>MixtureSemantics.Uncertainty</c>.
/// The list must contain at least one entry, because the first element is read unconditionally.
/// </remarks>
/// <param name="seqs">Sequences to summarise; must be non-empty.</param>
/// <returns>A <see cref="NamedSequence"/> named <c>"consensus"</c> holding the consensus string.</returns>
public static NamedSequence GetConsensus(List<NamedSequence> seqs)
{
    // The first sequence decides how the whole set is interpreted.
    bool isDna = seqs[0].IsDna();

    List<AASeq> aaSeqs = new List<AASeq>(seqs.Count);
    foreach (NamedSequence seq in seqs)
    {
        AASeq aaSeq = isDna ? DnaSeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty) : AASeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty);
        aaSeqs.Add(aaSeq);
    }

    AASeq consensusAaSeq = GetAaSeqConsensus(aaSeqs);
    return new NamedSequence("consensus", consensusAaSeq.ToString());
}
/// <summary>
/// Translates every sequence in <paramref name="seqs"/> in the requested reading frame and returns
/// the results as new <see cref="NamedSequence"/> instances with the same name and protein.
/// </summary>
/// <param name="seqs">Sequences to translate.</param>
/// <param name="nucToAaDashAsMissing">Passed through to the string-based <c>NamedSequence.Translate</c> overload.</param>
/// <param name="readingFrameToTranslate">
/// 1-based reading frame, 1 through 6. Translation starts at offset readingFrameToTranslate-1.
/// For frames 4-6 the input is first run through <c>NucSeqReverseComplement</c>, the frame is reduced
/// by 3, and each translated result is passed through <c>ReverseSequence()</c> before being returned.
/// </param>
/// <remarks>
/// Bases left over after the last complete codon are cut off. A warning is written to standard error
/// when that happens, but it is not repeated for consecutive sequences of the same length.
/// A sequence shorter than the frame offset is translated as an empty string.
/// </remarks>
/// <returns>One translated sequence per input sequence, in input order.</returns>
public static List<NamedSequence> Translate(List<NamedSequence> seqs, bool nucToAaDashAsMissing = true, int readingFrameToTranslate = 1)
{
    Helper.CheckCondition(readingFrameToTranslate > 0 && readingFrameToTranslate <= 6, "readingFrameToTranslate must be between 1 and 6. " + readingFrameToTranslate + " is not valid.");

    bool isAntisense = readingFrameToTranslate > 3;
    if (isAntisense)
    {
        seqs = NucSeqReverseComplement(seqs);
        readingFrameToTranslate -= 3;
    }

    int frameOffset = readingFrameToTranslate - 1;
    List<NamedSequence> result = new List<NamedSequence>(seqs.Count);
    int lastLen = -1;

    foreach (NamedSequence seq in seqs)
    {
        string nucleotides = seq.Sequence;
        // Substring throws when the offset is past the end, so very short sequences translate as empty.
        string seqToTranslate = frameOffset < nucleotides.Length ? nucleotides.Substring(frameOffset) : string.Empty;
        int overhang = seqToTranslate.Length % 3;

        // Warn only when the length differs from the previous sequence, to avoid flooding stderr on alignments.
        if (seqToTranslate.Length != lastLen)
        {
            lastLen = seqToTranslate.Length;
            if (overhang != 0)
            {
                Console.Error.WriteLine("WARNING: Sequence of length {0} is not divisible by 3. Cutting off the end for translation.", lastLen);
            }
        }

        if (overhang != 0)
        {
            seqToTranslate = seqToTranslate.Substring(0, seqToTranslate.Length - overhang);
        }

        NamedSequence translatedSeq = new NamedSequence(seq.Name, seq.Protein, NamedSequence.Translate(seqToTranslate, nucToAaDashAsMissing));
        result.Add(isAntisense ? translatedSeq.ReverseSequence() : translatedSeq);
    }

    return result;
}