/// <summary>
/// Peptide Encoding Problem: Find substrings of a genome encoding a given amino acid sequence.
/// </summary>
/// <remarks>
/// Candidate strings are built one amino acid at a time from the encodings returned by
/// <c>Protein.GetPeptideEncodes</c> and explored depth-first with an explicit stack. A branch is
/// abandoned as soon as neither the candidate nor its reverse complement is found in
/// <paramref name="DNA"/>. A trace of the search is written to <c>.\PA2_2_debug.txt</c>.
/// </remarks>
/// <param name="DNA">A DNA string</param>
/// <param name="Peptide">An amino acid string</param>
/// <returns>
/// The matched strings separated by single spaces, one entry per distinct match position
/// (a reverse-complement match is reported as the string that was found in <paramref name="DNA"/>),
/// or an empty string if there are no matches or <paramref name="Peptide"/> is empty.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="Peptide"/> is null.</exception>
public static string PeptideEncoding(string DNA, string Peptide)
{
    if (Peptide == null)
    {
        throw new System.ArgumentNullException("Peptide");
    }

    if (Peptide.Length == 0)
    {
        // Nothing to search for; the loop below would otherwise index encodes[0] on an empty array.
        return "";
    }

    // encodes[i] = candidate encodings of the i-th amino acid, as returned by Protein.GetPeptideEncodes.
    List<string>[] encodes = new List<string>[Peptide.Length];
    Protein protein = new Protein();
    for (int i = 0; i < Peptide.Length; i++)
    {
        encodes[i] = protein.GetPeptideEncodes(Peptide[i].ToString());
    }

    string forward, reverse;
    List<int> forwardHits, reverseHits;

    // Match position -> matched string.
    Dictionary<int, string> matches = new Dictionary<int, string>();

    // Arguments are presumably (Position, AdvRNA, IsAdvMatch, IsRevMatch) - confirm against the type.
    RNAConstructorState current = new RNAConstructorState(0, "", true, true);

    // The top of the stack carries the prefix (AdvRNA) built so far for the current level;
    // the bottom entry is the empty prefix. stack.Count - 1 is the index of the current level.
    // NOTE(review): the traversal relies on Push/Pop copying the state (i.e. RNAConstructorState
    // being a value type) - confirm against its declaration.
    Stack<RNAConstructorState> stack = new Stack<RNAConstructorState>();
    stack.Push(current);

    // Dispose the writer and stream so the trace is flushed and the file handle is released.
    // NOTE(review): File.OpenWrite does not truncate an existing file, so a shorter trace can
    // leave stale text from an earlier run at the end of the file.
    using (FileStream fs1 = File.OpenWrite(@".\PA2_2_debug.txt"))
    using (TextWriter wr = new StreamWriter(fs1))
    {
        foreach (List<string> e in encodes)
        {
            foreach (string s in e)
            {
                wr.Write(s + " ");
            }
            wr.WriteLine();
        }

        do
        {
            int level = stack.Count - 1;
            if (current.Position < encodes[level].Count)
            {
                // Position in range -> extend the parent prefix with this encoding and search for it.
                forward = stack.Peek().AdvRNA + Protein.UntranscribeRNA(encodes[level][current.Position]);
                forwardHits = BioInf_PA1.FindPatternPositions(forward, DNA);
                reverse = BioInf_PA1.ReverseComplement(forward);
                reverseHits = BioInf_PA1.FindPatternPositions(reverse, DNA);
                // TODO (carried over): skip a strand's search when the parent level already failed on
                // that strand; IsAdvMatch/IsRevMatch are recorded below but not consulted yet.

                // An empty list is treated like null, in case the helper reports "nothing found" that way.
                bool forwardFound = forwardHits != null && forwardHits.Count > 0;
                bool reverseFound = reverseHits != null && reverseHits.Count > 0;

                wr.WriteLine("Adv[{1}.{0}]: {2} - {3}", stack.Count, current.Position, forward, forwardFound ? forwardHits.Count : 0);
                wr.WriteLine("Rev[{1}.{0}]: {2} - {3}", stack.Count, current.Position, reverse, reverseFound ? reverseHits.Count : 0);

                if (forwardFound || reverseFound)
                {
                    if (stack.Count < encodes.Length)
                    {
                        // Not the last level -> descend (INTO).
                        current.AdvRNA = forward;
                        current.IsAdvMatch = forwardFound;
                        current.IsRevMatch = reverseFound;
                        stack.Push(current);
                        current.Position = 0;
                    }
                    else
                    {
                        // Last level -> the full string matched: record it and move on (ADD + NEXT).
                        // A position can be reported twice (e.g. the candidate equals its own reverse
                        // complement); Dictionary.Add would throw on the repeated key, so keep the first.
                        if (forwardFound)
                        {
                            foreach (int pos in forwardHits)
                            {
                                if (!matches.ContainsKey(pos))
                                {
                                    matches.Add(pos, forward);
                                }
                            }
                        }
                        if (reverseFound)
                        {
                            foreach (int pos in reverseHits)
                            {
                                if (!matches.ContainsKey(pos))
                                {
                                    matches.Add(pos, reverse);
                                }
                            }
                        }
                        current.Position++;
                    }
                }
                else
                {
                    // No match on either strand -> try the next encoding on this level (NEXT).
                    current.Position++;
                }
            }
            else
            {
                // Every encoding on this level has been tried.
                if (stack.Count > 1)
                {
                    // Not the first level -> resume the parent level at its next encoding (OUT).
                    current = stack.Pop();
                    current.Position++;
                }
                else
                {
                    // First level exhausted -> search finished (EXIT).
                    break;
                }
            }
        } while (true);
    }

    // Join once instead of concatenating in a loop; TrimEnd keeps the original trailing-space handling.
    return string.Join(" ", new List<string>(matches.Values).ToArray()).TrimEnd(' ');
}
/// <summary>
/// Peptide Encoding Problem: Find substrings of a genome encoding a given amino acid sequence.
/// </summary>
/// <remarks>
/// Candidate strings are built one amino acid at a time from the encodings returned by
/// <c>Protein.GetPeptideEncodes</c> and explored depth-first with an explicit stack. A branch is
/// abandoned as soon as neither the candidate nor its reverse complement is found in
/// <paramref name="DNA"/>. A trace of the search is written to <c>.\PA2_2_debug.txt</c>.
/// </remarks>
/// <param name="DNA">A DNA string</param>
/// <param name="Peptide">An amino acid string</param>
/// <returns>
/// The matched strings separated by single spaces, one entry per distinct match position
/// (a reverse-complement match is reported as the string that was found in <paramref name="DNA"/>),
/// or an empty string if there are no matches or <paramref name="Peptide"/> is empty.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="Peptide"/> is null.</exception>
public static string PeptideEncoding(string DNA, string Peptide)
{
    if (Peptide == null)
    {
        throw new System.ArgumentNullException("Peptide");
    }

    if (Peptide.Length == 0)
    {
        // Nothing to search for; the loop below would otherwise index encodes[0] on an empty array.
        return "";
    }

    // encodes[i] = candidate encodings of the i-th amino acid, as returned by Protein.GetPeptideEncodes.
    List<string>[] encodes = new List<string>[Peptide.Length];
    Protein protein = new Protein();
    for (int i = 0; i < Peptide.Length; i++)
    {
        encodes[i] = protein.GetPeptideEncodes(Peptide[i].ToString());
    }

    string forward, reverse;
    List<int> forwardHits, reverseHits;

    // Match position -> matched string.
    Dictionary<int, string> matches = new Dictionary<int, string>();

    // Arguments are presumably (Position, AdvRNA, IsAdvMatch, IsRevMatch) - confirm against the type.
    RNAConstructorState current = new RNAConstructorState(0, "", true, true);

    // The top of the stack carries the prefix (AdvRNA) built so far for the current level;
    // the bottom entry is the empty prefix. stack.Count - 1 is the index of the current level.
    // NOTE(review): the traversal relies on Push/Pop copying the state (i.e. RNAConstructorState
    // being a value type) - confirm against its declaration.
    Stack<RNAConstructorState> stack = new Stack<RNAConstructorState>();
    stack.Push(current);

    // Dispose the writer and stream so the trace is flushed and the file handle is released.
    // NOTE(review): File.OpenWrite does not truncate an existing file, so a shorter trace can
    // leave stale text from an earlier run at the end of the file.
    using (FileStream fs1 = File.OpenWrite(@".\PA2_2_debug.txt"))
    using (TextWriter wr = new StreamWriter(fs1))
    {
        foreach (List<string> e in encodes)
        {
            foreach (string s in e)
            {
                wr.Write(s + " ");
            }
            wr.WriteLine();
        }

        do
        {
            int level = stack.Count - 1;
            if (current.Position < encodes[level].Count)
            {
                // Position in range -> extend the parent prefix with this encoding and search for it.
                forward = stack.Peek().AdvRNA + Protein.UntranscribeRNA(encodes[level][current.Position]);
                forwardHits = BioInf_PA1.FindPatternPositions(forward, DNA);
                reverse = BioInf_PA1.ReverseComplement(forward);
                reverseHits = BioInf_PA1.FindPatternPositions(reverse, DNA);
                // TODO (carried over): skip a strand's search when the parent level already failed on
                // that strand; IsAdvMatch/IsRevMatch are recorded below but not consulted yet.

                // An empty list is treated like null, in case the helper reports "nothing found" that way.
                bool forwardFound = forwardHits != null && forwardHits.Count > 0;
                bool reverseFound = reverseHits != null && reverseHits.Count > 0;

                wr.WriteLine("Adv[{1}.{0}]: {2} - {3}", stack.Count, current.Position, forward, forwardFound ? forwardHits.Count : 0);
                wr.WriteLine("Rev[{1}.{0}]: {2} - {3}", stack.Count, current.Position, reverse, reverseFound ? reverseHits.Count : 0);

                if (forwardFound || reverseFound)
                {
                    if (stack.Count < encodes.Length)
                    {
                        // Not the last level -> descend (INTO).
                        current.AdvRNA = forward;
                        current.IsAdvMatch = forwardFound;
                        current.IsRevMatch = reverseFound;
                        stack.Push(current);
                        current.Position = 0;
                    }
                    else
                    {
                        // Last level -> the full string matched: record it and move on (ADD + NEXT).
                        // A position can be reported twice (e.g. the candidate equals its own reverse
                        // complement); Dictionary.Add would throw on the repeated key, so keep the first.
                        if (forwardFound)
                        {
                            foreach (int pos in forwardHits)
                            {
                                if (!matches.ContainsKey(pos))
                                {
                                    matches.Add(pos, forward);
                                }
                            }
                        }
                        if (reverseFound)
                        {
                            foreach (int pos in reverseHits)
                            {
                                if (!matches.ContainsKey(pos))
                                {
                                    matches.Add(pos, reverse);
                                }
                            }
                        }
                        current.Position++;
                    }
                }
                else
                {
                    // No match on either strand -> try the next encoding on this level (NEXT).
                    current.Position++;
                }
            }
            else
            {
                // Every encoding on this level has been tried.
                if (stack.Count > 1)
                {
                    // Not the first level -> resume the parent level at its next encoding (OUT).
                    current = stack.Pop();
                    current.Position++;
                }
                else
                {
                    // First level exhausted -> search finished (EXIT).
                    break;
                }
            }
        } while (true);
    }

    // Join once instead of concatenating in a loop; TrimEnd keeps the original trailing-space handling.
    return string.Join(" ", new List<string>(matches.Values).ToArray()).TrimEnd(' ');
}