Пример #1
0
        /// <summary>
        /// Runs FrequentWordsWithMismatchesAndReverseComplements on a short sample text with
        /// k = 4 and d = 1 and checks that exactly the words "ATGT" and "ACAT" are returned,
        /// regardless of the order in which they are reported.
        /// </summary>
        public void FrequentWordsWithMismatchesAndReverseComplements_TestPattern1()
        {
            // arrange
            const string sampleText = "ACGTTGCATGTCGCATGATGCATGAGAGCT";
            const int wordLength = 4;
            const int mismatches = 1;
            var expectedWords = new List<string> { "ATGT", "ACAT" };
            expectedWords.Sort();

            // act
            var sut = new BioInf_PA1();
            List<string> foundWords = sut.FrequentWordsWithMismatchesAndReverseComplements(sampleText, wordLength, mismatches);
            foundWords.Sort();

            // assert: both lists are sorted, so the comparison ignores the original ordering
            CollectionAssert.AreEqual(expectedWords, foundWords);
        }
Пример #2
0
        /// <summary>
        /// Runs PrintFindMostFrequenlyKMers on a longer genome with k = 12 and checks the
        /// returned string against the expected space-separated list of 12-letter words.
        /// </summary>
        public void PrintFindMostFrequenlyKMers_TestPattern2()
        {
            // arrange
            const int wordLength = 12;
            const string dna = "CGGAAGCGAGATTCGCGTGGCGTGATTCCGGCGGGCGTGGAGAAGCGAGATTCATTCAAGCCGGGAGGCGTGGCGTGGCGTGGCGTGCGGATTCAAGCCGGCGGGCGTGATTCGAGCGGCGGATTCGAGATTCCGGGCGTGCGGGCGTGAAGCGCGTGGAGGAGGCGTGGCGTGCGGGAGGAGAAGCGAGAAGCCGGATTCAAGCAAGCATTCCGGCGGGAGATTCGCGTGGAGGCGTGGAGGCGTGGAGGCGTGCGGCGGGAGATTCAAGCCGGATTCGCGTGGAGAAGCGAGAAGCGCGTGCGGAAGCGAGGAGGAGAAGCATTCGCGTGATTCCGGGAGATTCAAGCATTCGCGTGCGGCGGGAGATTCAAGCGAGGAGGCGTGAAGCAAGCAAGCAAGCGCGTGGCGTGCGGCGGGAGAAGCAAGCGCGTGATTCGAGCGGGCGTGCGGAAGCGAGCGG";
            const string expectedWords = "CGGCGGGAGATT CGGGAGATTCAA CGTGCGGCGGGA CGTGGAGGCGTG CGTGGCGTGCGG GCGTGCGGCGGG GCGTGGAGGCGT GCGTGGCGTGCG GGAGAAGCGAGA GGAGATTCAAGC GGCGGGAGATTC GGGAGATTCAAG GTGCGGCGGGAG TGCGGCGGGAGA";
            var sut = new BioInf_PA1();

            // act
            string foundWords = sut.PrintFindMostFrequenlyKMers(dna, wordLength);

            // assert: exact string match, so word order and separators matter
            Assert.AreEqual(expectedWords, foundWords);
        }
Пример #3
0
        /// <summary>
        /// Runs PrintFindMostFrequenlyKMers on a short genome with k = 4 and checks that the
        /// returned string is exactly "CATG GCAT".
        /// </summary>
        public void PrintFindMostFrequenlyKMers_TestPattern1()
        {
            // arrange
            const int wordLength = 4;
            const string dna = "ACGTTGCATGTCGCATGATGCATGAGAGCT";
            const string expectedWords = "CATG GCAT";
            var sut = new BioInf_PA1();

            // act
            string foundWords = sut.PrintFindMostFrequenlyKMers(dna, wordLength);

            // assert: exact string match, so word order and separators matter
            Assert.AreEqual(expectedWords, foundWords);
        }
Пример #4
0
        /// <summary>
        /// Runs PrintClunps on a long genome with k = 11, L = 566 and t = 18 and checks that the
        /// returned string is exactly "AAACCAGGTGG".
        /// </summary>
        public void PrintClunps_TestPattern2()
        {
            // arrange
            // NOTE(review): in this listing the genome literal continues onto a second physical line.
            // A regular C# string literal cannot contain a raw line break, so confirm against the
            // original file that the literal is a single line (the wrap may be a display artifact).
            string genome = "GCGGTTATGCACCGTTCAAATTAGCAAACCACTAAGCGACGTAGTCTGGATTGATTTCTCCCTACCAGTGACCCAAGACGCGTTAGTGAGTTAAGTTCATATCCAGTACCTGCCGCCCTCTGTACTTGGGCGTCCGATTCGCATGCTTACTCAGGTGGAGGACACGATAATCTGATTAAACTGAGCTAAACCAGGTGGAACCAGAAACCAGGTGGGGAGTCTCGCTTCAAGCCGTTCTTGCGATCAAACCAGGTGGTCCATTATGAAACCAGGTGGCTAAACCAGGTGGTCCAGATCCTCGAATGATGTCGGTGCACATCAAAACCAGGTGGGGTGGTGGAACGTAAAACCAGGTGGCATAAACCAGGTGGGCCGGTTCGTAAACCAGGTGAAACCAGGTGGGGTGGAAACCAGGTGGGTTACAAATTACGTTGAGATGGCCCAAACCAGGTGGTGGGCTTCACCCATGTCAACAAACCACCCTATGGAACTAAACCAGGTGGAACCAGGTGGTGAAGGCTTATCCTCAGGAAAAACCAGGTGGAGGTGGTGAAATAAAACCAGGTGGACCAGGTGGATAACCCTCGCCTCGCTTCTCAACCGAGACCTGGATAAACCAGGTGGGGTGGTCCACCGATTTTTGAGACACTAGAAACCAGGTGGGCGGGGAAACCAGGTGGCAAACCAGGTGGGGTGGACGGAAACCAGGTGGATATGTCATAAAACCAAACCAGGTGGTGCACCCCCATGGTGTGTCTTATCCGTGCGTATAAACCAGGTGGTCGCACGGCTTCCACTTGCTGAGAATAGGCCCGCAGGGTCAGTGCCATGCCCTCCGTCACTCGATATGTGTTGTAAGAGTGGTTACCCCTTCATTGAAGTCGCCCACAGCCCCACCTGCATTGCTAGACTATCACCCTACAGTAGGCCTTTTCGCCTTCTTCAAGCAGCAATCTCTTATCCGCGGATGGGCGCGGCGAGCGTGGCGTCCCCGAACATTTTTACCTAACGTGTTTTGTTGGCCGCAAGCCTTCCCTCTAGTCCACCTCAGCCATTCAGCCTAGTAGCTTTCAAGCCGAGCCTTCCATATCTAATGGACCGTCCAGAATTTCACACGTTTCACAGGGCTGTGTTCGACCGCCCGTAATGCTGTTTCACAGGCGATCGCCTTGCGGTTTTTTCACAGATCGCAGCCGATGGACATGCCAACTCGATTTTCACAGAGTTTTTCACAGCGGTTTCACAGCACAGCAGTGATTGTTTCACAGCAATTTTCACTTTCACAGGGGCCCTTTTCACAGCTCAGGGCTCTTTTCACTTTCACAGTTTCACAGCGCTCCTTTCACAGAGCGGGGAAATTTAAGGGAACACTCAAGGGAACAAGGGAACACACAAAGGGAACACAACACAACACATAAGGGAACACTTTCACAGAACACAAAAGTCCGAAATCATCAGCGGCGAAGGGATTTCACAGACAGACACTTTCACAGCGCATTTCACAGATACGTACTTTCACAGGCGTACTTTCACAGACTTTCACAGAGGACAAGCTCAATTTTCACAGACAGGCTGGATAAATTTCACAGCGGTAAGGGTTTCACAGCACACATAAGGGAACACGAATTTCACAGCAGGGAACACCTCTACGAGTAATCTATTACTCTACCTACTGAAGGGAACACACCGAAGACCTACTATTACCTATTACTCTTAAAGGGAACACATTACAAGGGAACACACTCTCTCGTCATATCTCACCTCTCTATTACTCTTAAGGGAACACCTTCTCGATCAACCTATTACTCTATGGAGATAGAGATATTCCAGACATATGGAGATAACATGGAGATATGGAGATAATGGAGATGGAGATAGCTCTTATATTTATCCTATGGAGATATGATACTATTAATGGAGATAATTCTAATGGAGATATAATTACTCTAAGAGGATGGGATCTCGGGCTAT
TACTCTAATGGAGATAAGCACTATTACTCTAGGAAATGGAGATATGTCAATGGAGATATGTAATGGAGATAGAGGGAGATGGAGTCGCCATTTCATAATCGCCATTTCATAGTTCAGGAATCGCCATTTCCGCCATTTCTAAGATGGAGTCGCCATTTCTACGTATGGAGATAGGATCGCCATTTCATACGACCCGTTGGATATCGCCATTTCCTCGCCATTTCTGGTGACATTTCTCGCCATTTCATTTCTGGAGATAGATGGATCTCGCCATTTCATAGGAATCGCCATTTCCACGTAGGGGGGGCCACAATCCGTAGGTCGGAATTCAGACTCGCCATTTCCCATCGCCATTTCTTCACCTGTATGCCGATCCCTTCGCCATTTCTCATGGAGATAACTCTCTCTCGCCATTTCTCGCCATTTCCATTTCACTCTCATTCGCCATCGCCATTTCCATTCGCCATTTCATCGCCATTTCTTCAGGATAAGATATCGCCATTTCGACTCTCATTCGCATACTGACTCTCATTCTCATCTCGCCATTTCTCATCTGACTCTCATCCTGGGGGAAACTTGCGACTCTCATCACACTTCCGTCGACTCTCATACTGGCGGATAGCATAGGAGCCATTTAAAGACTCTCATTCTCATTCGAGACTCTCATTCAAATCCTACGAGGACTCTCATATAGACTCTCATATCATTACGAGGACTCTCATATACGAGCCATGCATGTGGCGACGACTCTCATCTACGAGCCATGCAAGCAGAATCTACGAGCGACTCTCATTACGAGCCATGTGACCGTACGAGCCATGCATGCATGCCATGCTGACTCTCATCGAGTACGAGCCATGGAAGTTCTTGTTGGTTCGTAGCCCAAGAGCTGAAGTTACGAGCCTACGAGCCATGAAGTTACTTTTACGAGCCATGAAGCTTACGATACGAGCCATGCGAGCCATGCATCCGCGCTACGAGCCATGTTCCAGTACGAGCCATGTTAGTTGCTGAAGTTAAGTTTGGCGCTGAAGTTTGTACGAGCCATGTGCCCGCTGAAGTTTGTTGTACGAGCCATGCATGCTGAAGTTAATGGCTGAAGTTAGCGTTTGCGGGCAGATCCTCATTCTACGATACGAGCCATGCCATGCAGCTGAAGTTAAGTTGGGTTACGAGCCATGCGAGCCATGTGAAGTACGAGCCATGCTGGCTGAAGTTGTTTGTGCTGCTGAAGTTGCTCTTGTCTCTAGCTGAAGTTGCCAACAGGGCTGAAGCTGAAGTTTAAGCTGAAGTTGCGAGCAGGCTGAAGTTATCGGATTGGGGCTGAAGTTCAACCTCCCGTCCCCCCACACTATATTCCCGTCCCCCCCCGCGCACGCGCCGTCTCCCGTCCCCCCTATCCCGTGCGCACGCGACGCGATCCCGTCCCCCCAGAGTGCGCGCACGCGTCCCCCTTCCCGTCCCCCTCTCCCGGGCGCACGCGTCGCTCAACATTTCCGCGCACGCGTCGCGCACGCGGGCGCACGCGGGTCCCGTCCCCCCCCCTCTTCGGCGCACGCGGAATTCCCGTCGCGCACGCGTCCCGTCCCGCGCACGCGTCGCGCACGCGACTGCCCTAACCAACAGTGCGCACGCGCCGGTAACCCGGTAACCCGGTAACCGCGCACGCGGGCGCACGCGCGTAACCCGCGCACGCGCCGCGCACGCGGCCCGGTTCCCGTCCCCCCCGGTAACCCGGTAACTCCCGTCCCCCGTAACCCGGTGCGCACGCGCCCGGCGCACGCGGAGCGCACGCGCCCCCCCCGGTAATAGCGCACGCGCCCGGGCGCACGCGCCCGGTAACCCGGTAACCCGGGCGCGCGCACGCGGCGGCGCACGCGGCGCACGCGGCGCACGCG";
            // presumably k = word length, L = window length, t = minimum occurrence count;
            // their meaning is defined by PrintClunps, which is not visible here - TODO confirm
            int k = 11;
            int L = 566;
            int t = 18;
            string expected = "AAACCAGGTGG";

            // act
            BioInf_PA1 test_class = new BioInf_PA1();
            string actual = test_class.PrintClunps(genome, k, L, t);

            // assert: exact string match against the single expected word
            Assert.AreEqual(expected, actual);
        }
Пример #5
0
        /// <summary>
        /// Runs PrintClunps on a short genome with k = 5, L = 50 and t = 4 and checks that the
        /// returned string is exactly "CGACA GAAGA".
        /// </summary>
        public void PrintClunps_TestPattern1()
        {
            // arrange
            const string dna = "CGGACTCGACAGATGTGAAGAACGACAATGTGAAGACTCGACACGACAGAGTGAAGAGAAGAGGAAACATTGTAA";
            const int wordLength = 5;
            const int windowLength = 50;
            const int threshold = 4;
            const string expectedWords = "CGACA GAAGA";
            var sut = new BioInf_PA1();

            // act
            string foundWords = sut.PrintClunps(dna, wordLength, windowLength, threshold);

            // assert: exact string match, so word order and separators matter
            Assert.AreEqual(expectedWords, foundWords);
        }
Пример #6
0
        /// <summary>
        /// Peptide Encoding Problem: Find substrings of a genome encoding a given amino acid sequence.
        /// </summary>
        /// <remarks>
        /// Performs a depth-first search over codon choices. Search level i picks one of the
        /// encodings returned by <c>Protein.GetPeptideEncodes</c> for <c>Peptide[i]</c>; the
        /// candidate built so far and its reverse complement are both looked up in
        /// <paramref name="DNA"/> with <c>BioInf_PA1.FindPatternPositions</c>, and a branch is
        /// abandoned as soon as both lookups return null. Candidates that reach the last level
        /// are recorded once per reported position.
        /// Side effect: a trace of the search is written to <c>.\PA2_2_debug.txt</c>, which is
        /// recreated on every call.
        /// </remarks>
        /// <param name="DNA">A DNA string</param>
        /// <param name="Peptide">An amino acid string</param>
        /// <returns>
        /// The matched substrings separated by single spaces (no trailing space); an empty
        /// string when nothing was matched or when <paramref name="Peptide"/> is empty.
        /// </returns>
        public static string PeptideEncoding(string DNA, string Peptide)
        {
            // An empty peptide has no search levels; previously this indexed encodes[0] on a
            // zero-length array and threw IndexOutOfRangeException.
            if (Peptide.Length == 0)
            {
                return "";
            }

            // encodes[i] holds every encoding of the i-th amino acid of the peptide.
            List <string>[] encodes = new List <string> [Peptide.Length];
            Protein         p       = new Protein();

            for (int i = 0; i < Peptide.Length; i++)
            {
                encodes[i] = p.GetPeptideEncodes(Peptide[i].ToString());
            }

            // Reported position -> matched string.
            Dictionary <int, string> matches = new Dictionary <int, string>();

            // NOTE(review): the search below relies on RNAConstructorState being copied on
            // Push/Pop (i.e. being a struct) - confirm against its declaration.
            RNAConstructorState         cur_pos = new RNAConstructorState(0, "", true, true);
            Stack <RNAConstructorState> stack   = new Stack <RNAConstructorState>();

            // Bottom entry carries the empty prefix used by level 0.
            stack.Push(cur_pos);

            // File.Create truncates a previous trace, and the using blocks guarantee the trace is
            // flushed and the handle released even if the search throws.
            using (FileStream fs1 = File.Create(@".\PA2_2_debug.txt"))
            using (TextWriter wr = new StreamWriter(fs1))
            {
                // Dump the encoding table, one line per amino acid.
                foreach (List <string> e in encodes)
                {
                    foreach (string s in e)
                    {
                        wr.Write(s + " ");
                    }
                    wr.WriteLine();
                }

                while (true)
                {
                    // Index into encodes of the amino acid currently being extended.
                    int level = stack.Count - 1;

                    if (cur_pos.Position >= encodes[level].Count)
                    {
                        // Every encoding of this level has been tried.
                        if (stack.Count <= 1)
                        {
                            break;                  // first level exhausted -> EXIT
                        }

                        // Resume the previous level at its next encoding (OUT).
                        cur_pos = stack.Pop();
                        cur_pos.Position++;
                        continue;
                    }

                    // Extend the prefix stored on top of the stack by the current encoding.
                    string     cur_adv   = stack.Peek().AdvRNA + Protein.UntranscribeRNA(encodes[level][cur_pos.Position]);
                    // TODO: restrict the lookups using the IsAdvMatch/IsRevMatch flags of the
                    // previous level once it is clear whether those flags can be trusted.
                    List <int> adv_match = BioInf_PA1.FindPatternPositions(cur_adv, DNA);
                    string     cur_rev   = BioInf_PA1.ReverseComplement(cur_adv);
                    List <int> rev_match = BioInf_PA1.FindPatternPositions(cur_rev, DNA);

                    wr.WriteLine("Adv[{1}.{0}]: {2} - {3}", stack.Count, cur_pos.Position, cur_adv, adv_match != null ? adv_match.Count : 0);
                    wr.WriteLine("Rev[{1}.{0}]: {2} - {3}", stack.Count, cur_pos.Position, cur_rev, rev_match != null ? rev_match.Count : 0);

                    if (adv_match == null && rev_match == null)
                    {
                        // Dead end -> try the next encoding on this level (NEXT).
                        cur_pos.Position++;
                        continue;
                    }

                    if (stack.Count < encodes.Length)
                    {
                        // Not the last level -> descend (INTO). The flags are stored with the
                        // pushed state but are not consulted by this method.
                        cur_pos.AdvRNA     = cur_adv;
                        cur_pos.IsAdvMatch = adv_match != null;
                        cur_pos.IsRevMatch = rev_match != null;
                        stack.Push(cur_pos);
                        cur_pos.Position = 0;
                        continue;
                    }

                    // Last level -> record the full-length matches, then move on (ADD + NEXT).
                    // The indexer is used instead of Add so that a position reported twice
                    // (e.g. a candidate equal to another candidate's reverse complement) does
                    // not throw ArgumentException.
                    if (adv_match != null)
                    {
                        foreach (int pos in adv_match)
                        {
                            matches[pos] = cur_adv;
                        }
                    }
                    if (rev_match != null)
                    {
                        foreach (int pos in rev_match)
                        {
                            matches[pos] = cur_rev;
                        }
                    }
                    cur_pos.Position++;
                }
            }

            // Same text the former "value + space" loop produced, built without repeated
            // string concatenation.
            return string.Join(" ", matches.Values).TrimEnd(' ');
        }