Exemplo n.º 1
0
        public void TestContigToString()
        {
            // test parameters
            int    matchScore         = 5;
            int    mismatchScore      = -4;
            int    gapCost            = -10;
            double mergeThreshold     = 4;
            double consensusThreshold = 66;

            Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new NeedlemanWunschAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
            Contig contig0        = seqAssembly.Contigs[0];
            string actualString   = contig0.ToString();
            string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";

            Assert.AreEqual(actualString, expectedString);
        }
Exemplo n.º 2
0
        public void TestContigToString()
        {
            // test parameters
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;

            Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost      = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false,
            };

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(new List <ISequence> {
                seq1, seq2
            });
            Contig contig0      = seqAssembly.Contigs[0];
            string actualString = contig0.ToString();
            //string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
            string expectedString = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTR... +[1678]";

            Assert.AreEqual(actualString, expectedString);
        }
Exemplo n.º 3
0
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            OverlapDeNovoAssembler SSA = new OverlapDeNovoAssembler();

            Consensus = SSA.Assemble(Sequences);
            Contigs   = ((IOverlapDeNovoAssembly)Consensus).Contigs;

            return(ActivityExecutionStatus.Closed);
        }
Exemplo n.º 4
0
        public void TestOverlapDenovoAssemblyToString()
        {
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;

            ISequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            ISequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            IOverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost      = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var inputs = new List <ISequence> {
                seq1, seq2
            };
            var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);

            assembler.OverlapAlgorithm = new SmithWatermanAligner();
            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]\r\n".Replace("\r\n", Environment.NewLine);
            string actualString   = seqAssembly.ToString();

            Assert.AreEqual(expectedString, actualString);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Do a simple sequence assembly.
        /// This sample uses NeedlemanWunschAligner.
        /// </summary>
        /// <param name="sequences">List of sequences to assemble.</param>
        /// <returns>IDeNovoAssembly which has the assembled result.</returns>
        static IDeNovoAssembly DoSimpleSequenceAssemble(List <ISequence> sequences)
        {
            // $TODO: Change the signature and initialization after OverlapDeNovoAssembler is migrated to PhaseOne
            // Create an assembler
            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            // Setup the parameters
            // $TODO: Change the NeedlemanWunschAligner, DiagonalSimilarityMatrix, SimpleConsensusResolve initialization
            // after OverlapDeNovoAssembler is migrated to PhaseOne
            assembler.OverlapAlgorithm = new NeedlemanWunschAligner();
            assembler.OverlapAlgorithm.SimilarityMatrix = new DiagonalSimilarityMatrix(5, -4);
            assembler.OverlapAlgorithm.GapOpenCost      = -10;
            assembler.ConsensusResolver         = new Bio.SimpleConsensusResolver(66);
            assembler.AssumeStandardOrientation = false;

            return(assembler.Assemble(sequences));
        }
Exemplo n.º 6
0
        public void TestSimpleSequenceAssembler()
        {
            Trace.Set(Trace.AssemblyDetails);   // turn on log dump

            // test parameters
            int    matchScore         = 1;
            int    mismatchScore      = -8;
            int    gapCost            = -8;
            double mergeThreshold     = 4;
            double consensusThreshold = 66;

            Sequence seq1 = new Sequence(Alphabets.DNA, "GCCAAAATTTAGGC");
            Sequence seq2 = new Sequence(Alphabets.DNA, "TTATGGCGCCCACGGA");
            Sequence seq3 = new Sequence(Alphabets.DNA, "TATAAAGCGCCAA");

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);
            inputs.Add(seq3);

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0 = seqAssembly.Contigs[0];

            Assert.AreEqual("TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT", contig0.Consensus.ConvertToString());
            Assert.AreEqual(3, contig0.Sequences.Count);
        }
Exemplo n.º 7
0
        public static async void AssemblySequences(string fastqFileName)
        {
            var parser = new FastQParser();
            List <IQualitativeSequence> sequences = new List <IQualitativeSequence>();

            using (var fileStream = new FileStream(fastqFileName, FileMode.Open))
            {
                sequences = parser.Parse(fileStream).ToList();
            }
            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
            IDeNovoAssembly        assembly  = assembler.Assemble(sequences);

            FastAFormatter outputFormatter = new FastAFormatter();

            outputFormatter.Open("assembled_sequences.fasta");
            outputFormatter.Format(assembly.AssembledSequences);
            outputFormatter.Close();
        }
Exemplo n.º 8
0
        /// <summary>
        /// When implemented in a derived class, performs the execution of the activity.
        /// </summary>
        /// <returns>
        /// The result of the activity’s execution.
        /// </returns>
        /// <param name="context">The execution context under which the activity executes.</param>
        protected override IDeNovoAssembly Execute(CodeActivityContext context)
        {
            // Setup the aligner to use
            string alignerName = (AlignerName ?? DefaultAligner).ToLowerInvariant();
            var    aligner     = SequenceAligners.All.FirstOrDefault(sa => sa.Name.ToLowerInvariant() == alignerName);

            if (aligner == null)
            {
                throw new ArgumentException("Could not find aligner: " + alignerName);
            }

            aligner.GapOpenCost      = GapOpenCost;
            aligner.GapExtensionCost = GapExtensionCost;

            var smName = SimilarityMatrix ?? DefaultMatrix;

            SimilarityMatrix.StandardSimilarityMatrix sm;
            if (Enum.TryParse(smName, true, out sm))
            {
                aligner.SimilarityMatrix = new SimilarityMatrix(sm);
            }

            // Create the assembler
            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                AssumeStandardOrientation = this.AssumeStandardOrientation,
                MergeThreshold            = this.MergeThreshold,
                OverlapAlgorithm          = aligner
            };

            var consensus = assembler.Assemble(Sequences.Get(context));

            Contigs.Set(context, ((IOverlapDeNovoAssembly)consensus).Contigs);

            return(consensus);
        }
Exemplo n.º 9
0
        public void ValidateOverlapDenovoAssemblyToString()
        {
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;

            ISequence seq1 = new Sequence(Alphabets.DNA,
                                          this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                               Constants.Seq1StrNode));
            ISequence seq2 = new Sequence(Alphabets.DNA,
                                          this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                               Constants.Seq2StrNode));

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix =
                        new DiagonalSimilarityMatrix(matchScore,
                                                     mismatchScore),
                    GapOpenCost = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var inputs = new List <ISequence> {
                seq1, seq2
            };
            var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);

            assembler.OverlapAlgorithm = new SmithWatermanAligner
            {
                SimilarityMatrix =
                    new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                GapOpenCost = gapCost
            };
            seqAssembly = (OverlapDeNovoAssembly)assembler.Assemble(inputs);

            string       actualString   = seqAssembly.ToString();
            const string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";

            Assert.AreEqual(expectedString, actualString.Replace(System.Environment.NewLine, ""));

            // Get the parameters from Xml
            int matchScore1 =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode), null);
            int mismatchScore1 =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode),
                    null);
            int gapCost1 =
                int.Parse(this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode),
                          null);
            double mergeThreshold1 =
                double.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode),
                    null);
            double consensusThreshold1 =
                double.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                         Constants.ConsensusThresholdNode), null);

            string sequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode1);
            string sequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode2);
            string sequence3 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode3);
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                         Constants.AlphabetNameNode));

            var seq4 = new Sequence(alphabet, sequence1);
            var seq5 = new Sequence(alphabet, sequence2);
            var seq6 = new Sequence(alphabet, sequence3);

            var assembler1 = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold1,
                OverlapAlgorithm = new PairwiseOverlapAligner
                {
                    SimilarityMatrix =
                        new DiagonalSimilarityMatrix(matchScore1,
                                                     mismatchScore1),
                    GapOpenCost = gapCost1,
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold1),
                AssumeStandardOrientation = false,
            };

            var inputs1 = new List <ISequence> {
                seq4, seq5, seq6
            };

            // Assembles all the sequences.
            seqAssembly = (OverlapDeNovoAssembly)assembler1.Assemble(inputs1);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);

            assembler1.OverlapAlgorithm = new SmithWatermanAligner();
            seqAssembly = (OverlapDeNovoAssembly)assembler1.Assemble(inputs1);

            string expectedString1 = "TYMKWRRGCGCCAAAATTTAGGC" + System.Environment.NewLine;

            actualString = seqAssembly.ToString();
            Assert.AreEqual(expectedString1, actualString);
        }
Exemplo n.º 10
0
        public void ValidateContigToString()
        {
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;
            string       seq2Str            = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
            string       seq1Str            = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);

            ISequence seq1 = new Sequence(Alphabets.DNA, seq1Str);
            ISequence seq2 = new Sequence(Alphabets.DNA, seq2Str);

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix =
                        new DiagonalSimilarityMatrix(matchScore,
                                                     mismatchScore),
                    GapOpenCost = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(new List <ISequence> {
                seq1, seq2
            });

            Contig contig0        = seqAssembly.Contigs[0];
            string actualString   = contig0.ToString();
            string expectedString = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                         Constants.OverlapDenovoExpectedNode);

            Assert.AreEqual(expectedString.Replace("\\r\\n", ""), actualString.Replace("\r\n", ""));

            // Get the parameters from Xml
            int matchScore1 =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode), null);
            int mismatchScore1 =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode),
                    null);
            int gapCost1 =
                int.Parse(this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode),
                          null);
            double mergeThreshold1 =
                double.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode),
                    null);
            double consensusThreshold1 =
                double.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                         Constants.ConsensusThresholdNode), null);
            string sequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode1);
            string sequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode2);
            string sequence3 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode3);
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                         Constants.AlphabetNameNode));

            ISequence seq4 = new Sequence(alphabet, sequence1);
            ISequence seq5 = new Sequence(alphabet, sequence2);
            ISequence seq6 = new Sequence(alphabet, sequence3);

            var assembler1 = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold1,
                OverlapAlgorithm = new PairwiseOverlapAligner
                {
                    SimilarityMatrix =
                        new DiagonalSimilarityMatrix(matchScore1,
                                                     mismatchScore1),
                    GapOpenCost = gapCost1
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold1),
                AssumeStandardOrientation = false
            };

            // Assembles all the sequences.
            var seqAssembly1 = (IOverlapDeNovoAssembly)assembler1.Assemble(new List <ISequence> {
                seq4, seq5, seq6
            });
            Contig       contig1         = seqAssembly1.Contigs[0];
            string       actualString1   = contig1.ToString();
            const string expectedString1 = "TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT";

            Assert.AreEqual(expectedString1, actualString1);
        }
Exemplo n.º 11
0
        public void TestSimpleSequenceAssemblerWithSwineflu()
        {
            Trace.Set(Trace.AssemblyDetails);   // turn on log dump
            // test parameters
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;

            ISequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            ISequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost      = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var inputs = new List <ISequence> {
                seq1, seq2
            };
            var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0  = seqAssembly.Contigs[0];
            string expected = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTRYAGRYWMWYWWKSKAWMRRTTMWMMWGMSAMYRWWKMMACAGWMMYWGWARASAMWGTAMYAGWAAMRMAYKYWRYWRWMMYWCWMKMWGWYAASCWTMWMGRRRAMMWRYRYAAMSKRARASKRKKMRMMCYAWKRSRWKTRGSYMMATKKMAYWTKGSTRRMTGKAWCMTKGSWRRYYSRRWSYKKGRAWMWCYMKMSWSWGMAWSMYYMTSSWCMKMMAKYKYRKRRWCMTMYAKTKYRGAMAMWKSWASKTSWKACMMWGGARMKTKYWWCSMWKRWGAKKWSMTMRRWKAKSARKWGMKMWSAGWGYMATYRWKYKMARKGTYWKMRWTWKWMMSSWWKRMRAKWTYMYSSMMSAMWMRTKMMTSGMMCAAWSRTGWMWCGRMMRMAKGTSYWMMKGCWGSAKSWMMWMRYKYYKRMRMAAAWWKMWTMTRSMWARWTWWAAWAKGRMWWKYWWAMMMARRRMWYWSMWAMYCMWASMTYARYRAWWMMKRSAWWRAWGWYMWMGKGMWAKRRGKCMTYSWSCWWYSRKSYAYTMRYSMTSMMYMWMMWAGTSYYKAYCARMAWRSWSWYKMWYAKRWTKYWGWKGSRWMWKYWWKWKWSRGSWMRWMRWKMWAKMSSRARAWRKYMAWRMSRSMMAWAGYRAKRRRWCMMRAAGKGAGRRWKMAMKAWKRSWGRAYRMWMKWWKASYSGRSASWMRWARWRMCRKKMGAMRMAAYWRSAWWYSWAGYRRYWSSRARWYWWGYRKTMSCRAKRKAWRSAWWYGCWRKRKMWRGWAWTRYYRKWTCWGRTAYWMYMRTYYMMGATWSMMMWRYMMMYKRTYRSAMWMCMAMKKGTSMKAYAMMCAMSRGYSYYMYAWWYMMSARYMTMCMWYYKMWSAMWATWSRWMMRWKYMCAAWWKRWRWAWRWMSMAMAWAWKTRARAMKSRCMAMAKKRWKRMKGRMYRYMSSRTYKAKKMAWKYYMSRKSYMTWYWWKSKRSMRKYSYMKKTKKSRYYRWWGSSGGKTKSAYWGRRRKGGKRKRKRSAKGGWWSGKWKATSRMYRKYAMRRTKAKCASSRKYMARRWKAKSMRGSSKMMSKRWAKRSMRCMSASMWKRMSAKYRMMSAGAWTRCYAWYRAMGWRAWTWCTRWYAWWGWAAAKWYKRWTAYWSARWWSAYRRMWRTASRKWWMRMRKYMRWMSRYMWRGARWWMARMMWMSWGRAWWWAARWAWARARAWTKWWRATRRWWWMSTKGAYRWTKGKWYYYWSRAYRYYKRRMYKTWSRWTSYMKWRSWRWWKGWWMKAWYKKWRRAYKAMMRMRMTTYRRAYKWSMASRAYTYRWATGWRAAGRWMWKAWRYSARWWRRWAARMARYSMSMWRRAAAWYRRWRMCRRSKRMWTTGRAWWYKRCYRCWWWKRMKWTWACMMSWRMWKSGAWARYRYSWRMAWKGRRASTKWYRAMWAYSSRAMWTAYKMMKASSMARMAWAMTYARASRRAGMARAAWTARAYRGRGWARARMTRGAWKSRRYAARGMTKKAMYMRAYWWKGRYKWWCYAKWYWWYKGYSRYCWRTTCAWYKGTMSYSRKWKYMTYSSTRSKGGYARTCWSYYTSKGGRYRWKCWSTWWYKGGWYKYKMYMKWRTRGRWYWYKWMWKTRWAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";

            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "NeedlemanWunschAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new SmithWatermanAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0  = seqAssembly.Contigs[0];
            expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "SmithwatermanAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0  = seqAssembly.Contigs[0];
            expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "PairwiseOverlapAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new MUMmerAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "MUMmerAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);
        }
Exemplo n.º 12
0
        public void TestSimpleSequenceAssemblerWithRandomSequence()
        {
            // Test parameters.
            //
            // In theory, as long as all positions in the master sequence are
            // covered by at least one read, we should be able to pass this test.
            // But some parameter settings will make the test fail, for
            // various reasons, including:
            // 1. Short reads, caused by the strategy used to ensure full coverage
            //  at the ends, might not score well enough to merge.
            // 2. Uncovered positions are always possible due to the random
            //  generation of reads. (Increasing the number of reads helps with this)
            // 3. The assembler might construct the reverse or complement (or both)
            //  of the master sequence.
            // 4. Too low a merge threshold could cause incorrect merges, which
            //  the algorithm will not repair.
            int        matchScore          = 1;
            int        mismatchScore       = -8;
            int        gapCost             = -8;
            double     mergeThreshold      = 3;
            double     consensusThreshold  = 99;
            const int  MasterLength        = 100;
            const int  MinReadLength       = 10;
            const int  MaxReadLength       = 30;
            const int  NumReads            = 200;
            const bool AssumeOrientedReads = true;

            // if this is uncommented, assembly details appear in log.
            // this is extremely verbose.
            // Trace.Set(Trace.AssemblyDetails);

            // make random master sequence
            // (use seed for repeatability, or omit seed for
            // different test each time)
            // Random randGen = new Random();
            Random randGen = new Random(654321);

            StringBuilder randSeq = new StringBuilder();

            for (int i = 0; i < MasterLength; ++i)
            {
                int randm = randGen.Next(8);
                if (randm < 2)
                {
                    randSeq.Append('A');
                }
                else if (randm < 4)
                {
                    randSeq.Append('C');
                }
                else if (randm < 6)
                {
                    randSeq.Append('G');
                }
                else
                {
                    randSeq.Append('T');
                }
            }

            Sequence master = new Sequence(Alphabets.AmbiguousDNA, randSeq.ToString());

            // create the reads
            List <ISequence> inputs = new List <ISequence>();

            for (int i = 0; i < NumReads; ++i)
            {
                // try for uniform coverage clear to the ends (this can lead to short reads, though)
                int       rndPos  = Math.Max(0, randGen.Next(-MinReadLength, MasterLength - 1));
                int       rndLen  = Math.Min(MasterLength - rndPos, randGen.Next(MinReadLength, MaxReadLength + 1));
                string    data    = master.ConvertToString().Substring(Math.Max(0, rndPos), rndLen);
                bool      revcomp = randGen.Next(2) > 0;
                bool      reverse = randGen.Next(2) > 0 && !AssumeOrientedReads;
                ISequence read;
                if (reverse && revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReversedSequence().ConvertToString());
                }
                else if (revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReverseComplementedSequence().ConvertToString());
                }
                else
                {
                    read = new Sequence(Alphabets.DNA, data);
                }

                ApplicationLog.WriteLine("read {0}: {1}", i, read);
                inputs.Add(read);
            }

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = AssumeOrientedReads;

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            ApplicationLog.WriteLine(
                "Assembly finished. Contigs: {0}. Unmerged sequences: {1}.",
                seqAssembly.Contigs.Count,
                seqAssembly.UnmergedSequences.Count);
            Contig contig0 = seqAssembly.Contigs[0];

            ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
            ApplicationLog.WriteLine(master.ConvertToString());
            ApplicationLog.WriteLine(contig0.Consensus.ConvertToString());

            Assert.AreEqual(2, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);

            // note that this is tricky, esp. without oriented reads - consensus
            // could be reversed and/or complemented relative to original
            Assert.AreEqual(master.ConvertToString(), contig0.Consensus.ConvertToString());
        }
Exemplo n.º 13
0
        public void TestSimpleSequenceAssemblerWithSemiRandomSequence()
        {
            // test parameters
            int        matchScore          = 1;
            int        mismatchScore       = -8;
            int        gapCost             = -8;
            double     mergeThreshold      = 4;
            double     consensusThreshold  = 66;
            const int  MasterLength        = 30;
            const int  ReadLength          = 10;
            const int  NumReads            = 5;
            const bool AssumeOrientedReads = false;

            // if this is uncommented, assembly details appear in log.
            // this is extremely verbose.
            Trace.Set(Trace.AssemblyDetails);

            // make random master sequence
            // (use seed for repeatability, or omit seed for
            // different test each time)
            // Random randGen = new Random();
            Random randGen = new Random(654321);

            StringBuilder randSeq = new StringBuilder();

            for (int i = 0; i < MasterLength; ++i)
            {
                int randm = randGen.Next(8);
                if (randm < 2)
                {
                    randSeq.Append('A');
                }
                else if (randm < 4)
                {
                    randSeq.Append('C');
                }
                else if (randm < 6)
                {
                    randSeq.Append('G');
                }
                else
                {
                    randSeq.Append('T');
                }
            }

            Sequence master = new Sequence(Alphabets.DNA, randSeq.ToString());

            // create the reads
            List <ISequence> inputs = new List <ISequence>();

            for (int i = 0; i < NumReads; ++i)
            {
                int       pos     = 5 * i;
                string    data    = master.ConvertToString().Substring(pos, ReadLength);
                bool      revcomp = randGen.Next(2) > 0;
                bool      reverse = randGen.Next(2) > 0 && !AssumeOrientedReads;
                ISequence read;
                if (reverse && revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReversedSequence().ConvertToString());
                }
                else if (revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReverseComplementedSequence().ConvertToString());
                }
                else
                {
                    read = new Sequence(Alphabets.DNA, data);
                }

                inputs.Add(read);
            }

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = AssumeOrientedReads;

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0 = seqAssembly.Contigs[0];

            ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
            ApplicationLog.WriteLine(master.ConvertToString());
            ApplicationLog.WriteLine(contig0.Consensus.ConvertToString());

            // note that this is tricky, esp. without oriented reads - consensus
            // could be reversed and/or complemented relative to original
            Assert.AreEqual(master.ConvertToString(), contig0.Consensus.ConvertToString());
        }
Exemplo n.º 14
0
        /// <summary>
        /// Validate Sequence Assembler Test cases based on additional parameter values
        /// </summary>
        /// <param name="additionalParameter">Addtional parameters</param>
        IOverlapDeNovoAssembly GetSequenceAssembly(string additionalParameter)
        {
            // Get the parameters from Xml
            int matchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                       Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                          Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(
                                                     Constants.AssemblyAlgorithmNodeName,
                                                     Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.ConsensusThresholdNode), null);
            string sequence1 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                               Constants.SequenceNode1);
            string sequence2 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                               Constants.SequenceNode2);
            string sequence3 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                               Constants.SequenceNode3);
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.AlphabetNameNode));

            // Log based on the test cases
            switch (additionalParameter)
            {
            case "consensus":
                // Logs the sequences
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.", sequence1));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.", sequence2));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'.", sequence3));
                break;

            default:
                // Logs the sequences
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 1 used is '{0}'.", sequence1));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 2 used is '{0}'.", sequence2));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 3 used is '{0}'.", sequence3));
                break;
            }

            Sequence seq1 = new Sequence(alphabet, sequence1);
            Sequence seq2 = new Sequence(alphabet, sequence2);
            Sequence seq3 = new Sequence(alphabet, sequence3);

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
                new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);
            inputs.Add(seq3);

            // Assembles all the sequences.
            return((IOverlapDeNovoAssembly)assembler.Assemble(inputs));
        }
Exemplo n.º 15
0
        public void ValidateOverlapDenovoAssemblyToString()
        {
            int    matchScore         = 5;
            int    mismatchScore      = -4;
            int    gapCost            = -10;
            double mergeThreshold     = 4;
            double consensusThreshold = 66;
            string seq2Str            = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                        Constants.Seq2StrNode);
            string seq1Str = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                             Constants.Seq1StrNode);
            Sequence seq2 = new Sequence(Alphabets.DNA, seq2Str);
            Sequence seq1 = new Sequence(Alphabets.DNA, seq1Str);

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new NeedlemanWunschAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
                new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            assembler.OverlapAlgorithm = new SmithWatermanAligner();
            seqAssembly = (OverlapDeNovoAssembly)assembler.Assemble(inputs);

            string actualString   = seqAssembly.ToString();
            string expectedString = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                    Constants.OverlapDenovoExpectedNode);

            Assert.AreEqual(actualString.Replace("\r\n", ""), expectedString.Replace("\\r\\n", ""));


            // Get the parameters from Xml
            int matchScore1 = int.Parse(utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                        Constants.MatchScoreNode), null);
            int mismatchScore1 = int.Parse(utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                           Constants.MisMatchScoreNode), null);
            int gapCost1 = int.Parse(utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                     Constants.GapCostNode), null);
            double mergeThreshold1 = double.Parse(utilityObj.xmlUtil.GetTextValue(
                                                      Constants.AssemblyAlgorithmNodeName,
                                                      Constants.MergeThresholdNode), null);
            double consensusThreshold1 = double.Parse(utilityObj.xmlUtil.GetTextValue(
                                                          Constants.AssemblyAlgorithmNodeName,
                                                          Constants.ConsensusThresholdNode), null);
            string sequence1 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                               Constants.SequenceNode1);
            string sequence2 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                               Constants.SequenceNode2);
            string sequence3 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                               Constants.SequenceNode3);
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.AlphabetNameNode));

            Sequence seq4 = new Sequence(alphabet, sequence1);
            Sequence seq5 = new Sequence(alphabet, sequence2);
            Sequence seq6 = new Sequence(alphabet, sequence3);

            OverlapDeNovoAssembler assembler1 = new OverlapDeNovoAssembler();

            assembler1.MergeThreshold   = mergeThreshold1;
            assembler1.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler1.OverlapAlgorithm).SimilarityMatrix =
                new DiagonalSimilarityMatrix(matchScore1, mismatchScore1);
            ((IPairwiseSequenceAligner)assembler1.OverlapAlgorithm).GapOpenCost = gapCost1;
            assembler1.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold1);
            assembler1.AssumeStandardOrientation = false;

            List <ISequence> inputs1 = new List <ISequence>();

            inputs1.Add(seq4);
            inputs1.Add(seq5);
            inputs1.Add(seq6);

            // Assembles all the sequences.
            IOverlapDeNovoAssembly seqAssembly1 = (IOverlapDeNovoAssembly)assembler1.Assemble(inputs1);

            Assert.AreEqual(0, seqAssembly1.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly1.Contigs.Count);
            assembler1.OverlapAlgorithm = new SmithWatermanAligner();
            seqAssembly = (OverlapDeNovoAssembly)assembler1.Assemble(inputs1);

            string actualString1   = seqAssembly.ToString();
            string expectedString1 = "TYMKWRRGCGCCAAAATTTAGGC\r\n";

            Assert.AreEqual(actualString1, expectedString1);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Validate Sequence Assembler Test cases based on additional parameter values
        /// </summary>
        /// <param name="additionalParameter">Addtional parameters</param>
        void ValidateSequenceAssemblerGeneral(string additionalParameter)
        {
            // Get the parameters from Xml
            int matchScore = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                         Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                            Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                      Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                     Constants.AssemblyAlgorithmNodeName,
                                                     Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.ConsensusThresholdNode), null);
            string sequence1 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                 Constants.SequenceNode1);
            string sequence2 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                 Constants.SequenceNode2);
            string sequence3 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                 Constants.SequenceNode3);
            IAlphabet alphabet = Utility.GetAlphabet(_utilityObj._xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.AlphabetNameNode));
            MoleculeType molType = Utility.GetMoleculeType(_utilityObj._xmlUtil.GetTextValue(
                                                               Constants.AssemblyAlgorithmNodeName,
                                                               Constants.MoleculeTypeNode));

            // Log based on the test cases
            switch (additionalParameter)
            {
            case "consensus":
                // Logs the sequences
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.", sequence1));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.", sequence1));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.", sequence2));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.", sequence2));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'.", sequence3));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'.", sequence3));
                break;

            default:
                // Logs the sequences
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 1 used is '{0}'.", sequence1));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Sequence 1 used is '{0}'.", sequence1));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 2 used is '{0}'.", sequence2));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Sequence 2 used is '{0}'.", sequence2));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 3 used is '{0}'.", sequence3));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Sequence 3 used is '{0}'.", sequence3));
                break;
            }

            Sequence seq1 = new Sequence(alphabet, sequence1);
            Sequence seq2 = new Sequence(alphabet, sequence2);
            Sequence seq3 = new Sequence(alphabet, sequence3);

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
                new DiagonalSimilarityMatrix(matchScore, mismatchScore, molType);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);
            inputs.Add(seq3);

            // Assembles all the sequences.
            IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            // Get the parameters from Xml in general
            int contigSequencesCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                     Constants.AssemblyAlgorithmNodeName,
                                                     Constants.ContigSequencesCountNode), null);
            string contigConsensus = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                       Constants.ContigConsensusNode);

            switch (additionalParameter.ToLower(CultureInfo.CurrentCulture))
            {
            case "assemble":
                // Get the parameters from Xml for Assemble() method test cases.
                int unMergedCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                  Constants.AssemblyAlgorithmNodeName,
                                                  Constants.UnMergedSequencesCountNode), null);
                int contigsCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                 Constants.AssemblyAlgorithmNodeName,
                                                 Constants.ContigsCountNode), null);

                Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
                Assert.AreEqual(contigsCount, assembly.Contigs.Count);
                Contig contigRead = assembly.Contigs[0];

                // Logs the concensus
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
                                                       assembly.UnmergedSequences.Count.ToString((IFormatProvider)null)));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Contigs Count is '{0}'.",
                                                       assembly.Contigs.Count.ToString((IFormatProvider)null)));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
                                                       contigRead.Sequences.Count.ToString((IFormatProvider)null)));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                       contigRead.Consensus.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                contigRead.Consensus.ToString()));

                Assert.AreEqual(contigConsensus, contigRead.Consensus.ToString());
                Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
                break;

            case "contig":
                // Read the contig from Contig method.
                Contig contigsRead = assembly.Contigs[0];

                // Log the required info.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                       contigsRead.Consensus.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                contigsRead.Consensus.ToString()));

                ApplicationLog.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");
                Console.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");

                Assert.AreEqual(contigConsensus, contigsRead.Consensus.ToString());
                Assert.AreEqual(contigSequencesCount, contigsRead.Sequences.Count);
                break;

            case "consensus":
                // Read the contig from Contig method.
                Contig contigReadForConsensus = assembly.Contigs[0];
                contigReadForConsensus.Consensus = null;
                OverlapDeNovoAssembler simpleSeqAssembler = new OverlapDeNovoAssembler();
                simpleSeqAssembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
                simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

                // Log the required info.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                                                       contigReadForConsensus.Consensus.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                                                contigReadForConsensus.Consensus.ToString()));
                Assert.AreEqual(contigConsensus, contigReadForConsensus.Consensus.ToString());
                break;

            default:
                break;
            }
        }
Exemplo n.º 17
0
        public void TestSimpleSequenceAssemblerWithSwineflu()
        {
            Trace.Set(Trace.AssemblyDetails);   // turn on log dump
            // test parameters
            int    matchScore         = 5;
            int    mismatchScore      = -4;
            int    gapCost            = -10;
            double mergeThreshold     = 4;
            double consensusThreshold = 66;

            Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new NeedlemanWunschAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0 = seqAssembly.Contigs[0];

            Assert.AreEqual("ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA", contig0.Consensus.ToStrings());
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new SmithWatermanAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            Assert.AreEqual("ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA", contig0.Consensus.ToStrings());
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            Assert.AreEqual("ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA", contig0.Consensus.ToStrings());
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new MUMmerAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            Assert.AreEqual("ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA", contig0.Consensus.ToStrings());
            Assert.AreEqual(2, contig0.Sequences.Count);
        }
Exemplo n.º 18
0
        /// <summary>
        ///     Validates the Sequence Assembler for all the general test cases.
        /// </summary>
        /// <param name="nodeName">Xml Node Name</param>
        /// <param name="additionalParameter">
        ///     Additional Parameter based
        ///     on which the validations are done.
        /// </param>
        /// <param name="isSeqAssemblyctr">True if Default contructor is validated or else false.</param>
        private void ValidateSequenceAssemblerGeneral(string nodeName,
                                                      AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
        {
            // Get the parameters from Xml
            int matchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                       Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                          Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.ConsensusThresholdNode),
                                                     null);

            string[] sequences = utilityObj.xmlUtil.GetTextValues(nodeName,
                                                                  Constants.SequencesNode);
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.AlphabetNameNode));
            string documentation = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                   Constants.DocumentaionNode);
            var info = new SerializationInfo(typeof(OverlapDeNovoAssembly),
                                             new FormatterConverter());
            var context = new StreamingContext(StreamingContextStates.All);

            var inputs = new List <ISequence>();

            switch (additionalParameter)
            {
            case AssemblyParameters.Consensus:
                for (int i = 0; i < sequences.Length; i++)
                {
                    // Logs the sequences
                    ApplicationLog.WriteLine(string.Format(null, "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'.", i, sequences[i]));

                    var seq = new Sequence(alphabet, sequences[i]);
                    inputs.Add(seq);
                }
                break;

            default:
                for (int i = 0; i < sequences.Length; i++)
                {
                    // Logs the sequences
                    ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.", i, sequences[i]));
                    var seq = new Sequence(alphabet, sequences[i]);
                    inputs.Add(seq);
                }
                break;
            }

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new PairwiseOverlapAligner()
            };

            switch (additionalParameter)
            {
            case AssemblyParameters.DiagonalSM:
                (assembler.OverlapAlgorithm).SimilarityMatrix =
                    new DiagonalSimilarityMatrix(matchScore, mismatchScore);
                break;

            case AssemblyParameters.SimilarityMatrix:
                string blosumFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
                (assembler.OverlapAlgorithm).SimilarityMatrix = new SimilarityMatrix(blosumFilePath);
                break;

            default:
                (assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
                break;
            }

            (assembler.OverlapAlgorithm).GapOpenCost = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            // Assembles all the sequences.
            IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            // Set Documentation property.
            assembly.Documentation = documentation;

            // Get the parameters from Xml in general
            int    contigSequencesCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
            string contigConsensus      = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigConsensusNode);

            switch (additionalParameter)
            {
            case AssemblyParameters.Consensus:
                // Read the contig from Contig method.
                Contig contigReadForConsensus = assembly.Contigs[0];
                contigReadForConsensus.Consensus = null;
                var simpleSeqAssembler = new OverlapDeNovoAssembler
                {
                    ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
                };
                simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

                // Log the required info.
                ApplicationLog.WriteLine(string.Format(null, "SimpleConsensusMethod BVT : Consensus read is '{0}'.", contigReadForConsensus.Consensus));
                Assert.AreEqual(contigConsensus, new String(contigReadForConsensus.Consensus.Select(a => (char)a).ToArray()));
                break;

            default:
                // Get the parameters from Xml for Assemble() method test cases.
                int unMergedCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                              Constants.UnMergedSequencesCountNode),
                                              null);
                int contigsCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                             Constants.ContigsCountNode), null);

                Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
                Assert.AreEqual(contigsCount, assembly.Contigs.Count);
                Assert.AreEqual(documentation, assembly.Documentation);
                Contig contigRead = assembly.Contigs[0];

                // Logs the consensus
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.", assembly.UnmergedSequences.Count));
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contigs Count is '{0}'.", assembly.Contigs.Count));
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contig Sequences Count is '{0}'.", contigRead.Sequences.Count));
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Consensus read is '{0}'.", contigRead.Consensus));

                Assert.AreEqual(contigConsensus, new String(contigRead.Consensus.Select(a => (char)a).ToArray()));
                Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
                break;
            }
        }
Exemplo n.º 19
0
        public void TestSequenceAssemblyWithBinaryFormatter()
        {
            Stream stream = null;

            try
            {
                stream = File.Open("SequenceAssembly.data", FileMode.Create);
                BinaryFormatter        formatter   = new BinaryFormatter();
                IOverlapDeNovoAssembly seqAssembly = null;

                #region Create OverlapDeNovoAssembly by calling OverlapDeNovoAssembler.Assembly()

                int        matchScore          = 1;
                int        mismatchScore       = -8;
                int        gapCost             = -8;
                double     mergeThreshold      = 3;
                double     consensusThreshold  = 99;
                const bool AssumeOrientedReads = true;

                OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
                assembler.MergeThreshold   = mergeThreshold;
                assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
                ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(
                    matchScore,
                    mismatchScore,
                    MoleculeType.DNA);
                ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
                assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
                assembler.AssumeStandardOrientation = AssumeOrientedReads;

                Sequence seq1 = new Sequence(Alphabets.DNA, "ACGACACG");
                Sequence seq2 = new Sequence(Alphabets.DNA, "ACGACCGGAGG");
                Sequence seq3 = new Sequence(Alphabets.DNA, "TTTTTT");
                seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(new List <ISequence>()
                {
                    seq1, seq2, seq3
                });
                #endregion  Create OverlapDeNovoAssembly by calling OverlapDeNovoAssembler.Assembly()

                formatter.Serialize(stream, seqAssembly);
                stream.Seek(0, SeekOrigin.Begin);

                IOverlapDeNovoAssembly deserializedseqAssembly =
                    (IOverlapDeNovoAssembly)formatter.Deserialize(stream);
                Assert.AreNotSame(seqAssembly, deserializedseqAssembly);
                Assert.AreEqual(seqAssembly.Contigs.Count, deserializedseqAssembly.Contigs.Count);

                for (int i = 0; i < seqAssembly.Contigs.Count; i++)
                {
                    Assert.AreEqual(
                        seqAssembly.Contigs[i].Consensus.ToString(),
                        deserializedseqAssembly.Contigs[i].Consensus.ToString());
                    Assert.AreEqual(
                        seqAssembly.Contigs[i].Sequences.Count,
                        deserializedseqAssembly.Contigs[i].Sequences.Count);

                    for (int j = 0; j < seqAssembly.Contigs[i].Sequences.Count; j++)
                    {
                        Assert.AreEqual(
                            seqAssembly.Contigs[i].Sequences[j].ToString(),
                            deserializedseqAssembly.Contigs[i].Sequences[j].ToString());
                    }
                }

                for (int i = 0; i < seqAssembly.UnmergedSequences.Count; i++)
                {
                    Assert.AreEqual(
                        seqAssembly.UnmergedSequences[i].ToString(),
                        deserializedseqAssembly.UnmergedSequences[i].ToString());
                }
            }
            catch (Exception)
            {
                Assert.Fail();
            }
            finally
            {
                if (stream != null)
                {
                    stream.Close();
                    stream = null;
                }
            }
        }