public void TestSimpleSequenceAssembler()
        {
            // Assembles three short DNA reads (one of them stored reversed) and checks
            // that they collapse into a single contig with the expected consensus.

            // Turn on the assembly-details log dump for this run.
            Trace.Set(Trace.AssemblyDetails);

            // Scoring and threshold settings used by this scenario.
            const int matchScore = 1;
            const int mismatchScore = -8;
            const int gapCost = -8;
            const double mergeThreshold = 4;
            const double consensusThreshold = 66;

            // Expected layout of the reads:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
            var reads = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GCCAAAATTTAGGC"),
                new Sequence(Alphabets.DNA, "TTATGGCGCCCACGGA"),
                new Sequence(Alphabets.DNA, "TATAAAGCGCCAA")
            };

            var overlapAligner = new PairwiseOverlapAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                GapOpenCost = gapCost
            };

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeThreshold,
                OverlapAlgorithm = overlapAligner,
                ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var result = (IOverlapDeNovoAssembly)assembler.Assemble(reads);

            // Every read must have been merged into exactly one contig.
            Assert.AreEqual(0, result.UnmergedSequences.Count);
            Assert.AreEqual(1, result.Contigs.Count);

            Contig mergedContig = result.Contigs[0];
            Assert.AreEqual("TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT", mergedContig.Consensus.ConvertToString());
            Assert.AreEqual(3, mergedContig.Sequences.Count);
        }
Esempio n. 2
0
        /// <summary>
        ///     Reads the assembler settings and three sequences from the assembly-algorithm
        ///     Xml node, logs the sequences, and assembles them with an OverlapDeNovoAssembler.
        /// </summary>
        /// <param name="additionalParameter">
        ///     Only affects logging: "consensus" logs with the SimpleConsensusMethod prefix,
        ///     any other value logs with the SequenceAssembly prefix.
        /// </param>
        /// <returns>The assembly produced for the three configured sequences.</returns>
        private IOverlapDeNovoAssembly GetSequenceAssembly(string additionalParameter)
        {
            var xml = this.utilityObj.xmlUtil;
            var node = Constants.AssemblyAlgorithmNodeName;

            // Aligner and assembler settings from Xml.
            int matchScore = int.Parse(xml.GetTextValue(node, Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(xml.GetTextValue(node, Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(xml.GetTextValue(node, Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(xml.GetTextValue(node, Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(xml.GetTextValue(node, Constants.ConsensusThresholdNode), null);

            // Raw sequence text and the alphabet used to interpret it.
            string firstText = xml.GetTextValue(node, Constants.SequenceNode1);
            string secondText = xml.GetTextValue(node, Constants.SequenceNode2);
            string thirdText = xml.GetTextValue(node, Constants.SequenceNode3);
            IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));

            // Log the sequences under the prefix that matches the calling test case.
            if (additionalParameter == "consensus")
            {
                ApplicationLog.WriteLine(string.Format(null,
                    "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.", firstText));
                ApplicationLog.WriteLine(string.Format(null,
                    "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.", secondText));
                ApplicationLog.WriteLine(string.Format(null,
                    "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'.", thirdText));
            }
            else
            {
                ApplicationLog.WriteLine(string.Format(null,
                    "SequenceAssembly BVT : Sequence 1 used is '{0}'.", firstText));
                ApplicationLog.WriteLine(string.Format(null,
                    "SequenceAssembly BVT : Sequence 2 used is '{0}'.", secondText));
                ApplicationLog.WriteLine(string.Format(null,
                    "SequenceAssembly BVT : Sequence 3 used is '{0}'.", thirdText));
            }

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
            var reads = new List<ISequence>
            {
                new Sequence(alphabet, firstText),
                new Sequence(alphabet, secondText),
                new Sequence(alphabet, thirdText)
            };

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeThreshold,
                OverlapAlgorithm = new PairwiseOverlapAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost = gapCost
                },
                ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            // Assemble all the sequences and hand the result back to the caller.
            return (IOverlapDeNovoAssembly) assembler.Assemble(reads);
        }
Esempio n. 3
0
        /// <summary>
        ///     Validates the Sequence Assembler for all the general test cases.
        /// </summary>
        /// <param name="nodeName">Xml node name the test parameters and expected values are read from.</param>
        /// <param name="additionalParameter">
        ///     Selects the similarity matrix (a matrix loaded from the configured file path for
        ///     <c>SimilarityMatrix</c>, a diagonal matrix for every other value) and the
        ///     validation path (<c>Consensus</c> re-runs MakeConsensus on the first contig;
        ///     every other value validates the output of Assemble()).
        /// </param>
        /// <param name="isSeqAssemblyctr">
        ///     True if Default constructor is validated or else false.
        ///     Not read by this method; kept so existing callers continue to compile.
        /// </param>
        private void ValidateSequenceAssemblerGeneral(string nodeName,
                                                      AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
        {
            // Get the parameters from Xml
            int matchScore = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                       Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                          Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.ConsensusThresholdNode),
                                                     null);
            string[] sequences = this.utilityObj.xmlUtil.GetTextValues(nodeName,
                                                                  Constants.SequencesNode);
            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.AlphabetNameNode));
            string documentation = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                   Constants.DocumentaionNode);

            // Both test flavours build the inputs the same way; only the log prefix differs.
            string sequenceLogFormat = additionalParameter == AssemblyParameters.Consensus
                                           ? "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'."
                                           : "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.";

            var inputs = new List<ISequence>();
            for (int i = 0; i < sequences.Length; i++)
            {
                // Logs the sequences
                ApplicationLog.WriteLine(string.Format(null, sequenceLogFormat, i, sequences[i]));
                inputs.Add(new Sequence(alphabet, sequences[i]));
            }

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            // 
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeThreshold,
                OverlapAlgorithm = new PairwiseOverlapAligner()
            };

            if (additionalParameter == AssemblyParameters.SimilarityMatrix)
            {
                string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

                // Dispose the reader as soon as the matrix is built so the file handle is
                // not leaked (assumes the constructor reads the matrix eagerly).
                using (var matrixReader = new StreamReader(blosumFilePath))
                {
                    (assembler.OverlapAlgorithm).SimilarityMatrix = new SimilarityMatrix(matrixReader);
                }
            }
            else
            {
                // DiagonalSM and every remaining parameter value use the diagonal matrix.
                (assembler.OverlapAlgorithm).SimilarityMatrix =
                    new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            }

            (assembler.OverlapAlgorithm).GapOpenCost = gapCost;
            assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            // Assembles all the sequences.
            IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly) assembler.Assemble(inputs);

            // Set Documentation property.
            assembly.Documentation = documentation;

            // Get the parameters from Xml in general
            int contigSequencesCount = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
            string contigConsensus = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigConsensusNode);

            switch (additionalParameter)
            {
                case AssemblyParameters.Consensus:
                    // Read the contig from Contig method, clear its consensus and
                    // rebuild it through MakeConsensus on a fresh assembler.
                    Contig contigReadForConsensus = assembly.Contigs[0];
                    contigReadForConsensus.Consensus = null;
                    var simpleSeqAssembler = new OverlapDeNovoAssembler
                    {
                        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
                    };
                    simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

                    // Log the required info.
                    ApplicationLog.WriteLine(string.Format(null, "SimpleConsensusMethod BVT : Consensus read is '{0}'.", contigReadForConsensus.Consensus));
                    Assert.AreEqual(contigConsensus, new String(contigReadForConsensus.Consensus.Select(a => (char) a).ToArray()));
                    break;
                default:
                    // Get the parameters from Xml for Assemble() method test cases.
                    int unMergedCount = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                  Constants.UnMergedSequencesCountNode),
                                                  null);
                    int contigsCount = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.ContigsCountNode), null);

                    Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
                    Assert.AreEqual(contigsCount, assembly.Contigs.Count);
                    Assert.AreEqual(documentation, assembly.Documentation);
                    Contig contigRead = assembly.Contigs[0];

                    // Logs the consensus
                    ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.", assembly.UnmergedSequences.Count));
                    ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contigs Count is '{0}'.", assembly.Contigs.Count));
                    ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contig Sequences Count is '{0}'.", contigRead.Sequences.Count));
                    ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Consensus read is '{0}'.", contigRead.Consensus));

                    Assert.AreEqual(contigConsensus, new String(contigRead.Consensus.Select(a => (char) a).ToArray()));
                    Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
                    break;
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Assembles the sequences supplied in the input using an overlap
        /// de novo assembler driven by the aligner selected in that input.
        /// </summary>
        /// <param name="input">Input for the assembly process.</param>
        /// <param name="worker">The Assembly parser thread; may be null.</param>
        /// <returns>
        /// IDeNovoAssembly instance, or null when the worker reports a pending
        /// cancellation once assembly has finished.
        /// </returns>
        private static IDeNovoAssembly RunAssembly(AssemblyInputEventArgs input, BackgroundWorker worker)
        {
            double requestedThreshold = input.MergeThreshold;
            List<ISequence> reads = input.Sequences.ToList();

            var assembler = new OverlapDeNovoAssembler();
            assembler.OverlapAlgorithm = input.Aligner;

            // Smith-Waterman is special-cased: per the original author's note, a threshold
            // below the max score yields junk results, so a fixed value of 25 replaces
            // whatever threshold was requested.
            assembler.MergeThreshold = assembler.OverlapAlgorithm is SmithWatermanAligner
                                           ? 25
                                           : requestedThreshold;

            // Copy the scoring configuration onto the chosen aligner.
            assembler.OverlapAlgorithm.SimilarityMatrix = input.AlignerInput.SimilarityMatrix;
            assembler.OverlapAlgorithm.GapOpenCost = input.AlignerInput.GapCost;
            assembler.OverlapAlgorithm.GapExtensionCost = input.AlignerInput.GapExtensionCost;

            assembler.ConsensusResolver = new SimpleConsensusResolver(input.ConsensusThreshold);
            assembler.AssumeStandardOrientation = false;

            AssignAlignerParameter(assembler.OverlapAlgorithm, input.AlignerInput);

            IDeNovoAssembly result = assembler.Assemble(reads);

            // The result is discarded when cancellation was requested while assembling.
            bool cancelled = worker != null && worker.CancellationPending;
            return cancelled ? null : result;
        }
Esempio n. 5
0
        public void ValidateOverlapDenovoAssemblyToString()
        {
            // Validates ToString() of the overlap de novo assembly in two scenarios:
            // two sequences from the ToString Xml node, and three sequences from the
            // assembly-algorithm Xml node. In both, the Smith-Waterman result is the one
            // whose string form is compared.
            const int matchScore = 5;
            const int mismatchScore = -4;
            const int gapCost = -10;
            const double mergeThreshold = 4;
            const double consensusThreshold = 66;

            var xml = this.utilityObj.xmlUtil;

            // ---- Scenario 1: two sequences from the ToString node ----
            ISequence firstRead = new Sequence(Alphabets.DNA,
                                               xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode));
            ISequence secondRead = new Sequence(Alphabets.DNA,
                                                xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode));

            var globalAligner = new NeedlemanWunschAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                GapOpenCost = gapCost
            };
            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeThreshold,
                OverlapAlgorithm = globalAligner,
                ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var pairInputs = new List<ISequence> {firstRead, secondRead};
            IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly) assembler.Assemble(pairInputs);

            Assert.AreEqual(0, assembly.UnmergedSequences.Count);
            Assert.AreEqual(1, assembly.Contigs.Count);

            // Re-run the same assembler with a Smith-Waterman aligner and check the string form.
            var localAligner = new SmithWatermanAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                GapOpenCost = gapCost
            };
            assembler.OverlapAlgorithm = localAligner;
            assembly = (OverlapDeNovoAssembly) assembler.Assemble(pairInputs);

            string actualText = assembly.ToString();
            const string expectedPairText = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
            Assert.AreEqual(expectedPairText, actualText.Replace(System.Environment.NewLine, ""));

            // ---- Scenario 2: three sequences configured in the assembly-algorithm node ----
            var node = Constants.AssemblyAlgorithmNodeName;
            int xmlMatchScore = int.Parse(xml.GetTextValue(node, Constants.MatchScoreNode), null);
            int xmlMismatchScore = int.Parse(xml.GetTextValue(node, Constants.MisMatchScoreNode), null);
            int xmlGapCost = int.Parse(xml.GetTextValue(node, Constants.GapCostNode), null);
            double xmlMergeThreshold = double.Parse(xml.GetTextValue(node, Constants.MergeThresholdNode), null);
            double xmlConsensusThreshold = double.Parse(xml.GetTextValue(node, Constants.ConsensusThresholdNode), null);

            string textOne = xml.GetTextValue(node, Constants.SequenceNode1);
            string textTwo = xml.GetTextValue(node, Constants.SequenceNode2);
            string textThree = xml.GetTextValue(node, Constants.SequenceNode3);
            IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));

            var tripleInputs = new List<ISequence>
            {
                new Sequence(alphabet, textOne),
                new Sequence(alphabet, textTwo),
                new Sequence(alphabet, textThree)
            };

            var overlapAligner = new PairwiseOverlapAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(xmlMatchScore, xmlMismatchScore),
                GapOpenCost = xmlGapCost
            };
            var xmlAssembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = xmlMergeThreshold,
                OverlapAlgorithm = overlapAligner,
                ConsensusResolver = new SimpleConsensusResolver(xmlConsensusThreshold),
                AssumeStandardOrientation = false
            };

            // Assembles all the sequences.
            assembly = (OverlapDeNovoAssembly) xmlAssembler.Assemble(tripleInputs);

            Assert.AreEqual(0, assembly.UnmergedSequences.Count);
            Assert.AreEqual(1, assembly.Contigs.Count);

            // Swap in a default-configured Smith-Waterman aligner and compare the string form,
            // including its trailing newline.
            xmlAssembler.OverlapAlgorithm = new SmithWatermanAligner();
            assembly = (OverlapDeNovoAssembly) xmlAssembler.Assemble(tripleInputs);

            string expectedTripleText = "TYMKWRRGCGCCAAAATTTAGGC" + System.Environment.NewLine;
            actualText = assembly.ToString();
            Assert.AreEqual(expectedTripleText, actualText);
        }
Esempio n. 6
0
        public void ValidateContigToString()
        {
            // Validates Contig.ToString() for two assemblies: one built from the two
            // sequences of the ToString Xml node, and one built from the three sequences
            // of the assembly-algorithm Xml node.
            const int matchScore = 5;
            const int mismatchScore = -4;
            const int gapCost = -10;
            const double mergeThreshold = 4;
            const double consensusThreshold = 66;

            var xml = this.utilityObj.xmlUtil;

            // ---- Scenario 1: two sequences from the ToString node ----
            string secondText = xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
            string firstText = xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);

            ISequence firstRead = new Sequence(Alphabets.DNA, firstText);
            ISequence secondRead = new Sequence(Alphabets.DNA, secondText);

            var globalAligner = new NeedlemanWunschAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                GapOpenCost = gapCost
            };
            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeThreshold,
                OverlapAlgorithm = globalAligner,
                ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var pairInputs = new List<ISequence> {firstRead, secondRead};
            var pairAssembly = (IOverlapDeNovoAssembly) assembler.Assemble(pairInputs);

            Contig pairContig = pairAssembly.Contigs[0];
            string pairActual = pairContig.ToString();
            string pairExpected = xml.GetTextValue(Constants.ToStringNodeName,
                                                   Constants.OverlapDenovoExpectedNode);

            // The expected text has the escaped line-break characters stripped and the
            // actual text has real line breaks stripped before the two are compared.
            Assert.AreEqual(pairExpected.Replace("\\r\\n", ""), pairActual.Replace("\r\n", ""));

            // ---- Scenario 2: three sequences configured in the assembly-algorithm node ----
            var node = Constants.AssemblyAlgorithmNodeName;
            int xmlMatchScore = int.Parse(xml.GetTextValue(node, Constants.MatchScoreNode), null);
            int xmlMismatchScore = int.Parse(xml.GetTextValue(node, Constants.MisMatchScoreNode), null);
            int xmlGapCost = int.Parse(xml.GetTextValue(node, Constants.GapCostNode), null);
            double xmlMergeThreshold = double.Parse(xml.GetTextValue(node, Constants.MergeThresholdNode), null);
            double xmlConsensusThreshold = double.Parse(xml.GetTextValue(node, Constants.ConsensusThresholdNode), null);

            string textOne = xml.GetTextValue(node, Constants.SequenceNode1);
            string textTwo = xml.GetTextValue(node, Constants.SequenceNode2);
            string textThree = xml.GetTextValue(node, Constants.SequenceNode3);
            IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));

            var tripleInputs = new List<ISequence>
            {
                new Sequence(alphabet, textOne),
                new Sequence(alphabet, textTwo),
                new Sequence(alphabet, textThree)
            };

            var overlapAligner = new PairwiseOverlapAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(xmlMatchScore, xmlMismatchScore),
                GapOpenCost = xmlGapCost
            };
            var xmlAssembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = xmlMergeThreshold,
                OverlapAlgorithm = overlapAligner,
                ConsensusResolver = new SimpleConsensusResolver(xmlConsensusThreshold),
                AssumeStandardOrientation = false
            };

            // Assembles all the sequences.
            var tripleAssembly = (IOverlapDeNovoAssembly) xmlAssembler.Assemble(tripleInputs);
            Contig tripleContig = tripleAssembly.Contigs[0];
            string tripleActual = tripleContig.ToString();
            const string tripleExpected = "TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT";

            Assert.AreEqual(tripleExpected, tripleActual);
        }
        public void TestSimpleSequenceAssemblerWithSwineflu()
        {
            Trace.Set(Trace.AssemblyDetails);   // turn on log dump
            // test parameters
            const int matchScore = 5;
            const int mismatchScore = -4;
            const int gapCost = -10;
            const double mergeThreshold = 4;
            const double consensusThreshold = 66;

            ISequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            ISequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost = gapCost
                },
                ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var inputs = new List<ISequence> {seq1, seq2};
            var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0 = seqAssembly.Contigs[0];
            string expected = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTRYAGRYWMWYWWKSKAWMRRTTMWMMWGMSAMYRWWKMMACAGWMMYWGWARASAMWGTAMYAGWAAMRMAYKYWRYWRWMMYWCWMKMWGWYAASCWTMWMGRRRAMMWRYRYAAMSKRARASKRKKMRMMCYAWKRSRWKTRGSYMMATKKMAYWTKGSTRRMTGKAWCMTKGSWRRYYSRRWSYKKGRAWMWCYMKMSWSWGMAWSMYYMTSSWCMKMMAKYKYRKRRWCMTMYAKTKYRGAMAMWKSWASKTSWKACMMWGGARMKTKYWWCSMWKRWGAKKWSMTMRRWKAKSARKWGMKMWSAGWGYMATYRWKYKMARKGTYWKMRWTWKWMMSSWWKRMRAKWTYMYSSMMSAMWMRTKMMTSGMMCAAWSRTGWMWCGRMMRMAKGTSYWMMKGCWGSAKSWMMWMRYKYYKRMRMAAAWWKMWTMTRSMWARWTWWAAWAKGRMWWKYWWAMMMARRRMWYWSMWAMYCMWASMTYARYRAWWMMKRSAWWRAWGWYMWMGKGMWAKRRGKCMTYSWSCWWYSRKSYAYTMRYSMTSMMYMWMMWAGTSYYKAYCARMAWRSWSWYKMWYAKRWTKYWGWKGSRWMWKYWWKWKWSRGSWMRWMRWKMWAKMSSRARAWRKYMAWRMSRSMMAWAGYRAKRRRWCMMRAAGKGAGRRWKMAMKAWKRSWGRAYRMWMKWWKASYSGRSASWMRWARWRMCRKKMGAMRMAAYWRSAWWYSWAGYRRYWSSRARWYWWGYRKTMSCRAKRKAWRSAWWYGCWRKRKMWRGWAWTRYYRKWTCWGRTAYWMYMRTYYMMGATWSMMMWRYMMMYKRTYRSAMWMCMAMKKGTSMKAYAMMCAMSRGYSYYMYAWWYMMSARYMTMCMWYYKMWSAMWATWSRWMMRWKYMCAAWWKRWRWAWRWMSMAMAWAWKTRARAMKSRCMAMAKKRWKRMKGRMYRYMSSRTYKAKKMAWKYYMSRKSYMTWYWWKSKRSMRKYSYMKKTKKSRYYRWWGSSGGKTKSAYWGRRRKGGKRKRKRSAKGGWWSGKWKATSRMYRKYAMRRTKAKCASSRKYMARRWKAKSMRGSSKMMSKRWAKRSMRCMSASMWKRMSAKYRMMSAGAWTRCYAWYRAMGWRAWTWCTRWYAWWGWAAAKWYKRWTAYWSARWWSAYRRMWRTASRKWWMRMRKYMRWMSRYMWRGARWWMARMMWMSWGRAWWWAARWAWARARAWTKWWRATRRWWWMSTKGAYRWTKGKWYYYWSRAYRYYKRRMYKTWSRWTSYMKWRSWRWWKGWWMKAWYKKWRRAYKAMMRMRMTTYRRAYKWSMASRAYTYRWATGWRAAGRWMWKAWRYSARWWRRWAARMARYSMSMWRRAAAWYRRWRMCRRSKRMWTTGRAWWYKRCYRCWWWKRMKWTWACMMSWRMWKSGAWARYRYSWRMAWKGRRASTKWYRAMWAYSSRAMWTAYKMMKASSMARMAWAMTYARASRRAGMARAAWTARAYRGRGWARARMTRGAWKSRRYAARGMTKKAMYMRAYWWKGRYKWWCYAKWYWWYKGYSRYCWRTTCAWYKGTMSYSRKWKYMTYSSTRSKGGYARTCWSYYTSKGGRYRWKCWSTWWYKGGWYKYKMYMKWRTRGRWYWYKWMWKTRWAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "NeedlemanWunschAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new SmithWatermanAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "SmithwatermanAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "PairwiseOverlapAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new MUMmerAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "MUMmerAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);
        }
        /// <summary>
        /// Cuts randomly placed reads out of a seeded pseudo-random master sequence,
        /// assembles them with a PairwiseOverlapAligner and checks that the consensus
        /// of the first contig reproduces the master sequence.
        /// </summary>
        public void TestSimpleSequenceAssemblerWithRandomSequence()
        {
            // Tuning notes: the test should pass whenever every master position is
            // covered by at least one read, but some settings still break it:
            //  1. The short reads produced by the end-coverage strategy may not
            //     score high enough to be merged.
            //  2. Random read placement can leave positions uncovered (more reads
            //     make this less likely).
            //  3. The assembler may build the reverse and/or complement of the
            //     master sequence.
            //  4. A merge threshold that is too low can cause wrong merges, which
            //     the algorithm never undoes.
            const int MatchReward = 1;
            const int MismatchPenalty = -8;
            const int GapPenalty = -8;
            const double MergeCutoff = 3;
            const double ConsensusCutoff = 99;
            const int MasterLength = 100;
            const int MinReadLength = 10;
            const int MaxReadLength = 30;
            const int NumReads = 200;
            const bool AssumeOrientedReads = true;

            // Uncomment to dump assembly details to the log (extremely verbose).
            // Trace.Set(Trace.AssemblyDetails);

            // Fixed seed keeps the test repeatable; use "new Random()" instead to
            // exercise a different sequence on every run.
            var rng = new Random(654321);

            // Build the master sequence one base at a time.
            const string Bases = "ACGT";
            var masterBuilder = new StringBuilder(MasterLength);
            for (int n = 0; n < MasterLength; n++)
            {
                // A draw in 0..7 halves to 0..3: 0-1 => A, 2-3 => C, 4-5 => G, 6-7 => T.
                masterBuilder.Append(Bases[rng.Next(8) / 2]);
            }

            Sequence master = new Sequence(Alphabets.AmbiguousDNA, masterBuilder.ToString());
            string masterText = master.ConvertToString();

            // Cut the reads out of the master sequence.
            var reads = new List<ISequence>(NumReads);
            for (int readIndex = 0; readIndex < NumReads; readIndex++)
            {
                // Negative start draws are clamped to 0 so the ends get covered too;
                // the price is that reads near the end can come out short.
                int start = Math.Max(0, rng.Next(-MinReadLength, MasterLength - 1));
                int length = Math.Min(MasterLength - start, rng.Next(MinReadLength, MaxReadLength + 1));
                string fragment = masterText.Substring(start, length);

                // Both orientation draws are always taken, in this order, so the
                // random stream does not depend on AssumeOrientedReads.
                bool useRevComp = rng.Next(2) > 0;
                bool reverseDraw = rng.Next(2) > 0;
                bool useReverse = reverseDraw && !AssumeOrientedReads;

                var forward = new Sequence(Alphabets.DNA, fragment);
                ISequence read = forward;
                if (useRevComp)
                {
                    // Reversed only when both flags are set, reverse-complemented otherwise.
                    var flipped = useReverse
                        ? forward.GetReversedSequence()
                        : forward.GetReverseComplementedSequence();
                    read = new Sequence(Alphabets.DNA, flipped.ConvertToString());
                }

                ApplicationLog.WriteLine("read {0}: {1}", readIndex, read);
                reads.Add(read);
            }

            // Configure the overlap aligner through its pairwise-aligner interface.
            var overlapAligner = new PairwiseOverlapAligner();
            IPairwiseSequenceAligner pairwiseSettings = overlapAligner;
            pairwiseSettings.SimilarityMatrix = new DiagonalSimilarityMatrix(MatchReward, MismatchPenalty);
            pairwiseSettings.GapOpenCost = GapPenalty;

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = MergeCutoff,
                OverlapAlgorithm = overlapAligner,
                ConsensusResolver = new SimpleConsensusResolver(ConsensusCutoff),
                AssumeStandardOrientation = AssumeOrientedReads
            };

            var assembly = (IOverlapDeNovoAssembly)assembler.Assemble(reads);

            // Log the outcome before asserting so a failing run still leaves a trace.
            ApplicationLog.WriteLine(
                "Assembly finished. Contigs: {0}. Unmerged sequences: {1}.",
                assembly.Contigs.Count,
                assembly.UnmergedSequences.Count);
            Contig firstContig = assembly.Contigs[0];
            ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
            ApplicationLog.WriteLine(masterText);
            string consensusText = firstContig.Consensus.ConvertToString();
            ApplicationLog.WriteLine(consensusText);

            Assert.AreEqual(2, assembly.UnmergedSequences.Count);
            Assert.AreEqual(1, assembly.Contigs.Count);

            // Tricky, especially without oriented reads: the consensus could come
            // out reversed and/or complemented relative to the master sequence.
            Assert.AreEqual(masterText, consensusText);
        }
        /// <summary>
        /// Cuts fixed-length reads at regular offsets out of a seeded pseudo-random
        /// master sequence, randomly reverses or reverse-complements some of them,
        /// assembles them with a PairwiseOverlapAligner and checks that a single
        /// contig is produced whose consensus equals the master sequence.
        /// </summary>
        public void TestSimpleSequenceAssemblerWithSemiRandomSequence()
        {
            // Test parameters.
            const int MatchReward = 1;
            const int MismatchPenalty = -8;
            const int GapPenalty = -8;
            const double MergeCutoff = 4;
            const double ConsensusCutoff = 66;
            const int MasterLength = 30;
            const int ReadLength = 10;
            const int NumReads = 5;
            const int ReadStride = 5;
            const bool AssumeOrientedReads = false;

            // Assembly details are written to the log; this is extremely verbose.
            Trace.Set(Trace.AssemblyDetails);

            // Fixed seed keeps the test repeatable; use "new Random()" instead to
            // exercise a different sequence on every run.
            var rng = new Random(654321);

            // Build the master sequence one base at a time.
            const string Bases = "ACGT";
            var masterBuilder = new StringBuilder(MasterLength);
            for (int n = 0; n < MasterLength; n++)
            {
                // A draw in 0..7 halves to 0..3: 0-1 => A, 2-3 => C, 4-5 => G, 6-7 => T.
                masterBuilder.Append(Bases[rng.Next(8) / 2]);
            }

            Sequence master = new Sequence(Alphabets.DNA, masterBuilder.ToString());
            string masterText = master.ConvertToString();

            // Cut the reads: read k starts at offset ReadStride * k.
            var reads = new List<ISequence>(NumReads);
            for (int readIndex = 0; readIndex < NumReads; readIndex++)
            {
                string fragment = masterText.Substring(ReadStride * readIndex, ReadLength);

                // Both orientation draws are always taken, in this order, so the
                // random stream does not depend on AssumeOrientedReads.
                bool useRevComp = rng.Next(2) > 0;
                bool reverseDraw = rng.Next(2) > 0;
                bool useReverse = reverseDraw && !AssumeOrientedReads;

                var forward = new Sequence(Alphabets.DNA, fragment);
                ISequence read = forward;
                if (useRevComp)
                {
                    // Reversed only when both flags are set, reverse-complemented otherwise.
                    var flipped = useReverse
                        ? forward.GetReversedSequence()
                        : forward.GetReverseComplementedSequence();
                    read = new Sequence(Alphabets.DNA, flipped.ConvertToString());
                }

                reads.Add(read);
            }

            // Configure the overlap aligner through its pairwise-aligner interface.
            var overlapAligner = new PairwiseOverlapAligner();
            IPairwiseSequenceAligner pairwiseSettings = overlapAligner;
            pairwiseSettings.SimilarityMatrix = new DiagonalSimilarityMatrix(MatchReward, MismatchPenalty);
            pairwiseSettings.GapOpenCost = GapPenalty;

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = MergeCutoff,
                OverlapAlgorithm = overlapAligner,
                ConsensusResolver = new SimpleConsensusResolver(ConsensusCutoff),
                AssumeStandardOrientation = AssumeOrientedReads
            };

            var assembly = (IOverlapDeNovoAssembly)assembler.Assemble(reads);

            Assert.AreEqual(0, assembly.UnmergedSequences.Count);
            Assert.AreEqual(1, assembly.Contigs.Count);

            Contig firstContig = assembly.Contigs[0];
            ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
            ApplicationLog.WriteLine(masterText);
            string consensusText = firstContig.Consensus.ConvertToString();
            ApplicationLog.WriteLine(consensusText);

            // Tricky, especially without oriented reads: the consensus could come
            // out reversed and/or complemented relative to the master sequence.
            Assert.AreEqual(masterText, consensusText);
        }
Esempio n. 10
0
        /// <summary>
        /// Runs a simple overlap-based de novo assembly over the given sequences,
        /// using a NeedlemanWunschAligner to find the overlaps.
        /// </summary>
        /// <param name="sequences">Sequences to be assembled.</param>
        /// <returns>The IDeNovoAssembly returned by the assembler.</returns>
        public static IDeNovoAssembly DoSimpleSequenceAssemble(List<ISequence> sequences)
        {
            // Build the assembler and all of its settings in one expression.
            var assembler = new OverlapDeNovoAssembler
            {
                // Overlap detection: match = 5, mismatch = -4, gap open = -10.
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(5, -4),
                    GapOpenCost = -10
                },
                ConsensusResolver = new SimpleConsensusResolver(66),
                AssumeStandardOrientation = false
            };

            IDeNovoAssembly assembly = assembler.Assemble(sequences);
            return assembly;
        }
Esempio n. 11
0
        /// <summary>
        /// Assembles two overlapping DNA sequences, first with a configured
        /// NeedlemanWunschAligner (checking only the contig and unmerged counts) and
        /// then with a default-constructed SmithWatermanAligner, and verifies the
        /// text returned by ToString() on the second assembly.
        /// </summary>
        public void TestOverlapDenovoAssemblyToString()
        {
            // Scoring and threshold parameters.
            const int matchReward = 5;
            const int mismatchPenalty = -4;
            const int gapPenalty = -10;
            const double mergeCutoff = 4;
            const double consensusCutoff = 66;

            ISequence secondSeq = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            ISequence firstSeq = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            // The first pass uses a fully configured global aligner.
            var globalAligner = new NeedlemanWunschAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(matchReward, mismatchPenalty),
                GapOpenCost = gapPenalty
            };

            IOverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeCutoff,
                OverlapAlgorithm = globalAligner,
                ConsensusResolver = new SimpleConsensusResolver(consensusCutoff),
                AssumeStandardOrientation = false
            };

            var reads = new List<ISequence>(2) { firstSeq, secondSeq };

            var assembly = (IOverlapDeNovoAssembly)assembler.Assemble(reads);
            Assert.AreEqual(0, assembly.UnmergedSequences.Count);
            Assert.AreEqual(1, assembly.Contigs.Count);

            // Second pass: swap in a SmithWatermanAligner left at its constructor
            // defaults and assemble the same reads again.
            assembler.OverlapAlgorithm = new SmithWatermanAligner();
            assembly = (IOverlapDeNovoAssembly)assembler.Assemble(reads);

            // The literal is written with "\r\n" and adapted to the platform newline.
            string expectedText = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]\r\n".Replace("\r\n", Environment.NewLine);
            string actualText = assembly.ToString();
            Assert.AreEqual(expectedText, actualText);
        }
Esempio n. 12
0
        /// <summary>
        /// Assembles two overlapping DNA sequences with a NeedlemanWunschAligner and
        /// verifies the text returned by ToString() on the resulting contig: a
        /// truncated consensus prefix followed by a " +[n]" suffix (presumably the
        /// number of symbols left out).
        /// </summary>
        public void TestContigToString()
        {
            // test parameters
            const int matchScore = 5;
            const int mismatchScore = -4;
            const int gapCost = -10;
            const double mergeThreshold = 4;
            const double consensusThreshold = 66;

            Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost = gapCost
                },
                ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false,
            };

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly) assembler.Assemble(new List<ISequence> {seq1, seq2});

            // Fail with a clear assertion, not an index error, if no single contig was built.
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0 = seqAssembly.Contigs[0];
            string actualString = contig0.ToString();

            // The expected prefix contains ambiguity symbols (Y, R, M, W, ...) because
            // it is the consensus produced with the NeedlemanWunschAligner configured above.
            string expectedString = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTR... +[1678]";

            // Expected value goes first so a failure message labels the two strings correctly.
            Assert.AreEqual(expectedString, actualString);
        }