/// <summary>
/// Assembles two DNA sequences with an <c>OverlapDeNovoAssembler</c> that uses a
/// <c>NeedlemanWunschAligner</c> as its overlap algorithm, then checks the string
/// form of the first contig in the result.
/// </summary>
public void TestContigToString()
{
    // test parameters
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    Sequence seq2 = new Sequence(
        Alphabets.DNA,
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");

    Sequence seq1 = new Sequence(
        Alphabets.DNA,
        "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false,
    };

    IOverlapDeNovoAssembly seqAssembly =
        (IOverlapDeNovoAssembly)assembler.Assemble(new List<ISequence> { seq1, seq2 });
    Contig contig0 = seqAssembly.Contigs[0];
    string actualString = contig0.ToString();

    // NOTE(review): an alternative expectation was left commented out by the author; the active
    // one below contains ambiguity symbols (Y, R, M, W, K, S) — confirm which is intended.
    //string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
    string expectedString = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTR... +[1678]";

    // Expected value goes first so a failure message reports expected/actual the right way round.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Validates the properties of <c>SequenceAssemblers.Simple</c> and the size of
/// <c>SequenceAssemblers.All</c> against the expected values stored in the test XML.
/// </summary>
public void ValidateSequenceAssemblerProperties()
{
    // Get the expected values from Xml
    string expectedName = Utility._xmlUtil.GetTextValue(
        Constants.SequenceAssemblerPropertiesNode, Constants.NameNode);
    string expectedMergeThreshold = Utility._xmlUtil.GetTextValue(
        Constants.SequenceAssemblerPropertiesNode, Constants.MergeThresholdNode);
    string expectedOverlapAlgoName = Utility._xmlUtil.GetTextValue(
        Constants.SequenceAssemblerPropertiesNode, Constants.OverlapAlgoNameNode);
    string expectedDescription = Utility._xmlUtil.GetTextValue(
        Constants.SequenceAssemblerPropertiesNode, Constants.Description);

    // Get the registered assemblers and the simple assembler instance.
    IList<IDeNovoAssembler> allAssemblers = SequenceAssemblers.All;
    OverlapDeNovoAssembler simpleAssembler = SequenceAssemblers.Simple;

    // Validate SequenceAssembler
    Assert.AreEqual(1, allAssemblers.Count);
    Assert.IsTrue(simpleAssembler.AssumeStandardOrientation);
    Assert.AreEqual(expectedDescription, simpleAssembler.Description);
    Assert.IsNull(simpleAssembler.ConsensusResolver);

    // The XML value is machine-readable text, so format the number culture-independently;
    // a plain ToString() would depend on the culture of the machine running the test.
    Assert.AreEqual(
        expectedMergeThreshold,
        simpleAssembler.MergeThreshold.ToString(System.Globalization.CultureInfo.InvariantCulture));

    Assert.AreEqual(expectedName, simpleAssembler.Name.ToString());
    Assert.AreEqual(expectedOverlapAlgoName, simpleAssembler.OverlapAlgorithm.Name.ToString());
}
/// <summary>
/// Assembles two DNA sequences with an <c>OverlapDeNovoAssembler</c> configured with a
/// <c>NeedlemanWunschAligner</c>, then checks the string form of the first contig.
/// </summary>
public void TestContigToString()
{
    // test parameters
    int matchScore = 5;
    int mismatchScore = -4;
    int gapCost = -10;
    double mergeThreshold = 4;
    double consensusThreshold = 66;

    Sequence seq2 = new Sequence(
        Alphabets.DNA,
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");

    Sequence seq1 = new Sequence(
        Alphabets.DNA,
        "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new NeedlemanWunschAligner();

    // The scoring properties are reached through IPairwiseSequenceAligner; cast once and reuse.
    IPairwiseSequenceAligner overlapAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    overlapAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    overlapAligner.GapOpenCost = gapCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    List<ISequence> inputs = new List<ISequence> { seq1, seq2 };

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Contig contig0 = seqAssembly.Contigs[0];

    string actualString = contig0.ToString();
    string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";

    // Expected value goes first so a failure message reports expected/actual the right way round.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Checks that a default-constructed <c>OverlapDeNovoAssembler</c> reports the name and
/// description stored in the <c>SIMPLE_NAME</c> and <c>SIMPLE_DESCRIPTION</c> resources.
/// </summary>
public void TestAssemblerProperties()
{
    IDeNovoAssembler assembler = new OverlapDeNovoAssembler();

    // Expected value goes first so a failure message reports expected/actual the right way round.
    Assert.AreEqual(Properties.Resource.SIMPLE_NAME, assembler.Name);
    Assert.AreEqual(Properties.Resource.SIMPLE_DESCRIPTION, assembler.Description);
}
/// <summary>
/// Clears the consensus of the first contig of the "consensus" assembly, rebuilds it with
/// <c>OverlapDeNovoAssembler.MakeConsensus</c>, and compares the result with the consensus
/// stored in the test XML.
/// </summary>
public void SimpleConsensusWithMakeConsensusMethod()
{
    IOverlapDeNovoAssembly assembly = GetSequenceAssembly("consensus");

    // Expected consensus and resolver settings come from the Xml configuration.
    string expectedConsensus = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.ContigConsensusNode);
    string thresholdText = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.ConsensusThresholdNode);
    double consensusThreshold = double.Parse(thresholdText, null);
    string alphabetName = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.AlphabetNameNode);
    IAlphabet alphabet = Utility.GetAlphabet(alphabetName);

    // Drop the existing consensus so that MakeConsensus has to rebuild it.
    Contig contig = assembly.Contigs[0];
    contig.Consensus = null;

    OverlapDeNovoAssembler consensusBuilder = new OverlapDeNovoAssembler
    {
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
    };
    consensusBuilder.MakeConsensus(alphabet, contig);

    char[] consensusSymbols = contig.Consensus.Select(a => (char)a).ToArray();
    Assert.AreEqual(expectedConsensus, new String(consensusSymbols));

    // Log the required info to both the application log and the console.
    string logLine = string.Format(
        (IFormatProvider)null,
        "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
        contig.Consensus.ToString());
    ApplicationLog.WriteLine(logLine);
    Console.WriteLine(logLine);
}
/// <summary>
/// Assembles <c>Sequences</c> with a default-constructed <c>OverlapDeNovoAssembler</c>,
/// stores the assembly in <c>Consensus</c> and its contigs in <c>Contigs</c>.
/// </summary>
/// <param name="executionContext">Execution context supplied by the workflow runtime; not used here.</param>
/// <returns>Always <c>ActivityExecutionStatus.Closed</c>.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

    var assembly = assembler.Assemble(Sequences);
    Consensus = assembly;
    Contigs = ((IOverlapDeNovoAssembly)assembly).Contigs;

    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Runs a simple sequence assembly over the given sequences.
/// This sample uses NeedlemanWunschAligner as the overlap algorithm.
/// </summary>
/// <param name="sequences">List of sequences to assemble.</param>
/// <returns>IDeNovoAssembly which has the assembled result.</returns>
static IDeNovoAssembly DoSimpleSequenceAssemble(List<ISequence> sequences)
{
    // $TODO: Change the signature and initialization after OverlapDeNovoAssembler is migrated to PhaseOne
    // Create an assembler
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        OverlapAlgorithm = new NeedlemanWunschAligner(),
        ConsensusResolver = new Bio.SimpleConsensusResolver(66),
        AssumeStandardOrientation = false
    };

    // Setup the alignment parameters
    // $TODO: Change the NeedlemanWunschAligner, DiagonalSimilarityMatrix, SimpleConsensusResolver initialization
    // after OverlapDeNovoAssembler is migrated to PhaseOne
    var overlapAligner = assembler.OverlapAlgorithm;
    overlapAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(5, -4);
    overlapAligner.GapOpenCost = -10;

    return assembler.Assemble(sequences);
}
/// <summary>
/// Assembles three short DNA reads with a <c>PairwiseOverlapAligner</c>-based
/// <c>OverlapDeNovoAssembler</c> and checks that they merge into a single contig
/// with the expected consensus.
/// </summary>
public void TestSimpleSequenceAssembler()
{
    Trace.Set(Trace.AssemblyDetails); // turn on log dump

    // test parameters
    int matchScore = 1;
    int mismatchScore = -8;
    int gapCost = -8;
    double mergeThreshold = 4;
    double consensusThreshold = 66;

    Sequence seq1 = new Sequence(Alphabets.DNA, "GCCAAAATTTAGGC");
    Sequence seq2 = new Sequence(Alphabets.DNA, "TTATGGCGCCCACGGA");
    Sequence seq3 = new Sequence(Alphabets.DNA, "TATAAAGCGCCAA");

    // here is how the above sequences should align:
    // TATAAAGCGCCAA
    //          GCCAAAATTTAGGC
    //                    AGGCACCCGCGGTATT   <= reversed
    //
    // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner()
    };

    // Scoring is configured through the IPairwiseSequenceAligner view of the overlap algorithm.
    IPairwiseSequenceAligner overlapAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    overlapAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    overlapAligner.GapOpenCost = gapCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    List<ISequence> inputs = new List<ISequence> { seq1, seq2, seq3 };
    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    Contig mergedContig = seqAssembly.Contigs[0];
    Assert.AreEqual("TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT", mergedContig.Consensus.ConvertToString());
    Assert.AreEqual(3, mergedContig.Sequences.Count);
}
/// <summary>
/// Parses the sequences in a FASTQ file, assembles them with a default-constructed
/// <c>OverlapDeNovoAssembler</c>, and writes the assembled sequences to the file
/// "assembled_sequences.fasta".
/// </summary>
/// <remarks>
/// The method performs no asynchronous work, so it is a plain synchronous method: the
/// previous <c>async void</c> form only prevented callers from observing exceptions.
/// </remarks>
/// <param name="fastqFileName">Path of the FASTQ file to read.</param>
public static void AssemblySequences(string fastqFileName)
{
    var parser = new FastQParser();
    List<IQualitativeSequence> sequences;

    // Only read access is needed; the FileStream default (read/write) fails on read-only files.
    using (var fileStream = new FileStream(fastqFileName, FileMode.Open, FileAccess.Read))
    {
        // Materialize inside the using block so parsing finishes before the stream is disposed.
        sequences = parser.Parse(fileStream).ToList();
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    IDeNovoAssembly assembly = assembler.Assemble(sequences);

    FastAFormatter outputFormatter = new FastAFormatter();
    outputFormatter.Open("assembled_sequences.fasta");
    try
    {
        outputFormatter.Format(assembly.AssembledSequences);
    }
    finally
    {
        // Close the output even when formatting throws.
        outputFormatter.Close();
    }
}
/// <summary>
/// Assembles two DNA sequences, first with a configured <c>NeedlemanWunschAligner</c>
/// (checking the contig and unmerged counts) and then with a default
/// <c>SmithWatermanAligner</c>, and checks the string form of the second assembly.
/// </summary>
public void TestOverlapDenovoAssemblyToString()
{
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    ISequence seq2 = new Sequence(
        Alphabets.DNA,
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");

    ISequence seq1 = new Sequence(
        Alphabets.DNA,
        "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    IOverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    var reads = new List<ISequence> { seq1, seq2 };

    // First pass: global alignment must merge both reads into a single contig.
    var globalAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(reads);
    Assert.AreEqual(0, globalAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, globalAssembly.Contigs.Count);

    // Second pass: swap in a default-configured Smith-Waterman aligner and re-assemble.
    assembler.OverlapAlgorithm = new SmithWatermanAligner();
    var localAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(reads);

    string actualString = localAssembly.ToString();
    string expectedString =
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]" + Environment.NewLine;

    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Looks up the requested aligner by name, configures its gap costs and (when the name
/// parses) its similarity matrix, runs an <c>OverlapDeNovoAssembler</c> over the input
/// sequences, and publishes the resulting contigs through the <c>Contigs</c> argument.
/// </summary>
/// <param name="context">The execution context under which the activity executes.</param>
/// <returns>The assembly produced by the assembler.</returns>
/// <exception cref="ArgumentException">No registered aligner matches the requested name.</exception>
protected override IDeNovoAssembly Execute(CodeActivityContext context)
{
    // Setup the aligner to use. Names are matched case-insensitively without building
    // a lower-cased copy of every candidate name.
    string requestedAligner = AlignerName ?? DefaultAligner;
    var aligner = SequenceAligners.All.FirstOrDefault(
        sa => string.Equals(sa.Name, requestedAligner, StringComparison.OrdinalIgnoreCase));
    if (aligner == null)
    {
        // The message keeps the lower-cased name, as before, so existing callers/logs see the same text.
        throw new ArgumentException("Could not find aligner: " + requestedAligner.ToLowerInvariant());
    }

    // NOTE(review): the aligner comes from SequenceAligners.All and is mutated below; if that
    // list hands out shared instances, these settings persist across executions — confirm.
    aligner.GapOpenCost = GapOpenCost;
    aligner.GapExtensionCost = GapExtensionCost;

    var smName = SimilarityMatrix ?? DefaultMatrix;
    SimilarityMatrix.StandardSimilarityMatrix sm;
    if (Enum.TryParse(smName, true, out sm))
    {
        aligner.SimilarityMatrix = new SimilarityMatrix(sm);
    }
    // NOTE(review): an unrecognized matrix name is ignored and the aligner keeps whatever
    // matrix it already had — confirm this best-effort behavior is intended.

    // Create the assembler
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        AssumeStandardOrientation = this.AssumeStandardOrientation,
        MergeThreshold = this.MergeThreshold,
        OverlapAlgorithm = aligner
    };

    var consensus = assembler.Assemble(Sequences.Get(context));
    Contigs.Set(context, ((IOverlapDeNovoAssembly)consensus).Contigs);

    return consensus;
}
/// <summary>
/// Validates the Sequence Assembler for all the general test cases: reads the alignment
/// parameters, input sequences and expected results from the given Xml node, assembles the
/// sequences with a <c>PairwiseOverlapAligner</c>-based assembler, and asserts the outcome.
/// </summary>
/// <param name="nodeName">Xml Node Name</param>
/// <param name="additionalParameter">
/// Additional Parameter based on which the validations are done: it selects the similarity
/// matrix (file-based for <c>SimilarityMatrix</c>, diagonal otherwise) and whether the
/// consensus is re-made and checked (<c>Consensus</c>) or the assembly counts are checked.
/// </param>
/// <param name="isSeqAssemblyctr">
/// True if Default constructor is validated or else false. Not read by this method; kept
/// for signature compatibility.
/// </param>
private void ValidateSequenceAssemblerGeneral(string nodeName, AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
{
    // Get the parameters from Xml
    int matchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode), null);
    int mismatchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode), null);
    int gapCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapCostNode), null);
    double mergeThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MergeThresholdNode), null);
    double consensusThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ConsensusThresholdNode), null);
    string[] sequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencesNode);
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    string documentation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DocumentaionNode);

    bool isConsensusCase = additionalParameter == AssemblyParameters.Consensus;

    // Both cases build the inputs the same way; only the log label differs.
    string sequenceLogFormat = isConsensusCase
        ? "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'."
        : "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.";

    var inputs = new List<ISequence>();
    for (int i = 0; i < sequences.Length; i++)
    {
        // Logs the sequences
        ApplicationLog.WriteLine(string.Format(null, sequenceLogFormat, i, sequences[i]));
        inputs.Add(new Sequence(alphabet, sequences[i]));
    }

    // NOTE(review): the diagram below describes one specific three-read fixture; the actual
    // sequences come from the Xml node, so it may not match every caller — confirm.
    // here is how the above sequences should align:
    // TATAAAGCGCCAA
    //          GCCAAAATTTAGGC
    //                    AGGCACCCGCGGTATT   <= reversed
    //
    // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

    var assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner()
    };

    // Only the SimilarityMatrix case loads a matrix from file; every other case
    // (including DiagonalSM) uses a diagonal matrix built from the match/mismatch scores.
    if (additionalParameter == AssemblyParameters.SimilarityMatrix)
    {
        string blosumFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
        assembler.OverlapAlgorithm.SimilarityMatrix = new SimilarityMatrix(blosumFilePath);
    }
    else
    {
        assembler.OverlapAlgorithm.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    }

    assembler.OverlapAlgorithm.GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    // Assembles all the sequences.
    IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    // Set Documentation property.
    assembly.Documentation = documentation;

    // Get the parameters from Xml in general
    int contigSequencesCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
    string contigConsensus = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigConsensusNode);

    if (isConsensusCase)
    {
        // Read the contig from Contig method and discard its consensus so that
        // MakeConsensus has to rebuild it.
        Contig contigReadForConsensus = assembly.Contigs[0];
        contigReadForConsensus.Consensus = null;

        var simpleSeqAssembler = new OverlapDeNovoAssembler
        {
            ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
        };
        simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

        // Log the required info.
        ApplicationLog.WriteLine(string.Format(null, "SimpleConsensusMethod BVT : Consensus read is '{0}'.", contigReadForConsensus.Consensus));
        Assert.AreEqual(contigConsensus, new String(contigReadForConsensus.Consensus.Select(a => (char)a).ToArray()));
    }
    else
    {
        // Get the parameters from Xml for Assemble() method test cases.
        int unMergedCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.UnMergedSequencesCountNode), null);
        int contigsCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsCountNode), null);

        Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
        Assert.AreEqual(contigsCount, assembly.Contigs.Count);
        Assert.AreEqual(documentation, assembly.Documentation);

        Contig contigRead = assembly.Contigs[0];

        // Logs the consensus
        ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.", assembly.UnmergedSequences.Count));
        ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contigs Count is '{0}'.", assembly.Contigs.Count));
        ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contig Sequences Count is '{0}'.", contigRead.Sequences.Count));
        ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Consensus read is '{0}'.", contigRead.Consensus));

        Assert.AreEqual(contigConsensus, new String(contigRead.Consensus.Select(a => (char)a).ToArray()));
        Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
    }
}
/// <summary>
/// Validates <c>ToString()</c> of an overlap de novo assembly in two scenarios: two long
/// reads taken from the ToString Xml node, and three reads taken from the assembly
/// algorithm Xml node. In each scenario the reads are first assembled with the configured
/// aligner (checking contig/unmerged counts) and then re-assembled with a Smith-Waterman
/// aligner whose string output is compared with the expected text.
/// </summary>
public void ValidateOverlapDenovoAssemblyToString()
{
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    // ---- Scenario 1: two reads, fixed scoring parameters ----
    string firstReadText = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);
    ISequence seq1 = new Sequence(Alphabets.DNA, firstReadText);
    string secondReadText = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
    ISequence seq2 = new Sequence(Alphabets.DNA, secondReadText);

    var assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    var inputs = new List<ISequence> { seq1, seq2 };
    var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    // Re-assemble with a Smith-Waterman aligner that uses the same scoring.
    assembler.OverlapAlgorithm = new SmithWatermanAligner
    {
        SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
        GapOpenCost = gapCost
    };
    seqAssembly = (OverlapDeNovoAssembly)assembler.Assemble(inputs);

    string actualString = seqAssembly.ToString();
    const string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
    Assert.AreEqual(expectedString, actualString.Replace(System.Environment.NewLine, ""));

    // ---- Scenario 2: three reads, parameters from Xml ----
    // Get the parameters from Xml
    int matchScore1 = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode),
        null);
    int mismatchScore1 = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode),
        null);
    int gapCost1 = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode),
        null);
    double mergeThreshold1 = double.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode),
        null);
    double consensusThreshold1 = double.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.ConsensusThresholdNode),
        null);

    string sequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode1);
    string sequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode2);
    string sequence3 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.AlphabetNameNode));

    var seq4 = new Sequence(alphabet, sequence1);
    var seq5 = new Sequence(alphabet, sequence2);
    var seq6 = new Sequence(alphabet, sequence3);

    var assembler1 = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold1,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore1, mismatchScore1),
            GapOpenCost = gapCost1,
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold1),
        AssumeStandardOrientation = false,
    };

    var inputs1 = new List<ISequence> { seq4, seq5, seq6 };

    // Assembles all the sequences.
    seqAssembly = (OverlapDeNovoAssembly)assembler1.Assemble(inputs1);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    // Re-assemble with a default-configured Smith-Waterman aligner.
    assembler1.OverlapAlgorithm = new SmithWatermanAligner();
    seqAssembly = (OverlapDeNovoAssembly)assembler1.Assemble(inputs1);

    string expectedString1 = "TYMKWRRGCGCCAAAATTTAGGC" + System.Environment.NewLine;
    actualString = seqAssembly.ToString();
    Assert.AreEqual(expectedString1, actualString);
}
/// <summary>
/// Validates Contig.ToString() for two assemblies: a two-sequence assembly
/// whose inputs and expected text come from the ToString XML node, and a
/// three-sequence assembly configured entirely from the assembly-algorithm
/// XML node.
/// </summary>
public void ValidateContigToString()
{
    // Scoring parameters for the first (two-sequence) assembly.
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    string seq2Str = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
    string seq1Str = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);
    ISequence seq1 = new Sequence(Alphabets.DNA, seq1Str);
    ISequence seq2 = new Sequence(Alphabets.DNA, seq2Str);

    var assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(new List<ISequence> { seq1, seq2 });
    Contig contig0 = seqAssembly.Contigs[0];
    string actualString = contig0.ToString();
    string expectedString = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.OverlapDenovoExpectedNode);

    // The XML value carries the escaped four-character text \r\n, which is
    // removed from the expectation. Line breaks in the actual text are removed
    // using the platform newline instead of a hard-coded CRLF so the comparison
    // does not depend on the operating system the test runs on.
    Assert.AreEqual(
        expectedString.Replace("\\r\\n", ""),
        actualString.Replace(System.Environment.NewLine, ""));

    // Get the parameters from Xml
    int matchScore1 = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode), null);
    int mismatchScore1 = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode), null);
    int gapCost1 = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode), null);
    double mergeThreshold1 = double.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode), null);
    double consensusThreshold1 = double.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.ConsensusThresholdNode), null);

    string sequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode1);
    string sequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode2);
    string sequence3 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.AlphabetNameNode));

    ISequence seq4 = new Sequence(alphabet, sequence1);
    ISequence seq5 = new Sequence(alphabet, sequence2);
    ISequence seq6 = new Sequence(alphabet, sequence3);

    var assembler1 = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold1,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore1, mismatchScore1),
            GapOpenCost = gapCost1
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold1),
        AssumeStandardOrientation = false
    };

    // Assembles all the sequences.
    var seqAssembly1 = (IOverlapDeNovoAssembly)assembler1.Assemble(new List<ISequence> { seq4, seq5, seq6 });
    Contig contig1 = seqAssembly1.Contigs[0];
    string actualString1 = contig1.ToString();

    const string expectedString1 = "TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT";
    Assert.AreEqual(expectedString1, actualString1);
}
/// <summary>
/// Assembles two long hard-coded DNA sequences with four different overlap
/// aligners in turn and, after each run, checks that nothing is left
/// unmerged, that exactly one contig containing both inputs is produced, and
/// that the contig consensus equals a hard-coded expected string.
/// </summary>
public void TestSimpleSequenceAssemblerWithSwineflu()
{
    Trace.Set(Trace.AssemblyDetails); // turn on log dump

    // test parameters
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    // NOTE(review): the test name suggests these are swine-flu gene fragments - confirm.
    // seq2 is the longer input; seq1 begins at the ATGAAGGC... that follows
    // seq2's first 15 bases.
    ISequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
    ISequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    // First pass: Needleman-Wunsch aligner configured with the scoring parameters above.
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler { MergeThreshold = mergeThreshold, OverlapAlgorithm = new NeedlemanWunschAligner { SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore), GapOpenCost = gapCost }, ConsensusResolver = new SimpleConsensusResolver(consensusThreshold), AssumeStandardOrientation = false };
    var inputs = new List <ISequence> { seq1, seq2 };
    var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    Contig contig0 = seqAssembly.Contigs[0];
    string expected = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTRYAGRYWMWYWWKSKAWMRRTTMWMMWGMSAMYRWWKMMACAGWMMYWGWARASAMWGTAMYAGWAAMRMAYKYWRYWRWMMYWCWMKMWGWYAASCWTMWMGRRRAMMWRYRYAAMSKRARASKRKKMRMMCYAWKRSRWKTRGSYMMATKKMAYWTKGSTRRMTGKAWCMTKGSWRRYYSRRWSYKKGRAWMWCYMKMSWSWGMAWSMYYMTSSWCMKMMAKYKYRKRRWCMTMYAKTKYRGAMAMWKSWASKTSWKACMMWGGARMKTKYWWCSMWKRWGAKKWSMTMRRWKAKSARKWGMKMWSAGWGYMATYRWKYKMARKGTYWKMRWTWKWMMSSWWKRMRAKWTYMYSSMMSAMWMRTKMMTSGMMCAAWSRTGWMWCGRMMRMAKGTSYWMMKGCWGSAKSWMMWMRYKYYKRMRMAAAWWKMWTMTRSMWARWTWWAAWAKGRMWWKYWWAMMMARRRMWYWSMWAMYCMWASMTYARYRAWWMMKRSAWWRAWGWYMWMGKGMWAKRRGKCMTYSWSCWWYSRKSYAYTMRYSMTSMMYMWMMWAGTSYYKAYCARMAWRSWSWYKMWYAKRWTKYWGWKGSRWMWKYWWKWKWSRGSWMRWMRWKMWAKMSSRARAWRKYMAWRMSRSMMAWAGYRAKRRRWCMMRAAGKGAGRRWKMAMKAWKRSWGRAYRMWMKWWKASYSGRSASWMRWARWRMCRKKMGAMRMAAYWRSAWWYSWAGYRRYWSSRARWYWWGYRKTMSCRAKRKAWRSAWWYGCWRKRKMWRGWAWTRYYRKWTCWGRTAYWMYMRTYYMMGATWSMMMWRYMMMYKRTYRSAMWMCMAMKKGTSMKAYAMMCAMSRGYSYYMYAWWYMMSARYMTMCMWYYKMWSAMWATWSRWMMRWKYMCAAWWKRWRWAWRWMSMAMAWAWKTRARAMKSRCMAMAKKRWKRMKGRMYRYMSSRTYKAKKMAWKYYMSRKSYMTWYWWKSKRSMRKYSYMKKTKKSRYYRWWGSSGGKTKSAYWGRRRKGGKRKRKRSAKGGWWSGKWKATSRMYRKYAMRRTKAKCASSRKYMARRWKAKSMRGSSKMMSKRWAKRSMRCMSASMWKRMSAKYRMMSAGAWTRCYAWYRAMGWRAWTWCTRWYAWWGWAAAKWYKRWTAYWSARWWSAYRRMWRTASRKWWMRMRKYMRWMSRYMWRGARWWMARMMWMSWGRAWWWAARWAWARARAWTKWWRATRRWWWMSTKGAYRWTKGKWYYYWSRAYRYYKRRMYKTWSRWTSYMKWRSWRWWKGWWMKAWYKKWRRAYKAMMRMRMTTYRRAYKWSMASRAYTYRWATGWRAAGRWMWKAWRYSARWWRRWAARMARYSMSMWRRAAAWYRRWRMCRRSKRMWTTGRAWWYKRCYRCWWWKRMKWTWACMMSWRMWKSGAWARYRYSWRMAWKGRRASTKWYRAMWAYSSRAMWTAYKMMKASSMARMAWAMTYARASRRAGMARAAWTARAYRGRGWARARMTRGAWKSRRYAARGMTKKAMYMRAYWWKGRYKWWCYAKWYWWYKGYSRYCWRTTCAWYKGTMSYSRKWKYMTYSSTRSKGGYARTCWSYYTSKGGRYRWKCWSTWWYKGGWYKYKMYMKWRTRGRWYWYKWMWKTRWAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "NeedlemanWunschAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);

    // Second pass: the aligner is replaced by a SmithWatermanAligner built with
    // no similarity matrix or gap cost set here, so it runs with whatever
    // settings its constructor provides.
    assembler.OverlapAlgorithm = new SmithWatermanAligner();
    seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    contig0 = seqAssembly.Contigs[0];
    expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "SmithwatermanAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);

    // Third pass: PairwiseOverlapAligner, again constructed without explicit settings.
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
    seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    contig0 = seqAssembly.Contigs[0];
    expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "PairwiseOverlapAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);

    // Fourth pass: MUMmerAligner. "expected" is not reassigned, so this run
    // must reproduce the same consensus as the PairwiseOverlapAligner run.
    assembler.OverlapAlgorithm = new MUMmerAligner();
    seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    contig0 = seqAssembly.Contigs[0];
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "MUMmerAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);
}
/// <summary>
/// Cuts randomly positioned reads out of a seeded pseudo-random master
/// sequence, assembles them with a PairwiseOverlapAligner, and checks that
/// the first contig's consensus reproduces the master sequence.
/// </summary>
public void TestSimpleSequenceAssemblerWithRandomSequence()
{
    // Test parameters.
    //
    // In theory the test passes whenever every position of the master
    // sequence is covered by at least one read. Some settings still make it
    // fail, for reasons including:
    //   1. Short reads (a by-product of forcing coverage at the ends) may
    //      not score well enough to merge.
    //   2. Random read placement can leave positions uncovered; using more
    //      reads helps.
    //   3. The assembler may build the reverse and/or complement of the
    //      master sequence.
    //   4. A merge threshold that is too low can cause wrong merges, which
    //      the algorithm never repairs.
    const int matchScore = 1;
    const int mismatchScore = -8;
    const int gapCost = -8;
    const double mergeThreshold = 3;
    const double consensusThreshold = 99;

    const int masterSize = 100;
    const int shortestRead = 10;
    const int longestRead = 30;
    const int readCount = 200;
    const bool orientedReads = true;

    // Enable the next line to dump assembly details to the log (extremely verbose).
    // Trace.Set(Trace.AssemblyDetails);

    // Fixed seed keeps the test repeatable; use "new Random()" for a
    // different data set on every run.
    Random rng = new Random(654321);

    // Build the master sequence: Next(8) / 2 maps 0-1 to A, 2-3 to C,
    // 4-5 to G and 6-7 to T.
    StringBuilder masterText = new StringBuilder();
    for (int i = 0; i < masterSize; ++i)
    {
        masterText.Append("ACGT"[rng.Next(8) / 2]);
    }

    Sequence master = new Sequence(Alphabets.AmbiguousDNA, masterText.ToString());

    // Create the reads.
    List<ISequence> inputs = new List<ISequence>();
    for (int i = 0; i < readCount; ++i)
    {
        // Aim for uniform coverage right up to the ends; clamping the start
        // and the length can produce short reads.
        int start = Math.Max(0, rng.Next(-shortestRead, masterSize - 1));
        int length = Math.Min(masterSize - start, rng.Next(shortestRead, longestRead + 1));
        string fragment = master.ConvertToString().Substring(start, length);

        // Both random draws are always taken, in this order, so the seeded
        // generator stays in step even when reads are assumed oriented.
        bool revcomp = rng.Next(2) > 0;
        bool reverse = rng.Next(2) > 0 && !orientedReads;

        Sequence forward = new Sequence(Alphabets.DNA, fragment);
        ISequence read;
        if (!revcomp)
        {
            read = forward;
        }
        else if (reverse)
        {
            read = new Sequence(Alphabets.DNA, forward.GetReversedSequence().ConvertToString());
        }
        else
        {
            read = new Sequence(Alphabets.DNA, forward.GetReverseComplementedSequence().ConvertToString());
        }

        ApplicationLog.WriteLine("read {0}: {1}", i, read);
        inputs.Add(read);
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = orientedReads
    };

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    ApplicationLog.WriteLine(
        "Assembly finished. Contigs: {0}. Unmerged sequences: {1}.",
        seqAssembly.Contigs.Count,
        seqAssembly.UnmergedSequences.Count);

    Contig contig0 = seqAssembly.Contigs[0];
    ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
    ApplicationLog.WriteLine(master.ConvertToString());
    ApplicationLog.WriteLine(contig0.Consensus.ConvertToString());

    Assert.AreEqual(2, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    // Tricky, especially without oriented reads: the consensus could come
    // out reversed and/or complemented relative to the master.
    Assert.AreEqual(master.ConvertToString(), contig0.Consensus.ConvertToString());
}
/// <summary>
/// Builds a 30-base seeded pseudo-random master sequence, slices five
/// 10-base reads at a fixed stride of 5 (each randomly left as is, reversed,
/// or reverse-complemented), assembles them, and checks that the single
/// contig's consensus equals the master sequence.
/// </summary>
public void TestSimpleSequenceAssemblerWithSemiRandomSequence()
{
    // Test parameters.
    const int matchScore = 1;
    const int mismatchScore = -8;
    const int gapCost = -8;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    const int masterSize = 30;
    const int readSize = 10;
    const int readCount = 5;
    const int readStride = 5;
    const bool orientedReads = false;

    // Dumps assembly details to the log; this is extremely verbose.
    Trace.Set(Trace.AssemblyDetails);

    // Fixed seed keeps the test repeatable; use "new Random()" for a
    // different data set on every run.
    Random rng = new Random(654321);

    // Build the master sequence: Next(8) / 2 maps 0-1 to A, 2-3 to C,
    // 4-5 to G and 6-7 to T.
    StringBuilder masterText = new StringBuilder();
    for (int i = 0; i < masterSize; ++i)
    {
        masterText.Append("ACGT"[rng.Next(8) / 2]);
    }

    Sequence master = new Sequence(Alphabets.DNA, masterText.ToString());

    // Create the reads at offsets 0, 5, 10, 15 and 20.
    List<ISequence> inputs = new List<ISequence>();
    for (int start = 0; start < readStride * readCount; start += readStride)
    {
        string fragment = master.ConvertToString().Substring(start, readSize);

        // Both random draws are always taken, in this order, to keep the
        // seeded generator's sequence unchanged.
        bool revcomp = rng.Next(2) > 0;
        bool reverse = rng.Next(2) > 0 && !orientedReads;

        Sequence forward = new Sequence(Alphabets.DNA, fragment);
        ISequence read;
        if (!revcomp)
        {
            read = forward;
        }
        else if (reverse)
        {
            read = new Sequence(Alphabets.DNA, forward.GetReversedSequence().ConvertToString());
        }
        else
        {
            read = new Sequence(Alphabets.DNA, forward.GetReverseComplementedSequence().ConvertToString());
        }

        inputs.Add(read);
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = orientedReads
    };

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    Contig contig0 = seqAssembly.Contigs[0];
    ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
    ApplicationLog.WriteLine(master.ConvertToString());
    ApplicationLog.WriteLine(contig0.Consensus.ConvertToString());

    // Tricky, especially without oriented reads: the consensus could come
    // out reversed and/or complemented relative to the master.
    Assert.AreEqual(master.ConvertToString(), contig0.Consensus.ConvertToString());
}
/// <summary>
/// Builds the three-sequence assembly described by the assembly-algorithm
/// XML node and returns it. The additional parameter only selects which
/// prefix is used when the input sequences are logged.
/// </summary>
/// <param name="additionalParameter">
/// "consensus" logs with the SimpleConsensusMethod prefix; any other value
/// logs with the SequenceAssembly prefix.
/// </param>
/// <returns>The assembly produced from the three XML-defined sequences.</returns>
IOverlapDeNovoAssembly GetSequenceAssembly(string additionalParameter)
{
    // Read the scoring and threshold settings from the XML file.
    string matchText = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode);
    int matchScore = int.Parse(matchText, null);
    string mismatchText = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode);
    int mismatchScore = int.Parse(mismatchText, null);
    string gapText = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode);
    int gapCost = int.Parse(gapText, null);
    string mergeText = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode);
    double mergeThreshold = double.Parse(mergeText, null);
    string consensusText = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.ConsensusThresholdNode);
    double consensusThreshold = double.Parse(consensusText, null);

    // Read the three input sequences and their alphabet.
    string sequence1 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode1);
    string sequence2 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode2);
    string sequence3 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode3);
    string alphabetName = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.AlphabetNameNode);
    IAlphabet alphabet = Utility.GetAlphabet(alphabetName);

    // Log the sequences with the prefix that matches the calling test case.
    string[] logFormats;
    if (additionalParameter == "consensus")
    {
        logFormats = new string[]
        {
            "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.",
            "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.",
            "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'."
        };
    }
    else
    {
        logFormats = new string[]
        {
            "SequenceAssembly BVT : Sequence 1 used is '{0}'.",
            "SequenceAssembly BVT : Sequence 2 used is '{0}'.",
            "SequenceAssembly BVT : Sequence 3 used is '{0}'."
        };
    }

    string[] sequenceTexts = new string[] { sequence1, sequence2, sequence3 };
    for (int i = 0; i < sequenceTexts.Length; i++)
    {
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, logFormats[i], sequenceTexts[i]));
    }

    // How the sequences are expected to align (per the original author's note):
    //
    //   TATAAAGCGCCAA
    //           GCCAAAATTTAGGC
    //                     AGGCACCCGCGGTATT   <= reversed
    //
    //   TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
    List<ISequence> inputs = new List<ISequence>
    {
        new Sequence(alphabet, sequence1),
        new Sequence(alphabet, sequence2),
        new Sequence(alphabet, sequence3)
    };

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    // Assemble all the sequences.
    return (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
}
/// <summary>
/// Validates the ToString() output of an overlap de novo assembly for two
/// inputs sets: a two-sequence set taken from the ToString XML node and a
/// three-sequence set taken from the assembly-algorithm XML node. Each set is
/// first assembled with its configured aligner (checked for one contig and no
/// unmerged sequences) and then re-assembled with a SmithWatermanAligner,
/// whose ToString() text is compared with the expected value.
/// </summary>
public void ValidateOverlapDenovoAssemblyToString()
{
    // Scoring parameters for the first (two-sequence) assembly.
    int matchScore = 5;
    int mismatchScore = -4;
    int gapCost = -10;
    double mergeThreshold = 4;
    double consensusThreshold = 66;

    string seq2Str = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
    string seq1Str = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);
    Sequence seq2 = new Sequence(Alphabets.DNA, seq2Str);
    Sequence seq1 = new Sequence(Alphabets.DNA, seq1Str);

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new NeedlemanWunschAligner();
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
        new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    List<ISequence> inputs = new List<ISequence>();
    inputs.Add(seq1);
    inputs.Add(seq2);

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    // Re-assemble with a Smith-Waterman aligner created without explicit
    // scoring settings and validate the assembly's text form.
    assembler.OverlapAlgorithm = new SmithWatermanAligner();
    seqAssembly = (OverlapDeNovoAssembly)assembler.Assemble(inputs);
    string actualString = seqAssembly.ToString();
    string expectedString = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.OverlapDenovoExpectedNode);

    // Expected value goes first so that a failure report labels the two
    // values correctly. The XML value carries the escaped text \r\n, which is
    // stripped; line breaks in the actual text are stripped using the
    // platform newline rather than a hard-coded CRLF.
    Assert.AreEqual(
        expectedString.Replace("\\r\\n", ""),
        actualString.Replace(System.Environment.NewLine, ""));

    // Get the parameters from Xml
    int matchScore1 = int.Parse(utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode), null);
    int mismatchScore1 = int.Parse(utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode), null);
    int gapCost1 = int.Parse(utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode), null);
    double mergeThreshold1 = double.Parse(utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode), null);
    double consensusThreshold1 = double.Parse(utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.ConsensusThresholdNode), null);

    string sequence1 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode1);
    string sequence2 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode2);
    string sequence3 = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.AlphabetNameNode));

    Sequence seq4 = new Sequence(alphabet, sequence1);
    Sequence seq5 = new Sequence(alphabet, sequence2);
    Sequence seq6 = new Sequence(alphabet, sequence3);

    OverlapDeNovoAssembler assembler1 = new OverlapDeNovoAssembler();
    assembler1.MergeThreshold = mergeThreshold1;
    assembler1.OverlapAlgorithm = new PairwiseOverlapAligner();
    ((IPairwiseSequenceAligner)assembler1.OverlapAlgorithm).SimilarityMatrix =
        new DiagonalSimilarityMatrix(matchScore1, mismatchScore1);
    ((IPairwiseSequenceAligner)assembler1.OverlapAlgorithm).GapOpenCost = gapCost1;
    assembler1.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold1);
    assembler1.AssumeStandardOrientation = false;

    List<ISequence> inputs1 = new List<ISequence>();
    inputs1.Add(seq4);
    inputs1.Add(seq5);
    inputs1.Add(seq6);

    // Assembles all the sequences.
    IOverlapDeNovoAssembly seqAssembly1 = (IOverlapDeNovoAssembly)assembler1.Assemble(inputs1);
    Assert.AreEqual(0, seqAssembly1.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly1.Contigs.Count);

    // Same check as above for the three-sequence input: swap in a
    // Smith-Waterman aligner and compare the assembly's text form.
    assembler1.OverlapAlgorithm = new SmithWatermanAligner();
    seqAssembly = (OverlapDeNovoAssembly)assembler1.Assemble(inputs1);
    string actualString1 = seqAssembly.ToString();

    // The trailing line break is expressed with the platform newline so the
    // expectation is not tied to Windows line endings.
    string expectedString1 = "TYMKWRRGCGCCAAAATTTAGGC" + System.Environment.NewLine;
    Assert.AreEqual(expectedString1, actualString1);
}
/// <summary>
/// Assembles two overlapping swine flu DNA reads with OverlapDeNovoAssembler and checks that
/// NeedlemanWunschAligner, SmithWatermanAligner, PairwiseOverlapAligner and MUMmerAligner each
/// produce one contig holding both reads with the same expected consensus.
/// </summary>
public void TestSimpleSequenceAssemblerWithSwineflu()
{
    // Turn on the log dump.
    Trace.Set(Trace.AssemblyDetails);

    // Test parameters.
    int match = 5;
    int mismatch = -4;
    int gapOpen = -10;
    double mergeCutoff = 4;
    double consensusCutoff = 66;

    Sequence longRead = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
    Sequence shortRead = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    // Consensus expected from every aligner; positions where the two reads disagree
    // appear as ambiguity symbols (R, W, Y) in this string.
    const string expectedConsensus = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";

    // Only this first aligner is given the similarity matrix and gap-open cost;
    // the aligners swapped in below are used as default-constructed.
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeCutoff,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(match, mismatch),
            GapOpenCost = gapOpen
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusCutoff),
        AssumeStandardOrientation = false
    };

    List<ISequence> reads = new List<ISequence> { shortRead, longRead };

    AssertSwinefluAssembly((IOverlapDeNovoAssembly)assembler.Assemble(reads), expectedConsensus);

    assembler.OverlapAlgorithm = new SmithWatermanAligner();
    AssertSwinefluAssembly((IOverlapDeNovoAssembly)assembler.Assemble(reads), expectedConsensus);

    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
    AssertSwinefluAssembly((IOverlapDeNovoAssembly)assembler.Assemble(reads), expectedConsensus);

    assembler.OverlapAlgorithm = new MUMmerAligner();
    AssertSwinefluAssembly((IOverlapDeNovoAssembly)assembler.Assemble(reads), expectedConsensus);
}

/// <summary>
/// Asserts that an assembly has no unmerged sequences and exactly one contig, and that this
/// contig has the expected consensus and contains two sequences.
/// </summary>
/// <param name="assembly">Assembly result to check.</param>
/// <param name="expectedConsensus">Consensus text the single contig must have.</param>
private static void AssertSwinefluAssembly(IOverlapDeNovoAssembly assembly, string expectedConsensus)
{
    Assert.AreEqual(0, assembly.UnmergedSequences.Count);
    Assert.AreEqual(1, assembly.Contigs.Count);

    Contig onlyContig = assembly.Contigs[0];
    Assert.AreEqual(expectedConsensus, onlyContig.Consensus.ToStrings());
    Assert.AreEqual(2, onlyContig.Sequences.Count);
}
/// <summary>
/// Serializes an overlap de novo assembly with BinaryFormatter, deserializes it again and
/// verifies that the contigs, their member sequences and the unmerged sequences of the copy
/// match the original.
/// </summary>
public void TestSequenceAssemblyWithBinaryFormatter()
{
    // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input; it is kept
    // because BinaryFormatter round-tripping is exactly what this test exercises.
    //
    // The using block closes the file on every exit path. Exceptions are deliberately not
    // caught: a bare Assert.Fail() would hide the real exception or assertion message.
    using (Stream stream = File.Open("SequenceAssembly.data", FileMode.Create))
    {
        BinaryFormatter formatter = new BinaryFormatter();

        // Create an assembly by calling OverlapDeNovoAssembler.Assemble().
        int matchScore = 1;
        int mismatchScore = -8;
        int gapCost = -8;
        double mergeThreshold = 3;
        double consensusThreshold = 99;
        const bool AssumeOrientedReads = true;

        OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
        assembler.MergeThreshold = mergeThreshold;
        assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
        ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
            new DiagonalSimilarityMatrix(matchScore, mismatchScore, MoleculeType.DNA);
        ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
        assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
        assembler.AssumeStandardOrientation = AssumeOrientedReads;

        Sequence seq1 = new Sequence(Alphabets.DNA, "ACGACACG");
        Sequence seq2 = new Sequence(Alphabets.DNA, "ACGACCGGAGG");
        Sequence seq3 = new Sequence(Alphabets.DNA, "TTTTTT");

        IOverlapDeNovoAssembly seqAssembly =
            (IOverlapDeNovoAssembly)assembler.Assemble(new List<ISequence>() { seq1, seq2, seq3 });

        // Round trip: write, rewind, read back.
        formatter.Serialize(stream, seqAssembly);
        stream.Seek(0, SeekOrigin.Begin);
        IOverlapDeNovoAssembly deserializedseqAssembly =
            (IOverlapDeNovoAssembly)formatter.Deserialize(stream);

        Assert.AreNotSame(seqAssembly, deserializedseqAssembly);

        Assert.AreEqual(seqAssembly.Contigs.Count, deserializedseqAssembly.Contigs.Count);
        for (int i = 0; i < seqAssembly.Contigs.Count; i++)
        {
            Contig expectedContig = seqAssembly.Contigs[i];
            Contig actualContig = deserializedseqAssembly.Contigs[i];

            Assert.AreEqual(expectedContig.Consensus.ToString(), actualContig.Consensus.ToString());
            Assert.AreEqual(expectedContig.Sequences.Count, actualContig.Sequences.Count);

            for (int j = 0; j < expectedContig.Sequences.Count; j++)
            {
                Assert.AreEqual(
                    expectedContig.Sequences[j].ToString(),
                    actualContig.Sequences[j].ToString());
            }
        }

        // Compare the counts first so that extra or missing unmerged sequences in the
        // deserialized copy are reported instead of being skipped by the loop bound.
        Assert.AreEqual(
            seqAssembly.UnmergedSequences.Count,
            deserializedseqAssembly.UnmergedSequences.Count);
        for (int i = 0; i < seqAssembly.UnmergedSequences.Count; i++)
        {
            Assert.AreEqual(
                seqAssembly.UnmergedSequences[i].ToString(),
                deserializedseqAssembly.UnmergedSequences[i].ToString());
        }
    }
}
/// <summary>
/// Validates sequence assembler test cases based on the additional parameter value.
/// Reads the scoring parameters, three input sequences and the expected results from the
/// XML configuration, assembles the sequences with an OverlapDeNovoAssembler that uses a
/// PairwiseOverlapAligner, and then checks the result for the requested scenario.
/// </summary>
/// <param name="additionalParameter">
/// Scenario selector, matched case-insensitively: "assemble", "contig" or "consensus".
/// Any other value still assembles the sequences but performs no validation.
/// </param>
void ValidateSequenceAssemblerGeneral(string additionalParameter)
{
    // Get the parameters from Xml. The values are machine-written, so they are parsed
    // with the invariant culture rather than the culture of the machine running the test.
    int matchScore = int.Parse(
        GetAssemblyNodeText(Constants.MatchScoreNode), CultureInfo.InvariantCulture);
    int mismatchScore = int.Parse(
        GetAssemblyNodeText(Constants.MisMatchScoreNode), CultureInfo.InvariantCulture);
    int gapCost = int.Parse(
        GetAssemblyNodeText(Constants.GapCostNode), CultureInfo.InvariantCulture);
    double mergeThreshold = double.Parse(
        GetAssemblyNodeText(Constants.MergeThresholdNode), CultureInfo.InvariantCulture);
    double consensusThreshold = double.Parse(
        GetAssemblyNodeText(Constants.ConsensusThresholdNode), CultureInfo.InvariantCulture);

    string sequence1 = GetAssemblyNodeText(Constants.SequenceNode1);
    string sequence2 = GetAssemblyNodeText(Constants.SequenceNode2);
    string sequence3 = GetAssemblyNodeText(Constants.SequenceNode3);

    IAlphabet alphabet = Utility.GetAlphabet(GetAssemblyNodeText(Constants.AlphabetNameNode));
    MoleculeType molType = Utility.GetMoleculeType(GetAssemblyNodeText(Constants.MoleculeTypeNode));

    // Normalize the selector once, culture-independently, so that the logging below and the
    // validation further down always agree on the scenario (a culture-sensitive lower-casing
    // of "CONTIG" does not yield "contig" under Turkish casing rules).
    string scenario = additionalParameter.ToLowerInvariant();

    // Log the input sequences under the prefix of the scenario being run.
    string logPrefix = scenario == "consensus" ? "SimpleConsensusMethod BVT" : "SequenceAssembly BVT";
    string[] rawSequences = { sequence1, sequence2, sequence3 };
    for (int i = 0; i < rawSequences.Length; i++)
    {
        LogToApplicationLogAndConsole(string.Format(
            CultureInfo.InvariantCulture,
            "{0} : Sequence {1} used is '{2}'.",
            logPrefix,
            i + 1,
            rawSequences[i]));
    }

    Sequence seq1 = new Sequence(alphabet, sequence1);
    Sequence seq2 = new Sequence(alphabet, sequence2);
    Sequence seq3 = new Sequence(alphabet, sequence3);

    // Author's note on how the configured sequences are expected to align:
    // TATAAAGCGCCAA
    //         GCCAAAATTTAGGC
    //                   AGGCACCCGCGGTATT <= reversed
    //
    // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
        new DiagonalSimilarityMatrix(matchScore, mismatchScore, molType);
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    List<ISequence> inputs = new List<ISequence>();
    inputs.Add(seq1);
    inputs.Add(seq2);
    inputs.Add(seq3);

    // Assembles all the sequences.
    IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    // Get the expected values that are shared by the scenarios.
    int contigSequencesCount = int.Parse(
        GetAssemblyNodeText(Constants.ContigSequencesCountNode), CultureInfo.InvariantCulture);
    string contigConsensus = GetAssemblyNodeText(Constants.ContigConsensusNode);

    switch (scenario)
    {
        case "assemble":
            // Get the expected values specific to the Assemble() test cases.
            int unMergedCount = int.Parse(
                GetAssemblyNodeText(Constants.UnMergedSequencesCountNode), CultureInfo.InvariantCulture);
            int contigsCount = int.Parse(
                GetAssemblyNodeText(Constants.ContigsCountNode), CultureInfo.InvariantCulture);

            Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
            Assert.AreEqual(contigsCount, assembly.Contigs.Count);
            Contig contigRead = assembly.Contigs[0];

            // The counts go to the application log only; the consensus goes to both sinks.
            ApplicationLog.WriteLine(string.Format(
                CultureInfo.InvariantCulture,
                "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
                assembly.UnmergedSequences.Count));
            ApplicationLog.WriteLine(string.Format(
                CultureInfo.InvariantCulture,
                "SequenceAssembly BVT : Contigs Count is '{0}'.",
                assembly.Contigs.Count));
            ApplicationLog.WriteLine(string.Format(
                CultureInfo.InvariantCulture,
                "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
                contigRead.Sequences.Count));
            LogToApplicationLogAndConsole(string.Format(
                CultureInfo.InvariantCulture,
                "SequenceAssembly BVT : Consensus read is '{0}'.",
                contigRead.Consensus.ToString()));

            Assert.AreEqual(contigConsensus, contigRead.Consensus.ToString());
            Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
            break;

        case "contig":
            // Read the first contig of the assembly.
            Contig contigsRead = assembly.Contigs[0];

            LogToApplicationLogAndConsole(string.Format(
                CultureInfo.InvariantCulture,
                "SequenceAssembly BVT : Consensus read is '{0}'.",
                contigsRead.Consensus.ToString()));
            LogToApplicationLogAndConsole("SequenceAssembly BVT : Successfully read the Contig.");

            Assert.AreEqual(contigConsensus, contigsRead.Consensus.ToString());
            Assert.AreEqual(contigSequencesCount, contigsRead.Sequences.Count);
            break;

        case "consensus":
            // Clear the consensus of the first contig and rebuild it through MakeConsensus()
            // on a fresh assembler, so the assertion checks the rebuilt value.
            Contig contigReadForConsensus = assembly.Contigs[0];
            contigReadForConsensus.Consensus = null;

            OverlapDeNovoAssembler simpleSeqAssembler = new OverlapDeNovoAssembler();
            simpleSeqAssembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
            simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

            LogToApplicationLogAndConsole(string.Format(
                CultureInfo.InvariantCulture,
                "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                contigReadForConsensus.Consensus.ToString()));

            Assert.AreEqual(contigConsensus, contigReadForConsensus.Consensus.ToString());
            break;

        default:
            break;
    }
}

/// <summary>
/// Reads the text value of a child node of the assembly algorithm node in the test XML.
/// </summary>
/// <param name="nodeName">Name of the child node to read.</param>
/// <returns>Text value returned by the XML utility for that node.</returns>
private string GetAssemblyNodeText(string nodeName)
{
    return _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, nodeName);
}

/// <summary>
/// Writes one message to the application log and then to the console.
/// </summary>
/// <param name="message">Fully formatted message to write.</param>
private static void LogToApplicationLogAndConsole(string message)
{
    ApplicationLog.WriteLine(message);
    Console.WriteLine(message);
}