/// <summary>
/// Assembles the scaffold test reads with a <see cref="ParallelDeNovoAssembler"/>
/// (k-mer length 6, dangling threshold 3, redundant-path threshold 7,
/// scaffold redundancy 0, depth 3) and compares the resulting contig and
/// scaffold sequences with the expected sets.
/// </summary>
public void AssemblerTestWithScaffoldBuilder()
{
    const int kmerSize = 6;
    const int danglingLimit = 3;
    const int redundantLimit = 7;

    // Expected outputs are declared up front so the assembler section below reads as a pipeline.
    var contigsExpected = new HashSet<string>
    {
        "TTTTTT",
        "CGCGCG",
        "TTAGCGCG",
        "CGCGCCGCGC",
        "GCGCGC",
        "TTTTTA",
        "TTTTAA",
        "TTTAAA",
        "TTTTAGC",
        "ATGCCTCCTATCTTAGC"
    };

    var scaffoldsExpected = new HashSet<string>
    {
        "ATGCCTCCTATCTTAGCGCGC",
        "TTTAAA",
        "TTTTTT",
        "TTTTAGC",
        "TTTTAA",
        "CGCGCCGCGC",
        "TTTTTA",
        "CGCGCG"
    };

    using (var padena = new ParallelDeNovoAssembler())
    {
        padena.KmerLength = kmerSize;
        padena.DanglingLinksThreshold = danglingLimit;
        padena.RedundantPathLengthThreshold = redundantLimit;
        padena.ScaffoldRedundancy = 0;
        padena.Depth = 3;

        // Registers library "abc" on the shared CloneLibrary instance before assembling.
        CloneLibrary.Instance.AddLibrary("abc", 5, 20);

        var assembly = (PadenaAssembly)padena.Assemble(TestInputs.GetReadsForScaffolds(), true);

        AlignmentHelpers.CompareSequenceLists(contigsExpected, assembly.ContigSequences);
        AlignmentHelpers.CompareSequenceLists(scaffoldsExpected, assembly.Scaffolds);
    }
}
/// <summary>
/// Builds the de Bruijn graph for the tiny read set with k-mer length 3 and
/// verifies the node count (9), that each expected 3-mer is present in one
/// of its two listed orientations, and the total extension count (31).
/// </summary>
public void TestDeBruijnGraphBuilderTiny()
{
    const int kmerSize = 3;

    List<ISequence> tinyReads = TestInputs.GetTinyReads();
    this.KmerLength = kmerSize;
    this.SequenceReads.Clear();
    this.SetSequenceReads(tinyReads);
    this.CreateGraph();

    DeBruijnGraph builtGraph = this.Graph;
    Assert.AreEqual(9, builtGraph.NodeCount);

    // Render every node's sequence as text for membership checks.
    var nodeTexts = builtGraph.GetNodes()
        .Select(node => new string(builtGraph.GetNodeSequence(node).Select(symbol => (char)symbol).ToArray()));
    var observed = new HashSet<string>(nodeTexts);

    // Each row lists the two accepted spellings of one node; either may be stored.
    string[][] acceptedSpellings =
    {
        new[] { "ATG", "CAT" },
        new[] { "TGC", "GCA" },
        new[] { "GCC", "GGC" },
        new[] { "TCC", "GGA" },
        new[] { "CCT", "AGG" },
        new[] { "CTA", "TAG" },
        new[] { "TAT", "ATA" },
        new[] { "ATC", "GAT" },
        new[] { "CTC", "GAG" }
    };

    foreach (string[] spelling in acceptedSpellings)
    {
        Assert.IsTrue(observed.Contains(spelling[0]) || observed.Contains(spelling[1]));
    }

    long edgeTotal = builtGraph.GetNodes().Sum(node => node.ExtensionsCount);
    Assert.AreEqual(31, edgeTotal);
}
/// <summary>
/// Builds the de Bruijn graph for the small read set with k-mer length 6 and
/// verifies the node count (20), that every node (or its reverse complement)
/// appears in the set returned by <c>GetGraphNodesForSmallReads</c>, and the
/// total extension count (51).
/// </summary>
public void TestDeBruijnGraphBuilderSmall()
{
    const int kmerSize = 6;

    List<ISequence> smallReads = TestInputs.GetSmallReads();
    this.KmerLength = kmerSize;
    this.SequenceReads.Clear();
    this.SetSequenceReads(smallReads);
    this.CreateGraph();

    DeBruijnGraph builtGraph = this.Graph;
    Assert.AreEqual(20, builtGraph.NodeCount);

    HashSet<string> knownNodes = GetGraphNodesForSmallReads();
    foreach (DeBruijnNode node in builtGraph.GetNodes())
    {
        string forward = new string(
            builtGraph.GetNodeSequence(node).Select(symbol => (char)symbol).ToArray());
        string reverseComplement = new string(
            builtGraph.GetNodeSequence(node)
                .GetReverseComplementedSequence()
                .Select(symbol => (char)symbol)
                .ToArray());

        // A node is accepted in either orientation.
        bool isKnown = knownNodes.Contains(forward) || knownNodes.Contains(reverseComplement);
        Assert.IsTrue(isKnown);
    }

    long edgeTotal = builtGraph.GetNodes().Sum(node => node.ExtensionsCount);
    Assert.AreEqual(51, edgeTotal);
}
/// <summary>
/// Assembles the dangling-read test input with a <see cref="ParallelDeNovoAssembler"/>
/// (k-mer length 11, dangling threshold 3, redundant-path threshold 10) and
/// verifies that exactly one assembled sequence is produced and that it is
/// the expected contig.
/// </summary>
public void AssemblerTest()
{
    const int kmerSize = 11;
    const int danglingLimit = 3;
    const int redundantLimit = 10;

    List<ISequence> danglingReads = TestInputs.GetDanglingReads();
    var contigsExpected = new HashSet<string> { "ATCGCTAGCATCGAACGATCATT" };

    using (var padena = new ParallelDeNovoAssembler())
    {
        padena.KmerLength = kmerSize;
        padena.DanglingLinksThreshold = danglingLimit;
        padena.RedundantPathLengthThreshold = redundantLimit;

        IDeNovoAssembly assembly = padena.Assemble(danglingReads);

        // Exactly one sequence is expected, and it must be in the expected set.
        Assert.AreEqual(1, assembly.AssembledSequences.Count());
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            string contigText = new string(contig.Select(symbol => (char)symbol).ToArray());
            Assert.IsTrue(contigsExpected.Contains(contigText));
        }
    }
}
/// <summary>
/// Builds the graph for the redundant-path reads (k-mer length 6), removes
/// redundant paths, builds contigs with a <see cref="SimplePathContigBuilder"/>,
/// and verifies that a single expected contig is produced and that the
/// graph's node count and total extension count are the same before and
/// after contig building.
/// </summary>
public void TestContigBuilder2()
{
    const int KmerLength = 6;
    const int RedundantThreshold = 10;

    List<ISequence> readSeqs = TestInputs.GetRedundantPathReads();
    SequenceReads.Clear();
    this.SetSequenceReads(readSeqs);
    this.KmerLength = KmerLength;
    RedundantPathLengthThreshold = RedundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(RedundantThreshold);
    ContigBuilder = new SimplePathContigBuilder();

    CreateGraph();
    RemoveRedundancy();

    // Snapshot of the graph before contigs are built.
    long graphCount = Graph.NodeCount;
    long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    // BuildContigs is typed as IEnumerable<ISequence>. Materialize it once, right here, so
    // that (a) the result is not enumerated repeatedly by the assertions below and (b) if the
    // sequence happens to be lazily produced, the build has fully run before the "after"
    // snapshot is taken.
    List<ISequence> contigs = BuildContigs().ToList();

    // Snapshot of the graph after contigs are built.
    long contigsBuiltGraphCount = Graph.NodeCount;
    long contigsBuilt = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    // Compare the two graphs
    Assert.AreEqual(1, contigs.Count);
    string s = new string(contigs[0].Select(a => (char)a).ToArray());
    Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", s);
    Assert.AreEqual(graphCount, contigsBuiltGraphCount);
    Assert.AreEqual(graphEdges, contigsBuilt);
}
/// <summary>
/// Builds the graph for the redundant-path reads (k-mer length 5, redundant
/// path threshold 10), runs <c>RemoveRedundancy</c>, and verifies that the
/// graph lost 5 nodes and 12 extensions in total.
/// </summary>
public void TestRedundantPathsPurger()
{
    const int kmerSize = 5;
    const int redundantLimit = 10;

    List<ISequence> redundantReads = TestInputs.GetRedundantPathReads();
    this.SequenceReads.Clear();
    this.SetSequenceReads(redundantReads);
    this.KmerLength = kmerSize;
    this.RedundantPathLengthThreshold = redundantLimit;
    this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
    this.CreateGraph();

    // Measurements before purging.
    long nodesBefore = this.Graph.NodeCount;
    long edgesBefore = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

    this.RemoveRedundancy();

    // Measurements after purging.
    long nodesAfter = this.Graph.NodeCount;
    long edgesAfter = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

    // Compare the two graphs
    long nodesRemoved = nodesBefore - nodesAfter;
    long edgesRemoved = edgesBefore - edgesAfter;
    Assert.AreEqual(5, nodesRemoved);
    Assert.AreEqual(12, edgesRemoved);
}
/// <summary>
/// Builds the graph for the dangling reads (k-mer length 11), removes
/// dangling links and redundant paths, builds contigs with a
/// <see cref="SimplePathContigBuilder"/>, and verifies that a single
/// expected contig is produced and that the graph's node count and total
/// extension count are the same before and after contig building.
/// </summary>
public void TestContigBuilder1()
{
    const int KmerLength = 11;
    const int DangleThreshold = 3;
    const int RedundantThreshold = 10;

    List<ISequence> readSeqs = TestInputs.GetDanglingReads();
    this.SequenceReads.Clear();
    this.SetSequenceReads(readSeqs);
    this.KmerLength = KmerLength;
    DanglingLinksThreshold = DangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(DangleThreshold);
    RedundantPathLengthThreshold = RedundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(RedundantThreshold);
    ContigBuilder = new SimplePathContigBuilder();

    CreateGraph();
    UnDangleGraph();
    RemoveRedundancy();

    // Snapshot of the graph before contigs are built.
    long graphCount = Graph.NodeCount;
    long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    // BuildContigs is typed as IEnumerable<ISequence>. Materialize it once, right here, so
    // that (a) the result is not enumerated repeatedly by the assertions below and (b) if the
    // sequence happens to be lazily produced, the build has fully run before the "after"
    // snapshot is taken.
    List<ISequence> contigs = BuildContigs().ToList();

    // Snapshot of the graph after contigs are built.
    long contigsBuiltGraphCount = this.Graph.NodeCount;
    long contigsBuilt = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    // Compare the two graphs
    Assert.AreEqual(1, contigs.Count);
    HashSet<string> expectedContigs = new HashSet<string>()
    {
        "ATCGCTAGCATCGAACGATCATT"
    };

    foreach (ISequence contig in contigs)
    {
        string s = new string(contig.Select(a => (char)a).ToArray());
        Assert.IsTrue(expectedContigs.Contains(s));
    }

    Assert.AreEqual(graphCount, contigsBuiltGraphCount);
    Assert.AreEqual(graphEdges, contigsBuilt);
}
/// <summary>
/// Builds the graph for the dangling reads (k-mer length 11, dangling
/// threshold 3), runs <c>UnDangleGraph</c>, and verifies that 2 nodes and
/// 4 extensions were removed and that the removed node sequences match the
/// expected pair.
/// </summary>
public void TestDanglingLinksPurger()
{
    const int kmerSize = 11;
    const int danglingLimit = 3;

    List<ISequence> danglingReads = TestInputs.GetDanglingReads();
    this.SequenceReads.Clear();
    this.SetSequenceReads(danglingReads);
    this.KmerLength = kmerSize;
    this.DanglingLinksThreshold = danglingLimit;
    this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
    this.CreateGraph();

    // Measurements and node sequences before purging (materialized so later graph changes do not affect them).
    long nodesBefore = this.Graph.NodeCount;
    long edgesBefore = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);
    var sequencesBefore = this.Graph.GetNodes()
        .Select(node => this.Graph.GetNodeSequence(node))
        .ToList();

    // NOTE(review): the threshold is assigned the same value a second time here; it looks
    // redundant but is kept as-is — confirm the setter has no side effects before removing.
    this.DanglingLinksThreshold = danglingLimit;
    this.UnDangleGraph();

    // Measurements and node sequences after purging.
    long nodesAfter = this.Graph.NodeCount;
    long edgesAfter = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);
    var sequencesAfter = this.Graph.GetNodes()
        .Select(node => this.Graph.GetNodeSequence(node))
        .ToList();

    // Compare the two graphs
    Assert.AreEqual(2, nodesBefore - nodesAfter);
    Assert.AreEqual(4, edgesBefore - edgesAfter);

    // Sequences present before but not after are the ones that were purged.
    var purgedSequences = sequencesBefore.Except(sequencesAfter, new SequenceEqualityComparer());
    var purgedExpected = new HashSet<string>
    {
        "ATCGAACGATG",
        "TCGAACGATGA"
    };
    AlignmentHelpers.CompareSequenceLists(purgedExpected, purgedSequences);
}