/// <summary>
/// Builds contigs from the scaffold test reads (graph creation, dangling-link
/// purge, redundant-path purge, contig building), feeds them to a
/// <see cref="GraphScaffoldBuilder"/>, and checks the number of scaffolds and
/// the sequence of the first scaffold.
/// </summary>
public void BuildScaffold()
{
    const int kmerLength = 6;
    const int dangleThreshold = 3;
    const int redundantThreshold = 7;

    // Build the graph from the reads.
    List<ISequence> sequences = TestInputs.GetReadsForScaffolds();
    KmerLength = kmerLength;
    SequenceReads.Clear();
    this.AddSequenceReads(sequences);
    CreateGraph();

    // Clean the graph: dangling links first, then redundant paths.
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    IList<ISequence> contigs = BuildContigs();

    // Register the clone library before scaffolding.
    CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);
    GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder();

    // NOTE(review): the trailing 3 and 0 are presumably depth and scaffold
    // redundancy — confirm against GraphScaffoldBuilder.BuildScaffold.
    IList<ISequence> scaffoldSeq = scaffold.BuildScaffold(
        sequences,
        contigs,
        this.KmerLength,
        3,
        0);

    // Expected value goes first so a failure reports "expected 8, actual N".
    Assert.AreEqual(8, scaffoldSeq.Count);
    Assert.AreEqual("ATGCCTCCTATCTTAGCGCGC", scaffoldSeq[0].ToString());
}
/// <summary>
/// Builds the graph for the dangling-reads input, purges dangling links, and
/// checks that two nodes and four edges were removed and that the removed
/// nodes include the two expected k-mers.
/// </summary>
public void TestDanglingLinksPurger()
{
    // camelCase locals so they do not shadow the KmerLength member.
    const int kmerLength = 11;
    const int dangleThreshold = 3;

    List<ISequence> readSeqs = TestInputs.GetDanglingReads();
    SequenceReads.Clear();
    this.AddSequenceReads(readSeqs);
    KmerLength = kmerLength;

    // Threshold and purger are configured once, before the graph is built.
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    CreateGraph();

    // Snapshot of the graph before purging.
    int graphCount = Graph.Nodes.Count;
    int graphEdges = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
    HashSet<string> graphNodes = new HashSet<string>(
        Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

    UnDangleGraph();

    // Snapshot of the graph after purging.
    int dangleRemovedGraphCount = Graph.Nodes.Count;
    int dangleRemovedGraphEdge = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
    HashSet<string> dangleRemovedGraphNodes = new HashSet<string>(
        Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

    // Compare the two graphs.
    Assert.AreEqual(2, graphCount - dangleRemovedGraphCount);
    Assert.AreEqual(4, graphEdges - dangleRemovedGraphEdge);

    // After ExceptWith, graphNodes holds only the nodes that were purged.
    graphNodes.ExceptWith(dangleRemovedGraphNodes);
    Assert.IsTrue(graphNodes.Contains("TCGAACGATGA"));
    Assert.IsTrue(graphNodes.Contains("ATCGAACGATG"));
}
/// <summary>
/// Builds the graph for the redundant-path reads, removes redundant paths,
/// and checks that the purge dropped five nodes and twelve edges.
/// </summary>
public void TestRedundantPathsPurger()
{
    const int kmerSize = 5;
    const int pathLengthLimit = 10;

    List<ISequence> reads = TestInputs.GetRedundantPathReads();
    SequenceReads.Clear();
    this.AddSequenceReads(reads);
    KmerLength = kmerSize;
    RedundantPathLengthThreshold = pathLengthLimit;
    RedundantPathsPurger = new RedundantPathsPurger(pathLengthLimit);
    CreateGraph();

    // Node and edge totals before the purge.
    int nodesBefore = Graph.Nodes.Count;
    int edgesBefore = Graph.Nodes.Sum(node => node.ExtensionsCount);

    RemoveRedundancy();

    // Node and edge totals after the purge.
    int nodesAfter = Graph.Nodes.Count;
    int edgesAfter = Graph.Nodes.Sum(node => node.ExtensionsCount);

    int removedNodes = nodesBefore - nodesAfter;
    int removedEdges = edgesBefore - edgesAfter;
    Assert.AreEqual(5, removedNodes);
    Assert.AreEqual(12, removedEdges);
}
/// <summary>
/// Builds the graph for the tiny reads with k = 3 and checks the node count,
/// that each expected 3-mer (or its reverse complement) is present as a node,
/// and the total number of edges.
/// </summary>
public void TestDeBruijnGraphBuilderTiny()
{
    const int kmerSize = 3;

    List<ISequence> tinyReads = TestInputs.GetTinyReads();
    KmerLength = kmerSize;
    SequenceReads.Clear();
    this.AddSequenceReads(tinyReads);
    CreateGraph();

    DeBruijnGraph builtGraph = Graph;
    Assert.AreEqual(9, builtGraph.Nodes.Count);

    HashSet<string> nodeStrings = new HashSet<string>(
        builtGraph.Nodes.Select(node => builtGraph.GetNodeSequence(node).ToString()));

    // Each row is a k-mer and its reverse complement; either form may be
    // the one stored in the graph.
    string[][] expectedKmers =
    {
        new[] { "ATG", "CAT" },
        new[] { "TGC", "GCA" },
        new[] { "GCC", "GGC" },
        new[] { "TCC", "GGA" },
        new[] { "CCT", "AGG" },
        new[] { "CTA", "TAG" },
        new[] { "TAT", "ATA" },
        new[] { "ATC", "GAT" },
        new[] { "CTC", "GAG" }
    };

    foreach (string[] kmerPair in expectedKmers)
    {
        Assert.IsTrue(nodeStrings.Contains(kmerPair[0]) || nodeStrings.Contains(kmerPair[1]));
    }

    int edgeTotal = builtGraph.Nodes.Sum(node => node.ExtensionsCount);
    Assert.AreEqual(31, edgeTotal);
}
/// <summary>
/// Builds the graph for the small reads with k = 6 and checks the node count,
/// that every node (or its reverse complement) is in the expected node set,
/// and the total number of edges.
/// </summary>
public void TestDeBruijnGraphBuilderSmall()
{
    const int kmerSize = 6;

    List<ISequence> smallReads = TestInputs.GetSmallReads();
    KmerLength = kmerSize;
    SequenceReads.Clear();
    this.AddSequenceReads(smallReads);
    CreateGraph();

    DeBruijnGraph builtGraph = Graph;
    Assert.AreEqual(20, builtGraph.Nodes.Count);

    HashSet<string> expectedNodes = GetGraphNodesForSmallReads();
    foreach (DeBruijnNode current in builtGraph.Nodes)
    {
        // A node may be stored in either orientation, so accept both.
        string forward = builtGraph.GetNodeSequence(current).ToString();
        string reverse = builtGraph.GetNodeSequence(current).ReverseComplement.ToString();

        bool isExpected = expectedNodes.Contains(forward) || expectedNodes.Contains(reverse);
        Assert.IsTrue(isExpected);
    }

    int edgeTotal = builtGraph.Nodes.Sum(node => node.ExtensionsCount);
    Assert.AreEqual(51, edgeTotal);
}
/// <summary>
/// Builds contigs with <see cref="SimplePathContigBuilder"/> from the
/// redundant-path reads after redundancy removal. Checks that exactly one
/// contig with the expected sequence is produced and that building contigs
/// leaves the graph's node and edge counts unchanged.
/// </summary>
public void TestContigBuilder2()
{
    const int kmerSize = 6;
    const int pathLengthLimit = 10;

    List<ISequence> reads = TestInputs.GetRedundantPathReads();
    this.SequenceReads.Clear();
    this.AddSequenceReads(reads);
    this.KmerLength = kmerSize;
    this.RedundantPathLengthThreshold = pathLengthLimit;
    this.RedundantPathsPurger = new RedundantPathsPurger(pathLengthLimit);
    this.ContigBuilder = new SimplePathContigBuilder();
    this.CreateGraph();
    this.RemoveRedundancy();

    // Graph size before contigs are built.
    int nodesBefore = this.Graph.Nodes.Count;
    int edgesBefore = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

    IList<ISequence> builtContigs = this.BuildContigs();

    // Graph size after contigs are built.
    int nodesAfter = this.Graph.Nodes.Count;
    int edgesAfter = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

    Assert.AreEqual(1, builtContigs.Count);
    Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", builtContigs[0].ToString());

    // Contig building must not alter the graph.
    Assert.AreEqual(nodesBefore, nodesAfter);
    Assert.AreEqual(edgesBefore, edgesAfter);
}
/// <summary>
/// Runs a <see cref="ParallelDeNovoAssembler"/> over the dangling reads and
/// checks that it produces exactly one assembled sequence, which must be the
/// expected contig.
/// </summary>
public void AssemblerTest()
{
    const int kmerSize = 11;
    const int dangleLimit = 3;
    const int pathLengthLimit = 10;

    List<ISequence> reads = TestInputs.GetDanglingReads();

    ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler
    {
        KmerLength = kmerSize,
        DanglingLinksThreshold = dangleLimit,
        RedundantPathLengthThreshold = pathLengthLimit
    };

    IDeNovoAssembly assembly = assembler.Assemble(reads);

    // Check the assembled output against the expected contig set.
    Assert.AreEqual(1, assembly.AssembledSequences.Count);

    HashSet<string> expectedContigs = new HashSet<string>();
    expectedContigs.Add("ATCGCTAGCATCGAACGATCATT");

    foreach (ISequence assembled in assembly.AssembledSequences)
    {
        string assembledText = assembled.ToString();
        Assert.IsTrue(expectedContigs.Contains(assembledText));
    }
}
/// <summary>
/// Builds contigs from the redundant-path reads, converts the graph into a
/// contig graph, and checks that it has one node per contig, no edges, and
/// that every node's sequence is one of the contigs.
/// </summary>
public void TestContigGraphBuilder2()
{
    const int kmerSize = 6;
    const int pathLengthLimit = 10;

    List<ISequence> reads = TestInputs.GetRedundantPathReads();
    SequenceReads.Clear();
    this.AddSequenceReads(reads);
    KmerLength = kmerSize;
    RedundantPathLengthThreshold = pathLengthLimit;
    RedundantPathsPurger = new RedundantPathsPurger(pathLengthLimit);
    ContigBuilder = new SimplePathContigBuilder();
    CreateGraph();
    RemoveRedundancy();

    IList<ISequence> builtContigs = BuildContigs();
    Graph.BuildContigGraph(builtContigs, kmerSize);

    // One node per contig, and no edges between them.
    int nodeTotal = Graph.Nodes.Count;
    int edgeTotal = Graph.Nodes.Sum(node => node.ExtensionsCount);
    Assert.AreEqual(builtContigs.Count, nodeTotal);
    Assert.AreEqual(0, edgeTotal);

    // Every node sequence must match one of the contigs.
    HashSet<string> contigTexts = new HashSet<string>();
    foreach (ISequence contig in builtContigs)
    {
        contigTexts.Add(contig.ToString());
    }

    foreach (DeBruijnNode graphNode in Graph.Nodes)
    {
        string nodeText = Graph.GetNodeSequence(graphNode).ToString();
        Assert.IsTrue(contigTexts.Contains(nodeText));
    }
}
/// <summary>
/// Builds contigs with <see cref="SimplePathContigBuilder"/> from the
/// dangling reads after both purge steps. Checks that exactly one contig is
/// produced, that it is the expected sequence, and that building contigs
/// leaves the graph's node and edge counts unchanged.
/// </summary>
public void TestContigBuilder1()
{
    const int kmerSize = 11;
    const int dangleLimit = 3;
    const int pathLengthLimit = 10;

    List<ISequence> reads = TestInputs.GetDanglingReads();
    this.SequenceReads.Clear();
    this.AddSequenceReads(reads);
    this.KmerLength = kmerSize;
    this.DanglingLinksThreshold = dangleLimit;
    this.DanglingLinksPurger = new DanglingLinksPurger(dangleLimit);
    this.RedundantPathLengthThreshold = pathLengthLimit;
    this.RedundantPathsPurger = new RedundantPathsPurger(pathLengthLimit);
    this.ContigBuilder = new SimplePathContigBuilder();
    this.CreateGraph();
    this.UnDangleGraph();
    this.RemoveRedundancy();

    // Graph size before contigs are built.
    int nodesBefore = this.Graph.Nodes.Count;
    int edgesBefore = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

    IList<ISequence> builtContigs = this.BuildContigs();

    // Graph size after contigs are built.
    int nodesAfter = this.Graph.Nodes.Count;
    int edgesAfter = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

    Assert.AreEqual(1, builtContigs.Count);

    HashSet<string> expectedContigs = new HashSet<string>();
    expectedContigs.Add("ATCGCTAGCATCGAACGATCATT");
    foreach (ISequence built in builtContigs)
    {
        string builtText = built.ToString();
        Assert.IsTrue(expectedContigs.Contains(builtText));
    }

    // Contig building must not alter the graph.
    Assert.AreEqual(nodesBefore, nodesAfter);
    Assert.AreEqual(edgesBefore, edgesAfter);
}
/// <summary>
/// Runs a <see cref="ParallelDeNovoAssembler"/> over the scaffold reads,
/// passing <c>true</c> as the second <c>Assemble</c> argument, and checks the
/// contigs and scaffolds. Ten contigs and eight scaffolds are expected; each
/// one must match an expected sequence directly or via its reverse complement.
/// </summary>
public void AssemblerTestWithScaffoldBuilder()
{
    const int kmerSize = 6;
    const int dangleLimit = 3;
    const int pathLengthLimit = 7;

    ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler
    {
        KmerLength = kmerSize,
        DanglingLinksThreshold = dangleLimit,
        RedundantPathLengthThreshold = pathLengthLimit,
        ScaffoldRedundancy = 0,
        Depth = 3
    };

    // Register the clone library before assembling.
    CloneLibrary.Instance.AddLibrary("abc", 5f, 20f);

    List<ISequence> reads = TestInputs.GetReadsForScaffolds();
    PaDeNAAssembly assembly = (PaDeNAAssembly)assembler.Assemble(reads, true);

    // Contigs.
    Assert.AreEqual(10, assembly.ContigSequences.Count);
    HashSet<string> expectedContigs = new HashSet<string>
    {
        "GCGCGC",
        "TTTTTT",
        "TTTTTA",
        "TTTTAA",
        "TTTAAA",
        "ATGCCTCCTATCTTAGC",
        "TTTTAGC",
        "TTAGCGCG",
        "CGCGCCGCGC",
        "CGCGCG"
    };

    foreach (ISequence contig in assembly.ContigSequences)
    {
        string contigText = contig.ToString();

        // Only compute the reverse complement when the forward form misses.
        bool contigFound = expectedContigs.Contains(contigText);
        if (!contigFound)
        {
            contigFound = expectedContigs.Contains(
                contigText.GetReverseComplement(new char[contigText.Length]));
        }

        Assert.IsTrue(contigFound);
    }

    // Scaffolds.
    Assert.AreEqual(8, assembly.Scaffolds.Count);
    HashSet<string> expectedScaffolds = new HashSet<string>
    {
        "ATGCCTCCTATCTTAGCGCGC",
        "TTTTTT",
        "TTTTTA",
        "TTTTAA",
        "TTTAAA",
        "CGCGCCGCGC",
        "TTTTAGC",
        "CGCGCG"
    };

    foreach (ISequence scaffold in assembly.Scaffolds)
    {
        string scaffoldText = scaffold.ToString();

        // Only compute the reverse complement when the forward form misses.
        bool scaffoldFound = expectedScaffolds.Contains(scaffoldText);
        if (!scaffoldFound)
        {
            scaffoldFound = expectedScaffolds.Contains(
                scaffoldText.GetReverseComplement(new char[scaffoldText.Length]));
        }

        Assert.IsTrue(scaffoldFound);
    }
}