Beispiel #1
0
        public void TestRedundantPathsPurger()
        {
            const int KmerLength         = 5;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetRedundantPathReads();

            this.SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength = KmerLength;
            this.RedundantPathLengthThreshold = RedundantThreshold;
            this.RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);

            this.CreateGraph();
            int graphCount = this.Graph.Nodes.Count;
            int graphEdges = this.Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            this.RemoveRedundancy();
            int redundancyRemovedGraphCount = this.Graph.Nodes.Count;
            int redundancyRemovedGraphEdge  = this.Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(5, graphCount - redundancyRemovedGraphCount);
            Assert.AreEqual(12, graphEdges - redundancyRemovedGraphEdge);
        }
Beispiel #2
0
        public void TestDeBruijnGraphBuilderTiny()
        {
            const int        KmerLength = 3;
            List <ISequence> reads      = TestInputs.GetTinyReads();

            this.KmerLength = KmerLength;
            this.SequenceReads.Clear();
            this.AddSequenceReads(reads);

            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            Assert.AreEqual(9, graph.Nodes.Count);
            HashSet <string> nodeStrings = new HashSet <string>(graph.Nodes.Select(n => graph.GetNodeSequence(n).ToString()));

            Assert.IsTrue(nodeStrings.Contains("ATG") || nodeStrings.Contains("CAT"));
            Assert.IsTrue(nodeStrings.Contains("TGC") || nodeStrings.Contains("GCA"));
            Assert.IsTrue(nodeStrings.Contains("GCC") || nodeStrings.Contains("GGC"));
            Assert.IsTrue(nodeStrings.Contains("TCC") || nodeStrings.Contains("GGA"));
            Assert.IsTrue(nodeStrings.Contains("CCT") || nodeStrings.Contains("AGG"));
            Assert.IsTrue(nodeStrings.Contains("CTA") || nodeStrings.Contains("TAG"));
            Assert.IsTrue(nodeStrings.Contains("TAT") || nodeStrings.Contains("ATA"));
            Assert.IsTrue(nodeStrings.Contains("ATC") || nodeStrings.Contains("GAT"));
            Assert.IsTrue(nodeStrings.Contains("CTC") || nodeStrings.Contains("GAG"));
            int totalEdges = graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            Assert.AreEqual(31, totalEdges);
        }
Beispiel #3
0
        public void TestDeBruijnGraphBuilderSmall()
        {
            const int        KmerLength = 6;
            List <ISequence> reads      = TestInputs.GetSmallReads();

            this.KmerLength = KmerLength;
            this.SequenceReads.Clear();
            this.AddSequenceReads(reads);

            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            Assert.AreEqual(20, graph.Nodes.Count);
            HashSet <string> nodeStrings = GetGraphNodesForSmallReads();
            string           nodeStr, nodeStrRC;

            foreach (DeBruijnNode node in graph.Nodes)
            {
                nodeStr   = graph.GetNodeSequence(node).ToString();
                nodeStrRC = graph.GetNodeSequence(node).ReverseComplement.ToString();
                Assert.IsTrue(nodeStrings.Contains(nodeStr) || nodeStrings.Contains(nodeStrRC));
            }

            int totalEdges = graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            Assert.AreEqual(51, totalEdges);
        }
Beispiel #4
0
        public void BuildScaffold()
        {
            const int        kmerLength         = 6;
            const int        dangleThreshold    = 3;
            const int        redundantThreshold = 7;
            List <ISequence> sequences          = TestInputs.GetReadsForScaffolds();

            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.AddSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs();

            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);
            GraphScaffoldBuilder scaffold    = new GraphScaffoldBuilder();
            IList <ISequence>    scaffoldSeq = scaffold.BuildScaffold(
                sequences, contigs, this.KmerLength, 3, 0);

            Assert.AreEqual(scaffoldSeq.Count, 8);
            Assert.IsTrue(scaffoldSeq[0].ToString().Equals(
                              "ATGCCTCCTATCTTAGCGCGC"));
            scaffold.Dispose();
        }
Beispiel #5
0
        public void TestDanglingLinksPurger()
        {
            const int        KmerLength      = 11;
            const int        DangleThreshold = 3;
            List <ISequence> readSeqs        = TestInputs.GetDanglingReads();

            SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength        = KmerLength;
            DanglingLinksThreshold = DangleThreshold;
            DanglingLinksPurger    = new DanglingLinksPurger(DangleThreshold);

            CreateGraph();
            int graphCount = Graph.Nodes.Count;
            int graphEdges = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
            HashSet <string> graphNodes = new HashSet <string>(
                Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

            DanglingLinksThreshold = DangleThreshold;
            UnDangleGraph();
            int dangleRemovedGraphCount = Graph.Nodes.Count;
            int dangleRemovedGraphEdge  = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
            HashSet <string> dangleRemovedGraphNodes = new HashSet <string>(
                Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

            // Compare the two graphs
            Assert.AreEqual(2, graphCount - dangleRemovedGraphCount);
            Assert.AreEqual(4, graphEdges - dangleRemovedGraphEdge);
            graphNodes.ExceptWith(dangleRemovedGraphNodes);
            Assert.IsTrue(graphNodes.Contains("TCGAACGATGA"));
            Assert.IsTrue(graphNodes.Contains("ATCGAACGATG"));
        }
Beispiel #6
0
        public void AssemblerTest()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.KmerLength                   = KmerLength;
                assembler.DanglingLinksThreshold       = DangleThreshold;
                assembler.RedundantPathLengthThreshold = RedundantThreshold;
                IDeNovoAssembly result = assembler.Assemble(readSeqs);

                // Compare the two graphs
                Assert.AreEqual(1, result.AssembledSequences.Count);
                HashSet <string> expectedContigs = new HashSet <string>()
                {
                    "ATCGCTAGCATCGAACGATCATT"
                };

                foreach (ISequence contig in result.AssembledSequences)
                {
                    Assert.IsTrue(expectedContigs.Contains(contig.ToString()));
                }
            }
        }
Beispiel #7
0
        public void TestContigBuilder2()
        {
            const int KmerLength         = 6;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetRedundantPathReads();

            SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength = KmerLength;
            RedundantPathLengthThreshold = RedundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            ContigBuilder = new SimplePathContigBuilder();

            CreateGraph();
            RemoveRedundancy();
            int graphCount = Graph.Nodes.Count;
            int graphEdges = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            IList <ISequence> contigs  = BuildContigs();
            int contigsBuiltGraphCount = Graph.Nodes.Count;
            int contigsBuilt           = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(1, contigs.Count);
            Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", contigs[0].ToString());
            Assert.AreEqual(graphCount, contigsBuiltGraphCount);
            Assert.AreEqual(graphEdges, contigsBuilt);
        }
Beispiel #8
0
        public void TestContigGraphBuilder2()
        {
            const int KmerLength         = 6;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetRedundantPathReads();

            this.SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength = KmerLength;
            this.RedundantPathLengthThreshold = RedundantThreshold;
            this.RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            this.ContigBuilder = new SimplePathContigBuilder();

            this.CreateGraph();
            this.RemoveRedundancy();
            IList <ISequence> contigs = this.BuildContigs();

            this.Graph.BuildContigGraph(contigs, KmerLength);

            int contigGraphCount = this.Graph.Nodes.Count;
            int contigGraphEdges = this.Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            Assert.AreEqual(contigs.Count, contigGraphCount);
            Assert.AreEqual(0, contigGraphEdges);
            HashSet <string> contigSeqs = new HashSet <string>(contigs.Select(c => c.ToString()));

            foreach (DeBruijnNode node in this.Graph.Nodes)
            {
                Assert.IsTrue(contigSeqs.Contains(this.Graph.GetNodeSequence(node).ToString()));
            }
        }
Beispiel #9
0
        public void TestContigBuilder1()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            this.SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength              = KmerLength;
            DanglingLinksThreshold       = DangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(DangleThreshold);
            RedundantPathLengthThreshold = RedundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            ContigBuilder = new SimplePathContigBuilder();

            CreateGraph();
            UnDangleGraph();
            RemoveRedundancy();
            int graphCount = Graph.Nodes.Count;
            int graphEdges = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            IList <ISequence> contigs  = BuildContigs();
            int contigsBuiltGraphCount = this.Graph.Nodes.Count;
            int contigsBuilt           = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(1, contigs.Count);
            HashSet <string> expectedContigs = new HashSet <string>()
            {
                "ATCGCTAGCATCGAACGATCATT"
            };

            foreach (ISequence contig in contigs)
            {
                Assert.IsTrue(expectedContigs.Contains(contig.ToString()));
            }

            Assert.AreEqual(graphCount, contigsBuiltGraphCount);
            Assert.AreEqual(graphEdges, contigsBuilt);
        }
Beispiel #10
0
        public void AssemblerTestWithScaffoldBuilder()
        {
            const int kmerLength         = 6;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 7;

            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.KmerLength                   = kmerLength;
                assembler.DanglingLinksThreshold       = dangleThreshold;
                assembler.RedundantPathLengthThreshold = redundantThreshold;

                assembler.ScaffoldRedundancy = 0;
                assembler.Depth = 3;
                CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);

                PaDeNAAssembly result = (PaDeNAAssembly)assembler.Assemble(TestInputs.GetReadsForScaffolds(), true);

                Assert.AreEqual(10, result.ContigSequences.Count);

                HashSet <string> expectedContigs = new HashSet <string>
                {
                    "GCGCGC",
                    "TTTTTT",
                    "TTTTTA",
                    "TTTTAA",
                    "TTTAAA",
                    "ATGCCTCCTATCTTAGC",
                    "TTTTAGC",
                    "TTAGCGCG",
                    "CGCGCCGCGC",
                    "CGCGCG"
                };

                foreach (ISequence contig in result.ContigSequences)
                {
                    string contigSeq = contig.ToString();
                    Assert.IsTrue(
                        expectedContigs.Contains(contigSeq) ||
                        expectedContigs.Contains(contigSeq.GetReverseComplement(new char[contigSeq.Length])));
                }

                Assert.AreEqual(8, result.Scaffolds.Count);
                HashSet <string> expectedScaffolds = new HashSet <string>
                {
                    "ATGCCTCCTATCTTAGCGCGC",
                    "TTTTTT",
                    "TTTTTA",
                    "TTTTAA",
                    "TTTAAA",
                    "CGCGCCGCGC",
                    "TTTTAGC",
                    "CGCGCG"
                };

                foreach (ISequence scaffold in result.Scaffolds)
                {
                    string scaffoldSeq = scaffold.ToString();
                    Assert.IsTrue(
                        expectedScaffolds.Contains(scaffoldSeq) ||
                        expectedScaffolds.Contains(scaffoldSeq.GetReverseComplement(new char[scaffoldSeq.Length])));
                }
            }
        }