Esempio n. 1
0
        /// <summary>
        /// Builds a de Bruijn graph from the scaffold test reads, purges dangling links and
        /// redundant paths, builds contigs, and then runs <see cref="GraphScaffoldBuilder"/>
        /// over the reads and contigs. Verifies the number of scaffolds produced and the
        /// sequence of the first scaffold.
        /// </summary>
        public void BuildScaffold()
        {
            const int kmerLength = 6;
            const int dangleThreshold = 3;
            const int redundantThreshold = 7;
            List<ISequence> sequences = TestInputs.GetReadsForScaffolds();

            // Configure the assembler state and build the initial graph.
            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.AddSequenceReads(sequences);
            CreateGraph();

            // Install the purgers, then clean the graph.
            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList<ISequence> contigs = BuildContigs();

            // The library is registered on the shared CloneLibrary singleton before scaffolding.
            CloneLibrary.Instance.AddLibrary("abc", 5f, 20f);
            GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder();

            // NOTE(review): the trailing 3 and 0 are presumably the depth and scaffold
            // redundancy - confirm against GraphScaffoldBuilder.BuildScaffold.
            IList<ISequence> scaffoldSeq = scaffold.BuildScaffold(
                sequences, contigs, this.KmerLength, 3, 0);

            // Expected value goes first so a failure reports expected/actual correctly.
            Assert.AreEqual(8, scaffoldSeq.Count);
            Assert.AreEqual("ATGCCTCCTATCTTAGCGCGC", scaffoldSeq[0].ToString());
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a graph from the dangling-read test input, purges dangling links, and
        /// checks that exactly 2 nodes and 4 edges were removed and that the removed nodes
        /// include the two expected k-mers.
        /// </summary>
        public void TestDanglingLinksPurger()
        {
            const int kmerSize = 11;
            const int dangleLimit = 3;
            List<ISequence> reads = TestInputs.GetDanglingReads();

            this.SequenceReads.Clear();
            this.AddSequenceReads(reads);
            this.KmerLength = kmerSize;
            this.DanglingLinksThreshold = dangleLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(dangleLimit);

            // Snapshot the graph before purging.
            this.CreateGraph();
            int nodesBefore = this.Graph.Nodes.Count;
            int edgesBefore = this.Graph.Nodes.Sum(node => node.ExtensionsCount);
            HashSet<string> removedNodes = new HashSet<string>();
            foreach (var node in this.Graph.Nodes)
            {
                removedNodes.Add(this.Graph.GetNodeSequence(node).ToString());
            }

            // NOTE(review): the threshold is assigned a second time here, as in the
            // original test; it looks redundant - confirm before removing.
            this.DanglingLinksThreshold = dangleLimit;
            this.UnDangleGraph();

            // Snapshot the graph after purging.
            int nodesAfter = this.Graph.Nodes.Count;
            int edgesAfter = this.Graph.Nodes.Sum(node => node.ExtensionsCount);
            HashSet<string> survivingNodes = new HashSet<string>();
            foreach (var node in this.Graph.Nodes)
            {
                survivingNodes.Add(this.Graph.GetNodeSequence(node).ToString());
            }

            // Compare the before/after snapshots.
            Assert.AreEqual(2, nodesBefore - nodesAfter);
            Assert.AreEqual(4, edgesBefore - edgesAfter);

            // What remains in the "before" set are the nodes the purger removed.
            removedNodes.ExceptWith(survivingNodes);
            Assert.IsTrue(removedNodes.Contains("TCGAACGATGA"));
            Assert.IsTrue(removedNodes.Contains("ATCGAACGATG"));
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a graph from the redundant-path test reads, removes redundant paths, and
        /// checks that exactly 5 nodes and 12 edges were removed.
        /// </summary>
        public void TestRedundantPathsPurger()
        {
            const int kmerSize = 5;
            const int pathLimit = 10;

            List<ISequence> reads = TestInputs.GetRedundantPathReads();

            SequenceReads.Clear();
            this.AddSequenceReads(reads);
            this.KmerLength = kmerSize;
            RedundantPathLengthThreshold = pathLimit;
            RedundantPathsPurger = new RedundantPathsPurger(pathLimit);

            // Counts before redundancy removal.
            CreateGraph();
            int nodesBefore = Graph.Nodes.Count;
            int edgesBefore = Graph.Nodes.Sum(node => node.ExtensionsCount);

            // Counts after redundancy removal.
            RemoveRedundancy();
            int nodesAfter = Graph.Nodes.Count;
            int edgesAfter = Graph.Nodes.Sum(node => node.ExtensionsCount);

            int removedNodes = nodesBefore - nodesAfter;
            int removedEdges = edgesBefore - edgesAfter;

            Assert.AreEqual(5, removedNodes);
            Assert.AreEqual(12, removedEdges);
        }
Esempio n. 4
0
        /// <summary>
        /// Builds a graph with k = 3 from the tiny test reads and checks that it has 9 nodes,
        /// that each expected k-mer is present in one of its two listed orientations, and
        /// that the extension counts of all nodes sum to 31.
        /// </summary>
        public void TestDeBruijnGraphBuilderTiny()
        {
            const int kmerSize = 3;
            List<ISequence> tinyReads = TestInputs.GetTinyReads();

            this.KmerLength = kmerSize;
            SequenceReads.Clear();
            this.AddSequenceReads(tinyReads);

            CreateGraph();
            DeBruijnGraph built = Graph;

            Assert.AreEqual(9, built.Nodes.Count);

            HashSet<string> present = new HashSet<string>();
            foreach (var node in built.Nodes)
            {
                present.Add(built.GetNodeSequence(node).ToString());
            }

            // Each row lists the two spellings accepted for one node; either may be stored.
            string[][] acceptedSpellings =
            {
                new[] { "ATG", "CAT" },
                new[] { "TGC", "GCA" },
                new[] { "GCC", "GGC" },
                new[] { "TCC", "GGA" },
                new[] { "CCT", "AGG" },
                new[] { "CTA", "TAG" },
                new[] { "TAT", "ATA" },
                new[] { "ATC", "GAT" },
                new[] { "CTC", "GAG" }
            };

            foreach (string[] spelling in acceptedSpellings)
            {
                Assert.IsTrue(present.Contains(spelling[0]) || present.Contains(spelling[1]));
            }

            int edgeTotal = built.Nodes.Sum(node => node.ExtensionsCount);

            Assert.AreEqual(31, edgeTotal);
        }
Esempio n. 5
0
        /// <summary>
        /// Builds a graph with k = 6 from the small test reads and checks that it has 20 nodes,
        /// that every node (or its reverse complement) is in the expected node set, and that
        /// the extension counts of all nodes sum to 51.
        /// </summary>
        public void TestDeBruijnGraphBuilderSmall()
        {
            const int kmerSize = 6;
            List<ISequence> smallReads = TestInputs.GetSmallReads();

            this.KmerLength = kmerSize;
            SequenceReads.Clear();
            this.AddSequenceReads(smallReads);

            CreateGraph();
            DeBruijnGraph built = Graph;

            Assert.AreEqual(20, built.Nodes.Count);

            HashSet<string> expectedNodes = GetGraphNodesForSmallReads();

            foreach (DeBruijnNode current in built.Nodes)
            {
                // Both spellings are materialised before the check, as in the original test.
                var sequence = built.GetNodeSequence(current);
                string forward = sequence.ToString();
                string reverse = sequence.ReverseComplement.ToString();

                Assert.IsTrue(expectedNodes.Contains(forward) || expectedNodes.Contains(reverse));
            }

            int edgeTotal = built.Nodes.Sum(node => node.ExtensionsCount);

            Assert.AreEqual(51, edgeTotal);
        }
Esempio n. 6
0
        /// <summary>
        /// Builds and de-duplicates a graph from the redundant-path test reads, builds contigs
        /// with <see cref="SimplePathContigBuilder"/>, and checks that a single expected contig
        /// is produced and that contig building leaves the node and edge counts unchanged.
        /// </summary>
        public void TestContigBuilder2()
        {
            const int kmerSize = 6;
            const int pathLimit = 10;

            List<ISequence> reads = TestInputs.GetRedundantPathReads();

            this.SequenceReads.Clear();
            this.AddSequenceReads(reads);
            this.KmerLength = kmerSize;
            this.RedundantPathLengthThreshold = pathLimit;
            this.RedundantPathsPurger = new RedundantPathsPurger(pathLimit);
            this.ContigBuilder = new SimplePathContigBuilder();

            this.CreateGraph();
            this.RemoveRedundancy();

            // Graph size before contigs are built.
            int nodesBefore = this.Graph.Nodes.Count;
            int edgesBefore = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

            IList<ISequence> builtContigs = this.BuildContigs();

            // Graph size after contigs are built.
            int nodesAfter = this.Graph.Nodes.Count;
            int edgesAfter = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

            Assert.AreEqual(1, builtContigs.Count);
            Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", builtContigs[0].ToString());
            Assert.AreEqual(nodesBefore, nodesAfter);
            Assert.AreEqual(edgesBefore, edgesAfter);
        }
Esempio n. 7
0
        /// <summary>
        /// Runs a full <see cref="ParallelDeNovoAssembler"/> assembly over the dangling-read
        /// test input and checks that exactly one assembled sequence is returned and that it
        /// matches the expected contig.
        /// </summary>
        public void AssemblerTest()
        {
            const int kmerSize = 11;
            const int dangleLimit = 3;
            const int pathLimit = 10;

            List<ISequence> reads = TestInputs.GetDanglingReads();

            ParallelDeNovoAssembler denovo = new ParallelDeNovoAssembler();
            denovo.KmerLength = kmerSize;
            denovo.DanglingLinksThreshold = dangleLimit;
            denovo.RedundantPathLengthThreshold = pathLimit;

            IDeNovoAssembly assembly = denovo.Assemble(reads);

            // Exactly one sequence is expected from the assembly.
            Assert.AreEqual(1, assembly.AssembledSequences.Count);

            HashSet<string> allowed = new HashSet<string>();
            allowed.Add("ATCGCTAGCATCGAACGATCATT");

            foreach (ISequence assembled in assembly.AssembledSequences)
            {
                string text = assembled.ToString();
                Assert.IsTrue(allowed.Contains(text));
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Builds contigs from the redundant-path test reads, rebuilds the graph as a contig
        /// graph, and checks that it has one node per contig, no edges, and that every node
        /// sequence is one of the contigs.
        /// </summary>
        public void TestContigGraphBuilder2()
        {
            const int kmerSize = 6;
            const int pathLimit = 10;

            List<ISequence> reads = TestInputs.GetRedundantPathReads();

            SequenceReads.Clear();
            this.AddSequenceReads(reads);
            this.KmerLength = kmerSize;
            RedundantPathLengthThreshold = pathLimit;
            RedundantPathsPurger = new RedundantPathsPurger(pathLimit);
            ContigBuilder = new SimplePathContigBuilder();

            CreateGraph();
            RemoveRedundancy();
            IList<ISequence> builtContigs = BuildContigs();

            Graph.BuildContigGraph(builtContigs, kmerSize);

            int nodeTotal = Graph.Nodes.Count;
            int edgeTotal = Graph.Nodes.Sum(node => node.ExtensionsCount);

            Assert.AreEqual(builtContigs.Count, nodeTotal);
            Assert.AreEqual(0, edgeTotal);

            HashSet<string> contigTexts = new HashSet<string>();
            foreach (ISequence contig in builtContigs)
            {
                contigTexts.Add(contig.ToString());
            }

            // Every node of the contig graph must spell out one of the contigs.
            foreach (DeBruijnNode current in Graph.Nodes)
            {
                string nodeText = Graph.GetNodeSequence(current).ToString();
                Assert.IsTrue(contigTexts.Contains(nodeText));
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Builds a graph from the dangling-read test input, purges dangling links and
        /// redundant paths, builds contigs with <see cref="SimplePathContigBuilder"/>, and
        /// checks that the single expected contig is produced and that contig building
        /// leaves the node and edge counts unchanged.
        /// </summary>
        public void TestContigBuilder1()
        {
            const int kmerSize = 11;
            const int dangleLimit = 3;
            const int pathLimit = 10;

            List<ISequence> reads = TestInputs.GetDanglingReads();

            SequenceReads.Clear();
            this.AddSequenceReads(reads);
            this.KmerLength = kmerSize;
            this.DanglingLinksThreshold = dangleLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(dangleLimit);
            this.RedundantPathLengthThreshold = pathLimit;
            this.RedundantPathsPurger = new RedundantPathsPurger(pathLimit);
            this.ContigBuilder = new SimplePathContigBuilder();

            this.CreateGraph();
            this.UnDangleGraph();
            this.RemoveRedundancy();

            // Graph size before contigs are built.
            int nodesBefore = this.Graph.Nodes.Count;
            int edgesBefore = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

            IList<ISequence> builtContigs = this.BuildContigs();

            // Graph size after contigs are built.
            int nodesAfter = this.Graph.Nodes.Count;
            int edgesAfter = this.Graph.Nodes.Sum(node => node.ExtensionsCount);

            Assert.AreEqual(1, builtContigs.Count);

            HashSet<string> allowed = new HashSet<string>();
            allowed.Add("ATCGCTAGCATCGAACGATCATT");

            foreach (ISequence built in builtContigs)
            {
                string text = built.ToString();
                Assert.IsTrue(allowed.Contains(text));
            }

            Assert.AreEqual(nodesBefore, nodesAfter);
            Assert.AreEqual(edgesBefore, edgesAfter);
        }
Esempio n. 10
0
        /// <summary>
        /// Runs a full <see cref="ParallelDeNovoAssembler"/> assembly with the second
        /// <c>Assemble</c> argument set to <c>true</c> over the scaffold test reads, then
        /// checks the number of contigs and scaffolds and that each one, or its reverse
        /// complement, is in the expected set.
        /// </summary>
        public void AssemblerTestWithScaffoldBuilder()
        {
            const int kmerSize = 6;
            const int dangleLimit = 3;
            const int pathLimit = 7;

            ParallelDeNovoAssembler denovo = new ParallelDeNovoAssembler();
            denovo.KmerLength = kmerSize;
            denovo.DanglingLinksThreshold = dangleLimit;
            denovo.RedundantPathLengthThreshold = pathLimit;
            denovo.ScaffoldRedundancy = 0;
            denovo.Depth = 3;

            // The library is registered on the shared CloneLibrary singleton before assembling.
            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);

            List<ISequence> reads = TestInputs.GetReadsForScaffolds();
            PaDeNAAssembly assembly = (PaDeNAAssembly)denovo.Assemble(reads, true);

            // Contigs: count first, then membership in either orientation.
            Assert.AreEqual(10, assembly.ContigSequences.Count);

            HashSet<string> allowedContigs = new HashSet<string>
            {
                "GCGCGC",
                "TTTTTT",
                "TTTTTA",
                "TTTTAA",
                "TTTAAA",
                "ATGCCTCCTATCTTAGC",
                "TTTTAGC",
                "TTAGCGCG",
                "CGCGCCGCGC",
                "CGCGCG"
            };

            foreach (ISequence contig in assembly.ContigSequences)
            {
                string text = contig.ToString();
                bool recognised = allowedContigs.Contains(text);
                if (!recognised)
                {
                    // Fall back to the reverse complement only when the forward form is unknown.
                    recognised = allowedContigs.Contains(
                        text.GetReverseComplement(new char[text.Length]));
                }

                Assert.IsTrue(recognised);
            }

            // Scaffolds: same two checks against the scaffold expectations.
            Assert.AreEqual(8, assembly.Scaffolds.Count);

            HashSet<string> allowedScaffolds = new HashSet<string>
            {
                "ATGCCTCCTATCTTAGCGCGC",
                "TTTTTT",
                "TTTTTA",
                "TTTTAA",
                "TTTAAA",
                "CGCGCCGCGC",
                "TTTTAGC",
                "CGCGCG"
            };

            foreach (ISequence scaffold in assembly.Scaffolds)
            {
                string text = scaffold.ToString();
                bool recognised = allowedScaffolds.Contains(text);
                if (!recognised)
                {
                    recognised = allowedScaffolds.Contains(
                        text.GetReverseComplement(new char[text.Length]));
                }

                Assert.IsTrue(recognised);
            }
        }