Пример #1
0
        public void TestDanglingLinksPurger()
        {
            const int        KmerLength      = 11;
            const int        DangleThreshold = 3;
            List <ISequence> readSeqs        = TestInputs.GetDanglingReads();

            SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength        = KmerLength;
            DanglingLinksThreshold = DangleThreshold;
            DanglingLinksPurger    = new DanglingLinksPurger(DangleThreshold);

            CreateGraph();
            int graphCount = Graph.Nodes.Count;
            int graphEdges = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
            HashSet <string> graphNodes = new HashSet <string>(
                Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

            DanglingLinksThreshold = DangleThreshold;
            UnDangleGraph();
            int dangleRemovedGraphCount = Graph.Nodes.Count;
            int dangleRemovedGraphEdge  = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
            HashSet <string> dangleRemovedGraphNodes = new HashSet <string>(
                Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

            // Compare the two graphs
            Assert.AreEqual(2, graphCount - dangleRemovedGraphCount);
            Assert.AreEqual(4, graphEdges - dangleRemovedGraphEdge);
            graphNodes.ExceptWith(dangleRemovedGraphNodes);
            Assert.IsTrue(graphNodes.Contains("TCGAACGATGA"));
            Assert.IsTrue(graphNodes.Contains("ATCGAACGATG"));
        }
Пример #2
0
        public void AssemblerTest()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.KmerLength                   = KmerLength;
                assembler.DanglingLinksThreshold       = DangleThreshold;
                assembler.RedundantPathLengthThreshold = RedundantThreshold;
                IDeNovoAssembly result = assembler.Assemble(readSeqs);

                // Compare the two graphs
                Assert.AreEqual(1, result.AssembledSequences.Count);
                HashSet <string> expectedContigs = new HashSet <string>()
                {
                    "ATCGCTAGCATCGAACGATCATT"
                };

                foreach (ISequence contig in result.AssembledSequences)
                {
                    Assert.IsTrue(expectedContigs.Contains(contig.ToString()));
                }
            }
        }
Пример #3
0
        public void TestContigBuilder1()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            this.SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength              = KmerLength;
            DanglingLinksThreshold       = DangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(DangleThreshold);
            RedundantPathLengthThreshold = RedundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            ContigBuilder = new SimplePathContigBuilder();

            CreateGraph();
            UnDangleGraph();
            RemoveRedundancy();
            int graphCount = Graph.Nodes.Count;
            int graphEdges = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            IList <ISequence> contigs  = BuildContigs();
            int contigsBuiltGraphCount = this.Graph.Nodes.Count;
            int contigsBuilt           = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(1, contigs.Count);
            HashSet <string> expectedContigs = new HashSet <string>()
            {
                "ATCGCTAGCATCGAACGATCATT"
            };

            foreach (ISequence contig in contigs)
            {
                Assert.IsTrue(expectedContigs.Contains(contig.ToString()));
            }

            Assert.AreEqual(graphCount, contigsBuiltGraphCount);
            Assert.AreEqual(graphEdges, contigsBuilt);
        }
Пример #4
0
        public void TestContigGraphBuilder1()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            this.SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength                   = KmerLength;
            this.DanglingLinksThreshold       = DangleThreshold;
            this.RedundantPathLengthThreshold = RedundantThreshold;
            this.DanglingLinksPurger          = new DanglingLinksPurger(DangleThreshold);
            this.RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            this.ContigBuilder                = new SimplePathContigBuilder();

            this.CreateGraph();
            this.UnDangleGraph();
            this.RemoveRedundancy();
            IList <ISequence> contigs = this.BuildContigs();

            this.Graph.BuildContigGraph(contigs, KmerLength);

            int contigGraphCount = this.Graph.Nodes.Count;
            int contigGraphEdges = this.Graph.Nodes.Select(n => n.ExtensionsCount).Sum();

            Assert.AreEqual(contigs.Count, contigGraphCount);
            Assert.AreEqual(0, contigGraphEdges);
            HashSet <string> contigSeqs = new HashSet <string>(contigs.Select(c => c.ToString()));

            foreach (DeBruijnNode node in this.Graph.Nodes)
            {
                Assert.IsTrue(contigSeqs.Contains(this.Graph.GetNodeSequence(node).ToString()));
            }
        }