Ejemplo n.º 1
0
        /// <summary>
        /// Assembles the scaffold test reads with a <see cref="ParallelDeNovoAssembler"/>
        /// (k-mer length 6, dangling-links threshold 3, redundant-path threshold 7) and
        /// compares the resulting contig sequences and scaffolds with fixed expected sets.
        /// </summary>
        public void AssemblerTestWithScaffoldBuilder()
        {
            const int kmerSize = 6;
            const int danglingLimit = 3;
            const int redundantPathLimit = 7;

            using (var denovo = new ParallelDeNovoAssembler())
            {
                // Graph construction and clean-up parameters.
                denovo.KmerLength = kmerSize;
                denovo.DanglingLinksThreshold = danglingLimit;
                denovo.RedundantPathLengthThreshold = redundantPathLimit;

                // Remaining assembler settings used by this scenario.
                denovo.ScaffoldRedundancy = 0;
                denovo.Depth = 3;

                // Registers a library named "abc" on the shared CloneLibrary instance before assembling.
                // NOTE(review): 5 and 20 are presumably the library's insert-size statistics - confirm.
                CloneLibrary.Instance.AddLibrary("abc", 5, 20);

                // NOTE(review): the 'true' argument presumably requests scaffold generation - confirm
                // against the Assemble overload being called.
                var assembly = (PadenaAssembly)denovo.Assemble(TestInputs.GetReadsForScaffolds(), true);

                // Contigs the assembly is expected to contain.
                var contigsExpected = new HashSet<string>
                {
                    "TTTTTT",
                    "CGCGCG",
                    "TTAGCGCG",
                    "CGCGCCGCGC",
                    "GCGCGC",
                    "TTTTTA",
                    "TTTTAA",
                    "TTTAAA",
                    "TTTTAGC",
                    "ATGCCTCCTATCTTAGC"
                };
                AlignmentHelpers.CompareSequenceLists(contigsExpected, assembly.ContigSequences);

                // Scaffolds the assembly is expected to contain.
                var scaffoldsExpected = new HashSet<string>
                {
                    "ATGCCTCCTATCTTAGCGCGC",
                    "TTTAAA",
                    "TTTTTT",
                    "TTTTAGC",
                    "TTTTAA",
                    "CGCGCCGCGC",
                    "TTTTTA",
                    "CGCGCG"
                };
                AlignmentHelpers.CompareSequenceLists(scaffoldsExpected, assembly.Scaffolds);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the De Bruijn graph for the tiny read set with k-mer length 3 and checks
        /// the node count (9), the presence of every expected node in one of its two
        /// listed orientations, and the summed extension count over all nodes (31).
        /// </summary>
        public void TestDeBruijnGraphBuilderTiny()
        {
            const int kmerSize = 3;
            List<ISequence> tinyReads = TestInputs.GetTinyReads();

            // Configure this instance and replace any previously loaded reads.
            this.KmerLength = kmerSize;
            this.SequenceReads.Clear();
            this.SetSequenceReads(tinyReads);
            this.CreateGraph();

            DeBruijnGraph built = this.Graph;
            Assert.AreEqual(9, built.NodeCount);

            // Collect the textual form of every node sequence in the graph.
            var kmers = new HashSet<string>();
            foreach (var node in built.GetNodes())
            {
                kmers.Add(new string(built.GetNodeSequence(node).Select(symbol => (char)symbol).ToArray()));
            }

            // Each row lists a k-mer and its reverse complement; either form is accepted.
            string[,] strandPairs =
            {
                { "ATG", "CAT" },
                { "TGC", "GCA" },
                { "GCC", "GGC" },
                { "TCC", "GGA" },
                { "CCT", "AGG" },
                { "CTA", "TAG" },
                { "TAT", "ATA" },
                { "ATC", "GAT" },
                { "CTC", "GAG" }
            };

            for (int row = 0; row < strandPairs.GetLength(0); row++)
            {
                Assert.IsTrue(kmers.Contains(strandPairs[row, 0]) || kmers.Contains(strandPairs[row, 1]));
            }

            // Sum of ExtensionsCount across all nodes.
            long edgeTotal = built.GetNodes().Sum(node => node.ExtensionsCount);
            Assert.AreEqual(31, edgeTotal);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the De Bruijn graph for the small read set with k-mer length 6 and checks
        /// the node count (20), that every node (or its reverse complement) appears in the
        /// set returned by GetGraphNodesForSmallReads, and the summed extension count (51).
        /// </summary>
        public void TestDeBruijnGraphBuilderSmall()
        {
            const int kmerSize = 6;
            List<ISequence> smallReads = TestInputs.GetSmallReads();

            // Configure this instance and replace any previously loaded reads.
            this.KmerLength = kmerSize;
            this.SequenceReads.Clear();
            this.SetSequenceReads(smallReads);
            this.CreateGraph();

            DeBruijnGraph built = this.Graph;
            Assert.AreEqual(20, built.NodeCount);

            HashSet<string> knownNodes = GetGraphNodesForSmallReads();
            foreach (DeBruijnNode current in built.GetNodes())
            {
                // Text of the node as stored, and of its reverse complement.
                var forwardText = new string(
                    built.GetNodeSequence(current).Select(symbol => (char)symbol).ToArray());
                var reverseText = new string(
                    built.GetNodeSequence(current)
                         .GetReverseComplementedSequence()
                         .Select(symbol => (char)symbol)
                         .ToArray());

                bool recognised = knownNodes.Contains(forwardText) || knownNodes.Contains(reverseText);
                Assert.IsTrue(recognised);
            }

            // Sum of ExtensionsCount across all nodes.
            long edgeTotal = built.GetNodes().Sum(node => node.ExtensionsCount);
            Assert.AreEqual(51, edgeTotal);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Assembles the dangling-reads test input with a <see cref="ParallelDeNovoAssembler"/>
        /// (k-mer length 11, dangling-links threshold 3, redundant-path threshold 10) and
        /// checks that exactly one sequence is assembled and that it is the expected contig.
        /// </summary>
        public void AssemblerTest()
        {
            const int kmerSize = 11;
            const int danglingLimit = 3;
            const int redundantPathLimit = 10;

            List<ISequence> inputReads = TestInputs.GetDanglingReads();

            using (var denovo = new ParallelDeNovoAssembler())
            {
                denovo.KmerLength = kmerSize;
                denovo.DanglingLinksThreshold = danglingLimit;
                denovo.RedundantPathLengthThreshold = redundantPathLimit;

                IDeNovoAssembly assembly = denovo.Assemble(inputReads);

                // Exactly one assembled sequence is expected.
                Assert.AreEqual(1, assembly.AssembledSequences.Count());

                var allowed = new HashSet<string>
                {
                    "ATCGCTAGCATCGAACGATCATT"
                };

                // Every assembled sequence must be one of the allowed contigs.
                foreach (ISequence assembled in assembly.AssembledSequences)
                {
                    string text = new string(assembled.Select(symbol => (char)symbol).ToArray());
                    Assert.IsTrue(allowed.Contains(text));
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the graph for the redundant-path reads with k-mer length 6, removes
        /// redundancy, builds contigs with a <see cref="SimplePathContigBuilder"/>, and
        /// checks that the single resulting contig matches the expected sequence and that
        /// the node count and summed extension count read after BuildContigs equal the
        /// values read before it.
        /// </summary>
        public void TestContigBuilder2()
        {
            const int kmerSize = 6;
            const int redundantPathLimit = 10;

            List<ISequence> inputReads = TestInputs.GetRedundantPathReads();

            // Replace any previously loaded reads, then configure the pipeline steps.
            this.SequenceReads.Clear();
            this.SetSequenceReads(inputReads);
            this.KmerLength = kmerSize;
            this.RedundantPathLengthThreshold = redundantPathLimit;
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantPathLimit);
            this.ContigBuilder = new SimplePathContigBuilder();

            this.CreateGraph();
            this.RemoveRedundancy();

            // Snapshot of the graph before contigs are requested.
            long nodesBefore = this.Graph.NodeCount;
            long edgesBefore = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

            // NOTE(review): if BuildContigs is lazily evaluated, the "after" values below are
            // read before the contigs are actually enumerated - confirm.
            IEnumerable<ISequence> built = this.BuildContigs();
            long nodesAfter = this.Graph.NodeCount;
            long edgesAfter = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

            // Exactly one contig with the expected text.
            Assert.AreEqual(1, built.Count());
            ISequence onlyContig = built.ElementAt(0);
            string contigText = new string(onlyContig.Select(symbol => (char)symbol).ToArray());
            Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", contigText);

            // The graph readings taken around BuildContigs must agree.
            Assert.AreEqual(nodesBefore, nodesAfter);
            Assert.AreEqual(edgesBefore, edgesAfter);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds the graph for the redundant-path reads with k-mer length 5, runs
        /// RemoveRedundancy with a <see cref="RedundantPathsPurger"/> (threshold 10), and
        /// checks that the node count drops by 5 and the summed extension count by 12.
        /// </summary>
        public void TestRedundantPathsPurger()
        {
            const int kmerSize = 5;
            const int redundantPathLimit = 10;

            List<ISequence> inputReads = TestInputs.GetRedundantPathReads();

            // Replace any previously loaded reads, then configure the purger.
            this.SequenceReads.Clear();
            this.SetSequenceReads(inputReads);
            this.KmerLength = kmerSize;
            this.RedundantPathLengthThreshold = redundantPathLimit;
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantPathLimit);

            // Graph as built, before any purging.
            this.CreateGraph();
            long nodesBefore = this.Graph.NodeCount;
            long edgesBefore = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

            // Graph after redundant paths have been removed.
            this.RemoveRedundancy();
            long nodesAfter = this.Graph.NodeCount;
            long edgesAfter = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

            long removedNodes = nodesBefore - nodesAfter;
            long removedEdges = edgesBefore - edgesAfter;

            Assert.AreEqual(5, removedNodes);
            Assert.AreEqual(12, removedEdges);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds the graph for the dangling reads with k-mer length 11, removes dangling
        /// links and redundant paths, builds contigs with a
        /// <see cref="SimplePathContigBuilder"/>, and checks that exactly one contig is
        /// produced, that it is the expected sequence, and that the node count and summed
        /// extension count read after BuildContigs equal the values read before it.
        /// </summary>
        public void TestContigBuilder1()
        {
            const int kmerSize = 11;
            const int danglingLimit = 3;
            const int redundantPathLimit = 10;

            List<ISequence> inputReads = TestInputs.GetDanglingReads();

            // Replace any previously loaded reads, then configure the pipeline steps.
            this.SequenceReads.Clear();
            this.SetSequenceReads(inputReads);
            this.KmerLength = kmerSize;
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
            this.RedundantPathLengthThreshold = redundantPathLimit;
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantPathLimit);
            this.ContigBuilder = new SimplePathContigBuilder();

            this.CreateGraph();
            this.UnDangleGraph();
            this.RemoveRedundancy();

            // Snapshot of the graph before contigs are requested.
            long nodesBefore = this.Graph.NodeCount;
            long edgesBefore = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

            // NOTE(review): if BuildContigs is lazily evaluated, the "after" values below are
            // read before the contigs are actually enumerated - confirm.
            IEnumerable<ISequence> built = this.BuildContigs();
            long nodesAfter = this.Graph.NodeCount;
            long edgesAfter = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);

            // Exactly one contig is expected.
            Assert.AreEqual(1, built.Count());

            var allowed = new HashSet<string>
            {
                "ATCGCTAGCATCGAACGATCATT"
            };

            // Every contig must be one of the allowed sequences.
            foreach (ISequence piece in built)
            {
                Assert.IsTrue(allowed.Contains(new string(piece.Select(symbol => (char)symbol).ToArray())));
            }

            // The graph readings taken around BuildContigs must agree.
            Assert.AreEqual(nodesBefore, nodesAfter);
            Assert.AreEqual(edgesBefore, edgesAfter);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds the graph for the dangling reads with k-mer length 11, runs UnDangleGraph
        /// with a <see cref="DanglingLinksPurger"/> (threshold 3), and checks that the node
        /// count drops by 2, the summed extension count drops by 4, and the node sequences
        /// that disappeared are the two expected ones.
        /// </summary>
        public void TestDanglingLinksPurger()
        {
            const int kmerSize = 11;
            const int danglingLimit = 3;

            List<ISequence> inputReads = TestInputs.GetDanglingReads();

            // Replace any previously loaded reads, then configure the purger.
            this.SequenceReads.Clear();
            this.SetSequenceReads(inputReads);
            this.KmerLength = kmerSize;
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);

            // Graph as built, before any purging.
            this.CreateGraph();
            long nodesBefore = this.Graph.NodeCount;
            long edgesBefore = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);
            var sequencesBefore = this.Graph.GetNodes().Select(node => this.Graph.GetNodeSequence(node)).ToList();

            // The threshold is assigned a second time (same value as above) before purging.
            this.DanglingLinksThreshold = danglingLimit;
            this.UnDangleGraph();

            // Graph after dangling links have been removed.
            long nodesAfter = this.Graph.NodeCount;
            long edgesAfter = this.Graph.GetNodes().Sum(node => node.ExtensionsCount);
            var sequencesAfter = this.Graph.GetNodes().Select(node => this.Graph.GetNodeSequence(node)).ToList();

            long removedNodes = nodesBefore - nodesAfter;
            long removedEdges = edgesBefore - edgesAfter;
            Assert.AreEqual(2, removedNodes);
            Assert.AreEqual(4, removedEdges);

            // Sequences present before purging but absent afterwards.
            var removedSequences = sequencesBefore.Except(sequencesAfter, new SequenceEqualityComparer());

            var expectedRemoved = new HashSet<string>
            {
                "ATCGAACGATG",
                "TCGAACGATGA"
            };

            AlignmentHelpers.CompareSequenceLists(expectedRemoved, removedSequences);
        }