Exemplo n.º 1
0
        public void TestDeBruijnGraphBuilderTiny()
        {
            const int        KmerLength = 3;
            List <ISequence> reads      = TestInputs.GetTinyReads();

            this.KmerLength = KmerLength;
            this.SequenceReads.Clear();
            this.SetSequenceReads(reads);

            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            Assert.AreEqual(9, graph.NodeCount);
            HashSet <string> nodeStrings = new HashSet <string>(graph.GetNodes().Select(n =>
                                                                                        new string(graph.GetNodeSequence(n).Select(a => (char)a).ToArray())));

            Assert.IsTrue(nodeStrings.Contains("ATG") || nodeStrings.Contains("CAT"));
            Assert.IsTrue(nodeStrings.Contains("TGC") || nodeStrings.Contains("GCA"));
            Assert.IsTrue(nodeStrings.Contains("GCC") || nodeStrings.Contains("GGC"));
            Assert.IsTrue(nodeStrings.Contains("TCC") || nodeStrings.Contains("GGA"));
            Assert.IsTrue(nodeStrings.Contains("CCT") || nodeStrings.Contains("AGG"));
            Assert.IsTrue(nodeStrings.Contains("CTA") || nodeStrings.Contains("TAG"));
            Assert.IsTrue(nodeStrings.Contains("TAT") || nodeStrings.Contains("ATA"));
            Assert.IsTrue(nodeStrings.Contains("ATC") || nodeStrings.Contains("GAT"));
            Assert.IsTrue(nodeStrings.Contains("CTC") || nodeStrings.Contains("GAG"));
            long totalEdges = graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            Assert.AreEqual(31, totalEdges);
        }
Exemplo n.º 2
0
        public void AssemblerTest()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.KmerLength                   = KmerLength;
                assembler.DanglingLinksThreshold       = DangleThreshold;
                assembler.RedundantPathLengthThreshold = RedundantThreshold;
                IDeNovoAssembly result = assembler.Assemble(readSeqs);

                // Compare the two graphs
                Assert.AreEqual(1, result.AssembledSequences.Count());
                HashSet <string> expectedContigs = new HashSet <string>()
                {
                    "ATCGCTAGCATCGAACGATCATT"
                };

                foreach (ISequence contig in result.AssembledSequences)
                {
                    Assert.IsTrue(expectedContigs.Contains(new string(contig.Select(a => (char)a).ToArray())));
                }
            }
        }
Exemplo n.º 3
0
        public void TestRedundantPathsPurger()
        {
            const int KmerLength         = 5;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetRedundantPathReads();

            this.SequenceReads.Clear();
            this.SetSequenceReads(readSeqs);
            this.KmerLength = KmerLength;
            this.RedundantPathLengthThreshold = RedundantThreshold;
            this.RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);

            this.CreateGraph();
            long graphCount = this.Graph.NodeCount;
            long graphEdges = this.Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            this.RemoveRedundancy();
            long redundancyRemovedGraphCount = this.Graph.NodeCount;
            long redundancyRemovedGraphEdge  = this.Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(5, graphCount - redundancyRemovedGraphCount);
            Assert.AreEqual(12, graphEdges - redundancyRemovedGraphEdge);
        }
Exemplo n.º 4
0
        public void BuildScaffold()
        {
            const int        kmerLength         = 6;
            const int        dangleThreshold    = 3;
            const int        redundantThreshold = 7;
            List <ISequence> sequences          = new List <ISequence>(TestInputs.GetReadsForScaffolds());

            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.SetSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs().ToList();

            Assert.AreEqual(contigs.Count, 10);
            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);
            GraphScaffoldBuilder    scaffold    = new GraphScaffoldBuilder();
            IEnumerable <ISequence> scaffoldSeq = scaffold.BuildScaffold(
                sequences, contigs, this.KmerLength, 3, 0);

            Assert.AreEqual(scaffoldSeq.Count(), 8);
            Assert.IsTrue(new string(scaffoldSeq.First().Select(a => (char)a).ToArray()).Equals(
                              "ATGCCTCCTATCTTAGCGCGC"));
            scaffold.Dispose();
        }
Exemplo n.º 5
0
        public void TestDeBruijnGraphBuilderSmall()
        {
            const int        KmerLength = 6;
            List <ISequence> reads      = TestInputs.GetSmallReads();

            this.KmerLength = KmerLength;
            this.SequenceReads.Clear();
            this.SetSequenceReads(reads);

            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            Assert.AreEqual(20, graph.NodeCount);
            HashSet <string> nodeStrings = GetGraphNodesForSmallReads();
            string           nodeStr, nodeStrRC;

            foreach (DeBruijnNode node in graph.GetNodes())
            {
                nodeStr   = new string(graph.GetNodeSequence(node).Select(a => (char)a).ToArray());
                nodeStrRC = new string(graph.GetNodeSequence(node).GetReverseComplementedSequence().Select(a => (char)a).ToArray());
                Assert.IsTrue(nodeStrings.Contains(nodeStr) || nodeStrings.Contains(nodeStrRC));
            }

            long totalEdges = graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            Assert.AreEqual(51, totalEdges);
        }
Exemplo n.º 6
0
        public void TestContigBuilder2()
        {
            const int KmerLength         = 6;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetRedundantPathReads();

            SequenceReads.Clear();
            this.SetSequenceReads(readSeqs);
            this.KmerLength = KmerLength;
            RedundantPathLengthThreshold = RedundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            ContigBuilder = new SimplePathContigBuilder();

            CreateGraph();
            RemoveRedundancy();
            long graphCount = Graph.NodeCount;
            long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            IEnumerable <ISequence> contigs = BuildContigs();
            long contigsBuiltGraphCount     = Graph.NodeCount;
            long contigsBuilt = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(1, contigs.Count());
            string s = new string(contigs.ElementAt(0).Select(a => (char)a).ToArray());

            Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", s);
            Assert.AreEqual(graphCount, contigsBuiltGraphCount);
            Assert.AreEqual(graphEdges, contigsBuilt);
        }
Exemplo n.º 7
0
        public void AssemblerTestWithScaffoldBuilder()
        {
            const int kmerLength         = 6;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 7;

            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.KmerLength                   = kmerLength;
                assembler.DanglingLinksThreshold       = dangleThreshold;
                assembler.RedundantPathLengthThreshold = redundantThreshold;

                assembler.ScaffoldRedundancy = 0;
                assembler.Depth = 3;
                CloneLibrary.Instance.AddLibrary("abc", 5, 20);

                PadenaAssembly result = (PadenaAssembly)assembler.Assemble(TestInputs.GetReadsForScaffolds(), true);

                HashSet <string> expectedContigs = new HashSet <string>
                {
                    "TTTTTT", "CGCGCG", "TTAGCGCG", "CGCGCCGCGC", "GCGCGC", "TTTTTA", "TTTTAA", "TTTAAA", "TTTTAGC", "ATGCCTCCTATCTTAGC"
                };

                AlignmentHelpers.CompareSequenceLists(expectedContigs, result.ContigSequences);

                HashSet <string> expectedScaffolds = new HashSet <string>
                {
                    "ATGCCTCCTATCTTAGCGCGC", "TTTAAA", "TTTTTT", "TTTTAGC", "TTTTAA", "CGCGCCGCGC", "TTTTTA", "CGCGCG"
                };

                AlignmentHelpers.CompareSequenceLists(expectedScaffolds, result.Scaffolds);
            }
        }
Exemplo n.º 8
0
        public void BuildScaffold()
        {
            const int kmerLength         = 6;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 7;
            var       sequences          = new List <ISequence>(TestInputs.GetReadsForScaffolds());

            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.SetSequenceReads(sequences);
            CreateGraph();

            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs().ToList();

            CloneLibrary.Instance.AddLibrary("abc", 5, 20);

            using (GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder())
            {
                IEnumerable <ISequence> scaffoldSeq = scaffold.BuildScaffold(sequences, contigs, this.KmerLength, 3, 0);
                HashSet <string>        expected    = new HashSet <string>
                {
                    "ATGCCTCCTATCTTAGCGCGC", "CGCGCCGCGC", "TTTTTT", "CGCGCG", "TTTTAGC", "TTTTTA", "TTTAAA", "TTTTAA",
                };
                AlignmentHelpers.CompareSequenceLists(expected, scaffoldSeq);
            }
        }
Exemplo n.º 9
0
        public void TestContigBuilder1()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            this.SequenceReads.Clear();
            this.SetSequenceReads(readSeqs);
            this.KmerLength              = KmerLength;
            DanglingLinksThreshold       = DangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(DangleThreshold);
            RedundantPathLengthThreshold = RedundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            ContigBuilder = new SimplePathContigBuilder();

            CreateGraph();
            UnDangleGraph();
            RemoveRedundancy();
            long graphCount = Graph.NodeCount;
            long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            IEnumerable <ISequence> contigs = BuildContigs();
            long contigsBuiltGraphCount     = this.Graph.NodeCount;
            long contigsBuilt = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(1, contigs.Count());
            HashSet <string> expectedContigs = new HashSet <string>()
            {
                "ATCGCTAGCATCGAACGATCATT"
            };

            foreach (ISequence contig in contigs)
            {
                string s = new string(contig.Select(a => (char)a).ToArray());
                Assert.IsTrue(expectedContigs.Contains(s));
            }

            Assert.AreEqual(graphCount, contigsBuiltGraphCount);
            Assert.AreEqual(graphEdges, contigsBuilt);
        }
Exemplo n.º 10
0
        public void TestDanglingLinksPurger()
        {
            const int KmerLength      = 11;
            const int DangleThreshold = 3;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            SequenceReads.Clear();
            this.SetSequenceReads(readSeqs);
            this.KmerLength        = KmerLength;
            DanglingLinksThreshold = DangleThreshold;
            DanglingLinksPurger    = new DanglingLinksPurger(DangleThreshold);

            CreateGraph();
            long graphCount = Graph.NodeCount;

            long             graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();
            HashSet <string> graphNodes = new HashSet <string>(Graph.GetNodes().Select(n => Graph.GetNodeSequence(n).ConvertToString()));

            DanglingLinksThreshold = DangleThreshold;
            UnDangleGraph();

            long             dangleRemovedGraphCount = Graph.NodeCount;
            long             dangleRemovedGraphEdge  = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();
            HashSet <string> dangleRemovedGraphNodes = new HashSet <string>(Graph.GetNodes().Select(n => Graph.GetNodeSequence(n).ConvertToString()));

            // Compare the two graphs
            Assert.AreEqual(2, graphCount - dangleRemovedGraphCount);
            Assert.AreEqual(4, graphEdges - dangleRemovedGraphEdge);
            graphNodes.ExceptWith(dangleRemovedGraphNodes);

            HashSet <string> expected = new HashSet <string> {
                "ATCGAACGATG", "TCGAACGATGA"
            };

            AlignmentHelpers.CompareSequenceLists(expected, graphNodes);
        }
Exemplo n.º 11
0
        public void TestDanglingLinksPurger()
        {
            const int        KmerLength      = 11;
            const int        DangleThreshold = 3;
            List <ISequence> readSeqs        = TestInputs.GetDanglingReads();

            SequenceReads.Clear();
            this.SetSequenceReads(readSeqs);
            this.KmerLength        = KmerLength;
            DanglingLinksThreshold = DangleThreshold;
            DanglingLinksPurger    = new DanglingLinksPurger(DangleThreshold);

            CreateGraph();
            long graphCount = Graph.NodeCount;

            long             graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();
            HashSet <string> graphNodes = new HashSet <string>(
                Graph.GetNodes().Select(n => new string(Graph.GetNodeSequence(n).Select(a => (char)a).ToArray())));

            DanglingLinksThreshold = DangleThreshold;
            UnDangleGraph();
            long             dangleRemovedGraphCount = Graph.NodeCount;
            long             dangleRemovedGraphEdge  = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();
            HashSet <string> dangleRemovedGraphNodes = new HashSet <string>(
                Graph.GetNodes().Select(n =>
            {
                return(new string(Graph.GetNodeSequence(n).Select(a => (char)a).ToArray()));
            }));

            // Compare the two graphs
            Assert.AreEqual(2, graphCount - dangleRemovedGraphCount);
            Assert.AreEqual(4, graphEdges - dangleRemovedGraphEdge);
            graphNodes.ExceptWith(dangleRemovedGraphNodes);
            Assert.IsTrue(graphNodes.Contains("TCGAACGATGA"));
            Assert.IsTrue(graphNodes.Contains("ATCGAACGATG"));
        }
Exemplo n.º 12
0
        public void AssemblerTestWithScaffoldBuilder()
        {
            const int kmerLength         = 6;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 7;

            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.KmerLength                   = kmerLength;
                assembler.DanglingLinksThreshold       = dangleThreshold;
                assembler.RedundantPathLengthThreshold = redundantThreshold;

                assembler.ScaffoldRedundancy = 0;
                assembler.Depth = 3;
                CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);

                PadenaAssembly result = (PadenaAssembly)assembler.Assemble(TestInputs.GetReadsForScaffolds(), true);

                Assert.AreEqual(10, result.ContigSequences.Count());

                HashSet <string> expectedContigs = new HashSet <string>
                {
                    "GCGCGC",
                    "TTTTTT",
                    "TTTTTA",
                    "TTTTAA",
                    "TTTAAA",
                    "ATGCCTCCTATCTTAGC",
                    "TTTTAGC",
                    "TTAGCGCG",
                    "CGCGCCGCGC",
                    "CGCGCG"
                };

                foreach (ISequence contig in result.ContigSequences)
                {
                    string contigSeq = new string(contig.Select(a => (char)a).ToArray());
                    Assert.IsTrue(
                        expectedContigs.Contains(contigSeq) ||
                        expectedContigs.Contains(contigSeq.GetReverseComplement(new char[contigSeq.Length])));
                }

                Assert.AreEqual(8, result.Scaffolds.Count());
                HashSet <string> expectedScaffolds = new HashSet <string>
                {
                    "ATGCCTCCTATCTTAGCGCGC",
                    "TTTTTT",
                    "TTTTTA",
                    "TTTTAA",
                    "TTTAAA",
                    "CGCGCCGCGC",
                    "TTTTAGC",
                    "CGCGCG"
                };

                foreach (ISequence scaffold in result.Scaffolds)
                {
                    string scaffoldSeq = new string(scaffold.Select(a => (char)a).ToArray());
                    Assert.IsTrue(
                        expectedScaffolds.Contains(scaffoldSeq) ||
                        expectedScaffolds.Contains(scaffoldSeq.GetReverseComplement(new char[scaffoldSeq.Length])));
                }
            }
        }