Example #1
0
        public void BuildScaffold()
        {
            const int        kmerLength         = 6;
            const int        dangleThreshold    = 3;
            const int        redundantThreshold = 7;
            List <ISequence> sequences          = TestInputs.GetReadsForScaffolds();

            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.AddSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs();
            DeBruijnGraph     graph   = Graph;

            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);
            GraphScaffoldBuilder scaffold    = new GraphScaffoldBuilder();
            IList <ISequence>    scaffoldSeq = scaffold.BuildScaffold(
                sequences, contigs, this.KmerLength, 3, 0);

            Assert.AreEqual(scaffoldSeq.Count, 8);
            Assert.IsTrue(scaffoldSeq[0].ToString().Equals(
                              "ATGCCTCCTATCTTAGCGCGC"));
        }
Example #2
0
        public void TestDanglingLinksPurger()
        {
            const int        KmerLength      = 11;
            const int        DangleThreshold = 3;
            List <ISequence> readSeqs        = TestInputs.GetDanglingReads();

            SequenceReads.Clear();
            this.AddSequenceReads(readSeqs);
            this.KmerLength        = KmerLength;
            DanglingLinksThreshold = DangleThreshold;
            DanglingLinksPurger    = new DanglingLinksPurger(DangleThreshold);

            CreateGraph();
            int graphCount = Graph.Nodes.Count;
            int graphEdges = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
            HashSet <string> graphNodes = new HashSet <string>(
                Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

            DanglingLinksThreshold = DangleThreshold;
            UnDangleGraph();
            int dangleRemovedGraphCount = Graph.Nodes.Count;
            int dangleRemovedGraphEdge  = Graph.Nodes.Select(n => n.ExtensionsCount).Sum();
            HashSet <string> dangleRemovedGraphNodes = new HashSet <string>(
                Graph.Nodes.Select(n => Graph.GetNodeSequence(n).ToString()));

            // Compare the two graphs
            Assert.AreEqual(2, graphCount - dangleRemovedGraphCount);
            Assert.AreEqual(4, graphEdges - dangleRemovedGraphEdge);
            graphNodes.ExceptWith(dangleRemovedGraphNodes);
            Assert.IsTrue(graphNodes.Contains("TCGAACGATGA"));
            Assert.IsTrue(graphNodes.Contains("ATCGAACGATG"));
        }
Example #3
0
        public void BuildScaffold()
        {
            const int kmerLength         = 6;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 7;
            var       sequences          = new List <ISequence>(TestInputs.GetReadsForScaffolds());

            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.SetSequenceReads(sequences);
            CreateGraph();

            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs().ToList();

            CloneLibrary.Instance.AddLibrary("abc", 5, 20);

            using (GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder())
            {
                IEnumerable <ISequence> scaffoldSeq = scaffold.BuildScaffold(sequences, contigs, this.KmerLength, 3, 0);
                HashSet <string>        expected    = new HashSet <string>
                {
                    "ATGCCTCCTATCTTAGCGCGC", "CGCGCCGCGC", "TTTTTT", "CGCGCG", "TTTTAGC", "TTTTTA", "TTTAAA", "TTTTAA",
                };
                AlignmentHelpers.CompareSequenceLists(expected, scaffoldSeq);
            }
        }
Example #4
0
        public void BuildScaffold()
        {
            const int        kmerLength         = 6;
            const int        dangleThreshold    = 3;
            const int        redundantThreshold = 7;
            List <ISequence> sequences          = new List <ISequence>(TestInputs.GetReadsForScaffolds());

            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.SetSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs().ToList();

            Assert.AreEqual(contigs.Count, 10);
            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);
            GraphScaffoldBuilder    scaffold    = new GraphScaffoldBuilder();
            IEnumerable <ISequence> scaffoldSeq = scaffold.BuildScaffold(
                sequences, contigs, this.KmerLength, 3, 0);

            Assert.AreEqual(scaffoldSeq.Count(), 8);
            Assert.IsTrue(new string(scaffoldSeq.First().Select(a => (char)a).ToArray()).Equals(
                              "ATGCCTCCTATCTTAGCGCGC"));
            scaffold.Dispose();
        }
Example #5
0
        public void TestContigBuilder1()
        {
            const int KmerLength         = 11;
            const int DangleThreshold    = 3;
            const int RedundantThreshold = 10;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            this.SequenceReads.Clear();
            this.SetSequenceReads(readSeqs);
            this.KmerLength              = KmerLength;
            DanglingLinksThreshold       = DangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(DangleThreshold);
            RedundantPathLengthThreshold = RedundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(RedundantThreshold);
            ContigBuilder = new SimplePathContigBuilder();

            CreateGraph();
            UnDangleGraph();
            RemoveRedundancy();
            long graphCount = Graph.NodeCount;
            long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            IEnumerable <ISequence> contigs = BuildContigs();
            long contigsBuiltGraphCount     = this.Graph.NodeCount;
            long contigsBuilt = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

            // Compare the two graphs
            Assert.AreEqual(1, contigs.Count());
            HashSet <string> expectedContigs = new HashSet <string>()
            {
                "ATCGCTAGCATCGAACGATCATT"
            };

            foreach (ISequence contig in contigs)
            {
                string s = new string(contig.Select(a => (char)a).ToArray());
                Assert.IsTrue(expectedContigs.Contains(s));
            }

            Assert.AreEqual(graphCount, contigsBuiltGraphCount);
            Assert.AreEqual(graphEdges, contigsBuilt);
        }
Example #6
0
        public void TestDanglingLinksPurger()
        {
            const int KmerLength      = 11;
            const int DangleThreshold = 3;

            List <ISequence> readSeqs = TestInputs.GetDanglingReads();

            SequenceReads.Clear();
            this.SetSequenceReads(readSeqs);
            this.KmerLength        = KmerLength;
            DanglingLinksThreshold = DangleThreshold;
            DanglingLinksPurger    = new DanglingLinksPurger(DangleThreshold);

            CreateGraph();
            long graphCount = Graph.NodeCount;

            long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();
            var  graphNodes = Graph.GetNodes().Select(n => Graph.GetNodeSequence(n)).ToList();

            DanglingLinksThreshold = DangleThreshold;
            UnDangleGraph();

            long dangleRemovedGraphCount = Graph.NodeCount;
            long dangleRemovedGraphEdge  = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();
            var  dangleRemovedGraphNodes = Graph.GetNodes().Select(n => Graph.GetNodeSequence(n)).ToList();

            // Compare the two graphs
            Assert.AreEqual(2, graphCount - dangleRemovedGraphCount);
            Assert.AreEqual(4, graphEdges - dangleRemovedGraphEdge);
            var checkList = graphNodes.Except(dangleRemovedGraphNodes, new SequenceEqualityComparer());

            HashSet <string> expected = new HashSet <string> {
                "ATCGAACGATG", "TCGAACGATGA"
            };

            AlignmentHelpers.CompareSequenceLists(expected, checkList);
        }
Example #7
0
        public void TracePathTestWithPalindromicContig()
        {
            const int        kmerLength         = 6;
            const int        dangleThreshold    = 3;
            const int        redundantThreshold = 7;
            List <ISequence> sequences          = new List <ISequence>();
            Sequence         seq = new Sequence(Alphabets.DNA, "ATGCCTC");

            seq.DisplayID = ">10.x1:abc";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "CCTCCTAT");
            seq.DisplayID = "1";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TCCTATC");
            seq.DisplayID = "2";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TGCCTCCT");
            seq.DisplayID = "3";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "ATCTTAGC");
            seq.DisplayID = "4";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "CTATCTTAG");
            seq.DisplayID = "5";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "CTTAGCG");
            seq.DisplayID = "6";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "GCCTCCTAT");
            seq.DisplayID = ">8.x1:abc";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TAGCGCGCTA");
            seq.DisplayID = ">8.y1:abc";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "AGCGCGC");
            seq.DisplayID = ">9.x1:abc";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TTTTTT");
            seq.DisplayID = "7";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TTTTTAAA");
            seq.DisplayID = "8";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TAAAAA");
            seq.DisplayID = "9";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TTTTAG");
            seq.DisplayID = "10";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "TTTAGC");
            seq.DisplayID = "11";
            sequences.Add(seq);
            seq           = new Sequence(Alphabets.DNA, "GCGCGCCGCGCG");
            seq.DisplayID = "12";
            sequences.Add(seq);

            KmerLength = kmerLength;
            SequenceReads.Clear();
            AddSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs();
            ReadContigMapper  mapper  = new ReadContigMapper();

            ReadContigMap  maps    = mapper.Map(contigs, sequences, kmerLength);
            MatePairMapper builder = new MatePairMapper();

            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            ContigMatePairs overlap;
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();

            overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator();

            dist.CalculateDistance(overlap);
            Graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath            path  = new TracePath();
            IList <ScaffoldPath> paths = path.FindPaths(Graph, overlap, kmerLength, 3);

            Assert.AreEqual(paths.Count, 3);
            Assert.AreEqual(paths.First().Count, 3);
            ScaffoldPath  scaffold = paths.First();
            DeBruijnGraph graph    = Graph;

            Assert.IsTrue(graph.GetNodeSequence(scaffold[0].Key).ToString().Equals("ATGCCTCCTATCTTAGC"));
            Assert.IsTrue(graph.GetNodeSequence(scaffold[1].Key).ToString().Equals("TTAGCGCG"));
            Assert.IsTrue(graph.GetNodeSequence(scaffold[2].Key).ToString().Equals("GCGCGC"));
        }
Example #8
0
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLengthConst    = 5;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 6;

            var sequences = new List <ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC")
                {
                    ID = "0"
                },
                new Sequence(Alphabets.DNA, "CCTCCTAT")
                {
                    ID = "1"
                },
                new Sequence(Alphabets.DNA, "TCCTATC")
                {
                    ID = "2"
                },
                new Sequence(Alphabets.DNA, "TGCCTCCT")
                {
                    ID = "3"
                },
                new Sequence(Alphabets.DNA, "ATCTTAGC")
                {
                    ID = "4"
                },
                new Sequence(Alphabets.DNA, "CTATCTTAG")
                {
                    ID = "5"
                },
                new Sequence(Alphabets.DNA, "CTTAGCG")
                {
                    ID = "6"
                },
                new Sequence(Alphabets.DNA, "GCCTCCTAT")
                {
                    ID = "7"
                },
                new Sequence(Alphabets.DNA, "TAGCGCGCTA")
                {
                    ID = "8"
                },
                new Sequence(Alphabets.DNA, "AGCGCGC")
                {
                    ID = "9"
                },
                new Sequence(Alphabets.DNA, "TTTTTT")
                {
                    ID = "10"
                },
                new Sequence(Alphabets.DNA, "TTTTTAAA")
                {
                    ID = "11"
                },
                new Sequence(Alphabets.DNA, "TAAAAA")
                {
                    ID = "12"
                },
                new Sequence(Alphabets.DNA, "TTTTAG")
                {
                    ID = "13"
                },
                new Sequence(Alphabets.DNA, "TTTAGC")
                {
                    ID = "14"
                },
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG")
                {
                    ID = "15"
                },
            };

            KmerLength = kmerLengthConst;
            SequenceReads.Clear();

            SetSequenceReads(sequences);
            CreateGraph();

            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);

            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs().ToList();
            ReadContigMapper  mapper  = new ReadContigMapper();

            ReadContigMap  maps    = mapper.Map(contigs, sequences, kmerLengthConst);
            MatePairMapper builder = new MatePairMapper();

            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();

            ContigMatePairs    overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist    = new DistanceCalculator(overlap);

            overlap = dist.CalculateDistance();
            ContigGraph graph = new ContigGraph();

            graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath            path  = new TracePath();
            IList <ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            Assert.AreEqual(paths.Count, 3);
            Assert.AreEqual(paths.First().Count, 3);
            ScaffoldPath scaffold = paths.First();

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }