示例#1
0
        /// <summary>
        /// Checks whether a scaffold path lists exactly the given nodes, position by position.
        /// </summary>
        /// <param name="path">Scaffold path whose entry keys are inspected.</param>
        /// <param name="contig">Nodes expected at each position of the path.</param>
        /// <returns>
        /// True when both collections have the same number of entries and no path key
        /// differs (per the <c>!=</c> operator) from the node at the same index; otherwise false.
        /// </returns>
        private bool Compare(ScaffoldPath path, IList<DeBruijnNode> contig)
        {
            // Collections of different size can never match.
            if (path.Count != contig.Count)
            {
                return false;
            }

            int position = 0;
            while (position < contig.Count)
            {
                if (path[position].Key != contig[position])
                {
                    return false;
                }

                position++;
            }

            return true;
        }
示例#2
0
 /// <summary>
 /// Detects whether one of the two paths contains every node of the other.
 /// Containment is tested by node membership only; the order of nodes is not compared.
 /// When <paramref name="scaffoldPath"/> is the shorter path and all of its nodes occur in
 /// <paramref name="path"/>, its contents are replaced by the contents of <paramref name="path"/>.
 /// </summary>
 /// <param name="scaffoldPath">Current path; overwritten with <paramref name="path"/> when it is the contained one.</param>
 /// <param name="path">Path to be compared with.</param>
 /// <returns>True if one path contains all node keys of the other; otherwise false.</returns>
 private static bool RemoveContainingPaths(
     ScaffoldPath scaffoldPath,
     ScaffoldPath path)
 {
     if (scaffoldPath.Count >= path.Count)
     {
         // The current path is at least as long: 'path' is contained when each of its
         // node keys occurs somewhere in the current path. Any() stops at the first hit
         // instead of materializing a list just to count it.
         return path.All(t => scaffoldPath.Any(k => k.Key == t.Key));
     }

     // The current path is shorter: if it is contained in 'path', adopt the longer path's content.
     if (scaffoldPath.All(t => path.Any(k => k.Key == t.Key)))
     {
         scaffoldPath.Clear();
         scaffoldPath.AddRange(path);
         return true;
     }

     return false;
 }
示例#3
0
        /// <summary>
        /// Builds a graph from sixteen short DNA reads (k-mer length 6), removes dangling links
        /// and redundant paths, builds contigs, maps mate pairs onto them and traces scaffold paths.
        /// Expects three paths, the first of which has three nodes with known sequences.
        /// </summary>
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLength = 6;
            const int dangleThreshold = 3;
            const int redundantThreshold = 7;

            // Reads in the exact order (and with the exact display IDs) the mappers consume them.
            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") { DisplayID = ">10.x1:abc" },
                new Sequence(Alphabets.DNA, "CCTCCTAT") { DisplayID = "1" },
                new Sequence(Alphabets.DNA, "TCCTATC") { DisplayID = "2" },
                new Sequence(Alphabets.DNA, "TGCCTCCT") { DisplayID = "3" },
                new Sequence(Alphabets.DNA, "ATCTTAGC") { DisplayID = "4" },
                new Sequence(Alphabets.DNA, "CTATCTTAG") { DisplayID = "5" },
                new Sequence(Alphabets.DNA, "CTTAGCG") { DisplayID = "6" },
                new Sequence(Alphabets.DNA, "GCCTCCTAT") { DisplayID = ">8.x1:abc" },
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") { DisplayID = ">8.y1:abc" },
                new Sequence(Alphabets.DNA, "AGCGCGC") { DisplayID = ">9.x1:abc" },
                new Sequence(Alphabets.DNA, "TTTTTT") { DisplayID = "7" },
                new Sequence(Alphabets.DNA, "TTTTTAAA") { DisplayID = "8" },
                new Sequence(Alphabets.DNA, "TAAAAA") { DisplayID = "9" },
                new Sequence(Alphabets.DNA, "TTTTAG") { DisplayID = "10" },
                new Sequence(Alphabets.DNA, "TTTAGC") { DisplayID = "11" },
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { DisplayID = "12" }
            };

            // Graph construction and clean-up.
            KmerLength = kmerLength;
            SequenceReads.Clear();
            AddSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            // Contigs, read-to-contig map and mate-pair map.
            IList<ISequence> contigs = BuildContigs();
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLength);

            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            // Orientation filter followed by distance calculation on the filtered pairs.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator();
            dist.CalculateDistance(overlap);

            Graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(Graph, overlap, kmerLength, 3);

            // Expected value goes first so that a failure message reports the values correctly.
            Assert.AreEqual(3, paths.Count);
            Assert.AreEqual(3, paths.First().Count);

            ScaffoldPath scaffold = paths.First();
            DeBruijnGraph graph = Graph;

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ToString());
        }
示例#4
0
        /// <summary>
        /// Builds a graph from a single read (k-mer length 7), creates nine scaffold paths that
        /// are the full node list or sub-ranges of it (including one empty range), and verifies
        /// that purging leaves exactly one path that lists all graph nodes in order.
        /// </summary>
        public void PathPurger1()
        {
            const int kmerLength = 7;
            List<ISequence> sequences = new List<ISequence>();

            sequences.Add(new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG"));
            this.KmerLength = kmerLength;
            this.AddSequenceReads(sequences);
            this.CreateGraph();
            List<DeBruijnNode> contigs = this.Graph.Nodes.ToList();

            // Each entry is { start index, node count } into 'contigs'; one path is built per
            // entry, in this order. The first entry covers every node.
            int[][] ranges =
            {
                new[] { 0, contigs.Count },
                new[] { 2, 5 },
                new[] { 3, 5 },
                new[] { 6, 5 },
                new[] { 0, 11 },
                new[] { 7, 4 },
                new[] { 11, 0 },
                new[] { 2, 9 },
                new[] { 1, 10 }
            };

            IList<ScaffoldPath> paths = new List<ScaffoldPath>();
            foreach (int[] range in ranges)
            {
                ScaffoldPath path = new ScaffoldPath();
                foreach (DeBruijnNode node in contigs.GetRange(range[0], range[1]))
                {
                    path.Add(new KeyValuePair<DeBruijnNode, DeBruijnEdge>(node, null));
                }

                paths.Add(path);
            }

            PathPurger assembler = new PathPurger();
            assembler.PurgePath(paths);

            // Expected value goes first so that a failure message reports the values correctly.
            Assert.AreEqual(1, paths.Count);
            Assert.IsTrue(Compare(paths.First(), contigs));
        }
示例#5
0
        /// <summary>
        /// Builds a contig graph from the k-mers (length 7) of a single sequence, creates nine
        /// scaffold paths that are the full node list or sub-ranges of it (including one empty
        /// range), and verifies that purging leaves exactly one path that lists all graph nodes in order.
        /// </summary>
        public void PathPurger1()
        {
            const int KmerLength = 7;
            ISequence sequence = new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG");
            IList<ISequence> contigsSequence = SequenceToKmerBuilder.GetKmerSequences(sequence, KmerLength).ToList();
            ContigGraph graph = new ContigGraph();

            graph.BuildContigGraph(contigsSequence, KmerLength);
            List<Node> contigs = graph.Nodes.ToList();

            // Each entry is { start index, node count } into 'contigs'; one path is built per
            // entry, in this order. The first entry covers every node.
            int[][] ranges =
            {
                new[] { 0, contigs.Count },
                new[] { 2, 5 },
                new[] { 3, 5 },
                new[] { 6, 5 },
                new[] { 0, 11 },
                new[] { 7, 4 },
                new[] { 11, 0 },
                new[] { 2, 9 },
                new[] { 1, 10 }
            };

            IList<ScaffoldPath> paths = new List<ScaffoldPath>();
            foreach (int[] range in ranges)
            {
                ScaffoldPath path = new ScaffoldPath();
                foreach (Node node in contigs.GetRange(range[0], range[1]))
                {
                    path.Add(new KeyValuePair<Node, Edge>(node, null));
                }

                paths.Add(path);
            }

            PathPurger assembler = new PathPurger();
            assembler.PurgePath(paths);

            // Expected value goes first so that a failure message reports the values correctly.
            Assert.AreEqual(1, paths.Count);
            Assert.IsTrue(Compare(paths.First(), contigs));
        }
示例#6
0
        /// <summary>
        /// Removes overlapping paths by generating pairwise overlaps between paths.
        /// A boolean matrix records which nodes of the two paths are equal; diagonals are then
        /// walked backwards from the last column and from the last row to find an overlap, and
        /// the paths are stitched together at the position found.
        /// </summary>
        /// <param name="scaffoldPath">Current path (matrix rows); passed to StitchPath when an overlap is found.</param>
        /// <param name="path">Path to be compared with (matrix columns).</param>
        /// <returns>True if an overlap was found and the paths were stitched; otherwise false.</returns>
        private static bool RemoveOverlappingPaths(
            ScaffoldPath scaffoldPath,
            ScaffoldPath path)
        {
            int rowCount = scaffoldPath.Count;
            int colCount = path.Count;

            // An empty path cannot overlap anything; without this guard the searches below
            // would index row/column -1.
            if (rowCount == 0 || colCount == 0)
            {
                return false;
            }

            // Generate Overlap Matrix [Similar To Pairwise Overlap aligner]:
            // matrix[r, c] is true when node r of scaffoldPath equals node c of path.
            bool[,] matrix = new bool[rowCount, colCount];
            for (int row = 0; row < rowCount; row++)
            {
                for (int col = 0; col < colCount; col++)
                {
                    matrix[row, col] = scaffoldPath[row].Key == path[col].Key;
                }
            }

            // Search the last column (bottom-up) for a match, then follow the diagonal
            // towards the top-left for as long as the cells keep matching.
            // NOTE(review): a diagonal is accepted once it reaches row 0 or column 0 even if
            // that boundary cell itself does not match; the column search below never inspects
            // that cell at all. Kept as in the original - confirm this is intended.
            int startPosOfRow = -1;
            for (int index = rowCount - 1; index >= 0; index--)
            {
                if (matrix[index, colCount - 1])
                {
                    int index1 = 1;
                    while (colCount - 1 - index1 >= 0
                        && index - index1 >= 0
                        && matrix[index - index1, colCount - 1 - index1])
                    {
                        index1++;
                    }

                    if (colCount - 1 - index1 <= 0 || index - index1 <= 0)
                    {
                        startPosOfRow = index;
                        break;
                    }
                }
            }

            // Search the last row (right-to-left, skipping the bottom-right corner already
            // covered above) for a match and follow its diagonal the same way.
            int startPosOfCol = -1;
            for (int index = colCount - 2; index >= 0; index--)
            {
                if (matrix[rowCount - 1, index])
                {
                    int index1 = 1;
                    while (rowCount - 1 - index1 > 0
                        && index - index1 > 0
                        && matrix[rowCount - 1 - index1, index - index1])
                    {
                        index1++;
                    }

                    if (rowCount - 1 - index1 <= 0 || index - index1 <= 0)
                    {
                        startPosOfCol = index;
                        break;
                    }
                }
            }

            if (startPosOfCol != -1 || startPosOfRow != -1)
            {
                // Prefer the match found in the last column when its position is at least
                // as large as the one found in the last row.
                if (startPosOfRow >= startPosOfCol)
                {
                    StitchPath(scaffoldPath, path, startPosOfRow, path.Count - 1);
                    return true;
                }

                StitchPath(scaffoldPath, path, scaffoldPath.Count - 1, startPosOfCol);
                return true;
            }

            return false;
        }
示例#7
0
        /// <summary>
        /// Compares the current path against every other unconsumed path and marks as consumed
        /// each one that is found to be contained in, or overlapping with, the current path.
        /// </summary>
        /// <param name="scaffoldPath">Current Path.</param>
        /// <param name="isConsumed">Per-path consumed flags, indexed like the internal path list; updated in place.</param>
        /// <returns>True if at least one path was marked as consumed; otherwise false.</returns>
        private bool SearchContainingAndOverlappingPaths(
            ScaffoldPath scaffoldPath,
            bool[] isConsumed)
        {
            bool anyConsumed = false;

            for (int i = 0; i < this.internalScaffoldPaths.Count; i++)
            {
                // Skip paths that were already merged away.
                if (isConsumed[i])
                {
                    continue;
                }

                // Never compare the current path with itself.
                ScaffoldPath candidate = this.internalScaffoldPaths[i];
                if (scaffoldPath == candidate)
                {
                    continue;
                }

                // Containment is tried first; the overlap check runs only when it fails.
                if (RemoveContainingPaths(scaffoldPath, candidate)
                    || RemoveOverlappingPaths(scaffoldPath, candidate))
                {
                    isConsumed[i] = true;
                    anyConsumed = true;
                }
            }

            return anyConsumed;
        }
示例#8
0
        /// <summary>
        /// Builds a graph from sixteen short DNA reads (k-mer length 5), removes dangling links
        /// and redundant paths, builds contigs, maps mate pairs onto them and traces scaffold
        /// paths through a contig graph. Expects three paths, the first of which has three
        /// nodes with known sequences.
        /// </summary>
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLengthConst = 5;
            const int dangleThreshold = 3;
            const int redundantThreshold = 6;

            // Reads in the exact order (and with the exact IDs) the mappers consume them.
            var sequences = new List<ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
                new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
                new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
                new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
                new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
                new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
                new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
                new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
                new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
                new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
                new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
                new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
                new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
                new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
            };

            // Graph construction and clean-up.
            KmerLength = kmerLengthConst;
            SequenceReads.Clear();

            SetSequenceReads(sequences);
            CreateGraph();

            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);

            UnDangleGraph();
            RemoveRedundancy();

            // Contigs, read-to-contig map and mate-pair map.
            IList<ISequence> contigs = BuildContigs().ToList();
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            // Orientation filter followed by distance calculation on the filtered pairs.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator(overlap);
            overlap = dist.CalculateDistance();

            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            // Expected value goes first so that a failure message reports the values correctly.
            Assert.AreEqual(3, paths.Count);
            Assert.AreEqual(3, paths.First().Count);
            ScaffoldPath scaffold = paths.First();

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }
示例#9
0
        /// <summary>
        /// Splits a sequence into k-mers (length 7), builds a contig graph from them, forms a
        /// scaffold path from the first eleven graph nodes and verifies that the sequence built
        /// from that path equals the original sequence.
        /// </summary>
        public void ValidateBuildSequenceFromPath()
        {
            const int KmerLength = 7;
            ISequence sequence = new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG");
            ISequence sequenceNew;
            IList<ISequence> contigsSequence = SequenceToKmerBuilder.GetKmerSequences(sequence, KmerLength).ToList();
            using (ContigGraph graph = new ContigGraph())
            {
                graph.BuildContigGraph(contigsSequence, KmerLength);
                List<Node> contigs = graph.Nodes.ToList();
                ScaffoldPath path = new ScaffoldPath();

                foreach (Node node in contigs.GetRange(0, 11))
                {
                    path.Add(new KeyValuePair<Node, Edge>(node, new Edge(true)));
                }

                sequenceNew = path.BuildSequenceFromPath(graph, KmerLength);
            }

            Assert.IsNotNull(sequenceNew);

            // Convert the sequence items to characters for comparison; the expected value goes
            // first so that a failure message reports the values correctly.
            string rebuilt = new string(sequenceNew.Select(a => (char)a).ToArray());
            Assert.AreEqual("GATTCAAGGGCTGGGGG", rebuilt);
        }
示例#10
0
        /// <summary>
        /// Add right extension of the nodes to queue.
        /// For every right-extension child of <paramref name="node"/> a new path is created whose
        /// family tree is <paramref name="familyTree"/> followed by <paramref name="node"/>. A path
        /// whose family tree covers every contig in <paramref name="contigPairedReadMap"/> is added
        /// to <paramref name="paths"/>; otherwise it is queued for further search when it satisfies
        /// the distance constraint and its family tree is shorter than the configured depth.
        /// When the node has no right extensions, a single path without a current node is built
        /// and added to <paramref name="paths"/> only if it covers every contig.
        /// </summary>
        /// <param name="node">Current node.</param>
        /// <param name="search">Queue for BFS.</param>
        /// <param name="paths">List of paths.</param>
        /// <param name="familyTree">Nodes visited for construction of paths; may be null.</param>
        /// <param name="contigPairedReadMap">Contig and valid mate pair map.</param>
        private void RightExtension(
            KeyValuePair<Node, Edge> node,
            Queue<Paths> search,
            List<Paths> paths,
            ScaffoldPath familyTree,
            Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap)
        {
            if (node.Key.RightExtensionNodes.Count == 0)
            {
                // Dead end: keep the path only when it already covers every mapped contig.
                Paths terminalPath = CreatePathEndingAt(node, familyTree);
                if (this.CoversAllMappedContigs(terminalPath, contigPairedReadMap))
                {
                    paths.Add(terminalPath);
                }

                return;
            }

            foreach (KeyValuePair<Node, Edge> child in node.Key.RightExtensionNodes)
            {
                Paths childPath = CreatePathEndingAt(node, familyTree);
                childPath.CurrentNode = child;
                childPath.NodeOrientation = true;

                // The distance constraint is evaluated first, as before; the coverage check is
                // then computed once and reused for both decisions.
                bool canExtend = this.DistanceConstraint(childPath, contigPairedReadMap)
                    && childPath.FamilyTree.Count < this.depth;
                bool coversAll = this.CoversAllMappedContigs(childPath, contigPairedReadMap);

                if (coversAll)
                {
                    paths.Add(childPath);
                }
                else if (canExtend)
                {
                    search.Enqueue(childPath);
                }
            }
        }

        /// <summary>
        /// Creates a path whose family tree is the given family tree (if any) followed by the given node.
        /// </summary>
        /// <param name="node">Node appended as the last family-tree entry.</param>
        /// <param name="familyTree">Previously visited nodes; may be null.</param>
        /// <returns>New path with the family tree populated.</returns>
        private static Paths CreatePathEndingAt(KeyValuePair<Node, Edge> node, ScaffoldPath familyTree)
        {
            Paths result = new Paths();
            if (familyTree != null)
            {
                result.FamilyTree.AddRange(familyTree);
            }

            result.FamilyTree.Add(node);
            return result;
        }

        /// <summary>
        /// Checks whether every contig key of the map equals the node sequence of at least one
        /// entry in the path's family tree.
        /// </summary>
        /// <param name="candidate">Path whose family tree is inspected.</param>
        /// <param name="contigPairedReadMap">Contig and valid mate pair map.</param>
        /// <returns>True if all contigs of the map are present in the family tree; otherwise false.</returns>
        private bool CoversAllMappedContigs(
            Paths candidate,
            Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap)
        {
            return contigPairedReadMap.All(
                t => candidate.FamilyTree.Any(k => t.Key == this.graph.GetNodeSequence(k.Key)));
        }