/// <summary>
/// Checks whether a scaffold path visits exactly the given nodes, in the same order.
/// </summary>
/// <param name="path">Scaffold path whose entry keys are inspected.</param>
/// <param name="contig">Expected nodes, in order.</param>
/// <returns>
/// True when both collections have the same length and every path entry's key
/// equals the node at the same position; otherwise false.
/// </returns>
private bool Compare(ScaffoldPath path, IList<DeBruijnNode> contig)
{
    // Different lengths can never match; bail out before touching any element.
    if (path.Count != contig.Count)
    {
        return false;
    }

    int position = 0;
    while (position < contig.Count)
    {
        if (path[position].Key != contig[position])
        {
            return false;
        }

        position++;
    }

    return true;
}
/// <summary>
/// Remove containing paths.
/// Determines whether one of the two paths contains every node of the other. When
/// <paramref name="path"/> is the longer one and contains all nodes of
/// <paramref name="scaffoldPath"/>, the contents of <paramref name="scaffoldPath"/>
/// are replaced with those of <paramref name="path"/>.
/// </summary>
/// <param name="scaffoldPath">Current path. May be overwritten with the contents of <paramref name="path"/>.</param>
/// <param name="path">Path to be compared with.</param>
/// <returns>Containing paths or not.</returns>
private static bool RemoveContainingPaths(
    ScaffoldPath scaffoldPath,
    ScaffoldPath path)
{
    if (scaffoldPath.Count >= path.Count)
    {
        // The current path is at least as long: it contains the other path when every
        // node of the other path occurs somewhere in it. Nothing needs to be copied.
        return path.All(t => scaffoldPath.Any(k => k.Key == t.Key));
    }

    // The other path is longer: if it contains every node of the current path,
    // the current path takes over the longer path's contents.
    if (scaffoldPath.All(t => path.Any(k => k.Key == t.Key)))
    {
        scaffoldPath.Clear();
        scaffoldPath.AddRange(path);
        return true;
    }

    return false;
}
/// <summary>
/// Assembles contigs from a fixed set of reads, maps mate pairs onto them and checks
/// the scaffold paths returned by TracePath.FindPaths: three paths are expected, the
/// first of which consists of three nodes with known sequences.
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLength = 6;
    const int dangleThreshold = 3;
    const int redundantThreshold = 7;

    // NOTE(review): IDs of the form ">N.x1:abc" / ">N.y1:abc" presumably mark mate-pair
    // reads belonging to the "abc" library registered below - confirm against the mapper.
    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { DisplayID = ">10.x1:abc" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { DisplayID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { DisplayID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { DisplayID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { DisplayID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { DisplayID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { DisplayID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { DisplayID = ">8.x1:abc" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { DisplayID = ">8.y1:abc" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { DisplayID = ">9.x1:abc" },
        new Sequence(Alphabets.DNA, "TTTTTT") { DisplayID = "7" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { DisplayID = "8" },
        new Sequence(Alphabets.DNA, "TAAAAA") { DisplayID = "9" },
        new Sequence(Alphabets.DNA, "TTTTAG") { DisplayID = "10" },
        new Sequence(Alphabets.DNA, "TTTAGC") { DisplayID = "11" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { DisplayID = "12" }
    };

    // Build the graph from the reads and clean it up before extracting contigs.
    KmerLength = kmerLength;
    SequenceReads.Clear();
    AddSequenceReads(sequences);
    CreateGraph();
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    // Map reads to contigs, pair them up, filter by orientation and compute distances.
    IList<ISequence> contigs = BuildContigs();
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLength);
    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5f, 15f);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
    DistanceCalculator dist = new DistanceCalculator();
    dist.CalculateDistance(overlap);

    // Trace scaffold paths through the contig graph.
    Graph.BuildContigGraph(contigs, this.KmerLength);
    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(Graph, overlap, kmerLength, 3);

    // Expected value goes first so that failure messages report the values correctly.
    Assert.AreEqual(3, paths.Count);
    Assert.AreEqual(3, paths.First().Count);

    ScaffoldPath scaffold = paths.First();
    DeBruijnGraph graph = Graph;
    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ToString());
}
/// <summary>
/// Feeds PathPurger a list of scaffold paths cut from the same node list (the full
/// list plus several sub-ranges, one of them empty) and checks that a single path
/// remains which Compare accepts as equal to the full node list.
/// </summary>
public void PathPurger1()
{
    const int kmerLength = 7;
    List<ISequence> sequences = new List<ISequence>();
    sequences.Add(new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG"));
    this.KmerLength = kmerLength;
    this.AddSequenceReads(sequences);
    this.CreateGraph();
    List<DeBruijnNode> contigs = this.Graph.Nodes.ToList();

    // (start, count) windows into the node list, one scaffold path per row, in the
    // order the paths are handed to the purger. The first row is the complete list;
    // the (11, 0) row yields an empty path.
    int[,] ranges =
    {
        { 0, contigs.Count },
        { 2, 5 },
        { 3, 5 },
        { 6, 5 },
        { 0, 11 },
        { 7, 4 },
        { 11, 0 },
        { 2, 9 },
        { 1, 10 }
    };

    IList<ScaffoldPath> paths = new List<ScaffoldPath>();
    for (int row = 0; row < ranges.GetLength(0); row++)
    {
        ScaffoldPath path = new ScaffoldPath();
        foreach (DeBruijnNode node in contigs.GetRange(ranges[row, 0], ranges[row, 1]))
        {
            path.Add(new KeyValuePair<DeBruijnNode, DeBruijnEdge>(node, null));
        }

        paths.Add(path);
    }

    PathPurger assembler = new PathPurger();
    assembler.PurgePath(paths);

    // Expected value goes first so that failure messages report the values correctly.
    Assert.AreEqual(1, paths.Count);
    Assert.IsTrue(Compare(paths.First(), contigs));
}
/// <summary>
/// Feeds PathPurger a list of scaffold paths cut from the nodes of a contig graph
/// (the full node list plus several sub-ranges, one of them empty) and checks that a
/// single path remains which Compare accepts as equal to the full node list.
/// </summary>
public void PathPurger1()
{
    const int kmerLength = 7;
    ISequence sequence = new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG");
    IList<ISequence> contigsSequence =
        SequenceToKmerBuilder.GetKmerSequences(sequence, kmerLength).ToList();
    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigsSequence, kmerLength);
    List<Node> contigs = graph.Nodes.ToList();

    // (start, count) windows into the node list, one scaffold path per row, in the
    // order the paths are handed to the purger. The first row is the complete list;
    // the (11, 0) row yields an empty path.
    int[,] ranges =
    {
        { 0, contigs.Count },
        { 2, 5 },
        { 3, 5 },
        { 6, 5 },
        { 0, 11 },
        { 7, 4 },
        { 11, 0 },
        { 2, 9 },
        { 1, 10 }
    };

    IList<ScaffoldPath> paths = new List<ScaffoldPath>();
    for (int row = 0; row < ranges.GetLength(0); row++)
    {
        ScaffoldPath path = new ScaffoldPath();
        foreach (Node node in contigs.GetRange(ranges[row, 0], ranges[row, 1]))
        {
            path.Add(new KeyValuePair<Node, Edge>(node, null));
        }

        paths.Add(path);
    }

    PathPurger assembler = new PathPurger();
    assembler.PurgePath(paths);

    // Expected value goes first so that failure messages report the values correctly.
    Assert.AreEqual(1, paths.Count);
    Assert.IsTrue(Compare(paths.First(), contigs));
}
/// <summary>
/// Removes Overlapping paths by generating pairwise overlaps between paths.
/// A boolean matrix records which entries of the two paths share the same node; the
/// method then looks for a diagonal run of matches that ends either at the last entry
/// of <paramref name="path"/> or at the last entry of <paramref name="scaffoldPath"/>,
/// and passes the end positions of that run to StitchPath.
/// </summary>
/// <param name="scaffoldPath">Current path.</param>
/// <param name="path">Path to be compared with.</param>
/// <returns>Overlapping paths or not. True means StitchPath was invoked.</returns>
private static bool RemoveOverlappingPaths(
    ScaffoldPath scaffoldPath,
    ScaffoldPath path)
{
    int rowCount = scaffoldPath.Count;
    int colCount = path.Count;

    // An empty path cannot overlap anything; without this guard the scans below
    // would index the matrix at position -1.
    if (rowCount == 0 || colCount == 0)
    {
        return false;
    }

    // Generate Overlap Matrix [Similar To Pairwise Overlap aligner]:
    // matrix[r, c] is true when scaffoldPath[r] and path[c] refer to the same node.
    bool[,] matrix = new bool[rowCount, colCount];
    for (int row = 0; row < rowCount; row++)
    {
        for (int col = 0; col < colCount; col++)
        {
            matrix[row, col] = scaffoldPath[row].Key == path[col].Key;
        }
    }

    // Scan the last column (the last entry of path) from the bottom row upwards and
    // follow each match diagonally towards the top-left corner.
    // NOTE(review): the acceptance test uses "<= 0", so a diagonal that stopped on a
    // mismatch at index 0 is accepted as well - confirm this is intended.
    int startPosOfRow = -1;
    for (int row = rowCount - 1; row >= 0; row--)
    {
        if (matrix[row, colCount - 1])
        {
            int step = 1;
            while (colCount - 1 - step >= 0 && row - step >= 0)
            {
                if (matrix[row - step, colCount - 1 - step])
                {
                    step++;
                }
                else
                {
                    break;
                }
            }

            if (colCount - 1 - step <= 0 || row - step <= 0)
            {
                startPosOfRow = row;
                break;
            }
        }
    }

    // Scan the last row (the last entry of scaffoldPath) from the second-to-last
    // column leftwards, again following matches diagonally. Unlike the scan above,
    // this walk stops before reaching index 0 (strict "> 0" bounds).
    int startPosOfCol = -1;
    for (int col = colCount - 2; col >= 0; col--)
    {
        if (matrix[rowCount - 1, col])
        {
            int step = 1;
            while (rowCount - 1 - step > 0 && col - step > 0)
            {
                if (matrix[rowCount - 1 - step, col - step])
                {
                    step++;
                }
                else
                {
                    break;
                }
            }

            if (rowCount - 1 - step <= 0 || col - step <= 0)
            {
                startPosOfCol = col;
                break;
            }
        }
    }

    if (startPosOfCol == -1 && startPosOfRow == -1)
    {
        return false;
    }

    // Prefer the match found in the last column when its position is at least as
    // large as the one found in the last row.
    if (startPosOfRow >= startPosOfCol)
    {
        StitchPath(scaffoldPath, path, startPosOfRow, colCount - 1);
    }
    else
    {
        StitchPath(scaffoldPath, path, rowCount - 1, startPosOfCol);
    }

    return true;
}
/// <summary>
/// Compares the given path against every path in the internal list that has not been
/// consumed yet, first for containment and, failing that, for overlap. Each path that
/// matches is flagged as consumed.
/// </summary>
/// <param name="scaffoldPath">Current Path.</param>
/// <param name="isConsumed">
/// Path status, indexed like the internal path list. Entries are set to true for
/// every path that was found to be contained in or overlapping the current path.
/// </param>
/// <returns>True when at least one path was consumed during this pass.</returns>
private bool SearchContainingAndOverlappingPaths(
    ScaffoldPath scaffoldPath,
    bool[] isConsumed)
{
    bool anyConsumed = false;

    for (int position = 0; position < this.internalScaffoldPaths.Count; position++)
    {
        if (isConsumed[position])
        {
            continue;
        }

        ScaffoldPath candidate = this.internalScaffoldPaths[position];

        // Never compare a path with itself.
        if (scaffoldPath == candidate)
        {
            continue;
        }

        // Containment is tried first; the overlap check only runs when it fails.
        if (RemoveContainingPaths(scaffoldPath, candidate)
            || RemoveOverlappingPaths(scaffoldPath, candidate))
        {
            isConsumed[position] = true;
            anyConsumed = true;
        }
    }

    return anyConsumed;
}
/// <summary>
/// Assembles contigs from a fixed set of reads, maps mate pairs onto them and checks
/// the scaffold paths returned by TracePath.FindPaths: three paths are expected, the
/// first of which consists of three nodes with known sequences.
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
        new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
        new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
        new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
        new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
    };

    // Build the graph from the reads and clean it up before extracting contigs.
    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    // Map reads to contigs, pair them up, filter by orientation and compute distances.
    IList<ISequence> contigs = BuildContigs().ToList();
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);
    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    // Trace scaffold paths through a contig graph built from the assembled contigs.
    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs, this.KmerLength);
    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

    // Expected value goes first so that failure messages report the values correctly.
    Assert.AreEqual(3, paths.Count);
    Assert.AreEqual(3, paths.First().Count);

    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
}
/// <summary>
/// Splits a sequence into k-mers, builds a contig graph from them, places the first
/// eleven graph nodes on a scaffold path and checks that
/// ScaffoldPath.BuildSequenceFromPath returns the original sequence.
/// </summary>
public void ValidateBuildSequenceFromPath()
{
    const int KmerLength = 7;
    ISequence sequence = new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG");
    ISequence sequenceNew;
    IList<ISequence> contigsSequence =
        SequenceToKmerBuilder.GetKmerSequences(sequence, KmerLength).ToList();

    using (ContigGraph graph = new ContigGraph())
    {
        graph.BuildContigGraph(contigsSequence, KmerLength);
        List<Node> contigs = graph.Nodes.ToList();

        ScaffoldPath path = new ScaffoldPath();
        foreach (Node node in contigs.GetRange(0, 11))
        {
            path.Add(new KeyValuePair<Node, Edge>(node, new Edge(true)));
        }

        sequenceNew = path.BuildSequenceFromPath(graph, KmerLength);
    }

    Assert.IsNotNull(sequenceNew);

    // Turn the sequence items into characters for a readable comparison; the expected
    // value goes first so that failure messages report the values correctly.
    string rebuilt = new string(sequenceNew.Select(a => (char)a).ToArray());
    Assert.AreEqual("GATTCAAGGGCTGGGGG", rebuilt);
}
/// <summary>
/// Add right extension of the nodes to queue.
/// For every right-extension node of <paramref name="node"/> a candidate path is built
/// from the family tree plus the current node. A candidate is queued for further
/// search while it satisfies the distance constraint, is shorter than the depth limit
/// and does not yet contain every contig of the mate-pair map; it is added to the
/// result list once it contains every contig of the map. A node without right
/// extensions yields a single candidate that is only checked for completeness.
/// </summary>
/// <param name="node">Current node.</param>
/// <param name="search">Queue for BFS.</param>
/// <param name="paths">List of paths.</param>
/// <param name="familyTree">Nodes visited for construction of paths. May be null.</param>
/// <param name="contigPairedReadMap">Contig and valid mate pair map.</param>
private void RightExtension(
    KeyValuePair<Node, Edge> node,
    Queue<Paths> search,
    List<Paths> paths,
    ScaffoldPath familyTree,
    Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap)
{
    if (node.Key.RightExtensionNodes.Count == 0)
    {
        // Nothing to extend into: the path ends at the current node.
        Paths terminalPath = new Paths();
        if (familyTree != null)
        {
            terminalPath.FamilyTree.AddRange(familyTree);
        }

        terminalPath.FamilyTree.Add(node);

        if (contigPairedReadMap.All(
            t => terminalPath.FamilyTree.Any(k => t.Key == this.graph.GetNodeSequence(k.Key))))
        {
            paths.Add(terminalPath);
        }

        return;
    }

    foreach (KeyValuePair<Node, Edge> child in node.Key.RightExtensionNodes)
    {
        Paths childPath = new Paths();
        childPath.CurrentNode = child;
        if (familyTree != null)
        {
            childPath.FamilyTree.AddRange(familyTree);
        }

        childPath.FamilyTree.Add(node);
        childPath.NodeOrientation = true;

        // The depth check is only reached when the distance constraint holds.
        bool canExtend = this.DistanceConstraint(childPath, contigPairedReadMap)
            && childPath.FamilyTree.Count < this.depth;

        // Evaluated once per candidate: does the path contain every contig of the map?
        bool coversAllContigs = contigPairedReadMap.All(
            t => childPath.FamilyTree.Any(k => t.Key == this.graph.GetNodeSequence(k.Key)));

        if (canExtend && !coversAllContigs)
        {
            search.Enqueue(childPath);
        }
        else if (coversAllContigs)
        {
            paths.Add(childPath);
        }
    }
}