/// <summary>
/// Step 3: Remove dangling links from graph.
/// Does nothing when no dangling links purger is configured or when
/// the dangling links threshold is not positive.
/// </summary>
protected void UnDangleGraph()
{
    if (DanglingLinksPurger == null || DanglingLinksThreshold <= 0)
    {
        return;
    }

    // Optimization: rather than bumping the threshold by one and running the
    // purger on every iteration, first find out which dangling link lengths
    // are of interest and run the purger for those lengths only.
    DanglingLinksPurger.LengthThreshold = DanglingLinksThreshold - 1;

    // When the purger can also erode graph ends (and erosion is allowed),
    // collecting the dangling lengths erodes the low coverage ends as well,
    // marking any node below the threshold for deletion.
    // TODO: Verify that this does enumerate all dangling ends, the concern is that if a dangling end of length 7 and 2
    // arrive at a node which itself would be of dangling node of length 2 without these "dangling ends" then a dangling end of 9
    // (which it would be without either the 7 or 2 end) might not be reported.
    // Otherwise every length from 1 up to (threshold - 1) is tried.
    IGraphEndsEroder eroder = DanglingLinksPurger as IGraphEndsEroder;
    IEnumerable<int> lengthsToPurge = (eroder != null && AllowErosion)
        ? eroder.ErodeGraphEnds(Graph, ErosionThreshold)
        : Enumerable.Range(1, DanglingLinksThreshold - 1);

    // Erosion must happen only once, so the erosion threshold is reset to -1.
    ErosionThreshold = -1;

    // First pass: purge at each of the collected lengths.
    foreach (int length in lengthsToPurge)
    {
        if (Graph.NodeCount < length)
        {
            continue;
        }

        DanglingLinksPurger.LengthThreshold = length;
        DeBruijnPathList detected = DanglingLinksPurger.DetectErroneousNodes(Graph);
        DanglingLinksPurger.RemoveErroneousNodes(Graph, detected);
    }

    // Second pass: removing dangling links can expose new ones, so keep
    // purging at the full threshold until a pass finds nothing to remove
    // (or the graph has fewer nodes than the threshold).
    while (Graph.NodeCount >= DanglingLinksThreshold)
    {
        DanglingLinksPurger.LengthThreshold = DanglingLinksThreshold;
        DeBruijnPathList detected = DanglingLinksPurger.DetectErroneousNodes(Graph);
        DanglingLinksPurger.RemoveErroneousNodes(Graph, detected);

        if (detected == null || detected.Paths.Count <= 0)
        {
            break;
        }
    }
}
/// <summary>
/// Fills in any graph modifier that has not been supplied yet with its
/// default implementation. Each assembly step is delegated to a separate
/// class; modifiers that are already set are left untouched.
/// </summary>
protected void InitializeDefaultGraphModifiers()
{
    // Default dangling links purger.
    if (this.DanglingLinksPurger == null)
    {
        this.DanglingLinksPurger = new DanglingLinksPurger();
    }

    // Default redundant paths purger, built with the configured
    // redundant path length threshold.
    if (this.RedundantPathsPurger == null)
    {
        this.RedundantPathsPurger = new RedundantPathsPurger(this.RedundantPathLengthThreshold);
    }

    // NOTE(review): the low coverage contig purger is defaulted to a
    // SimplePathContigBuilder; going by the names this looks mismatched -
    // confirm against the declared type of LowCoverageContigPurger.
    if (this.LowCoverageContigPurger == null)
    {
        this.LowCoverageContigPurger = new SimplePathContigBuilder();
    }
}
/// <summary>
/// Runs graph construction, dangling link removal, redundancy removal and
/// contig building on a fixed set of 16 DNA reads (k-mer length 5), maps
/// the reads back to the contigs, and traces scaffold paths through the
/// resulting contig graph. Expects three paths, the first of which has
/// three nodes with known sequences.
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
        new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
        new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
        new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
        new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
    };

    // Build the graph from the reads.
    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();

    // Clean the graph: dangling links first, then redundant paths.
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    // Build contigs and map the original reads onto them.
    IList<ISequence> contigs = BuildContigs().ToList();
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

    // Pair up reads, filter by orientation and compute contig distances.
    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    // Trace scaffold paths through the contig graph.
    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs, this.KmerLength);
    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

    // Expected value goes first so that a failure reports
    // "expected 3 but was N" rather than the reverse.
    Assert.AreEqual(3, paths.Count);

    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);
    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
}
/// <summary>
/// Builds the graph for the configured test case, detects dangling nodes,
/// removes them, and checks that the first node of the first detected
/// dangling path is no longer present in the graph.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidatePadenaRemoveErrorNodes(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    using (FastAParser parser = new FastAParser(filePath))
    {
        // Read the input sequences.
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // Step 1 and 2: build kmers and the graph from the reads.
        int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);
        this.KmerLength = kmerSize;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        DeBruijnGraph builtGraph = this.Graph;

        // Step 3: detect the dangling nodes (length threshold is kmer length + 1)
        // and remove them from the graph.
        DanglingLinksPurger purger = new DanglingLinksPurger(kmerSize + 1);
        DeBruijnPathList detected = purger.DetectErroneousNodes(builtGraph);
        purger.RemoveErroneousNodes(builtGraph, detected);

        // The first detected dangling node must be gone from the graph.
        var remainingNodes = builtGraph.GetNodes();
        var firstDetectedNode = detected.Paths[0].PathNodes[0];
        Assert.IsFalse(remainingNodes.Contains(firstDetectedNode));
    }

    ApplicationLog.WriteLine(@"Padena P1 :DeBruijnGraph.RemoveErrorNodes() validation for Padena step3 completed successfully");
}
/// <summary>
/// Builds the graph for the configured test case, detects dangling nodes,
/// and checks that the sequence of every detected node appears in the
/// comma separated list of expected dangling sequences.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidatePadenaDetectErrorNodes(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DangleNodeSequenceNode);
    string[] expectedDanglings = danglingSequence.Split(',');

    using (FastAParser parser = new FastAParser(filePath))
    {
        // Read the input sequences.
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // Step 1 and 2: build kmers and the graph from the reads.
        int kmerSize = int.Parse(kmerLength, null);
        this.KmerLength = kmerSize;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();

        // Step 3: detect the dangling nodes (length threshold is kmer length + 1).
        DanglingLinksPurger purger = new DanglingLinksPurger(kmerSize + 1);
        DeBruijnPathList detected = purger.DetectErroneousNodes(this.Graph);

        // Every node on every detected path must be one of the expected danglings.
        foreach (DeBruijnPath danglingPath in detected.Paths)
        {
            foreach (DeBruijnNode danglingNode in danglingPath.PathNodes)
            {
                string actualSequence = this.Graph.GetNodeSequence(danglingNode).ToString();
                bool isExpected = expectedDanglings.Contains(actualSequence);
                Assert.IsTrue(isExpected);
            }
        }
    }

    ApplicationLog.WriteLine(
        @"Padena BVT :DeBruijnGraph.DetectErrorNodes() validation for Padena step3 completed successfully");
}