/// <summary>
/// Step 4: Remove redundant paths from graph.
/// Runs detect-then-remove passes with the configured RedundantPathsPurger until a pass
/// detects no paths. Does nothing when no RedundantPathsPurger is set.
/// </summary>
protected void RemoveRedundancy()
{
    // Nothing to do without a purger.
    if (RedundantPathsPurger == null)
    {
        return;
    }

    while (true)
    {
        DeBruijnPathList detected = RedundantPathsPurger.DetectErroneousNodes(Graph);

        // Removal is invoked on every pass, including the final one where nothing was detected.
        RedundantPathsPurger.RemoveErroneousNodes(Graph, detected);

        if (detected.Paths.Count <= 0)
        {
            break;
        }
    }
}
/// <summary>
/// Supplies default implementations for the graph-modifier properties used by the
/// assembly steps. Each property is assigned only when it is currently null, so an
/// implementation that was already set is left untouched.
/// </summary>
protected void InitializeDefaultGraphModifiers()
{
    // Default dangling-links purger (parameterless constructor).
    if (this.DanglingLinksPurger == null)
    {
        this.DanglingLinksPurger = new DanglingLinksPurger();
    }

    // Default redundant-paths purger, built from the current RedundantPathLengthThreshold.
    if (this.RedundantPathsPurger == null)
    {
        this.RedundantPathsPurger = new RedundantPathsPurger(this.RedundantPathLengthThreshold);
    }

    // Default low-coverage contig purger is a SimplePathContigBuilder.
    if (this.LowCoverageContigPurger == null)
    {
        this.LowCoverageContigPurger = new SimplePathContigBuilder();
    }
}
public void TracePathTestWithPalindromicContig()
{
    // Builds a graph from a fixed set of DNA reads, cleans it (dangling links, redundant
    // paths), builds contigs, maps reads and mate pairs onto them, and finally traces
    // scaffold paths through the contig graph. Expects three paths, the first of which
    // visits three contigs with the sequences asserted below.
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") {ID = "0"},
        new Sequence(Alphabets.DNA, "CCTCCTAT") {ID = "1"},
        new Sequence(Alphabets.DNA, "TCCTATC") {ID = "2"},
        new Sequence(Alphabets.DNA, "TGCCTCCT") {ID = "3"},
        new Sequence(Alphabets.DNA, "ATCTTAGC") {ID = "4"},
        new Sequence(Alphabets.DNA, "CTATCTTAG") {ID = "5"},
        new Sequence(Alphabets.DNA, "CTTAGCG") {ID = "6"},
        new Sequence(Alphabets.DNA, "GCCTCCTAT") {ID = "7"},
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") {ID = "8"},
        new Sequence(Alphabets.DNA, "AGCGCGC") {ID = "9"},
        new Sequence(Alphabets.DNA, "TTTTTT") {ID = "10"},
        new Sequence(Alphabets.DNA, "TTTTTAAA") {ID = "11"},
        new Sequence(Alphabets.DNA, "TAAAAA") {ID = "12"},
        new Sequence(Alphabets.DNA, "TTTTAG") {ID = "13"},
        new Sequence(Alphabets.DNA, "TTTAGC") {ID = "14"},
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") {ID = "15"},
    };

    // Build the graph from the reads.
    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();

    // Configure and run the graph clean-up steps.
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    // Build contigs and map the reads back onto them.
    IList<ISequence> contigs = BuildContigs().ToList();
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

    // Mate-pair mapping, filtering and distance calculation.
    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    // Contig graph and path tracing.
    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs, this.KmerLength);
    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

    // Expected value goes first so that failure messages report expected/actual correctly.
    Assert.AreEqual(3, paths.Count);
    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);

    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
}
/// <summary>
/// Builds the graph from the reads in the configured FASTA file, removes dangling links,
/// then creates a RedundantPathsPurger (path length threshold = kmer length + 1), detects
/// redundant error nodes, removes them from the graph and validates the graph.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="isMicroOrganism">
/// When true, only the number of nodes left in the graph is compared against the expected
/// count read from the xml; otherwise the graph is checked with ValidateGraph.
/// </param>
internal void ValidateRedundantPathPurgerCtor(string nodeName, bool isMicroOrganism)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedNodesCountAfterDangling);

    // Get the input reads and build kmers
    using (FastAParser parser = new FastAParser(filePath))
    {
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // Parse the k-mer length once; it is used for the assembler and the purger threshold.
        int kmerLengthValue = int.Parse(kmerLength, (IFormatProvider)null);

        // Build kmers from step1,graph in step2
        // Remove the dangling links from graph in step3
        // Validate the graph
        this.KmerLength = kmerLengthValue;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        DeBruijnGraph graph = this.Graph;
        this.DanglingLinksPurger = new DanglingLinksPurger(this.KmerLength);
        this.UnDangleGraph();

        // Create RedundantPathPurger instance, detect redundant nodes and remove error nodes
        RedundantPathsPurger redundantPathPurger = new RedundantPathsPurger(kmerLengthValue + 1);
        DeBruijnPathList redundantnodelist = redundantPathPurger.DetectErroneousNodes(graph);
        redundantPathPurger.RemoveErroneousNodes(graph, redundantnodelist);

        if (isMicroOrganism)
        {
            // The expected count comes from xml as text. Compare it numerically: a string
            // and an integer never compare equal, so the raw text cannot be used here.
            int expectedCount = int.Parse(expectedNodesCount, (IFormatProvider)null);
            Assert.AreEqual(expectedCount, graph.GetNodes().Count());
        }
        else
        {
            ValidateGraph(graph, nodeName);
        }
    }

    ApplicationLog.WriteLine(@"Padena P1 :RedundantPathsPurger ctor and methods validation for Padena step4 completed successfully");
}
/// <summary>
/// Builds the graph from the reads in the configured FASTA file, removes dangling links,
/// then creates a RedundantPathsPurger (path length threshold = kmer length + 1), detects
/// redundant error nodes, removes them from the graph and validates the graph.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateRedundantPathPurgerCtor(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads and build kmers
    List<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        // Materialize while the parser is still open.
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        // Close the parser even when parsing throws, so the opened file is not leaked.
        parser.Close();
    }

    // Parse the k-mer length once; it is used for the assembler and the purger threshold.
    int kmerLengthValue = int.Parse(kmerLength, (IFormatProvider)null);

    // Build kmers from step1,graph in step2
    // Remove the dangling links from graph in step3
    // Validate the graph
    this.KmerLength = kmerLengthValue;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads);
    this.CreateGraph();
    this.UnDangleGraph();

    // Create RedundantPathPurger instance, detect redundant nodes and remove error nodes
    RedundantPathsPurger redundantPathPurger = new RedundantPathsPurger(kmerLengthValue + 1);
    DeBruijnPathList redundantnodelist = redundantPathPurger.DetectErroneousNodes(this.Graph);
    redundantPathPurger.RemoveErroneousNodes(this.Graph, redundantnodelist);

    ValidateGraph(this.Graph, nodeName);

    ApplicationLog.WriteLine(
        @"Padena BVT :RedundantPathsPurger ctor and methods validation for Padena step4 completed successfully");
}