/// <summary>
/// Performs Breadth First Search to traverse through graph to generate scaffold paths.
/// </summary>
/// <param name="overlapGraph">Contig Overlap Graph.</param>
/// <param name="contigPairedReadMaps">InterContig Distances.</param>
/// <param name="lengthOfKmer">Length of Kmer.</param>
/// <param name="searchDepth">Depth to which graph is searched.</param>
/// <returns>List of paths/scaffold.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="overlapGraph"/> or <paramref name="contigPairedReadMaps"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="lengthOfKmer"/> or <paramref name="searchDepth"/> is not positive.
/// </exception>
public IList<ScaffoldPath> FindPaths(
    ContigGraph overlapGraph,
    ContigMatePairs contigPairedReadMaps,
    int lengthOfKmer,
    int searchDepth = 10)
{
    if (overlapGraph == null)
    {
        // Report the name of the parameter that is actually null.
        throw new ArgumentNullException("overlapGraph");
    }

    if (contigPairedReadMaps == null)
    {
        throw new ArgumentNullException("contigPairedReadMaps");
    }

    if (lengthOfKmer <= 0)
    {
        throw new ArgumentException(Resource.KmerLength);
    }

    if (searchDepth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    // Store the inputs on the instance before the traversal starts.
    this.graph = overlapGraph;
    this.kmerLength = lengthOfKmer;
    this.depth = searchDepth;

    List<ScaffoldPath> scaffoldPaths = new List<ScaffoldPath>();

    // Nodes are processed in parallel, so the order in which per-node results
    // are appended to the shared list may vary between runs.
    Parallel.ForEach(
        overlapGraph.Nodes,
        (Node node) =>
        {
            Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap;

            // Only nodes whose sequence has mate-pair information are traversed.
            if (contigPairedReadMaps.TryGetValue(overlapGraph.GetNodeSequence(node), out contigPairedReadMap))
            {
                List<ScaffoldPath> scaffoldPath = TraverseGraph(node, contigPairedReadMap);

                // List<T> is not thread-safe; serialize the appends.
                lock (scaffoldPaths)
                {
                    scaffoldPaths.AddRange(scaffoldPath);
                }
            }
        });

    return scaffoldPaths;
}
/// <summary>
/// Builds the scaffold sequences for the given paths and appends the sequences
/// of every graph node for which IsMarked() returns false.
/// The contig graph is disposed before this method returns.
/// </summary>
/// <param name="contigGraph">Contig Overlap Graph.</param>
/// <param name="paths">Scaffold paths.</param>
/// <returns>List of sequences of scaffolds.</returns>
protected IList<ISequence> GenerateScaffold(
    ContigGraph contigGraph,
    IList<ScaffoldPath> paths)
{
    if (contigGraph == null)
    {
        throw new ArgumentNullException("contigGraph");
    }

    if (paths == null)
    {
        throw new ArgumentNullException("paths");
    }

    // One sequence per scaffold path, built in parallel.
    List<ISequence> scaffoldSequences =
        (from path in paths.AsParallel()
         select path.BuildSequenceFromPath(contigGraph, this.kmerLength)).ToList();

    // Nodes that are not marked contribute their own sequence unchanged.
    IEnumerable<Node> unmarkedNodes =
        from node in contigGraph.Nodes.AsParallel()
        where !node.IsMarked()
        select node;

    scaffoldSequences.AddRange(
        from node in unmarkedNodes.AsParallel()
        select contigGraph.GetNodeSequence(node));

    // The graph is no longer usable by the caller after this point.
    contigGraph.Dispose();

    return scaffoldSequences;
}
/// <summary>
/// Assembles a small set of DNA reads into contigs, builds the contig graph and
/// checks the scaffold paths returned by TracePath.FindPaths. One of the expected
/// contigs ("GCGCGC") is its own reverse complement.
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
        new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
        new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
        new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
        new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
    };

    // Build the de Bruijn graph from the reads.
    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();

    // Clean the graph before building contigs.
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    // Map reads to contigs and derive the filtered mate-pair distances.
    IList<ISequence> contigs = BuildContigs().ToList();
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    // Trace scaffold paths over the contig overlap graph.
    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs, this.KmerLength);
    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

    // Expected value goes first so failure messages read correctly.
    Assert.AreEqual(3, paths.Count);

    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);

    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
}
/// <summary>
/// Validate scaffold paths for a given input reads.
/// Reads the test settings from the xml node, runs the assembly steps on the
/// FASTA input, traces scaffold paths and asserts that every node sequence of the
/// first path (or its reverse complement) is among the expected scaffold nodes.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateScaffoldPath(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThresholdText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThresholdText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string[] expectedScaffoldNodes = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ScaffoldNodes);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);

    // Parse each numeric setting once instead of re-parsing it at every use.
    int kmerLength = Int32.Parse(kmerLengthText, null);
    int danglingThreshold = Int32.Parse(danglingThresholdText, null);
    int redundantThreshold = Int32.Parse(redundantThresholdText, null);
    int searchDepth = Int32.Parse(expectedDepth, null);

    // Get the input reads and build kmers
    using (FastAParser parser = new FastAParser(filePath))
    {
        // Materialize the reads once: they are consumed by the assembler, the
        // read-contig mapper and the mate-pair mapper, and re-enumerating the
        // parser output would repeat the parse each time.
        List<ISequence> sequenceReads = parser.Parse().ToList();

        // Build kmers from step1,graph in step2
        // Remove the dangling links from graph in step3
        // Remove bubbles from the graph in step4
        // Pass the graph and build contigs
        // Validate contig reads
        this.KmerLength = kmerLength;
        this.DanglingLinksThreshold = danglingThreshold;
        this.DanglingLinksPurger = new DanglingLinksPurger(danglingThreshold);
        this.RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
        this.RedundantPathLengthThreshold = redundantThreshold;

        this.SequenceReads.Clear();

        // The assembler gets its own copy of the list, as before.
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        this.UnDangleGraph();

        // Build contig.
        this.ContigBuilder = new SimplePathContigBuilder();
        this.RemoveRedundancy();

        // Evaluate the contig builder once; separate copies are handed out below
        // in case SortContigsData reorders its argument (not visible here).
        List<ISequence> contigs = this.BuildContigs().ToList();
        IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
        ReadContigMapper mapper = new ReadContigMapper();
        ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

        // Find map paired reads.
        CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
        MatePairMapper mapPairedReads = new MatePairMapper();
        ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

        // Filter contigs based on the orientation.
        OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
        ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

        // The return value is not used; the same contigpairedReads object is
        // passed on to FindPaths below.
        DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
        dist.CalculateDistance();

        ContigGraph graph = new ContigGraph();
        graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

        // Validate ScaffoldPath using BFS.
        TracePath trace = new TracePath();
        IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, kmerLength, searchDepth);

        ScaffoldPath scaffold = paths.First();

        foreach (KeyValuePair<Node, Edge> kvp in scaffold)
        {
            ISequence seq = graph.GetNodeSequence(kvp.Key);
            string sequence = seq.ConvertToString();
            string reversedSequence = seq.GetReverseComplementedSequence().ConvertToString();

            // Either orientation of the node sequence is accepted.
            Assert.IsTrue(
                expectedScaffoldNodes.Contains(sequence) || expectedScaffoldNodes.Contains(reversedSequence),
                "Failed to find " + sequence + ", or " + reversedSequence);
        }
    }

    ApplicationLog.WriteLine("PADENA P1 : FindPaths() validation for Padena step6:step6 completed successfully");
}