/// <summary>
/// Returns the base sequence that the given node refers to.
/// The node's SequenceIndex is used as an index into the graph's
/// list of base sequences; the entry at that index is returned as-is.
/// </summary>
/// <param name="node">Graph node whose sequence is requested.</param>
/// <returns>The base sequence stored at the node's sequence index.</returns>
/// <exception cref="ArgumentNullException">Thrown when node is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when the node's sequence index is negative or not smaller than
/// the number of base sequences.
/// </exception>
public ISequence GetNodeSequence(Node node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    // The index must address an existing entry of the base sequence list.
    int index = node.SequenceIndex;
    bool indexInRange = index >= 0 && index < this.baseSequences.Count;
    if (!indexInRange)
    {
        throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerIndexOutOfRange);
    }

    return this.baseSequences[index];
}
/// <summary>
/// Guards against a null node argument.
/// </summary>
/// <param name="node">Node to check.</param>
/// <exception cref="ArgumentNullException">Thrown when node is null.</exception>
private static void ValidateNode(Node node)
{
    if (node != null)
    {
        return;
    }

    throw new ArgumentNullException("node");
}
/// <summary>
/// Registers <paramref name="node"/> as a right-end extension of this node.
/// If no edge to <paramref name="node"/> exists yet, a new edge with the given
/// orientation is stored. If an edge already exists, its IsSameOrientation
/// flag is XOR-ed with <paramref name="isSameOrientation"/>.
/// Not thread-safe. Use lock at caller if required.
/// </summary>
/// <param name="node">Node to add as right-extension.</param>
/// <param name="isSameOrientation">Orientation of connecting edge.</param>
/// <exception cref="ArgumentNullException">Thrown when node is null.</exception>
public void AddRightEndExtension(Node node, bool isSameOrientation)
{
    ValidateNode(node);

    Edge edge;
    if (this.rightEndExtensionNodes.TryGetValue(node, out edge))
    {
        // Update the edge instance TryGetValue already returned instead of
        // performing a second lookup through the indexer.
        edge.IsSameOrientation ^= isSameOrientation;
    }
    else
    {
        this.rightEndExtensionNodes[node] = new Edge(isSameOrientation);
    }
}
/// <summary>
/// Builds a contig graph from the given contigs.
/// Creates one graph node per contig (constructed from the contig's length
/// and its index in <paramref name="contigs"/>), computes adjacency between
/// the contig nodes, and then replaces the graph's base sequences and node
/// set with the contigs and the newly created contig nodes.
/// </summary>
/// <param name="contigs">List of contig data.</param>
/// <param name="kmerLength">Kmer length. Must be positive.</param>
/// <exception cref="ArgumentNullException">Thrown when contigs is null.</exception>
/// <exception cref="ArgumentException">Thrown when kmerLength is not positive.</exception>
public void BuildContigGraph(IList<ISequence> contigs, int kmerLength)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    // Create contig nodes. Each iteration writes a distinct array slot,
    // so no synchronization is needed. The Count property is used rather
    // than the LINQ Count() extension, matching the loop bound below.
    Node[] contigNodes = new Node[contigs.Count];
    Parallel.For(0, contigs.Count, (int ndx) => contigNodes[ndx] = new Node(contigs[ndx].Count, ndx));

    GenerateContigAdjacency(contigs, kmerLength, contigNodes);

    // Update graph with new nodes.
    this.baseSequences = new List<ISequence>(contigs);
    this.kmerNodes = new HashSet<Node>(contigNodes);
}
/// <summary>
/// Adds edges between contig nodes by matching the k-mers recorded
/// in the left and right k-mer maps.
/// For every left k-mer: contigs whose right k-mer is identical become
/// left extensions with same orientation (true); contigs whose left k-mer
/// equals the reverse complement become left extensions with opposite
/// orientation (false). Right k-mers are handled symmetrically via
/// right extensions.
/// </summary>
/// <param name="contigNodes">Array of contig nodes, indexed by the values stored in the maps.</param>
/// <param name="leftKmerMap">Map of left k-mer to contig node indexes.</param>
/// <param name="rightKmerMap">Map of right k-mer to contig node indexes.</param>
private static void AddContigGraphEdges(
    Node[] contigNodes,
    Dictionary<ISequence, List<int>> leftKmerMap,
    Dictionary<ISequence, List<int>> rightKmerMap)
{
    // Left extensions. No locks are taken; the original author's note is that
    // each iteration works with a different contig node.
    Parallel.ForEach(
        leftKmerMap,
        leftEntry =>
        {
            ISequence kmer = leftEntry.Key;
            List<int> owners = leftEntry.Value;
            List<int> matches;

            // Same k-mer found at the right end of other contigs.
            if (rightKmerMap.TryGetValue(kmer, out matches))
            {
                foreach (int ownerIndex in owners)
                {
                    foreach (int matchIndex in matches)
                    {
                        contigNodes[ownerIndex].AddLeftEndExtension(contigNodes[matchIndex], true);
                    }
                }
            }

            // Reverse complement found at the left end of other contigs.
            ISequence reverseComplement = kmer.GetReverseComplementedSequence();
            if (leftKmerMap.TryGetValue(reverseComplement, out matches))
            {
                foreach (int ownerIndex in owners)
                {
                    foreach (int matchIndex in matches)
                    {
                        contigNodes[ownerIndex].AddLeftEndExtension(contigNodes[matchIndex], false);
                    }
                }
            }
        });

    // Right extensions. Same no-lock reasoning as above.
    Parallel.ForEach(
        rightKmerMap,
        rightEntry =>
        {
            ISequence kmer = rightEntry.Key;
            List<int> owners = rightEntry.Value;
            List<int> matches;

            // Same k-mer found at the left end of other contigs.
            if (leftKmerMap.TryGetValue(kmer, out matches))
            {
                foreach (int ownerIndex in owners)
                {
                    foreach (int matchIndex in matches)
                    {
                        contigNodes[ownerIndex].AddRightEndExtension(contigNodes[matchIndex], true);
                    }
                }
            }

            // Reverse complement found at the right end of other contigs.
            ISequence reverseComplement = kmer.GetReverseComplementedSequence();
            if (rightKmerMap.TryGetValue(reverseComplement, out matches))
            {
                foreach (int ownerIndex in owners)
                {
                    foreach (int matchIndex in matches)
                    {
                        contigNodes[ownerIndex].AddRightEndExtension(contigNodes[matchIndex], false);
                    }
                }
            }
        });
}
/// <summary>
/// Computes adjacency between contig nodes from overlapping contig ends.
/// For every contig, the leading and trailing subsequences of length
/// (kmerLength - 1) are recorded in a left and a right map keyed by
/// sequence content; the maps are then used to add edges between nodes.
/// </summary>
/// <remarks>
/// A contig shorter than <paramref name="kmerLength"/> causes an
/// ArgumentException to be thrown from inside the parallel loop body, so
/// callers will typically observe it wrapped in an AggregateException.
/// </remarks>
/// <param name="contigs">List of contig data.</param>
/// <param name="kmerLength">Kmer length.</param>
/// <param name="contigNodes">Array of contig nodes.</param>
private static void GenerateContigAdjacency(IList<ISequence> contigs, long kmerLength, Node[] contigNodes)
{
    // Maps from (k-1)-length contig prefix / suffix to the indexes of the contigs having it.
    var leftKmerMap = new Dictionary<ISequence, List<int>>(new SequenceEqualityComparer());
    var rightKmerMap = new Dictionary<ISequence, List<int>>(new SequenceEqualityComparer());

    long overlapLength = kmerLength - 1;

    Parallel.For(
        0,
        contigs.Count,
        contigIndex =>
        {
            ISequence contig = contigs[contigIndex];
            if (contig.Count < kmerLength)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthIsTooLong);
            }

            // Leading (k-1) symbols go into the left map.
            ISequence prefix = contig.GetSubSequence(0, overlapLength);
            RecordContigEnd(leftKmerMap, prefix, contigIndex);

            // Trailing (k-1) symbols go into the right map.
            ISequence suffix = contig.GetSubSequence(contig.Count - overlapLength, overlapLength);
            RecordContigEnd(rightKmerMap, suffix, contigIndex);
        });

    AddContigGraphEdges(contigNodes, leftKmerMap, rightKmerMap);
}

/// <summary>
/// Appends a contig index to the list stored under the given k-mer,
/// creating the list first if the k-mer is not yet present.
/// The map is locked for the get-or-create step and the list is locked
/// separately for the append, so this may be called concurrently.
/// </summary>
/// <param name="kmerMap">Map of k-mer to contig indexes.</param>
/// <param name="kmer">K-mer taken from one end of the contig.</param>
/// <param name="contigIndex">Index of the contig the k-mer came from.</param>
private static void RecordContigEnd(Dictionary<ISequence, List<int>> kmerMap, ISequence kmer, int contigIndex)
{
    List<int> owners;
    lock (kmerMap)
    {
        if (!kmerMap.TryGetValue(kmer, out owners))
        {
            owners = new List<int>();
            kmerMap.Add(kmer, owners);
        }
    }

    lock (owners)
    {
        owners.Add(contigIndex);
    }
}
/// <summary>
/// Traverses the graph breadth-first starting at the given node and
/// returns the FamilyTree of every path collected along the way.
/// The start node is first extended to the left (with an edge flagged
/// false) and to the right (with an edge flagged true); afterwards queued
/// entries are processed until the queue is empty.
/// </summary>
/// <param name="node">Start Node.</param>
/// <param name="contigPairedReadMap">Map of all contigs having valid
/// mate pairs with given node contig.</param>
/// <returns>List of paths.</returns>
private List<ScaffoldPath> TraverseGraph(
    Node node,
    Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap)
{
    Queue<Paths> pending = new Queue<Paths>();
    List<Paths> collected = new List<Paths>();

    // Seed the search from both ends of the start node.
    // NOTE(review): LeftExtension / RightExtension presumably enqueue further
    // work into the queue and record paths into the list - confirm.
    KeyValuePair<Node, Edge> leftSeed = new KeyValuePair<Node, Edge>(node, new Edge(false));
    this.LeftExtension(leftSeed, pending, collected, null, contigPairedReadMap);

    KeyValuePair<Node, Edge> rightSeed = new KeyValuePair<Node, Edge>(node, new Edge(true));
    this.RightExtension(rightSeed, pending, collected, null, contigPairedReadMap);

    while (pending.Count > 0)
    {
        Paths current = pending.Dequeue();

        // Extend to the right when the path's node orientation agrees with the
        // edge's same-orientation flag; otherwise extend to the left.
        bool extendRight = current.NodeOrientation == current.CurrentNode.Value.IsSameOrientation;
        if (extendRight)
        {
            this.RightExtension(current.CurrentNode, pending, collected, current.FamilyTree, contigPairedReadMap);
        }
        else
        {
            this.LeftExtension(current.CurrentNode, pending, collected, current.FamilyTree, contigPairedReadMap);
        }
    }

    IEnumerable<ScaffoldPath> familyTrees = collected.Select(path => path.FamilyTree);
    return new List<ScaffoldPath>(familyTrees);
}