/// <summary>
/// Initializes a new instance of the DistanceCalculator class.
/// </summary>
/// <param name="contigPairedReads">
/// Contig mate pair map that this calculator keeps a reference to. Must not be null.
/// </param>
/// <exception cref="ArgumentNullException">Thrown when contigPairedReads is null.</exception>
public DistanceCalculator(ContigMatePairs contigPairedReads)
{
    // Reject a missing map up front so later calls never see a null field.
    if (null == contigPairedReads)
    {
        throw new ArgumentNullException("contigPairedReads");
    }

    this.contigPairedReads = contigPairedReads;
}
/// <summary>
/// Collects scaffold paths by traversing the contig overlap graph from every node
/// whose sequence has an entry in the contig mate pair map. The per-node traversal
/// is delegated to TraverseGraph.
/// </summary>
/// <param name="overlapGraph">Contig Overlap Graph.</param>
/// <param name="contigPairedReadMaps">InterContig Distances.</param>
/// <param name="lengthOfKmer">Length of Kmer. Must be greater than zero.</param>
/// <param name="searchDepth">Depth to which graph is searched. Must be greater than zero.</param>
/// <returns>
/// List of paths/scaffold. Nodes are processed in parallel, so the order of the
/// paths in the returned list is not guaranteed.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when overlapGraph or contigPairedReadMaps is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when lengthOfKmer or searchDepth is not positive.
/// </exception>
public IList<ScaffoldPath> FindPaths(
    ContigGraph overlapGraph,
    ContigMatePairs contigPairedReadMaps,
    int lengthOfKmer,
    int searchDepth = 10)
{
    if (overlapGraph == null)
    {
        // The reported name must match this method's parameter so that callers
        // inspecting ArgumentNullException.ParamName see the real argument.
        throw new ArgumentNullException("overlapGraph");
    }

    if (contigPairedReadMaps == null)
    {
        throw new ArgumentNullException("contigPairedReadMaps");
    }

    if (lengthOfKmer <= 0)
    {
        throw new ArgumentException(Resource.KmerLength);
    }

    if (searchDepth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    // The search settings are stored on the instance before the traversal starts.
    // NOTE(review): presumably TraverseGraph reads these fields; if so, concurrent
    // FindPaths calls on one instance would interfere with each other - confirm.
    this.graph = overlapGraph;
    this.kmerLength = lengthOfKmer;
    this.depth = searchDepth;

    List<ScaffoldPath> scaffoldPaths = new List<ScaffoldPath>();

    Parallel.ForEach(
        overlapGraph.Nodes,
        (Node node) =>
        {
            // Only nodes whose sequence has mate pair links are traversed.
            Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap;
            if (contigPairedReadMaps.TryGetValue(overlapGraph.GetNodeSequence(node), out contigPairedReadMap))
            {
                List<ScaffoldPath> scaffoldPath = TraverseGraph(node, contigPairedReadMap);

                // List<T> is not safe for concurrent writes; serialize the merge.
                lock (scaffoldPaths)
                {
                    scaffoldPaths.AddRange(scaffoldPath);
                }
            }
        });

    return scaffoldPaths;
}
/// <summary>
/// Runs the distance calculator over the contig mate pairs and reports how many
/// entries the resulting map contains.
/// </summary>
/// <param name="contigMatePairs">
/// Contig Mate Pair map. Used to construct the distance calculator when this
/// instance does not have one yet.
/// </param>
/// <returns>Count of the map returned by the distance calculator.</returns>
/// <exception cref="ArgumentNullException">Thrown when contigMatePairs is null.</exception>
protected int CalculateDistanceBetweenContigs(ContigMatePairs contigMatePairs)
{
    if (contigMatePairs == null)
    {
        throw new ArgumentNullException("contigMatePairs");
    }

    // Create the calculator on first use, bound to the supplied map.
    if (this.distanceCalculator == null)
    {
        this.distanceCalculator = new DistanceCalculator(contigMatePairs);
    }

    // NOTE(review): when a calculator already exists, contigMatePairs is not
    // handed to it; it works on whatever it was constructed with - confirm
    // this is intended for repeated calls.
    ContigMatePairs calculatedPairs = this.distanceCalculator.CalculateDistance();

    return calculatedPairs.Count;
}
/// <summary>
/// Filter Paired Read Based on Orientation.
/// </summary>
/// <param name="matePairMap">Map between contigs using mate pair information.</param>
/// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
/// Hierarchical Scaffolding With Bambus
/// by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
/// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
/// <returns>
/// A new map holding only the contig links whose mate pair list is non-empty
/// after filtering. The lists inside matePairMap are modified in place.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when matePairMap is null.</exception>
/// <exception cref="ArgumentException">Thrown when redundancy is negative.</exception>
public ContigMatePairs FilterPairedReads(ContigMatePairs matePairMap, int redundancy = 2)
{
    if (null == matePairMap)
    {
        throw new ArgumentNullException("matePairMap");
    }

    if (redundancy < 0)
    {
        throw new ArgumentException(Bio.Algorithms.Assembly.Padena.Properties.Resource.NegativeRedundancy);
    }

    // Pass 1: prune the mate pair lists in place.
    foreach (KeyValuePair<ISequence, Dictionary<ISequence, IList<ValidMatePair>>> contigEntry in matePairMap)
    {
        foreach (KeyValuePair<ISequence, IList<ValidMatePair>> link in contigEntry.Value)
        {
            // A contig linked to itself never forms a scaffold edge.
            if (contigEntry.Key == link.Key)
            {
                link.Value.Clear();
                continue;
            }

            Dictionary<ISequence, IList<ValidMatePair>> reverseLinks;
            IList<ValidMatePair> reversePairs;

            if (matePairMap.TryGetValue(link.Key, out reverseLinks)
                && reverseLinks.TryGetValue(contigEntry.Key, out reversePairs))
            {
                // Links exist in both directions: let the orientation filter decide.
                OrientationFilter(reversePairs, link.Value, redundancy);
            }
            else if (link.Value.Count < redundancy)
            {
                // One-directional link without enough supporting mate pairs.
                link.Value.Clear();
            }
        }
    }

    // Pass 2: copy every link that still has mate pairs into a fresh map.
    ContigMatePairs newMatePairMap = new ContigMatePairs();

    Parallel.ForEach(
        matePairMap,
        (KeyValuePair<ISequence, Dictionary<ISequence, IList<ValidMatePair>>> contigEntry) =>
        {
            Dictionary<ISequence, IList<ValidMatePair>> survivingLinks =
                new Dictionary<ISequence, IList<ValidMatePair>>();

            foreach (KeyValuePair<ISequence, IList<ValidMatePair>> link in contigEntry.Value)
            {
                if (link.Value.Count > 0)
                {
                    survivingLinks.Add(link.Key, link.Value);
                }
            }

            if (survivingLinks.Count == 0)
            {
                return;
            }

            // The result map is shared by all workers; serialize additions.
            lock (newMatePairMap)
            {
                newMatePairMap.Add(contigEntry.Key, survivingLinks);
            }
        });

    return newMatePairMap;
}
/// <summary>
/// Finds scaffold paths in the contig overlap graph by delegating to the
/// configured path tracer, using this instance's kmer length and depth.
/// </summary>
/// <param name="contigGraph">Contig Graph.</param>
/// <param name="contigMatePairs">Contig Mate Pair map.</param>
/// <returns>List of Scaffold Paths.</returns>
protected IList<ScaffoldPath> TracePath(ContigGraph contigGraph, ContigMatePairs contigMatePairs)
{
    IList<ScaffoldPath> scaffoldPaths = this.tracePath.FindPaths(
        contigGraph,
        contigMatePairs,
        this.kmerLength,
        this.depthField);

    return scaffoldPaths;
}
/// <summary>
/// Filters the contig mate pairs by delegating to the configured paired read
/// filter, using this instance's redundancy setting.
/// </summary>
/// <param name="contigMatePairs">Contig Mate Pair map.</param>
/// <returns>Returns Contig Mate Pair map produced by the filter.</returns>
protected ContigMatePairs FilterReadsBasedOnOrientation(ContigMatePairs contigMatePairs)
{
    ContigMatePairs filteredPairs = this.pairedReadFilter.FilterPairedReads(
        contigMatePairs,
        this.redundancyField);

    return filteredPairs;
}
/// <summary>
/// Builds scaffolds from list of reads and contigs.
/// </summary>
/// <param name="reads">List of reads.</param>
/// <param name="contigs">List of contigs. The list itself is copied, not modified.</param>
/// <param name="lengthofKmer">Kmer Length. Must be greater than zero.</param>
/// <param name="depth">Depth for graph traversal. Must be greater than zero.</param>
/// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
/// Hierarchical Scaffolding With Bambus
/// by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
/// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
/// <returns>List of scaffold sequences.</returns>
/// <exception cref="ArgumentNullException">Thrown when contigs or reads is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when lengthofKmer or depth is not positive, or redundancy is negative.
/// </exception>
public IList<ISequence> BuildScaffold(
    IEnumerable<ISequence> reads,
    IList<ISequence> contigs,
    int lengthofKmer,
    int depth = 10,
    int redundancy = 2)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (null == reads)
    {
        throw new ArgumentNullException("reads");
    }

    if (lengthofKmer <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLength);
    }

    if (depth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    if (redundancy < 0)
    {
        throw new ArgumentException(Resource.NegativeRedundancy);
    }

    // Remember the settings; the step helpers below read them from the instance.
    this.depthField = depth;
    this.redundancyField = redundancy;
    this.kmerLength = lengthofKmer;

    // The validated reads are consumed by both the read mapping and mate pair steps.
    IEnumerable<ISequence> validatedReads = ValidateReads(reads);

    // Step 1: build the overlap graph from a private copy of the contig list,
    // then drop the contigs whose graph node has no extensions.
    IList<ISequence> candidateContigs = new List<ISequence>(contigs);
    ContigGraph overlapGraph = GenerateContigOverlapGraph(candidateContigs);
    foreach (Node node in overlapGraph.Nodes)
    {
        if (node.ExtensionsCount == 0)
        {
            candidateContigs.Remove(overlapGraph.GetNodeSequence(node));
        }
    }

    // Step 2: map reads onto the remaining contigs.
    ReadContigMap readToContigMap = ReadContigMap(candidateContigs, validatedReads);
    candidateContigs = null; // drop the local reference once it is no longer needed

    // Step 3: derive the contig mate pair map from the read mapping.
    ContigMatePairs matePairs = MapPairedReadsToContigs(readToContigMap, validatedReads);
    readToContigMap = null; // drop the local reference once it is no longer needed

    // Step 4: filter the paired reads by orientation.
    matePairs = FilterReadsBasedOnOrientation(matePairs);

    // Step 5: calculate distances between contigs; the returned count is not used here.
    CalculateDistanceBetweenContigs(matePairs);

    // Step 6: trace scaffold paths through the overlap graph.
    IList<ScaffoldPath> scaffoldPaths = TracePath(overlapGraph, matePairs);
    matePairs = null; // drop the local reference once it is no longer needed

    // Step 7: purge the traced paths before assembly.
    PathPurger(scaffoldPaths);

    // Step 8: turn the remaining paths into scaffold sequences.
    return GenerateScaffold(overlapGraph, scaffoldPaths);
}