/// <summary>
/// Builds the contig mate pair map for the given reads by delegating to the
/// configured paired-read mapper (<c>_mapPairedReads</c>).
/// </summary>
/// <param name="readContigMap">Map between reads and contigs.</param>
/// <param name="reads">Sequences of reads.</param>
/// <returns>Contig mate pair map produced by the mapper.</returns>
protected ContigMatePairs MapPairedReadsToContigs(ReadContigMap readContigMap, IList<ISequence> reads)
{
    // The mapper creates the result itself, so no placeholder instance is
    // allocated here only to be discarded.
    return _mapPairedReads.MapContigToMatePairs(reads, readContigMap);
}
/// <summary>
/// Calculates distances between contigs.
/// Every entry of the map is processed in parallel; for each entry
/// <c>CalculateInterContigDistance</c> is invoked with the entry and the
/// <c>Count</c> of its key sequence.
/// </summary>
/// <param name="contigPairedReads">Input contigs and mate pairs mapping.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="contigPairedReads"/> is null.
/// </exception>
public void CalculateDistance(ContigMatePairs contigPairedReads)
{
    // Fail fast with the caller-facing parameter name instead of letting
    // Parallel.ForEach report its own internal "source" parameter.
    if (contigPairedReads == null)
    {
        throw new ArgumentNullException("contigPairedReads");
    }

    Parallel.ForEach(
        contigPairedReads,
        (KeyValuePair<ISequence, Dictionary<ISequence, IList<ValidMatePair>>> contigPairedRead) =>
        {
            CalculateInterContigDistance(contigPairedRead, contigPairedRead.Key.Count);
        });
}
/// <summary>
/// Runs the configured distance calculator over the contig mate pair map.
/// </summary>
/// <param name="contigMatePairs">Contig mate pair map; passed to the distance calculator.</param>
/// <returns>Number of entries in the map after the calculation has run.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="contigMatePairs"/> is null.
/// </exception>
protected int CalculateDistanceBetweenContigs(ContigMatePairs contigMatePairs)
{
    if (contigMatePairs == null)
    {
        throw new ArgumentNullException("contigMatePairs");
    }

    _distanceCalculator.CalculateDistance(contigMatePairs);

    // Read the count only after the calculation step: the map is updated by it.
    int pairCount = contigMatePairs.Count;
    return pairCount;
}
/// <summary>
/// Traverses the contig overlap graph to generate scaffold paths.
/// Every graph node whose sequence has an entry in
/// <paramref name="contigPairedReadMaps"/> is traversed (in parallel) and the
/// paths found are gathered into a single list.
/// </summary>
/// <param name="graph">Contig overlap graph.</param>
/// <param name="contigPairedReadMaps">Inter-contig distances.</param>
/// <param name="kmerLength">Length of kmer; must be positive.</param>
/// <param name="depth">Depth to which the graph is searched; must be positive.</param>
/// <returns>List of paths/scaffolds. The order of entries is not fixed because nodes are processed in parallel.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="graph"/> or <paramref name="contigPairedReadMaps"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="kmerLength"/> or <paramref name="depth"/> is not positive.
/// </exception>
public IList<ScaffoldPath> FindPaths(
    DeBruijnGraph graph,
    ContigMatePairs contigPairedReadMaps,
    int kmerLength,
    int depth = 10)
{
    if (graph == null)
    {
        throw new ArgumentNullException("graph");
    }

    if (contigPairedReadMaps == null)
    {
        throw new ArgumentNullException("contigPairedReadMaps");
    }

    if (kmerLength <= 0)
    {
        throw new ArgumentException(Resource.KmerLength);
    }

    if (depth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    // Keep the inputs in instance state for the traversal that follows.
    _graph = graph;
    _kmerLength = kmerLength;
    _depth = depth;

    List<ScaffoldPath> collectedPaths = new List<ScaffoldPath>();

    Parallel.ForEach(
        _graph.Nodes,
        (DeBruijnNode currentNode) =>
        {
            Dictionary<ISequence, IList<ValidMatePair>> matePairsForContig;
            ISequence nodeSequence = graph.GetNodeSequence(currentNode);

            // Nodes without mate pair information contribute no paths.
            if (!contigPairedReadMaps.TryGetValue(nodeSequence, out matePairsForContig))
            {
                return;
            }

            List<ScaffoldPath> pathsFromNode = TraverseGraph(currentNode, matePairsForContig);

            // The result list is shared by all workers, so appends are serialized.
            lock (collectedPaths)
            {
                collectedPaths.AddRange(pathsFromNode);
            }
        });

    return collectedPaths;
}
/// <summary>
/// Filters paired reads based on orientation.
/// First pass: for every (contig, other contig) entry, self-links are emptied;
/// when the reverse entry (other contig -> contig) exists, both mate pair lists
/// are handed to <c>OrientationFilter</c>; otherwise the list is emptied if it
/// holds fewer than <paramref name="redundancy"/> mate pairs.
/// Second pass: a new map is built containing only the non-empty lists.
/// </summary>
/// <param name="matePairMap">Map between contigs using mate pair information.
/// The mate pair lists inside this map are modified in place.</param>
/// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
/// Hierarchical Scaffolding With Bambus
/// by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
/// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
/// <returns>New contig mate pair map holding only the links that still have mate pairs.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="matePairMap"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="redundancy"/> is negative.</exception>
public ContigMatePairs FilterPairedReads(ContigMatePairs matePairMap, int redundancy = 2)
{
    if (null == matePairMap)
    {
        throw new ArgumentNullException("matePairMap");
    }

    if (redundancy < 0)
    {
        throw new ArgumentException(Resource.NegativeRedundancy);
    }

    foreach (KeyValuePair<ISequence, Dictionary<ISequence, IList<ValidMatePair>>> forwardEntry in matePairMap)
    {
        foreach (KeyValuePair<ISequence, IList<ValidMatePair>> link in forwardEntry.Value)
        {
            // A contig linked to itself carries no scaffolding information.
            if (forwardEntry.Key == link.Key)
            {
                link.Value.Clear();
                continue;
            }

            Dictionary<ISequence, IList<ValidMatePair>> reverseLinks;
            IList<ValidMatePair> reversePairs;

            bool hasReverseLink =
                matePairMap.TryGetValue(link.Key, out reverseLinks)
                && reverseLinks.TryGetValue(forwardEntry.Key, out reversePairs);

            if (hasReverseLink)
            {
                // Both directions are present: let the orientation filter decide.
                OrientationFilter(reversePairs, link.Value, redundancy);
            }
            else if (link.Value.Count < redundancy)
            {
                // One-directional link without enough supporting mate pairs.
                link.Value.Clear();
            }
        }
    }

    ContigMatePairs filteredMap = new ContigMatePairs();

    Parallel.ForEach(
        matePairMap,
        (KeyValuePair<ISequence, Dictionary<ISequence, IList<ValidMatePair>>> entry) =>
        {
            Dictionary<ISequence, IList<ValidMatePair>> survivingLinks =
                new Dictionary<ISequence, IList<ValidMatePair>>();

            foreach (KeyValuePair<ISequence, IList<ValidMatePair>> link in entry.Value)
            {
                if (link.Value.Count > 0)
                {
                    survivingLinks.Add(link.Key, link.Value);
                }
            }

            if (survivingLinks.Count == 0)
            {
                return;
            }

            // The result map is shared by all workers, so inserts are serialized.
            lock (filteredMap)
            {
                filteredMap.Add(entry.Key, survivingLinks);
            }
        });

    return filteredMap;
}
/// <summary>
/// Finds scaffold paths in the contig overlap graph by delegating to the
/// configured path tracer, using the stored kmer length and search depth.
/// </summary>
/// <param name="contigGraph">Contig graph.</param>
/// <param name="contigMatePairs">Contig mate pair map.</param>
/// <returns>List of scaffold paths.</returns>
protected IList<ScaffoldPath> TracePath(DeBruijnGraph contigGraph, ContigMatePairs contigMatePairs)
{
    IList<ScaffoldPath> scaffoldPaths = _tracePath.FindPaths(
        contigGraph,
        contigMatePairs,
        _kmerLength,
        _depth);

    return scaffoldPaths;
}
/// <summary>
/// Filters the contig mate pair map by delegating to the configured paired-read
/// filter, using the stored redundancy threshold.
/// </summary>
/// <param name="contigMatePairs">Contig mate pair map.</param>
/// <returns>Contig mate pair map returned by the filter.</returns>
protected ContigMatePairs FilterReadsBasedOnOrientation(ContigMatePairs contigMatePairs)
{
    ContigMatePairs filteredPairs = _pairedReadFilter.FilterPairedReads(contigMatePairs, _redundancy);
    return filteredPairs;
}
/// <summary>
/// Builds scaffolds from a list of reads and contigs.
/// The caller's <paramref name="contigs"/> list is not modified: contigs are
/// pruned on a private working copy.
/// </summary>
/// <param name="reads">List of reads. Reads containing ambiguous symbols or gaps are ignored.</param>
/// <param name="contigs">List of contigs.</param>
/// <param name="kmerLength">Kmer length; must be positive.</param>
/// <param name="depth">Depth for graph traversal; must be positive.</param>
/// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
/// Hierarchical Scaffolding With Bambus
/// by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
/// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
/// <returns>List of scaffold sequences.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="contigs"/> or <paramref name="reads"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="kmerLength"/> or <paramref name="depth"/> is not
/// positive, or <paramref name="redundancy"/> is negative.
/// </exception>
public IList<ISequence> BuildScaffold(
    IList<ISequence> reads,
    IList<ISequence> contigs,
    int kmerLength,
    int depth = 10,
    int redundancy = 2)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (null == reads)
    {
        throw new ArgumentNullException("reads");
    }

    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLength);
    }

    if (depth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    if (redundancy < 0)
    {
        throw new ArgumentException(Resource.NegativeRedundancy);
    }

    _depth = depth;
    _redundancy = redundancy;
    _kmerLength = kmerLength;

    // Only reads free of ambiguous symbols and gaps take part in scaffolding.
    IList<ISequence> readSeqs = reads.AsParallel()
        .Where(s => s.All<ISequenceItem>(c => !c.IsAmbiguous && !c.IsGap))
        .ToList();

    // Step1: Generate contig overlap graph.
    DeBruijnGraph contigGraph = GenerateContigOverlapGraph(contigs);

    // Prune on a private copy so the caller's list is left untouched and
    // fixed-size/read-only inputs (e.g. arrays) do not make Remove throw.
    IList<ISequence> workingContigs = new List<ISequence>(contigs);
    IEnumerable<DeBruijnNode> nodes = contigGraph.Nodes.Where(t => t.ExtensionsCount == 0);
    foreach (DeBruijnNode node in nodes)
    {
        // Contigs whose graph node has no extensions are dropped before read mapping.
        workingContigs.Remove(contigGraph.GetNodeSequence(node));
    }

    // Step2: Map Reads to contigs.
    ReadContigMap readContigMap = ReadContigMap(workingContigs, readSeqs);
    workingContigs = null;

    // Step3: Generate Contig Mate Pair Map.
    ContigMatePairs contigMatePairs = MapPairedReadsToContigs(readContigMap, readSeqs);
    readContigMap = null;

    // Step4: Filter Paired Reads.
    contigMatePairs = FilterReadsBasedOnOrientation(contigMatePairs);

    // Step5: Distance Calculation.
    CalculateDistanceBetweenContigs(contigMatePairs);

    // Step6: Trace Scaffold Paths.
    IList<ScaffoldPath> paths = TracePath(contigGraph, contigMatePairs);
    contigMatePairs = null;

    // Step7: Assemble paths.
    PathPurger(paths);

    // Step8: Generate sequence of scaffolds.
    return GenerateScaffold(contigGraph, paths);
}