/// <summary>
/// Map paired reads to contigs using FASTA sequence header.
/// The work is delegated to the paired-read mapper held in <c>_mapPairedReads</c>.
/// </summary>
/// <param name="readContigMap">Map between reads and contigs.</param>
/// <param name="reads">Sequences of reads.</param>
/// <returns>Contig Mate Pair map produced by the paired-read mapper.</returns>
protected ContigMatePairs MapPairedReadsToContigs(ReadContigMap readContigMap, IList<ISequence> reads)
{
    // The mapper returns its own result instance, so no placeholder
    // ContigMatePairs needs to be allocated beforehand.
    return _mapPairedReads.MapContigToMatePairs(reads, readContigMap);
}
/// <summary>
/// Public method mapping Reads to Contigs.
/// Every read is split into kmers, each kmer (or its reverse complement) is looked up
/// in a kmer dictionary built from the contigs, and for every contig hit by the read
/// a task running <c>MapRead</c> computes the read-to-contig overlaps.
/// </summary>
/// <param name="contigs">List of sequences of contigs.</param>
/// <param name="reads">List of input reads.</param>
/// <param name="kmerLength">Length of kmer.</param>
/// <returns>Contig Read Map, keyed by the DisplayID of each read.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="contigs"/> or <paramref name="reads"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Raised inside the parallel loop when two reads share the same DisplayID
/// (Parallel.ForEach surfaces exceptions from the loop body wrapped in an AggregateException).
/// </exception>
public ReadContigMap Map(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (reads == null)
    {
        throw new ArgumentNullException("reads");
    }

    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
    ReadContigMap maps = new ReadContigMap();

    // One scratch buffer per worker thread for building reverse complements.
    using (ThreadLocal<char[]> rcBuilder = new ThreadLocal<char[]>(() => new char[kmerLength]))
    {
        Parallel.ForEach(reads, (ISequence readSequence) =>
        {
            IEnumerable<string> kmers = SequenceToKmerBuilder.GetKmerStrings(readSequence, kmerLength);
            ReadIndex read = new ReadIndex(readSequence);
            IList<KmerIndexer> positions;

            // Collect the contig positions of every kmer of the read that occurs in a contig,
            // trying the kmer itself first and its reverse complement second.
            foreach (string kmer in kmers)
            {
                if (map.TryGetValue(kmer, out positions) ||
                    map.TryGetValue(kmer.GetReverseComplement(rcBuilder.Value), out positions))
                {
                    read.ContigReadMatchIndexes.Add(positions);
                }
            }

            IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

            // Contigs for which a task has been generated. The list keeps the order in which
            // the tasks were created (tasks[i] belongs to visitedContigs[i]); the set provides
            // constant-time membership checks instead of scanning the list for every kmer hit.
            IList<int> visitedContigs = new List<int>();
            HashSet<int> seenContigs = new HashSet<int>();

            // Creates one task per contig, started at the first read position that hits it.
            for (int index = 0; index < read.ContigReadMatchIndexes.Count; index++)
            {
                int readPosition = index;
                foreach (KmerIndexer kmer in read.ContigReadMatchIndexes[index])
                {
                    int contigIndex = kmer.SequenceIndex;
                    if (!seenContigs.Add(contigIndex))
                    {
                        continue;
                    }

                    visitedContigs.Add(contigIndex);

                    // NOTE: the previous call passed TaskCreationOptions.AttachedToParent next to a
                    // one-parameter lambda, which selects the (Func<object, TResult>, object state)
                    // overload - the option was only boxed as an unused state object and never
                    // applied. The tasks therefore always ran detached; that behaviour is kept.
                    tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                        () => MapRead(
                            readPosition,
                            read.ContigReadMatchIndexes,
                            contigIndex,
                            read.ReadSequence.Count,
                            kmerLength)));
                }
            }

            // Reading Result blocks until the corresponding task has finished.
            Dictionary<ISequence, IList<ReadMap>> overlapMaps = new Dictionary<ISequence, IList<ReadMap>>();
            for (int index = 0; index < visitedContigs.Count; index++)
            {
                overlapMaps.Add(contigs[visitedContigs[index]], tasks[index].Result);
            }

            // The result map is shared between all worker threads.
            lock (maps)
            {
                if (maps.ContainsKey(read.ReadSequence.DisplayID))
                {
                    throw new ArgumentException(
                        string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, read.ReadSequence.DisplayID));
                }

                maps.Add(read.ReadSequence.DisplayID, overlapMaps);
            }
        });
    }

    return maps;
}
/// <summary>
/// Builds scaffolds from list of reads and contigs.
/// The supplied <paramref name="contigs"/> list is not modified; contigs without
/// overlaps are dropped from a private working copy only.
/// </summary>
/// <param name="reads">List of reads.</param>
/// <param name="contigs">List of contigs.</param>
/// <param name="kmerLength">Kmer Length.</param>
/// <param name="depth">Depth for graph traversal.</param>
/// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
/// Hierarchical Scaffolding With Bambus
/// by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
/// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
/// <returns>List of scaffold sequences.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="contigs"/> or <paramref name="reads"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="kmerLength"/> or <paramref name="depth"/> is not positive,
/// or <paramref name="redundancy"/> is negative.
/// </exception>
public IList<ISequence> BuildScaffold(
    IList<ISequence> reads,
    IList<ISequence> contigs,
    int kmerLength,
    int depth = 10,
    int redundancy = 2)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (null == reads)
    {
        throw new ArgumentNullException("reads");
    }

    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLength);
    }

    if (depth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    if (redundancy < 0)
    {
        throw new ArgumentException(Resource.NegativeRedundancy);
    }

    _depth = depth;
    _redundancy = redundancy;
    _kmerLength = kmerLength;

    // Keep only the reads that contain neither ambiguous symbols nor gaps.
    IList<ISequence> readSeqs = reads.AsParallel().Where(s => s.All<ISequenceItem>(c => !c.IsAmbiguous && !c.IsGap)).ToList();

    // Work on a private copy: Step1 removes entries, which must neither alter the
    // caller's collection nor fail when the caller passed a fixed-size list (e.g. an array).
    IList<ISequence> workingContigs = new List<ISequence>(contigs);

    // Step1: Generate contig overlap graph and drop the contigs whose node has no extensions.
    DeBruijnGraph contigGraph = GenerateContigOverlapGraph(workingContigs);
    IEnumerable<DeBruijnNode> nodes = contigGraph.Nodes.Where(t => t.ExtensionsCount == 0);

    foreach (DeBruijnNode node in nodes)
    {
        workingContigs.Remove(contigGraph.GetNodeSequence(node));
    }

    // Step2: Map Reads to contigs.
    ReadContigMap readContigMap = ReadContigMap(workingContigs, readSeqs);
    workingContigs = null;

    // Step3: Generate Contig Mate Pair Map.
    ContigMatePairs contigMatePairs = MapPairedReadsToContigs(readContigMap, readSeqs);
    readContigMap = null;

    // Step4: Filter Paired Reads.
    contigMatePairs = FilterReadsBasedOnOrientation(contigMatePairs);

    // Step5: Distance Calculation.
    CalculateDistanceBetweenContigs(contigMatePairs);

    // Step6: Trace Scaffold Paths.
    IList<ScaffoldPath> paths = TracePath(contigGraph, contigMatePairs);
    contigMatePairs = null;

    // Step7: Assemble paths.
    PathPurger(paths);

    // Step8: Generate sequence of scaffolds.
    return GenerateScaffold(contigGraph, paths);
}