/// <summary>
/// Maps paired reads to contigs using the FASTA sequence header.
/// The work is delegated to the <c>mapPairedReads</c> member of this instance.
/// </summary>
/// <param name="readContigMaps">Map between reads and contigs.</param>
/// <param name="reads">Sequences of reads.</param>
/// <returns>Contig mate pair map produced by the mate-pair mapper.</returns>
protected ContigMatePairs MapPairedReadsToContigs(ReadContigMap readContigMaps, IEnumerable<ISequence> reads)
{
    // Note the argument order of the delegate: reads first, then the read/contig map.
    return this.mapPairedReads.MapContigToMatePairs(reads, readContigMaps);
}
/// <summary>
/// Public method mapping Reads to Contigs.
/// A k-mer dictionary is built from the contigs; each read is then processed in
/// parallel: its k-mers (or their reverse complements) are looked up in the
/// dictionary and, for every distinct contig hit, a task computes the read maps
/// for that contig.
/// </summary>
/// <param name="contigs">List of sequences of contigs.</param>
/// <param name="reads">List of input reads.</param>
/// <param name="kmerLength">Length of kmer.</param>
/// <returns>Contig Read Map, keyed by read ID.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="contigs"/> or <paramref name="reads"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Two reads share the same ID. Because it is raised inside the parallel loop body,
/// callers observe it wrapped in an <see cref="AggregateException"/>.
/// </exception>
public ReadContigMap Map(IList<ISequence> contigs, IEnumerable<ISequence> reads, int kmerLength)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (reads == null)
    {
        throw new ArgumentNullException("reads");
    }

    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
    ReadContigMap maps = new ReadContigMap();

    Parallel.ForEach(
        reads,
        readSequence =>
        {
            IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(readSequence, kmerLength);
            ReadIndex read = new ReadIndex(readSequence);

            // Collect, per k-mer of the read, the contig positions it matches.
            // The reverse complement is only tried when the forward k-mer misses.
            foreach (ISequence kmer in kmers)
            {
                IList<KmerIndexer> positions;
                if (map.TryGetValue(kmer, out positions) ||
                    map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                {
                    read.ContigReadMatchIndexes.Add(positions);
                }
            }

            IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

            // Contigs for which a task has been generated, in creation order
            // (visitedContigs[i] corresponds to tasks[i]).
            IList<long> visitedContigs = new List<long>();

            // Constant-time membership test for the list above.
            HashSet<long> seenContigs = new HashSet<long>();

            // Creates one task per distinct contig, started from the first read
            // position at which that contig is encountered.
            for (int index = 0; index < read.ContigReadMatchIndexes.Count; index++)
            {
                // Copy the loop counter so the closure below sees a stable value.
                int readPosition = index;

                foreach (KmerIndexer match in read.ContigReadMatchIndexes[index])
                {
                    long contigIndex = match.SequenceIndex;
                    if (!seenContigs.Add(contigIndex))
                    {
                        continue;
                    }

                    visitedContigs.Add(contigIndex);

                    // No creation options are passed: every task is joined explicitly
                    // through Result below, so attaching to a parent task is not needed.
                    tasks.Add(
                        Task<IList<ReadMap>>.Factory.StartNew(
                            () => MapRead(
                                readPosition,
                                read.ContigReadMatchIndexes,
                                contigIndex,
                                read.ReadSequence.Count,
                                kmerLength)));
                }
            }

            // Join the tasks; Result blocks until the corresponding task has finished.
            var overlapMaps = new Dictionary<ISequence, IList<ReadMap>>();
            for (int index = 0; index < visitedContigs.Count; index++)
            {
                overlapMaps.Add(contigs.ElementAt(visitedContigs[index]), tasks[index].Result);
            }

            // The result map is shared between loop iterations, so guard the update.
            lock (maps)
            {
                if (maps.ContainsKey(read.ReadSequence.ID))
                {
                    throw new ArgumentException(
                        string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, read.ReadSequence.ID));
                }

                maps.Add(read.ReadSequence.ID, overlapMaps);
            }
        });

    return maps;
}
/// <summary>
/// Builds scaffolds from list of reads and contigs.
/// The pipeline runs in eight steps: contig overlap graph, read-to-contig mapping,
/// mate pair mapping, orientation filtering, distance calculation, path tracing,
/// path purging and scaffold generation.
/// </summary>
/// <param name="reads">List of reads.</param>
/// <param name="contigs">List of contigs.</param>
/// <param name="lengthofKmer">Kmer Length. Must be greater than zero.</param>
/// <param name="depth">Depth for graph traversal. Must be greater than zero.</param>
/// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
/// Must not be negative.
/// Hierarchical Scaffolding With Bambus
/// by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
/// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
/// <returns>List of scaffold sequences.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="contigs"/> or <paramref name="reads"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="lengthofKmer"/> or <paramref name="depth"/> is not positive,
/// or <paramref name="redundancy"/> is negative.
/// </exception>
public IList<ISequence> BuildScaffold(
    IEnumerable<ISequence> reads,
    IList<ISequence> contigs,
    int lengthofKmer,
    int depth = 10,
    int redundancy = 2)
{
    // Argument checks; contigs is checked before reads.
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (reads == null)
    {
        throw new ArgumentNullException("reads");
    }

    if (lengthofKmer <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLength);
    }

    if (depth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    if (redundancy < 0)
    {
        throw new ArgumentException(Resource.NegativeRedundancy);
    }

    // Store the settings on the instance before any helper runs.
    this.depthField = depth;
    this.redundancyField = redundancy;
    this.kmerLength = lengthofKmer;

    IEnumerable<ISequence> validatedReads = ValidateReads(reads);

    // Step 1: Generate contig overlap graph from a private copy of the contig list,
    // so the caller's list is never modified.
    IList<ISequence> mappableContigs = new List<ISequence>(contigs);
    ContigGraph contigGraph = GenerateContigOverlapGraph(mappableContigs);

    // Contigs whose graph node has no extensions are left out of read mapping.
    foreach (Node isolated in contigGraph.Nodes.Where(n => n.ExtensionsCount == 0))
    {
        mappableContigs.Remove(contigGraph.GetNodeSequence(isolated));
    }

    // Step 2: Map reads to contigs.
    ReadContigMap readContigMaps = ReadContigMap(mappableContigs, validatedReads);
    mappableContigs = null; // no longer needed; drop the reference

    // Step 3: Generate contig mate pair map.
    ContigMatePairs contigMatePairs = MapPairedReadsToContigs(readContigMaps, validatedReads);
    readContigMaps = null; // no longer needed; drop the reference

    // Step 4: Filter paired reads based on orientation.
    contigMatePairs = FilterReadsBasedOnOrientation(contigMatePairs);

    // Step 5: Distance calculation.
    CalculateDistanceBetweenContigs(contigMatePairs);

    // Step 6: Trace scaffold paths.
    IList<ScaffoldPath> scaffoldPaths = TracePath(contigGraph, contigMatePairs);
    contigMatePairs = null; // no longer needed; drop the reference

    // Step 7: Run the path purger over the traced paths.
    PathPurger(scaffoldPaths);

    // Step 8: Generate sequence of scaffolds.
    IList<ISequence> scaffolds = GenerateScaffold(contigGraph, scaffoldPaths);
    return scaffolds;
}