/// <summary>
/// Public method mapping Reads to Contigs.
/// A k-mer dictionary is built from the contigs; every read is then processed in
/// parallel: each k-mer of the read (or its reverse complement) is looked up in
/// the dictionary, and MapRead is run once for every distinct contig index found
/// among the matches.
/// </summary>
/// <param name="contigs">List of sequences of contigs.</param>
/// <param name="reads">List of input reads.</param>
/// <param name="kmerLength">Length of kmer.</param>
/// <returns>Contig Read Map, keyed by read ID; each value maps a contig sequence to the read maps produced by MapRead.</returns>
/// <exception cref="ArgumentException">
/// Raised inside the parallel loop body when two reads share the same ID.
/// NOTE(review): Parallel.ForEach reports body exceptions wrapped in an AggregateException.
/// </exception>
public ReadContigMap Map(IList<ISequence> contigs, IEnumerable<ISequence> reads, int kmerLength)
{
    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
    ReadContigMap maps = new ReadContigMap();

    Parallel.ForEach(
        reads,
        readSequence =>
        {
            IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(readSequence, kmerLength);
            ReadIndex read = new ReadIndex(readSequence);

            // Collect the contig positions of every read k-mer found in the dictionary,
            // trying the k-mer itself first and its reverse complement second.
            // K-mers without a match contribute no entry.
            foreach (ISequence kmer in kmers)
            {
                IList<KmerIndexer> positions;
                if (map.TryGetValue(kmer, out positions) ||
                    map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                {
                    read.ContigReadMatchIndexes.Add(positions);
                }
            }

            IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

            // Contig indices for which a task has been started, in start order.
            // visitedContigs[i] is the contig handled by tasks[i].
            IList<long> visitedContigs = new List<long>();

            // Constant-time membership test; replaces a linear List.Contains scan per match.
            HashSet<long> seenContigs = new HashSet<long>();

            // Start one task per distinct contig, anchored at the first match entry
            // in which that contig appears.
            for (int index = 0; index < read.ContigReadMatchIndexes.Count; index++)
            {
                // Per-iteration copy so each closure captures its own value.
                int readPosition = index;
                foreach (KmerIndexer match in read.ContigReadMatchIndexes[index])
                {
                    long contigIndex = match.SequenceIndex;
                    if (!seenContigs.Add(contigIndex))
                    {
                        continue;
                    }

                    visitedContigs.Add(contigIndex);

                    // Parameterless lambda on purpose: with a one-parameter lambda the
                    // second argument binds to the "object state" overload, so a
                    // TaskCreationOptions value passed there is never applied as an
                    // option. The results are joined below through Result, so no
                    // parent attachment is needed.
                    tasks.Add(
                        Task<IList<ReadMap>>.Factory.StartNew(
                            () => MapRead(
                                readPosition,
                                read.ContigReadMatchIndexes,
                                contigIndex,
                                read.ReadSequence.Count,
                                kmerLength)));
                }
            }

            // Pair each contig with its task result (Result blocks until the task completes).
            var overlapMaps = new Dictionary<ISequence, IList<ReadMap>>();
            for (int index = 0; index < visitedContigs.Count; index++)
            {
                overlapMaps.Add(contigs.ElementAt(visitedContigs[index]), tasks[index].Result);
            }

            // The result map is shared by all loop iterations; serialize access to it.
            lock (maps)
            {
                if (!maps.ContainsKey(read.ReadSequence.ID))
                {
                    maps.Add(read.ReadSequence.ID, overlapMaps);
                }
                else
                {
                    throw new ArgumentException(
                        string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, read.ReadSequence.ID));
                }
            }
        });

    return maps;
}
/// <summary>
/// Aligns reads to contigs using kmer method of alignment.
/// A k-mer dictionary is built from the reads; for every contig, each of its
/// k-mers (or the reverse complement) is looked up in that dictionary, MapRead is
/// run once per distinct read index found, and every resulting read map is turned
/// into an assembled sequence attached to the contig.
/// </summary>
/// <remarks>
/// NOTE(review): the k-mer lookup stage uses AsParallel() without AsOrdered(), so
/// the returned contigs may not follow the order of <paramref name="contigs"/> - confirm
/// whether callers rely on ordering.
/// </remarks>
/// <param name="contigs">List of contig sequences.</param>
/// <param name="reads">List of read sequences.</param>
/// <param name="kmerLength">Kmer Length.</param>
/// <returns>List of Contig.</returns>
public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
{
    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

    // Stage 1: for every contig, record the read positions matching each of its k-mers.
    IList<ContigIndex> contigDatas = contigs.AsParallel().Select(contig =>
    {
        IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
        ContigIndex index = new ContigIndex(contig);
        foreach (ISequence kmer in kmers)
        {
            IList<KmerIndexer> positions;
            if (map.TryGetValue(kmer, out positions) ||
                map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
            {
                index.ContigReadMatchIndexes.Add(positions);
            }
            else
            {
                // Keep one entry per k-mer, even when nothing matched.
                index.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
            }
        }

        return index;
    }).ToList();

    // Stage 2: map the matching reads onto each contig and build the output structure.
    return contigDatas.Select(contigData =>
    {
        IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

        // Read indices for which a task has been started, in start order.
        // visitedReads[i] is the read handled by tasks[i].
        IList<long> visitedReads = new List<long>();

        // Constant-time membership test; replaces a linear List.Contains scan per match.
        HashSet<long> seenReads = new HashSet<long>();

        // Start one task per distinct read, anchored at the first k-mer entry in
        // which that read appears.
        for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
        {
            // Per-iteration copy so each closure captures its own value.
            int kmerPosition = index;
            foreach (KmerIndexer match in contigData.ContigReadMatchIndexes[index])
            {
                // The dictionary was built from the reads, so this index refers to a read.
                long readIndex = match.SequenceIndex;
                if (!seenReads.Add(readIndex))
                {
                    continue;
                }

                visitedReads.Add(readIndex);

                // Parameterless lambda on purpose: with a one-parameter lambda the
                // second argument binds to the "object state" overload, so a
                // TaskCreationOptions value passed there is never applied as an
                // option. The results are joined below through Result.
                tasks.Add(
                    Task<IList<ReadMap>>.Factory.StartNew(
                        () => MapRead(kmerPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength)));
            }
        }

        Contig contigOutputStructure = new Contig();
        contigOutputStructure.Consensus = contigData.ContigSequence;

        for (int index = 0; index < visitedReads.Count; index++)
        {
            // Result blocks until the corresponding task has completed.
            foreach (ReadMap readMap in tasks[index].Result)
            {
                Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                {
                    Length = readMap.Length,
                    Position = readMap.StartPositionOfContig,
                    ReadPosition = readMap.StartPositionOfRead,
                    Sequence = reads.ElementAt(visitedReads[index])
                };

                string consensusPart = new string(
                    contigOutputStructure.Consensus
                        .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                        .Select(a => (char)a)
                        .ToArray());
                string readPart = new string(
                    assembledSeq.Sequence
                        .GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length)
                        .Select(a => (char)a)
                        .ToArray());

                // If the read segment does not equal the consensus segment symbol for
                // symbol, the read is flagged as both reversed and complemented.
                bool matchesAsIs = consensusPart.Equals(readPart);
                assembledSeq.IsComplemented = !matchesAsIs;
                assembledSeq.IsReversed = !matchesAsIs;

                contigOutputStructure.Sequences.Add(assembledSeq);
            }
        }

        return contigOutputStructure;
    }).ToList();
}
public void PathPurger1()
{
    // Builds a contig graph from the 7-mers of a short DNA sequence, creates one
    // scaffold path over every node plus several paths over sub-ranges of the node
    // list, and checks that PurgePath leaves a single path which Compare accepts
    // against the full node list.
    const int KmerLength = 7;
    ISequence sequence = new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG");
    IList<ISequence> contigsSequence = SequenceToKmerBuilder.GetKmerSequences(sequence, KmerLength).ToList();
    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigsSequence, KmerLength);
    List<Node> contigs = graph.Nodes.ToList();

    IList<ScaffoldPath> paths = new List<ScaffoldPath>();

    // First path: every node of the graph.
    ScaffoldPath fullPath = new ScaffoldPath();
    foreach (Node node in contigs)
    {
        fullPath.Add(new KeyValuePair<Node, Edge>(node, null));
    }

    paths.Add(fullPath);

    // Remaining paths: { start, count } sub-ranges of the node list, in the order
    // they are added. { 11, 0 } yields an empty path.
    int[][] subRanges =
    {
        new[] { 2, 5 },
        new[] { 3, 5 },
        new[] { 6, 5 },
        new[] { 0, 11 },
        new[] { 7, 4 },
        new[] { 11, 0 },
        new[] { 2, 9 },
        new[] { 1, 10 }
    };

    foreach (int[] range in subRanges)
    {
        ScaffoldPath path = new ScaffoldPath();
        foreach (Node node in contigs.GetRange(range[0], range[1]))
        {
            path.Add(new KeyValuePair<Node, Edge>(node, null));
        }

        paths.Add(path);
    }

    PathPurger assembler = new PathPurger();
    assembler.PurgePath(paths);

    // Expected value goes first so a failure message reports the values correctly.
    Assert.AreEqual(1, paths.Count);
    Assert.IsTrue(Compare(paths.First(), contigs));
}