Exemplo n.º 1
0
        /// <summary>
        /// Aligns reads to contigs using kmer method of alignment.
        /// </summary>
        /// <remarks>
        /// A kmer dictionary is built from <paramref name="reads"/>. Every kmer of every contig is
        /// looked up in it (first as-is, then reverse-complemented) to find the reads sharing that
        /// kmer. Each read that shares at least one kmer with a contig is mapped against that contig
        /// exactly once through <c>MapRead</c>, and every resulting map becomes one
        /// <c>Contig.AssembledSequence</c> of the returned contig.
        /// </remarks>
        /// <param name="contigs">List of contig sequences.</param>
        /// <param name="reads">List of read sequences.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>List of Contig, one per input contig, in the order of <paramref name="contigs"/>.</returns>
        public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
        {
            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

            // AsOrdered keeps the resulting contigs in input order; plain AsParallel gives no
            // ordering guarantee, which would make the output order vary between runs.
            IList<ContigIndex> contigDatas = contigs.AsParallel().AsOrdered().Select(contig =>
            {
                ContigIndex contigIndex = new ContigIndex(contig);
                foreach (ISequence kmer in SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength))
                {
                    IList<KmerIndexer> positions;
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        contigIndex.ContigReadMatchIndexes.Add(positions);
                    }
                    else
                    {
                        // Add an empty entry for an unmatched kmer so that every kmer contributes
                        // exactly one entry; the entry index is later handed to MapRead.
                        contigIndex.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
                    }
                }

                return contigIndex;
            }).ToList();

            return contigDatas.Select(contigData =>
            {
                var tasks = new List<Task<IList<ReadMap>>>();

                // Indexes of the reads for which a task has been started, in start order
                // (visitedReads[i] corresponds to tasks[i]). The set gives O(1) membership checks.
                var visitedReads = new List<long>();
                var seenReads = new HashSet<long>();

                // Starts one task per distinct read that shares a kmer with this contig.
                for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
                {
                    // Per-iteration copies so each task closure captures its own values.
                    int matchPosition = index;
                    foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
                    {
                        long readIndex = kmer.SequenceIndex;
                        if (seenReads.Add(readIndex))
                        {
                            visitedReads.Add(readIndex);

                            // Deliberately the Func<TResult> overload: passing a TaskCreationOptions
                            // value next to a one-parameter lambda selects the (function, state)
                            // overload, where the option is treated as a state object and ignored.
                            tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                                () => MapRead(matchPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength)));
                        }
                    }
                }

                Contig contigOutputStructure = new Contig();
                contigOutputStructure.Consensus = contigData.ContigSequence;

                for (int index = 0; index < visitedReads.Count; index++)
                {
                    // Blocks until the mapping task for this read has finished.
                    foreach (ReadMap readMap in tasks[index].Result)
                    {
                        Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                        {
                            Length = readMap.Length,
                            Position = readMap.StartPositionOfContig,
                            ReadPosition = readMap.StartPositionOfRead,
                            Sequence = reads.ElementAt(visitedReads[index])
                        };

                        // The read is taken as forward when the overlapping region of the read is
                        // symbol-for-symbol identical to the matching region of the consensus;
                        // otherwise it is flagged as reverse-complemented.
                        bool sameOrientation = contigOutputStructure.Consensus
                            .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                            .SequenceEqual(
                                assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length));

                        assembledSeq.IsComplemented = !sameOrientation;
                        assembledSeq.IsReversed = !sameOrientation;

                        contigOutputStructure.Sequences.Add(assembledSeq);
                    }
                }

                return contigOutputStructure;
            }).ToList();
        }
        /// <summary>
        /// Public method mapping Reads to Contigs.
        /// </summary>
        /// <remarks>
        /// A kmer dictionary is built from <paramref name="contigs"/>. Reads are processed in
        /// parallel: each kmer of a read is looked up (first as-is, then reverse-complemented) and,
        /// for every distinct contig that shares a kmer with the read, <c>MapRead</c> is run once.
        /// The per-contig results are stored in the returned map under the read's ID.
        /// Because the work runs inside <c>Parallel.ForEach</c>, the <see cref="ArgumentException"/>
        /// raised for a duplicate read ID reaches the caller wrapped in an
        /// <see cref="AggregateException"/>.
        /// </remarks>
        /// <param name="contigs">List of sequences of contigs.</param>
        /// <param name="reads">List of input reads.</param>
        /// <param name="kmerLength">Length of kmer.</param>
        /// <returns>Contig Read Map.</returns>
        public ReadContigMap Map(IList<ISequence> contigs, IEnumerable<ISequence> reads, int kmerLength)
        {
            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
            ReadContigMap maps = new ReadContigMap();

            // Dedicated gate guarding writes to the shared result map from the parallel loop bodies.
            object mapsGate = new object();

            Parallel.ForEach(reads, readSequence =>
            {
                ReadIndex read = new ReadIndex(readSequence);
                foreach (ISequence kmer in SequenceToKmerBuilder.GetKmerSequences(readSequence, kmerLength))
                {
                    IList<KmerIndexer> positions;
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        read.ContigReadMatchIndexes.Add(positions);
                    }

                    // NOTE(review): unmatched kmers add no entry here, so an entry's index in
                    // ContigReadMatchIndexes is not necessarily the kmer's offset in the read.
                    // Confirm that MapRead expects this.
                }

                var tasks = new List<Task<IList<ReadMap>>>();

                // Indexes of the contigs for which a task has been started, in start order
                // (visitedContigs[i] corresponds to tasks[i]). The set gives O(1) membership checks.
                var visitedContigs = new List<long>();
                var seenContigs = new HashSet<long>();

                // Starts one task per distinct contig that shares a kmer with this read.
                for (int index = 0; index < read.ContigReadMatchIndexes.Count; index++)
                {
                    // Per-iteration copies so each task closure captures its own values.
                    int matchPosition = index;
                    foreach (KmerIndexer kmer in read.ContigReadMatchIndexes[index])
                    {
                        long contigIndex = kmer.SequenceIndex;
                        if (seenContigs.Add(contigIndex))
                        {
                            visitedContigs.Add(contigIndex);

                            // Deliberately the Func<TResult> overload: passing a TaskCreationOptions
                            // value next to a one-parameter lambda selects the (function, state)
                            // overload, where the option is treated as a state object and ignored.
                            tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                                () => MapRead(
                                    matchPosition,
                                    read.ContigReadMatchIndexes,
                                    contigIndex,
                                    read.ReadSequence.Count,
                                    kmerLength)));
                        }
                    }
                }

                // Collect the results per contig; reading Result blocks until that task is done.
                var overlapMaps = new Dictionary<ISequence, IList<ReadMap>>();
                for (int index = 0; index < visitedContigs.Count; index++)
                {
                    overlapMaps.Add(contigs.ElementAt(visitedContigs[index]), tasks[index].Result);
                }

                lock (mapsGate)
                {
                    // Read IDs are the keys of the result map, so they have to be unique.
                    if (maps.ContainsKey(read.ReadSequence.ID))
                    {
                        throw new ArgumentException(
                            string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, read.ReadSequence.ID));
                    }

                    maps.Add(read.ReadSequence.ID, overlapMaps);
                }
            });

            return maps;
        }