Ejemplo n.º 1
0
        /// <summary>
        /// Builds the contig mate pair map for the supplied reads by delegating
        /// to the paired-read mapper held in <c>mapPairedReads</c>.
        /// </summary>
        /// <param name="readContigMaps">Map between reads and contigs.</param>
        /// <param name="reads">Sequences of reads.</param>
        /// <returns>Contig mate pair map produced by the mapper.</returns>
        protected ContigMatePairs MapPairedReadsToContigs(ReadContigMap readContigMaps, IEnumerable<ISequence> reads)
        {
            // Note the argument order: the mapper takes the reads first, then the read-contig map.
            return this.mapPairedReads.MapContigToMatePairs(reads, readContigMaps);
        }
        /// <summary>
        /// Public method mapping Reads to Contigs.
        /// </summary>
        /// <remarks>
        /// A k-mer dictionary is built once from the contigs. Reads are then processed
        /// in parallel: each read is split into k-mers, every k-mer (or its reverse
        /// complement) found in the dictionary is recorded, and one task per distinct
        /// matching contig is started to compute the read maps via <c>MapRead</c>.
        /// The per-read results are stored under the read's ID.
        /// </remarks>
        /// <param name="contigs">List of sequences of contigs.</param>
        /// <param name="reads">List of input reads.</param>
        /// <param name="kmerLength">Length of kmer.</param>
        /// <returns>Contig Read Map.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="contigs"/> or <paramref name="reads"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Two reads share the same ID. The exception is raised inside the parallel
        /// loop, so callers observe it wrapped in an <see cref="AggregateException"/>.
        /// </exception>
        public ReadContigMap Map(IList<ISequence> contigs, IEnumerable<ISequence> reads, int kmerLength)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (reads == null)
            {
                throw new ArgumentNullException("reads");
            }

            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
            ReadContigMap maps = new ReadContigMap();

            Parallel.ForEach(reads, readSequence =>
            {
                IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(readSequence, kmerLength);
                ReadIndex read = new ReadIndex(readSequence);

                // Record the contig positions of every k-mer of this read that occurs
                // in the contigs, in either orientation. K-mers without a hit are skipped.
                foreach (ISequence kmer in kmers)
                {
                    IList<KmerIndexer> positions;
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        read.ContigReadMatchIndexes.Add(positions);
                    }
                }

                IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

                // Contigs for which a task has been started, in start order.
                // tasks[i] always belongs to visitedContigs[i].
                IList<long> visitedContigs = new List<long>();

                // Constant-time membership test for visitedContigs; a list scan here
                // would make the nested loop quadratic in the number of contigs hit.
                HashSet<long> seenContigs = new HashSet<long>();

                // Start one task per distinct contig, seeded with the index of the
                // first matched k-mer entry that references that contig.
                for (int index = 0; index < read.ContigReadMatchIndexes.Count; index++)
                {
                    int readPosition = index;
                    foreach (KmerIndexer match in read.ContigReadMatchIndexes[index])
                    {
                        long contigIndex = match.SequenceIndex;
                        if (seenContigs.Add(contigIndex))
                        {
                            visitedContigs.Add(contigIndex);

                            // The tasks are joined explicitly through Result below, so no
                            // creation options are needed. The previous code passed
                            // TaskCreationOptions.AttachedToParent to the
                            // (Func<object, TResult>, object state) overload, where it was
                            // only an unused state object and never took effect.
                            tasks.Add(
                                Task<IList<ReadMap>>.Factory.StartNew(
                                    () => MapRead(
                                        readPosition,
                                        read.ContigReadMatchIndexes,
                                        contigIndex,
                                        read.ReadSequence.Count,
                                        kmerLength)));
                        }
                    }
                }

                // Collect the results; Result blocks until the corresponding task finishes.
                var overlapMaps = new Dictionary<ISequence, IList<ReadMap>>();
                for (int index = 0; index < visitedContigs.Count; index++)
                {
                    overlapMaps.Add(contigs.ElementAt(visitedContigs[index]), tasks[index].Result);
                }

                // maps is shared between all parallel iterations, so guard the update.
                lock (maps)
                {
                    if (!maps.ContainsKey(read.ReadSequence.ID))
                    {
                        maps.Add(read.ReadSequence.ID, overlapMaps);
                    }
                    else
                    {
                        throw new ArgumentException(
                            string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, read.ReadSequence.ID));
                    }
                }
            });

            return maps;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs the full scaffolding pipeline: contig overlap graph, read-to-contig
        /// mapping, mate pair mapping and filtering, distance calculation, path
        /// tracing, path purging and finally scaffold sequence generation.
        /// </summary>
        /// <param name="reads">List of reads.</param>
        /// <param name="contigs">List of contigs.</param>
        /// <param name="lengthofKmer">Kmer Length.</param>
        /// <param name="depth">Depth for graph traversal.</param>
        /// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
        ///  Hierarchical Scaffolding With Bambus
        ///  by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
        ///  Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
        /// <returns>List of scaffold sequences.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="contigs"/> or <paramref name="reads"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="lengthofKmer"/> or <paramref name="depth"/> is not positive,
        /// or <paramref name="redundancy"/> is negative.
        /// </exception>
        public IList<ISequence> BuildScaffold(
            IEnumerable<ISequence> reads,
            IList<ISequence> contigs,
            int lengthofKmer,
            int depth = 10,
            int redundancy = 2)
        {
            // Guard clauses. The order is significant: it decides which exception
            // a caller sees when several arguments are invalid at once.
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (reads == null)
            {
                throw new ArgumentNullException("reads");
            }

            if (lengthofKmer < 1)
            {
                throw new ArgumentException(Properties.Resource.KmerLength);
            }

            if (depth < 1)
            {
                throw new ArgumentException(Resource.Depth);
            }

            if (redundancy < 0)
            {
                throw new ArgumentException(Resource.NegativeRedundancy);
            }

            // Store the settings on the instance before any helper is invoked.
            this.kmerLength = lengthofKmer;
            this.depthField = depth;
            this.redundancyField = redundancy;

            IEnumerable<ISequence> validatedReads = ValidateReads(reads);

            // Step 1: build the contig overlap graph from a private copy of the
            // contig list, then drop every contig whose node has no extensions.
            IList<ISequence> candidateContigs = new List<ISequence>(contigs);
            ContigGraph overlapGraph = GenerateContigOverlapGraph(candidateContigs);

            foreach (Node isolated in overlapGraph.Nodes.Where(n => n.ExtensionsCount == 0))
            {
                candidateContigs.Remove(overlapGraph.GetNodeSequence(isolated));
            }

            // Step 2: map the reads onto the remaining contigs.
            ReadContigMap readToContig = ReadContigMap(candidateContigs, validatedReads);

            // Release the reference; the list is not used past this point.
            candidateContigs = null;

            // Step 3: derive the contig mate pair map.
            ContigMatePairs matePairs = MapPairedReadsToContigs(readToContig, validatedReads);

            // Release the reference; the read-contig map is not used past this point.
            readToContig = null;

            // Step 4: filter the paired reads by orientation.
            matePairs = FilterReadsBasedOnOrientation(matePairs);

            // Step 5: compute the distances between contigs.
            CalculateDistanceBetweenContigs(matePairs);

            // Step 6: trace the scaffold paths through the graph.
            IList<ScaffoldPath> scaffoldPaths = TracePath(overlapGraph, matePairs);

            // Release the reference; the mate pairs are not used past this point.
            matePairs = null;

            // Step 7: purge the traced paths before assembly.
            PathPurger(scaffoldPaths);

            // Step 8: turn the surviving paths into scaffold sequences.
            return GenerateScaffold(overlapGraph, scaffoldPaths);
        }