Exemplo n.º 1
0
        /// <summary>
        /// Map paired reads to contigs by delegating to the paired-read mapper
        /// held in <c>_mapPairedReads</c>.
        /// </summary>
        /// <remarks>
        /// Earlier documentation states the pairing is derived from the FASTA sequence
        /// header; that logic lives in the mapper and is not visible from this method.
        /// </remarks>
        /// <param name="readContigMap">Map between reads and contigs.</param>
        /// <param name="reads">Sequences of reads.</param>
        /// <returns>Contig mate pair map returned by the mapper.</returns>
        protected ContigMatePairs MapPairedReadsToContigs(ReadContigMap readContigMap, IList <ISequence> reads)
        {
            // Hand back the mapper's result directly; allocating a placeholder
            // ContigMatePairs first would only create garbage.
            return _mapPairedReads.MapContigToMatePairs(reads, readContigMap);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Public method mapping Reads to Contigs.
        /// </summary>
        /// <remarks>
        /// A k-mer dictionary is built from the contigs once. Each read is then processed
        /// in parallel: every k-mer of the read (or its reverse complement) that is found in
        /// the dictionary contributes its contig positions, and one <c>MapRead</c> task is
        /// started per distinct contig the read touches. The per-contig results are stored
        /// in the returned map under the read's <c>DisplayID</c>.
        /// </remarks>
        /// <param name="contigs">List of sequences of contigs.</param>
        /// <param name="reads">List of input reads.</param>
        /// <param name="kmerLength">Length of kmer.</param>
        /// <returns>Contig Read Map.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="contigs"/> or <paramref name="reads"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Two reads share the same <c>DisplayID</c>. This is thrown from inside
        /// <c>Parallel.ForEach</c>, which reports delegate exceptions wrapped in an
        /// <see cref="AggregateException"/>.
        /// </exception>
        public ReadContigMap Map(IList <ISequence> contigs, IList <ISequence> reads, int kmerLength)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (reads == null)
            {
                throw new ArgumentNullException("reads");
            }

            KmerIndexerDictionary map  = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
            ReadContigMap         maps = new ReadContigMap();

            // One scratch buffer per worker thread for building reverse complements.
            using (ThreadLocal <char[]> rcBuilder = new ThreadLocal <char[]>(() => new char[kmerLength]))
            {
                Parallel.ForEach(reads, (ISequence readSequence) =>
                {
                    IEnumerable <string> kmers = SequenceToKmerBuilder.GetKmerStrings(readSequence, kmerLength);
                    ReadIndex read             = new ReadIndex(readSequence);
                    IList <KmerIndexer> positions;
                    foreach (string kmer in kmers)
                    {
                        // Try the k-mer as-is first, then its reverse complement.
                        if (map.TryGetValue(kmer, out positions) ||
                            map.TryGetValue(kmer.GetReverseComplement(rcBuilder.Value), out positions))
                        {
                            read.ContigReadMatchIndexes.Add(positions);
                        }
                    }

                    IList <Task <IList <ReadMap> > > tasks =
                        new List <Task <IList <ReadMap> > >();

                    // Contigs for which a task has been generated, in creation order.
                    // tasks[i] always corresponds to visitedContigs[i].
                    IList <int> visitedContigs = new List <int>();

                    // Constant-time membership test for visitedContigs; scanning the list
                    // for every k-mer match would be quadratic in the number of contigs hit.
                    HashSet <int> seenContigs = new HashSet <int>();

                    // Creates one task per distinct contig, anchored at the first
                    // matched k-mer position of the read that references that contig.
                    for (int index = 0; index < read.ContigReadMatchIndexes.Count; index++)
                    {
                        int readPosition = index;
                        foreach (KmerIndexer match in read.ContigReadMatchIndexes[index])
                        {
                            int contigIndex = match.SequenceIndex;
                            if (seenContigs.Add(contigIndex))
                            {
                                visitedContigs.Add(contigIndex);

                                // No creation options are supplied: every task is joined
                                // through Result below, so attaching to a parent adds nothing.
                                tasks.Add(Task <IList <ReadMap> >.Factory.StartNew(() =>
                                    MapRead(readPosition, read.ContigReadMatchIndexes, contigIndex, read.ReadSequence.Count, kmerLength)));
                            }
                        }
                    }

                    Dictionary <ISequence, IList <ReadMap> > overlapMaps =
                        new Dictionary <ISequence, IList <ReadMap> >();
                    for (int index = 0; index < visitedContigs.Count; index++)
                    {
                        // Result blocks until the corresponding task has finished.
                        overlapMaps.Add(contigs[visitedContigs[index]], tasks[index].Result);
                    }

                    // maps is shared by all workers; serialize the check-then-add.
                    lock (maps)
                    {
                        if (!maps.ContainsKey(read.ReadSequence.DisplayID))
                        {
                            maps.Add(read.ReadSequence.DisplayID, overlapMaps);
                        }
                        else
                        {
                            throw new ArgumentException(
                                string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, read.ReadSequence.DisplayID));
                        }
                    }
                });
            }

            return(maps);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds scaffolds from list of reads and contigs.
        /// </summary>
        /// <remarks>
        /// The <paramref name="redundancy"/> threshold follows:
        ///  Hierarchical Scaffolding With Bambus
        ///  by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
        ///  Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.
        /// The <paramref name="contigs"/> list passed in is not modified; pruning is done
        /// on an internal copy.
        /// </remarks>
        /// <param name="reads">List of reads. Reads containing ambiguous or gap symbols are ignored.</param>
        /// <param name="contigs">List of contigs.</param>
        /// <param name="kmerLength">Kmer Length. Must be greater than zero.</param>
        /// <param name="depth">Depth for graph traversal. Must be greater than zero.</param>
        /// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
        /// Must not be negative.</param>
        /// <returns>List of scaffold sequences.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="contigs"/> or <paramref name="reads"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="kmerLength"/> or <paramref name="depth"/> is not positive, or
        /// <paramref name="redundancy"/> is negative.
        /// </exception>
        public IList <ISequence> BuildScaffold(
            IList <ISequence> reads,
            IList <ISequence> contigs,
            int kmerLength,
            int depth      = 10,
            int redundancy = 2)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (null == reads)
            {
                throw new ArgumentNullException("reads");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLength);
            }

            if (depth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            if (redundancy < 0)
            {
                throw new ArgumentException(Resource.NegativeRedundancy);
            }

            _depth      = depth;
            _redundancy = redundancy;
            _kmerLength = kmerLength;

            // Keep only reads made entirely of unambiguous, non-gap symbols.
            IList <ISequence> readSeqs = reads.AsParallel().Where(s => s.All <ISequenceItem>(c => !c.IsAmbiguous && !c.IsGap)).ToList();

            //Step1: Generate contig overlap graph.
            DeBruijnGraph contigGraph = GenerateContigOverlapGraph(contigs);

            // Prune on a private copy so the caller's list is never mutated and
            // fixed-size or read-only inputs (for example arrays) are accepted.
            IList <ISequence> workingContigs = new List <ISequence>(contigs);

            // Drop contigs whose graph node has no extensions in the overlap graph.
            IEnumerable <DeBruijnNode> nodes = contigGraph.Nodes.Where(t => t.ExtensionsCount == 0);

            foreach (DeBruijnNode node in nodes)
            {
                workingContigs.Remove(contigGraph.GetNodeSequence(node));
            }

            // Step2: Map Reads to contigs.
            ReadContigMap readContigMap = ReadContigMap(workingContigs, readSeqs);

            // Intermediate results are released as soon as the next step has consumed
            // them, presumably to keep peak memory down on large inputs.
            workingContigs = null;

            // Step3: Generate Contig Mate Pair Map.
            ContigMatePairs contigMatePairs = MapPairedReadsToContigs(readContigMap, readSeqs);

            readContigMap = null;

            // Step4: Filter Paired Reads.
            contigMatePairs = FilterReadsBasedOnOrientation(contigMatePairs);

            // Step5: Distance Calculation.
            CalculateDistanceBetweenContigs(contigMatePairs);

            // Step6: Trace Scaffold Paths.
            IList <ScaffoldPath> paths = TracePath(contigGraph, contigMatePairs);

            contigMatePairs = null;

            // Step7: Assemble paths.
            PathPurger(paths);

            // Step8: Generate sequence of scaffolds.
            return(GenerateScaffold(contigGraph, paths));
        }