Beispiel #1
0
        /// <summary>
        /// Map paired reads to contigs using FASTA sequence header.
        /// The work is delegated to the configured paired-read mapper.
        /// </summary>
        /// <param name="readContigMap">Map between reads and contigs.</param>
        /// <param name="reads">Sequences of reads.</param>
        /// <returns>Contig Mate Pair map produced by the mapper.</returns>
        protected ContigMatePairs MapPairedReadsToContigs(ReadContigMap readContigMap, IList <ISequence> reads)
        {
            return _mapPairedReads.MapContigToMatePairs(reads, readContigMap);
        }
Beispiel #2
0
 /// <summary>
 /// Calculates distances between contigs.
 /// Each entry of the map is processed in parallel and passed to
 /// CalculateInterContigDistance together with the Count of its key sequence.
 /// </summary>
 /// <param name="contigPairedReads">Input contigs and mate pairs mapping.</param>
 /// <exception cref="System.ArgumentNullException">contigPairedReads is null.</exception>
 public void CalculateDistance(ContigMatePairs contigPairedReads)
 {
     // Fail fast with the caller-visible parameter name instead of letting
     // Parallel.ForEach report its own internal parameter name.
     if (contigPairedReads == null)
     {
         throw new System.ArgumentNullException("contigPairedReads");
     }

     Parallel.ForEach(contigPairedReads, (KeyValuePair <ISequence, Dictionary
                                                        <ISequence, IList <ValidMatePair> > > contigPairedRead) =>
     {
         CalculateInterContigDistance(contigPairedRead, contigPairedRead.Key.Count);
     });
 }
Beispiel #3
0
        /// <summary>
        /// Runs the distance calculator over the contig mate pair map.
        /// </summary>
        /// <param name="contigMatePairs">Contig Mate Pair map handed to the distance calculator.</param>
        /// <returns>Count of the map once the distance calculation has run.</returns>
        /// <exception cref="ArgumentNullException">contigMatePairs is null.</exception>
        protected int CalculateDistanceBetweenContigs(ContigMatePairs contigMatePairs)
        {
            if (contigMatePairs == null)
            {
                throw new ArgumentNullException("contigMatePairs");
            }

            // Original author's note: the map itself is updated by this step,
            // so the count is read only afterwards.
            _distanceCalculator.CalculateDistance(contigMatePairs);

            int entryCount = contigMatePairs.Count;
            return entryCount;
        }
Beispiel #4
0
        /// <summary>
        /// Performs Breadth First Search to traverse through graph to generate scaffold paths.
        /// Every graph node whose sequence has an entry in the mate pair map is used as a
        /// starting point; nodes are processed in parallel, so the order of the returned
        /// paths is not fixed.
        /// </summary>
        /// <param name="graph">Contig Overlap Graph.</param>
        /// <param name="contigPairedReadMaps">InterContig Distances.</param>
        /// <param name="kmerLength">Length of Kmer; must be greater than zero.</param>
        /// <param name="depth">Depth to which graph is searched; must be greater than zero.</param>
        /// <returns>List of paths/scaffold</returns>
        /// <exception cref="ArgumentNullException">graph or contigPairedReadMaps is null.</exception>
        /// <exception cref="ArgumentException">kmerLength or depth is not positive.</exception>
        public IList <ScaffoldPath> FindPaths(
            DeBruijnGraph graph,
            ContigMatePairs contigPairedReadMaps,
            int kmerLength,
            int depth = 10)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            if (contigPairedReadMaps == null)
            {
                throw new ArgumentNullException("contigPairedReadMaps");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Resource.KmerLength);
            }

            if (depth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            // The search settings are stored on the instance before traversal starts.
            _graph      = graph;
            _kmerLength = kmerLength;
            _depth      = depth;

            List <ScaffoldPath> collectedPaths = new List <ScaffoldPath>();

            Parallel.ForEach(_graph.Nodes, (DeBruijnNode startNode) =>
            {
                Dictionary <ISequence, IList <ValidMatePair> > matePairsOfContig;
                ISequence startSequence = graph.GetNodeSequence(startNode);

                if (!contigPairedReadMaps.TryGetValue(startSequence, out matePairsOfContig))
                {
                    // No mate pair entry for this node's sequence: nothing to trace from it.
                    return;
                }

                List <ScaffoldPath> pathsFromNode = TraverseGraph(startNode, matePairsOfContig);

                // The result list is shared by all workers, so appends are serialized.
                lock (collectedPaths)
                {
                    collectedPaths.AddRange(pathsFromNode);
                }
            });

            return collectedPaths;
        }
Beispiel #5
0
        /// <summary>
        /// Filter Paired Read Based on Orientation.
        /// The mate pair lists inside <paramref name="matePairMap"/> are modified in place
        /// (cleared or passed to OrientationFilter); the returned map references only the
        /// lists that are still non-empty afterwards.
        /// </summary>
        /// <param name="matePairMap">Map between contigs using mate pair information.</param>
        /// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
        ///  Hierarchical Scaffolding With Bambus
        ///  by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
        ///  Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
        /// <returns>New map containing only contigs that still have at least one non-empty link.</returns>
        /// <exception cref="ArgumentNullException">matePairMap is null.</exception>
        /// <exception cref="ArgumentException">redundancy is negative.</exception>
        public ContigMatePairs FilterPairedReads(ContigMatePairs matePairMap, int redundancy = 2)
        {
            if (null == matePairMap)
            {
                throw new ArgumentNullException("matePairMap");
            }

            if (redundancy < 0)
            {
                throw new ArgumentException(Resource.NegativeRedundancy);
            }

            // Pass 1: visit every (contig -> linked contig) entry and filter its mate pairs.
            foreach (KeyValuePair <ISequence, Dictionary <ISequence, IList <ValidMatePair> > > contigEntry in matePairMap)
            {
                ISequence fromContig = contigEntry.Key;

                foreach (KeyValuePair <ISequence, IList <ValidMatePair> > link in contigEntry.Value)
                {
                    ISequence toContig = link.Key;
                    IList <ValidMatePair> forwardPairs = link.Value;

                    if (fromContig == toContig)
                    {
                        // A contig linked to itself never yields a usable link.
                        forwardPairs.Clear();
                        continue;
                    }

                    Dictionary <ISequence, IList <ValidMatePair> > reverseLinks;
                    IList <ValidMatePair> reversePairs;

                    if (matePairMap.TryGetValue(toContig, out reverseLinks) &&
                        reverseLinks.TryGetValue(fromContig, out reversePairs))
                    {
                        // Links exist in both directions: let the orientation filter decide.
                        OrientationFilter(reversePairs, forwardPairs, redundancy);
                    }
                    else if (forwardPairs.Count < redundancy)
                    {
                        // One-directional link without enough supporting mate pairs.
                        forwardPairs.Clear();
                    }
                }
            }

            // Pass 2: build the result from the links that survived pass 1.
            ContigMatePairs filteredMap = new ContigMatePairs();

            Parallel.ForEach(matePairMap, (KeyValuePair <ISequence, Dictionary <ISequence, IList <ValidMatePair> > > contigEntry) =>
            {
                Dictionary <ISequence, IList <ValidMatePair> > survivingLinks = new Dictionary <ISequence, IList <ValidMatePair> >();
                foreach (KeyValuePair <ISequence, IList <ValidMatePair> > link in contigEntry.Value)
                {
                    if (link.Value.Count == 0)
                    {
                        continue;
                    }

                    survivingLinks.Add(link.Key, link.Value);
                }

                if (survivingLinks.Count == 0)
                {
                    return;
                }

                // The result map is shared by all workers, so insertions are serialized.
                lock (filteredMap)
                {
                    filteredMap.Add(contigEntry.Key, survivingLinks);
                }
            });

            return filteredMap;
        }
Beispiel #6
0
 /// <summary>
 /// Performs Breadth First Search in contig overlap graph.
 /// Forwards to the path tracer, supplying the k-mer length and depth
 /// currently stored on this instance.
 /// </summary>
 /// <param name="contigGraph">Contig Graph.</param>
 /// <param name="contigMatePairs">Contig Mate Pair map.</param>
 /// <returns>List of Scaffold Paths</returns>
 protected IList <ScaffoldPath> TracePath(DeBruijnGraph contigGraph, ContigMatePairs contigMatePairs)
 {
     IList <ScaffoldPath> tracedPaths = _tracePath.FindPaths(contigGraph, contigMatePairs, _kmerLength, _depth);
     return tracedPaths;
 }
Beispiel #7
0
 /// <summary>
 /// Filter reads based on orientation of contigs.
 /// Forwards to the paired-read filter, supplying the redundancy
 /// currently stored on this instance.
 /// </summary>
 /// <param name="contigMatePairs">Contig Mate Pair map.</param>
 /// <returns>Contig Mate Pair map returned by the filter.</returns>
 protected ContigMatePairs FilterReadsBasedOnOrientation(ContigMatePairs contigMatePairs)
 {
     ContigMatePairs filteredPairs = _pairedReadFilter.FilterPairedReads(contigMatePairs, _redundancy);
     return filteredPairs;
 }
Beispiel #8
0
        /// <summary>
        /// Builds scaffolds from list of reads and contigs.
        /// Note that the supplied <paramref name="contigs"/> list is modified: the sequence of
        /// every overlap-graph node that has no extensions is removed from it.
        /// </summary>
        /// <param name="reads">List of reads; reads containing ambiguous symbols or gaps are ignored.</param>
        /// <param name="contigs">List of contigs (modified in place, see summary).</param>
        /// <param name="kmerLength">Kmer Length; must be greater than zero.</param>
        /// <param name="depth">Depth for graph traversal; must be greater than zero.</param>
        /// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
        ///  Hierarchical Scaffolding With Bambus
        ///  by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
        ///  Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
        /// <returns>List of scaffold sequences</returns>
        /// <exception cref="ArgumentNullException">contigs or reads is null.</exception>
        /// <exception cref="ArgumentException">kmerLength or depth is not positive, or redundancy is negative.</exception>
        public IList <ISequence> BuildScaffold(
            IList <ISequence> reads,
            IList <ISequence> contigs,
            int kmerLength,
            int depth      = 10,
            int redundancy = 2)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (null == reads)
            {
                throw new ArgumentNullException("reads");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLength);
            }

            if (depth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            if (redundancy < 0)
            {
                throw new ArgumentException(Resource.NegativeRedundancy);
            }

            // Remember the settings; the step helpers below read them from the instance.
            _depth      = depth;
            _redundancy = redundancy;
            _kmerLength = kmerLength;

            // Keep only reads that contain neither ambiguous symbols nor gaps.
            IList <ISequence> usableReads = reads
                .AsParallel()
                .Where(read => read.All <ISequenceItem>(item => !(item.IsAmbiguous || item.IsGap)))
                .ToList();

            // Step 1: generate the contig overlap graph, then remove from the contig list
            // the sequence of every node that has no extensions.
            DeBruijnGraph overlapGraph = GenerateContigOverlapGraph(contigs);
            IEnumerable <DeBruijnNode> unextendedNodes = overlapGraph.Nodes.Where(candidate => candidate.ExtensionsCount == 0);

            foreach (DeBruijnNode unextended in unextendedNodes)
            {
                contigs.Remove(overlapGraph.GetNodeSequence(unextended));
            }

            // Step 2: map reads to contigs.
            ReadContigMap readToContigMap = ReadContigMap(contigs, usableReads);

            // Drop the local reference once the contigs are no longer needed.
            contigs = null;

            // Step 3: generate the contig mate pair map.
            ContigMatePairs matePairs = MapPairedReadsToContigs(readToContigMap, usableReads);

            readToContigMap = null;

            // Step 4: filter paired reads.
            matePairs = FilterReadsBasedOnOrientation(matePairs);

            // Step 5: distance calculation.
            CalculateDistanceBetweenContigs(matePairs);

            // Step 6: trace scaffold paths.
            IList <ScaffoldPath> scaffoldPaths = TracePath(overlapGraph, matePairs);

            matePairs = null;

            // Step 7: assemble paths.
            PathPurger(scaffoldPaths);

            // Step 8: generate the scaffold sequences.
            return GenerateScaffold(overlapGraph, scaffoldPaths);
        }