Ejemplo n.º 1
0
        /// <summary>
        /// Builds a de Bruijn graph from the small read set with a k-mer length of 6 and
        /// verifies the node count, that every node sequence (or its reverse complement)
        /// is one of the expected node strings, and the summed extension count.
        /// </summary>
        public void TestDeBruijnGraphBuilderSmall()
        {
            const int smallKmerLength = 6;
            List<ISequence> smallReads = TestInputs.GetSmallReads();

            // Configure the builder under test and construct the graph.
            this.KmerLength = smallKmerLength;
            this.SequenceReads.Clear();
            this.SetSequenceReads(smallReads);
            this.CreateGraph();

            DeBruijnGraph builtGraph = this.Graph;
            Assert.AreEqual(20, builtGraph.NodeCount);

            HashSet<string> expectedNodes = GetGraphNodesForSmallReads();

            foreach (DeBruijnNode current in builtGraph.GetNodes())
            {
                // A node is accepted when it matches an expected string in either orientation.
                var nodeSequence = builtGraph.GetNodeSequence(current);
                string forward = new string(nodeSequence.Select(symbol => (char)symbol).ToArray());
                string reverse = new string(
                    nodeSequence.GetReverseComplementedSequence().Select(symbol => (char)symbol).ToArray());

                bool isExpected = expectedNodes.Contains(forward) || expectedNodes.Contains(reverse);
                Assert.IsTrue(isExpected);
            }

            long extensionTotal = builtGraph.GetNodes()
                                            .Select(n => n.ExtensionsCount)
                                            .Sum();

            Assert.AreEqual(51, extensionTotal);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Removes mirrored path clusters. The same set of paths can be found twice, once
        /// traced in the forward direction and once in the opposite direction; this method
        /// keeps only one representative of such pairs.
        /// </summary>
        /// <param name="redundantPathClusters">List of path clusters, possibly containing mirrored pairs.</param>
        /// <returns>List of unique path clusters.</returns>
        private List<DeBruijnPathList> RemoveDuplicates(List<DeBruijnPathList> redundantPathClusters)
        {
            // Partition the clusters by whether a mirror image exists in the input:
            //   Key == true  -> some cluster starts where this one ends and ends where this one starts
            //   Key == false -> no such mirrored cluster exists
            var partitions = redundantPathClusters
                             .AsParallel()
                             .GroupBy(candidate => redundantPathClusters.Any(other =>
                                                                             GetStartNode(candidate) == GetEndNode(other) &&
                                                                             GetEndNode(candidate) == GetStartNode(other)))
                             .ToList();

            List<DeBruijnPathList> result = new List<DeBruijnPathList>();

            foreach (IGrouping<bool, DeBruijnPathList> partition in partitions)
            {
                if (partition.Key)
                {
                    // Every cluster here has a mirror image. Keep only those whose start node
                    // sequence compares ordinally greater than or equal to the end node
                    // sequence, so that one orientation of each mirrored pair is dropped.
                    var keptOrientation = partition.AsParallel().Where(cluster =>
                                                                       string.CompareOrdinal(
                                                                           _graph.GetNodeSequence(GetStartNode(cluster)).ToString(),
                                                                           _graph.GetNodeSequence(GetEndNode(cluster)).ToString()) >= 0);
                    result.AddRange(keptOrientation);
                }
                else
                {
                    // Clusters without a mirror image are already unique; keep them all.
                    result.AddRange(partition);
                }
            }

            return result;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Get simple paths in the graph. End nodes (exactly one extension on one side and
        /// none on the other) are traced in parallel; nodes still unvisited afterwards are
        /// traced sequentially.
        /// </summary>
        /// <param name="createContigSequences">Passed through to TraceSimplePath; also controls
        /// whether island node sequences are added to the result.</param>
        /// <returns>List of simple paths.</returns>
        private List<ISequence> GetSimplePaths(bool createContigSequences)
        {
            // Clear all visit flags so nodes missed by the parallel pass can be found afterwards.
            _graph.SetNodeVisitState(false);
            List<ISequence> simplePaths = new List<ISequence>();

            Parallel.ForEach(_graph.GetNodes(), current =>
            {
                int leftCount  = current.LeftExtensionNodesCount;
                int rightCount = current.RightExtensionNodesCount;

                // End node with a single left extension: trace towards the left.
                if (leftCount == 1 && rightCount == 0)
                {
                    TraceSimplePath(simplePaths, current, false, createContigSequences, true);
                    return;
                }

                // End node with a single right extension: trace towards the right.
                if (rightCount == 1 && leftCount == 0)
                {
                    TraceSimplePath(simplePaths, current, true, createContigSequences, true);
                    return;
                }

                // Anything else that has extensions is not a path end; skip it here.
                if (leftCount + rightCount != 0)
                {
                    return;
                }

                // Island: no extensions on either side.
                current.IsVisited = true;

                if (_coverageThreshold != -1)
                {
                    // A threshold is set: low-coverage islands are flagged for deletion.
                    if (current.KmerCount < _coverageThreshold)
                    {
                        current.MarkNodeForDelete();
                    }

                    return;
                }

                if (createContigSequences)
                {
                    // The list is shared between worker threads, so guard the insertion.
                    lock (simplePaths)
                    {
                        simplePaths.Add(_graph.GetNodeSequence(current));
                    }
                }
            });

            // All paths starting from ends have now been found. Nodes entirely enclosed in a
            // circular loop (e.g. small plasmids) were skipped above. They are traced one at a
            // time so that two workers never grab nodes from the same circle concurrently.
            foreach (var unvisited in _graph.GetUnvisitedNodes())
            {
                TraceSimplePath(simplePaths, unvisited, true, createContigSequences, false);
            }

            // Leave the visit flags cleared; likely unnecessary, since any method relying on
            // the flag should reset it itself.
            _graph.SetNodeVisitState(false);
            return simplePaths;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Converts the scaffold path into its sequence.
        /// Side effects: the first entry is removed from this path, and every node of the
        /// path is marked via MarkNode().
        /// </summary>
        /// <param name="graph">De Bruijn graph.</param>
        /// <param name="kmerLength">Kmer Length. Adjacent contigs are assumed to overlap by
        /// (kmerLength - 1) symbols when kmerLength is greater than 1.</param>
        /// <returns>Scaffold Sequence.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="graph"/> is null.</exception>
        public ISequence BuildSequenceFromPath(DeBruijnGraph graph, int kmerLength)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            // The first entry seeds the scaffold and fixes the direction of growth.
            var          head          = this[0];
            DeBruijnNode firstNode     = head.Key;
            bool         growsForward  = head.Value.IsSameOrientation;

            firstNode.MarkNode();

            ISequence scaffold = new Sequence(Alphabets.DNA);
            scaffold.InsertRange(0, graph.GetNodeSequence(firstNode).ToString());
            this.RemoveAt(0);

            // There is an overlap of (k-1) symbols between adjacent contigs.
            int overlap = kmerLength > 1 ? kmerLength - 1 : kmerLength;

            // Tracks the orientation relative to the first node; each edge either keeps or flips it.
            bool orientationKept = true;

            foreach (KeyValuePair<DeBruijnNode, DeBruijnEdge> step in this)
            {
                orientationKept = orientationKept == step.Value.IsSameOrientation;

                ISequence contig = graph.GetNodeSequence(step.Key);
                if (!orientationKept)
                {
                    contig = contig.ReverseComplement;
                }

                // Only the symbols beyond the overlap extend the scaffold.
                int extensionLength = contig.Count - overlap;

                if (growsForward)
                {
                    // Append: skip the leading overlap of the contig.
                    scaffold.InsertRange(scaffold.Count, contig.Range(overlap, extensionLength).ToString());
                }
                else
                {
                    // Prepend: drop the trailing overlap of the contig.
                    scaffold.InsertRange(0, contig.Range(0, extensionLength).ToString());
                }

                step.Key.MarkNode();
            }

            return scaffold;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Performs Breadth First Search to traverse through graph to generate scaffold paths.
        /// Every graph node whose sequence has an entry in the mate pair map is used as a
        /// traversal start; nodes are processed in parallel.
        /// </summary>
        /// <param name="graph">Contig Overlap Graph.</param>
        /// <param name="contigPairedReadMaps">InterContig Distances.</param>
        /// <param name="kmerLength">Length of Kmer. Must be positive.</param>
        /// <param name="depth">Depth to which graph is searched. Must be positive.</param>
        /// <returns>List of paths/scaffold</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="graph"/> or
        /// <paramref name="contigPairedReadMaps"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="kmerLength"/> or
        /// <paramref name="depth"/> is zero or negative.</exception>
        public IList<ScaffoldPath> FindPaths(
            DeBruijnGraph graph,
            ContigMatePairs contigPairedReadMaps,
            int kmerLength,
            int depth = 10)
        {
            // Argument validation.
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            if (contigPairedReadMaps == null)
            {
                throw new ArgumentNullException("contigPairedReadMaps");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Resource.KmerLength);
            }

            if (depth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            // Store the inputs in instance fields.
            _graph      = graph;
            _kmerLength = kmerLength;
            _depth      = depth;

            List<ScaffoldPath> collected = new List<ScaffoldPath>();

            Parallel.ForEach(_graph.Nodes, (DeBruijnNode startNode) =>
            {
                Dictionary<ISequence, IList<ValidMatePair>> matePairsForContig;
                bool hasMatePairs = contigPairedReadMaps.TryGetValue(
                    graph.GetNodeSequence(startNode),
                    out matePairsForContig);

                // Contigs without mate pair information cannot seed a scaffold.
                if (!hasMatePairs)
                {
                    return;
                }

                List<ScaffoldPath> found = TraverseGraph(startNode, matePairsForContig);

                // The result list is shared between worker threads.
                lock (collected)
                {
                    collected.AddRange(found);
                }
            });

            return collected;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Get simple paths in the graph. Nodes are examined in parallel: islands are either
        /// reported or flagged for deletion, and end nodes are handed to TraceSimplePath.
        /// </summary>
        /// <param name="createContigSequences">Passed through to TraceSimplePath; also controls
        /// whether island node sequences are added to the result.</param>
        /// <returns>List of simple paths.</returns>
        private List<ISequence> GetSimplePaths(bool createContigSequences)
        {
            List<ISequence> simplePaths = new List<ISequence>();

            Parallel.ForEach(this.graph.GetNodes(), current =>
            {
                int leftCount  = current.LeftExtensionNodesCount;
                int rightCount = current.RightExtensionNodesCount;

                // End node with a single left extension: trace towards the left.
                if (leftCount == 1 && rightCount == 0)
                {
                    TraceSimplePath(simplePaths, current, false, createContigSequences);
                    return;
                }

                // End node with a single right extension: trace towards the right.
                if (rightCount == 1 && leftCount == 0)
                {
                    TraceSimplePath(simplePaths, current, true, createContigSequences);
                    return;
                }

                // Any other node with extensions is not a path end.
                if (leftCount + rightCount != 0)
                {
                    return;
                }

                // Island: no extensions on either side.
                if (coverageThreshold != -1)
                {
                    // A threshold is set: low-coverage islands are flagged for deletion.
                    if (current.KmerCount < coverageThreshold)
                    {
                        current.MarkNodeForDelete();
                    }

                    return;
                }

                if (createContigSequences)
                {
                    // The list is shared between worker threads, so guard the insertion.
                    lock (simplePaths)
                    {
                        simplePaths.Add(graph.GetNodeSequence(current));
                    }
                }
            });

            return simplePaths;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Condense redundant paths down to simple paths. Nodes are examined in parallel:
        /// islands (no extensions) are either reported or flagged for deletion depending on
        /// the coverage threshold, and end nodes are handed to TraceSimplePath.
        /// </summary>
        /// <param name="createContigSequences">Passed through to TraceSimplePath; also controls
        /// whether island node sequences are added to the result. Defaults to true so that
        /// parameterless calls keep compiling.
        /// NOTE(review): the original body referenced this name without declaring it; confirm
        /// that true is the intended default.</param>
        /// <returns>List of simple paths.</returns>
        private IList<ISequence> CreateMegaNodes(bool createContigSequences = true)
        {
            // The original wrapped this body in a stray "foreach (DeBruijnNode node ...)" loop:
            // its variable collided with the lambda parameter below and the return statement
            // sat inside the loop, so the method could not compile. The loop has been removed.
            IList<ISequence> paths = new List<ISequence>();

            Parallel.ForEach(this._graph.GetNodes(), node =>
            {
                int validLeftExtensionsCount = node.LeftExtensionNodesCount;
                int validRightExtensionsCount = node.RightExtensionNodesCount;

                if (validLeftExtensionsCount + validRightExtensionsCount == 0)
                {
                    // Island. Check coverage
                    if (Double.IsNaN(_coverageThreshold))
                    {
                        // No threshold configured (NaN): report the island itself.
                        if (createContigSequences)
                        {
                            // The list is shared between worker threads.
                            lock (paths)
                            {
                                paths.Add(_graph.GetNodeSequence(node));
                            }
                        }
                    }
                    else
                    {
                        // Threshold configured: low-coverage islands are flagged for deletion.
                        if (node.KmerCount < _coverageThreshold)
                        {
                            node.MarkNodeForDelete();
                        }
                    }
                }
                else if (validLeftExtensionsCount == 1 && validRightExtensionsCount == 0)
                {
                    // End node with a single left extension.
                    TraceSimplePath(paths, node, false, createContigSequences);
                }
                else if (validRightExtensionsCount == 1 && validLeftExtensionsCount == 0)
                {
                    // End node with a single right extension.
                    TraceSimplePath(paths, node, true, createContigSequences);
                }
            });

            return paths;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Generate sequences from list of contig nodes. Each scaffold path is converted to a
        /// sequence; the sequences of all graph nodes that remain unmarked afterwards are
        /// appended as well. The contig graph is disposed before returning.
        /// </summary>
        /// <param name="contigGraph">Contig Overlap Graph. Disposed by this method.</param>
        /// <param name="paths">Scaffold paths.</param>
        /// <returns>List of sequences of scaffolds.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigGraph"/> or
        /// <paramref name="paths"/> is null.</exception>
        protected IList<ISequence> GenerateScaffold(
            DeBruijnGraph contigGraph,
            IList<ScaffoldPath> paths)
        {
            if (contigGraph == null)
            {
                throw new ArgumentNullException("contigGraph");
            }

            if (paths == null)
            {
                throw new ArgumentNullException("paths");
            }

            // Materialize the scaffold sequences first: the unmarked-node query below is only
            // meaningful once every path has been built.
            List<ISequence> result = paths
                                     .AsParallel()
                                     .Select(path => path.BuildSequenceFromPath(contigGraph, _kmerLength))
                                     .ToList();

            // Deferred query; it is enumerated by AddRange below, before the graph is disposed.
            IEnumerable<DeBruijnNode> unmarkedNodes = contigGraph.Nodes
                                                                 .AsParallel()
                                                                 .Where(candidate => !candidate.IsMarked());

            result.AddRange(
                unmarkedNodes.AsParallel().Select(leftover => contigGraph.GetNodeSequence(leftover)));

            contigGraph.Dispose();
            return result;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds a de Bruijn graph from the tiny read set with a k-mer length of 3 and
        /// verifies the node count, that each expected k-mer is present in one of its two
        /// listed forms, and the summed extension count.
        /// </summary>
        public void TestDeBruijnGraphBuilderTiny()
        {
            const int tinyKmerLength = 3;
            List<ISequence> tinyReads = TestInputs.GetTinyReads();

            // Configure the builder under test and construct the graph.
            this.KmerLength = tinyKmerLength;
            this.SequenceReads.Clear();
            this.SetSequenceReads(tinyReads);
            this.CreateGraph();

            DeBruijnGraph builtGraph = this.Graph;
            Assert.AreEqual(9, builtGraph.NodeCount);

            // String form of every node sequence in the graph.
            HashSet<string> actualNodes = new HashSet<string>(
                builtGraph.GetNodes().Select(n =>
                                             new string(builtGraph.GetNodeSequence(n).Select(symbol => (char)symbol).ToArray())));

            // Each row lists the two acceptable forms of one expected node.
            string[][] expectedNodePairs =
            {
                new[] { "ATG", "CAT" },
                new[] { "TGC", "GCA" },
                new[] { "GCC", "GGC" },
                new[] { "TCC", "GGA" },
                new[] { "CCT", "AGG" },
                new[] { "CTA", "TAG" },
                new[] { "TAT", "ATA" },
                new[] { "ATC", "GAT" },
                new[] { "CTC", "GAG" },
            };

            foreach (string[] pair in expectedNodePairs)
            {
                Assert.IsTrue(actualNodes.Contains(pair[0]) || actualNodes.Contains(pair[1]));
            }

            long extensionTotal = builtGraph.GetNodes()
                                            .Select(n => n.ExtensionsCount)
                                            .Sum();

            Assert.AreEqual(31, extensionTotal);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a meta node starting from <paramref name="startNode"/>. The start node is
        /// classified with ClassifyNode and, depending on the result, the meta node either
        /// consists of that single node or is grown with ExtendChain / MakeCircle. The
        /// constructor fills ConstituentNodes, contigSequence and Sequence and finishes by
        /// calling Cement().
        /// </summary>
        /// <param name="startNode">Node to start from; must not already be flagged as visited.
        /// It is flagged as visited by this constructor.</param>
        /// <param name="graph">Graph the node belongs to; supplies KmerLength and node sequences.</param>
        /// <exception cref="Exception">Thrown when <paramref name="startNode"/> is already visited.</exception>
        public MetaNode(DeBruijnNode startNode, DeBruijnGraph graph)
        {
            // Take the current GraphGenerator.NodeCount as this node's number, then increment the counter.
            // NOTE(review): the post-increment is not atomic - confirm meta nodes are never built concurrently.
            this.NodeNumber = GraphGenerator.NodeCount++;
            KmerLength      = graph.KmerLength;
            if (startNode.IsVisited)
            {
                throw new Exception("If a node has been visited it should not form a metanode, suggests an infinite recursion problem");
            }
            NODE_TYPE type = ClassifyNode(startNode);

            startNode.IsVisited = true;
            // Any of these three node types becomes a single-node meta node.
            if (type == NODE_TYPE.NEXUS || type == NODE_TYPE.ISLAND || type == NODE_TYPE.END_LOOPS_ON_ITSELF)
            {
                ConstituentNodes.Add(startNode);
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                Sequence       = (new Sequence((IAlphabet)NoGapDnaAlphabet.Instance, contigSequence.ToArray())).ConvertToString(0, contigSequence.Count);
            }
            else if (type == NODE_TYPE.LINK_IN_CHAIN)
            {
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                // VerifyNotCircular returning false is treated as "the chain is circular".
                if (!VerifyNotCircular(startNode))
                {
                    MakeCircle(startNode, graph);
                    //throw new Exception("Non circular visualizations not currently supported");
                }
                else
                {
                    // Go right first.
                    // (contigSequence is re-initialized here with the same start node sequence as above.)
                    contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                    //var nextNodes = ExtendChain(startNode, true, graph);
                    // Presumably appends to ConstituentNodes and contigSequence - TODO confirm against ExtendChain.
                    ExtendChain(startNode, true, graph);
                    // Snapshot the right-hand results, then clear the shared buffers for the left pass.
                    var tmpRightSeq = contigSequence.ToArray();
                    // Skip the first node (presumably the start node, which the left pass records again - TODO confirm).
                    var tmpRightNodes = ConstituentNodes.Skip(1).ToArray();
                    ConstituentNodes.Clear();
                    contigSequence.Clear();
                    // Now go left; contigSequence starts out empty for this pass.
                    ExtendChain(startNode, false, graph);
                    // Combine: reversed left-pass nodes first, then the right-pass nodes.
                    ConstituentNodes.Reverse();
                    ConstituentNodes.AddRange(tmpRightNodes);
                    // The left-pass sequence is reverse complemented before being prepended.
                    var tmpSequence = new Sequence(DnaAlphabet.Instance, contigSequence.ToArray());
                    tmpSequence = new Sequence(tmpSequence.GetReverseComplementedSequence());
                    string LeftSequence = "";
                    // ConvertToString is only called when the left pass produced symbols.
                    if (tmpSequence.Count > 0)
                    {
                        LeftSequence = tmpSequence.ConvertToString(0, tmpSequence.Count);
                    }
                    tmpSequence    = new Sequence(DnaAlphabet.Instance, tmpRightSeq);
                    Sequence       = LeftSequence + tmpSequence.ConvertToString(0, (tmpSequence.Count));
                    // Rebuild contigSequence from the combined string so both representations agree.
                    contigSequence = new Sequence(DnaAlphabet.Instance, Sequence).ToList();
                }
            }
            else if (type == NODE_TYPE.GO_LEFT)
            {
                // Start from the reverse complement of the start node sequence and extend to the left.
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode).GetReverseComplementedSequence());
                //var nextNodes = ExtendChain(startNode, false, graph);
                ExtendChain(startNode, false, graph);
                var tmpSequence = new Sequence(DnaAlphabet.Instance, contigSequence.ToArray());
                // Somewhat confusing: the sequence was originally built as its reverse complement, so it is
                // reverse complemented again to get the correct orientation for neighbors.

                tmpSequence    = new Sequence(tmpSequence.GetReverseComplementedSequence());
                contigSequence = tmpSequence.ToList();
                Sequence       = tmpSequence.ConvertToString(0, tmpSequence.Count);
                // Flip the node list so nodes and sequence are in the same order.
                ConstituentNodes.Reverse();
            }
            else if (type == NODE_TYPE.GO_RIGHT)
            {
                // Start from the start node sequence as-is and extend to the right.
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                //var nextNodes = ExtendChain(startNode, true, graph);
                ExtendChain(startNode, true, graph);
                var tmpSequence = new Sequence(DnaAlphabet.Instance, contigSequence.ToArray());
                Sequence = tmpSequence.ConvertToString(0, tmpSequence.Count);
            }

            // Final step, run for every node type (behavior defined in Cement, not visible here).
            Cement();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// End-to-end check of scaffold path tracing on a read set that contains a
        /// palindromic contig: builds the graph, purges dangling links and redundant paths,
        /// builds contigs, maps and filters mate pairs, computes distances, and finally
        /// verifies the number of scaffold paths and the contig sequences of the first path.
        /// </summary>
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLength         = 6;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 7;

            // { DisplayID, DNA } for every input read, in input order.
            // NOTE(review): IDs of the form ">N.x1:abc" / ">N.y1:abc" look like mate pairs in
            // the "abc" library registered further down - confirm against the mate pair mapper.
            string[][] readData =
            {
                new[] { ">10.x1:abc", "ATGCCTC" },
                new[] { "1", "CCTCCTAT" },
                new[] { "2", "TCCTATC" },
                new[] { "3", "TGCCTCCT" },
                new[] { "4", "ATCTTAGC" },
                new[] { "5", "CTATCTTAG" },
                new[] { "6", "CTTAGCG" },
                new[] { ">8.x1:abc", "GCCTCCTAT" },
                new[] { ">8.y1:abc", "TAGCGCGCTA" },
                new[] { ">9.x1:abc", "AGCGCGC" },
                new[] { "7", "TTTTTT" },
                new[] { "8", "TTTTTAAA" },
                new[] { "9", "TAAAAA" },
                new[] { "10", "TTTTAG" },
                new[] { "11", "TTTAGC" },
                new[] { "12", "GCGCGCCGCGCG" },
            };

            List<ISequence> sequences = new List<ISequence>();
            foreach (string[] read in readData)
            {
                Sequence seq = new Sequence(Alphabets.DNA, read[1]);
                seq.DisplayID = read[0];
                sequences.Add(seq);
            }

            // Build and clean the de Bruijn graph.
            KmerLength = kmerLength;
            SequenceReads.Clear();
            AddSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            // Contigs -> read/contig map -> contig mate pairs.
            IList<ISequence> contigs = BuildContigs();
            ReadContigMapper mapper  = new ReadContigMapper();
            ReadContigMap    maps    = mapper.Map(contigs, sequences, kmerLength);

            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            // Orientation filter, then distance estimation on the surviving pairs.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator();
            dist.CalculateDistance(overlap);

            // Trace scaffold paths over the contig graph.
            Graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(Graph, overlap, kmerLength, 3);

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(3, paths.Count);
            Assert.AreEqual(3, paths.First().Count);

            ScaffoldPath  scaffold = paths.First();
            DeBruijnGraph graph    = Graph;

            // AreEqual (rather than IsTrue on Equals) reports the actual sequence on failure.
            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ToString());
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds scaffolds from list of reads and contigs.
        /// The caller's <paramref name="contigs"/> collection is not modified.
        /// </summary>
        /// <param name="reads">List of reads. Reads containing ambiguous or gap symbols are ignored.</param>
        /// <param name="contigs">List of contigs</param>
        /// <param name="kmerLength">Kmer Length. Must be positive.</param>
        /// <param name="depth">Depth for graph traversal. Must be positive.</param>
        /// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
        ///  Must not be negative.
        ///  Hierarchical Scaffolding With Bambus
        ///  by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
        ///  Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
        /// <returns>List of scaffold sequences</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigs"/> or
        /// <paramref name="reads"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="kmerLength"/> or
        /// <paramref name="depth"/> is not positive, or <paramref name="redundancy"/> is negative.</exception>
        public IList <ISequence> BuildScaffold(
            IList <ISequence> reads,
            IList <ISequence> contigs,
            int kmerLength,
            int depth      = 10,
            int redundancy = 2)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (null == reads)
            {
                throw new ArgumentNullException("reads");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLength);
            }

            if (depth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            if (redundancy < 0)
            {
                throw new ArgumentException(Resource.NegativeRedundancy);
            }

            _depth      = depth;
            _redundancy = redundancy;
            _kmerLength = kmerLength;

            // Only reads free of ambiguous and gap symbols take part in scaffolding.
            IList <ISequence> readSeqs = reads.AsParallel().Where(s => s.All <ISequenceItem>(c => !c.IsAmbiguous && !c.IsGap)).ToList();

            //Step1: Generate contig overlap graph.
            DeBruijnGraph contigGraph = GenerateContigOverlapGraph(contigs);

            // Contigs whose graph node has no extensions are left out of read mapping.
            // The removal is done on a private copy: removing from the caller's collection
            // would silently alter the caller's data and throws NotSupportedException for
            // arrays and read-only lists.
            List <ISequence> mappableContigs = new List <ISequence>(contigs);

            contigs = null;

            foreach (DeBruijnNode node in contigGraph.Nodes.Where(t => t.ExtensionsCount == 0))
            {
                mappableContigs.Remove(contigGraph.GetNodeSequence(node));
            }

            // Step2: Map Reads to contigs.
            ReadContigMap readContigMap = ReadContigMap(mappableContigs, readSeqs);

            // Intermediate results are released as soon as the next step has consumed them.
            mappableContigs = null;

            // Step3: Generate Contig Mate Pair Map.
            ContigMatePairs contigMatePairs = MapPairedReadsToContigs(readContigMap, readSeqs);

            readContigMap = null;

            // Step4: Filter Paired Reads.
            contigMatePairs = FilterReadsBasedOnOrientation(contigMatePairs);

            // Step5: Distance Calculation.
            CalculateDistanceBetweenContigs(contigMatePairs);

            // Step6: Trace Scaffold Paths.
            IList <ScaffoldPath> paths = TracePath(contigGraph, contigMatePairs);

            contigMatePairs = null;

            // Step7: Assemble paths.
            PathPurger(paths);

            // Step8: Generate sequence of scaffolds.
            return(GenerateScaffold(contigGraph, paths));
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Checks if the sequence in the node is a palindrome.
        /// A sequence is a palindrome if it is the same as its reverse complement.
        /// Reference: http://en.wikipedia.org/wiki/Palindromic_sequence
        /// </summary>
        /// <param name="node">DeBruijn graph node</param>
        /// <returns>Boolean indicating if node represents a palindromic sequence</returns>
        private bool IsPalindrome(DeBruijnNode node)
        {
            ISequence nodeSequence = _graph.GetNodeSequence(node);

            string forwardText = nodeSequence.ToString();
            string reverseText = nodeSequence.ReverseComplement.ToString();

            // The static string.Equals overload compares ordinally.
            return string.Equals(forwardText, reverseText);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Add left extension of the nodes to queue.
        /// For every left extension of the current node a candidate path is created; it is
        /// queued for further search, recorded as a finished path, or dropped. A node with
        /// no left extensions yields at most one finished path.
        /// </summary>
        /// <param name="node">Current node.</param>
        /// <param name="search">Queue for BFS.</param>
        /// <param name="paths">List of paths</param>
        /// <param name="familyTree">nodes visited for construction of paths; may be null.</param>
        /// <param name="contigPairedReadMap">contig and valid mate pair map.</param>
        private void LeftExtension(
            KeyValuePair<DeBruijnNode, DeBruijnEdge> node,
            Queue<Paths> search,
            List<Paths> paths,
            ScaffoldPath familyTree,
            Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap)
        {
            if (!(node.Key.LeftExtensionNodes.Count > 0))
            {
                // Dead end on the left: the path is only kept when it already covers every mapped contig.
                Paths terminalPath = StartChildPath(node, familyTree);

                if (CoversEveryMappedContig(terminalPath, contigPairedReadMap))
                {
                    paths.Add(terminalPath);
                }

                return;
            }

            foreach (KeyValuePair<DeBruijnNode, DeBruijnEdge> child in node.Key.LeftExtensionNodes)
            {
                Paths candidate = StartChildPath(node, familyTree);
                candidate.CurrentNode     = child;
                candidate.NodeOrientation = false;

                // Keep searching only while the distance constraint holds, the depth limit has
                // not been reached and some mapped contig is still missing from the path.
                bool keepSearching = DistanceConstraint(candidate, contigPairedReadMap) &&
                                     candidate.FamilyTree.Count < _depth &&
                                     !CoversEveryMappedContig(candidate, contigPairedReadMap);

                if (keepSearching)
                {
                    search.Enqueue(candidate);
                }
                else if (CoversEveryMappedContig(candidate, contigPairedReadMap))
                {
                    paths.Add(candidate);
                }
            }
        }

        /// <summary>
        /// Creates a new path whose family tree is the given family tree (if any) followed by
        /// the given node.
        /// </summary>
        private Paths StartChildPath(
            KeyValuePair<DeBruijnNode, DeBruijnEdge> node,
            ScaffoldPath familyTree)
        {
            Paths created = new Paths();

            if (familyTree != null)
            {
                created.FamilyTree.AddRange(familyTree);
            }

            created.FamilyTree.Add(node);
            return created;
        }

        /// <summary>
        /// Returns true when every contig key of the mate pair map equals (operator ==) the
        /// sequence of some node in the path's family tree.
        /// </summary>
        private bool CoversEveryMappedContig(
            Paths path,
            Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap)
        {
            return contigPairedReadMap.All(
                t => path.FamilyTree.Any(k => t.Key == _graph.GetNodeSequence(k.Key)));
        }