Пример #1
0
        /// <summary>
        /// Builds the contig overlap graph for the supplied contigs, using the
        /// kmer length currently stored on this instance.
        /// </summary>
        /// <param name="contigs">Contig sequences the graph is built from.</param>
        /// <returns>The newly built contig graph.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigs"/> is null.</exception>
        protected ContigGraph GenerateContigOverlapGraph(IList<ISequence> contigs)
        {
            if (contigs != null)
            {
                ContigGraph overlapGraph = new ContigGraph();
                overlapGraph.BuildContigGraph(contigs, this.kmerLength);
                return overlapGraph;
            }

            throw new ArgumentNullException("contigs");
        }
Пример #2
0
        /// <summary>
        /// Converts the scaffold path into its sequence.
        /// </summary>
        /// <remarks>
        /// Side effects: the first entry is removed from this path, and every node
        /// on the path is marked. The first entry (including its edge) is read
        /// unconditionally, so the path is expected to hold at least one entry.
        /// </remarks>
        /// <param name="graph">Contig graph used to look up the sequence of each node.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>
        /// Scaffold sequence. For a path that holds a single node this is that
        /// node's own sequence.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="graph"/> is null.</exception>
        public ISequence BuildSequenceFromPath(ContigGraph graph, int kmerLength)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            Node startNode = this[0].Key;
            bool isForwardDirection = this[0].Value.IsSameOrientation;

            startNode.MarkNode();

            // Look the start sequence up once: it seeds the scaffold and also
            // supplies the alphabet when the path has no further nodes.
            ISequence startSequence = graph.GetNodeSequence(startNode);
            List<byte> scaffoldSequence = new List<byte>();
            scaffoldSequence.InsertRange(0, startSequence);
            this.RemoveAt(0);

            // There is overlap of (k-1) symbols between adjacent contigs
            if (kmerLength > 1)
            {
                kmerLength--;
            }

            bool sameOrientation = true;
            ISequence nextNodeSequence = null;

            foreach (KeyValuePair<Node, Edge> extensions in this)
            {
                // Running orientation: an edge that is not "same orientation" flips it.
                sameOrientation = sameOrientation == extensions.Value.IsSameOrientation;

                ISequence nodeSequence = graph.GetNodeSequence(extensions.Key);
                nextNodeSequence = sameOrientation
                    ? nodeSequence
                    : nodeSequence.GetReverseComplementedSequence();

                // Extend scaffold sequence using symbols from contig beyond the overlap
                if (isForwardDirection)
                {
                    scaffoldSequence.InsertRange(
                        scaffoldSequence.Count,
                        nextNodeSequence.GetSubSequence(kmerLength, nextNodeSequence.Count - kmerLength));
                }
                else
                {
                    scaffoldSequence.InsertRange(
                        0,
                        nextNodeSequence.GetSubSequence(0, nextNodeSequence.Count - kmerLength));
                }

                extensions.Key.MarkNode();
            }

            // Previously a single-node path returned null here even though the start
            // node had already been marked and its bytes collected, so the contig was
            // lost. Fall back to the start sequence for the alphabet instead.
            ISequence alphabetSource = nextNodeSequence ?? startSequence;

            return new Sequence(alphabetSource.Alphabet, scaffoldSequence.ToArray(), false);
        }
Пример #3
0
        /// <summary>
        /// Performs Breadth First Search to traverse through graph to generate scaffold paths.
        /// </summary>
        /// <remarks>
        /// The graph, kmer length and depth are stored on this instance before the
        /// traversal starts. Nodes are processed with <c>Parallel.ForEach</c>, so the
        /// order of the returned paths is not fixed.
        /// </remarks>
        /// <param name="overlapGraph">Contig Overlap Graph.</param>
        /// <param name="contigPairedReadMaps">InterContig Distances.</param>
        /// <param name="lengthOfKmer">Length of Kmer.</param>
        /// <param name="searchDepth">Depth to which graph is searched.</param>
        /// <returns>List of paths/scaffold.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="overlapGraph"/> or <paramref name="contigPairedReadMaps"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="lengthOfKmer"/> or <paramref name="searchDepth"/> is not positive.
        /// </exception>
        public IList<ScaffoldPath> FindPaths(
            ContigGraph overlapGraph,
            ContigMatePairs contigPairedReadMaps,
            int lengthOfKmer,
            int searchDepth = 10)
        {
            if (overlapGraph == null)
            {
                // Report the actual parameter name (was "deBruijnGraph").
                throw new ArgumentNullException("overlapGraph");
            }

            if (contigPairedReadMaps == null)
            {
                throw new ArgumentNullException("contigPairedReadMaps");
            }

            if (lengthOfKmer <= 0)
            {
                throw new ArgumentException(Resource.KmerLength);
            }

            if (searchDepth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            this.graph = overlapGraph;
            this.kmerLength = lengthOfKmer;
            this.depth = searchDepth;

            List<ScaffoldPath> scaffoldPaths = new List<ScaffoldPath>();

            Parallel.ForEach(
                overlapGraph.Nodes,
                (Node node) =>
                {
                    // Only nodes whose sequence has mate-pair information are traversed.
                    Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap;
                    if (contigPairedReadMaps.TryGetValue(overlapGraph.GetNodeSequence(node), out contigPairedReadMap))
                    {
                        List<ScaffoldPath> scaffoldPath = TraverseGraph(node, contigPairedReadMap);

                        // The result list is shared across worker threads.
                        lock (scaffoldPaths)
                        {
                            scaffoldPaths.AddRange(scaffoldPath);
                        }
                    }
                });

            return scaffoldPaths;
        }
Пример #4
0
        /// <summary>
        /// Produces the scaffold sequences: one per scaffold path, followed by the
        /// sequence of every graph node that is still unmarked afterwards.
        /// </summary>
        /// <remarks>
        /// The supplied graph is disposed before this method returns.
        /// </remarks>
        /// <param name="contigGraph">Contig Overlap Graph.</param>
        /// <param name="paths">Scaffold paths.</param>
        /// <returns>List of sequences of scaffolds.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="contigGraph"/> or <paramref name="paths"/> is null.
        /// </exception>
        protected IList<ISequence> GenerateScaffold(
            ContigGraph contigGraph,
            IList<ScaffoldPath> paths)
        {
            if (contigGraph == null)
            {
                throw new ArgumentNullException("contigGraph");
            }

            if (paths == null)
            {
                throw new ArgumentNullException("paths");
            }

            // Materialize the path sequences first; the unmarked-node query below
            // is deferred and is only evaluated once these have all been built.
            List<ISequence> result = paths
                .AsParallel()
                .Select(path => path.BuildSequenceFromPath(contigGraph, this.kmerLength))
                .ToList();

            // Nodes that are not marked at the time the query is enumerated.
            IEnumerable<Node> unmarkedNodes = contigGraph.Nodes
                .AsParallel()
                .Where(node => !node.IsMarked());

            result.AddRange(
                unmarkedNodes
                    .AsParallel()
                    .Select(node => contigGraph.GetNodeSequence(node)));

            contigGraph.Dispose();
            return result;
        }
Пример #5
0
 /// <summary>
 /// Finds scaffold paths in the contig overlap graph by delegating to the
 /// path tracer held by this instance, passing along the stored kmer length
 /// and traversal depth.
 /// </summary>
 /// <param name="contigGraph">Contig Graph.</param>
 /// <param name="contigMatePairs">Contig Mate Pair map.</param>
 /// <returns>List of Scaffold Paths.</returns>
 protected IList<ScaffoldPath> TracePath(ContigGraph contigGraph, ContigMatePairs contigMatePairs)
 {
     IList<ScaffoldPath> scaffoldPaths = this.tracePath.FindPaths(
         contigGraph,
         contigMatePairs,
         this.kmerLength,
         this.depthField);

     return scaffoldPaths;
 }
Пример #6
0
        /// <summary>
        /// Runs the scaffold-building pipeline over the given reads and contigs
        /// and returns the resulting scaffold sequences.
        /// </summary>
        /// <param name="reads">List of reads.</param>
        /// <param name="contigs">List of contigs.</param>
        /// <param name="lengthofKmer">Kmer Length.</param>
        /// <param name="depth">Depth for graph traversal.</param>
        /// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
        ///  Hierarchical Scaffolding With Bambus
        ///  by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
        ///  Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
        /// <returns>List of scaffold sequences.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="contigs"/> or <paramref name="reads"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="lengthofKmer"/> or <paramref name="depth"/> is not positive,
        /// or when <paramref name="redundancy"/> is negative.
        /// </exception>
        public IList<ISequence> BuildScaffold(
            IEnumerable<ISequence> reads,
            IList<ISequence> contigs,
            int lengthofKmer,
            int depth = 10,
            int redundancy = 2)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (reads == null)
            {
                throw new ArgumentNullException("reads");
            }

            if (lengthofKmer <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLength);
            }

            if (depth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            if (redundancy < 0)
            {
                throw new ArgumentException(Resource.NegativeRedundancy);
            }

            // Remember the settings; the helper steps below read them from the instance.
            this.depthField = depth;
            this.redundancyField = redundancy;
            this.kmerLength = lengthofKmer;

            IEnumerable<ISequence> validatedReads = ValidateReads(reads);

            // Step 1: build the contig overlap graph from a private copy of the contig list.
            IList<ISequence> workingContigs = new List<ISequence>(contigs);
            ContigGraph overlapGraph = GenerateContigOverlapGraph(workingContigs);

            // Contigs whose node has no extensions are left out of the read mapping.
            foreach (Node isolatedNode in overlapGraph.Nodes.Where(n => n.ExtensionsCount == 0))
            {
                workingContigs.Remove(overlapGraph.GetNodeSequence(isolatedNode));
            }

            // Step 2: map the reads onto the remaining contigs.
            ReadContigMap readsByContig = ReadContigMap(workingContigs, validatedReads);

            workingContigs = null;

            // Step 3: derive the contig mate-pair map.
            ContigMatePairs matePairs = MapPairedReadsToContigs(readsByContig, validatedReads);

            readsByContig = null;

            // Step 4: filter the paired reads by orientation.
            matePairs = FilterReadsBasedOnOrientation(matePairs);

            // Step 5: compute distances between contigs.
            CalculateDistanceBetweenContigs(matePairs);

            // Step 6: trace the scaffold paths.
            IList<ScaffoldPath> scaffoldPaths = TracePath(overlapGraph, matePairs);

            matePairs = null;

            // Step 7: purge/assemble the traced paths.
            PathPurger(scaffoldPaths);

            // Step 8: turn the paths into scaffold sequences.
            return GenerateScaffold(overlapGraph, scaffoldPaths);
        }
Пример #7
0
        // Builds nine scaffold paths over the nodes of a small contig graph (the
        // full node list plus eight sub-ranges of it), purges them, and expects a
        // single surviving path that Compare accepts against the full node list.
        public void PathPurger1()
        {
            const int KmerLength = 7;
            ISequence sequence = new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG");
            IList<ISequence> contigsSequence = SequenceToKmerBuilder.GetKmerSequences(sequence, KmerLength).ToList();
            ContigGraph graph = new ContigGraph();

            graph.BuildContigGraph(contigsSequence, KmerLength);
            List<Node> contigs = graph.Nodes.ToList();

            // { start, count } windows into contigs, in the order the paths are added.
            int[][] subPathRanges =
            {
                new[] { 2, 5 },
                new[] { 3, 5 },
                new[] { 6, 5 },
                new[] { 0, 11 },
                new[] { 7, 4 },
                new[] { 11, 0 },
                new[] { 2, 9 },
                new[] { 1, 10 },
            };

            IList<ScaffoldPath> paths = new List<ScaffoldPath>();

            // First path: every node, in graph order.
            ScaffoldPath path = new ScaffoldPath();
            foreach (Node node in contigs)
            {
                path.Add(new KeyValuePair<Node, Edge>(node, null));
            }

            paths.Add(path);

            // Remaining paths: one per sub-range.
            foreach (int[] range in subPathRanges)
            {
                path = new ScaffoldPath();
                foreach (Node node in contigs.GetRange(range[0], range[1]))
                {
                    path.Add(new KeyValuePair<Node, Edge>(node, null));
                }

                paths.Add(path);
            }

            PathPurger assembler = new PathPurger();

            assembler.PurgePath(paths);

            // Expected value goes first so a failure message reports the right numbers.
            Assert.AreEqual(1, paths.Count);
            Assert.IsTrue(Compare(paths.First(), contigs));
        }
Пример #8
0
        // Assembles contigs from a fixed set of reads (including a palindromic one),
        // builds the mate-pair map and contig graph, traces scaffold paths with depth 3,
        // and checks the number of paths and the node sequences of the first path.
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLengthConst = 5;
            const int dangleThreshold = 3;
            const int redundantThreshold = 6;

            var sequences = new List<ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
                new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
                new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
                new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
                new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
                new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
                new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
                new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
                new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
                new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
                new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
                new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
                new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
                new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
            };

            // Build and clean the assembly graph, then extract contigs.
            KmerLength = kmerLengthConst;
            SequenceReads.Clear();

            SetSequenceReads(sequences);
            CreateGraph();

            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);

            UnDangleGraph();
            RemoveRedundancy();

            IList<ISequence> contigs = BuildContigs().ToList();

            // Map reads to contigs and derive the filtered, distance-annotated mate pairs.
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator(overlap);
            overlap = dist.CalculateDistance();

            // Trace scaffold paths over the contig graph.
            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs, this.KmerLength);

            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            // Expected value goes first, matching the sequence assertions below.
            Assert.AreEqual(3, paths.Count);

            ScaffoldPath scaffold = paths.First();
            Assert.AreEqual(3, scaffold.Count);

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }
Пример #9
0
        // Assembles contigs from a fixed set of reads (some carrying mate-pair style
        // IDs for library "abc"), builds the mate-pair map and contig graph, traces
        // scaffold paths with depth 3, and checks the number of paths and the node
        // sequences of the first path.
        public void TracePathTestWithPalindromicContig()
        {
            const int KmerLengthConst = 6;
            const int DangleThreshold = 3;
            const int RedundantThreshold = 7;

            // { read symbols, read ID } in the order the reads are added.
            string[,] readData =
            {
                { "ATGCCTC", ">10.x1:abc" },
                { "CCTCCTAT", "1" },
                { "TCCTATC", "2" },
                { "TGCCTCCT", "3" },
                { "ATCTTAGC", "4" },
                { "CTATCTTAG", "5" },
                { "CTTAGCG", "6" },
                { "GCCTCCTAT", ">8.x1:abc" },
                { "TAGCGCGCTA", ">8.y1:abc" },
                { "AGCGCGC", ">9.x1:abc" },
                { "TTTTTT", "7" },
                { "TTTTTAAA", "8" },
                { "TAAAAA", "9" },
                { "TTTTAG", "10" },
                { "TTTAGC", "11" },
                { "GCGCGCCGCGCG", "12" },
            };

            List<ISequence> sequences = new List<ISequence>();
            for (int i = 0; i < readData.GetLength(0); i++)
            {
                Sequence seq = new Sequence(Alphabets.DNA, readData[i, 0].Select(a => (byte)a).ToArray());
                seq.ID = readData[i, 1];
                sequences.Add(seq);
            }

            // Build and clean the assembly graph, then extract contigs.
            KmerLength = KmerLengthConst;
            SequenceReads.Clear();
            SetSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold = DangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(DangleThreshold);
            RedundantPathLengthThreshold = RedundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(RedundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList<ISequence> contigs = BuildContigs().ToList();

            // Map reads to contigs and derive the filtered, distance-annotated mate pairs.
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, sequences, KmerLengthConst);

            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator(overlap);
            overlap = dist.CalculateDistance();

            // Trace scaffold paths over the contig graph.
            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs, this.KmerLength);

            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, KmerLengthConst, 3);

            // Expected value goes first so a failure message reports the right numbers.
            Assert.AreEqual(3, paths.Count);

            ScaffoldPath scaffold = paths.First();
            Assert.AreEqual(3, scaffold.Count);

            // AreEqual (rather than IsTrue over Equals) shows the actual sequence on failure.
            Assert.AreEqual(
                "ATGCCTCCTATCTTAGC",
                new string(graph.GetNodeSequence(scaffold[0].Key).Select(a => (char)a).ToArray()));
            Assert.AreEqual(
                "TTAGCGCG",
                new string(graph.GetNodeSequence(scaffold[1].Key).Select(a => (char)a).ToArray()));
            Assert.AreEqual(
                "GCGCGC",
                new string(graph.GetNodeSequence(scaffold[2].Key).Select(a => (char)a).ToArray()));
        }