/// <summary>
        /// Step 3: Remove dangling links from graph.
        /// The purger is first run once for every candidate dangling-link length,
        /// and then repeatedly at <c>DanglingLinksThreshold</c> until a pass no
        /// longer detects anything. Nothing happens when no purger is assigned or
        /// when the threshold is not positive.
        /// </summary>
        protected void UnDangleGraph()
        {
            if (DanglingLinksPurger == null || DanglingLinksThreshold <= 0)
            {
                return;
            }

            // Optimization: rather than stepping the threshold up by one and purging
            // at every step, first find out which dangling-link lengths are worth
            // purging, and run the purger only for those lengths.
            DanglingLinksPurger.LengthThreshold = DanglingLinksThreshold - 1;

            IEnumerable<int> lengthsToPurge;
            IGraphEndsEroder eroder = DanglingLinksPurger as IGraphEndsEroder;
            if (eroder != null && AllowErosion)
            {
                // A purger that can also erode reports the dangling-link lengths while
                // eroding the low coverage ends, which marks any node below the
                // threshold for deletion.

                // TODO: Verify that this does enumerate all dangling ends. The concern:
                // if dangling ends of length 7 and 2 arrive at a node that would itself
                // be a dangling node of length 2 without them, then the dangling end of
                // length 9 (what remains once either the 7 or the 2 end is gone) might
                // not be reported.
                lengthsToPurge = eroder.ErodeGraphEnds(Graph, ErosionThreshold);
            }
            else
            {
                // No erosion: try every length from 1 up to (threshold - 1).
                lengthsToPurge = Enumerable.Range(1, DanglingLinksThreshold - 1);
            }

            // Erosion must happen only once, so the erode threshold is reset to -1.
            ErosionThreshold = -1;

            // First pass: purge at each candidate length.
            foreach (int length in lengthsToPurge)
            {
                PurgeDanglingLinksOfLength(length);
            }

            // Removing dangling links can expose new ones, so keep purging at the
            // full threshold until a pass detects nothing (or the graph is too small).
            DeBruijnPathList lastPurged;
            do
            {
                lastPurged = PurgeDanglingLinksOfLength(DanglingLinksThreshold);
            }
            while (lastPurged != null && lastPurged.Paths.Count > 0);
        }

        /// <summary>
        /// Runs one detect-and-remove pass of the dangling links purger at the given
        /// length threshold, provided the graph has at least that many nodes.
        /// </summary>
        /// <param name="lengthThreshold">Length threshold to assign to the purger for this pass.</param>
        /// <returns>
        /// The paths detected (and handed to removal) in this pass, or null when the
        /// graph has fewer nodes than <paramref name="lengthThreshold"/> and the pass was skipped.
        /// </returns>
        private DeBruijnPathList PurgeDanglingLinksOfLength(int lengthThreshold)
        {
            if (Graph.NodeCount < lengthThreshold)
            {
                return null;
            }

            DanglingLinksPurger.LengthThreshold = lengthThreshold;
            DeBruijnPathList detected = DanglingLinksPurger.DetectErroneousNodes(Graph);
            DanglingLinksPurger.RemoveErroneousNodes(Graph, detected);
            return detected;
        }
        /// <summary>
        /// Supplies default implementations for the graph-modifier properties.
        /// Every assembly step is delegated to its own class; any of those
        /// properties that is still null when this method runs is assigned a
        /// default instance. Properties that are already set are left alone.
        /// </summary>
        protected void InitializeDefaultGraphModifiers()
        {
            // Dangling links step: default purger, constructed without arguments.
            if (this.DanglingLinksPurger == null)
            {
                this.DanglingLinksPurger = new DanglingLinksPurger();
            }

            // Redundant paths step: default purger built from the configured length threshold.
            if (this.RedundantPathsPurger == null)
            {
                this.RedundantPathsPurger = new RedundantPathsPurger(this.RedundantPathLengthThreshold);
            }

            // Low coverage contig step: defaults to the simple path contig builder.
            if (this.LowCoverageContigPurger == null)
            {
                this.LowCoverageContigPurger = new SimplePathContigBuilder();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Runs graph construction, dangling-link removal, redundancy removal and
        /// contig building on a fixed set of DNA reads (k-mer length 5, dangle
        /// threshold 3, redundant path threshold 6), maps the reads onto the
        /// contigs, and traces scaffold paths through the resulting contig graph.
        /// Asserts that three paths are found and that the first path consists of
        /// the three expected contig sequences, in order.
        /// </summary>
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLengthConst = 5;
            const int dangleThreshold = 3;
            const int redundantThreshold = 6;

            var sequences = new List<ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") {ID = "0"},
                new Sequence(Alphabets.DNA, "CCTCCTAT") {ID = "1"},
                new Sequence(Alphabets.DNA, "TCCTATC") {ID = "2"},
                new Sequence(Alphabets.DNA, "TGCCTCCT") {ID = "3"},
                new Sequence(Alphabets.DNA, "ATCTTAGC") {ID = "4"},
                new Sequence(Alphabets.DNA, "CTATCTTAG") {ID = "5"},
                new Sequence(Alphabets.DNA, "CTTAGCG") {ID = "6"},
                new Sequence(Alphabets.DNA, "GCCTCCTAT") {ID = "7"},
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") {ID = "8"},
                new Sequence(Alphabets.DNA, "AGCGCGC") {ID = "9"},
                new Sequence(Alphabets.DNA, "TTTTTT") {ID = "10"},
                new Sequence(Alphabets.DNA, "TTTTTAAA") {ID = "11"},
                new Sequence(Alphabets.DNA, "TAAAAA") {ID = "12"},
                new Sequence(Alphabets.DNA, "TTTTAG") {ID = "13"},
                new Sequence(Alphabets.DNA, "TTTAGC") {ID = "14"},
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") {ID = "15"},
            };

            // Build the graph from the reads.
            KmerLength = kmerLengthConst;
            SequenceReads.Clear();

            SetSequenceReads(sequences);
            CreateGraph();

            // Configure and run the error-correction steps, then build contigs.
            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);

            UnDangleGraph();
            RemoveRedundancy();

            IList<ISequence> contigs = BuildContigs().ToList();
            ReadContigMapper mapper = new ReadContigMapper();

            // Map reads to contigs and derive the contig mate pairs.
            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);
            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();

            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator(overlap);

            overlap = dist.CalculateDistance();
            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            // Assert.AreEqual takes (expected, actual); passing them in that order
            // keeps the failure message truthful about which value was expected.
            Assert.AreEqual(3, paths.Count);

            // Taken after the count assertion so that an empty result fails on the
            // assertion above instead of throwing from First().
            ScaffoldPath scaffold = paths.First();
            Assert.AreEqual(3, scaffold.Count);

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }
Beispiel #4
0
        /// <summary>
        /// Checks that removing the detected dangling nodes actually takes them out
        /// of the graph. The reads file and k-mer length come from the given xml
        /// node; the purger is created with a length threshold of k-mer length + 1.
        /// After removal, the first node of the first detected path must no longer
        /// be among the graph's nodes.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidatePadenaRemoveErrorNodes(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

            using (FastAParser parser = new FastAParser(filePath))
            {
                // Read the input sequences.
                IEnumerable<ISequence> reads = parser.Parse();

                // Steps 1 and 2: build the k-mers and the graph from the reads.
                int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);
                this.KmerLength = kmerSize;
                this.SequenceReads.Clear();
                this.SetSequenceReads(reads.ToList());
                this.CreateGraph();
                DeBruijnGraph graph = this.Graph;

                // Step 3: detect the dangling nodes and remove them from the graph.
                DanglingLinksPurger purger = new DanglingLinksPurger(kmerSize + 1);
                DeBruijnPathList detected = purger.DetectErroneousNodes(graph);
                purger.RemoveErroneousNodes(graph, detected);

                // The first detected node must be gone from the graph.
                Assert.IsFalse(graph.GetNodes().Contains(detected.Paths[0].PathNodes[0]));
            }

            ApplicationLog.WriteLine(@"Padena P1 :DeBruijnGraph.RemoveErrorNodes() validation for Padena step3 completed successfully");
        }
        /// <summary>
        /// Assigns a default implementation to each graph-modifier property that
        /// has not been set yet. Each assembly step lives in its own class, and
        /// this method only fills the properties that are currently null; values
        /// that were assigned beforehand are not replaced.
        /// </summary>
        protected void InitializeDefaultGraphModifiers()
        {
            // Default for the dangling links step (parameterless constructor).
            if (this.DanglingLinksPurger == null)
            {
                this.DanglingLinksPurger = new DanglingLinksPurger();
            }

            // Default for the redundant paths step, using the configured length threshold.
            if (this.RedundantPathsPurger == null)
            {
                this.RedundantPathsPurger = new RedundantPathsPurger(this.RedundantPathLengthThreshold);
            }

            // Default for the low coverage contig step: the simple path contig builder.
            if (this.LowCoverageContigPurger == null)
            {
                this.LowCoverageContigPurger = new SimplePathContigBuilder();
            }
        }
Beispiel #6
0
        /// <summary>
        /// Checks that every node reported by DetectErroneousNodes() is an expected
        /// dangling node. The reads file, k-mer length and the comma-separated list
        /// of expected dangling sequences come from the given xml node; the purger
        /// is created with a length threshold of k-mer length + 1. If no paths are
        /// detected, no assertion is evaluated.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidatePadenaDetectErrorNodes(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string danglingSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DangleNodeSequenceNode);
            string[] expectedDanglings = danglingSequence.Split(',');

            using (FastAParser parser = new FastAParser(filePath))
            {
                // Read the input sequences.
                IEnumerable<ISequence> reads = parser.Parse();

                // Steps 1 and 2: build the k-mers and the graph from the reads.
                int kmerSize = int.Parse(kmerLength, null);
                this.KmerLength = kmerSize;
                this.SequenceReads.Clear();

                this.SetSequenceReads(reads.ToList());
                this.CreateGraph();

                // Step 3: detect the dangling nodes.
                DanglingLinksPurger purger = new DanglingLinksPurger(kmerSize + 1);
                DeBruijnPathList detected = purger.DetectErroneousNodes(this.Graph);

                // Walk all nodes of all detected paths, in path order; each node's
                // sequence text must be one of the expected dangling sequences.
                foreach (DeBruijnNode node in detected.Paths.SelectMany(danglingPath => danglingPath.PathNodes))
                {
                    Assert.IsTrue(expectedDanglings.Contains(Graph.GetNodeSequence(node).ToString()));
                }
            }

            ApplicationLog.WriteLine(
                @"Padena BVT :DeBruijnGraph.DetectErrorNodes() 
                    validation for Padena step3 completed successfully");
        }