Ejemplo n.º 1
0
 /// <summary>
 /// Step 4: Strip redundant paths out of the graph. The detect-and-remove
 /// pass is repeated until a pass detects no more paths.
 /// Nothing happens when no RedundantPathsPurger has been assigned.
 /// </summary>
 protected void RemoveRedundancy()
 {
     // Without a purger this step is skipped entirely.
     if (RedundantPathsPurger == null)
     {
         return;
     }

     while (true)
     {
         DeBruijnPathList detected = RedundantPathsPurger.DetectErroneousNodes(Graph);
         RedundantPathsPurger.RemoveErroneousNodes(Graph, detected);

         // Removal is still invoked with the empty result of the final pass
         // before the loop terminates.
         if (detected.Paths.Count <= 0)
         {
             break;
         }
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Supplies default implementations for the per-step graph modifiers.
        /// Each assembly step is delegated to its own class; any modifier that
        /// is still null when this method runs receives a default instance,
        /// while modifiers that were already assigned are left untouched.
        /// </summary>
        protected void InitializeDefaultGraphModifiers()
        {
            // Dangling links: parameterless default purger.
            if (this.DanglingLinksPurger == null)
            {
                this.DanglingLinksPurger = new DanglingLinksPurger();
            }

            // Redundant paths: purger configured with the current length threshold.
            if (this.RedundantPathsPurger == null)
            {
                this.RedundantPathsPurger = new RedundantPathsPurger(this.RedundantPathLengthThreshold);
            }

            // Low-coverage contigs: the simple path contig builder fills this role.
            if (this.LowCoverageContigPurger == null)
            {
                this.LowCoverageContigPurger = new SimplePathContigBuilder();
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// End-to-end scaffold path tracing test over a fixed set of 16 DNA reads.
        /// Builds the de Bruijn graph (kmer length 5), removes dangling links
        /// (threshold 3) and redundant paths (threshold 6), builds contigs, maps
        /// reads and mate pairs onto them, filters the pairs by orientation,
        /// calculates distances and finally traces paths through the contig graph.
        /// Expects three scaffold paths, the first of which visits three contigs
        /// with known sequences.
        /// </summary>
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLengthConst = 5;
            const int dangleThreshold = 3;
            const int redundantThreshold = 6;

            var sequences = new List<ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") {ID = "0"},
                new Sequence(Alphabets.DNA, "CCTCCTAT") {ID = "1"},
                new Sequence(Alphabets.DNA, "TCCTATC") {ID = "2"},
                new Sequence(Alphabets.DNA, "TGCCTCCT") {ID = "3"},
                new Sequence(Alphabets.DNA, "ATCTTAGC") {ID = "4"},
                new Sequence(Alphabets.DNA, "CTATCTTAG") {ID = "5"},
                new Sequence(Alphabets.DNA, "CTTAGCG") {ID = "6"},
                new Sequence(Alphabets.DNA, "GCCTCCTAT") {ID = "7"},
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") {ID = "8"},
                new Sequence(Alphabets.DNA, "AGCGCGC") {ID = "9"},
                new Sequence(Alphabets.DNA, "TTTTTT") {ID = "10"},
                new Sequence(Alphabets.DNA, "TTTTTAAA") {ID = "11"},
                new Sequence(Alphabets.DNA, "TAAAAA") {ID = "12"},
                new Sequence(Alphabets.DNA, "TTTTAG") {ID = "13"},
                new Sequence(Alphabets.DNA, "TTTAGC") {ID = "14"},
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") {ID = "15"},
            };

            // Steps 1-2: load the reads and build the graph.
            KmerLength = kmerLengthConst;
            SequenceReads.Clear();

            SetSequenceReads(sequences);
            CreateGraph();

            // Steps 3-4: configure the purgers explicitly, then clean the graph.
            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);

            UnDangleGraph();
            RemoveRedundancy();

            // Build contigs and map the original reads back onto them.
            IList<ISequence> contigs = BuildContigs().ToList();
            ReadContigMapper mapper = new ReadContigMapper();

            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);
            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();

            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator(overlap);

            overlap = dist.CalculateDistance();
            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(3, paths.Count);
            ScaffoldPath scaffold = paths.First();
            Assert.AreEqual(3, scaffold.Count);

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a RedundantPathsPurger with a path length threshold of
        /// kmer length + 1, detects the redundant error nodes and removes them
        /// from the graph, then validates the resulting graph.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="isMicroOrganism">
        /// When true, only the node count of the graph is compared against the
        /// expected count read from the xml node; otherwise the graph is checked
        /// with ValidateGraph.
        /// </param>
        internal void ValidateRedundantPathPurgerCtor(string nodeName, bool isMicroOrganism)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.ExpectedNodesCountAfterDangling);

            int parsedKmerLength = int.Parse(kmerLength, (IFormatProvider)null);

            // Get the input reads and build kmers. The parser stays open for the
            // whole block because the reads are only materialized by ToList() below.
            using (FastAParser parser = new FastAParser(filePath))
            {
                IEnumerable<ISequence> sequenceReads = parser.Parse();

                // Build kmers from step1,graph in step2 
                // Remove the dangling links from graph in step3
                // Validate the graph
                this.KmerLength = parsedKmerLength;
                this.SequenceReads.Clear();
                this.SetSequenceReads(sequenceReads.ToList());
                this.CreateGraph();
                DeBruijnGraph graph = this.Graph;
                this.DanglingLinksPurger = new DanglingLinksPurger(this.KmerLength);
                this.UnDangleGraph();

                // Create RedundantPathPurger instance, detect redundant nodes and remove error nodes
                RedundantPathsPurger redundantPathPurger =
                  new RedundantPathsPurger(parsedKmerLength + 1);
                DeBruijnPathList redundantnodelist = redundantPathPurger.DetectErroneousNodes(graph);
                redundantPathPurger.RemoveErroneousNodes(graph, redundantnodelist);

                if (isMicroOrganism)
                {
                    // The expected count is read as text; parse it so the assertion
                    // compares two numbers instead of a string with a number
                    // (which can never be equal).
                    Assert.AreEqual(
                        int.Parse(expectedNodesCount, (IFormatProvider)null),
                        graph.GetNodes().Count());
                }
                else
                {
                    ValidateGraph(graph, nodeName);
                }
            }
            ApplicationLog.WriteLine(@"Padena P1 :RedundantPathsPurger ctor and methods validation for Padena step4 completed successfully");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Fills in the graph-modifier members that have not been set yet.
        /// Every assembly step is implemented by a dedicated class; this method
        /// gives each still-null modifier its default implementation and keeps
        /// any modifier that has already been assigned.
        /// </summary>
        protected void InitializeDefaultGraphModifiers()
        {
            // Default dangling-link purger (parameterless constructor).
            if (this.DanglingLinksPurger == null)
            {
                this.DanglingLinksPurger = new DanglingLinksPurger();
            }

            // Default redundant-path purger, built from the current length threshold.
            if (this.RedundantPathsPurger == null)
            {
                this.RedundantPathsPurger = new RedundantPathsPurger(this.RedundantPathLengthThreshold);
            }

            // Default low-coverage contig purger is the simple path contig builder.
            if (this.LowCoverageContigPurger == null)
            {
                this.LowCoverageContigPurger = new SimplePathContigBuilder();
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Creates a RedundantPathsPurger with a path length threshold of
        /// kmer length + 1, detects the redundant error nodes and removes them
        /// from the graph, then validates the resulting graph with ValidateGraph.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateRedundantPathPurgerCtor(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

            int parsedKmerLength = int.Parse(kmerLength, (IFormatProvider)null);

            // Get the input reads and build kmers.
            // Backslashes in the configured path are swapped for the platform separator.
            List<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads while the parser is still open.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when parsing throws.
                parser.Close();
            }

            // Build kmers from step1,graph in step2 
            // Remove the dangling links from graph in step3
            // Validate the graph
            this.KmerLength = parsedKmerLength;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads);
            this.CreateGraph();
            this.UnDangleGraph();

            // Create RedundantPathPurger instance, detect redundant nodes and remove error nodes
            RedundantPathsPurger redundantPathPurger =
                new RedundantPathsPurger(parsedKmerLength + 1);
            DeBruijnPathList redundantnodelist =
                redundantPathPurger.DetectErroneousNodes(this.Graph);
            redundantPathPurger.RemoveErroneousNodes(this.Graph, redundantnodelist);

            ValidateGraph(this.Graph, nodeName);

            // Single-line message: the previous verbatim literal wrapped across two
            // source lines and therefore logged a line break plus indentation.
            ApplicationLog.WriteLine(
                @"Padena BVT :RedundantPathsPurger ctor and methods validation for Padena step4 completed successfully");
        }