Example #1
0
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLengthConst = 5;
            const int dangleThreshold = 3;
            const int redundantThreshold = 6;

            var sequences = new List<ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") {ID = "0"},
                new Sequence(Alphabets.DNA, "CCTCCTAT") {ID = "1"},
                new Sequence(Alphabets.DNA, "TCCTATC") {ID = "2"},
                new Sequence(Alphabets.DNA, "TGCCTCCT") {ID = "3"},
                new Sequence(Alphabets.DNA, "ATCTTAGC") {ID = "4"},
                new Sequence(Alphabets.DNA, "CTATCTTAG") {ID = "5"},
                new Sequence(Alphabets.DNA, "CTTAGCG") {ID = "6"},
                new Sequence(Alphabets.DNA, "GCCTCCTAT") {ID = "7"},
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") {ID = "8"},
                new Sequence(Alphabets.DNA, "AGCGCGC") {ID = "9"},
                new Sequence(Alphabets.DNA, "TTTTTT") {ID = "10"},
                new Sequence(Alphabets.DNA, "TTTTTAAA") {ID = "11"},
                new Sequence(Alphabets.DNA, "TAAAAA") {ID = "12"},
                new Sequence(Alphabets.DNA, "TTTTAG") {ID = "13"},
                new Sequence(Alphabets.DNA, "TTTAGC") {ID = "14"},
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") {ID = "15"},
            };

            KmerLength = kmerLengthConst;
            SequenceReads.Clear();
            
            SetSequenceReads(sequences);
            CreateGraph();
            
            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
            
            UnDangleGraph();
            RemoveRedundancy();

            IList<ISequence> contigs = BuildContigs().ToList();
            ReadContigMapper mapper = new ReadContigMapper();

            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);
            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            
            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator(overlap);
            
            overlap = dist.CalculateDistance();
            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            Assert.AreEqual(paths.Count, 3);
            Assert.AreEqual(paths.First().Count, 3);
            ScaffoldPath scaffold = paths.First();

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }
Example #2
0
        /// <summary>
        /// Validate scaffold paths for a given input reads.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateScaffoldPath(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string[] expectedScaffoldNodes = utilityObj.xmlUtil.GetTextValues(nodeName,Constants.ScaffoldNodes);
            string libraray = utilityObj.xmlUtil.GetTextValue(nodeName,Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName,Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName,Constants.Mean);
            string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName,Constants.DepthNode);

            // Get the input reads and build kmers
            using (FastAParser parser = new FastAParser(filePath))
            {
                IEnumerable<ISequence> sequenceReads = parser.Parse();

                // Build kmers from step1,graph in step2 
                // Remove the dangling links from graph in step3
                // Remove bubbles form the graph in step4 
                // Pass the graph and build contigs
                // Validate contig reads
                this.KmerLength = Int32.Parse(kmerLength, null);
                this.DanglingLinksThreshold = Int32.Parse(daglingThreshold, null);
                this.DanglingLinksPurger = new DanglingLinksPurger(Int32.Parse(daglingThreshold, null));
                this.RedundantPathsPurger = new RedundantPathsPurger(Int32.Parse(redundantThreshold, null));
                this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, null);
                this.SequenceReads.Clear();

                this.SetSequenceReads(sequenceReads.ToList());
                this.CreateGraph();
                ContigGraph graph = new ContigGraph();
                this.UnDangleGraph();

                // Build contig.
                this.ContigBuilder = new SimplePathContigBuilder();
                this.RemoveRedundancy();
                IEnumerable<ISequence> contigs = this.BuildContigs();

                IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
                ReadContigMapper mapper = new ReadContigMapper();
                ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

                // Find map paired reads.
                CloneLibrary.Instance.AddLibrary(libraray, float.Parse(mean, null), float.Parse(stdDeviation, null));

                MatePairMapper mapPairedReads = new MatePairMapper();
                ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

                // Filter contigs based on the orientation.
                OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
                ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

                DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
                dist.CalculateDistance();

                graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

                // Validate ScaffoldPath using BFS.
                TracePath trace = new TracePath();
                IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, Int32.Parse(kmerLength, null),
                                                            Int32.Parse(expectedDepth, null));

                ScaffoldPath scaffold = paths.First();

                foreach (KeyValuePair<Node, Edge> kvp in scaffold)
                {
                    ISequence seq = graph.GetNodeSequence(kvp.Key);
                    string sequence = seq.ConvertToString();
                    string reversedSequence = seq.GetReverseComplementedSequence().ConvertToString();

                    Assert.IsTrue(expectedScaffoldNodes.Contains(sequence)
                               || expectedScaffoldNodes.Contains(reversedSequence),
                               "Failed to find " + sequence + ", or " + reversedSequence);
                }
            }

            ApplicationLog.WriteLine("PADENA P1 : FindPaths() validation for Padena step6:step6 completed successfully");
        }
Example #3
0
        /// <summary>
        /// Validate Filter contig nodes.
        /// </summary>
        /// <param name="nodeName">xml node name used for a differnt testcase.</param>
        /// <param name="isFirstContig">Is First Contig?</param>
        internal void ValidateFilterPaired(string nodeName, bool isFirstContig)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.RedundantThreshold);
            string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ContigPairedReadsCount);
            string forwardReadStartPos = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ForwardReadStartPos);
            string reverseReadStartPos = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ReverseReadStartPos);
            string reverseComplementStartPos = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.RerverseReadReverseCompPos);
            string[] expectedForwardReadStartPos = forwardReadStartPos.Split(',');
            string[] expectedReverseReadStartPos = reverseReadStartPos.Split(',');
            string[] expectedReverseComplementStartPos = reverseComplementStartPos.Split(',');

            // Get the input reads and build kmers
            IEnumerable<ISequence> sequenceReads = null;
            using (FastAParser parser = new FastAParser(filePath))
            {
                sequenceReads = parser.Parse();

                // Build kmers from step1,graph in step2 
                // Remove the dangling links from graph in step3
                // Remove bubbles form the graph in step4 
                // Pass the graph and build contigs
                // Validate contig reads
                this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
                this.DanglingLinksThreshold = Int32.Parse(daglingThreshold, (IFormatProvider)null);
                this.DanglingLinksPurger = new DanglingLinksPurger(Int32.Parse(daglingThreshold, (IFormatProvider)null));
                this.RedundantPathsPurger =
                  new RedundantPathsPurger(Int32.Parse(redundantThreshold, (IFormatProvider)null));
                this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);
                this.SequenceReads.Clear();

                this.SetSequenceReads(sequenceReads.ToList());
                this.CreateGraph();
                this.UnDangleGraph();

                // Build contig.
                this.ContigBuilder = new SimplePathContigBuilder();
                this.RemoveRedundancy();
                IEnumerable<ISequence> contigs = this.BuildContigs();

                IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
                ReadContigMapper mapper = new ReadContigMapper();

                ReadContigMap maps = mapper.Map(
                    sortedContigs, sequenceReads, this.KmerLength);

                // Find map paired reads.
                MatePairMapper mapPairedReads = new MatePairMapper();
                ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

                // Filter contigs based on the orientation.
                OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
                ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);


                Assert.AreEqual(expectedContigPairedReadsCount,
                  contigpairedReads.Values.Count.ToString((IFormatProvider)null));

                Dictionary<ISequence, IList<ValidMatePair>> map = null;
                IList<ValidMatePair> valid = null;
                ISequence firstSeq = sortedContigs[0];
                ISequence secondSeq = sortedContigs[1];
                // Validate Contig paired reads after filtering contig sequences.
                if (isFirstContig)
                {

                    map = contigpairedReads[firstSeq];
                    valid = SortPairedReads(map[secondSeq], sequenceReads);
                }
                else
                {
                    map = contigpairedReads[secondSeq];
                    valid = SortPairedReads(map[firstSeq], sequenceReads);
                }

                for (int index = 0; index < valid.Count; index++)
                {
                    Assert.IsTrue((expectedForwardReadStartPos[index] ==
                          valid[index].ForwardReadStartPosition[0].ToString((IFormatProvider)null)
                          || (expectedForwardReadStartPos[index] ==
                          valid[index].ForwardReadStartPosition[1].ToString((IFormatProvider)null))));

                    if (valid[index].ReverseReadReverseComplementStartPosition.Count > 1)
                    {
                        Assert.IsTrue((expectedReverseReadStartPos[index] ==
                          valid[index].ReverseReadReverseComplementStartPosition[0].ToString((IFormatProvider)null)
                          || (expectedReverseReadStartPos[index] ==
                          valid[index].ReverseReadReverseComplementStartPosition[1].ToString((IFormatProvider)null))));
                    }

                    if (valid[index].ReverseReadStartPosition.Count > 1)
                    {
                        Assert.IsTrue((expectedReverseComplementStartPos[index] ==
                          valid[index].ReverseReadStartPosition[0].ToString((IFormatProvider)null)
                          || (expectedReverseComplementStartPos[index] ==
                          valid[index].ReverseReadStartPosition[1].ToString((IFormatProvider)null))));
                    }
                }
            }
            ApplicationLog.WriteLine("PADENA P1 : FilterPairedReads() validation for Padena step6:step4 completed successfully");
        }
Example #4
0
        /// <summary>
        /// Validate FilterPairedRead.FilterPairedRead() by passing graph object
        /// </summary>
        /// <param name="nodeName">xml node name used for a differnt testcase.</param>
        internal void ValidateContigDistance(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.RedundantThreshold);
            string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ContigPairedReadsCount);
            string distanceBetweenFirstContigs = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DistanceBetweenFirstContig);
            string distanceBetweenSecondContigs = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DistanceBetweenSecondContig);
            string firstStandardDeviation = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FirstContigStandardDeviation);
            string secondStandardDeviation = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.SecondContigStandardDeviation);

            // Get the input reads and build kmers
            IEnumerable<ISequence> sequenceReads = null;
            using (FastAParser parser = new FastAParser(filePath))
            {
                sequenceReads = parser.Parse();

                // Build kmers from step1,graph in step2 
                // Remove the dangling links from graph in step3
                // Remove bubbles form the graph in step4 
                // Pass the graph and build contigs
                // Validate contig reads
                this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
                this.DanglingLinksThreshold = Int32.Parse(daglingThreshold, (IFormatProvider)null);
                this.DanglingLinksPurger =
                  new DanglingLinksPurger(Int32.Parse(daglingThreshold, (IFormatProvider)null));
                this.RedundantPathsPurger =
                  new RedundantPathsPurger(Int32.Parse(redundantThreshold, (IFormatProvider)null));
                this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);
                this.SequenceReads.Clear();

                this.SetSequenceReads(sequenceReads.ToList());
                this.CreateGraph();
                this.UnDangleGraph();

                // Build contig.
                this.ContigBuilder = new SimplePathContigBuilder();
                this.RemoveRedundancy();
                IEnumerable<ISequence> contigs = this.BuildContigs();

                IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
                ReadContigMapper mapper = new ReadContigMapper();

                ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

                // Find map paired reads.
                MatePairMapper mapPairedReads = new MatePairMapper();
                ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

                // Filter contigs based on the orientation.
                OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
                ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

                // Calculate the distance between contigs.
                DistanceCalculator calc = new DistanceCalculator(contigpairedReads);
                calc.CalculateDistance();
                Assert.AreEqual(expectedContigPairedReadsCount,
                    contigpairedReads.Values.Count.ToString((IFormatProvider)null));

                Dictionary<ISequence, IList<ValidMatePair>> map;
                IList<ValidMatePair> valid;
                ISequence firstSeq = sortedContigs[0];
                ISequence secondSeq = sortedContigs[1];
                if (contigpairedReads.ContainsKey(firstSeq))
                {
                    map = contigpairedReads[firstSeq];
                }
                else
                {
                    map = contigpairedReads[secondSeq];
                }

                if (map.ContainsKey(firstSeq))
                {
                    valid = map[firstSeq];
                }
                else
                {
                    valid = map[secondSeq];
                }

                // Validate distance and standard deviation between contigs.
                Assert.AreEqual(float.Parse(distanceBetweenFirstContigs, (IFormatProvider)null),
                  valid.First().DistanceBetweenContigs[0]);
                Assert.AreEqual(float.Parse(distanceBetweenSecondContigs, (IFormatProvider)null),
                  valid.First().DistanceBetweenContigs[1]);
                Assert.AreEqual(float.Parse(firstStandardDeviation, (IFormatProvider)null),
                  valid.First().StandardDeviation[0]);
                Assert.AreEqual(float.Parse(secondStandardDeviation, (IFormatProvider)null),
                  valid.First().StandardDeviation[1]);
            }

            ApplicationLog.WriteLine("PADENA P1 : DistanceCalculator() validation for Padena step6:step5 completed successfully");
        }
Example #5
0
        /// <summary>
        /// Validate Assembled paths for a given input reads.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateAssembledPath(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string StdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);
            string[] assembledPath = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencePathNode);

            // Get the input reads and build kmers
            FastAParser parser = new FastAParser();
parser.Open(filePath);
                IEnumerable<ISequence> sequenceReads = parser.Parse().ToList();
            parser.Close ();
                // Build kmers from step1,graph in step2 
                // Remove the dangling links from graph in step3
                // Remove bubbles form the graph in step4 
                // Pass the graph and build contigs
                // Validate contig reads

                this.KmerLength = Int32.Parse(kmerLength, null);
                this.DanglingLinksThreshold = Int32.Parse(daglingThreshold, null);
                this.DanglingLinksPurger = new DanglingLinksPurger(Int32.Parse(daglingThreshold, null));
                this.RedundantPathsPurger = new RedundantPathsPurger(Int32.Parse(redundantThreshold, null));
                this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, null);
                this.SequenceReads.Clear();

                this.SetSequenceReads(sequenceReads.ToList());
                this.CreateGraph();
                ContigGraph graph = new ContigGraph();
                this.UnDangleGraph();

                // Build contig.
                this.ContigBuilder = new SimplePathContigBuilder();
                this.RemoveRedundancy();
                IEnumerable<ISequence> contigs = this.BuildContigs();

                IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
                ReadContigMapper mapper = new ReadContigMapper();

                ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

                // Find map paired reads.
                CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(StdDeviation, null));
                MatePairMapper mapPairedReads = new MatePairMapper();
                ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

                // Filter contigs based on the orientation.
                OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
                ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

                DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
                dist.CalculateDistance();
                graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

                // Validate ScaffoldPath using BFS.
                TracePath trace = new TracePath();
                IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, Int32.Parse(kmerLength, null),
                                                            Int32.Parse(expectedDepth, null));

                // Assemble paths.
                PathPurger pathsAssembler = new PathPurger();
                pathsAssembler.PurgePath(paths);

                // Get sequences from assembled path.
                IList<ISequence> seqList = paths.Select(temp => temp.BuildSequenceFromPath(graph, Int32.Parse(kmerLength, null))).ToList();

                //Validate assembled sequence paths.
                foreach (string sequence in seqList.Select(t => t.ConvertToString()))
                {
                    Assert.IsTrue(assembledPath.Contains(sequence), "Failed to locate " + sequence);
                }


            ApplicationLog.WriteLine("PADENA P1 : AssemblePath() validation for Padena step6:step7 completed successfully");
        }