Example #1
0
        /// <summary>
        /// Validate scaffold paths for a given input reads.
        /// Builds the graph, purges dangling links and redundant paths, builds contigs,
        /// maps reads and mate pairs onto them, traces scaffold paths and checks that every
        /// node of the first path (or its reverse complement) is an expected sequence.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateScaffoldPath(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string[] expectedScaffoldNodes = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ScaffoldNodes);
            string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);

            // Read the input once. Parse() is enumerated by several later steps, so the
            // result is materialized here instead of re-reading the file on every pass;
            // this also lets the parser be released before the assembly work starts.
            IEnumerable<ISequence> sequenceReads;
            using (FastAParser parser = new FastAParser(filePath))
            {
                sequenceReads = parser.Parse().ToList();
            }

            // Each numeric setting is parsed exactly once and reused below.
            int kmerSize = Int32.Parse(kmerLength, null);
            int danglingLimit = Int32.Parse(danglingThreshold, null);
            int redundantLimit = Int32.Parse(redundantThreshold, null);
            int depth = Int32.Parse(expectedDepth, null);

            // Configure the assembler, then build the graph and remove dangling links.
            this.KmerLength = kmerSize;
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();

            // Build contigs. The result is materialized so that the contig graph below is
            // built from the very same contig instances that were mapped to the reads.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs().ToList();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));

            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
            dist.CalculateDistance();

            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

            // Trace the scaffold paths and validate the first one.
            TracePath trace = new TracePath();
            IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, kmerSize, depth);

            // Fail with a readable message rather than an InvalidOperationException from First().
            Assert.IsTrue(paths.Count > 0, "No scaffold paths were found for node " + nodeName);
            ScaffoldPath scaffold = paths.First();

            foreach (KeyValuePair<Node, Edge> kvp in scaffold)
            {
                ISequence seq = graph.GetNodeSequence(kvp.Key);
                string sequence = seq.ConvertToString();
                string reversedSequence = seq.GetReverseComplementedSequence().ConvertToString();

                Assert.IsTrue(expectedScaffoldNodes.Contains(sequence)
                           || expectedScaffoldNodes.Contains(reversedSequence),
                           "Failed to find " + sequence + ", or " + reversedSequence);
            }

            ApplicationLog.WriteLine("PADENA P1 : FindPaths() validation for Padena step6:step6 completed successfully");
        }
Example #2
0
        public void TracePathTestWithPalindromicContig()
        {
            // Traces scaffold paths over a small hand-written read set and checks the
            // number of paths found plus the three contigs that make up the first path.
            const int kmerLengthConst = 5;
            const int dangleThreshold = 3;
            const int redundantThreshold = 6;

            var sequences = new List<ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") {ID = "0"},
                new Sequence(Alphabets.DNA, "CCTCCTAT") {ID = "1"},
                new Sequence(Alphabets.DNA, "TCCTATC") {ID = "2"},
                new Sequence(Alphabets.DNA, "TGCCTCCT") {ID = "3"},
                new Sequence(Alphabets.DNA, "ATCTTAGC") {ID = "4"},
                new Sequence(Alphabets.DNA, "CTATCTTAG") {ID = "5"},
                new Sequence(Alphabets.DNA, "CTTAGCG") {ID = "6"},
                new Sequence(Alphabets.DNA, "GCCTCCTAT") {ID = "7"},
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") {ID = "8"},
                new Sequence(Alphabets.DNA, "AGCGCGC") {ID = "9"},
                new Sequence(Alphabets.DNA, "TTTTTT") {ID = "10"},
                new Sequence(Alphabets.DNA, "TTTTTAAA") {ID = "11"},
                new Sequence(Alphabets.DNA, "TAAAAA") {ID = "12"},
                new Sequence(Alphabets.DNA, "TTTTAG") {ID = "13"},
                new Sequence(Alphabets.DNA, "TTTAGC") {ID = "14"},
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") {ID = "15"},
            };

            // Build the graph from the reads.
            KmerLength = kmerLengthConst;
            SequenceReads.Clear();

            SetSequenceReads(sequences);
            CreateGraph();

            // Purge dangling links and redundant paths.
            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);

            UnDangleGraph();
            RemoveRedundancy();

            // Build contigs and map the reads (and their mate pairs) onto them.
            IList<ISequence> contigs = BuildContigs().ToList();
            ReadContigMapper mapper = new ReadContigMapper();

            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);
            MatePairMapper builder = new MatePairMapper();
            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();

            ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator(overlap);

            overlap = dist.CalculateDistance();
            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            // Expected value goes first so that a failure reports "expected 3, was N"
            // the right way round (and matches the sequence assertions below).
            Assert.AreEqual(3, paths.Count);
            ScaffoldPath scaffold = paths.First();
            Assert.AreEqual(3, scaffold.Count);

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }
Example #3
0
        /// <summary>
        /// Validate DistanceCalculator.CalculateDistance(): after mapping and filtering
        /// mate pairs, checks the number of contig paired-read entries and the distance
        /// and standard deviation reported for the first valid mate pair between the
        /// first two sorted contigs.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcase.</param>
        internal void ValidateContigDistance(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.KmerLengthNode);
            string danglingThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.RedundantThreshold);
            string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ContigPairedReadsCount);
            string distanceBetweenFirstContigs = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DistanceBetweenFirstContig);
            string distanceBetweenSecondContigs = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DistanceBetweenSecondContig);
            string firstStandardDeviation = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FirstContigStandardDeviation);
            string secondStandardDeviation = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.SecondContigStandardDeviation);

            // Read the input once. The reads are enumerated by several later steps, so
            // they are materialized here instead of re-parsing the file on every pass.
            IEnumerable<ISequence> sequenceReads;
            using (FastAParser parser = new FastAParser(filePath))
            {
                sequenceReads = parser.Parse().ToList();
            }

            // Each threshold is parsed exactly once and reused below.
            int danglingLimit = Int32.Parse(danglingThreshold, (IFormatProvider)null);
            int redundantLimit = Int32.Parse(redundantThreshold, (IFormatProvider)null);

            // Configure the assembler, build the graph and remove dangling links.
            this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();

            // Build contig.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();

            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            // Calculate the distance between contigs.
            DistanceCalculator calc = new DistanceCalculator(contigpairedReads);
            calc.CalculateDistance();
            Assert.AreEqual(expectedContigPairedReadsCount,
                contigpairedReads.Values.Count.ToString((IFormatProvider)null));

            // The pair may be stored under either contig, in either direction:
            // look under the first contig and fall back to the second one.
            ISequence firstSeq = sortedContigs[0];
            ISequence secondSeq = sortedContigs[1];

            Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads.ContainsKey(firstSeq)
                ? contigpairedReads[firstSeq]
                : contigpairedReads[secondSeq];

            IList<ValidMatePair> valid;
            if (!map.TryGetValue(firstSeq, out valid))
            {
                valid = map[secondSeq];
            }

            // Validate distance and standard deviation between contigs.
            ValidMatePair firstPair = valid.First();
            Assert.AreEqual(float.Parse(distanceBetweenFirstContigs, (IFormatProvider)null),
              firstPair.DistanceBetweenContigs[0]);
            Assert.AreEqual(float.Parse(distanceBetweenSecondContigs, (IFormatProvider)null),
              firstPair.DistanceBetweenContigs[1]);
            Assert.AreEqual(float.Parse(firstStandardDeviation, (IFormatProvider)null),
              firstPair.StandardDeviation[0]);
            Assert.AreEqual(float.Parse(secondStandardDeviation, (IFormatProvider)null),
              firstPair.StandardDeviation[1]);

            ApplicationLog.WriteLine("PADENA P1 : DistanceCalculator() validation for Padena step6:step5 completed successfully");
        }
Example #4
0
        /// <summary>
        /// Validate Filter contig nodes: after orientation-based mate-pair filtering,
        /// checks the number of contig paired-read entries and the read start positions
        /// of the valid mate pairs between the first two sorted contigs.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcase.</param>
        /// <param name="isFirstContig">
        /// True to look the pairs up under the first sorted contig (paired with the second),
        /// false to look them up under the second sorted contig (paired with the first).
        /// </param>
        internal void ValidateFilterPaired(string nodeName, bool isFirstContig)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.KmerLengthNode);
            string danglingThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.RedundantThreshold);
            string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ContigPairedReadsCount);
            string forwardReadStartPos = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ForwardReadStartPos);
            string reverseReadStartPos = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ReverseReadStartPos);
            string reverseComplementStartPos = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.RerverseReadReverseCompPos);
            string[] expectedForwardReadStartPos = forwardReadStartPos.Split(',');
            string[] expectedReverseReadStartPos = reverseReadStartPos.Split(',');
            string[] expectedReverseComplementStartPos = reverseComplementStartPos.Split(',');

            // Read the input once. The reads are enumerated by several later steps, so
            // they are materialized here instead of re-parsing the file on every pass.
            IEnumerable<ISequence> sequenceReads;
            using (FastAParser parser = new FastAParser(filePath))
            {
                sequenceReads = parser.Parse().ToList();
            }

            // Each threshold is parsed exactly once and reused below.
            int danglingLimit = Int32.Parse(danglingThreshold, (IFormatProvider)null);
            int redundantLimit = Int32.Parse(redundantThreshold, (IFormatProvider)null);

            // Configure the assembler, build the graph and remove dangling links.
            this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();

            // Build contig.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();

            ReadContigMap maps = mapper.Map(
                sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            Assert.AreEqual(expectedContigPairedReadsCount,
              contigpairedReads.Values.Count.ToString((IFormatProvider)null));

            // Pick the mate pairs between the first two contigs, in the requested direction.
            ISequence firstSeq = sortedContigs[0];
            ISequence secondSeq = sortedContigs[1];
            ISequence outerKey = isFirstContig ? firstSeq : secondSeq;
            ISequence innerKey = isFirstContig ? secondSeq : firstSeq;

            Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[outerKey];
            IList<ValidMatePair> valid = SortPairedReads(map[innerKey], sequenceReads);

            // Validate Contig paired reads after filtering contig sequences.
            // NOTE(review): the "reverse read" expectations are compared against
            // ReverseReadReverseComplementStartPosition and the "reverse complement"
            // expectations against ReverseReadStartPosition. The pairing is kept exactly
            // as it was; confirm against the XML test data whether it is intentional.
            for (int index = 0; index < valid.Count; index++)
            {
                ValidMatePair pair = valid[index];

                string expectedForward = expectedForwardReadStartPos[index];
                Assert.IsTrue(
                    expectedForward == pair.ForwardReadStartPosition[0].ToString((IFormatProvider)null)
                    || expectedForward == pair.ForwardReadStartPosition[1].ToString((IFormatProvider)null),
                    "Unexpected forward read start position for mate pair " + index);

                if (pair.ReverseReadReverseComplementStartPosition.Count > 1)
                {
                    string expectedReverse = expectedReverseReadStartPos[index];
                    Assert.IsTrue(
                        expectedReverse == pair.ReverseReadReverseComplementStartPosition[0].ToString((IFormatProvider)null)
                        || expectedReverse == pair.ReverseReadReverseComplementStartPosition[1].ToString((IFormatProvider)null),
                        "Unexpected reverse read reverse complement start position for mate pair " + index);
                }

                if (pair.ReverseReadStartPosition.Count > 1)
                {
                    string expectedReverseComplement = expectedReverseComplementStartPos[index];
                    Assert.IsTrue(
                        expectedReverseComplement == pair.ReverseReadStartPosition[0].ToString((IFormatProvider)null)
                        || expectedReverseComplement == pair.ReverseReadStartPosition[1].ToString((IFormatProvider)null),
                        "Unexpected reverse read start position for mate pair " + index);
                }
            }

            ApplicationLog.WriteLine("PADENA P1 : FilterPairedReads() validation for Padena step6:step4 completed successfully");
        }
Example #5
0
        /// <summary>
        /// Validate building map reads to contigs: maps the input reads onto the built
        /// contigs and checks the read maps recorded for the first read.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcases</param>
        /// <param name="isFullOverlap">
        /// True to validate length, start positions and FullOverlap for every contig the
        /// first read maps to; false to validate only that the first one is a PartialOverlap.
        /// </param>
        internal void ValidateMapReadsToContig(string nodeName, bool isFullOverlap)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string readMapLengthString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadMapLength);
            string readStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadStartPos);
            string contigStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigStartPos);

            string[] expectedReadmapLength = readMapLengthString.Split(',');
            string[] expectedReadStartPos = readStartPosString.Split(',');
            string[] expectedContigStartPos = contigStartPosString.Split(',');

            // Read the input once. The reads are enumerated by several later steps, so
            // they are materialized here instead of re-parsing the file on every pass.
            IEnumerable<ISequence> sequenceReads;
            using (FastAParser parser = new FastAParser(filePath))
            {
                sequenceReads = parser.Parse().ToList();
            }

            // Each threshold is parsed exactly once and reused below.
            int danglingLimit = Int32.Parse(danglingThreshold, null);
            int redundantLimit = Int32.Parse(redundantThreshold, null);

            // Configure the assembler, build the graph, purge it and build contigs.
            this.KmerLength = Int32.Parse(kmerLength, null);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();

            IEnumerable<ISequence> contigs = this.BuildContigs();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Every read is expected to have an entry in the map.
            Assert.AreEqual(sequenceReads.Count(), maps.Count);

            Dictionary<ISequence, IList<ReadMap>> readMaps = maps[sequenceReads.First().ID];

            // Sort the mapped contigs once instead of re-sorting on every loop iteration.
            IList<ISequence> mappedContigs = SortContigsData(readMaps.Keys.ToList());

            for (int i = 0; i < mappedContigs.Count; i++)
            {
                IList<ReadMap> readMap = readMaps[mappedContigs[i]];

                if (isFullOverlap)
                {
                    Assert.AreEqual(expectedReadmapLength[i], readMap[0].Length.ToString((IFormatProvider)null), "readMap failed for pos " + i);
                    Assert.AreEqual(expectedContigStartPos[i], readMap[0].StartPositionOfContig.ToString((IFormatProvider)null), "contigStart failed for pos " + i);
                    Assert.AreEqual(expectedReadStartPos[i], readMap[0].StartPositionOfRead.ToString((IFormatProvider)null), "readStart failed for pos " + i);
                    Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap);
                }
                else
                {
                    // Only the first mapped contig is checked in the partial-overlap case.
                    Assert.AreEqual(ContigReadOverlapType.PartialOverlap, readMap[0].ReadOverlap);
                    break;
                }
            }

            ApplicationLog.WriteLine("PADENA P1 :ReadContigMapper.Map() validation for Padena step6:step2 completed successfully");
        }
Example #6
0
        /// <summary>
        /// Validate Assembled paths for a given input reads.
        /// Traces scaffold paths, purges them with PathPurger, builds a sequence from each
        /// remaining path and checks that every such sequence is one of the expected ones.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateAssembledPath(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);
            string[] assembledPath = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencePathNode);

            // Get the input reads. Close() runs in a finally block so the parser is
            // released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Each numeric setting is parsed exactly once and reused below.
            int kmerSize = Int32.Parse(kmerLength, null);
            int danglingLimit = Int32.Parse(danglingThreshold, null);
            int redundantLimit = Int32.Parse(redundantThreshold, null);
            int depth = Int32.Parse(expectedDepth, null);

            // Configure the assembler, build the graph and remove dangling links.
            this.KmerLength = kmerSize;
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();

            // Build contigs. The result is materialized so that the contig graph below is
            // built from the very same contig instances that were mapped to the reads.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs().ToList();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();

            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
            dist.CalculateDistance();

            ContigGraph graph = new ContigGraph();
            graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

            // Trace the scaffold paths.
            TracePath trace = new TracePath();
            IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, kmerSize, depth);

            // Assemble paths.
            PathPurger pathsAssembler = new PathPurger();
            pathsAssembler.PurgePath(paths);

            // Get sequences from assembled path.
            IList<ISequence> seqList = paths.Select(temp => temp.BuildSequenceFromPath(graph, kmerSize)).ToList();

            // Validate assembled sequence paths.
            foreach (string sequence in seqList.Select(t => t.ConvertToString()))
            {
                Assert.IsTrue(assembledPath.Contains(sequence), "Failed to locate " + sequence);
            }

            ApplicationLog.WriteLine("PADENA P1 : AssemblePath() validation for Padena step6:step7 completed successfully");
        }
Example #7
0
        /// <summary>
        /// Validate building map reads to contigs: maps the input reads onto the built
        /// contigs and checks the read maps recorded for the first read.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcases</param>
        /// <param name="IsFullOverlap">
        /// True to validate length, start positions and FullOverlap for every sorted contig;
        /// false to validate only that the first one is a PartialOverlap.
        /// </param>
        /// <remarks>
        /// TODO: This test was originally written with hard coded assumptions about the
        /// direction of the returned reads, currently this test has a hack to "flip" some
        /// reads to match these hard coded assumptions. This should be cleaned up.
        /// </remarks>
        internal void ValidateMapReadsToContig(string nodeName, bool IsFullOverlap)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.KmerLengthNode);
            string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.RedundantThreshold);
            string readMapLengthString = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ReadMapLength);
            string readStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ReadStartPos);
            string contigStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ContigStartPos);
            string[] expectedReadmapLength = readMapLengthString.Split(',');
            string[] expectedReadStartPos = readStartPosString.Split(',');
            string[] expectedContigStartPos = contigStartPosString.Split(',');

            // Get the input reads. Backslashes in the configured path are replaced with
            // the platform separator, and Close() runs in a finally block so the parser
            // is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Each threshold is parsed exactly once and reused below.
            int danglingLimit = Int32.Parse(danglingThreshold, (IFormatProvider)null);
            int redundantLimit = Int32.Parse(redundantThreshold, (IFormatProvider)null);

            // Configure the assembler, build the graph, purge it and build contigs.
            this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();

            IList<ISequence> listContigs = this.BuildContigs().ToList();

            // Hack to satisfy the assumptions of one test by flipping the read to its reverse complement.
            // A direct cast is used so that an unexpected contig type fails with an
            // InvalidCastException rather than an opaque NullReferenceException.
            if (nodeName == Constants.MapReadsToContigFullOverlapNode)
            {
                listContigs[0] = ((Sequence)listContigs[0]).GetReverseComplementedSequence();
            }

            IList<ISequence> sortedContigs = SortContigsData(listContigs);
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Every read is expected to have an entry in the map.
            Assert.AreEqual(sequenceReads.Count(), maps.Count);

            Dictionary<ISequence, IList<ReadMap>> readMaps = maps[sequenceReads.ElementAt(0).ID];

            for (int i = 0; i < sortedContigs.Count; i++)
            {
                IList<ReadMap> readMap = readMaps[sortedContigs[i]];
                if (IsFullOverlap)
                {
                    Assert.AreEqual(expectedReadmapLength[i], readMap[0].Length.ToString((IFormatProvider)null), "readMap failed for pos " + i);
                    Assert.AreEqual(expectedContigStartPos[i], readMap[0].StartPositionOfContig.ToString((IFormatProvider)null), "contigStart failed for pos " + i);
                    Assert.AreEqual(expectedReadStartPos[i], readMap[0].StartPositionOfRead.ToString((IFormatProvider)null), "readStart failed for pos " + i);
                    Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap);
                }
                else
                {
                    // Only the first sorted contig is checked in the partial-overlap case.
                    Assert.AreEqual(ContigReadOverlapType.PartialOverlap, readMap[0].ReadOverlap);
                    break;
                }
            }

            ApplicationLog.WriteLine(
                "Padena BVT :ReadContigMapper.Map() validation for Padena step6:step2 completed successfully");
        }