public void TracePathTestWithPalindromicContig()
{
    // Runs the assembler steps (graph creation, dangling-link purge, redundant-path
    // purge, contig build) over a fixed read set, maps mate pairs onto the resulting
    // contigs, and checks the scaffold paths returned by TracePath.FindPaths.
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
        new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
        new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
        new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
        new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
    };

    // Configure the assembler and build the graph from the reads.
    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();

    // Error-correction steps: remove dangling links, then redundant paths.
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    // The same materialized contig list feeds both the read mapper and the contig graph.
    IList<ISequence> contigs = BuildContigs().ToList();

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs, this.KmerLength);

    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

    // Expected value goes first so a failure reports "expected 3 but was N"
    // rather than the reverse.
    Assert.AreEqual(3, paths.Count);

    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);

    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
}
/// <summary>
/// Validates the scaffold path found for the input reads of a test case:
/// every node of the first path returned by TracePath.FindPaths must match
/// (directly or as reverse complement) one of the expected scaffold nodes.
/// </summary>
/// <param name="nodeName">XML node name holding the configuration of the test case.</param>
internal void ValidateScaffoldPath(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string[] expectedScaffoldNodes = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ScaffoldNodes);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);

    using (FastAParser parser = new FastAParser(filePath))
    {
        // The reads are consumed three times below (graph input, read mapping,
        // mate-pair mapping). Materialize them once so the parser output is not
        // re-enumerated for every pass; ValidateAssembledPath does the same.
        IEnumerable<ISequence> sequenceReads = parser.Parse().ToList();

        // Parse each numeric setting once instead of at every use.
        int kmerSize = Int32.Parse(kmerLength, null);
        int danglingLimit = Int32.Parse(danglingThreshold, null);
        int redundantLimit = Int32.Parse(redundantThreshold, null);
        int searchDepth = Int32.Parse(expectedDepth, null);

        // Configure the assembler, build the graph, and run both purge steps.
        this.KmerLength = kmerSize;
        this.DanglingLinksThreshold = danglingLimit;
        this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
        this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
        this.RedundantPathLengthThreshold = redundantLimit;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        this.UnDangleGraph();

        // Build contigs.
        this.ContigBuilder = new SimplePathContigBuilder();
        this.RemoveRedundancy();
        IEnumerable<ISequence> contigs = this.BuildContigs();
        IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());

        ReadContigMapper mapper = new ReadContigMapper();
        ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

        // Map paired reads onto the contigs.
        CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
        MatePairMapper mapPairedReads = new MatePairMapper();
        ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

        // Filter contig pairs based on orientation, then compute distances.
        OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
        ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

        DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
        dist.CalculateDistance();

        // The contig graph is built from the unsorted contig list, as before.
        ContigGraph graph = new ContigGraph();
        graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

        // Trace scaffold paths and validate the first one.
        TracePath trace = new TracePath();
        IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, kmerSize, searchDepth);

        ScaffoldPath scaffold = paths.First();

        foreach (KeyValuePair<Node, Edge> kvp in scaffold)
        {
            ISequence seq = graph.GetNodeSequence(kvp.Key);
            string sequence = seq.ConvertToString();
            string reversedSequence = seq.GetReverseComplementedSequence().ConvertToString();

            Assert.IsTrue(
                expectedScaffoldNodes.Contains(sequence) || expectedScaffoldNodes.Contains(reversedSequence),
                "Failed to find " + sequence + ", or " + reversedSequence);
        }
    }

    ApplicationLog.WriteLine("PADENA P1 : FindPaths() validation for Padena step6:step6 completed successfully");
}
/// <summary>
/// Validates the mate pairs that remain between the first two sorted contigs
/// after OrientationBasedMatePairFilter.FilterPairedReads has been applied.
/// </summary>
/// <param name="nodeName">XML node name holding the configuration of the test case.</param>
/// <param name="isFirstContig">
/// When true the mate pairs are looked up as [first contig][second contig];
/// otherwise as [second contig][first contig].
/// </param>
internal void ValidateFilterPaired(string nodeName, bool isFirstContig)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigPairedReadsCount);
    string forwardReadStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ForwardReadStartPos);
    string reverseReadStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReverseReadStartPos);
    string reverseComplementStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RerverseReadReverseCompPos);

    // Expected positions are stored as comma-separated lists, one entry per mate pair.
    string[] expectedForwardReadStartPos = forwardReadStartPos.Split(',');
    string[] expectedReverseReadStartPos = reverseReadStartPos.Split(',');
    string[] expectedReverseComplementStartPos = reverseComplementStartPos.Split(',');

    using (FastAParser parser = new FastAParser(filePath))
    {
        // The reads are consumed several times below. Materialize them once so the
        // parser output is not re-enumerated for every pass; ValidateAssembledPath
        // does the same.
        IEnumerable<ISequence> sequenceReads = parser.Parse().ToList();

        // Parse each numeric setting once instead of at every use.
        int kmerSize = Int32.Parse(kmerLength, (IFormatProvider)null);
        int danglingLimit = Int32.Parse(danglingThreshold, (IFormatProvider)null);
        int redundantLimit = Int32.Parse(redundantThreshold, (IFormatProvider)null);

        // Configure the assembler, build the graph, and run both purge steps.
        this.KmerLength = kmerSize;
        this.DanglingLinksThreshold = danglingLimit;
        this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
        this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
        this.RedundantPathLengthThreshold = redundantLimit;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        this.UnDangleGraph();

        // Build contigs.
        this.ContigBuilder = new SimplePathContigBuilder();
        this.RemoveRedundancy();
        IEnumerable<ISequence> contigs = this.BuildContigs();
        IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());

        ReadContigMapper mapper = new ReadContigMapper();
        ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

        // Map paired reads onto the contigs.
        MatePairMapper mapPairedReads = new MatePairMapper();
        ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

        // Filter contig pairs based on orientation.
        OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
        ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

        Assert.AreEqual(
            expectedContigPairedReadsCount,
            contigpairedReads.Values.Count.ToString((IFormatProvider)null));

        ISequence firstSeq = sortedContigs[0];
        ISequence secondSeq = sortedContigs[1];

        // Pick the lookup direction between the two contigs.
        ISequence outerContig = isFirstContig ? firstSeq : secondSeq;
        ISequence innerContig = isFirstContig ? secondSeq : firstSeq;

        Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[outerContig];
        IList<ValidMatePair> valid = SortPairedReads(map[innerContig], sequenceReads);

        // Validate contig paired reads after filtering contig sequences.
        for (int index = 0; index < valid.Count; index++)
        {
            ValidMatePair pair = valid[index];

            // The expected forward position must match one of the first two recorded positions.
            Assert.IsTrue(
                expectedForwardReadStartPos[index] == pair.ForwardReadStartPosition[0].ToString((IFormatProvider)null)
                || expectedForwardReadStartPos[index] == pair.ForwardReadStartPosition[1].ToString((IFormatProvider)null));

            // NOTE(review): the expected "reverse read" values are compared against
            // ReverseReadReverseComplementStartPosition and the expected "reverse
            // complement" values against ReverseReadStartPosition. This looks
            // cross-wired but is kept as-is because the XML test data may rely on it
            // - confirm against the test configuration.
            if (pair.ReverseReadReverseComplementStartPosition.Count > 1)
            {
                Assert.IsTrue(
                    expectedReverseReadStartPos[index] == pair.ReverseReadReverseComplementStartPosition[0].ToString((IFormatProvider)null)
                    || expectedReverseReadStartPos[index] == pair.ReverseReadReverseComplementStartPosition[1].ToString((IFormatProvider)null));
            }

            if (pair.ReverseReadStartPosition.Count > 1)
            {
                Assert.IsTrue(
                    expectedReverseComplementStartPos[index] == pair.ReverseReadStartPosition[0].ToString((IFormatProvider)null)
                    || expectedReverseComplementStartPos[index] == pair.ReverseReadStartPosition[1].ToString((IFormatProvider)null));
            }
        }
    }

    ApplicationLog.WriteLine("PADENA P1 : FilterPairedReads() validation for Padena step6:step4 completed successfully");
}
/// <summary>
/// Validates the distances and standard deviations that
/// DistanceCalculator.CalculateDistance() records on the mate pairs
/// linking the first two sorted contigs.
/// </summary>
/// <param name="nodeName">XML node name holding the configuration of the test case.</param>
internal void ValidateContigDistance(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigPairedReadsCount);
    string distanceBetweenFirstContigs = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DistanceBetweenFirstContig);
    string distanceBetweenSecondContigs = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DistanceBetweenSecondContig);
    string firstStandardDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstContigStandardDeviation);
    string secondStandardDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondContigStandardDeviation);

    using (FastAParser parser = new FastAParser(filePath))
    {
        // The reads are consumed three times below (graph input, read mapping,
        // mate-pair mapping). Materialize them once so the parser output is not
        // re-enumerated for every pass; ValidateAssembledPath does the same.
        IEnumerable<ISequence> sequenceReads = parser.Parse().ToList();

        // Parse each numeric setting once instead of at every use.
        int kmerSize = Int32.Parse(kmerLength, (IFormatProvider)null);
        int danglingLimit = Int32.Parse(danglingThreshold, (IFormatProvider)null);
        int redundantLimit = Int32.Parse(redundantThreshold, (IFormatProvider)null);

        // Configure the assembler, build the graph, and run both purge steps.
        this.KmerLength = kmerSize;
        this.DanglingLinksThreshold = danglingLimit;
        this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
        this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
        this.RedundantPathLengthThreshold = redundantLimit;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        this.UnDangleGraph();

        // Build contigs.
        this.ContigBuilder = new SimplePathContigBuilder();
        this.RemoveRedundancy();
        IEnumerable<ISequence> contigs = this.BuildContigs();
        IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());

        ReadContigMapper mapper = new ReadContigMapper();
        ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

        // Map paired reads onto the contigs.
        MatePairMapper mapPairedReads = new MatePairMapper();
        ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

        // Filter contig pairs based on orientation.
        OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
        ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

        // Calculate the distance between contigs.
        DistanceCalculator calc = new DistanceCalculator(contigpairedReads);
        calc.CalculateDistance();

        Assert.AreEqual(
            expectedContigPairedReadsCount,
            contigpairedReads.Values.Count.ToString((IFormatProvider)null));

        ISequence firstSeq = sortedContigs[0];
        ISequence secondSeq = sortedContigs[1];

        // The pair may be stored under either contig; prefer the first one when present.
        Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads.ContainsKey(firstSeq)
            ? contigpairedReads[firstSeq]
            : contigpairedReads[secondSeq];

        // Same preference for the inner key; TryGetValue avoids a second lookup.
        IList<ValidMatePair> valid;
        if (!map.TryGetValue(firstSeq, out valid))
        {
            valid = map[secondSeq];
        }

        // Validate distance and standard deviation between contigs
        // using the first mate pair only.
        ValidMatePair matePair = valid.First();

        Assert.AreEqual(
            float.Parse(distanceBetweenFirstContigs, (IFormatProvider)null),
            matePair.DistanceBetweenContigs[0]);
        Assert.AreEqual(
            float.Parse(distanceBetweenSecondContigs, (IFormatProvider)null),
            matePair.DistanceBetweenContigs[1]);
        Assert.AreEqual(
            float.Parse(firstStandardDeviation, (IFormatProvider)null),
            matePair.StandardDeviation[0]);
        Assert.AreEqual(
            float.Parse(secondStandardDeviation, (IFormatProvider)null),
            matePair.StandardDeviation[1]);
    }

    ApplicationLog.WriteLine("PADENA P1 : DistanceCalculator() validation for Padena step6:step5 completed successfully");
}
/// <summary>
/// Validates the sequences assembled from the scaffold paths of the input reads:
/// after PathPurger.PurgePath, every sequence built from a remaining path must be
/// one of the expected assembled paths.
/// </summary>
/// <param name="nodeName">XML node name holding the configuration of the test case.</param>
internal void ValidateAssembledPath(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);
    string[] assembledPath = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencePathNode);

    // Read all input reads up front. The using block releases the parser even when
    // parsing throws, which the previous manual Open/Close sequence did not.
    IEnumerable<ISequence> sequenceReads;
    using (FastAParser parser = new FastAParser(filePath))
    {
        sequenceReads = parser.Parse().ToList();
    }

    // Parse each numeric setting once instead of at every use.
    int kmerSize = Int32.Parse(kmerLength, null);
    int danglingLimit = Int32.Parse(danglingThreshold, null);
    int redundantLimit = Int32.Parse(redundantThreshold, null);
    int searchDepth = Int32.Parse(expectedDepth, null);

    // Configure the assembler, build the graph, and run both purge steps.
    this.KmerLength = kmerSize;
    this.DanglingLinksThreshold = danglingLimit;
    this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
    this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
    this.RedundantPathLengthThreshold = redundantLimit;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    this.UnDangleGraph();

    // Build contigs.
    this.ContigBuilder = new SimplePathContigBuilder();
    this.RemoveRedundancy();
    IEnumerable<ISequence> contigs = this.BuildContigs();
    IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

    // Map paired reads onto the contigs.
    CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

    // Filter contig pairs based on orientation, then compute distances.
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

    DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
    dist.CalculateDistance();

    // The contig graph is built from the unsorted contig list, as before.
    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

    // Trace scaffold paths.
    TracePath trace = new TracePath();
    IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, kmerSize, searchDepth);

    // Assemble paths.
    PathPurger pathsAssembler = new PathPurger();
    pathsAssembler.PurgePath(paths);

    // Get sequences from the assembled paths.
    IList<ISequence> seqList = paths
        .Select(scaffoldPath => scaffoldPath.BuildSequenceFromPath(graph, kmerSize))
        .ToList();

    // Validate assembled sequence paths.
    foreach (string sequence in seqList.Select(t => t.ConvertToString()))
    {
        Assert.IsTrue(assembledPath.Contains(sequence), "Failed to locate " + sequence);
    }

    ApplicationLog.WriteLine("PADENA P1 : AssemblePath() validation for Padena step6:step7 completed successfully");
}