/// <summary>
/// Validates the sequences assembled from scaffold paths for a given set of input reads.
/// Builds the graph and contigs from the reads, maps mate pairs onto the contigs,
/// traces scaffold paths, checks the number of paths and verifies that every sequence
/// built from a purged path is one of the expected sequences.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateAssembledPath(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);
    string expectedScaffoldPathCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScaffoldPathCount);
    string[] assembledPath = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencePathNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1, graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles from the graph in step4
    // Pass the graph and build contigs
    // Validate contig reads
    this.KmerLength = Int32.Parse(kmerLength, null);
    this.DanglingLinksThreshold = Int32.Parse(danglingThreshold, null);
    this.DanglingLinksPurger = new DanglingLinksPurger(Int32.Parse(danglingThreshold, null));
    this.RedundantPathsPurger = new RedundantPathsPurger(Int32.Parse(redundantThreshold, null));
    this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    ContigGraph graph = new ContigGraph();
    this.UnDangleGraph();

    // Build contig.
    this.ContigBuilder = new SimplePathContigBuilder();
    this.RemoveRedundancy();
    IEnumerable<ISequence> contigs = this.BuildContigs();
    IList<ISequence> listContigs = contigs.ToList();
    IList<ISequence> sortedContigs = SortContigsData(listContigs);
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

    // Find map paired reads.
    CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

    // Filter contigs based on the orientation.
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

    DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
    dist.CalculateDistance();

    // The contig graph is deliberately built from a fresh enumeration of the contigs,
    // exactly as before, so that any ordering applied by SortContigsData does not leak in.
    graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

    // Validate ScaffoldPath using BFS.
    TracePath trace = new TracePath();
    IList<ScaffoldPath> paths = trace.FindPaths(
        graph,
        contigpairedReads,
        Int32.Parse(kmerLength, null),
        Int32.Parse(expectedDepth, null));

    Assert.AreEqual(expectedScaffoldPathCount, paths.Count.ToString((IFormatProvider)null));

    // Assemble paths.
    PathPurger pathsAssembler = new PathPurger();
    pathsAssembler.PurgePath(paths);

    // Get sequences from assembled path.
    IList<ISequence> seqList = paths
        .Select(temp => temp.BuildSequenceFromPath(graph, Int32.Parse(kmerLength, null)))
        .ToList();

    // Validate assembled sequence paths.
    foreach (string sequence in seqList.Select(t => t.ConvertToString()))
    {
        Assert.IsTrue(assembledPath.Contains(sequence), "Could not locate " + sequence);
    }

    ApplicationLog.WriteLine(
        "Padena BVT : AssemblePath() validation for Padena step6:step7 completed successfully");
}
/// <summary>
/// Validates the scaffold path traced for a given set of input reads.
/// Builds the graph and contigs from the reads, maps mate pairs onto the contigs,
/// traces scaffold paths and checks that every node of the first path (or its
/// reverse complement) is one of the expected scaffold nodes.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateScaffoldPath(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string[] expectedScaffoldNodes = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ScaffoldNodes);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1, graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles from the graph in step4
    // Pass the graph and build contigs
    // Validate contig reads
    this.KmerLength = Int32.Parse(kmerLength, null);
    this.DanglingLinksThreshold = Int32.Parse(danglingThreshold, null);
    this.DanglingLinksPurger = new DanglingLinksPurger(Int32.Parse(danglingThreshold, null));
    this.RedundantPathsPurger = new RedundantPathsPurger(Int32.Parse(redundantThreshold, null));
    this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    ContigGraph graph = new ContigGraph();
    this.UnDangleGraph();

    // Build contig.
    this.ContigBuilder = new SimplePathContigBuilder();
    this.RemoveRedundancy();
    IEnumerable<ISequence> contigs = this.BuildContigs();
    IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

    // Find map paired reads.
    CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

    // Filter contigs based on the orientation.
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

    DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
    dist.CalculateDistance();

    graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

    // Validate ScaffoldPath using BFS.
    TracePath trace = new TracePath();
    IList<ScaffoldPath> paths = trace.FindPaths(
        graph,
        contigpairedReads,
        Int32.Parse(kmerLength, null),
        Int32.Parse(expectedDepth, null));

    // Only the first traced path is validated.
    ScaffoldPath scaffold = paths.First();

    foreach (KeyValuePair<Node, Edge> kvp in scaffold)
    {
        ISequence seq = graph.GetNodeSequence(kvp.Key);
        string sequence = seq.ConvertToString();
        string reversedSequence = seq.GetReverseComplementedSequence().ConvertToString();

        Assert.IsTrue(
            expectedScaffoldNodes.Contains(sequence) || expectedScaffoldNodes.Contains(reversedSequence),
            "Failed to find " + sequence + ", or " + reversedSequence);
    }

    ApplicationLog.WriteLine("PADENA P1 : FindPaths() validation for Padena step6:step6 completed successfully");
}
/// <summary>
/// Validates the sequences assembled by ParallelDeNovoAssembler.Assemble() for the
/// given input reads: checks the number of assembled sequences and that each one
/// (or its reverse complement) is present in the expected sequences.
/// </summary>
/// <param name="nodeName">XML node used to validate different test scenarios</param>
internal void ValidatePadenaAssembledSeqs(string nodeName)
{
    // Get values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string assembledSequences = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequencePathNode);
    string assembledSeqCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AssembledSeqCountNode);
    string[] updatedAssembledSeqs = assembledSequences.Split(',');

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Create a ParallelDeNovoAssembler instance.
    ParallelDeNovoAssembler denovoObj = null;
    try
    {
        denovoObj = new ParallelDeNovoAssembler();

        denovoObj.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
        denovoObj.DanglingLinksThreshold = Int32.Parse(danglingThreshold, (IFormatProvider)null);
        denovoObj.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);

        CloneLibrary.Instance.AddLibrary(
            library,
            float.Parse(mean, (IFormatProvider)null),
            float.Parse(stdDeviation, (IFormatProvider)null));

        // Re-encode every read through the symbol value map of the first read's alphabet
        // and hand the assembler DNA sequences that keep the original IDs.
        byte[] symbols = sequenceReads.ElementAt(0).Alphabet.GetSymbolValueMap();

        IDeNovoAssembly assembly = denovoObj.Assemble(
            sequenceReads.Select(a => new Sequence(Alphabets.DNA, a.Select(b => symbols[b]).ToArray()) { ID = a.ID }),
            true);

        IList<ISequence> assembledSequenceList = assembly.AssembledSequences.ToList();

        // Validate assembled sequences.
        Assert.AreEqual(assembledSeqCount, assembledSequenceList.Count.ToString((IFormatProvider)null));

        foreach (ISequence assembled in assembledSequenceList)
        {
            // NOTE(review): the forward check is a substring match against the raw
            // comma-separated expected string, while the reverse-complement check is an
            // exact match against the split entries. Kept as is because tightening the
            // forward check could break existing test data (e.g. entries with padding) -
            // confirm whether an exact match was intended.
            Assert.IsTrue(
                assembledSequences.Contains(new string(assembled.Select(a => (char)a).ToArray()))
                || updatedAssembledSeqs.Contains(
                    new string(assembled.GetReverseComplementedSequence().Select(a => (char)a).ToArray())));
        }
    }
    finally
    {
        if (denovoObj != null)
        {
            denovoObj.Dispose();
        }
    }

    ApplicationLog.WriteLine("Padena P1 : Assemble() validation for Padena step6:step7 completed successfully");
}
/// <summary>
/// Validates adding library information to the existing clone libraries and the
/// mate pairs mapped from the input reads afterwards.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="IsLibraryInfo">
/// When true the library is added through a CloneLibraryInformation object;
/// otherwise it is added by name, mean and standard deviation.
/// </param>
internal void AddLibraryInformation(string nodeName, bool IsLibraryInfo)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedPairedReadsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.PairedReadsCountNode);
    string[] backwardReadsNode = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.BackwardReadsNode);
    string[] forwardReadsNode = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ForwardReadsNode);
    string expectedLibrary = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string expectedStdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IList<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Add a new library information.
    if (IsLibraryInfo)
    {
        CloneLibraryInformation libraryInfo = new CloneLibraryInformation();
        libraryInfo.LibraryName = expectedLibrary;
        libraryInfo.MeanLengthOfInsert = float.Parse(mean, (IFormatProvider)null);
        libraryInfo.StandardDeviationOfInsert = float.Parse(expectedStdDeviation, (IFormatProvider)null);
        CloneLibrary.Instance.AddLibrary(libraryInfo);
    }
    else
    {
        CloneLibrary.Instance.AddLibrary(
            expectedLibrary,
            float.Parse(mean, (IFormatProvider)null),
            float.Parse(expectedStdDeviation, (IFormatProvider)null));
    }

    // Convert reads to map paired reads.
    MatePairMapper pair = new MatePairMapper();
    IList<MatePair> pairedreads = pair.Map(sequenceReads);

    // Validate Map paired reads.
    Assert.AreEqual(expectedPairedReadsCount, pairedreads.Count.ToString((IFormatProvider)null));

    for (int index = 0; index < pairedreads.Count; index++)
    {
        MatePair matePair = pairedreads[index];

        Assert.IsTrue(forwardReadsNode.Contains(
            new string(matePair.GetForwardRead(sequenceReads).Select(a => (char)a).ToArray())));
        Assert.IsTrue(backwardReadsNode.Contains(
            new string(matePair.GetReverseRead(sequenceReads).Select(a => (char)a).ToArray())));
        Assert.AreEqual(expectedStdDeviation, matePair.StandardDeviationOfLibrary.ToString((IFormatProvider)null));
        Assert.AreEqual(expectedLibrary, matePair.Library.ToString((IFormatProvider)null));
        Assert.AreEqual(mean, matePair.MeanLengthOfLibrary.ToString((IFormatProvider)null));
    }

    ApplicationLog.WriteLine(@"Padena P1 : Map paired reads has been verified successfully");
}
/// <summary>
/// Validates the contig mate pairs left after orientation based filtering.
/// Builds contigs from the input reads, maps the mate pairs onto them, filters the
/// pairs and compares the read start positions of the valid pairs between the first
/// two sorted contigs with the expected positions.
/// </summary>
/// <param name="nodeName">xml node name used for a different testcase.</param>
/// <param name="isFirstContig">
/// When true the pairs are looked up from the first sorted contig to the second one;
/// otherwise from the second to the first.
/// </param>
internal void ValidateFilterPaired(string nodeName, bool isFirstContig)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigPairedReadsCount);
    string forwardReadStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ForwardReadStartPos);
    string reverseReadStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReverseReadStartPos);
    string reverseComplementStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RerverseReadReverseCompPos);
    string[] expectedForwardReadStartPos = forwardReadStartPos.Split(',');
    string[] expectedReverseReadStartPos = reverseReadStartPos.Split(',');
    string[] expectedReverseComplementStartPos = reverseComplementStartPos.Split(',');

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1, graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles from the graph in step4
    // Pass the graph and build contigs
    // Validate contig reads
    this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
    this.DanglingLinksThreshold = Int32.Parse(danglingThreshold, (IFormatProvider)null);
    this.DanglingLinksPurger = new DanglingLinksPurger(Int32.Parse(danglingThreshold, (IFormatProvider)null));
    this.RedundantPathsPurger = new RedundantPathsPurger(Int32.Parse(redundantThreshold, (IFormatProvider)null));
    this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);

    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    this.UnDangleGraph();

    // Build contig.
    this.ContigBuilder = new SimplePathContigBuilder();
    this.RemoveRedundancy();
    IEnumerable<ISequence> contigs = this.BuildContigs();

    IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

    // Find map paired reads.
    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

    // Filter contigs based on the orientation.
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

    Assert.AreEqual(
        expectedContigPairedReadsCount,
        contigpairedReads.Values.Count.ToString((IFormatProvider)null));

    ISequence firstSeq = sortedContigs[0];
    ISequence secondSeq = sortedContigs[1];

    // Pick the direction of the contig pair to validate.
    ISequence sourceContig = isFirstContig ? firstSeq : secondSeq;
    ISequence targetContig = isFirstContig ? secondSeq : firstSeq;

    Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[sourceContig];
    IList<ValidMatePair> valid = SortPairedReads(map[targetContig], sequenceReads);

    // Validate Contig paired reads after filtering contig sequences.
    for (int index = 0; index < valid.Count; index++)
    {
        ValidMatePair matePair = valid[index];

        // The second forward position is consulted only when it exists, so a pair with
        // a single non-matching position fails the assertion instead of throwing an
        // index-out-of-range error.
        var forwardPositions = matePair.ForwardReadStartPosition;
        Assert.IsTrue(
            expectedForwardReadStartPos[index] == forwardPositions[0].ToString((IFormatProvider)null)
            || (forwardPositions.Count > 1
                && expectedForwardReadStartPos[index] == forwardPositions[1].ToString((IFormatProvider)null)));

        // NOTE(review): the expected "reverse read" positions are compared with
        // ReverseReadReverseComplementStartPosition and the expected "reverse complement"
        // positions with ReverseReadStartPosition. This pairing is kept exactly as it was;
        // confirm against the test data whether the names are simply swapped.
        if (matePair.ReverseReadReverseComplementStartPosition.Count > 1)
        {
            Assert.IsTrue(
                expectedReverseReadStartPos[index] == matePair.ReverseReadReverseComplementStartPosition[0].ToString((IFormatProvider)null)
                || expectedReverseReadStartPos[index] == matePair.ReverseReadReverseComplementStartPosition[1].ToString((IFormatProvider)null));
        }

        if (matePair.ReverseReadStartPosition.Count > 1)
        {
            Assert.IsTrue(
                expectedReverseComplementStartPos[index] == matePair.ReverseReadStartPosition[0].ToString((IFormatProvider)null)
                || expectedReverseComplementStartPos[index] == matePair.ReverseReadStartPosition[1].ToString((IFormatProvider)null));
        }
    }

    ApplicationLog.WriteLine("PADENA P1 : FilterPairedReads() validation for Padena step6:step4 completed successfully");
}
/// <summary>
/// Creates a RedundantPathsPurger instance, detects the redundant error nodes and
/// removes them from the graph, then validates the resulting graph.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="isMicroOrganism">
/// When true only the number of graph nodes is compared with the expected count;
/// otherwise the graph is validated through ValidateGraph.
/// </param>
internal void ValidateRedundantPathPurgerCtor(string nodeName, bool isMicroOrganism)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedNodesCountAfterDangling);

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1, graph in step2
    // Remove the dangling links from graph in step3
    // Validate the graph
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    DeBruijnGraph graph = this.Graph;
    this.DanglingLinksPurger = new DanglingLinksPurger(this.KmerLength);
    this.UnDangleGraph();

    // Create RedundantPathPurger instance, detect redundant nodes and remove error nodes
    RedundantPathsPurger redundantPathPurger =
        new RedundantPathsPurger(int.Parse(kmerLength, (IFormatProvider)null) + 1);
    DeBruijnPathList redundantnodelist = redundantPathPurger.DetectErroneousNodes(graph);
    redundantPathPurger.RemoveErroneousNodes(graph, redundantnodelist);

    if (isMicroOrganism)
    {
        // The expected count comes from XML as text; compare numbers with numbers,
        // because a string never equals an integer in Assert.AreEqual.
        Assert.AreEqual(int.Parse(expectedNodesCount, (IFormatProvider)null), graph.GetNodes().Count());
    }
    else
    {
        ValidateGraph(graph, nodeName);
    }

    ApplicationLog.WriteLine(@"Padena P1 :RedundantPathsPurger ctor and methods validation for Padena step4 completed successfully");
}
/// <summary>
/// Validates the SimplePathContigBuilder Build() method using the step 4 graph.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="isChromosomeRC">
/// When true only the number of contigs is compared with the expected count;
/// otherwise every contig must be one of the expected contigs.
/// </param>
internal void ValidateSimpleContigBuilderBuild(string nodeName, bool isChromosomeRC)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedContigsString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsNode);
    string[] expectedContigs = expectedContigsString.Split(',');
    string expectedContigsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsCount);

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1, graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles from graph in step4
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    DeBruijnGraph graph = this.Graph;
    this.UnDangleGraph();
    this.RemoveRedundancy();

    // Validate the SimpleContigBuilder.Build() by passing graph
    SimplePathContigBuilder builder = new SimplePathContigBuilder();
    IList<ISequence> contigs = builder.Build(graph).ToList();

    if (isChromosomeRC)
    {
        Assert.AreEqual(expectedContigsCount, contigs.Count.ToString((IFormatProvider)null));
    }
    else
    {
        // Validate the contigs
        foreach (ISequence contig in contigs)
        {
            Assert.IsTrue(expectedContigs.Contains(new string(contig.Select(a => (char)a).ToArray())));
        }
    }

    ApplicationLog.WriteLine(@"Padena P1 :SimpleContigBuilder.BuildContigs() validation for Padena step5 completed successfully");
}
/// <summary>
/// Validates step1 (build kmers) using the reads and kmer length set on this assembler.
/// </summary>
/// <param name="nodeName">xml node for test data</param>
/// <param name="isSmallSize">
/// When true only the number of kmer sets is compared with the expected count;
/// otherwise the kmers are validated through ValidateKmersList.
/// </param>
internal void ValidatePadenaBuildKmers(string nodeName, bool isSmallSize)
{
    // Read all the input sequences from xml config file
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedKmersCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedKmersCount);

    // Set kmerLength
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Set all the input reads and execute build kmers
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IEnumerable<KmersOfSequence> lstKmers =
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength);

    if (isSmallSize)
    {
        Assert.AreEqual(expectedKmersCount, lstKmers.Count().ToString((IFormatProvider)null));
    }
    else
    {
        ValidateKmersList(new List<KmersOfSequence>(lstKmers), sequenceReads.ToList(), nodeName);
    }

    ApplicationLog.WriteLine(@"Padena P1 : Validation of Build with all input reads using ParallelDeNovothis sequence completed successfully");
}
/// <summary>
/// Validates the graph generated by CreateGraph() from the kmers of the input reads.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="isLargeSizeReads">
/// When true only the number of graph nodes is compared with the expected count;
/// otherwise the graph is validated through ValidateGraph.
/// </param>
internal void ValidatePadenaBuildGraph(string nodeName, bool isLargeSizeReads)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedGraphsNodeCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GraphNodesCountNode);

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers and the graph from the reads.
    this.KmerLength = int.Parse(kmerLength, null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());

    this.CreateGraph();
    DeBruijnGraph graph = this.Graph;

    ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");

    if (isLargeSizeReads)
    {
        Assert.AreEqual(Int32.Parse(expectedGraphsNodeCount, null), graph.GetNodes().Count());
    }
    else
    {
        ValidateGraph(graph, nodeName);
    }

    ApplicationLog.WriteLine(@"Padena P1 : ParallelDeNovothis CreateGraph() validation for Padena step2 completed successfully");
}
/// <summary>
/// Validates the SequenceToKmerBuilder Build() overload that builds kmers
/// for all input reads at once.
/// </summary>
/// <param name="nodeName">xml node name for test data</param>
internal void ValidateKmerBuilderBuild(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Pass all the input reads and kmerLength to generate kmers
    SequenceToKmerBuilder builder = new SequenceToKmerBuilder();
    IEnumerable<KmersOfSequence> lstKmers =
        builder.Build(sequenceReads, int.Parse(kmerLength, (IFormatProvider)null));

    // Validate kmers list
    ValidateKmersList(
        new List<KmersOfSequence>(lstKmers),
        new List<ISequence>(sequenceReads),
        nodeName);

    ApplicationLog.WriteLine(
        @"Padena BVT : Validation of Build with all input reads sequence completed successfully");
}
/// <summary>
/// Validates the SequenceToKmerBuilder Build() overload that builds kmers
/// from one sequence at a time.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateKmerBuilderBuildWithSequence(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads. The path separator is normalized like the sibling helpers do,
    // and the parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // The kmer length does not change between reads, so parse it once.
    int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);

    // Pass each input read and kmerLength
    // Add all the generated kmers to kmer list
    SequenceToKmerBuilder builder = new SequenceToKmerBuilder();
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>();
    foreach (ISequence sequence in sequenceReads)
    {
        lstKmers.Add(builder.Build(sequence, kmerSize));
    }

    // Validate all the kmers
    ValidateKmersList(lstKmers, sequenceReads.ToList(), nodeName);

    ApplicationLog.WriteLine(
        @"Padena BVT : Validation of Build with each input read sequence completed successfully");
}
/// <summary>
/// Validates ParallelDeNovoAssembler step1 (build kmers) using the reads and
/// kmer length set on this assembler.
/// </summary>
/// <param name="nodeName">xml node for test data</param>
internal void ValidateDe2AssemblerBuildKmers(string nodeName)
{
    // Read all the input sequences from xml config file
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // set kmerLength
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // set all the input reads and execute build kmers
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IEnumerable<KmersOfSequence> lstKmers =
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength);

    ValidateKmersList(
        new List<KmersOfSequence>(lstKmers),
        new List<ISequence>(sequenceReads),
        nodeName);

    ApplicationLog.WriteLine(
        @"Padena BVT : Validation of Build with all input reads using ParallelDeNovoAssembler sequence completed successfully");
}
/// <summary>
/// Validates that a ReadContigMap created from the small chromosome reads
/// contains a key for the ID of each of the first ten reads.
/// </summary>
public void ValidateReadContigMap()
{
    // Read all the input sequences from xml config file
    string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SmallChromosomeReadsNode, Constants.FilePathNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    ReadContigMap map = new ReadContigMap(sequenceReads);
    Assert.IsNotNull(map);

    // Check each of the first ten reads (fewer when the file is shorter). The previous
    // loop looked up the first read ten times and never touched the other nine.
    foreach (ISequence read in sequenceReads.Take(10))
    {
        Assert.IsTrue(map.ContainsKey(read.ID));
    }
}
/// <summary>
/// Validates the sequences assembled by ParallelDeNovoAssembler.Assemble() with
/// optional scaffold generation, low coverage contig removal and erosion.
/// </summary>
/// <param name="nodeName">XML node used to validate different test scenarios</param>
/// <param name="isScaffold">Passed to Assemble() as its second argument.</param>
/// <param name="enableLowerContigRemoval">
/// When true, low coverage contig removal is enabled and the contig coverage
/// threshold is read from the XML node.
/// </param>
/// <param name="allowErosion">
/// When true, erosion is enabled and the erosion threshold is read from the XML node.
/// </param>
internal void ValidatePadenaAssembledSeqs(string nodeName, bool isScaffold, bool enableLowerContigRemoval, bool allowErosion)
{
    // Get values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string erosionThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ErosionNode);
    string lowCCThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LowCoverageContigNode);
    string expectedSequences = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequencePathNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Create a ParallelDeNovoAssembler instance.
    ParallelDeNovoAssembler assembler = null;
    try
    {
        assembler = new ParallelDeNovoAssembler
        {
            KmerLength = Int32.Parse(kmerLength, null),
            DanglingLinksThreshold = Int32.Parse(danglingThreshold, null),
            RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, null)
        };

        if (enableLowerContigRemoval)
        {
            assembler.AllowLowCoverageContigRemoval = enableLowerContigRemoval;
            assembler.ContigCoverageThreshold = double.Parse(lowCCThreshold, null);
        }

        if (allowErosion)
        {
            assembler.AllowErosion = true;
            assembler.ErosionThreshold = Int32.Parse(erosionThreshold, null);
        }

        CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));

        IDeNovoAssembly assembly = assembler.Assemble(sequenceReads.ToList(), isScaffold);
        IList<ISequence> assembledSequenceList = assembly.AssembledSequences.ToList();

        // Expected sequences are comma separated; surrounding whitespace is ignored.
        HashSet<string> expected = new HashSet<string>(expectedSequences.Split(',').Select(s => s.Trim()));
        AlignmentHelpers.CompareSequenceLists(expected, assembledSequenceList);

        ApplicationLog.WriteLine("Padena BVT : Assemble() validation for Padena step6:step7 completed successfully");
    }
    finally
    {
        if (assembler != null)
        {
            assembler.Dispose();
        }
    }
}
/// <summary>
/// Validate the DeBruijnNode ctor by passing the kmer and validating
/// the node object (extension counts and kmer count) against the XML node.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateDeBruijnNodeCtor(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string nodeExtensionsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodeExtensionsCountNode);
    string kmersCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmersCountNode);
    string leftNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LeftNodeExtensionsCountNode);
    string rightNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RightNodeExtensionsCountNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build the kmers from the reads.
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

    // Create the node under test from the first kmer of the first read.
    ISequence seq = this.SequenceReads.First();
    KmerData32 kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);
    DeBruijnNode node = new DeBruijnNode(kmerData, 1);

    // Both extension nodes are built from the same kmer data (first kmer of lstKmers[1]).
    kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);
    DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
    DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

    node.SetExtensionNode(false, true, leftnode);
    node.SetExtensionNode(true, true, rightnode);

    // Validate DeBruijnNode class properties (each property is checked once).
    Assert.AreEqual(nodeExtensionsCount, node.ExtensionsCount.ToString((IFormatProvider)null));
    Assert.AreEqual(kmersCount, node.KmerCount.ToString((IFormatProvider)null));
    Assert.AreEqual(leftNodeExtensionCount, node.LeftExtensionNodesCount.ToString((IFormatProvider)null));
    Assert.AreEqual(rightNodeExtensionCount, node.RightExtensionNodesCount.ToString((IFormatProvider)null));

    ApplicationLog.WriteLine("Padena P1 : DeBruijnNode ctor() validation for Padena step2 completed successfully");
}
/// <summary>
/// Validate ParallelDeNovothis.RemoveRedundancy() which removes bubbles formed in the graph
/// and validate the graph
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="defaultThreshold">True to derive both purger thresholds from the kmer length;
/// false to read the dangling link and path length thresholds from the XML node.</param>
/// <param name="isMicroorganism">True to validate only the node count of the graph;
/// false to validate the graph through ValidateGraph().</param>
internal void ValidatePadenaRemoveRedundancy(string nodeName, bool defaultThreshold, bool isMicroorganism)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedNodesCountRemoveRedundancy);

    string danglingThreshold = null;
    string pathlengthThreshold = null;
    if (!defaultThreshold)
    {
        danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
        pathlengthThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.PathLengthThresholdNode);
    }

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // The kmer length is needed in several places below; parse it once.
    int parsedKmerLength = int.Parse(kmerLength, (IFormatProvider)null);

    // Build kmers from step1,graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles from graph in step4
    // Validate the graph
    if (!defaultThreshold)
    {
        this.DanglingLinksThreshold = int.Parse(danglingThreshold, (IFormatProvider)null);
        this.DanglingLinksPurger = new DanglingLinksPurger(this.DanglingLinksThreshold);
        this.RedundantPathLengthThreshold = int.Parse(pathlengthThreshold, (IFormatProvider)null);
        this.RedundantPathsPurger = new RedundantPathsPurger(this.RedundantPathLengthThreshold);
    }
    else
    {
        // Default thresholds: kmer length for dangling links, kmer length + 1 for redundant paths.
        this.DanglingLinksPurger = new DanglingLinksPurger(parsedKmerLength);
        this.RedundantPathsPurger = new RedundantPathsPurger(parsedKmerLength + 1);
    }

    this.KmerLength = parsedKmerLength;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    DeBruijnGraph graph = this.Graph;
    ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");

    this.UnDangleGraph();
    ApplicationLog.WriteLine("Padena P1 : Step3 Completed Successfully");

    this.RemoveRedundancy();
    ApplicationLog.WriteLine("Padena P1 : Step4 Completed Successfully");

    if (isMicroorganism)
    {
        Assert.AreEqual(expectedNodesCount, graph.GetNodes().Count().ToString((IFormatProvider)null));
    }
    else
    {
        ValidateGraph(graph, nodeName);
    }

    ApplicationLog.WriteLine(@"Padena P1 :ParallelDeNovothis.RemoveRedundancy() validation for Padena step4 completed successfully");
}
/// <summary>
/// Validate RemoveErrorNodes() method is removing dangling nodes from the graph
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidatePadenaRemoveErrorNodes(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1,graph in step2
    // and remove the dangling links from graph in step3
    // Validate the graph
    int parsedKmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.KmerLength = parsedKmerLength;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    DeBruijnGraph graph = this.Graph;

    // Find the dangling nodes and remove them; the purger threshold is kmer length + 1.
    DanglingLinksPurger danglingLinksPurger = new DanglingLinksPurger(parsedKmerLength + 1);
    DeBruijnPathList danglingnodes = danglingLinksPurger.DetectErroneousNodes(graph);
    danglingLinksPurger.RemoveErroneousNodes(graph, danglingnodes);

    // Only the first node of the first detected path is checked for removal.
    Assert.IsFalse(graph.GetNodes().Contains(danglingnodes.Paths[0].PathNodes[0]));

    ApplicationLog.WriteLine(@"Padena P1 :DeBruijnGraph.RemoveErrorNodes() validation for Padena step3 completed successfully");
}
/// <summary>
/// Validate the ParallelDeNovothis unDangleGraph() method which removes the dangling link
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="defaultThreshold">True to use kmer length + 1 as the dangling links threshold;
/// false to read the threshold from the XML node.</param>
/// <param name="smallSizeChromosome">True to validate only the node count of the graph;
/// false to validate the graph through ValidateGraph().</param>
internal void ValidatePadenaUnDangleGraph(string nodeName, bool defaultThreshold, bool smallSizeChromosome)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesCountAfterDanglingGraphNode);

    string danglingThreshold = null;
    if (!defaultThreshold)
    {
        danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    }

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1,graph in step2
    int parsedKmerLength = int.Parse(kmerLength, null);
    this.KmerLength = parsedKmerLength;
    if (!defaultThreshold)
    {
        this.DanglingLinksThreshold = int.Parse(danglingThreshold, null);
    }
    else
    {
        this.DanglingLinksThreshold = parsedKmerLength + 1;
    }

    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    DeBruijnGraph graph = this.Graph;
    ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");

    this.DanglingLinksPurger = new DanglingLinksPurger(this.DanglingLinksThreshold);
    this.UnDangleGraph();
    ApplicationLog.WriteLine("Padena P1 : Step3 Completed Successfully");

    if (smallSizeChromosome)
    {
        Assert.AreEqual(expectedNodesCount, graph.GetNodes().Count().ToString((IFormatProvider)null));
    }
    else
    {
        ValidateGraph(graph, nodeName);
    }

    // The verbatim literal below spans two lines on purpose; its text is kept unchanged.
    ApplicationLog.WriteLine(@"Padena P1 :ParallelDeNovothis.UndangleGraph() validation for Padena step3 completed 
successfully");
}
/// <summary>
/// Validate ParallelDeNovothis.BuildContigs() by passing graph object
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
/// <param name="isChromosomeRC">True to validate only the number of contigs; false to
/// check that every contig (or its reverse complement) is one of the expected contigs.</param>
internal void ValidateDe2thisBuildContigs(string nodeName, bool isChromosomeRC)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedContigsString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsNode);

    // The configured value is either the comma separated contigs themselves or
    // (when it contains the test data marker) the path of a file holding them.
    // The marker check is ordinal so that it does not depend on the machine culture.
    bool isFileReference =
        expectedContigsString.IndexOf("PADENATESTDATA", StringComparison.OrdinalIgnoreCase) >= 0;

    string[] expectedContigs;
    if (!isFileReference)
    {
        expectedContigs = expectedContigsString.Split(',');
    }
    else
    {
        // Strip CR and LF separately so files with either line ending convention work.
        expectedContigs = ReadStringFromFile(expectedContigsString)
            .Replace("\r", "")
            .Replace("\n", "")
            .Split(',');
    }

    string expectedContigsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsCount);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1,graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles form the graph in step4
    // Pass the graph and build contigs
    // Validate the contigs
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    this.DanglingLinksPurger = new DanglingLinksPurger(this.KmerLength);
    this.UnDangleGraph();
    this.RedundantPathsPurger = new RedundantPathsPurger(this.KmerLength + 1);
    this.RemoveRedundancy();
    this.ContigBuilder = new SimplePathContigBuilder();
    IList<ISequence> contigs = this.BuildContigs().ToList();

    if (isChromosomeRC)
    {
        // Validate contigs count only for Chromosome files.
        Assert.AreEqual(expectedContigsCount, contigs.Count.ToString((IFormatProvider)null));
    }
    else
    {
        // Validate all contigs of a sequence. A set gives constant time lookups
        // instead of scanning the expected array once per contig.
        HashSet<string> expectedSet = new HashSet<string>(expectedContigs);
        for (int index = 0; index < contigs.Count; index++)
        {
            string forward = new string(contigs[index].Select(a => (char)a).ToArray());

            // The reverse complement is only computed when the forward strand does not match.
            bool found = expectedSet.Contains(forward)
                || expectedSet.Contains(
                    new string(contigs[index].GetReverseComplementedSequence().Select(a => (char)a).ToArray()));

            Assert.IsTrue(found, "Unexpected contig at index " + index);
        }
    }

    ApplicationLog.WriteLine(@"Padena P1 :ParallelDeNovothis.BuildContigs() validation for Padena step5 completed successfully");
}
/// <summary>
/// Validate KmersOfSequence ctor by passing base sequence reads, kmer length and
/// built kmers
/// </summary>
/// <param name="nodeName">xml node name.</param>
internal void ValidateKmersOfSequenceCtorWithBuildKmers(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // The kmer length does not change between reads; parse it once.
    int parsedKmerLength = int.Parse(kmerLength, (IFormatProvider)null);

    SequenceToKmerBuilder builder = new SequenceToKmerBuilder();
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>();

    // Validate KmersOfSequence ctor using build kmers
    foreach (ISequence sequence in sequenceReads)
    {
        KmersOfSequence kmer = builder.Build(sequence, parsedKmerLength);
        KmersOfSequence kmerSequence = new KmersOfSequence(sequence, parsedKmerLength, kmer.Kmers);
        lstKmers.Add(kmerSequence);
    }

    ValidateKmersList(lstKmers, sequenceReads.ToList(), nodeName);

    ApplicationLog.WriteLine(@"Padena P1 : KmersOfSequence ctor with build kmers method validation completed successfully");
}
/// <summary>
/// Validate library information: maps the reads of the test case to mate pairs and
/// compares the clone library of the first pair with the values of the XML node.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void GetLibraryInformation(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.PairedReadsCountNode);
    string expectedLibraray = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string expectedStdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequences;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequences = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Convert reads to map paired reads.
    MatePairMapper pair = new MatePairMapper();
    IList<MatePair> pairedreads = pair.Map(new List<ISequence>(sequences));

    // Validate Map paired reads.
    Assert.AreEqual(expectedPairedReadsCount, pairedreads.Count.ToString((IFormatProvider)null));

    // Get library information and validate. The expected values are XML text, so the
    // numbers are formatted culture independently before the string comparison.
    CultureInfo invariant = CultureInfo.InvariantCulture;
    CloneLibraryInformation libraryInfo =
        CloneLibrary.Instance.GetLibraryInformation(pairedreads[0].Library);

    Assert.AreEqual(expectedStdDeviation, libraryInfo.StandardDeviationOfInsert.ToString(invariant));
    Assert.AreEqual(expectedLibraray, libraryInfo.LibraryName.ToString(invariant));
    Assert.AreEqual(mean, libraryInfo.MeanLengthOfInsert.ToString(invariant));

    ApplicationLog.WriteLine(@"Padena P1 : Map paired reads has been verified successfully");
}
/// <summary>
/// Validate KmersOfSequence ctor by passing base sequence reads, kmer length and
/// built kmers and validate its properties.
/// </summary>
/// <param name="nodeName">xml node name.</param>
internal void ValidateKmersOfSequenceCtorProperties(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedSeq = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BaseSequenceNode);
    string expectedKmers = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmersCountNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Only the first read is validated; fetch it and the kmer length once.
    ISequence firstRead = sequenceReads.First();
    int parsedKmerLength = int.Parse(kmerLength, (IFormatProvider)null);

    SequenceToKmerBuilder builder = new SequenceToKmerBuilder();
    KmersOfSequence kmer = builder.Build(firstRead, parsedKmerLength);
    KmersOfSequence kmerSequence = new KmersOfSequence(firstRead, parsedKmerLength, kmer.Kmers);

    // Validate KmerOfSequence properties.
    Assert.AreEqual(expectedSeq, new string(kmerSequence.BaseSequence.Select(a => (char)a).ToArray()));
    Assert.AreEqual(expectedKmers, kmerSequence.Kmers.Count.ToString((IFormatProvider)null));

    ApplicationLog.WriteLine(@"Padena P1 : KmersOfSequence ctor with build kmers method validation completed successfully");
}
/// <summary>
/// Validate building map reads to contigs.
/// </summary>
/// <param name="nodeName">xml node name used for a different testcases</param>
/// <param name="isFullOverlap">True to validate length, start positions and full overlap of
/// the first read map of every contig; false to validate only that the first read map of
/// the first contig is a partial overlap.</param>
internal void ValidateMapReadsToContig(string nodeName, bool isFullOverlap)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string readMapLengthString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadMapLength);
    string readStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadStartPos);
    string contigStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigStartPos);

    string[] expectedReadmapLength = readMapLengthString.Split(',');
    string[] expectedReadStartPos = readStartPosString.Split(',');
    string[] expectedContigStartPos = contigStartPosString.Split(',');

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1,graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles form the graph in step4
    // Pass the graph and build contigs
    // Validate contig reads
    int parsedDanglingThreshold = Int32.Parse(daglingThreshold, null);
    int parsedRedundantThreshold = Int32.Parse(redundantThreshold, null);

    this.KmerLength = Int32.Parse(kmerLength, null);
    this.DanglingLinksThreshold = parsedDanglingThreshold;
    this.DanglingLinksPurger = new DanglingLinksPurger(parsedDanglingThreshold);
    this.RedundantPathsPurger = new RedundantPathsPurger(parsedRedundantThreshold);
    this.RedundantPathLengthThreshold = parsedRedundantThreshold;

    // Reset the reads first, as the sibling validation helpers in this file do.
    // NOTE(review): presumably a no-op when SetSequenceReads replaces the list - confirm.
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    this.UnDangleGraph();
    this.ContigBuilder = new SimplePathContigBuilder();
    this.RemoveRedundancy();

    IEnumerable<ISequence> contigs = this.BuildContigs();
    IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

    // Expected value first, so a failure message reports the values the right way round.
    Assert.AreEqual(sequenceReads.Count(), maps.Count);

    Dictionary<ISequence, IList<ReadMap>> readMaps = maps[sequenceReads.First().ID];

    // Sort the contig keys once instead of twice per loop iteration.
    IList<ISequence> sortedKeys = SortContigsData(readMaps.Keys.ToList());
    for (int i = 0; i < sortedKeys.Count; i++)
    {
        IList<ReadMap> readMap = readMaps[sortedKeys[i]];

        if (isFullOverlap)
        {
            Assert.AreEqual(expectedReadmapLength[i], readMap[0].Length.ToString((IFormatProvider)null), "readMap failed for pos " + i);
            Assert.AreEqual(expectedContigStartPos[i], readMap[0].StartPositionOfContig.ToString((IFormatProvider)null), "contigStart failed for pos " + i);
            Assert.AreEqual(expectedReadStartPos[i], readMap[0].StartPositionOfRead.ToString((IFormatProvider)null), "readStart failed for pos " + i);
            Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap);
        }
        else
        {
            // Only the first contig is checked in the partial overlap scenario.
            Assert.AreEqual(ContigReadOverlapType.PartialOverlap, readMap[0].ReadOverlap);
            break;
        }
    }

    ApplicationLog.WriteLine("PADENA P1 :ReadContigMapper.Map() validation for Padena step6:step2 completed successfully");
}
/// <summary>
/// Validate KmersOfSequence ToSequences() method which returns kmers sequence
/// using its positions
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateKmersOfSequenceToSequences(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string kmerOutputFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmersOutputFileNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build the kmers; the kmer length is parsed once and reused below.
    int parsedKmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.KmerLength = parsedKmerLength;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

    // Load the expected output: one line per read, kmers separated by commas.
    // The file is fully read and closed before the validation starts.
    List<string[]> fileContent = new List<string[]>();
    using (StreamReader kmerFile = new StreamReader(kmerOutputFile))
    {
        string line;
        while (null != (line = kmerFile.ReadLine()))
        {
            fileContent.Add(line.Split(','));
        }
    }

    // Validate the generated kmer sequences against the expected output.
    int index = 0;
    foreach (ISequence sequenceRead in sequenceReads)
    {
        int count = 0;
        KmersOfSequence kmerSequence = new KmersOfSequence(sequenceRead, parsedKmerLength, lstKmers[index].Kmers);
        IEnumerable<ISequence> sequences = kmerSequence.KmersToSequences();
        foreach (ISequence sequence in sequences)
        {
            string actualKmer = new string(sequence.Select(a => (char)a).ToArray());
            Assert.AreEqual(fileContent[index][count], actualKmer);
            count++;
        }

        index++;
    }

    ApplicationLog.WriteLine(@"Padena P1 : KmersOfSequence ToSequences() method validation completed successfully");
}
/// <summary>
/// Validate DistanceCalculator.CalculateDistance(): builds contigs from the reads, maps
/// and filters the mate pairs, and compares the calculated distances and standard
/// deviations of the first valid mate pair with the values of the XML node.
/// </summary>
/// <param name="nodeName">xml node name used for a differnt testcase.</param>
internal void ValidateContigDistance(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.KmerLengthNode);
    string daglingThreshold = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.RedundantThreshold);
    string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ContigPairedReadsCount);
    string distanceBetweenFirstContigs = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.DistanceBetweenFirstContig);
    string distanceBetweenSecondContigs = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.DistanceBetweenSecondContig);
    string firstStandardDeviation = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FirstContigStandardDeviation);
    string secondStandardDeviation = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.SecondContigStandardDeviation);

    // The XML test data is machine written, so numbers are parsed independently
    // of the culture of the machine running the tests.
    CultureInfo invariant = CultureInfo.InvariantCulture;

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1,graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles form the graph in step4
    // Pass the graph and build contigs
    // Validate contig reads
    int parsedDanglingThreshold = Int32.Parse(daglingThreshold, invariant);
    int parsedRedundantThreshold = Int32.Parse(redundantThreshold, invariant);

    this.KmerLength = Int32.Parse(kmerLength, invariant);
    this.DanglingLinksThreshold = parsedDanglingThreshold;
    this.DanglingLinksPurger = new DanglingLinksPurger(parsedDanglingThreshold);
    this.RedundantPathsPurger = new RedundantPathsPurger(parsedRedundantThreshold);
    this.RedundantPathLengthThreshold = parsedRedundantThreshold;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    this.UnDangleGraph();

    // Build contig.
    this.ContigBuilder = new SimplePathContigBuilder();
    this.RemoveRedundancy();
    IEnumerable<ISequence> contigs = this.BuildContigs();

    IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

    // Find map paired reads.
    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

    // Filter contigs based on the orientation.
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

    // Calculate the distance between contigs.
    DistanceCalculator calc = new DistanceCalculator(contigpairedReads);
    calc.CalculateDistance();
    Assert.AreEqual(expectedContigPairedReadsCount, contigpairedReads.Values.Count.ToString((IFormatProvider)null));

    // The mate pairs are keyed by contig on both levels; use the first sorted
    // contig when it is present as a key and fall back to the second otherwise.
    ISequence firstSeq = sortedContigs[0];
    ISequence secondSeq = sortedContigs[1];

    Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads.ContainsKey(firstSeq)
        ? contigpairedReads[firstSeq]
        : contigpairedReads[secondSeq];

    IList<ValidMatePair> valid = map.ContainsKey(firstSeq)
        ? map[firstSeq]
        : map[secondSeq];

    // Validate distance and standard deviation between contigs.
    ValidMatePair firstPair = valid.First();
    Assert.AreEqual(float.Parse(distanceBetweenFirstContigs, invariant), firstPair.DistanceBetweenContigs[0]);
    Assert.AreEqual(float.Parse(distanceBetweenSecondContigs, invariant), firstPair.DistanceBetweenContigs[1]);
    Assert.AreEqual(float.Parse(firstStandardDeviation, invariant), firstPair.StandardDeviation[0]);
    Assert.AreEqual(float.Parse(secondStandardDeviation, invariant), firstPair.StandardDeviation[1]);

    ApplicationLog.WriteLine("PADENA P1 : DistanceCalculator() validation for Padena step6:step5 completed successfully");
}
/// <summary>
/// Validate DeBruijinGraph properties: builds the graph from the reads of the
/// test case and compares its node count with the value of the XML node.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateDeBruijinGraphproperties(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GraphNodesCountNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers and graph.
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    DeBruijnGraph graph = this.Graph;

    ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");

    // Validate DeBruijnGraph Properties.
    Assert.AreEqual(expectedNodesCount, graph.GetNodes().Count().ToString((IFormatProvider)null));

    ApplicationLog.WriteLine(@"Padena P1 : ParallelDeNovothis CreateGraph() validation for Padena step2 completed successfully");
}
/// <summary>
/// Validate scaffold sequence for a given input reads: contigs are built from the reads,
/// scaffolds are built with a <c>GraphScaffoldBuilder</c> and the result is compared with
/// the comma separated expected scaffold sequences of the XML node.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateScaffoldSequence(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string libraray = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string inputRedundancy = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.InputRedundancy);
    string expectedSeq = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScaffoldSeq);
    string[] scaffoldSeqNodes = expectedSeq.Split(',');

    // The XML test data is machine written, so numbers are parsed independently
    // of the culture of the machine running the tests.
    CultureInfo invariant = CultureInfo.InvariantCulture;

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1,graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles form the graph in step4
    // Pass the graph and build contigs
    // Validate contig reads
    int parsedDanglingThreshold = Int32.Parse(daglingThreshold, invariant);
    int parsedRedundantThreshold = Int32.Parse(redundantThreshold, invariant);

    this.KmerLength = Int32.Parse(kmerLength, invariant);
    this.DanglingLinksThreshold = parsedDanglingThreshold;
    this.DanglingLinksPurger = new DanglingLinksPurger(parsedDanglingThreshold);
    this.RedundantPathsPurger = new RedundantPathsPurger(parsedRedundantThreshold);
    this.RedundantPathLengthThreshold = parsedRedundantThreshold;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    this.UnDangleGraph();

    // Build contig.
    // NOTE(review): unlike the sibling helpers, no ContigBuilder is assigned here before
    // BuildContigs() - presumably whatever builder is already set is used; confirm.
    this.RemoveRedundancy();
    IEnumerable<ISequence> contigs = this.BuildContigs();

    // Register the clone library used while mapping the paired reads.
    CloneLibrary.Instance.AddLibrary(
        libraray,
        float.Parse(mean, invariant),
        float.Parse(stdDeviation, invariant));

    IEnumerable<ISequence> scaffoldSeq;
    using (GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder())
    {
        scaffoldSeq = scaffold.BuildScaffold(
            sequenceReads,
            contigs.ToList(),
            this.KmerLength,
            redundancy: Int32.Parse(inputRedundancy, invariant));
    }

    AlignmentHelpers.CompareSequenceLists(new HashSet<string>(scaffoldSeqNodes), scaffoldSeq.ToList());

    ApplicationLog.WriteLine("PADENA P1 : Scaffold sequence : validation for Padena step6:step8 completed successfully");
}
/// <summary>
/// Validate RemoveExtension() method of DeBruijnNode: a node gets one left and one right
/// extension, both are removed again and the extension counts are checked before and after.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateDeBruijnNodeRemoveExtension(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

    // Create the node under test from the first kmer of the first read.
    ISequence seq = this.SequenceReads.First();
    KmerData32 kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);
    DeBruijnNode node = new DeBruijnNode(kmerData, 1);

    // Both extension nodes are built from the same kmer data (first kmer of lstKmers[1]).
    kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);
    DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
    DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

    node.SetExtensionNode(false, true, leftnode);
    node.SetExtensionNode(true, true, rightnode);

    // Validates count before removing right and left extension nodes.
    Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.RightExtensionNodesCount);
    Assert.AreEqual(1, node.RightExtensionNodesCount);
    Assert.AreEqual(1, node.LeftExtensionNodesCount);

    // Remove right and left extension nodes.
    node.RemoveExtensionThreadSafe(rightnode);
    node.RemoveExtensionThreadSafe(leftnode);

    // Validate node after removing right and left extensions.
    Assert.AreEqual(0, node.RightExtensionNodesCount);
    Assert.AreEqual(0, node.LeftExtensionNodesCount);

    // NOTE(review): the message names AddRightExtension() although this helper validates
    // the removal of extensions; the text is kept unchanged in case logs are matched on it.
    ApplicationLog.WriteLine(@"Padena P1 :DeBruijnNode AddRightExtension() validation for Padena step2 completed successfully");
}
/// <summary>
/// Validates the Mummer align method for several test cases for the parameters passed.
/// The expected score and the expected aligned sequences are read from the xml node and
/// compared with the aligner output.
/// </summary>
/// <param name="nodeName">Node name to be read from xml</param>
/// <param name="isFilePath">
/// True to load the reference and query sequences from the FASTA files named in the xml node;
/// false to build them from the sequence strings and alphabet names stored in the node.
/// </param>
/// <param name="isSeqList">
/// True to append the reference to the query list and call Align() with that single list;
/// false to call AlignSimple() with the reference and the query list.
/// </param>
void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isSeqList)
{
    ISequence referenceSeq;
    ISequence querySeq;
    IList<ISequence> querySeqs;
    string referenceSequence;
    string querySequence;
    IList<IPairwiseSequenceAlignment> align;

    if (isFilePath)
    {
        // Gets the reference sequence from the configuration file
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        FastAParser parser = new FastAParser();
        try
        {
            referenceSeq = parser.Parse(filePath).FirstOrDefault();
            Assert.IsNotNull(referenceSeq);
            referenceSequence = referenceSeq.ConvertToString();
        }
        finally
        {
            // Close the parser even when parsing or the assertion above fails.
            parser.Close();
        }

        // Gets the query (search) sequences from the configuration file
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        FastAParser queryParserObj = new FastAParser();
        try
        {
            querySeqs = queryParserObj.Parse(queryFilePath).ToList();
            querySeq = querySeqs.FirstOrDefault();
            Assert.IsNotNull(querySeq);
            querySequence = querySeq.ConvertToString();
        }
        finally
        {
            queryParserObj.Close();
        }
    }
    else
    {
        // Builds the reference sequence from the string and alphabet stored in the xml node
        referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceSeqAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceSeqAlphabet), referenceSequence);

        // Builds the query sequence the same way from the search-sequence nodes
        querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string querySeqAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(querySeqAlphabet), querySequence);
        querySeqs = new List<ISequence> { querySeq };
    }

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    var mumAlignObj = new Bio.Algorithms.MUMmer.MUMmerAligner
    {
        LengthOfMUM = long.Parse(mumLength, null),
        GapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null)
    };

    if (isSeqList)
    {
        // Note: the reference is appended AFTER the queries; the expected values in the
        // xml are tied to this ordering, so it is kept as is.
        querySeqs.Add(referenceSeq);
        align = mumAlignObj.Align(querySeqs);
    }
    else
    {
        align = mumAlignObj.AlignSimple(referenceSeq, querySeqs);
    }

    string expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    Assert.AreEqual(expectedScore, align[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null));
    ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.", referenceSequence, querySequence));

    // Build the expected alignment from the first two expected sequences in the xml node.
    string[] expectedSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment seqAlign = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
        SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
        Score = Convert.ToInt32(expectedScore, null),
        // NOTE(review): offsets are placeholders; presumably CompareAlignment ignores them - confirm.
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue
    };
    seqAlign.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(seqAlign);

    Assert.IsTrue(CompareAlignment(align, expectedOutput));
    ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
}
/// <summary>
/// Validate building map reads to contigs.
/// Builds contigs from the input reads, maps the reads back onto the sorted contigs and
/// checks the mapping of the first read against the expected values from the xml node.
/// </summary>
/// <remarks>
/// TODO: This test was originally written with hard coded assumptions about the direction of the
/// returned reads, currently this test has a hack to "flip" some reads to match these hard coded
/// assumptions. This should be cleaned up.
/// </remarks>
/// <param name="nodeName">xml node name used for a different testcases</param>
/// <param name="IsFullOverlap">True if full overlap else false</param>
internal void ValidateMapReadsToContig(string nodeName, bool IsFullOverlap)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string readMapLengthString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadMapLength);
    string readStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadStartPos);
    string contigStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigStartPos);

    // Expected values are stored as comma separated lists, one entry per sorted contig.
    string[] expectedReadmapLength = readMapLengthString.Split(',');
    string[] expectedReadStartPos = readStartPosString.Split(',');
    string[] expectedContigStartPos = contigStartPosString.Split(',');

    // Get the input reads
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(
        filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        // Release the input file even when parsing throws.
        parser.Close();
    }

    // Build kmers from step1,graph in step2
    // Remove the dangling links from graph in step3
    // Remove bubbles form the graph in step4
    // Pass the graph and build contigs
    // Validate contig reads
    this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
    int danglingLinksThreshold = Int32.Parse(daglingThreshold, (IFormatProvider)null);
    int redundantPathThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);

    this.DanglingLinksThreshold = danglingLinksThreshold;
    this.DanglingLinksPurger = new DanglingLinksPurger(danglingLinksThreshold);
    this.RedundantPathsPurger = new RedundantPathsPurger(redundantPathThreshold);
    this.RedundantPathLengthThreshold = redundantPathThreshold;

    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();
    this.UnDangleGraph();
    this.ContigBuilder = new SimplePathContigBuilder();
    this.RemoveRedundancy();

    IList<ISequence> listContigs = this.BuildContigs().ToList();

    // Hack to satisfy the assumptions of one test by flipping the read to its reverse complement.
    // A direct cast is used so that an unexpected contig type fails with a clear
    // InvalidCastException instead of a NullReferenceException.
    if (nodeName == Constants.MapReadsToContigFullOverlapNode)
    {
        listContigs[0] = ((Sequence)listContigs[0]).GetReverseComplementedSequence();
    }

    IList<ISequence> sortedContigs = SortContigsData(listContigs);
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

    // One map entry is expected per input read.
    Assert.AreEqual(sequenceReads.Count(), maps.Count);

    // Only the mapping of the first read is validated.
    Dictionary<ISequence, IList<ReadMap>> readMaps = maps[sequenceReads.ElementAt(0).ID];

    for (int i = 0; i < sortedContigs.Count; i++)
    {
        IList<ReadMap> readMap = readMaps[sortedContigs[i]];

        if (!IsFullOverlap)
        {
            // Partial overlap: only the first contig's overlap type is checked.
            Assert.AreEqual(ContigReadOverlapType.PartialOverlap, readMap[0].ReadOverlap);
            break;
        }

        Assert.AreEqual(expectedReadmapLength[i], readMap[0].Length.ToString((IFormatProvider)null), "readMap failed for pos " + i);
        Assert.AreEqual(expectedContigStartPos[i], readMap[0].StartPositionOfContig.ToString((IFormatProvider)null), "contigStart failed for pos " + i);
        Assert.AreEqual(expectedReadStartPos[i], readMap[0].StartPositionOfRead.ToString((IFormatProvider)null), "readStart failed for pos " + i);
        Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap);
    }

    ApplicationLog.WriteLine(
        "Padena BVT :ReadContigMapper.Map() validation for Padena step6:step2 completed successfully");
}