// Builds a suffix tree for a plain DNA sequence and for an ambiguous DNA
// sequence, and asserts the edge count reported by each tree.
public void ValidateEdgeCount()
{
    // Plain DNA alphabet: "ATGCA" is expected to produce 8 edges.
    var plainDna = new Sequence(Alphabets.DNA, "ATGCA");
    var plainDnaTree = new MultiWaySuffixTree(plainDna);
    Assert.AreEqual(8, plainDnaTree.EdgesCount);
    ApplicationLog.WriteLine(@"MUMmer BVT : Validation of edge count for a Dna sequence completed successfully");

    // Ambiguous DNA alphabet: "RSVTW" is expected to produce 7 edges.
    var ambiguousDna = new Sequence(AmbiguousDnaAlphabet.Instance, "RSVTW");
    var ambiguousDnaTree = new MultiWaySuffixTree(ambiguousDna);
    Assert.AreEqual(7, ambiguousDnaTree.EdgesCount);
    ApplicationLog.WriteLine(@"MUMmer BVT : Validation of edge count for a Ambiguous Dna sequence completed successfully");
}
// Runs the NUCmer P1 find-match scenario described by the given configuration node.
//
// nodeName        : configuration node the inputs are read from.
// isFilePath      : true  -> reference and search sequences are parsed from FastA files
//                   false -> they are read inline from the configuration node.
// additionalParam : selects which validation runs on the collected matches.
// propParam       : forwarded to the cluster-builder validation.
//
// All reference sequences are concatenated into a single sequence, each one
// terminated by '+', before the suffix tree is built.
private void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath, AdditionalParameters additionalParam, PropertyParameters propParam)
{
    List<ISequence> referenceSeqList;
    var searchSeqList = new List<ISequence>();

    if (isFilePath)
    {
        // Gets the reference sequences from the FastA file.
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.", filePath));

        // Materialize the parser output once: it is enumerated for the
        // concatenation below and again to read the first sequence's alphabet.
        var parser = new FastAParser();
        referenceSeqList = parser.Parse(filePath).ToList();

        // Gets the query sequences from the FastA file.
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.", queryFilePath));

        var queryParserObj = new FastAParser();
        searchSeqList.AddRange(queryParserObj.Parse(queryFilePath));
    }
    else
    {
        // Gets the reference & search sequences from the configuration file.
        string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
        string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);
        IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        referenceSeqList = referenceSequences.Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t))).Cast<ISequence>().ToList();
        searchSeqList.AddRange(searchSequences.Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t))));
    }

    // Concatenate every reference sequence, terminating each one with '+'.
    var byteList = new List<Byte>();
    foreach (ISequence seq in referenceSeqList)
    {
        byteList.AddRange(seq);
        byteList.Add((byte)'+');
    }

    // Declared as Sequence so no "as" cast is needed when building the tree.
    Sequence referenceSeq = new Sequence(referenceSeqList.First().Alphabet.GetMummerAlphabet(), byteList.ToArray());

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Builds the suffix tree for the concatenated reference sequence.
    var suffixTreeBuilder = new MultiWaySuffixTree(referenceSeq)
    {
        MinLengthOfMatch = long.Parse(mumLength, null)
    };

    // Collect matches in search-sequence order. A plain loop is used instead of
    // an intermediate Dictionary so the order does not depend on dictionary enumeration.
    var mums = new List<Match>();
    foreach (ISequence searchSeq in searchSeqList)
    {
        mums.AddRange(suffixTreeBuilder.SearchMatchesUniqueInReference(searchSeq));
    }

    switch (additionalParam)
    {
        case AdditionalParameters.FindUniqueMatches:
            // Validates the unique matches.
            ApplicationLog.WriteLine("NUCmer P1 : Validating the Unique Matches");
            Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, isFilePath));
            ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
            break;
        case AdditionalParameters.PerformClusterBuilder:
            // Validates the matches through the cluster builder.
            ApplicationLog.WriteLine(
                "NUCmer P1 : Validating the Unique Matches using Cluster Builder");
            Assert.IsTrue(this.ValidateClusterBuilderMatches(mums, nodeName, propParam));
            ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the cluster builder matches for the sequences.");
            break;
        default:
            break;
    }

    ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
}
// Searches a single query sequence against a single reference sequence with
// SearchMatchesUniqueInReference and validates the resulting matches.
//
// nodeName   : configuration node the inputs are read from.
// isFilePath : true  -> the first sequence of each FastA file is used
//              false -> sequences and alphabets are read inline from the node.
void ValidateFindMatchSimpleSuffixGeneralTestCases(string nodeName, bool isFilePath)
{
    ISequence reference;
    ISequence query;
    string referenceText;
    string queryText;

    if (!isFilePath)
    {
        // Reference sequence and its alphabet come straight from the configuration file.
        referenceText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
        reference = new Sequence(Utility.GetAlphabet(alphabetName), this.encodingObj.GetBytes(referenceText));

        // Query sequence and its alphabet come from the same node.
        queryText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        query = new Sequence(Utility.GetAlphabet(alphabetName), this.encodingObj.GetBytes(queryText));
    }
    else
    {
        // Reference: first sequence of the FastA file named in the configuration file.
        string referencePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "MUMmer BVT : Successfully validated the File Path '{0}'.", referencePath));

        FastAParser fastaParser = new FastAParser();
        IEnumerable<ISequence> parsedReferences = fastaParser.Parse(referencePath);
        reference = parsedReferences.ElementAt(0);
        char[] referenceChars = reference.Select(symbol => (char)symbol).ToArray();
        referenceText = new string(referenceChars);

        // Query: first sequence of the search FastA file named in the configuration file.
        string queryPath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryPath));

        IEnumerable<ISequence> parsedQueries = fastaParser.Parse(queryPath);
        query = parsedQueries.ElementAt(0);
        char[] queryChars = query.Select(symbol => (char)symbol).ToArray();
        queryText = new string(queryChars);
    }

    string minMatchLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Builds the suffix tree for the reference sequence and applies the minimum match length.
    MultiWaySuffixTree tree = new MultiWaySuffixTree(reference as Sequence)
    {
        MinLengthOfMatch = long.Parse(minMatchLength, null)
    };

    IEnumerable<Match> foundMatches = tree.SearchMatchesUniqueInReference(query);

    // Validates the unique matches.
    ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
    bool matchesAreValid = this.ValidateUniqueMatches(foundMatches, nodeName, LISParameters.FindUniqueMatches);
    Assert.IsTrue(matchesAreValid);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.", referenceText, queryText));
}
/// <summary>
/// Validates SearchMatches() with different inputs: builds a suffix tree for the
/// reference sequence read from the configuration node, searches the query
/// sequence against it and validates the matches found.
/// </summary>
/// <param name="nodeName">Parent Node from Xml.</param>
void ValidateSearchMatch(string nodeName)
{
    // Reference sequence and its alphabet.
    // The reference alphabet used to be read and then overwritten by the search
    // alphabet before use; each sequence now gets its own configured alphabet.
    string referenceAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    string referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);

    // Query sequence and its alphabet.
    string querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
    string queryAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Builds the suffix tree for the reference sequence.
    Sequence referenceSequenceForMatches = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceSequence);
    MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(referenceSequenceForMatches);
    suffixTree.MinLengthOfMatch = long.Parse(mumLength, null);

    Sequence querySequenceForMatches = new Sequence(Utility.GetAlphabet(queryAlphabetName), querySequence);
    IEnumerable<Match> matches = suffixTree.SearchMatches(querySequenceForMatches);

    // Validates the matches.
    ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
    Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName, LISParameters.FindUniqueMatches));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.", referenceSequence, querySequence));
}
/// <summary>
/// Validates most of the find matches suffix tree test cases with varying parameters.
/// All reference sequences are concatenated into one sequence (each terminated by '+'),
/// a suffix tree is built for it, and every search sequence is matched against it.
/// </summary>
/// <param name="nodeName">Node name which needs to be read for execution.</param>
/// <param name="isFilePath">True to parse the sequences from FastA files, false to read them inline from the node.</param>
/// <param name="additionalParam">LIS action type enum; also forwarded to the match validation.</param>
void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath, AdditionalParameters additionalParam)
{
    List<ISequence> referenceSeqList;
    var searchSeqList = new List<ISequence>();

    if (isFilePath)
    {
        // Gets the reference sequences from the FastA file.
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);

        // Materialize the parser output once: it is enumerated for the
        // concatenation below and again to read the first sequence's alphabet.
        FastAParser parser = new FastAParser();
        referenceSeqList = parser.Parse(filePath).ToList();

        // Gets the query sequences from the FastA file.
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        searchSeqList.AddRange(parser.Parse(queryFilePath));
    }
    else
    {
        // Gets the reference & search sequences from the configuration file.
        string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
        string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);
        IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        referenceSeqList = referenceSequences.Select(t => new Sequence(seqAlphabet, this.encodingObj.GetBytes(t))).Cast<ISequence>().ToList();
        searchSeqList.AddRange(searchSequences.Select(t => new Sequence(seqAlphabet, this.encodingObj.GetBytes(t))).Cast<ISequence>());
    }

    // Concatenate every reference sequence, terminating each one with '+'.
    List<Byte> byteList = new List<Byte>();
    foreach (ISequence seq in referenceSeqList)
    {
        byteList.AddRange(seq);
        byteList.Add((byte)'+');
    }

    // Declared as Sequence so no "as" cast is needed when building the tree.
    Sequence referenceSeq = new Sequence(referenceSeqList.First().Alphabet.GetMummerAlphabet(), byteList.ToArray());

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Builds the suffix tree for the concatenated reference sequence.
    MultiWaySuffixTree suffixTreeBuilder = new MultiWaySuffixTree(referenceSeq)
    {
        MinLengthOfMatch = long.Parse(mumLength, null)
    };

    // Collect matches in search-sequence order. A plain loop is used instead of
    // an intermediate Dictionary so the order does not depend on dictionary enumeration.
    List<Match> mums = new List<Match>();
    foreach (ISequence sequence in searchSeqList)
    {
        mums.AddRange(suffixTreeBuilder.SearchMatchesUniqueInReference(sequence));
    }

    switch (additionalParam)
    {
        // Both actions run the same validation; it receives additionalParam
        // so it can tell the two apart.
        case AdditionalParameters.FindUniqueMatches:
        case AdditionalParameters.PerformClusterBuilder:
            Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, additionalParam, isFilePath));
            break;
        default:
            break;
    }
}
// Entry point of the MumUtil command line tool.
//
// With no arguments, or with the help option, the help text is printed.
// Otherwise the first sequence of the reference FastA file (fileList[0]) is
// used to build a suffix tree, every sequence of the query FastA file
// (fileList[1]) is matched against it with the strategy selected by
// /maxmatch, /mum or /mumreference, and the matches are written with WriteMums.
// In verbose mode, timing and memory statistics are written to standard error.
// Any exception is passed to DisplayException.
static void Main(string[] args)
{
    try
    {
        Stopwatch swMumUtil = Stopwatch.StartNew();
        Stopwatch swInterval = new Stopwatch();
        Console.Error.WriteLine(SplashString());

        if (args.Length == 0)
        {
            Console.WriteLine(Resources.MumUtilHelp);
            return;
        }

        CommandLineOptions myArgs = ProcessCommandLine(args);
        if (myArgs.help)
        {
            Console.WriteLine(Resources.MumUtilHelp);
            return;
        }

        long refFileLength = new FileInfo(myArgs.fileList[0]).Length;

        swInterval.Restart();
        IEnumerable<ISequence> referenceSequences = ParseFastA(myArgs.fileList[0]);

        // Direct cast: fail here with a clear InvalidCastException rather than
        // with a null reference further down if the parser yields another type.
        Sequence referenceSequence = (Sequence)referenceSequences.First();
        swInterval.Stop();

        if (myArgs.verbose)
        {
            Console.Error.WriteLine();
            Console.Error.WriteLine(" Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.fileList[0]));
            Console.Error.WriteLine(" Length of first Sequence: {0:#,000}", referenceSequence.Count);
            Console.Error.WriteLine(" Read/Processing time: {0}", swInterval.Elapsed);
            Console.Error.WriteLine(" File size : {0:#,000} bytes", refFileLength);
        }

        long queryFileLength = new FileInfo(myArgs.fileList[1]).Length;
        IEnumerable<ISequence> parsedQuerySequences = ParseFastA(myArgs.fileList[1]);
        IEnumerable<ISequence> querySequences = parsedQuerySequences;
        if (myArgs.reverseOnly)
        {
            // convert to reverse complement sequences
            querySequences = ReverseComplementSequenceList(parsedQuerySequences);
        }
        else if (myArgs.both)
        {
            // add the reverse complement sequences along with query sequences.
            querySequences = AddReverseComplementsToSequenceList(parsedQuerySequences);
        }

        // DISCUSSION:
        //   Three possible outputs desired. Globally unique 'mum' (v1), unique in reference sequence (v2),
        //   or get the maximum matches of length or greater.
        mummerTime = new TimeSpan();
        writetime = new TimeSpan();

        long memoryAtStart = 0;
        long memoryAtEnd = 0;
        if (myArgs.verbose)
        {
            // The overall stopwatch is paused while memory is sampled.
            swMumUtil.Stop();
            memoryAtStart = GC.GetTotalMemory(true);
            swMumUtil.Start();
        }

        swInterval.Restart();
        MultiWaySuffixTree suffixTreee = new MultiWaySuffixTree(referenceSequence);
        swInterval.Stop();

        if (myArgs.verbose)
        {
            swMumUtil.Stop();
            memoryAtEnd = GC.GetTotalMemory(true);
            swMumUtil.Start();
        }

        MUMmer mummer = new MUMmer(suffixTreee);

        if (myArgs.verbose)
        {
            Console.Error.WriteLine();
            Console.Error.WriteLine("Suffix tree construction time : {0}", swInterval.Elapsed);
            Console.Error.WriteLine("Memory consumed by Suffix tree : {0:#,000}", memoryAtEnd - memoryAtStart);
            Console.Error.WriteLine("Total edges created : {0:#,000}", suffixTreee.EdgesCount);
            Console.Error.WriteLine("Memory per edge : {0:#,000.00} bytes", (((double)(memoryAtEnd - memoryAtStart)) / suffixTreee.EdgesCount));
            Console.Error.WriteLine();
            Console.Error.WriteLine(" Processed Query FastA file: {0}", Path.GetFullPath(myArgs.fileList[1]));
            Console.Error.WriteLine(" File Size : {0:#,000} bytes", queryFileLength);
        }

        mummer.LengthOfMUM = myArgs.length;
        mummer.NoAmbiguity = myArgs.noAmbiguity;

        // Select the matching strategy once; the processing loop below is shared
        // so every option is timed the same way.
        string outputOption;
        Func<Sequence, IEnumerable<Match>> findMatches;
        if (myArgs.maxmatch)
        {
            outputOption = "GetMumsMaxMatch()";
            findMatches = q => mummer.GetMatches(q);
        }
        else if (myArgs.mum)
        {
            // TODO: After implementing GetMatchesUniqueInBothReferenceAndQuery() in MUMmer,
            // replace GetMatchesUniqueInReference() with GetMatchesUniqueInBothReferenceAndQuery() below.
            outputOption = "GetMumsMum()";
            findMatches = q => mummer.GetMatchesUniqueInReference(q);
        }
        else if (myArgs.mumreference)
        {
            outputOption = "GetMumsReference()";
            findMatches = q => mummer.GetMatchesUniqueInReference(q);
        }
        else
        {
            // cannot happen as argument processing already asserted one of the three options must be specified
            Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
            Environment.Exit(-1);

            // The compiler does not recognize Environment.Exit() as a no-return function,
            // so throw to satisfy definite assignment of the locals above.
            throw new InvalidOperationException("Never hit this");
        }

        long querySeqCount = 0;
        double sumofSeqLength = 0;
        TimeSpan totalTimetakenToProcessQuerySequences = new TimeSpan();

        // The interval stopwatch runs while the enumerator produces the next query
        // sequence, so that time is charged to query processing.
        swInterval.Restart();
        foreach (Sequence qSeq in querySequences)
        {
            // Stop the watch after each query sequence parsed.
            swInterval.Stop();

            // Add total time to process query sequence.
            // if reverse complement option is set, includes reverse complement time also.
            totalTimetakenToProcessQuerySequences = totalTimetakenToProcessQuerySequences.Add(swInterval.Elapsed);

            swInterval.Restart();
            IEnumerable<Match> mums = findMatches(qSeq);
            swInterval.Stop();

            // Add time taken by the selected match search.
            mummerTime = mummerTime.Add(swInterval.Elapsed);

            swInterval.Restart();
            WriteMums(mums, referenceSequence, qSeq, myArgs);
            swInterval.Stop();

            // Add time taken by write matches.
            writetime = writetime.Add(swInterval.Elapsed);

            querySeqCount++;
            sumofSeqLength += qSeq.Count;

            // Start the watch for next query sequence parse.
            swInterval.Restart();
        }

        swInterval.Stop();

        if (myArgs.verbose)
        {
            if (myArgs.reverseOnly || myArgs.both)
            {
                Console.Error.WriteLine(" Read/Processing time : {0}", timeTakenToParseQuerySequences);
                Console.Error.WriteLine(" Reverse Complement time : {0}", timeTakenToGetReverseComplement);
                Console.Error.WriteLine(" Total time taken to Process reads: {0}", totalTimetakenToProcessQuerySequences);
            }
            else
            {
                Console.Error.WriteLine(" Read/Processing time : {0}", totalTimetakenToProcessQuerySequences);
            }

            Console.Error.WriteLine();
            Console.Error.WriteLine(" Number of query Sequences : {0:#,000}", querySeqCount);
            Console.Error.WriteLine(" Average length of query Sequences: {0:#,000}", sumofSeqLength / querySeqCount);
            Console.Error.WriteLine();
            Console.Error.WriteLine("Compute {0,20} time : {1}", outputOption, mummerTime);
            Console.Error.WriteLine(" WriteMums() time : {0}", writetime);
        }

        swMumUtil.Stop();
        if (myArgs.verbose)
        {
            Console.Error.WriteLine(" Total MumUtil Runtime : {0}", swMumUtil.Elapsed);
        }
    }
    catch (Exception ex)
    {
        DisplayException(ex);
    }
}
// Builds a suffix tree for a reference sequence and checks that the tree's
// Sequence property holds the same symbols as the input.
//
// nodeName   : configuration node the inputs are read from.
// isFilePath : true  -> the first sequence of the FastA file named in the node is used
//              false -> the sequence and alphabet are read inline from the node.
void ValidateBuildSuffixTreeGeneralTestCases(string nodeName, bool isFilePath)
{
    ISequence referenceSeq;
    string referenceSequence;

    if (isFilePath)
    {
        // Gets the reference sequence from the FastA file named in the configuration file.
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null, "MUMmer P1 : Successfully validated the File Path '{0}'.", filePath));

        FastAParser fastaParserObj = new FastAParser();
        IEnumerable<ISequence> referenceSeqs = fastaParserObj.Parse(filePath);
        referenceSeq = referenceSeqs.FirstOrDefault();
        Assert.IsNotNull(referenceSeq);
        referenceSequence = referenceSeq.ConvertToString();
    }
    else
    {
        // Gets the reference sequence from the configuration file.
        referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode)), this.encodingObj.GetBytes(referenceSequence));
    }

    // Builds the suffix tree for the reference sequence passed.
    MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(referenceSeq as Sequence);

    // Expected value first, actual value second, so a failure message reports them correctly.
    string treeSequence = new string(suffixTree.Sequence.Select(a => (char)a).ToArray());
    Assert.AreEqual(referenceSequence, treeSequence);

    ApplicationLog.WriteLine(string.Format(null, "MUMmer P1 : Successfully validated the Suffix Tree properties for the sequence '{0}'.", referenceSequence));
}
// Searches query sequence(s) against a reference sequence with
// SearchMatchesUniqueInReference and validates the resulting matches.
//
// nodeName                  : configuration node the inputs are read from.
// isFilePath                : true  -> sequences are parsed from FastA files
//                             false -> sequences and alphabets are read inline from the node.
// isMultiSequenceSearchFile : true  -> the first two sequences of the query file are validated
//                             false -> only the first query sequence is validated.
// isAmbiguousCharacter      : Dna/Rna force the ambiguous DNA/RNA alphabet on the FastA parser;
//                             any other value leaves the parser's alphabet untouched.
void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath, bool isMultiSequenceSearchFile, PhaseOneAmbiguityParameters isAmbiguousCharacter)
{
    ISequence referenceSeq;
    ISequence querySeq;
    string referenceSequence;
    string querySequence;
    List<ISequence> querySeqs = null;

    if (isFilePath)
    {
        // Gets the reference sequence from the FastA file named in the configuration file.
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        // One parser, configured once, is used for both the reference and the query file.
        FastAParser parser = new FastAParser();
        switch (isAmbiguousCharacter)
        {
            case PhaseOneAmbiguityParameters.Dna:
                parser.Alphabet = AmbiguousDnaAlphabet.Instance;
                break;
            case PhaseOneAmbiguityParameters.Rna:
                parser.Alphabet = AmbiguousRnaAlphabet.Instance;
                break;
            default:
                break;
        }

        referenceSeq = parser.Parse(filePath).ElementAt(0);
        referenceSequence = new string(referenceSeq.Select(a => (char)a).ToArray());

        // Gets the query sequence(s) from the search FastA file named in the configuration file.
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        // Materialize once: the query sequences are indexed several times below.
        querySeqs = parser.Parse(queryFilePath).ToList();
        querySeq = querySeqs.ElementAt(0);
        querySequence = new string(querySeq.Select(a => (char)a).ToArray());
    }
    else
    {
        // Gets the reference sequence from the configuration file.
        referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabet), this.encodingObj.GetBytes(referenceSequence));

        // Gets the query sequence from the configuration file.
        querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(queryAlphabet), this.encodingObj.GetBytes(querySequence));
    }

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Builds the suffix tree for the reference sequence passed.
    MultiWaySuffixTree suffixTreeBuilder = new MultiWaySuffixTree(referenceSeq as Sequence);
    suffixTreeBuilder.MinLengthOfMatch = long.Parse(mumLength, null);

    IEnumerable<Match> matches;
    if (isMultiSequenceSearchFile)
    {
        // Multi-sequence validation needs the parsed query file; it is only
        // available when isFilePath is true.
        Assert.IsNotNull(querySeqs);

        // Validate the first two sequences of the query file against the reference.
        matches = suffixTreeBuilder.SearchMatchesUniqueInReference(querySeqs[0]);
        Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName));
        matches = suffixTreeBuilder.SearchMatchesUniqueInReference(querySeqs[1]);
        Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName));
    }
    else
    {
        matches = suffixTreeBuilder.SearchMatchesUniqueInReference(querySeq);
        Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName));
    }

    ApplicationLog.WriteLine(string.Format(null, "MUMmer P1 : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.", referenceSequence, querySequence));
}
/// <summary>
/// Validates Constructor of Mummer Class with a Suffix tree as parameter:
/// builds a suffix tree for the reference sequence, constructs MUMmer from it,
/// runs GetMatches() for the query sequence and validates the matches found.
/// </summary>
/// <param name="nodeName">Parent Node from Xml.</param>
void ValidateConstructorWithSuffixTree(string nodeName)
{
    // Reference sequence and its alphabet.
    // The reference alphabet used to be read and then overwritten by the search
    // alphabet before use; each sequence now gets its own configured alphabet.
    string referenceAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    string referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);

    // Query sequence and its alphabet.
    string querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
    string queryAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);

    // Builds the suffix tree and hands it to the MUMmer constructor under test.
    Sequence refSequence = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceSequence);
    MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(refSequence);
    Bio.Algorithms.MUMmer.MUMmer mum = new Bio.Algorithms.MUMmer.MUMmer(suffixTree);

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);
    mum.LengthOfMUM = long.Parse(mumLength, null);

    Sequence sequence = new Sequence(Utility.GetAlphabet(queryAlphabetName), querySequence);
    IEnumerable<Match> matches = mum.GetMatches(sequence);

    // Validates the matches.
    ApplicationLog.WriteLine(@"MUMmer BVT : Validating the Unique Matches for implementation of customised MUMer Constructor");
    Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.", referenceSequence, querySequence));
}