/// <summary>
/// Builds the suffix tree for the given reference sequence by delegating
/// to a new KurtzSuffixTreeBuilder instance.
/// </summary>
/// <param name="referenceSequence">Reference sequence to build the tree from.</param>
/// <returns>Suffix tree returned by the builder.</returns>
protected override SequenceSuffixTree BuildSuffixTree(ISequence referenceSequence)
{
    ISuffixTreeBuilder builder = new KurtzSuffixTreeBuilder();
    return builder.BuildSuffixTree(referenceSequence);
}
/// <summary>
/// Builds a suffix tree over a segmented sequence made of "AAATTGGC" and "ANANA",
/// searches it with the query "AATTNANAGGC" using a minimum match length of 3,
/// and expects exactly three matches.
/// </summary>
public void TestStreamingInSegmentedSequence()
{
    // Reference: first segment is passed to the constructor, second is appended.
    SegmentedSequence reference =
        new SegmentedSequence(new Sequence(Alphabets.Protein, "AAATTGGC"));
    reference.Sequences.Add(new Sequence(Alphabets.Protein, "ANANA"));

    ISuffixTreeBuilder builder = new KurtzSuffixTreeBuilder();
    ApplicationLog.WriteLine("Begin SuffixTree Test for string '{0}'", reference.ToString());
    SequenceSuffixTree tree = builder.BuildSuffixTree(reference);

    string queryText = "AATTNANAGGC";
    Sequence query = new Sequence(Alphabets.Protein, queryText);
    ApplicationLog.WriteLine("Query string : {0}. Minimum Length of MUM : 3.", queryText);

    // Log timestamps around the search itself.
    ApplicationLog.WriteTime("Start Time.", DateTime.Now.ToString());
    IList<MaxUniqueMatch> matches = builder.FindMatches(tree, query, 3);
    ApplicationLog.WriteTime("End Time.", DateTime.Now.ToString());

    // Verify the count of MUMs found
    Assert.AreEqual(3, matches.Count);
}
/// <summary>
/// Searches the suffix tree with the query sequence and returns the matches found
/// by a new KurtzSuffixTreeBuilder instance.
/// </summary>
/// <param name="suffixTree">Suffix tree to search.</param>
/// <param name="sequence">Query sequence.</param>
/// <param name="lengthOfMUM">Minimum length of MUM, forwarded to the builder.</param>
/// <returns>List of MUMs returned by the builder.</returns>
protected override IList<MaxUniqueMatch> Streaming(
    SequenceSuffixTree suffixTree,
    ISequence sequence,
    long lengthOfMUM)
{
    ISuffixTreeBuilder matcher = new KurtzSuffixTreeBuilder();
    IList<MaxUniqueMatch> matches = matcher.FindMatches(suffixTree, sequence, lengthOfMUM);
    return matches;
}
/// <summary>
/// Builds a suffix tree over the reference sequences described by a configuration
/// node and asserts that its edges pass ValidateEdges.
/// </summary>
/// <param name="nodeName">Configuration node that supplies the test inputs.</param>
/// <param name="isFilePath">
/// True to parse the reference sequences from the file named in the node;
/// false to read the sequence strings and alphabet name directly from the node.
/// </param>
void ValidateBuildSuffixTreeGeneralTestCases(string nodeName, bool isFilePath)
{
    ISequence reference = null;

    if (!isFilePath)
    {
        // Reference sequences and alphabet come straight from the configuration file.
        string[] sequenceTexts = _utilityObj._xmlUtil.GetTextValues(nodeName,
            Constants.ReferenceSequencesNode);
        string alphabetName = _utilityObj._xmlUtil.GetTextValue(nodeName,
            Constants.AlphabetNameNode);
        IAlphabet alphabet = Utility.GetAlphabet(alphabetName);

        List<ISequence> segments = new List<ISequence>();
        foreach (string text in sequenceTexts)
        {
            ISequence segment = new Sequence(alphabet, text);
            segments.Add(segment);
        }

        reference = new SegmentedSequence(segments);
    }
    else
    {
        // Reference sequences come from the file whose path is stored in the configuration file.
        string filePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
            Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "NUCmer BVT : Successfully validated the File Path '{0}'.", filePath));

        using (FastaParser parser = new FastaParser())
        {
            IList<ISequence> parsed = parser.Parse(filePath);
            reference = new SegmentedSequence(parsed);
        }
    }

    // Build the suffix tree for the reference sequence.
    ISuffixTreeBuilder builder = new KurtzSuffixTreeBuilder();
    SequenceSuffixTree tree = builder.BuildSuffixTree(reference);

    // Validate the edges of the tree.
    ApplicationLog.WriteLine("NUCmer BVT : Validating the Edges");
    bool edgesValid = ValidateEdges(tree, nodeName, isFilePath);
    Assert.IsTrue(edgesValid);

    // The same success message goes to the console and to the application log.
    string successMessage =
        "NUCmer BVT : Successfully validated the all the Edges for the sequence specified.";
    Console.WriteLine(successMessage);
    ApplicationLog.WriteLine(successMessage);
}
/// <summary>
/// Builds a suffix tree over the reference sequence described by a configuration
/// node, asserts that its edges pass ValidateEdges, and asserts that the tree's
/// Sequence renders to the same string as the reference sequence.
/// </summary>
/// <param name="nodeName">Configuration node that supplies the test inputs.</param>
/// <param name="isFilePath">
/// True to parse the reference sequence from the file named in the node (only the
/// first parsed sequence is used); false to read the sequence string and alphabet
/// name directly from the node.
/// </param>
void ValidateBuildSuffixTreeGeneralTestCases(string nodeName, bool isFilePath)
{
    ISequence referenceSeq = null;
    string referenceSequence = string.Empty;

    if (isFilePath)
    {
        // Gets the reference sequence from the file named in the configuration file
        string filePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
            Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        using (FastaParser parser = new FastaParser())
        {
            IList<ISequence> referenceSeqs = parser.Parse(filePath);
            referenceSeq = referenceSeqs[0];
            referenceSequence = referenceSeq.ToString();
        }
    }
    else
    {
        // Gets the reference sequence from the configuration file
        referenceSequence = _utilityObj._xmlUtil.GetTextValue(nodeName,
            Constants.SequenceNode);
        referenceSeq = new Sequence(
            Utility.GetAlphabet(_utilityObj._xmlUtil.GetTextValue(nodeName,
                Constants.AlphabetNameNode)),
            referenceSequence);
    }

    // Builds the suffix tree for the reference sequence passed.
    ISuffixTreeBuilder suffixTreeBuilder = new KurtzSuffixTreeBuilder();
    SequenceSuffixTree suffixTree = suffixTreeBuilder.BuildSuffixTree(referenceSeq);

    // Validates the edges for a given sequence.
    ApplicationLog.WriteLine("MUMmer BVT : Validating the Edges");
    Assert.IsTrue(ValidateEdges(suffixTree, nodeName));

    // Format each success message once; it is written to both the console and the log.
    string edgesMessage = string.Format((IFormatProvider)null,
        "MUMmer BVT : Successfully validated the all the Edges for the sequence '{0}'.",
        referenceSequence);
    Console.WriteLine(edgesMessage);
    ApplicationLog.WriteLine(edgesMessage);

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(referenceSequence, suffixTree.Sequence.ToString());

    string propertiesMessage = string.Format((IFormatProvider)null,
        "MUMmer BVT : Successfully validated the Suffix Tree properties for the sequence '{0}'.",
        referenceSequence);
    Console.WriteLine(propertiesMessage);
    ApplicationLog.WriteLine(propertiesMessage);
}
/// <summary>
/// Builds a suffix tree for the protein-alphabet sequence "BANANA" and checks that
/// the tree has 10 edges and that its Sequence renders back to the input string.
/// </summary>
public void TestSequence()
{
    string sequenceString = "BANANA";
    Sequence sequence = new Sequence(Alphabets.Protein, sequenceString);
    ISuffixTreeBuilder kurtzSuffixTreeBuilder = new KurtzSuffixTreeBuilder();

    ApplicationLog.WriteLine("Begin SuffixTree Test for string '{0}'", sequenceString);

    // Log timestamps around the tree construction.
    ApplicationLog.WriteTime("Start Time.", DateTime.Now.ToString());
    SequenceSuffixTree kurtzSuffixTree = kurtzSuffixTreeBuilder.BuildSuffixTree(sequence);
    ApplicationLog.WriteTime("End Time.", DateTime.Now.ToString());

    // Verify the edges in Suffix Tree
    Assert.AreEqual(10, kurtzSuffixTree.Edges.Count);

    // Verify the sequence in Suffix Tree. Expected value goes first, matching the
    // assertion above, so a failure message labels the two values correctly.
    Assert.AreEqual(sequenceString, kurtzSuffixTree.Sequence.ToString());
}
/// <summary>
/// Builds a suffix tree over the configured reference sequences, searches it with
/// the configured query sequences, and validates the resulting matches.
/// </summary>
/// <param name="nodeName">Configuration node that supplies the test inputs.</param>
/// <param name="isFilePath">
/// True to parse reference and query sequences from the files named in the node;
/// false to read the sequence strings and alphabet name directly from the node.
/// </param>
/// <param name="additionalParam">
/// Validation to run: FindUniqueMatches or PerformClusterBuilder.
/// Any other value skips the match validation.
/// </param>
static void ValidateFindMatchSuffixGeneralTestCases(string nodeName,
    bool isFilePath, AdditionalParameters additionalParam)
{
    ISequence reference = null;
    ISequence query = null;

    if (!isFilePath)
    {
        // Reference and search sequences come straight from the configuration file.
        string[] referenceTexts = Utility._xmlUtil.GetTextValues(nodeName,
            Constants.ReferenceSequencesNode);
        string[] queryTexts = Utility._xmlUtil.GetTextValues(nodeName,
            Constants.SearchSequencesNode);
        IAlphabet alphabet = Utility.GetAlphabet(
            Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        List<ISequence> referenceSegments = new List<ISequence>();
        List<ISequence> querySegments = new List<ISequence>();

        foreach (string text in referenceTexts)
        {
            ISequence segment = new Sequence(alphabet, text);
            referenceSegments.Add(segment);
        }

        reference = new SegmentedSequence(referenceSegments);

        foreach (string text in queryTexts)
        {
            ISequence segment = new Sequence(alphabet, text);
            querySegments.Add(segment);
        }

        query = new SegmentedSequence(querySegments);
    }
    else
    {
        // NOTE(review): the parsers below are never disposed. If FastaParser is
        // IDisposable in this code base, wrap them in using blocks - confirm.

        // Reference sequences from the FastA file.
        string referencePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer BVT : Successfully validated the File Path '{0}'.", referencePath));

        FastaParser referenceParser = new FastaParser();
        IList<ISequence> parsedReferences = referenceParser.Parse(referencePath);
        reference = new SegmentedSequence(parsedReferences);

        // Query sequences from the FastA file.
        string queryPath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer BVT : Successfully validated the File Path '{0}'.", queryPath));

        FastaParser queryParser = new FastaParser();
        IList<ISequence> parsedQueries = queryParser.Parse(queryPath);
        query = new SegmentedSequence(parsedQueries);
    }

    string mumLengthText = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Build the suffix tree for the reference, then search it with the query.
    ISuffixTreeBuilder builder = new KurtzSuffixTreeBuilder();
    SequenceSuffixTree tree = builder.BuildSuffixTree(reference);
    long minimumLength = long.Parse(mumLengthText, null);
    IList<MaxUniqueMatch> matches = builder.FindMatches(tree, query, minimumLength);

    if (additionalParam == AdditionalParameters.FindUniqueMatches)
    {
        // Validates the unique matches.
        ApplicationLog.WriteLine("NUCmer BVT : Validating the Unique Matches");
        Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, additionalParam, isFilePath));
        Console.WriteLine(
            "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
    }
    else if (additionalParam == AdditionalParameters.PerformClusterBuilder)
    {
        // Validates the matches for the cluster builder scenario.
        ApplicationLog.WriteLine(
            "NUCmer BVT : Validating the Unique Matches using Cluster Builder");
        Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, additionalParam, isFilePath));
        Console.WriteLine(
            "NUCmer BVT : Successfully validated the all the cluster builder matches for the sequences.");
    }

    // Written for every value of additionalParam, including ones that skipped validation.
    ApplicationLog.WriteLine(
        "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
}
/// <summary>
/// Builds a suffix tree over the configured reference sequence, searches it with
/// the configured query sequence, and validates the resulting matches, optionally
/// after passing them through LongestIncreasingSubsequence.
/// </summary>
/// <param name="nodeName">Configuration node that supplies the test inputs.</param>
/// <param name="isFilePath">
/// True to parse reference and query from the files named in the node (only the
/// first sequence of each file is used); false to read the sequence strings and
/// alphabet names directly from the node.
/// </param>
/// <param name="LISActionType">
/// Validation to run: FindUniqueMatches or PerformLIS.
/// Any other value skips the match validation.
/// </param>
static void ValidateFindMatchSuffixGeneralTestCases(string nodeName,
    bool isFilePath, LISParameters LISActionType)
{
    ISequence reference = null;
    ISequence query = null;
    string referenceText = string.Empty;
    string queryText = string.Empty;

    if (!isFilePath)
    {
        // Reference sequence and its alphabet from the configuration file.
        referenceText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.AlphabetNameNode);
        reference = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

        // Query sequence and its alphabet from the configuration file.
        queryText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceAlphabetNode);
        query = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);
    }
    else
    {
        // NOTE(review): the parsers below are never disposed. If FastaParser is
        // IDisposable in this code base, wrap them in using blocks - confirm.

        // Reference sequence: first entry of the file named in the configuration file.
        string referencePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", referencePath));

        FastaParser referenceParser = new FastaParser();
        IList<ISequence> parsedReferences = referenceParser.Parse(referencePath);
        reference = parsedReferences[0];
        referenceText = reference.ToString();

        // Query sequence: first entry of the search file named in the configuration file.
        string queryPath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryPath));

        FastaParser queryParser = new FastaParser();
        IList<ISequence> parsedQueries = queryParser.Parse(queryPath);
        query = parsedQueries[0];
        queryText = query.ToString();
    }

    string mumLengthText = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Build the suffix tree for the reference, then search it with the query.
    ISuffixTreeBuilder builder = new KurtzSuffixTreeBuilder();
    SequenceSuffixTree tree = builder.BuildSuffixTree(reference);
    long minimumLength = long.Parse(mumLengthText, null);
    IList<MaxUniqueMatch> matches = builder.FindMatches(tree, query, minimumLength);

    if (LISActionType == LISParameters.FindUniqueMatches)
    {
        // Validates the unique matches as returned by the search.
        ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
        Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, LISActionType));
    }
    else if (LISActionType == LISParameters.PerformLIS)
    {
        // Validates the matches after running them through GetLongestSequence.
        ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches using LIS");
        LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
        IList<MaxUniqueMatch> lisMatches = lis.GetLongestSequence(matches);
        Assert.IsTrue(ValidateUniqueMatches(lisMatches, nodeName, LISActionType));
    }

    // The same success message goes to the console and to the application log.
    string successMessage = string.Format(null,
        "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
        referenceText, queryText);
    Console.WriteLine(successMessage);
    ApplicationLog.WriteLine(successMessage);
}