/// <summary>
/// Runs GetMUMs on a DNA reference/query pair with LengthOfMUM set to 4 and
/// compares the result with three hand-built expected matches.
/// </summary>
public void TestMUMmer3GetMUMsMultipleMum()
{
    const string referenceText = "ATGCGCATCCCCTT";
    const string queryText = "GCGCCCCCTA";

    Sequence referenceSeq = new Sequence(Alphabets.DNA, referenceText);
    Sequence querySeq = new Sequence(Alphabets.DNA, queryText);
    List<ISequence> queries = new List<ISequence> { querySeq };

    MUMmer mummer = new MUMmer3();
    mummer.LengthOfMUM = 4;
    var actual = mummer.GetMUMs(referenceSeq, queries);

    // A result object must come back before it can be compared.
    Assert.AreNotEqual(null, actual);

    // Expected matches, listed in MUM order.
    MaxUniqueMatch firstMatch = new MaxUniqueMatch
    {
        FirstSequenceMumOrder = 1,
        FirstSequenceStart = 2,
        SecondSequenceMumOrder = 1,
        SecondSequenceStart = 0,
        Length = 4,
        Query = querySeq
    };

    MaxUniqueMatch secondMatch = new MaxUniqueMatch
    {
        FirstSequenceMumOrder = 2,
        FirstSequenceStart = 8,
        SecondSequenceMumOrder = 2,
        SecondSequenceStart = 3,
        Length = 4,
        Query = querySeq
    };

    MaxUniqueMatch thirdMatch = new MaxUniqueMatch
    {
        FirstSequenceMumOrder = 3,
        FirstSequenceStart = 8,
        SecondSequenceMumOrder = 3,
        SecondSequenceStart = 4,
        Length = 5,
        Query = querySeq
    };

    IDictionary<ISequence, IList<MaxUniqueMatch>> expected =
        new Dictionary<ISequence, IList<MaxUniqueMatch>>
        {
            { querySeq, new List<MaxUniqueMatch> { firstMatch, secondMatch, thirdMatch } }
        };

    Assert.IsTrue(CompareMUMs(actual, expected));
}
/// <summary>
/// Compares Description, LengthOfMUM, Name, PairWiseAlgorithm,
/// ReferenceSequenceNumber and GapOpenCost of a freshly constructed MUMmer3
/// with the expected strings held in Constants, then logs success.
/// </summary>
public void MUMmerProperties()
{
    const string successMessage = "Successfully validated all the properties of MUMmer class.";

    // Typed null so ToString binds to the IFormatProvider overload.
    IFormatProvider noProvider = null;

    MUMmer aligner = new MUMmer3();

    Assert.AreEqual(Constants.MUMDescription, aligner.Description);

    string lengthText = aligner.LengthOfMUM.ToString(noProvider);
    Assert.AreEqual(Constants.MUMLength, lengthText);

    Assert.AreEqual(Constants.MUMName, aligner.Name);

    string pairwiseText = aligner.PairWiseAlgorithm.ToString();
    Assert.AreEqual(Constants.MUMPairWiseAlgorithm, pairwiseText);

    string refNumberText = aligner.ReferenceSequenceNumber.ToString(noProvider);
    Assert.AreEqual(Constants.MUMRefSeqNumber, refNumberText);

    string gapOpenText = aligner.GapOpenCost.ToString(noProvider);
    Assert.AreEqual(Constants.MUMGapOpenCost, gapOpenText);

    Console.WriteLine(successMessage);
    ApplicationLog.WriteLine(successMessage);
}
/// <summary>
/// Calls GetMUMs twice on the same DNA reference/query pair with LengthOfMUM = 3:
/// first with MaximumMatchEnabled untouched, expecting one entry holding an empty
/// match list, then with MaximumMatchEnabled = true, expecting a single match.
/// </summary>
public void TestMUMmer3GetMUMsMaxMatch()
{
    const string referenceText = "TTTTAATTTTAG";
    const string queryText = "ACTTTTGGA";

    Sequence referenceSeq = new Sequence(Alphabets.DNA, referenceText);
    Sequence querySeq = new Sequence(Alphabets.DNA, queryText);
    List<ISequence> queries = new List<ISequence> { querySeq };

    MUMmer mummer = new MUMmer3();
    mummer.LengthOfMUM = 3;

    // First pass: MaximumMatchEnabled left as constructed.
    var matches = mummer.GetMUMs(referenceSeq, queries);

    Assert.AreNotEqual(null, matches);

    // One entry for the single query, and no matches inside it.
    Assert.AreEqual(1, matches.Count);
    Assert.AreEqual(0, matches.Values.First().Count);

    // Second pass: same inputs with maximum-match mode switched on.
    mummer.MaximumMatchEnabled = true;
    matches = mummer.GetMUMs(referenceSeq, queries);

    MaxUniqueMatch expectedMatch = new MaxUniqueMatch
    {
        FirstSequenceMumOrder = 1,
        FirstSequenceStart = 0,
        SecondSequenceMumOrder = 1,
        SecondSequenceStart = 2,
        Length = 4,
        Query = querySeq
    };

    IDictionary<ISequence, IList<MaxUniqueMatch>> expected =
        new Dictionary<ISequence, IList<MaxUniqueMatch>>
        {
            { querySeq, new List<MaxUniqueMatch> { expectedMatch } }
        };

    Assert.IsTrue(CompareMUMs(matches, expected));
}
/// <summary>
/// Runs the three-argument GetMUMs overload (third argument true) on an RNA
/// reference/query pair with LengthOfMUM = 3 and expects exactly one match.
/// </summary>
public void TestMUMmer3GetFinalMUMsWithRNASingleMum()
{
    const string referenceText = "AUGCSWRYKMBVHDN";
    const string queryText = "UAUASWRYBB";

    Sequence referenceSeq = new Sequence(Alphabets.RNA, referenceText);
    Sequence querySeq = new Sequence(Alphabets.RNA, queryText);
    List<ISequence> queries = new List<ISequence> { querySeq };

    MUMmer3 mummer = new MUMmer3();
    mummer.LengthOfMUM = 3;
    var actual = mummer.GetMUMs(referenceSeq, queries, true);

    // A result object must come back before it can be compared.
    Assert.AreNotEqual(null, actual);

    MaxUniqueMatch expectedMatch = new MaxUniqueMatch
    {
        FirstSequenceMumOrder = 1,
        FirstSequenceStart = 4,
        SecondSequenceMumOrder = 1,
        SecondSequenceStart = 4,
        Length = 4,
        Query = querySeq
    };

    IDictionary<ISequence, IList<MaxUniqueMatch>> expected =
        new Dictionary<ISequence, IList<MaxUniqueMatch>>
        {
            { querySeq, new List<MaxUniqueMatch> { expectedMatch } }
        };

    Assert.IsTrue(CompareMUMs(actual, expected));
}
/// <summary>
/// For each aligner type (MUMmer3, NeedlemanWunschAligner, NUCmer3,
/// PairwiseOverlapAligner, SmithWatermanAligner) checks that ToString() of a new
/// instance equals ToString() of the matching SequenceAligners member, and that
/// SequenceAligners.All is not null.
/// </summary>
public void ValidateSequenceAlignersAll()
{
    const string successMessage = "Successfully created all the objects in Sequence Aligners";

    MUMmer mummer = new MUMmer3();
    string mummerText = mummer.ToString();
    Assert.AreEqual(mummerText, SequenceAligners.MUMmer.ToString());

    NeedlemanWunschAligner needlemanWunsch = new NeedlemanWunschAligner();
    string needlemanWunschText = needlemanWunsch.ToString();
    Assert.AreEqual(needlemanWunschText, SequenceAligners.NeedlemanWunsch.ToString());

    NUCmer nucmer = new NUCmer3();
    string nucmerText = nucmer.ToString();
    Assert.AreEqual(nucmerText, SequenceAligners.NUCmer.ToString());

    PairwiseOverlapAligner pairwiseOverlap = new PairwiseOverlapAligner();
    string pairwiseOverlapText = pairwiseOverlap.ToString();
    Assert.AreEqual(pairwiseOverlapText, SequenceAligners.PairwiseOverlap.ToString());

    SmithWatermanAligner smithWaterman = new SmithWatermanAligner();
    string smithWatermanText = smithWaterman.ToString();
    Assert.AreEqual(smithWatermanText, SequenceAligners.SmithWaterman.ToString());

    Assert.IsNotNull(SequenceAligners.All);

    // Write (not WriteLine) is intentional here: the original emits no newline.
    Console.Write(successMessage);
    ApplicationLog.Write(successMessage);
}
/// <summary>
/// Times MUMmer3.AlignSimple on the reference and query FASTA files named in the
/// MUMmer node of the test configuration, then prints the elapsed time, the change
/// in managed-heap size and the score of the first aligned pair.
/// </summary>
public void PerformMUMmerPerf()
{
    Stopwatch watch = new Stopwatch();
    IList<IPairwiseSequenceAlignment> alignment = null;

    // Input locations and the similarity matrix come from the MUMmer config node.
    string refPath = Utility._xmlUtil.GetTextValue(Constants.MUMmerNodeName,
        Constants.RefFilePathNode);
    string queryPath = Utility._xmlUtil.GetTextValue(Constants.MUMmerNodeName,
        Constants.QueryFilePathNode);
    string smFilePath = Utility._xmlUtil.GetTextValue(Constants.MUMmerNodeName,
        Constants.SMFilePathNode);

    // File names are only used for the report header.
    List<string> inputFiles = new List<string> { refPath, queryPath };

    IList<ISequence> referenceSeqs = new FastaParser().Parse(refPath);
    IList<ISequence> querySeqs = new FastaParser().Parse(queryPath);

    IAlphabet alphabet = Alphabets.DNA;
    ISequence firstReference = referenceSeqs[0];
    ISequence firstQuery = querySeqs[0];

    ISequence aInput = new Sequence(alphabet, firstReference.ToString());

    // NOTE(review): bInput is never used below (AlignSimple receives the parsed
    // query list). Kept because constructing it presumably validates the first
    // query against the DNA alphabet -- confirm before removing.
    ISequence bInput = new Sequence(alphabet, firstQuery.ToString());

    SimilarityMatrix sm = new SimilarityMatrix(smFilePath);

    mummerObj = new MUMmer3();
    mummerObj.GapOpenCost = -10;
    mummerObj.GapExtensionCost = -10;
    mummerObj.SimilarityMatrix = sm;
    mummerObj.LengthOfMUM = Int32.Parse(
        Utility._xmlUtil.GetTextValue(Constants.MUMmerNodeName, Constants.MUMLengthNode));

    // Take the baseline heap sample BEFORE starting the stopwatch. The end sample
    // is taken after Stop(), so sampling here keeps both memory probes outside
    // the timed window and the elapsed time covers only the alignment itself.
    long memoryStart = GC.GetTotalMemory(false);

    watch.Reset();
    watch.Start();

    // Align sequences using MUMmer.
    alignment = mummerObj.AlignSimple(aInput, querySeqs);

    watch.Stop();
    long memoryEnd = GC.GetTotalMemory(false);

    string memoryUsed = (memoryEnd - memoryStart).ToString();

    // Display MUMmer perf test case execution details.
    DisplayTestCaseHeader(inputFiles, watch, memoryUsed, "MUMmer");

    Console.WriteLine(string.Format(
        "MUMmer AlignSimple() method, Alignment Score is : {0}",
        alignment[0].PairwiseAlignedSequences[0].Score.ToString()));

    // Release the aligner held in the mummerObj field.
    mummerObj = null;
}
/// <summary>
/// Command-line entry point. Parses the reference and query FASTA files named in
/// the parsed options, optionally replaces or extends the queries with their
/// reverse complements, computes MUMs with MUMmer3, writes them with WriteMums and
/// reports per-stage timings on standard error.
/// </summary>
/// <param name="args">Raw command-line arguments, handed to ProcessCommandLine.</param>
static void Main(string[] args)
{
    Stopwatch swMumUtil = Stopwatch.StartNew();   // whole-run timer
    Stopwatch swInterval = new Stopwatch();       // per-stage timer, restarted for each stage

    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    // NOTE(review): this unconditionally overrides whatever verbosity the command
    // line asked for. It looks like a debugging leftover -- confirm before removing.
    myArgs.verbose = true;

    swInterval.Restart();
    IList<ISequence> referenceSequences = ParseFastA(myArgs.fileList[0]);
    swInterval.Stop();
    if (myArgs.verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.fileList[0]));
        Console.Error.WriteLine(" Number of Sequences: {0}", referenceSequences.Count);
        Console.Error.WriteLine(" Length of first Sequence: {0:#,000}", referenceSequences[0].Count);
        Console.Error.WriteLine(" Read/Processing time: {0}", swInterval.Elapsed);
    }

    swInterval.Restart();
    IList<ISequence> querySequences = ParseFastA(myArgs.fileList[1]);
    swInterval.Stop();
    if (myArgs.verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed Query FastA file: {0}", Path.GetFullPath(myArgs.fileList[1]));
        Console.Error.WriteLine(" Number of Sequences: {0}", querySequences.Count);
        Console.Error.WriteLine(" Length of first Sequence: {0:#,000}", querySequences[0].Count);
        Console.Error.WriteLine(" Read/Processing time: {0}", swInterval.Elapsed);
    }

    if (myArgs.reverseOnly)
    {
        // Convert the list to reverse complement sequences.
        swInterval.Restart();
        querySequences = ReverseComplementSequenceList(querySequences);
        swInterval.Stop();
        if (myArgs.verbose)
        {
            Console.Error.WriteLine(" Reverse Complement time: {0}", swInterval.Elapsed);
        }
    }
    else if (myArgs.both)
    {
        // Add the reverse complement sequences to the query list too.
        swInterval.Restart();
        querySequences = AddReverseComplementsToSequenceList(querySequences);
        swInterval.Stop();
        if (myArgs.verbose)
        {
            Console.Error.WriteLine(" Add Reverse Complement time: {0}", swInterval.Elapsed);
        }
    }

    // DISCUSSION:
    //   Why an empty constructor here? Why not pass the reference / query info
    //   on construction?
    // ANSWER:
    //   That would break the "constructors should not do a 'lot' of work" philosophy.
    //
    // DISCUSSION:
    //   Why an IDictionary return? Why not encapsulate MUMs into a class of its
    //   own, or perhaps a MumList?
    //
    // DISCUSSION:
    //   Three possible outputs are desired: globally unique 'mum' (v1), unique in
    //   the reference sequence (v2), or the maximum matches of the given length
    //   or greater.
    IDictionary<ISequence, IList<MaxUniqueMatch>> mums;
    MUMmer3 mum3 = new MUMmer3();

    if (myArgs.maxmatch)
    {
        // DISCUSSION:
        //   With a small number of configuration parameters it is frequently better
        //   to create a dedicated function to do the work, e.g.
        //       mums = mum3.GetMumsMaxMatch(referenceSequences[0], querySequences);
        //
        //   With a large number of configuration parameters there are several styles
        //   for passing the information. Do not pass a bare 'true' or 'false' as a
        //   parameter: it is frequently uncommented what the value means in the
        //   context of the call, which leads to confusion.
        //
        //   If many arguments are necessary to configure the call, seriously consider
        //   a re-design. If it MUST be so, there are two preferred ways to pass the
        //   configuration information in:
        //     1. If the same parameter values will be frequently re-used, use a
        //        parameter structure and keep it for use between calls.
        //     2. If the parameter values are local to this invocation and may change
        //        between calls, set them on the object being invoked: good defaults
        //        at construction, with properties to update them, e.g.
        //            mum3.ProcessWithMaxMum = true;
        //            mum3.ProcessWithAmbiguityDisallowed = true;
        //            mums = mum3.GetMums(referenceSequences[0], querySequences);
        //
        // This is a placeholder stub for now.
        mum3.MaximumMatchEnabled = true;
        swInterval.Restart();
        mums = mum3.GetMUMs(referenceSequences[0], querySequences);
        swInterval.Stop();
        if (myArgs.verbose)
        {
            Console.Error.WriteLine(" Compute GetMumsMaxMatch() time: {0}", swInterval.Elapsed);
        }
    }
    else if (myArgs.mum)
    {
        // Intended eventual call:
        //     mums = mum3.GetMumsMum(referenceSequences[0], querySequences);
        swInterval.Restart();
        mums = mum3.GetMUMs(referenceSequences[0], querySequences);

        // Stop before reporting, exactly as the /maxmatch and /mumreference branches
        // do, so the printed value is a settled measurement and not a reading taken
        // from a stopwatch that is still running.
        swInterval.Stop();
        if (myArgs.verbose)
        {
            Console.Error.WriteLine(" Compute GetMumsMum() time: {0}", swInterval.Elapsed);
        }
    }
    else if (myArgs.mumreference)
    {
        // NOTE:
        //   mum3.GetMUMs() really implements the GetMumReference() functionality.
        //   The call should eventually be:
        //       mums = mum3.GetMumsReference(referenceSequences[0], querySequences);
        swInterval.Restart();
        mums = mum3.GetMUMs(referenceSequences[0], querySequences);
        swInterval.Stop();
        if (myArgs.verbose)
        {
            Console.Error.WriteLine(" Compute GetMumsReference() time: {0}", swInterval.Elapsed);
        }
    }
    else
    {
        // Cannot happen: argument processing already asserted that one of the three
        // options must be specified.
        Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
        Environment.Exit(-1);

        // The compiler does not recognize Environment.Exit() as a no-return function,
        // so without this throw it reports use of the uninitialized 'mums' below.
        // A specific exception type is used rather than bare System.Exception.
        throw new InvalidOperationException("Never hit this");
    }

    swInterval.Restart();
    WriteMums(mums, myArgs);
    swInterval.Stop();
    if (myArgs.verbose)
    {
        Console.Error.WriteLine(" WriteMums() time: {0}", swInterval.Elapsed);
    }

    swMumUtil.Stop();
    if (myArgs.verbose)
    {
        Console.Error.WriteLine(" Total MumUtil Runtime: {0}", swMumUtil.Elapsed);
    }
}
/// <summary>
/// Validate the Mummer GetMUMs method for different test cases.
/// </summary>
/// <param name="nodeName">Name of the XML node to be read.</param>
/// <param name="isFilePath">
/// True to load the reference and query sequences from the FASTA files named in
/// the node; false to build them from the sequence text and alphabet names stored
/// in the node itself.
/// </param>
/// <param name="isAfterLIS">
/// Passed as the third argument of GetMUMs when <paramref name="isLIS"/> is true;
/// ignored otherwise.
/// </param>
/// <param name="isLIS">
/// True to call the three-argument GetMUMs overload, false to call the
/// two-argument one.
/// </param>
static void ValidateMUMsGeneralTestCases(string nodeName, bool isFilePath, bool isAfterLIS, bool isLIS)
{
    ISequence referenceSeq;
    ISequence querySeq;
    IList<ISequence> querySeqs;

    if (isFilePath)
    {
        // Reference sequence: first record of the FASTA file named in the configuration.
        string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        FastaParser parser = new FastaParser();
        IList<ISequence> referenceSeqs = parser.Parse(filePath);
        referenceSeq = referenceSeqs[0];

        // Query sequences: every record of the search FASTA file; the first one is
        // the sequence the results are validated against.
        string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        FastaParser queryParser = new FastaParser();
        querySeqs = queryParser.Parse(queryFilePath);
        querySeq = querySeqs[0];
    }
    else
    {
        // Reference sequence text and alphabet name stored inline in the configuration.
        string referenceText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

        // Query sequence text and its own alphabet name.
        string queryText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);

        querySeqs = new List<ISequence> { querySeq };
    }

    string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    MUMmer mum = new MUMmer3();
    mum.LengthOfMUM = long.Parse(mumLength, null);

    IDictionary<ISequence, IList<MaxUniqueMatch>> actualResult = isLIS
        ? mum.GetMUMs(referenceSeq, querySeqs, isAfterLIS)
        : mum.GetMUMs(referenceSeq, querySeqs);

    // Validate MUMs output.
    Assert.IsTrue(ValidateMums(nodeName, actualResult, querySeq));

    // The console text has no trailing period while the log text does; both are
    // reproduced exactly as they were.
    Console.WriteLine("MUMmer BVT : Successfully validated the Mumms");
    ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the Mumms.");
}
/// <summary>
/// Validates the Mummer align method for several test cases for the parameters passed.
/// </summary>
/// <param name="nodeName">Node name to be read from xml</param>
/// <param name="isFilePath">
/// True to load both sequences from the FASTA files named in the node; false to
/// build them from the sequence text and alphabet names stored in the node.
/// </param>
/// <param name="isSeqList">
/// True to append the reference to the query list and call Align on that list;
/// false to call AlignSimple with the reference and the query list.
/// </param>
static void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isSeqList)
{
    ISequence referenceSeq;
    ISequence querySeq;
    IList<ISequence> querySeqs;
    string referenceSequence;
    string querySequence;

    if (!isFilePath)
    {
        // Inline case: sequence text and alphabet names come straight from the node.
        referenceSequence = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string alphabetName = Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(alphabetName), referenceSequence);

        querySequence = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        alphabetName = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(alphabetName), querySequence);

        querySeqs = new List<ISequence> { querySeq };
    }
    else
    {
        // File case: the reference is the first record of the reference FASTA file.
        string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        IList<ISequence> referenceSeqs = new FastaParser().Parse(filePath);
        referenceSeq = referenceSeqs[0];
        referenceSequence = referenceSeq.ToString();

        // The queries are every record of the search FASTA file.
        string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        querySeqs = new FastaParser().Parse(queryFilePath);
        querySeq = querySeqs[0];
        querySequence = querySeq.ToString();
    }

    // Configure the aligner from the node.
    string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    MUMmer mum = new MUMmer3();
    mum.LengthOfMUM = long.Parse(mumLength, null);
    mum.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mum.GapOpenCost = int.Parse(
        Utility._xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode),
        (IFormatProvider)null);

    IList<IPairwiseSequenceAlignment> align;
    if (isSeqList)
    {
        querySeqs.Add(referenceSeq);
        align = mum.Align(querySeqs);
    }
    else
    {
        align = mum.AlignSimple(referenceSeq, querySeqs);
    }

    // Score of the first aligned pair must match the configured score text.
    string expectedScore = Utility._xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string actualScore = align[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null);
    Assert.AreEqual(expectedScore, actualScore);

    string scoreMessage = string.Format(null,
        "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
        referenceSequence, querySequence);
    Console.WriteLine(scoreMessage);
    ApplicationLog.WriteLine(scoreMessage);

    // Build the expected alignment from the configured sequences and compare.
    string[] expectedSequences = Utility._xmlUtil.GetTextValues(nodeName,
        Constants.ExpectedSequencesNode);

    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
        SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
        Score = int.Parse(expectedScore)
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput =
        new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(align, expectedOutput));

    const string alignedMessage = "MUMmer BVT : Successfully validated the aligned sequences.";
    Console.WriteLine(alignedMessage);
    ApplicationLog.WriteLine(alignedMessage);
}