Example #1
0
        /// <summary>
        /// Runs GetMUMs twice over the same DNA reference/query pair: first with
        /// the default settings, then with MaximumMatchEnabled switched on, and
        /// checks the matches reported by each run.
        /// </summary>
        public void TestMUMmer3GetMUMsMaxMatch()
        {
            // "TTTT" appears at offsets 0 and 6 of the reference and at offset 2 of the query.
            var referenceSeq = new Sequence(Alphabets.DNA, "TTTTAATTTTAG");
            var querySeq     = new Sequence(Alphabets.DNA, "ACTTTTGGA");
            var querySeqs    = new List<ISequence> { querySeq };

            MUMmer mummer = new MUMmer3();
            mummer.LengthOfMUM = 3;

            // First pass: default settings.
            var result = mummer.GetMUMs(referenceSeq, querySeqs);

            // The result must exist and hold exactly one entry (one query) ...
            Assert.AreNotEqual(null, result);
            Assert.AreEqual(1, result.Count);

            // ... whose match list is empty.
            Assert.AreEqual(0, result.Values.First().Count);

            // Second pass: same inputs with maximum-match mode turned on.
            mummer.MaximumMatchEnabled = true;
            result = mummer.GetMUMs(referenceSeq, querySeqs);

            // Expect a single match of length 4: reference offset 0, query offset 2.
            var expectedMatch = new MaxUniqueMatch
            {
                FirstSequenceMumOrder  = 1,
                FirstSequenceStart     = 0,
                SecondSequenceMumOrder = 1,
                SecondSequenceStart    = 2,
                Length                 = 4,
                Query                  = querySeq
            };

            IDictionary<ISequence, IList<MaxUniqueMatch>> expectedOutput =
                new Dictionary<ISequence, IList<MaxUniqueMatch>>();
            expectedOutput.Add(querySeq, new List<MaxUniqueMatch> { expectedMatch });

            Assert.IsTrue(CompareMUMs(result, expectedOutput));
        }
Example #2
0
        /// <summary>
        /// Checks that GetMUMs, with a minimum MUM length of 4, reports the three
        /// expected matches between a DNA reference and a single DNA query.
        /// </summary>
        public void TestMUMmer3GetMUMsMultipleMum()
        {
            var referenceSeq = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
            var querySeq     = new Sequence(Alphabets.DNA, "GCGCCCCCTA");
            var querySeqs    = new List<ISequence> { querySeq };

            MUMmer mummer = new MUMmer3();
            mummer.LengthOfMUM = 4;

            var result = mummer.GetMUMs(referenceSeq, querySeqs);

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            // "GCGC": reference offset 2, query offset 0.
            var firstMatch = new MaxUniqueMatch
            {
                FirstSequenceMumOrder  = 1,
                FirstSequenceStart     = 2,
                SecondSequenceMumOrder = 1,
                SecondSequenceStart    = 0,
                Length                 = 4,
                Query                  = querySeq
            };

            // "CCCC": reference offset 8, query offset 3.
            var secondMatch = new MaxUniqueMatch
            {
                FirstSequenceMumOrder  = 2,
                FirstSequenceStart     = 8,
                SecondSequenceMumOrder = 2,
                SecondSequenceStart    = 3,
                Length                 = 4,
                Query                  = querySeq
            };

            // "CCCCT": reference offset 8, query offset 4.
            var thirdMatch = new MaxUniqueMatch
            {
                FirstSequenceMumOrder  = 3,
                FirstSequenceStart     = 8,
                SecondSequenceMumOrder = 3,
                SecondSequenceStart    = 4,
                Length                 = 5,
                Query                  = querySeq
            };

            IDictionary<ISequence, IList<MaxUniqueMatch>> expectedOutput =
                new Dictionary<ISequence, IList<MaxUniqueMatch>>();
            expectedOutput.Add(
                querySeq,
                new List<MaxUniqueMatch> { firstMatch, secondMatch, thirdMatch });

            Assert.IsTrue(CompareMUMs(result, expectedOutput));
        }
Example #3
0
        /// <summary>
        /// Checks that the three-argument GetMUMs overload, with a minimum MUM
        /// length of 3, reports a single match for an RNA reference/query pair
        /// that includes ambiguity symbols.
        /// </summary>
        public void TestMUMmer3GetFinalMUMsWithRNASingleMum()
        {
            var referenceSeq = new Sequence(Alphabets.RNA, "AUGCSWRYKMBVHDN");
            var querySeq     = new Sequence(Alphabets.RNA, "UAUASWRYBB");
            var querySeqs    = new List<ISequence> { querySeq };

            MUMmer3 mummer = new MUMmer3();
            mummer.LengthOfMUM = 3;

            // NOTE(review): the meaning of the trailing 'true' is not visible here;
            // judging by the test name it presumably requests the final MUMs - confirm.
            var result = mummer.GetMUMs(referenceSeq, querySeqs, true);

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            // "SWRY": offset 4 in both the reference and the query.
            var expectedMatch = new MaxUniqueMatch
            {
                FirstSequenceMumOrder  = 1,
                FirstSequenceStart     = 4,
                SecondSequenceMumOrder = 1,
                SecondSequenceStart    = 4,
                Length                 = 4,
                Query                  = querySeq
            };

            IDictionary<ISequence, IList<MaxUniqueMatch>> expectedOutput =
                new Dictionary<ISequence, IList<MaxUniqueMatch>>();
            expectedOutput.Add(querySeq, new List<MaxUniqueMatch> { expectedMatch });

            Assert.IsTrue(CompareMUMs(result, expectedOutput));
        }
Example #4
0
        /// <summary>
        /// Program entry point. Parses the command line, reads the reference FastA
        /// file (fileList[0]) and the query FastA file (fileList[1]), optionally
        /// replaces or extends the query list with reverse complements, computes the
        /// MUMs of the first reference sequence against the query list, and writes
        /// them with WriteMums. Progress and timing information goes to standard error.
        /// </summary>
        /// <param name="args">Raw command-line arguments, handed to ProcessCommandLine.</param>
        static void Main(string[] args)
        {
            Stopwatch swMumUtil  = Stopwatch.StartNew();
            Stopwatch swInterval = new Stopwatch();

            Console.Error.WriteLine(SplashString());
            CommandLineOptions myArgs = ProcessCommandLine(args);

            // NOTE(review): this unconditionally overrides whatever verbosity was
            //   requested on the command line; it looks like a debugging leftover.
            //   Left as-is to keep the current output - confirm whether it should go.
            myArgs.verbose = true;

            swInterval.Restart();
            IList <ISequence> referenceSequences = ParseFastA(myArgs.fileList[0]);

            swInterval.Stop();
            if (referenceSequences.Count == 0)
            {
                // Everything below indexes referenceSequences[0]; fail with a clear
                //   message instead of an index-out-of-range exception.
                Console.Error.WriteLine("\nError: no sequences found in reference FastA file: {0}", Path.GetFullPath(myArgs.fileList[0]));
                Environment.Exit(-1);
            }
            if (myArgs.verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.fileList[0]));
                Console.Error.WriteLine("             Number of Sequences: {0}", referenceSequences.Count);
                Console.Error.WriteLine("        Length of first Sequence: {0:#,000}", referenceSequences[0].Count);
                Console.Error.WriteLine("            Read/Processing time: {0}", swInterval.Elapsed);
            }

            swInterval.Restart();
            IList <ISequence> querySequences = ParseFastA(myArgs.fileList[1]);

            swInterval.Stop();
            if (querySequences.Count == 0)
            {
                // The verbose report below indexes querySequences[0]; fail with a
                //   clear message instead of an index-out-of-range exception.
                Console.Error.WriteLine("\nError: no sequences found in query FastA file: {0}", Path.GetFullPath(myArgs.fileList[1]));
                Environment.Exit(-1);
            }
            if (myArgs.verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("      Processed Query FastA file: {0}", Path.GetFullPath(myArgs.fileList[1]));
                Console.Error.WriteLine("             Number of Sequences: {0}", querySequences.Count);
                Console.Error.WriteLine("        Length of first Sequence: {0:#,000}", querySequences[0].Count);
                Console.Error.WriteLine("            Read/Processing time: {0}", swInterval.Elapsed);
            }

            if (myArgs.reverseOnly)
            {   // convert list to reverse complement sequences
                swInterval.Restart();
                querySequences = ReverseComplementSequenceList(querySequences);
                swInterval.Stop();
                if (myArgs.verbose)
                {
                    Console.Error.WriteLine("         Reverse Complement time: {0}", swInterval.Elapsed);
                }
            }
            else if (myArgs.both)
            {   // add the reverse complement sequences to the query list too
                swInterval.Restart();
                querySequences = AddReverseComplementsToSequenceList(querySequences);
                swInterval.Stop();
                if (myArgs.verbose)
                {
                    Console.Error.WriteLine("     Add Reverse Complement time: {0}", swInterval.Elapsed);
                }
            }

            // DISCUSSION:
            // Why an empty constructor here?
            // Why not pass the reference / query info on construction?
            // ANSWER:
            //     That would break the "Constructors should not do a 'lot' of work" philosophy

            // DISCUSSION:
            // Why an IDictionary return?  Why not encapsulate MUMs into a class of its own?
            //   Or perhaps a MumList
            //
            // DISCUSSION:
            // Three possible outputs desired.  Globally unique 'mum' (v1), unique in reference sequence (v2),
            //   or get the maximum matches of length or greater.
            //
            IDictionary <ISequence, IList <MaxUniqueMatch> > mums;
            MUMmer3 mum3 = new MUMmer3();

            if (myArgs.maxmatch)
            {
                // DISCUSSION:
                //   If there are a small number of configuration parameters
                //   it is frequently better to create a function to do the
                //   work e.g.
                //   mums = mum3.GetMumsMaxMatch( referenceSequences[0], querySequences );
                //
                //   If we have a large number of configuration parameters there are
                //   several styles to pass the information.  Do not pass a 'true' or
                //   'false' as a parameter.  It is frequently uncommented as to what
                //   the 'true' or 'false' means in that context of the call and leads
                //   to confusion.
                //   If many arguments are necessary to configure the call, seriously
                //   consider a re-design.  If it MUST be, there are two preferred
                //   ways to pass the configuration information in.
                //     1.  If the same parameter values will be frequently re-used,
                //         then use a parameter structure and save it for use between
                //         calls.
                //     2.  If the parameter values are local to this invocation and
                //         may change between calls, set the parameter values on
                //         the object you will be invoking.  Good 'defaults' during
                //         object construction and allow properties to update them.
                // Mummer3 mum3 = new Mummer3( ProcessWithMaxMum=true, ProcessWithAmbiguityDisallowed=true );
                //  or
                // mum3.ProcessWithMaxMum = true;
                // mum3.ProcessWithAmbiguityDisallowed = true;
                // mums = mum3.GetMums(referenceSequences[0], querySequences);

                // This is a placeholder stub for now!!!!!
                mum3.MaximumMatchEnabled = true;
                swInterval.Restart();
                mums = mum3.GetMUMs(referenceSequences[0], querySequences);
                swInterval.Stop();
                if (myArgs.verbose)
                {
                    Console.Error.WriteLine("  Compute GetMumsMaxMatch() time: {0}", swInterval.Elapsed);
                }
            }
            else if (myArgs.mum)
            {
                // Intended call once available:
                // mums = mum3.GetMumsMum( referenceSequences[0], querySequences);
                swInterval.Restart();
                mums = mum3.GetMUMs(referenceSequences[0], querySequences);
                swInterval.Stop();
                if (myArgs.verbose)
                {
                    Console.Error.WriteLine("       Compute GetMumsMum() time: {0}", swInterval.Elapsed);
                }
            }
            else if (myArgs.mumreference)
            {
                // NOTE:
                //     mum3.GetMUMs() this really implements the GetMumReference() functionality
                // mums = mum3.GetMumsReference( referenceSequences[0], querySequences);     // should be
                swInterval.Restart();
                mums = mum3.GetMUMs(referenceSequences[0], querySequences);
                swInterval.Stop();
                if (myArgs.verbose)
                {
                    Console.Error.WriteLine(" Compute GetMumsReference() time: {0}", swInterval.Elapsed);
                }
            }
            else
            {
                // cannot happen as argument processing already asserted one of the three options must be specified
                Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
                Environment.Exit(-1);
                // kill the error about uninitialized use of 'mums' in the next block...the compiler does not recognize
                //   Environment.Exit() as a no-return function
                throw new InvalidOperationException("Never hit this");
            }

            swInterval.Restart();
            WriteMums(mums, myArgs);
            swInterval.Stop();
            if (myArgs.verbose)
            {
                Console.Error.WriteLine("                WriteMums() time: {0}", swInterval.Elapsed);
            }
            swMumUtil.Stop();
            if (myArgs.verbose)
            {
                Console.Error.WriteLine("           Total MumUtil Runtime: {0}", swMumUtil.Elapsed);
            }
        }
Example #5
0
        /// <summary>
        /// Validate the Mummer GetMUMs method for different test cases.
        /// The reference and query sequences are either parsed from FastA files or
        /// built from inline text, both taken from the XML configuration node, and
        /// the resulting MUMs are checked with ValidateMums.
        /// </summary>
        /// <param name="nodeName">Name of the XML node to be read.</param>
        /// <param name="isFilePath">
        /// True to read the sequences from the FastA files named in the node;
        /// false to build them from the sequence text and alphabet names in the node.
        /// </param>
        /// <param name="isAfterLIS">
        /// Passed as the third argument of GetMUMs when <paramref name="isLIS"/> is
        /// true; ignored otherwise.
        /// </param>
        /// <param name="isLIS">
        /// True to call the three-argument GetMUMs overload; false to call the
        /// two-argument overload.
        /// </param>
        static void ValidateMUMsGeneralTestCases(string nodeName, bool isFilePath,
                                                 bool isAfterLIS, bool isLIS)
        {
            ISequence         referenceSeq;
            ISequence         querySeq;
            IList <ISequence> querySeqs;

            if (isFilePath)
            {
                // Gets the reference sequence file path from the configuration file
                string filePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

                // Only the first sequence of the reference file is used.
                FastaParser       parser        = new FastaParser();
                IList <ISequence> referenceSeqs = parser.Parse(filePath);
                referenceSeq = referenceSeqs[0];

                // Gets the query (search) sequence file path from the configuration file
                string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                     Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

                // All parsed query sequences are searched; the first one is the
                // one handed to ValidateMums below.
                FastaParser queryParser = new FastaParser();
                querySeqs = queryParser.Parse(queryFilePath);
                querySeq  = querySeqs[0];
            }
            else
            {
                // Gets the reference sequence text and its alphabet from the configuration file
                string referenceSequence = Utility._xmlUtil.GetTextValue(nodeName,
                                                                         Constants.SequenceNode);

                string referenceSeqAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
                                                                            Constants.AlphabetNameNode);

                referenceSeq = new Sequence(Utility.GetAlphabet(referenceSeqAlphabet),
                                            referenceSequence);

                // Gets the query (search) sequence text and its alphabet from the configuration file
                string querySequence = Utility._xmlUtil.GetTextValue(nodeName,
                                                                     Constants.SearchSequenceNode);

                string querySeqAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
                                                                        Constants.SearchSequenceAlphabetNode);

                querySeq = new Sequence(Utility.GetAlphabet(querySeqAlphabet),
                                        querySequence);
                querySeqs = new List <ISequence> { querySeq };
            }

            string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            MUMmer mum = new MUMmer3();

            mum.LengthOfMUM = long.Parse(mumLength, null);
            IDictionary <ISequence, IList <MaxUniqueMatch> > actualResult;

            if (isLIS)
            {
                actualResult = mum.GetMUMs(referenceSeq, querySeqs, isAfterLIS);
            }
            else
            {
                actualResult = mum.GetMUMs(referenceSeq, querySeqs);
            }

            // Validate MUMs output.
            Assert.IsTrue(ValidateMums(nodeName, actualResult, querySeq));

            Console.WriteLine("MUMmer BVT : Successfully validated the Mumms");
            ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the Mumms.");
        }