Beispiel #1
0
        /// <summary>
        /// Entry point of the LIS utility.
        /// Writes the splash text to standard error and, when arguments are supplied, runs one of two modes:
        /// LIS-only mode (<c>PerformLISOnly</c>) parses a MUM file, sorts the MUMs, computes the longest
        /// increasing subsequence and writes it; otherwise the first sequence of the reference FastA file is
        /// indexed by <c>MUMmer</c> and, for every query sequence, the matches unique in the reference are
        /// reduced by the longest increasing subsequence and written.
        /// When <c>Verbose</c> is set, timing information is written to standard error.
        /// With no arguments the help text is written to standard output.
        /// </summary>
        /// <param name="args">Command line arguments; parsed by <c>ProcessCommandLine</c>.</param>
        /// <remarks>
        /// Every exception raised while processing is handed to <c>DisplayException</c>; nothing propagates
        /// out of this method.
        /// </remarks>
        private static void Main(string[] args)
        {
            try
            {
                Stopwatch stopWatchMumUtil  = Stopwatch.StartNew();
                Stopwatch stopWatchInterval = new Stopwatch();
                Console.Error.WriteLine(SplashString());
                if (args.Length > 0)
                {
                    CommandLineOptions myArgs = ProcessCommandLine(args);

                    TimeSpan writetime = new TimeSpan();
                    LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
                    if (myArgs.PerformLISOnly)
                    {
                        stopWatchInterval.Restart();
                        IList <Match> parsedMUMs = ParseMums(myArgs.FileList[0]);
                        stopWatchInterval.Stop();

                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Processed MUM file: {0}", Path.GetFullPath(myArgs.FileList[0]));
                            Console.Error.WriteLine("        Total MUMs: {0:#,000}", parsedMUMs.Count);
                            Console.Error.WriteLine("            Read/Processing time: {0}", stopWatchInterval.Elapsed);
                        }

                        stopWatchInterval.Restart();
                        IList <Match> sortedMUMs = lis.SortMum(parsedMUMs);
                        stopWatchInterval.Stop();

                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Sort MUM time: {0}", stopWatchInterval.Elapsed);
                        }

                        // The LIS step is skipped entirely when there is nothing to process.
                        bool hasMums = sortedMUMs.Count != 0;
                        IEnumerable <Match> longestSequence = null;

                        stopWatchInterval.Restart();
                        if (hasMums)
                        {
                            longestSequence = lis.GetLongestSequence(sortedMUMs);
                        }

                        stopWatchInterval.Stop();
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Perform LIS time: {0}", stopWatchInterval.Elapsed);
                        }

                        stopWatchInterval.Restart();
                        if (hasMums)
                        {
                            WriteMums(longestSequence);
                        }

                        stopWatchInterval.Stop();

                        // Accumulate the write time so the "WriteMums() time" summary below reports it
                        // in LIS-only mode as well (it previously always showed zero here).
                        writetime = writetime.Add(stopWatchInterval.Elapsed);
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Write MUM time: {0}", stopWatchInterval.Elapsed);
                        }

                        stopWatchMumUtil.Stop();
                    }
                    else
                    {
                        long refFileLength = new FileInfo(myArgs.FileList[0]).Length;

                        stopWatchInterval.Restart();
                        IEnumerable <ISequence> referenceSequences = ParseFastA(myArgs.FileList[0]);
                        ISequence referenceSequence = referenceSequences.First();
                        stopWatchInterval.Stop();
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.FileList[0]));
                            Console.Error.WriteLine("        Length of first Sequence: {0:#,000}", referenceSequence.Count);
                            Console.Error.WriteLine("            Read/Processing time: {0}", stopWatchInterval.Elapsed);
                            Console.Error.WriteLine("            File Size           : {0}", refFileLength);
                        }

                        long queryFileLength = new FileInfo(myArgs.FileList[1]).Length;

                        stopWatchInterval.Restart();
                        IEnumerable <ISequence> querySequences = ParseFastA(myArgs.FileList[1]);
                        stopWatchInterval.Stop();
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("      Processed Query FastA file: {0}", Path.GetFullPath(myArgs.FileList[1]));
                            Console.Error.WriteLine("            Read/Processing time: {0}", stopWatchInterval.Elapsed);
                            Console.Error.WriteLine("            File Size           : {0}", queryFileLength);
                        }

                        if (myArgs.ReverseOnly)
                        {
                            stopWatchInterval.Restart();
                            querySequences = ReverseComplementSequenceList(querySequences);
                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine("         Reverse Complement time: {0}", stopWatchInterval.Elapsed);
                            }
                        }
                        else if (myArgs.Both)
                        {
                            // add the reverse complement sequences to the query list too
                            stopWatchInterval.Restart();
                            querySequences = AddReverseComplementsToSequenceList(querySequences);
                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine("     Add Reverse Complement time: {0}", stopWatchInterval.Elapsed);
                            }
                        }

                        TimeSpan mummerTime = new TimeSpan(0, 0, 0);
                        stopWatchInterval.Restart();
                        Sequence seq = referenceSequence as Sequence;
                        if (seq == null)
                        {
                            throw new ArgumentException("MUMmer supports only Sequence class");
                        }

                        MUMmer mummer = new MUMmer(seq);
                        stopWatchInterval.Stop();
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine("Suffix tree construction time: {0}", stopWatchInterval.Elapsed);
                        }

                        mummer.LengthOfMUM = myArgs.Length;
                        mummer.NoAmbiguity = myArgs.NoAmbiguity;

                        // All three output options run the same computation in this tool
                        // (matches unique in the reference, followed by LIS); only the label of the
                        // verbose timing line differs between them.
                        string computeTimeFormat;
                        if (myArgs.MaxMatch)
                        {
                            computeTimeFormat = "  Compute GetMumsMaxMatch() with LIS time: {0}";
                        }
                        else if (myArgs.Mum)
                        {
                            computeTimeFormat = "       Compute GetMumsMum() with LIS time: {0}";
                        }
                        else if (myArgs.Mumreference)
                        {
                            computeTimeFormat = " Compute GetMumsReference() time: {0}";
                        }
                        else
                        {
                            // cannot happen as argument processing already asserted one of the three options must be specified
                            Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
                            Environment.Exit(-1);

                            // Environment.Exit() does not return, but the compiler cannot know that;
                            // the return keeps 'computeTimeFormat' definitely assigned below.
                            return;
                        }

                        long   querySeqCount  = 0;
                        double sumofSeqLength = 0;
                        foreach (ISequence querySeq in querySequences)
                        {
                            // Time the match search together with the LIS reduction.
                            stopWatchInterval.Restart();
                            IList <Match> mumList = GetMumsForLIS(mummer.GetMatchesUniqueInReference(querySeq));
                            bool hasMatches = mumList.Count != 0;
                            IEnumerable <Match> mums = null;
                            if (hasMatches)
                            {
                                mums = lis.GetLongestSequence(lis.SortMum(mumList));
                            }

                            stopWatchInterval.Stop();
                            mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);

                            // Nothing is written for a query sequence without matches.
                            if (hasMatches)
                            {
                                stopWatchInterval.Restart();
                                WriteMums(mums, referenceSequence, querySeq, myArgs);
                                stopWatchInterval.Stop();
                                writetime = writetime.Add(stopWatchInterval.Elapsed);
                            }

                            querySeqCount++;
                            sumofSeqLength += querySeq.Count;
                        }

                        if (myArgs.Verbose)
                        {
                            // Avoid reporting NaN when the query file contained no sequences.
                            double averageSeqLength = querySeqCount == 0 ? 0 : sumofSeqLength / querySeqCount;
                            Console.Error.WriteLine("             Number of query Sequences: {0}", querySeqCount);
                            Console.Error.WriteLine("             Average length of query Sequences: {0}", averageSeqLength);
                            Console.Error.WriteLine(computeTimeFormat, mummerTime);
                        }
                    }

                    if (myArgs.Verbose)
                    {
                        Console.Error.WriteLine("                WriteMums() time: {0}", writetime);
                    }

                    stopWatchMumUtil.Stop();
                    if (myArgs.Verbose)
                    {
                        Console.Error.WriteLine("           Total LisUtil Runtime: {0}", stopWatchMumUtil.Elapsed);
                    }
                }
                else
                {
                    Console.WriteLine(Resources.LisUtilHelp);
                }
            }
            catch (Exception ex)
            {
                DisplayException(ex);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Entry point of the MUM utility.
        /// Writes the splash text to standard error, parses the command line and, unless help was
        /// requested, builds a <c>MultiWaySuffixTree</c> over the first sequence of the reference FastA
        /// file, then computes and writes the matches for every query sequence according to the selected
        /// option (maxmatch, mum or mumreference).
        /// When <c>verbose</c> is set, timing and memory information is written to standard error.
        /// With no arguments the help text is written to standard output.
        /// </summary>
        /// <param name="args">Command line arguments; parsed by <c>ProcessCommandLine</c>.</param>
        /// <remarks>
        /// Every exception raised while processing is handed to <c>DisplayException</c>; nothing propagates
        /// out of this method.
        /// </remarks>
        static void Main(string[] args)
        {
            try
            {
                Stopwatch swMumUtil  = Stopwatch.StartNew();
                Stopwatch swInterval = new Stopwatch();

                Console.Error.WriteLine(SplashString());
                if (args.Length > 0)
                {
                    CommandLineOptions myArgs = ProcessCommandLine(args);
                    if (myArgs.help)
                    {
                        Console.WriteLine(Resources.MumUtilHelp);
                    }
                    else
                    {
                        long refFileLength = new FileInfo(myArgs.fileList[0]).Length;

                        swInterval.Restart();
                        IEnumerable <ISequence> referenceSequences = ParseFastA(myArgs.fileList[0]);
                        Sequence referenceSequence = referenceSequences.First() as Sequence;
                        swInterval.Stop();

                        // Fail with a clear message instead of a NullReferenceException further down
                        // when the parsed reference is not a Sequence instance.
                        if (referenceSequence == null)
                        {
                            throw new ArgumentException("MUMmer supports only Sequence class");
                        }

                        if (myArgs.verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.fileList[0]));
                            Console.Error.WriteLine("        Length of first Sequence: {0:#,000}", referenceSequence.Count);
                            Console.Error.WriteLine("            Read/Processing time: {0}", swInterval.Elapsed);
                            Console.Error.WriteLine("            File size           : {0:#,000} bytes", refFileLength);
                        }

                        long queryFileLength = new FileInfo(myArgs.fileList[1]).Length;

                        IEnumerable <ISequence> parsedQuerySequences = ParseFastA(myArgs.fileList[1]);

                        IEnumerable <ISequence> querySequences = parsedQuerySequences;

                        if (myArgs.reverseOnly)
                        {
                            // convert to reverse complement sequences
                            querySequences = ReverseComplementSequenceList(parsedQuerySequences);
                        }
                        else if (myArgs.both)
                        {
                            // add the reverse complement sequences along with query sequences.
                            querySequences = AddReverseComplementsToSequenceList(parsedQuerySequences);
                        }

                        // DISCUSSION:
                        // Three possible outputs desired.  Globally unique 'mum' (v1), unique in reference sequence (v2),
                        //   or get the maximum matches of length or greater.
                        //
                        mummerTime = new TimeSpan();
                        writetime  = new TimeSpan();
                        long memoryAtStart = 0;
                        long memoryAtEnd   = 0;
                        if (myArgs.verbose)
                        {
                            // The overall stopwatch is paused so the memory measurement is not counted
                            // in the total runtime.
                            swMumUtil.Stop();
                            memoryAtStart = GC.GetTotalMemory(true);
                            swMumUtil.Start();
                        }

                        swInterval.Restart();
                        MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(referenceSequence);
                        swInterval.Stop();

                        if (myArgs.verbose)
                        {
                            swMumUtil.Stop();
                            memoryAtEnd = GC.GetTotalMemory(true);
                            swMumUtil.Start();
                        }

                        MUMmer mummer = new MUMmer(suffixTree);

                        if (myArgs.verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("Suffix tree construction time   : {0}", swInterval.Elapsed);
                            Console.Error.WriteLine("Memory consumed by Suffix tree  : {0:#,000}", memoryAtEnd - memoryAtStart);
                            Console.Error.WriteLine("Total edges created             : {0:#,000}", suffixTree.EdgesCount);
                            Console.Error.WriteLine("Memory per edge                 : {0:#,000.00} bytes", (((double)(memoryAtEnd - memoryAtStart)) / suffixTree.EdgesCount));
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("      Processed Query FastA file: {0}", Path.GetFullPath(myArgs.fileList[1]));
                            Console.Error.WriteLine("            File Size           : {0:#,000} bytes", queryFileLength);
                        }

                        mummer.LengthOfMUM = myArgs.length;
                        mummer.NoAmbiguity = myArgs.noAmbiguity;

                        // Select the match strategy and the label used in the verbose summary.
                        string outputOption;
                        bool   useMaxMatch;
                        if (myArgs.maxmatch)
                        {
                            outputOption = "GetMumsMaxMatch()";
                            useMaxMatch  = true;
                        }
                        else if (myArgs.mum)
                        {
                            // TODO: once GetMatchesUniqueInBothReferenceAndQuery() is implemented in MUMmer,
                            //       use it for this option instead of GetMatchesUniqueInReference().
                            outputOption = "GetMumsMum()";
                            useMaxMatch  = false;
                        }
                        else if (myArgs.mumreference)
                        {
                            outputOption = "GetMumsReference()";
                            useMaxMatch  = false;
                        }
                        else
                        {
                            // cannot happen as argument processing already asserted one of the three options must be specified
                            Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
                            Environment.Exit(-1);

                            // Environment.Exit() does not return, but the compiler cannot know that;
                            // the return keeps the locals above definitely assigned below.
                            return;
                        }

                        long     querySeqCount  = 0;
                        double   sumofSeqLength = 0;
                        TimeSpan totalTimetakenToProcessQuerySequences = new TimeSpan();

                        // The watch runs while the enumerator produces the next query sequence, so the
                        // interval measured at the top of each iteration is that sequence's processing time.
                        swInterval.Restart();
                        foreach (Sequence qSeq in querySequences)
                        {
                            // Stop the watch after each query sequence parsed.
                            swInterval.Stop();

                            // Add total time to process query sequence.
                            // if reverse complement option is set, includes reverse complement time also.
                            totalTimetakenToProcessQuerySequences = totalTimetakenToProcessQuerySequences.Add(swInterval.Elapsed);

                            IEnumerable <Match> mums;
                            swInterval.Restart();
                            if (useMaxMatch)
                            {
                                mums = mummer.GetMatches(qSeq);
                            }
                            else
                            {
                                mums = mummer.GetMatchesUniqueInReference(qSeq);
                            }

                            swInterval.Stop();

                            // Add time taken to obtain the matches (now measured for maxmatch as well).
                            mummerTime = mummerTime.Add(swInterval.Elapsed);

                            swInterval.Restart();
                            WriteMums(mums, referenceSequence, qSeq, myArgs);
                            swInterval.Stop();

                            // Add time taken by write matches.
                            writetime = writetime.Add(swInterval.Elapsed);

                            querySeqCount++;
                            sumofSeqLength += qSeq.Count;

                            // Start the watch for next query sequence parse.
                            swInterval.Restart();
                        }

                        swInterval.Stop();

                        if (myArgs.verbose)
                        {
                            if (myArgs.reverseOnly || myArgs.both)
                            {
                                Console.Error.WriteLine("        Read/Processing time          : {0}", timeTakenToParseQuerySequences);
                                Console.Error.WriteLine("     Reverse Complement time          : {0}", timeTakenToGetReverseComplement);
                                Console.Error.WriteLine("     Total time taken to Process reads: {0}", totalTimetakenToProcessQuerySequences);
                            }
                            else
                            {
                                Console.Error.WriteLine("        Read/Processing time          : {0}", totalTimetakenToProcessQuerySequences);
                            }

                            // Avoid reporting NaN when the query file contained no sequences.
                            double averageSeqLength = querySeqCount == 0 ? 0 : sumofSeqLength / querySeqCount;

                            Console.Error.WriteLine();
                            Console.Error.WriteLine("         Number of query Sequences        : {0:#,000}", querySeqCount);
                            Console.Error.WriteLine("         Average length of query Sequences: {0:#,000}", averageSeqLength);
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("Compute {0,20} time              : {1}", outputOption, mummerTime);
                            Console.Error.WriteLine("                WriteMums() time          : {0}", writetime);
                        }

                        swMumUtil.Stop();
                        if (myArgs.verbose)
                        {
                            Console.Error.WriteLine("           Total MumUtil Runtime      : {0}", swMumUtil.Elapsed);
                        }
                    }
                }
                else
                {
                    Console.WriteLine(Resources.MumUtilHelp);
                }
            }
            catch (Exception ex)
            {
                DisplayException(ex);
            }
        }