Ejemplo n.º 1
0
        public void TestLISWithCrossAndOverlap()
        {
            // Create a list of Mum classes.
            List <Match> MUM = new List <Match>();
            Match        mum;

            mum = new Match();
            mum.ReferenceSequenceOffset = 0;
            mum.Length = 5;
            mum.QuerySequenceOffset = 5;
            MUM.Add(mum);

            mum = new Match();
            mum.ReferenceSequenceOffset = 3;
            mum.Length = 5;
            mum.QuerySequenceOffset = 0;
            MUM.Add(mum);

            // ILongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
            LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
            IList <Match> lisList            = lis.SortMum(MUM);
            IList <Match> lisList1           = lis.GetLongestSequence(lisList);

            List <Match> expectedOutput = new List <Match>();

            mum = new Match();
            mum.ReferenceSequenceOffset = 0;
            mum.Length = 5;
            mum.QuerySequenceOffset = 5;
            expectedOutput.Add(mum);

            Assert.IsTrue(CompareMumList(lisList1, expectedOutput));
        }
Ejemplo n.º 2
0
        public void TestLISWithCross1()
        {
            // Create a list of Mum classes.
            List <MaxUniqueMatch> MUM = new List <MaxUniqueMatch>();
            MaxUniqueMatch        mum = null;

            mum = new MaxUniqueMatch();
            mum.FirstSequenceStart    = 0;
            mum.FirstSequenceMumOrder = 1;
            mum.Length = 4;
            mum.SecondSequenceStart    = 4;
            mum.SecondSequenceMumOrder = 1;
            MUM.Add(mum);

            mum = new MaxUniqueMatch();
            mum.FirstSequenceStart    = 4;
            mum.FirstSequenceMumOrder = 2;
            mum.Length = 3;
            mum.SecondSequenceStart    = 0;
            mum.SecondSequenceMumOrder = 2;
            MUM.Add(mum);

            mum = new MaxUniqueMatch();
            mum.FirstSequenceStart    = 10;
            mum.FirstSequenceMumOrder = 3;
            mum.Length = 3;
            mum.SecondSequenceStart    = 10;
            mum.SecondSequenceMumOrder = 3;
            MUM.Add(mum);

            ILongestIncreasingSubsequence lis     = new LongestIncreasingSubsequence();
            IList <MaxUniqueMatch>        lisList = lis.GetLongestSequence(MUM);

            List <MaxUniqueMatch> expectedOutput = new List <MaxUniqueMatch>();

            mum = new MaxUniqueMatch();
            mum.FirstSequenceStart    = 0;
            mum.FirstSequenceMumOrder = 1;
            mum.Length = 4;
            mum.SecondSequenceStart    = 4;
            mum.SecondSequenceMumOrder = 1;
            expectedOutput.Add(mum);

            mum = new MaxUniqueMatch();
            mum.FirstSequenceStart    = 10;
            mum.FirstSequenceMumOrder = 3;
            mum.Length = 3;
            mum.SecondSequenceStart    = 10;
            mum.SecondSequenceMumOrder = 3;
            expectedOutput.Add(mum);

            Assert.IsTrue(this.CompareMumList(lisList, expectedOutput));
        }
Ejemplo n.º 3
0
        private static void Main(string[] args)
        {
            try
            {
                Stopwatch stopWatchMumUtil  = Stopwatch.StartNew();
                Stopwatch stopWatchInterval = new Stopwatch();
                Console.Error.WriteLine(SplashString());
                if (args.Length > 0)
                {
                    CommandLineOptions myArgs = ProcessCommandLine(args);

                    TimeSpan writetime = new TimeSpan();
                    LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
                    IEnumerable <Match>          mums;
                    if (myArgs.PerformLISOnly)
                    {
                        stopWatchInterval.Restart();
                        IList <Match> parsedMUMs = ParseMums(myArgs.FileList[0]);
                        stopWatchInterval.Stop();

                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Processed MUM file: {0}", Path.GetFullPath(myArgs.FileList[0]));
                            Console.Error.WriteLine("        Total MUMs: {0:#,000}", parsedMUMs.Count);
                            Console.Error.WriteLine("            Read/Processing time: {0}", stopWatchInterval.Elapsed);
                        }

                        stopWatchInterval.Restart();
                        IList <Match> sortedMUMs = lis.SortMum(parsedMUMs);
                        stopWatchInterval.Stop();

                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Sort MUM time: {0}", stopWatchInterval.Elapsed);
                        }

                        stopWatchInterval.Restart();
                        if (sortedMUMs.Count != 0)
                        {
                            mums = lis.GetLongestSequence(sortedMUMs);

                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine();
                                Console.Error.WriteLine("  Perform LIS time: {0}", stopWatchInterval.Elapsed);
                            }

                            stopWatchInterval.Restart();
                            WriteMums(mums);
                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine();
                                Console.Error.WriteLine("  Write MUM time: {0}", stopWatchInterval.Elapsed);
                            }

                            stopWatchMumUtil.Stop();
                        }
                        else
                        {
                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine();
                                Console.Error.WriteLine("  Perform LIS time: {0}", stopWatchInterval.Elapsed);
                            }

                            stopWatchInterval.Restart();
                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine();
                                Console.Error.WriteLine("  Write MUM time: {0}", stopWatchInterval.Elapsed);
                            }

                            stopWatchMumUtil.Stop();
                        }
                    }
                    else
                    {
                        FileInfo refFileinfo   = new FileInfo(myArgs.FileList[0]);
                        long     refFileLength = refFileinfo.Length;
                        refFileinfo = null;

                        stopWatchInterval.Restart();
                        IEnumerable <ISequence> referenceSequences = ParseFastA(myArgs.FileList[0]);
                        ISequence referenceSequence = referenceSequences.First();
                        stopWatchInterval.Stop();
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.FileList[0]));
                            Console.Error.WriteLine("        Length of first Sequence: {0:#,000}", referenceSequence.Count);
                            Console.Error.WriteLine("            Read/Processing time: {0}", stopWatchInterval.Elapsed);
                            Console.Error.WriteLine("            File Size           : {0}", refFileLength);
                        }

                        FileInfo queryFileinfo   = new FileInfo(myArgs.FileList[1]);
                        long     queryFileLength = queryFileinfo.Length;
                        refFileinfo = null;

                        stopWatchInterval.Restart();
                        IEnumerable <ISequence> querySequences = ParseFastA(myArgs.FileList[1]);
                        stopWatchInterval.Stop();
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("      Processed Query FastA file: {0}", Path.GetFullPath(myArgs.FileList[1]));
                            Console.Error.WriteLine("            Read/Processing time: {0}", stopWatchInterval.Elapsed);
                            Console.Error.WriteLine("            File Size           : {0}", queryFileLength);
                        }

                        if (myArgs.ReverseOnly)
                        {
                            stopWatchInterval.Restart();
                            querySequences = ReverseComplementSequenceList(querySequences);
                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine("         Reverse Complement time: {0}", stopWatchInterval.Elapsed);
                            }
                        }
                        else if (myArgs.Both)
                        {   // add the reverse complement sequences to the query list too
                            stopWatchInterval.Restart();
                            querySequences = AddReverseComplementsToSequenceList(querySequences);
                            stopWatchInterval.Stop();
                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine("     Add Reverse Complement time: {0}", stopWatchInterval.Elapsed);
                            }
                        }

                        TimeSpan mummerTime = new TimeSpan(0, 0, 0);
                        stopWatchInterval.Restart();
                        Sequence seq = referenceSequence as Sequence;
                        if (seq == null)
                        {
                            throw new ArgumentException("MUMmer supports only Sequence class");
                        }

                        MUMmer mummer = new MUMmer(seq);
                        stopWatchInterval.Stop();
                        if (myArgs.Verbose)
                        {
                            Console.Error.WriteLine("Suffix tree construction time: {0}", stopWatchInterval.Elapsed);
                        }

                        mummer.LengthOfMUM = myArgs.Length;
                        mummer.NoAmbiguity = myArgs.NoAmbiguity;
                        long   querySeqCount  = 0;
                        double sumofSeqLength = 0;
                        if (myArgs.MaxMatch)
                        {
                            foreach (ISequence querySeq in querySequences)
                            {
                                stopWatchInterval.Restart();
                                IList <Match> mumList = GetMumsForLIS(mummer.GetMatchesUniqueInReference(querySeq));
                                if (mumList.Count != 0)
                                {
                                    mums = lis.GetLongestSequence(lis.SortMum(mumList));
                                    stopWatchInterval.Stop();
                                    mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);
                                    stopWatchInterval.Restart();
                                    WriteMums(mums, referenceSequence, querySeq, myArgs);
                                    stopWatchInterval.Stop();
                                    writetime = writetime.Add(stopWatchInterval.Elapsed);
                                    querySeqCount++;
                                    sumofSeqLength += querySeq.Count;
                                }
                                else
                                {
                                    stopWatchInterval.Stop();
                                    mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);
                                    stopWatchInterval.Restart();
                                    stopWatchInterval.Stop();
                                    writetime = writetime.Add(stopWatchInterval.Elapsed);
                                    querySeqCount++;
                                    sumofSeqLength += querySeq.Count;
                                }
                            }

                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine("             Number of query Sequences: {0}", querySeqCount);
                                Console.Error.WriteLine("             Average length of query Sequences: {0}", sumofSeqLength / querySeqCount);
                                Console.Error.WriteLine("  Compute GetMumsMaxMatch() with LIS time: {0}", mummerTime);
                            }
                        }
                        else if (myArgs.Mum)
                        {
                            foreach (ISequence querySeq in querySequences)
                            {
                                stopWatchInterval.Restart();
                                IList <Match> mumList = GetMumsForLIS(mummer.GetMatchesUniqueInReference(querySeq));
                                if (mumList.Count != 0)
                                {
                                    mums = lis.GetLongestSequence(lis.SortMum(mumList));
                                    stopWatchInterval.Stop();
                                    mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);
                                    stopWatchInterval.Restart();
                                    WriteMums(mums, referenceSequence, querySeq, myArgs);
                                    stopWatchInterval.Stop();
                                    writetime = writetime.Add(stopWatchInterval.Elapsed);
                                    querySeqCount++;
                                    sumofSeqLength += querySeq.Count;
                                }
                                else
                                {
                                    stopWatchInterval.Stop();
                                    mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);
                                    stopWatchInterval.Restart();
                                    stopWatchInterval.Stop();
                                    writetime = writetime.Add(stopWatchInterval.Elapsed);
                                    querySeqCount++;
                                    sumofSeqLength += querySeq.Count;
                                }
                            }

                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine("             Number of query Sequences: {0}", querySeqCount);
                                Console.Error.WriteLine("             Average length of query Sequences: {0}", sumofSeqLength / querySeqCount);
                                Console.Error.WriteLine("       Compute GetMumsMum() with LIS time: {0}", mummerTime);
                            }
                        }
                        else if (myArgs.Mumreference)
                        {
                            // NOTE:
                            //     mum3.GetMUMs() this really implements the GetMumReference() functionality
                            // mums = mum3.GetMumsReference( referenceSequences[0], querySequences);     // should be
                            foreach (ISequence querySeq in querySequences)
                            {
                                stopWatchInterval.Restart();
                                IList <Match> mumList = GetMumsForLIS(mummer.GetMatchesUniqueInReference(querySeq));
                                if (mumList.Count != 0)
                                {
                                    mums = lis.GetLongestSequence(lis.SortMum(mumList));
                                    stopWatchInterval.Stop();
                                    mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);
                                    stopWatchInterval.Restart();

                                    // do sort
                                    // WriteLongestIncreasingSubsequences
                                    WriteMums(mums, referenceSequence, querySeq, myArgs);
                                    stopWatchInterval.Stop();
                                    writetime = writetime.Add(stopWatchInterval.Elapsed);
                                    querySeqCount++;
                                    sumofSeqLength += querySeq.Count;
                                }
                                else
                                {
                                    stopWatchInterval.Stop();
                                    mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);
                                    stopWatchInterval.Restart();
                                    stopWatchInterval.Stop();
                                    writetime = writetime.Add(stopWatchInterval.Elapsed);
                                    querySeqCount++;
                                    sumofSeqLength += querySeq.Count;
                                }
                            }

                            if (myArgs.Verbose)
                            {
                                Console.Error.WriteLine("             Number of query Sequences: {0}", querySeqCount);
                                Console.Error.WriteLine("             Average length of query Sequences: {0}", sumofSeqLength / querySeqCount);
                                Console.Error.WriteLine(" Compute GetMumsReference() time: {0}", mummerTime);
                            }
                        }
                        else
                        {
                            // cannot happen as argument processing already asserted one of the three options must be specified
                            Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
                            Environment.Exit(-1);

                            // kill the error about unitialized use of 'mums' in the next block...the compiler does not recognize
                            // Environment.Exit() as a no-return function
                            throw new Exception("Never hit this");
                        }
                    }

                    if (myArgs.Verbose)
                    {
                        Console.Error.WriteLine("                WriteMums() time: {0}", writetime);
                    }

                    stopWatchMumUtil.Stop();
                    if (myArgs.Verbose)
                    {
                        Console.Error.WriteLine("           Total LisUtil Runtime: {0}", stopWatchMumUtil.Elapsed);
                    }
                }
                else
                {
                    Console.WriteLine(Resources.LisUtilHelp);
                }
            }
            catch (Exception ex)
            {
                DisplayException(ex);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// This method is considered as main execute method which defines the
        /// step by step algorithm. Derived class flows the defined flow by this
        /// method. Store generated MUMs in properties MUMs, SortedMUMs.
        /// Alignment first finds MUMs for all the query sequence, and then
        /// runs pairwise algorithm on gaps to produce alignments.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequenceList">List of input sequences.</param>
        /// <returns>A list of sequence alignments.</returns>
        private IList <IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
            ISequence referenceSequence,
            IEnumerable <ISequence> querySequenceList)
        {
            // Get MUMs
            this.mums = new Dictionary <ISequence, IEnumerable <Match> >();
            IList <IPairwiseSequenceAlignment> results   = new List <IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment         alignment = null;
            IEnumerable <Match> mum;

            if (this.Validate(referenceSequence, querySequenceList))
            {
                // Safety check for public methods to ensure that null
                // inputs are handled.
                if (referenceSequence == null || querySequenceList == null)
                {
                    return(null);
                }

                Sequence seq = referenceSequence as Sequence;
                if (seq == null)
                {
                    throw new ArgumentException(Properties.Resource.OnlySequenceClassSupported);
                }

                MUMmer mummer = new MUMmer(seq);
                mummer.LengthOfMUM = this.LengthOfMUM;
                mummer.NoAmbiguity = this.AmbigiousMatchesAllowed;
                foreach (ISequence sequence in querySequenceList)
                {
                    if (sequence.Equals(referenceSequence))
                    {
                        continue;
                    }

                    alignment = new PairwiseSequenceAlignment(referenceSequence, sequence);

                    // Step2 : streaming process is performed with the query sequence
                    if (this.MaximumMatchEnabled)
                    {
                        mum = mummer.GetMatches(sequence);
                    }
                    else
                    {
                        mum = mummer.GetMatchesUniqueInReference(sequence);
                    }

                    this.mums.Add(sequence, mum);

                    // Step3(a) : sorted mum list based on reference sequence
                    LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
                    IList <Match> sortedMumList      = lis.SortMum(GetMumsForLIS(mum));

                    if (sortedMumList.Count > 0)
                    {
                        // Step3(b) : LIS using greedy cover algorithm
                        IList <Match> finalMumList = lis.GetLongestSequence(sortedMumList);

                        if (finalMumList.Count > 0)
                        {
                            // Step 4 : get all the gaps in each sequence and call
                            // pairwise alignment
                            alignment.PairwiseAlignedSequences.Add(
                                this.ProcessGaps(referenceSequence, sequence, finalMumList));
                        }

                        results.Add(alignment);
                    }
                    else
                    {
                        IList <IPairwiseSequenceAlignment> sequenceAlignment = this.RunPairWise(
                            referenceSequence,
                            sequence);

                        foreach (IPairwiseSequenceAlignment pairwiseAlignment in sequenceAlignment)
                        {
                            results.Add(pairwiseAlignment);
                        }
                    }
                }
            }

            return(results);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Validates most of the find matches suffix tree test cases with varying parameters.
        /// </summary>
        /// <param name="nodeName">Node name which needs to be read for execution.</param>
        /// <param name="isFilePath">Is File Path?</param>
        /// <param name="LISActionType">LIS action type enum</param>
        static void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
                                                            LISParameters LISActionType)
        {
            ISequence referenceSeq      = null;
            ISequence querySeq          = null;
            string    referenceSequence = string.Empty;
            string    querySequence     = string.Empty;

            if (isFilePath)
            {
                // Gets the reference sequence from the configurtion file
                string filePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

                FastaParser       parser        = new FastaParser();
                IList <ISequence> referenceSeqs = parser.Parse(filePath);
                referenceSeq      = referenceSeqs[0];
                referenceSequence = referenceSeq.ToString();

                // Gets the reference sequence from the configurtion file
                string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                     Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

                FastaParser       queryParser = new FastaParser();
                IList <ISequence> querySeqs   = queryParser.Parse(queryFilePath);
                querySeq      = querySeqs[0];
                querySequence = querySeq.ToString();
            }
            else
            {
                // Gets the reference sequence from the configurtion file
                referenceSequence = Utility._xmlUtil.GetTextValue(nodeName,
                                                                  Constants.SequenceNode);

                string seqAlp = Utility._xmlUtil.GetTextValue(nodeName,
                                                              Constants.AlphabetNameNode);

                referenceSeq = new Sequence(Utility.GetAlphabet(seqAlp),
                                            referenceSequence);

                querySequence = Utility._xmlUtil.GetTextValue(nodeName,
                                                              Constants.SearchSequenceNode);

                seqAlp = Utility._xmlUtil.GetTextValue(nodeName,
                                                       Constants.SearchSequenceAlphabetNode);

                querySeq = new Sequence(Utility.GetAlphabet(seqAlp),
                                        querySequence);
            }

            string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Builds the suffix for the reference sequence passed.
            ISuffixTreeBuilder suffixTreeBuilder = new KurtzSuffixTreeBuilder();
            SequenceSuffixTree suffixTree        = suffixTreeBuilder.BuildSuffixTree(referenceSeq);

            IList <MaxUniqueMatch> matches = suffixTreeBuilder.FindMatches(suffixTree, querySeq,
                                                                           long.Parse(mumLength, null));

            switch (LISActionType)
            {
            case LISParameters.FindUniqueMatches:
                // Validates the Unique Matches.
                ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
                Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, LISActionType));
                break;

            case LISParameters.PerformLIS:
                // Validates the Unique Matches.
                ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches using LIS");
                LongestIncreasingSubsequence lisObj     = new LongestIncreasingSubsequence();
                IList <MaxUniqueMatch>       lisMatches = lisObj.GetLongestSequence(matches);
                Assert.IsTrue(ValidateUniqueMatches(lisMatches, nodeName, LISActionType));
                break;

            default:
                break;
            }

            Console.WriteLine(string.Format(null,
                                            "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                                            referenceSequence, querySequence));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                                                   referenceSequence, querySequence));
        }