public void TestLISWithoutCrossAndOverlap()
{
    // Scenario: two matches where the second starts after the first one ends
    // in both sequences (reference 0..2 then 4..6, query 0..2 then 3..5).
    List<Match> inputMatches = new List<Match>
    {
        new Match { ReferenceSequenceOffset = 0, Length = 3, QuerySequenceOffset = 0 },
        new Match { ReferenceSequenceOffset = 4, Length = 3, QuerySequenceOffset = 3 }
    };

    // Sort the matches, then extract the longest increasing subsequence.
    LongestIncreasingSubsequence lisFinder = new LongestIncreasingSubsequence();
    IList<Match> sortedMatches = lisFinder.SortMum(inputMatches);
    IList<Match> longestSequence = lisFinder.GetLongestSequence(sortedMatches);

    // The expected result holds the same two matches, in the same order.
    List<Match> expectedMatches = new List<Match>
    {
        new Match { ReferenceSequenceOffset = 0, Length = 3, QuerySequenceOffset = 0 },
        new Match { ReferenceSequenceOffset = 4, Length = 3, QuerySequenceOffset = 3 }
    };

    Assert.IsTrue(CompareMumList(longestSequence, expectedMatches));
}
/// <summary>
/// Main execution method that lays out the alignment algorithm step by step.
/// For every query sequence it finds the matches against the reference,
/// records them in <c>this.mums</c>, reduces them to a longest increasing
/// subsequence and aligns the gaps between the remaining matches. A query
/// for which no sorted matches are available is aligned with the plain
/// pairwise aligner instead.
/// </summary>
/// <param name="referenceSequence">Reference sequence.</param>
/// <param name="querySequenceList">List of input sequences.</param>
/// <returns>
/// A list of sequence alignments; an empty list when validation fails; null
/// when either argument is null and validation nevertheless succeeded.
/// </returns>
/// <exception cref="ArgumentException">
/// The reference sequence is not a <c>Sequence</c> instance.
/// </exception>
private IList<IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
    ISequence referenceSequence,
    IEnumerable<ISequence> querySequenceList)
{
    // Start every run with a fresh per-query match table.
    this.mums = new Dictionary<ISequence, IEnumerable<Match>>();
    IList<IPairwiseSequenceAlignment> alignments = new List<IPairwiseSequenceAlignment>();

    if (!this.Validate(referenceSequence, querySequenceList))
    {
        return alignments;
    }

    // Safety check so that null inputs are handled.
    // NOTE(review): this runs after Validate has already received both
    // arguments - confirm whether Validate tolerates nulls.
    if (referenceSequence == null || querySequenceList == null)
    {
        return null;
    }

    Sequence reference = referenceSequence as Sequence;
    if (reference == null)
    {
        throw new ArgumentException(Properties.Resource.OnlySequenceClassSupported);
    }

    MUMmer mummer = new MUMmer(reference);
    mummer.LengthOfMUM = this.LengthOfMUM;
    mummer.NoAmbiguity = this.AmbigiousMatchesAllowed;

    foreach (ISequence query in querySequenceList)
    {
        // The reference is never aligned against itself.
        if (query.Equals(referenceSequence))
        {
            continue;
        }

        IPairwiseSequenceAlignment queryAlignment =
            new PairwiseSequenceAlignment(referenceSequence, query);

        // Step 2: stream the query sequence through the matcher.
        IEnumerable<Match> matches;
        if (this.MaximumMatchEnabled)
        {
            matches = mummer.GetMatches(query);
        }
        else
        {
            matches = mummer.GetMatchesUniqueInReference(query);
        }

        this.mums.Add(query, matches);

        // Step 3(a): sort the matches for the LIS step.
        LongestIncreasingSubsequence lisFinder = new LongestIncreasingSubsequence();
        IList<Match> sortedMatches = lisFinder.SortMum(GetMumsForLIS(matches));

        if (sortedMatches.Count == 0)
        {
            // Nothing to anchor on: fall back to a plain pairwise alignment.
            IList<IPairwiseSequenceAlignment> fallbackAlignments =
                this.RunPairWise(referenceSequence, query);
            foreach (IPairwiseSequenceAlignment fallback in fallbackAlignments)
            {
                alignments.Add(fallback);
            }

            continue;
        }

        // Step 3(b): longest increasing subsequence of the sorted matches.
        IList<Match> chainedMatches = lisFinder.GetLongestSequence(sortedMatches);
        if (chainedMatches.Count > 0)
        {
            // Step 4: collect the gaps in each sequence and align them pairwise.
            queryAlignment.PairwiseAlignedSequences.Add(
                this.ProcessGaps(referenceSequence, query, chainedMatches));
        }

        // The alignment is reported even when the LIS came back empty.
        alignments.Add(queryAlignment);
    }

    return alignments;
}
/// <summary>
/// Finds the matches that are unique in the reference for a reference/query
/// pair described by a configuration node, runs the longest increasing
/// subsequence step over them and validates the outcome.
/// </summary>
/// <param name="nodeName">Configuration node that describes the test case.</param>
/// <param name="isFilePath">
/// True to parse both sequences from the FastA files named in the node;
/// false to build them from the sequence text and alphabet names in the node.
/// </param>
void ValidateLongestIncreasingSubsequenceTestCases(string nodeName, bool isFilePath)
{
    ISequence reference;
    ISequence query;
    string referenceText;
    string queryText;

    if (isFilePath)
    {
        // Reference sequence: first entry of the FastA file named in the configuration.
        string referencePath = this.utilityObj.xmlUtil.GetTextValue(
            nodeName, Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format(
            (IFormatProvider)null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.",
            referencePath));

        FastAParser fastaParser = new FastAParser();
        IEnumerable<ISequence> parsedReferences = fastaParser.Parse(referencePath);
        reference = parsedReferences.ElementAt(0);
        referenceText = new string(reference.Select(a => (char)a).ToArray());

        // Query sequence: first entry of the search FastA file named in the configuration.
        string queryPath = this.utilityObj.xmlUtil.GetTextValue(
            nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        ApplicationLog.WriteLine(string.Format(
            (IFormatProvider)null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.",
            queryPath));

        IEnumerable<ISequence> parsedQueries = fastaParser.Parse(queryPath);
        query = parsedQueries.ElementAt(0);
        queryText = new string(query.Select(a => (char)a).ToArray());
    }
    else
    {
        // Reference sequence: text and alphabet name come from the configuration.
        referenceText = this.utilityObj.xmlUtil.GetTextValue(
            nodeName, Constants.SequenceNode);
        string alphabetName = this.utilityObj.xmlUtil.GetTextValue(
            nodeName, Constants.AlphabetNameNode);
        reference = new Sequence(Utility.GetAlphabet(alphabetName), referenceText);

        // Query sequence: same approach, using the search-sequence nodes.
        queryText = this.utilityObj.xmlUtil.GetTextValue(
            nodeName, Constants.SearchSequenceNode);
        alphabetName = this.utilityObj.xmlUtil.GetTextValue(
            nodeName, Constants.SearchSequenceAlphabetNode);
        query = new Sequence(Utility.GetAlphabet(alphabetName), queryText);
    }

    string configuredMumLength = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.MUMLengthNode);

    Bio.Algorithms.MUMmer.MUMmer mummer =
        new Bio.Algorithms.MUMmer.MUMmer(reference as Sequence);
    mummer.LengthOfMUM = long.Parse(configuredMumLength);
    IEnumerable<Match> uniqueMatches = mummer.GetMatchesUniqueInReference(query);

    // Validates the Unique Matches.
    ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches using LIS");

    // Copy the matches into a list, then sort them and take the LIS.
    LongestIncreasingSubsequence lisFinder = new LongestIncreasingSubsequence();
    List<Match> matchList = new List<Match>(uniqueMatches);
    IList<Match> sortedMatches = lisFinder.SortMum(matchList);
    IList<Match> longestSequence = lisFinder.GetLongestSequence(sortedMatches);

    Assert.IsTrue(this.ValidateUniqueMatches(
        longestSequence, nodeName, LISParameters.PerformLIS));

    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
        referenceText,
        queryText));
}
private static void Main(string[] args)
{
    // Entry point of the LIS utility.
    //  - No arguments: prints the help text to standard output.
    //  - LIS-only mode (PerformLISOnly): reads a MUM file, sorts the MUMs,
    //    computes their longest increasing subsequence and writes it.
    //  - Otherwise: reads a reference and a query FastA file, finds matches
    //    for every query sequence, reduces them with LIS and writes them.
    // Timing information goes to standard error when Verbose is set. Any
    // exception is handed to DisplayException.
    try
    {
        Stopwatch stopWatchMumUtil = Stopwatch.StartNew();
        Stopwatch stopWatchInterval = new Stopwatch();
        Console.Error.WriteLine(SplashString());

        if (args.Length == 0)
        {
            Console.WriteLine(Resources.LisUtilHelp);
            return;
        }

        CommandLineOptions myArgs = ProcessCommandLine(args);
        LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();

        // Only the FastA mode accumulates a write time; LIS-only mode reports
        // its own "Write MUM time" and leaves this at zero.
        TimeSpan writetime = new TimeSpan();

        if (myArgs.PerformLISOnly)
        {
            RunLisOnMumFile(myArgs, lis, stopWatchInterval);
        }
        else
        {
            writetime = RunLisOnFastAFiles(myArgs, lis, stopWatchInterval);
        }

        if (myArgs.Verbose)
        {
            Console.Error.WriteLine(" WriteMums() time: {0}", writetime);
        }

        stopWatchMumUtil.Stop();
        if (myArgs.Verbose)
        {
            Console.Error.WriteLine(" Total LisUtil Runtime: {0}", stopWatchMumUtil.Elapsed);
        }
    }
    catch (Exception ex)
    {
        DisplayException(ex);
    }
}

/// <summary>
/// LIS-only mode: parses the MUM file named by the first file argument,
/// sorts the MUMs, computes the longest increasing subsequence and writes it.
/// When sorting yields no MUMs nothing is computed or written, but the
/// verbose timing lines are still printed.
/// </summary>
/// <param name="myArgs">Parsed command line options.</param>
/// <param name="lis">LIS implementation used for sorting and chaining.</param>
/// <param name="stopWatchInterval">Stopwatch reused for each timed phase.</param>
private static void RunLisOnMumFile(
    CommandLineOptions myArgs,
    LongestIncreasingSubsequence lis,
    Stopwatch stopWatchInterval)
{
    stopWatchInterval.Restart();
    IList<Match> parsedMUMs = ParseMums(myArgs.FileList[0]);
    stopWatchInterval.Stop();
    if (myArgs.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed MUM file: {0}", Path.GetFullPath(myArgs.FileList[0]));
        Console.Error.WriteLine(" Total MUMs: {0:#,000}", parsedMUMs.Count);
        Console.Error.WriteLine(" Read/Processing time: {0}", stopWatchInterval.Elapsed);
    }

    stopWatchInterval.Restart();
    IList<Match> sortedMUMs = lis.SortMum(parsedMUMs);
    stopWatchInterval.Stop();
    if (myArgs.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Sort MUM time: {0}", stopWatchInterval.Elapsed);
    }

    bool hasMums = sortedMUMs.Count != 0;
    IEnumerable<Match> mums = null;

    stopWatchInterval.Restart();
    if (hasMums)
    {
        mums = lis.GetLongestSequence(sortedMUMs);
    }

    stopWatchInterval.Stop();
    if (myArgs.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Perform LIS time: {0}", stopWatchInterval.Elapsed);
    }

    stopWatchInterval.Restart();
    if (hasMums)
    {
        WriteMums(mums);
    }

    stopWatchInterval.Stop();
    if (myArgs.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Write MUM time: {0}", stopWatchInterval.Elapsed);
    }
}

/// <summary>
/// FastA mode: reads the reference (first file argument) and the query
/// sequences (second file argument), optionally replaces or extends the
/// queries with their reverse complements, and for each query sequence finds
/// matches against the first reference sequence, reduces them with LIS and
/// writes the result.
/// </summary>
/// <param name="myArgs">Parsed command line options.</param>
/// <param name="lis">LIS implementation used for sorting and chaining.</param>
/// <param name="stopWatchInterval">Stopwatch reused for each timed phase.</param>
/// <returns>Total time spent writing results.</returns>
/// <exception cref="ArgumentException">
/// The first reference sequence is not a <c>Sequence</c> instance.
/// </exception>
private static TimeSpan RunLisOnFastAFiles(
    CommandLineOptions myArgs,
    LongestIncreasingSubsequence lis,
    Stopwatch stopWatchInterval)
{
    long refFileLength = new FileInfo(myArgs.FileList[0]).Length;

    stopWatchInterval.Restart();
    IEnumerable<ISequence> referenceSequences = ParseFastA(myArgs.FileList[0]);
    ISequence referenceSequence = referenceSequences.First();
    stopWatchInterval.Stop();
    if (myArgs.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.FileList[0]));
        Console.Error.WriteLine(" Length of first Sequence: {0:#,000}", referenceSequence.Count);
        Console.Error.WriteLine(" Read/Processing time: {0}", stopWatchInterval.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", refFileLength);
    }

    long queryFileLength = new FileInfo(myArgs.FileList[1]).Length;

    stopWatchInterval.Restart();
    IEnumerable<ISequence> querySequences = ParseFastA(myArgs.FileList[1]);
    stopWatchInterval.Stop();
    if (myArgs.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed Query FastA file: {0}", Path.GetFullPath(myArgs.FileList[1]));
        Console.Error.WriteLine(" Read/Processing time: {0}", stopWatchInterval.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", queryFileLength);
    }

    if (myArgs.ReverseOnly)
    {
        stopWatchInterval.Restart();
        querySequences = ReverseComplementSequenceList(querySequences);
        stopWatchInterval.Stop();
        if (myArgs.Verbose)
        {
            Console.Error.WriteLine(" Reverse Complement time: {0}", stopWatchInterval.Elapsed);
        }
    }
    else if (myArgs.Both)
    {
        // Add the reverse complement sequences to the query list too.
        stopWatchInterval.Restart();
        querySequences = AddReverseComplementsToSequenceList(querySequences);
        stopWatchInterval.Stop();
        if (myArgs.Verbose)
        {
            Console.Error.WriteLine(" Add Reverse Complement time: {0}", stopWatchInterval.Elapsed);
        }
    }

    stopWatchInterval.Restart();
    Sequence seq = referenceSequence as Sequence;
    if (seq == null)
    {
        throw new ArgumentException("MUMmer supports only Sequence class");
    }

    MUMmer mummer = new MUMmer(seq);
    stopWatchInterval.Stop();
    if (myArgs.Verbose)
    {
        Console.Error.WriteLine("Suffix tree construction time: {0}", stopWatchInterval.Elapsed);
    }

    mummer.LengthOfMUM = myArgs.Length;
    mummer.NoAmbiguity = myArgs.NoAmbiguity;

    // Pick the match finder and the verbose label for the selected option;
    // everything after this point is identical for the three options.
    Func<ISequence, IEnumerable<Match>> findMatches;
    string computeTimeFormat;
    if (myArgs.MaxMatch)
    {
        // /maxmatch asks for all maximal matches, so it uses GetMatches
        // rather than the unique-in-reference variant used by the other modes.
        findMatches = querySeq => mummer.GetMatches(querySeq);
        computeTimeFormat = " Compute GetMumsMaxMatch() with LIS time: {0}";
    }
    else if (myArgs.Mum)
    {
        // NOTE(review): /mum shares the unique-in-reference finder with
        // /mumreference; confirm whether a stricter finder is intended.
        findMatches = querySeq => mummer.GetMatchesUniqueInReference(querySeq);
        computeTimeFormat = " Compute GetMumsMum() with LIS time: {0}";
    }
    else if (myArgs.Mumreference)
    {
        findMatches = querySeq => mummer.GetMatchesUniqueInReference(querySeq);
        computeTimeFormat = " Compute GetMumsReference() time: {0}";
    }
    else
    {
        // Cannot happen: argument processing already asserted that one of
        // the three options is specified.
        Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
        Environment.Exit(-1);

        // The compiler does not treat Environment.Exit() as a no-return
        // call; this throw satisfies definite assignment of the locals above.
        throw new InvalidOperationException("Never hit this");
    }

    TimeSpan mummerTime = new TimeSpan(0, 0, 0);
    TimeSpan writetime = new TimeSpan();
    long querySeqCount = 0;
    double sumofSeqLength = 0;

    foreach (ISequence querySeq in querySequences)
    {
        // Phase 1: find matches and reduce them with LIS.
        stopWatchInterval.Restart();
        IList<Match> mumList = GetMumsForLIS(findMatches(querySeq));
        bool hasMums = mumList.Count != 0;
        IEnumerable<Match> mums = null;
        if (hasMums)
        {
            mums = lis.GetLongestSequence(lis.SortMum(mumList));
        }

        stopWatchInterval.Stop();
        mummerTime = mummerTime.Add(stopWatchInterval.Elapsed);

        // Phase 2: write the result; skipped when there is nothing to write.
        stopWatchInterval.Restart();
        if (hasMums)
        {
            WriteMums(mums, referenceSequence, querySeq, myArgs);
        }

        stopWatchInterval.Stop();
        writetime = writetime.Add(stopWatchInterval.Elapsed);

        querySeqCount++;
        sumofSeqLength += querySeq.Count;
    }

    if (myArgs.Verbose)
    {
        // Report 0 rather than NaN when there were no query sequences.
        double averageLength = querySeqCount == 0 ? 0 : sumofSeqLength / querySeqCount;
        Console.Error.WriteLine(" Number of query Sequences: {0}", querySeqCount);
        Console.Error.WriteLine(" Average length of query Sequences: {0}", averageLength);
        Console.Error.WriteLine(computeTimeFormat, mummerTime);
    }

    return writetime;
}