Example #1
0
        /// <summary>
        /// Builds a suffix tree for a plain DNA sequence and for an ambiguous DNA
        /// sequence and checks the number of edges each tree reports.
        /// </summary>
        public void ValidateEdgeCount()
        {
            // Plain DNA alphabet: "ATGCA" is expected to yield 8 edges.
            var plainDnaTree = new MultiWaySuffixTree(new Sequence(Alphabets.DNA, "ATGCA"));
            Assert.AreEqual(8, plainDnaTree.EdgesCount);
            ApplicationLog.WriteLine(@"MUMmer BVT : Validation of edge
                        count for a Dna sequence completed successfully");

            // Ambiguous DNA alphabet: "RSVTW" is expected to yield 7 edges.
            var ambiguousTree = new MultiWaySuffixTree(new Sequence(AmbiguousDnaAlphabet.Instance, "RSVTW"));
            Assert.AreEqual(7, ambiguousTree.EdgesCount);
            ApplicationLog.WriteLine(@"MUMmer BVT : Validation of edge
                        count for a Ambiguous Dna sequence completed successfully");
        }
Example #2
0
        /// <summary>
        /// Builds a suffix tree over the concatenated reference sequences, searches every
        /// query sequence for matches that are unique in the reference, and validates the
        /// collected matches according to <paramref name="additionalParam"/>.
        /// </summary>
        /// <param name="nodeName">Configuration node the inputs and expected values are read from.</param>
        /// <param name="isFilePath">
        /// True to read reference and query sequences from the FastA files named in the node;
        /// false to read the sequence strings directly from the node.
        /// </param>
        /// <param name="additionalParam">Selects which validation is run on the matches.</param>
        /// <param name="propParam">Forwarded to the cluster builder validation.</param>
        private void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
                                                             AdditionalParameters additionalParam,
                                                             PropertyParameters propParam)
        {
            List<ISequence> referenceSeqList;
            var searchSeqList = new List<ISequence>();

            if (isFilePath)
            {
                // Gets the reference sequences from the FastA file
                string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.", filePath));

                // Materialize once: the result is consumed twice below (concatenation and
                // alphabet lookup), and the parser's IEnumerable may be lazily evaluated,
                // in which case each enumeration would read the file again.
                referenceSeqList = new FastAParser().Parse(filePath).ToList();

                // Gets the query sequences from the FastA file
                string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.", queryFilePath));

                searchSeqList.AddRange(new FastAParser().Parse(queryFilePath));
            }
            else
            {
                // Gets the reference & search sequences from the configuration file
                string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
                string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);
                IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

                referenceSeqList = referenceSequences.Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t))).Cast<ISequence>().ToList();
                searchSeqList.AddRange(searchSequences.Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t))));
            }

            // Fail with a clear assertion instead of an exception from First() below.
            Assert.IsTrue(referenceSeqList.Count > 0);

            // Concatenate all reference sequences into one, appending '+' after each of them.
            var referenceBytes = new List<byte>();
            foreach (ISequence seq in referenceSeqList)
            {
                referenceBytes.AddRange(seq);
                referenceBytes.Add((byte) '+');
            }

            var referenceSeq = new Sequence(referenceSeqList.First().Alphabet.GetMummerAlphabet(),
                                            referenceBytes.ToArray());

            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Builds the suffix tree for the concatenated reference sequence.
            var suffixTreeBuilder = new MultiWaySuffixTree(referenceSeq)
                                    {
                                        MinLengthOfMatch = long.Parse(mumLength, null)
                                    };

            // Collect the matches of every query sequence, in query order. Only the
            // flattened list is needed, so no per-query dictionary is built.
            List<Match> mums = searchSeqList
                .SelectMany(query => suffixTreeBuilder.SearchMatchesUniqueInReference(query))
                .ToList();

            switch (additionalParam)
            {
                case AdditionalParameters.FindUniqueMatches:
                    // Validates the unique matches.
                    ApplicationLog.WriteLine("NUCmer P1 : Validating the Unique Matches");
                    Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, isFilePath));
                    ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
                    break;
                case AdditionalParameters.PerformClusterBuilder:
                    // Validates the matches through the cluster builder.
                    ApplicationLog.WriteLine(
                        "NUCmer P1 : Validating the Unique Matches using Cluster Builder");
                    Assert.IsTrue(this.ValidateClusterBuilderMatches(mums, nodeName, propParam));
                    ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the cluster builder matches for the sequences.");
                    break;
                default:
                    break;
            }

            ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
        }
Example #3
0
        /// <summary>
        /// Searches a single query sequence against a suffix tree built from a single
        /// reference sequence and validates the matches that are unique in the reference.
        /// </summary>
        /// <param name="nodeName">Configuration node the inputs are read from.</param>
        /// <param name="isFilePath">
        /// True to take the first sequence of the reference and query FastA files named in
        /// the node; false to read both sequence strings directly from the node.
        /// </param>
        void ValidateFindMatchSimpleSuffixGeneralTestCases(string nodeName, bool isFilePath)
        {
            ISequence referenceSeq;
            ISequence querySeq;
            string referenceSequence;
            string querySequence;

            if (isFilePath)
            {
                // Reference: first sequence of the FastA file named in the configuration.
                string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.FilePathNode);
                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

                var parser = new FastAParser();
                referenceSeq = parser.Parse(filePath).ElementAt(0);
                referenceSequence = new string(referenceSeq.Select(symbol => (char)symbol).ToArray());

                // Query: first sequence of the search FastA file, read with the same parser.
                string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceFilePathNode);
                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

                querySeq = parser.Parse(queryFilePath).ElementAt(0);
                querySequence = new string(querySeq.Select(symbol => (char)symbol).ToArray());
            }
            else
            {
                // Reference: sequence text and alphabet name come straight from the configuration.
                referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SequenceNode);
                string referenceAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode);
                referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabetName),
                    this.encodingObj.GetBytes(referenceSequence));

                // Query: same approach, using the search-specific nodes.
                querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceNode);
                string queryAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceAlphabetNode);
                querySeq = new Sequence(Utility.GetAlphabet(queryAlphabetName),
                    this.encodingObj.GetBytes(querySequence));
            }

            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Build the suffix tree for the reference and apply the configured minimum match length.
            var suffixTreeBuilder = new MultiWaySuffixTree(referenceSeq as Sequence)
            {
                MinLengthOfMatch = long.Parse(mumLength, null)
            };
            IEnumerable<Match> matches = suffixTreeBuilder.SearchMatchesUniqueInReference(querySeq);

            // Compare the matches found with the expected ones.
            ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
            Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName, LISParameters.FindUniqueMatches));

            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                referenceSequence, querySequence));
        }
Example #4
0
        /// <summary>
        /// Validates SearchMatches() with different inputs: builds a suffix tree from the
        /// configured reference sequence, searches the configured query sequence and
        /// validates the returned matches.
        /// </summary>
        /// <param name="nodeName">Parent Node from Xml.</param>
        void ValidateSearchMatch(string nodeName)
        {
            // The reference and the query each have their own alphabet node. Keep the two
            // names in separate variables so the reference is not built with the query's
            // alphabet (previously the first value was overwritten before it was used).
            string referenceAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.AlphabetNameNode);

            // Gets the reference and query sequences from the configuration file
            string referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.SequenceNode);

            string querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.SearchSequenceNode);

            string queryAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.SearchSequenceAlphabetNode);

            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Builds the suffix tree for the reference sequence.
            Sequence referenceSequenceForMatches = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceSequence);
            MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(referenceSequenceForMatches);
            suffixTree.MinLengthOfMatch = long.Parse(mumLength, null);

            // Searches the query sequence in the tree.
            Sequence querySequenceForMatches = new Sequence(Utility.GetAlphabet(queryAlphabetName), querySequence);
            IEnumerable<Match> matches = suffixTree.SearchMatches(querySequenceForMatches);

            // Validates the matches found.
            ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
            Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName, LISParameters.FindUniqueMatches));

            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                referenceSequence, querySequence));
        }
Example #5
0
        /// <summary>
        /// Validates most of the find matches suffix tree test cases with varying parameters.
        /// The reference sequences are concatenated (a '+' is appended after each one), a
        /// suffix tree is built over the result, and every query sequence is searched for
        /// matches that are unique in the reference.
        /// </summary>
        /// <param name="nodeName">Node name which needs to be read for execution.</param>
        /// <param name="isFilePath">
        /// True to read the sequences from the FastA files named in the node; false to read
        /// the sequence strings directly from the node.
        /// </param>
        /// <param name="additionalParam">LIS action type enum</param>
        void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
            AdditionalParameters additionalParam)
        {
            List<ISequence> referenceSeqList;
            var searchSeqList = new List<ISequence>();

            if (isFilePath)
            {
                // Gets the reference sequences from the FastA file
                string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.FilePathNode);

                Assert.IsNotNull(filePath);

                FastAParser parser = new FastAParser();

                // Materialize once: the result is consumed twice below (concatenation and
                // alphabet lookup), and the parser's IEnumerable may be lazily evaluated,
                // in which case each enumeration would read the file again.
                referenceSeqList = parser.Parse(filePath).ToList();

                // Gets the query sequences from the FastA file
                string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);

                searchSeqList.AddRange(parser.Parse(queryFilePath));
            }
            else
            {
                // Gets the reference & search sequences from the configuration file
                string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName,
                    Constants.ReferenceSequencesNode);
                string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName,
                    Constants.SearchSequencesNode);

                IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                       Constants.AlphabetNameNode));

                referenceSeqList = referenceSequences.Select(t => new Sequence(seqAlphabet, this.encodingObj.GetBytes(t))).Cast<ISequence>().ToList();

                searchSeqList.AddRange(searchSequences.Select(t => new Sequence(seqAlphabet, this.encodingObj.GetBytes(t))).Cast<ISequence>());
            }

            // Fail with a clear assertion instead of an exception from First() below.
            Assert.IsTrue(referenceSeqList.Count > 0);

            // Concatenate all reference sequences into one, appending '+' after each of them.
            List<byte> referenceBytes = new List<byte>();
            foreach (ISequence seq in referenceSeqList)
            {
                referenceBytes.AddRange(seq);
                referenceBytes.Add((byte)'+');
            }

            Sequence referenceSeqs = new Sequence(referenceSeqList.First().Alphabet.GetMummerAlphabet(),
                referenceBytes.ToArray());

            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Builds the suffix tree for the concatenated reference sequence.
            MultiWaySuffixTree suffixTreeBuilder = new MultiWaySuffixTree(referenceSeqs)
                {
                    MinLengthOfMatch = long.Parse(mumLength, null)
                };

            // Collect the matches of every query sequence, in query order. Only the
            // flattened list is needed, so no per-query dictionary is built.
            List<Match> mums = searchSeqList
                .SelectMany(query => suffixTreeBuilder.SearchMatchesUniqueInReference(query))
                .ToList();

            switch (additionalParam)
            {
                // Both actions are validated by the same call; the action type is forwarded.
                case AdditionalParameters.FindUniqueMatches:
                case AdditionalParameters.PerformClusterBuilder:
                    Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, additionalParam, isFilePath));
                    break;
                default:
                    break;
            }
        }
Example #6
0
        /// <summary>
        /// Entry point. Parses the command line, builds a suffix tree over the first
        /// sequence of the reference FastA file (first file argument), searches every
        /// sequence of the query FastA file (second file argument) with the selected
        /// match option and writes the matches. With the verbose option, timing and
        /// memory statistics are written to standard error.
        /// </summary>
        /// <param name="args">Command-line arguments; help is printed when none are given.</param>
        static void Main(string[] args)
        {
            try
            {
                Stopwatch swMumUtil = Stopwatch.StartNew();
                Stopwatch swInterval = new Stopwatch();

                Console.Error.WriteLine(SplashString());
                if (args.Length == 0)
                {
                    Console.WriteLine(Resources.MumUtilHelp);
                    return;
                }

                CommandLineOptions myArgs = ProcessCommandLine(args);
                if (myArgs.help)
                {
                    Console.WriteLine(Resources.MumUtilHelp);
                    return;
                }

                // ---- Reference file: only its first sequence is used. ----
                long refFileLength = new FileInfo(myArgs.fileList[0]).Length;

                swInterval.Restart();
                IEnumerable<ISequence> referenceSequences = ParseFastA(myArgs.fileList[0]);
                Sequence referenceSequence = referenceSequences.First() as Sequence;
                swInterval.Stop();

                if (referenceSequence == null)
                {
                    // Report the problem explicitly rather than failing later with a
                    // NullReferenceException; handled by the catch block below.
                    throw new InvalidOperationException(
                        "The first sequence of the reference file is not a Sequence instance.");
                }

                if (myArgs.verbose)
                {
                    Console.Error.WriteLine();
                    Console.Error.WriteLine("  Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.fileList[0]));
                    Console.Error.WriteLine("        Length of first Sequence: {0:#,000}", referenceSequence.Count);
                    Console.Error.WriteLine("            Read/Processing time: {0}", swInterval.Elapsed);
                    Console.Error.WriteLine("            File size           : {0:#,000} bytes", refFileLength);
                }

                // ---- Query file. ----
                long queryFileLength = new FileInfo(myArgs.fileList[1]).Length;

                IEnumerable<ISequence> parsedQuerySequences = ParseFastA(myArgs.fileList[1]);
                IEnumerable<ISequence> querySequences = parsedQuerySequences;

                if (myArgs.reverseOnly)
                {
                    // convert to reverse complement sequences
                    querySequences = ReverseComplementSequenceList(parsedQuerySequences);
                }
                else if (myArgs.both)
                {
                    // add the reverse complement sequences along with query sequences.
                    querySequences = AddReverseComplementsToSequenceList(parsedQuerySequences);
                }

                // DISCUSSION:
                // Three possible outputs desired.  Globally unique 'mum' (v1), unique in reference sequence (v2),
                //   or get the maximum matches of length or greater.
                //
                mummerTime = new TimeSpan();
                writetime = new TimeSpan();
                long memoryAtStart = 0;
                long memoryAtEnd = 0;

                // The total-runtime watch is paused while memory is sampled so that the
                // forced collection is not counted as MumUtil run time.
                if (myArgs.verbose)
                {
                    swMumUtil.Stop();
                    memoryAtStart = GC.GetTotalMemory(true);
                    swMumUtil.Start();
                }

                swInterval.Restart();
                MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(referenceSequence);
                swInterval.Stop();

                if (myArgs.verbose)
                {
                    swMumUtil.Stop();
                    memoryAtEnd = GC.GetTotalMemory(true);
                    swMumUtil.Start();
                }

                MUMmer mummer = new MUMmer(suffixTree);

                if (myArgs.verbose)
                {
                    Console.Error.WriteLine();
                    Console.Error.WriteLine("Suffix tree construction time   : {0}", swInterval.Elapsed);
                    Console.Error.WriteLine("Memory consumed by Suffix tree  : {0:#,000}", memoryAtEnd - memoryAtStart);
                    Console.Error.WriteLine("Total edges created             : {0:#,000}", suffixTree.EdgesCount);
                    Console.Error.WriteLine("Memory per edge                 : {0:#,000.00} bytes", (((double)(memoryAtEnd - memoryAtStart)) / suffixTree.EdgesCount));
                    Console.Error.WriteLine();
                    Console.Error.WriteLine("      Processed Query FastA file: {0}", Path.GetFullPath(myArgs.fileList[1]));
                    Console.Error.WriteLine("            File Size           : {0:#,000} bytes", queryFileLength);
                }

                mummer.LengthOfMUM = myArgs.length;
                mummer.NoAmbiguity = myArgs.noAmbiguity;

                // ---- Select the search to run; the loop below is shared by all options. ----
                string outputOption;
                Func<Sequence, IEnumerable<Match>> findMatches;

                if (myArgs.maxmatch)
                {
                    outputOption = "GetMumsMaxMatch()";
                    findMatches = query => mummer.GetMatches(query);
                }
                else if (myArgs.mum)
                {
                    // TODO: After implementing GetMatchesUniqueInBothReferenceAndQuery() in MUMmer,
                    //       replace GetMatchesUniqueInReference() with it in the line below.
                    outputOption = "GetMumsMum()";
                    findMatches = query => mummer.GetMatchesUniqueInReference(query);
                }
                else if (myArgs.mumreference)
                {
                    outputOption = "GetMumsReference()";
                    findMatches = query => mummer.GetMatchesUniqueInReference(query);
                }
                else
                {
                    // cannot happen as argument processing already asserted one of the three options must be specified
                    Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
                    Environment.Exit(-1);

                    // The compiler does not treat Environment.Exit() as a no-return call;
                    // this throw satisfies definite assignment of the locals above.
                    throw new InvalidOperationException("Never hit this");
                }

                long querySeqCount = 0;
                double sumofSeqLength = 0;
                TimeSpan totalTimetakenToProcessQuerySequences = new TimeSpan();

                // swInterval runs while the enumerator produces the next query sequence,
                // then is reused to time the search and the output for that sequence.
                swInterval.Restart();
                foreach (Sequence qSeq in querySequences)
                {
                    // Stop the watch after each query sequence parsed.
                    swInterval.Stop();

                    // Add total time to process query sequence.
                    // if reverse complement option is set, includes reverse complement time also.
                    totalTimetakenToProcessQuerySequences = totalTimetakenToProcessQuerySequences.Add(swInterval.Elapsed);

                    swInterval.Restart();
                    IEnumerable<Match> mums = findMatches(qSeq);
                    swInterval.Stop();

                    // Add time taken by the selected search.
                    mummerTime = mummerTime.Add(swInterval.Elapsed);

                    swInterval.Restart();
                    WriteMums(mums, referenceSequence, qSeq, myArgs);
                    swInterval.Stop();

                    // Add time taken by write matches.
                    writetime = writetime.Add(swInterval.Elapsed);

                    querySeqCount++;
                    sumofSeqLength += qSeq.Count;

                    // Start the watch for next query sequence parse.
                    swInterval.Restart();
                }

                swInterval.Stop();

                if (myArgs.verbose)
                {
                    if (myArgs.reverseOnly || myArgs.both)
                    {
                        Console.Error.WriteLine("        Read/Processing time          : {0}", timeTakenToParseQuerySequences);
                        Console.Error.WriteLine("     Reverse Complement time          : {0}", timeTakenToGetReverseComplement);
                        Console.Error.WriteLine("     Total time taken to Process reads: {0}", totalTimetakenToProcessQuerySequences);
                    }
                    else
                    {
                        Console.Error.WriteLine("        Read/Processing time          : {0}", totalTimetakenToProcessQuerySequences);
                    }

                    // Avoid reporting NaN when the query file contained no sequences.
                    double averageSeqLength = querySeqCount > 0 ? sumofSeqLength / querySeqCount : 0;

                    Console.Error.WriteLine();
                    Console.Error.WriteLine("         Number of query Sequences        : {0:#,000}", querySeqCount);
                    Console.Error.WriteLine("         Average length of query Sequences: {0:#,000}", averageSeqLength);
                    Console.Error.WriteLine();
                    Console.Error.WriteLine("Compute {0,20} time              : {1}", outputOption, mummerTime);
                    Console.Error.WriteLine("                WriteMums() time          : {0}", writetime);
                }

                swMumUtil.Stop();
                if (myArgs.verbose)
                {
                    Console.Error.WriteLine("           Total MumUtil Runtime      : {0}", swMumUtil.Elapsed);
                }
            }
            catch (Exception ex)
            {
                DisplayException(ex);
            }
        }
Example #7
0
        /// <summary>
        /// Builds a suffix tree from a reference sequence and checks that the sequence
        /// exposed by the tree has the same text as the input.
        /// </summary>
        /// <param name="nodeName">Configuration node the input is read from.</param>
        /// <param name="isFilePath">
        /// True to take the first sequence of the FastA file named in the node; false to
        /// read the sequence string and alphabet name directly from the node.
        /// </param>
        void ValidateBuildSuffixTreeGeneralTestCases(string nodeName, bool isFilePath)
        {
            ISequence referenceSeq;
            string referenceSequence;

            if (isFilePath)
            {
                // Gets the reference sequence from the FastA file named in the configuration file
                string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null, "MUMmer P1 : Successfully validated the File Path '{0}'.", filePath));

                FastAParser fastaParserObj = new FastAParser();
                IEnumerable<ISequence> referenceSeqs = fastaParserObj.Parse(filePath);

                referenceSeq = referenceSeqs.FirstOrDefault();
                Assert.IsNotNull(referenceSeq);
                referenceSequence = referenceSeq.ConvertToString();
            }
            else
            {
                // Gets the reference sequence from the configuration file
                referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
                referenceSeq = new Sequence(Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, 
                    Constants.AlphabetNameNode)), this.encodingObj.GetBytes(referenceSequence));
            }

            // The tree is constructed from a Sequence; fail with an assertion if the
            // parsed sequence is of a different type instead of passing null to it.
            Sequence sequenceForTree = referenceSeq as Sequence;
            Assert.IsNotNull(sequenceForTree);

            // Builds the suffix tree for the reference sequence passed.
            MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(sequenceForTree);

            // Expected value first, actual second, so that a failure message labels them correctly.
            string treeSequence = new string(suffixTree.Sequence.Select(a => (char)a).ToArray());
            Assert.AreEqual(referenceSequence, treeSequence);
            ApplicationLog.WriteLine(string.Format(null,
                "MUMmer P1 : Successfully validated the Suffix Tree properties for the sequence '{0}'.",
                referenceSequence));
        }
Example #8
0
        /// <summary>
        /// Builds a suffix tree for a reference sequence, searches it with one or two
        /// query sequences for matches that are unique in the reference, and checks
        /// every result with ValidateUniqueMatches.
        /// </summary>
        /// <param name="nodeName">Xml node from which file paths, sequences, alphabet names
        /// and the MUM length are read.</param>
        /// <param name="isFilePath">True to parse the reference and query sequences from the
        /// files named in the xml node; false to read the sequence text directly from the node.</param>
        /// <param name="isMultiSequenceSearchFile">True to validate the first two sequences of the
        /// parsed query file; false to validate only the single query sequence.</param>
        /// <param name="isAmbiguousCharacter">Selects the ambiguous Dna or Rna alphabet for the
        /// file parsers; any other value leaves the parsers' alphabet untouched.</param>
        void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
            bool isMultiSequenceSearchFile, PhaseOneAmbiguityParameters isAmbiguousCharacter)
        {
            ISequence referenceSeq;
            ISequence querySeq;
            string referenceSequence;
            string querySequence;

            // Only populated when the query comes from a file; stays null otherwise.
            IEnumerable<ISequence> querySeqs = null;

            if (isFilePath)
            {
                // Gets the reference sequence file path from the configuration file
                string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

                // Both parsers take the same alphabet, so configure them in one place.
                FastAParser parser = new FastAParser();
                FastAParser queryParser = new FastAParser();
                switch (isAmbiguousCharacter)
                {
                    case PhaseOneAmbiguityParameters.Dna:
                        parser.Alphabet = AmbiguousDnaAlphabet.Instance;
                        queryParser.Alphabet = AmbiguousDnaAlphabet.Instance;
                        break;
                    case PhaseOneAmbiguityParameters.Rna:
                        parser.Alphabet = AmbiguousRnaAlphabet.Instance;
                        queryParser.Alphabet = AmbiguousRnaAlphabet.Instance;
                        break;
                    default:
                        break;
                }

                referenceSeq = parser.Parse(filePath).ElementAt(0);
                referenceSequence = new string(referenceSeq.Select(a => (char)a).ToArray());

                // Gets the query (search) sequence file path from the configuration file
                string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

                querySeqs = queryParser.Parse(queryFilePath);
                querySeq = querySeqs.ElementAt(0);
                querySequence = new string(querySeq.Select(a => (char)a).ToArray());
            }
            else
            {
                // Gets the reference sequence and its alphabet from the configuration file
                referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SequenceNode);

                string referenceAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode);

                referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabet),
                   this.encodingObj.GetBytes(referenceSequence));

                // Gets the query sequence and its alphabet from the configuration file
                querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceNode);

                string queryAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceAlphabetNode);

                querySeq = new Sequence(Utility.GetAlphabet(queryAlphabet),
                   this.encodingObj.GetBytes(querySequence));
            }

            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // An 'as' cast yields null on a type mismatch; fail here with a clear message
            // rather than handing null to the suffix tree constructor.
            Sequence referenceAsSequence = referenceSeq as Sequence;
            Assert.IsNotNull(referenceAsSequence,
                "The reference sequence could not be converted to a Sequence instance.");

            // Builds the suffix tree for the reference sequence passed.
            MultiWaySuffixTree suffixTreeBuilder = new MultiWaySuffixTree(referenceAsSequence);
            suffixTreeBuilder.MinLengthOfMatch = long.Parse(mumLength, null);

            IEnumerable<Match> matches;

            // For multi sequence query file validate the first two sequences with the reference sequence
            if (isMultiSequenceSearchFile)
            {
                // querySeqs is only assigned in the file-path branch above.
                Assert.IsNotNull(querySeqs,
                    "A multi sequence search requires the query sequences to be read from a file.");

                for (int queryIndex = 0; queryIndex < 2; queryIndex++)
                {
                    matches = suffixTreeBuilder.SearchMatchesUniqueInReference(querySeqs.ElementAt(queryIndex));
                    Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName));
                }
            }
            else
            {
                matches = suffixTreeBuilder.SearchMatchesUniqueInReference(querySeq);
                Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName));
            }

            ApplicationLog.WriteLine(string.Format(null, "MUMmer P1 : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                referenceSequence, querySequence));
        }
Example #9
0
        /// <summary>
        /// Validates Constructor of Mummer Class with a Suffix tree as parameter.
        /// Builds a suffix tree from the reference sequence, wraps it in a MUMmer
        /// instance, fetches the matches for the query sequence and checks them
        /// with ValidateUniqueMatches.
        /// </summary>
        /// <param name="nodeName">Parent Node from Xml.</param>
        void ValidateConstructorWithSuffixTree(string nodeName)
        {
            // Alphabet of the reference sequence. Previously this value was overwritten
            // by the search sequence alphabet before it was ever used, so the reference
            // sequence was built with the query's alphabet.
            string referenceAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode);

            // Gets the reference sequence from the configuration file
            string referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.SequenceNode);

            // Gets the query sequence and its alphabet from the configuration file
            string querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.SearchSequenceNode);

            string queryAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.SearchSequenceAlphabetNode);

            Sequence refSequence = new Sequence(Utility.GetAlphabet(referenceAlphabet), referenceSequence);

            MultiWaySuffixTree suffixTree = new MultiWaySuffixTree(refSequence);
            Bio.Algorithms.MUMmer.MUMmer mum = new Bio.Algorithms.MUMmer.MUMmer(suffixTree);
            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);
            mum.LengthOfMUM = long.Parse(mumLength, null);

            Sequence sequence = new Sequence(Utility.GetAlphabet(queryAlphabet), querySequence);
            IEnumerable<Match> matches = mum.GetMatches(sequence);

            // Validates the Unique Matches.
            ApplicationLog.WriteLine(@"MUMmer BVT : Validating the Unique Matches for 
                                            implementation of customised MUMer Constructor");
            Assert.IsTrue(this.ValidateUniqueMatches(matches, nodeName));

            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                referenceSequence, querySequence));
        }