public long GetEdgesCount(SequenceModel sequenceData)
        {
            var sequence = _sequenceProvider.Provide(sequenceData?.FileName, sequenceData?.Content)?.First();

            _suffixTree = new MultiWaySuffixTree(sequence);
            return(_suffixTree.EdgesCount);
        }
        public IEnumerable <Match> GetUniqueMatches(SequenceModel sequenceData)
        {
            var sequence = _sequenceProvider.Provide(sequenceData?.FileName, sequenceData?.Content)?.First();

            _suffixTree = new MultiWaySuffixTree(sequence);
            return(_suffixTree.SearchMatchesUniqueInReference(sequence));
        }
Ejemplo n.º 3
0
        void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
                                                     AdditionalParameters additionalParam)
        {
            ISequence referenceSeqs = null;

            string[] referenceSequences = null;
            string[] searchSequences    = null;

            List <ISequence> searchSeqList = new List <ISequence>();

            if (isFilePath)
            {
                // Gets the reference sequence from the FastA file
                string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                  Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "NUCmer BVT : Successfully validated the File Path '{0}'.", filePath));

                using (FastAParser parser = new FastAParser(filePath))
                {
                    IEnumerable <ISequence> referenceSeqList = parser.Parse();
                    List <Byte>             byteList         = new List <Byte>();
                    foreach (ISequence seq in referenceSeqList)
                    {
                        byteList.AddRange(seq);
                        byteList.Add((byte)'+');
                    }
                    referenceSeqs = new Sequence(AlphabetExtensions.GetMummerAlphabet(referenceSeqList.ElementAt(0).Alphabet),
                                                 byteList.ToArray());

                    // Gets the query sequence from the FastA file
                    string queryFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                           Constants.SearchSequenceFilePathNode);

                    Assert.IsNotNull(queryFilePath);
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                           "NUCmer BVT : Successfully validated the File Path '{0}'.", queryFilePath));

                    FastAParser             queryParser  = new FastAParser(queryFilePath);
                    IEnumerable <ISequence> querySeqList = queryParser.Parse();

                    foreach (ISequence seq in querySeqList)
                    {
                        searchSeqList.Add(seq);
                    }
                }
            }
            else
            {
                // Gets the reference & search sequences from the configurtion file
                referenceSequences = utilityObj.xmlUtil.GetTextValues(nodeName,
                                                                      Constants.ReferenceSequencesNode);
                searchSequences = utilityObj.xmlUtil.GetTextValues(nodeName,
                                                                   Constants.SearchSequencesNode);

                IAlphabet seqAlphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                            Constants.AlphabetNameNode));

                List <ISequence> refSeqList = new List <ISequence>();

                for (int i = 0; i < referenceSequences.Length; i++)
                {
                    ISequence referSeq = new Sequence(seqAlphabet, encodingObj.GetBytes(referenceSequences[i]));
                    refSeqList.Add(referSeq);
                }

                List <Byte> byteListQuery = new List <Byte>();
                foreach (ISequence seq in refSeqList)
                {
                    byteListQuery.AddRange(seq);
                    byteListQuery.Add((byte)'+');
                }
                referenceSeqs = new Sequence(AlphabetExtensions.GetMummerAlphabet(refSeqList.ElementAt(0).Alphabet),
                                             byteListQuery.ToArray());

                for (int i = 0; i < searchSequences.Length; i++)
                {
                    ISequence searchSeq = new Sequence(seqAlphabet, encodingObj.GetBytes(searchSequences[i]));
                    searchSeqList.Add(searchSeq);
                }
            }

            string mumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Builds the suffix for the reference sequence passed.
            MultiWaySuffixTree suffixTreeBuilder = new MultiWaySuffixTree(referenceSeqs as Sequence);

            suffixTreeBuilder.MinLengthOfMatch = long.Parse(mumLength, null);

            Dictionary <ISequence, IEnumerable <Match> > matches =
                new Dictionary <ISequence, IEnumerable <Match> >();

            for (int i = 0; i < searchSeqList.Count; i++)
            {
                matches.Add(searchSeqList[i],
                            suffixTreeBuilder.SearchMatchesUniqueInReference(searchSeqList[i]));
            }

            List <Match> mums = new List <Match>();

            foreach (var a in matches.Values)
            {
                mums.AddRange(a);
            }

            switch (additionalParam)
            {
            case AdditionalParameters.FindUniqueMatches:
                // Validates the Unique Matches.
                ApplicationLog.WriteLine("NUCmer BVT : Validating the Unique Matches");
                Assert.IsTrue(ValidateUniqueMatches(mums, nodeName, additionalParam, isFilePath));
                Console.WriteLine(
                    "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
                break;

            case AdditionalParameters.PerformClusterBuilder:
                // Validates the Unique Matches.
                ApplicationLog.WriteLine(
                    "NUCmer BVT : Validating the Unique Matches using Cluster Builder");
                Assert.IsTrue(ValidateUniqueMatches(mums, nodeName, additionalParam, isFilePath));
                Console.WriteLine(
                    "NUCmer BVT : Successfully validated the all the cluster builder matches for the sequences.");
                break;

            default:
                break;
            }


            ApplicationLog.WriteLine(
                "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
        }
Ejemplo n.º 4
0
        static void Main(string[] args)
        {
            try
            {
                //            DateTime dStart = DateTime.Now;
                Stopwatch swMumUtil  = Stopwatch.StartNew();
                Stopwatch swInterval = new Stopwatch();

                Console.Error.WriteLine(SplashString());
                if (args.Length > 0)
                {
                    CommandLineOptions myArgs = ProcessCommandLine(args);
                    if (myArgs.help)
                    {
                        Console.WriteLine(Resources.MumUtilHelp);
                    }
                    else
                    {
                        FileInfo refFileinfo   = new FileInfo(myArgs.fileList[0]);
                        long     refFileLength = refFileinfo.Length;
                        refFileinfo = null;

                        swInterval.Restart();
                        IEnumerable <ISequence> referenceSequences = ParseFastA(myArgs.fileList[0]);
                        Sequence referenceSequence = referenceSequences.First() as Sequence;
                        swInterval.Stop();
                        if (myArgs.verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Processed Reference FastA file: {0}", Path.GetFullPath(myArgs.fileList[0]));
                            Console.Error.WriteLine("        Length of first Sequence: {0:#,000}", referenceSequence.Count);
                            Console.Error.WriteLine("            Read/Processing time: {0}", swInterval.Elapsed);
                            Console.Error.WriteLine("            File size           : {0:#,000} bytes", refFileLength);
                        }

                        FileInfo queryFileinfo   = new FileInfo(myArgs.fileList[1]);
                        long     queryFileLength = queryFileinfo.Length;
                        refFileinfo = null;

                        IEnumerable <ISequence> parsedQuerySequences = ParseFastA(myArgs.fileList[1]);

                        IEnumerable <ISequence> querySequences = parsedQuerySequences;

                        if (myArgs.reverseOnly)
                        {
                            // convert to reverse complement sequences
                            querySequences = ReverseComplementSequenceList(parsedQuerySequences);
                        }
                        else if (myArgs.both)
                        {
                            // add the reverse complement sequences along with query sequences.
                            querySequences = AddReverseComplementsToSequenceList(parsedQuerySequences);
                        }

                        // DISCUSSION:
                        // Three possible outputs desired.  Globally unique 'mum' (v1), unique in reference sequence (v2),
                        //   or get the maximum matches of length or greater.
                        //
                        mummerTime = new TimeSpan();
                        writetime  = new TimeSpan();
                        IEnumerable <Match> mums;
                        long memoryAtStart = 0;
                        long memoryAtEnd   = 0;
                        if (myArgs.verbose)
                        {
                            swMumUtil.Stop();
                            memoryAtStart = GC.GetTotalMemory(true);
                            swMumUtil.Start();
                        }

                        swInterval.Restart();
                        MultiWaySuffixTree suffixTreee = new MultiWaySuffixTree(referenceSequence);
                        swInterval.Stop();

                        if (myArgs.verbose)
                        {
                            swMumUtil.Stop();
                            memoryAtEnd = GC.GetTotalMemory(true);
                            swMumUtil.Start();
                        }

                        MUMmer mummer = new MUMmer(suffixTreee);

                        if (myArgs.verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("Suffix tree construction time   : {0}", swInterval.Elapsed);
                            Console.Error.WriteLine("Memory consumed by Suffix tree  : {0:#,000}", memoryAtEnd - memoryAtStart);
                            Console.Error.WriteLine("Total edges created             : {0:#,000}", suffixTreee.EdgesCount);
                            Console.Error.WriteLine("Memory per edge                 : {0:#,000.00} bytes", (((double)(memoryAtEnd - memoryAtStart)) / suffixTreee.EdgesCount));
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("      Processed Query FastA file: {0}", Path.GetFullPath(myArgs.fileList[1]));
                            Console.Error.WriteLine("            File Size           : {0:#,000} bytes", queryFileLength);
                        }

                        mummer.LengthOfMUM = myArgs.length;
                        mummer.NoAmbiguity = myArgs.noAmbiguity;

                        long     querySeqCount  = 0;
                        double   sumofSeqLength = 0;
                        TimeSpan totalTimetakenToProcessQuerySequences = new TimeSpan();
                        string   outputOption = string.Empty;

                        if (myArgs.maxmatch)
                        {
                            outputOption = "GetMumsMaxMatch()";
                            swInterval.Restart();
                            foreach (Sequence qSeq in querySequences)
                            {
                                // Stop the wath after each query sequence parsed.
                                swInterval.Stop();

                                // Add total time to process query sequence.
                                // if reverse complement option is set, includes reverse complement time also.
                                totalTimetakenToProcessQuerySequences = totalTimetakenToProcessQuerySequences.Add(swInterval.Elapsed);

                                mums = mummer.GetMatches(qSeq);

                                WriteMums(mums, referenceSequence, qSeq, myArgs);

                                querySeqCount++;
                                sumofSeqLength += qSeq.Count;

                                // Start the watch for next query sequence parse.
                                swInterval.Restart();
                            }

                            swInterval.Stop();
                        }
                        else if (myArgs.mum)
                        {
                            // mums = mum3.GetMumsMum( referenceSequences[0], querySequences);
                            outputOption = "GetMumsMum()";
                            swInterval.Restart();
                            foreach (Sequence qSeq in querySequences)
                            {
                                // Stop the wath after each query sequence parsed.
                                swInterval.Stop();

                                // Add total time to process query sequence.
                                // if reverse complement option is set, includes reverse complement time also.
                                totalTimetakenToProcessQuerySequences = totalTimetakenToProcessQuerySequences.Add(swInterval.Elapsed);

                                swInterval.Restart();
                                // TODO: After implementing GetMatchesUniqueInBothReferenceAndQuery() in MUMmer
                                ////       GetMatchesUniqueInReference() with GetMatchesUniqueInBothReferenceAndQuery() in the line below.
                                mums = mummer.GetMatchesUniqueInReference(qSeq);
                                swInterval.Stop();

                                // Add time taken by GetMatchesUniqueInBothReferenceAndQuery().
                                mummerTime = mummerTime.Add(swInterval.Elapsed);

                                swInterval.Restart();
                                WriteMums(mums, referenceSequence, qSeq, myArgs);
                                swInterval.Stop();

                                // Add time taken by write matches.
                                writetime = writetime.Add(swInterval.Elapsed);

                                querySeqCount++;
                                sumofSeqLength += qSeq.Count;

                                // Start the watch for next query sequence parse.
                                swInterval.Restart();
                            }

                            swInterval.Stop();
                        }
                        else if (myArgs.mumreference)
                        {
                            // NOTE:
                            //     mum3.GetMUMs() this really implements the GetMumReference() functionality
                            // mums = mum3.GetMumsReference( referenceSequences[0], querySequences);     // should be
                            //swInterval.Restart();
                            outputOption = "GetMumsReference()";
                            swInterval.Restart();
                            foreach (Sequence qSeq in querySequences)
                            {
                                // Stop the wath after each query sequence parsed.
                                swInterval.Stop();

                                // Add total time to process query sequence.
                                // if reverse complement option is set, includes reverse complement time also.
                                totalTimetakenToProcessQuerySequences = totalTimetakenToProcessQuerySequences.Add(swInterval.Elapsed);

                                swInterval.Restart();
                                mums = mummer.GetMatchesUniqueInReference(qSeq);
                                swInterval.Stop();

                                // Add time taken by GetMatchesUniqueInReference().
                                mummerTime = mummerTime.Add(swInterval.Elapsed);

                                swInterval.Restart();
                                WriteMums(mums, referenceSequence, qSeq, myArgs);
                                swInterval.Stop();

                                // Add time taken by write matches.
                                writetime = writetime.Add(swInterval.Elapsed);
                                querySeqCount++;
                                sumofSeqLength += qSeq.Count;

                                // Start the watch for next query sequence parse.
                                swInterval.Restart();
                            }

                            swInterval.Stop();
                        }
                        else
                        {
                            // cannot happen as argument processing already asserted one of the three options must be specified
                            Console.Error.WriteLine("\nError: one of /maxmatch, /mum, /mumreference options must be specified.");
                            Environment.Exit(-1);
                            // kill the error about unitialized use of 'mums' in the next block...the compiler does not recognize
                            //   Environment.Exit() as a no-return function
                            throw new Exception("Never hit this");
                        }

                        if (myArgs.verbose)
                        {
                            if (myArgs.reverseOnly || myArgs.both)
                            {
                                Console.Error.WriteLine("        Read/Processing time          : {0}", timeTakenToParseQuerySequences);
                                Console.Error.WriteLine("     Reverse Complement time          : {0}", timeTakenToGetReverseComplement);
                                Console.Error.WriteLine("     Total time taken to Process reads: {0}", totalTimetakenToProcessQuerySequences);
                            }
                            else
                            {
                                Console.Error.WriteLine("        Read/Processing time          : {0}", totalTimetakenToProcessQuerySequences);
                            }

                            Console.Error.WriteLine();
                            Console.Error.WriteLine("         Number of query Sequences        : {0:#,000}", querySeqCount);
                            Console.Error.WriteLine("         Average length of query Sequences: {0:#,000}", sumofSeqLength / querySeqCount);
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("Compute {0,20} time              : {1}", outputOption, mummerTime);
                            Console.Error.WriteLine("                WriteMums() time          : {0}", writetime);
                        }

                        swMumUtil.Stop();
                        if (myArgs.verbose)
                        {
                            Console.Error.WriteLine("           Total MumUtil Runtime      : {0}", swMumUtil.Elapsed);
                        }
                    }
                }
                else
                {
                    Console.WriteLine(Resources.MumUtilHelp);
                }
            }
            catch (Exception ex)
            {
                DisplayException(ex);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Validates most of the find matches suffix tree test cases with varying parameters.
        /// </summary>
        /// <param name="nodeName">Node name which needs to be read for execution.</param>
        /// <param name="isFilePath">Is File Path?</param>
        /// <param name="additionalParam">LIS action type enum</param>
        void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
                                                     AdditionalParameters additionalParam)
        {
            ISequence referenceSeqs;
            var       searchSeqList = new List <ISequence>();

            if (isFilePath)
            {
                // Gets the reference sequence from the FastA file
                string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                       Constants.FilePathNode);

                Assert.IsNotNull(filePath);

                FastAParser             parser           = new FastAParser();
                IEnumerable <ISequence> referenceSeqList = parser.Parse(filePath);
                List <Byte>             byteList         = new List <Byte>();
                foreach (ISequence seq in referenceSeqList)
                {
                    byteList.AddRange(seq);
                    byteList.Add((byte)'+');
                }
                referenceSeqs = new Sequence(referenceSeqList.First().Alphabet.GetMummerAlphabet(), byteList.ToArray());

                // Gets the query sequence from the FastA file
                string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                            Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);

                IEnumerable <ISequence> querySeqList = parser.Parse(queryFilePath);
                searchSeqList.AddRange(querySeqList);
            }
            else
            {
                // Gets the reference & search sequences from the configuration file
                string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName,
                                                                                    Constants.ReferenceSequencesNode);
                string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName,
                                                                                 Constants.SearchSequencesNode);

                IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                                 Constants.AlphabetNameNode));

                List <ISequence> refSeqList = referenceSequences.Select(t => new Sequence(seqAlphabet, this.encodingObj.GetBytes(t))).Cast <ISequence>().ToList();

                List <Byte> byteListQuery = new List <Byte>();
                foreach (ISequence seq in refSeqList)
                {
                    byteListQuery.AddRange(seq);
                    byteListQuery.Add((byte)'+');
                }
                referenceSeqs = new Sequence(refSeqList.First().Alphabet.GetMummerAlphabet(),
                                             byteListQuery.ToArray());

                searchSeqList.AddRange(searchSequences.Select(t => new Sequence(seqAlphabet, this.encodingObj.GetBytes(t))).Cast <ISequence>());
            }

            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Builds the suffix for the reference sequence passed.
            MultiWaySuffixTree suffixTreeBuilder = new MultiWaySuffixTree(referenceSeqs as Sequence)
            {
                MinLengthOfMatch = long.Parse(mumLength, null)
            };

            var matches = new Dictionary <ISequence, IEnumerable <Match> >();

            foreach (ISequence sequence in searchSeqList)
            {
                matches.Add(sequence,
                            suffixTreeBuilder.SearchMatchesUniqueInReference(sequence));
            }

            List <Match> mums = new List <Match>();

            foreach (var a in matches.Values)
            {
                mums.AddRange(a);
            }

            switch (additionalParam)
            {
            case AdditionalParameters.FindUniqueMatches:
                // Validates the Unique Matches.
                Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, additionalParam, isFilePath));
                break;

            case AdditionalParameters.PerformClusterBuilder:
                // Validates the Unique Matches.
                Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, additionalParam, isFilePath));
                break;

            default:
                break;
            }
        }