Example #1
0
        /// <summary>
        /// Builds the suffix tree for the given reference sequence. The builder is
        /// obtained from <c>Factory.CreateNew</c> for that same sequence.
        /// </summary>
        /// <param name="referenceSequence">Sequence the suffix tree is built from.</param>
        /// <returns>The suffix tree returned by the builder.</returns>
        protected override ISuffixTree BuildSuffixTree(ISequence referenceSequence)
        {
            ISuffixTreeBuilder builder = Factory.CreateNew(referenceSequence);

            return builder.BuildSuffixTree(referenceSequence);
        }
Example #2
0
        /// <summary>
        /// Creates the suffix array from an already built suffix tree by walking the
        /// tree depth-first and recording the leaf number of every leaf reached.
        /// </summary>
        /// <param name="suffixTree">The suffix tree to walk.</param>
        /// <returns>
        /// An array whose length equals the length of the tree's text, filled with
        /// leaf numbers in the order the leaves are visited; <c>null</c> when
        /// <paramref name="suffixTree"/> is <c>null</c>.
        /// </returns>
        public static int[] Create(ISuffixTree suffixTree)
        {
            if (suffixTree == null)
            {
                return null;
            }

            var pending = new Stack<ISuffixNode>();
            pending.Push(suffixTree.Root);

            var result = new int[suffixTree.Text.Length];
            int written = 0;

            while (pending.Count != 0)
            {
                ISuffixNode current = pending.Pop();

                if (!current.IsLeaf)
                {
                    // Push children back-to-front so they are popped in their
                    // original enumeration order.
                    // NOTE(review): presumably that order is lexicographic, which is what
                    // makes the output a suffix array - confirm against the tree builder.
                    foreach (var child in current.Children.Reverse())
                    {
                        pending.Push(child.Value);
                    }

                    continue;
                }

                result[written++] = current.LeafNumber;
            }

            return result;
        }
Example #3
0
        /// <summary>
        /// Returns the text that <c>GetNodeSuffixImpl</c> collects for node
        /// <paramref name="p"/> of tree <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Tree that owns the text the node refers to.</param>
        /// <param name="p">Node whose text is requested.</param>
        /// <returns>The collected text; empty when nothing was appended.</returns>
        public static string GetNodeSuffix(this ISuffixTree t, ISuffixNode p)
        {
            var text = new StringBuilder();
            GetNodeSuffixImpl(t, p, text);

            return text.ToString();
        }
Example #4
0
        /// <summary>
        /// Creates the suffix array from an already built suffix tree. The tree is
        /// walked depth-first with an explicit stack and each leaf's number is stored
        /// in visiting order.
        /// </summary>
        /// <param name="suffixTree">The suffix tree to walk.</param>
        /// <returns>
        /// An array sized to the length of the tree's text holding the leaf numbers in
        /// visiting order, or <c>null</c> when <paramref name="suffixTree"/> is <c>null</c>.
        /// </returns>
        public static int[] Create(ISuffixTree suffixTree)
        {
            if (suffixTree == null)
            {
                return null;
            }

            var toVisit = new Stack<ISuffixNode>();
            toVisit.Push(suffixTree.Root);

            var suffixArray = new int[suffixTree.Text.Length];
            int nextSlot = 0;

            while (toVisit.Count != 0)
            {
                ISuffixNode node = toVisit.Pop();

                if (!node.IsLeaf)
                {
                    // Reversed push order => children come off the stack in their
                    // original enumeration order.
                    foreach (var entry in node.Children.Reverse())
                    {
                        toVisit.Push(entry.Value);
                    }

                    continue;
                }

                suffixArray[nextSlot++] = node.LeafNumber;
            }

            return suffixArray;
        }
Example #5
0
        /// <summary>
        /// Builds a suffix tree over a two-segment sequence with the builder's
        /// persistence threshold set to 0, then checks the number of matches found
        /// for a query with a minimum match length of 3.
        /// </summary>
        public void TestStreamingPersistentSegmentedSequence()
        {
            var reference = new SegmentedSequence(new Sequence(Alphabets.Protein, "AAATTGGC"));
            reference.Sequences.Add(new Sequence(Alphabets.Protein, "ANANA"));

            using (var builder = new SimpleSuffixTreeBuilder())
            {
                builder.PersistenceThreshold = 0;

                ISuffixTree tree = builder.BuildSuffixTree(reference);
                var query = new Sequence(Alphabets.Protein, "AATTNANAGGC");

                IList<MaxUniqueMatch> matches = builder.FindMatches(tree, query, 3);

                // Verify the count of MUMs found
                Assert.AreEqual(3, matches.Count);
            }
        }
Example #6
0
 /// <summary>
 /// Finds all matches of the given sequence in the suffix tree, irrespective of
 /// their uniqueness in the reference or the query sequence.
 /// </summary>
 /// <remarks>
 /// Sets the <c>_findMaximumMatch</c> field to <c>true</c> before delegating to
 /// <c>FindMatchWithOption</c>; the field is not reset by this method.
 /// </remarks>
 /// <param name="suffixTree">Suffix tree to search.</param>
 /// <param name="searchSequence">Query sequence.</param>
 /// <param name="lengthOfMUM">Minimum length of a match.</param>
 /// <returns>Matches found.</returns>
 public IList<MaxUniqueMatch> FindMaximumMatches(
     ISuffixTree suffixTree,
     ISequence searchSequence,
     long lengthOfMUM)
 {
     // Select "maximum match" mode for the shared search routine.
     _findMaximumMatch = true;

     IList<MaxUniqueMatch> matches = FindMatchWithOption(suffixTree, searchSequence, lengthOfMUM);
     return matches;
 }
Example #7
0
        /// <summary>
        /// Generates the list of MUMs for each query sequence and returns them keyed
        /// by query sequence.
        /// If 'performLIS' is true, MUMs are sorted and processed using Longest
        /// Increasing Subsequence (LIS); if it is false, MUMs are returned as they
        /// come out of streaming.
        /// Note: If MaximumMatchEnabled property is true, then MUMs are generated
        /// irrespective of uniqueness in query and reference sequences; else MUMs
        /// are unique in reference sequence only.
        /// </summary>
        /// <remarks>
        /// Query sequences are processed with <c>Parallel.ForEach</c>; additions to the
        /// result dictionary are serialized with a lock. A query that equals the
        /// reference sequence gets no entry. With 'performLIS' set, a query whose
        /// sorted MUM list is empty is stored with a <c>null</c> value.
        /// </remarks>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequenceList">List of query sequences</param>
        /// <param name="performLIS">Boolean indicating whether the Longest Increasing
        /// Subsequence (LIS) module is run on MUMs before returning</param>
        /// <returns>List of MUMs for each query sequence</returns>
        public override IDictionary<ISequence, IList<MaxUniqueMatch>> GetMUMs(
            ISequence referenceSequence,
            IList<ISequence> querySequenceList,
            bool performLIS)
        {
            GetMUMsValidate(referenceSequence, querySequenceList);

            IDictionary<ISequence, IList<MaxUniqueMatch>> queryMums =
                new Dictionary<ISequence, IList<MaxUniqueMatch>>();

            // Step 1: one suffix tree over the reference, shared by all queries.
            ISuffixTree suffixTree = BuildSuffixTree(referenceSequence);

            Parallel.ForEach(querySequenceList, sequence =>
            {
                // The reference itself is never streamed as a query.
                if (sequence.Equals(referenceSequence))
                {
                    return;
                }

                // Step 2: stream the query through the suffix tree.
                IList<MaxUniqueMatch> matches = Streaming(suffixTree, referenceSequence, sequence, LengthOfMUM);

                if (performLIS)
                {
                    // Step 3(a): sort by reference position.
                    matches = SortMum(matches);

                    // Step 3(b): LIS via greedy cover; an empty list is reported as null.
                    matches = matches.Count > 0
                        ? CollectLongestIncreasingSubsequence(matches)
                        : null;
                }

                lock (queryMums)
                {
                    queryMums.Add(sequence, matches);
                }
            });

            return queryMums;
        }
Example #8
0
        /// <summary>
        /// Traverses the suffix tree with the query sequence and returns the MUMs found.
        /// The search is delegated to the builder that <c>Factory.CreateNew</c> returns
        /// for the reference sequence.
        /// </summary>
        /// <param name="suffixTree">Suffix tree</param>
        /// <param name="referenceSequence">Reference sequence, used to pick the builder</param>
        /// <param name="sequence">Query sequence</param>
        /// <param name="lengthOfMUM">Minimum length of MUM</param>
        /// <returns>List of MUMs</returns>
        protected override IList<MaxUniqueMatch> Streaming(
            ISuffixTree suffixTree,
            ISequence referenceSequence,
            ISequence sequence,
            long lengthOfMUM)
        {
            ISuffixTreeBuilder matcher = Factory.CreateNew(referenceSequence);
            IList<MaxUniqueMatch> matches = matcher.FindMatches(suffixTree, sequence, lengthOfMUM);

            return matches;
        }
Example #9
0
        /// <summary>
        /// Appends to <paramref name="sb"/> the edge text of every node on the parent
        /// chain of <paramref name="p"/>, topmost ancestor first and <paramref name="p"/> last.
        /// A node without a parent contributes nothing.
        /// </summary>
        /// <param name="t">Tree whose text the edges index into.</param>
        /// <param name="p">Node to emit text for.</param>
        /// <param name="sb">Builder receiving the text.</param>
        private static void GetNodeSuffixImpl(ISuffixTree t, ISuffixNode p, StringBuilder sb)
        {
            if (p.Parent == null)
            {
                return;
            }

            // Emit the ancestors first so the text comes out top-down.
            GetNodeSuffixImpl(t, p.Parent, sb);

            // A leaf's edge is taken to run to the last character of the text;
            // any other edge ends at its own End index (inclusive).
            int lastIndex;
            if (p.IsLeaf)
            {
                lastIndex = t.Text.Length - 1;
            }
            else
            {
                lastIndex = p.Edge.End;
            }

            int span = lastIndex - p.Edge.Start + 1;
            sb.Append(t.Text.Substring(p.Edge.Start, span));
        }
Example #10
0
        /// <summary>
        /// Initializes a new instance of the NUCmer class on top of an existing suffix tree.
        /// </summary>
        /// <param name="suffixTree">Suffix tree passed to the internal MUMmer instance.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="suffixTree"/> is null.
        /// </exception>
        public NUCmer(ISuffixTree suffixTree)
        {
            if (suffixTree == null)
            {
                throw new ArgumentNullException("suffixTree");
            }

            // Create the internal MUMmer from the tree, then cache its reference sequence.
            var mummer = new MUMmer.MUMmer(suffixTree);
            _internalMummer = mummer;
            _internalReferenceSequence = mummer.ReferenceSequence;

            SetDefaults();
        }
Example #11
0
        /// <summary>
        /// Initializes a new instance of the NUCmer class that reuses an already
        /// built suffix tree.
        /// </summary>
        /// <param name="suffixTree">Suffix tree the internal MUMmer instance is created with.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="suffixTree"/> is null.
        /// </exception>
        public NUCmer(ISuffixTree suffixTree)
        {
            if (suffixTree == null)
            {
                throw new ArgumentNullException("suffixTree");
            }

            // The reference sequence is read back from the MUMmer built on the tree.
            var innerMummer = new MUMmer.MUMmer(suffixTree);
            _internalMummer = innerMummer;
            _internalReferenceSequence = innerMummer.ReferenceSequence;

            SetDefaults();
        }
Example #12
0
        /// <summary>
        /// Initializes a new instance of the MUMmer class with the specified suffix tree,
        /// which allows a custom suffix tree to be used. The reference sequence is
        /// taken from the tree's <c>Sequence</c>.
        /// </summary>
        /// <param name="suffixTree">Suffix tree.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="suffixTree"/> is null.
        /// </exception>
        public MUMmer(ISuffixTree suffixTree)
        {
            if (suffixTree == null)
            {
                throw new ArgumentNullException("suffixTree");
            }

            this.suffixTree = suffixTree;
            ReferenceSequence = suffixTree.Sequence;

            // Defaults: minimum match length of 20, ambiguity allowed.
            LengthOfMUM = 20;
            NoAmbiguity = false;

            // Display metadata comes from the resource file.
            Name = Properties.Resource.MUMmerName;
            Description = Properties.Resource.MUMmerDescription;
        }
Example #13
0
        /// <summary>
        /// Initializes a new instance of the MUMmer class around a caller-supplied
        /// suffix tree. The tree's <c>Sequence</c> becomes the reference sequence.
        /// </summary>
        /// <param name="suffixTree">Suffix tree.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="suffixTree"/> is null.
        /// </exception>
        public MUMmer(ISuffixTree suffixTree)
        {
            if (suffixTree == null)
            {
                throw new ArgumentNullException("suffixTree");
            }

            this.suffixTree = suffixTree;
            ReferenceSequence = suffixTree.Sequence;

            // Default minimum length of a match is 20.
            LengthOfMUM = 20;
            NoAmbiguity = false;

            Name = Properties.Resource.MUMmerName;
            Description = Properties.Resource.MUMmerDescription;
        }
Example #14
0
            /// <summary>
            /// Creates a diagnostic object for the given suffix tree and runs
            /// <c>build</c> on the tree's root.
            /// </summary>
            /// <param name="suffixTree">Tree to diagnose; must not be null.</param>
            /// <returns>The diagnostic instance after <c>build</c> has run.</returns>
            /// <exception cref="ArgumentNullException">
            /// Thrown when <paramref name="suffixTree"/> is null.
            /// </exception>
            public static SuffixTreeDiagnostic create(ISuffixTree suffixTree)
            {
                if (suffixTree == null)
                {
                    // paramName must be the real parameter name so the caller can
                    // tell which argument was rejected.
                    throw new ArgumentNullException("suffixTree");
                }

                var diag = new SuffixTreeDiagnostic()
                {
                    Tree = suffixTree
                };
                diag.build(suffixTree.Root);

                return diag;
            }
Example #15
0
        /// <summary>
        /// Runs <c>SuffixTreeDiagnostics</c> on the tree and writes two sections to the
        /// debug output: the internal (suffix link) count followed by the suffix-link
        /// display, then the suffix (leaf) count followed by the suffix display.
        /// </summary>
        /// <param name="t">Tree to diagnose.</param>
        private static void Diagnose(ISuffixTree t)
        {
            Debug.WriteLine("");

            var report = new SuffixTreeDiagnostics(t);
            report.Run();

            // Section 1: suffix links.
            Debug.WriteLine("");
            Debug.WriteLine("suffix links count: {0} ", report.InternalCount);
            Debug.WriteLine("----------------------- ");
            report.Display(
                SuffixTreeDiagnostics.DisplayInfo.DisplayContent |
                SuffixTreeDiagnostics.DisplayInfo.DisplaySuffixLinks);

            // Section 2: suffixes (leaves).
            Debug.WriteLine("");
            Debug.WriteLine("suffix count (leaves): {0}", report.SuffixesCount);
            Debug.WriteLine("-------------------------- ");
            report.Display(
                SuffixTreeDiagnostics.DisplayInfo.DisplayContent |
                SuffixTreeDiagnostics.DisplayInfo.DisplaySuffixes);
        }
Example #16
0
        /// <summary>
        /// Builds a suffix tree over a single sequence with the builder's default
        /// settings and checks the number of matches found for a query with a
        /// minimum match length of 3.
        /// </summary>
        public void TestStreamingInMemorySimpleSequence()
        {
            var reference = new Sequence(Alphabets.Protein, "AGTATGCCCCCCCCCCTGCCG");

            using (var builder = new SimpleSuffixTreeBuilder())
            {
                ISuffixTree tree = builder.BuildSuffixTree(reference);
                var query = new Sequence(Alphabets.Protein, "CCCCCCCCTATG");

                IList<MaxUniqueMatch> matches = builder.FindMatches(tree, query, 3);

                // Verify the count of MUMs found
                Assert.AreEqual(2, matches.Count);
            }
        }
Example #17
0
        /// <summary>
        /// Initializes a new instance of the MUMmer class with the specified reference
        /// sequence and builds a <c>MultiWaySuffixTree</c> for it.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="referenceSequence"/> is null.
        /// </exception>
        public MUMmer(ISequence referenceSequence)
        {
            if (referenceSequence == null)
            {
                throw new ArgumentNullException("referenceSequence");
            }

            ReferenceSequence = referenceSequence;

            // The suffix tree is built eagerly, at construction time.
            suffixTree = new MultiWaySuffixTree(referenceSequence);

            // Defaults: minimum match length of 20, ambiguity allowed.
            LengthOfMUM = 20;
            NoAmbiguity = false;

            Name = Properties.Resource.MUMmerName;
            Description = Properties.Resource.MUMmerDescription;
        }
Example #18
0
        /// <summary>
        /// Initializes a new instance of the MUMmer class for the given reference
        /// sequence; a <c>MultiWaySuffixTree</c> over that sequence is created here.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="referenceSequence"/> is null.
        /// </exception>
        public MUMmer(ISequence referenceSequence)
        {
            if (referenceSequence == null)
            {
                throw new ArgumentNullException("referenceSequence");
            }

            ReferenceSequence = referenceSequence;

            // Build the suffix tree for the reference sequence up front.
            suffixTree = new MultiWaySuffixTree(referenceSequence);

            // Default minimum length of a match is 20.
            LengthOfMUM = 20;
            NoAmbiguity = false;

            Name = Properties.Resource.MUMmerName;
            Description = Properties.Resource.MUMmerDescription;
        }
Example #19
0
        /// <summary>
        /// Builds a suffix tree over "BANANA" with the persistence threshold set to 0
        /// and checks the number of maximum matches found for the query "ANA" with a
        /// minimum match length of 3.
        /// </summary>
        public void TestFindMaximumMatchPersistentInSequence()
        {
            var reference = new Sequence(Alphabets.Protein, "BANANA");

            using (var builder = new SimpleSuffixTreeBuilder())
            {
                builder.PersistenceThreshold = 0;

                ISuffixTree tree = builder.BuildSuffixTree(reference);
                var query = new Sequence(Alphabets.Protein, "ANA");

                IList<MaxUniqueMatch> matches = builder.FindMaximumMatches(tree, query, 3);

                // Verify the count of MUMs found
                Assert.AreEqual(1, matches.Count);
            }
        }
Example #20
0
        /// <summary>
        /// Builds a suffix tree over "BANANA" with <c>KurtzSuffixTreeBuilder</c>, logs
        /// the start and end time of the search, and checks the number of maximum
        /// matches found for the query "ANA" with a minimum match length of 3.
        /// </summary>
        public void TestFindMaximumMatchInSequence()
        {
            const string referenceText = "BANANA";
            const string queryText = "ANA";

            var reference = new Sequence(Alphabets.Protein, referenceText);

            ApplicationLog.WriteLine("Begin SuffixTree Test for string '{0}'", referenceText);
            ISuffixTreeBuilder builder = new KurtzSuffixTreeBuilder();
            ISuffixTree tree = builder.BuildSuffixTree(reference);

            var query = new Sequence(Alphabets.Protein, queryText);

            ApplicationLog.WriteLine("Query string : {0}. Minimum Length of MUM : 3.", queryText);

            // The two time stamps bracket only the search itself.
            ApplicationLog.WriteTime("Start Time.", DateTime.Now.ToString());
            IList<MaxUniqueMatch> matches = builder.FindMaximumMatches(tree, query, 3);
            ApplicationLog.WriteTime("End Time.", DateTime.Now.ToString());

            // Verify the count of MUMs found
            Assert.AreEqual(1, matches.Count);
        }
Example #21
0
        /// <summary>
        /// Main execute method that defines the step-by-step algorithm; derived
        /// classes follow the flow defined here.
        /// </summary>
        /// <remarks>
        /// Writes the instance fields <c>_referenceSequence</c>, <c>_suffixTree</c>,
        /// <c>_mumList</c> and <c>_clusterList</c>, and creates or re-targets
        /// <c>ConsensusResolver</c>. A query that equals the reference sequence is
        /// skipped; every other query yields exactly one entry in the result, which
        /// has no aligned sequences when no MUMs were found.
        /// </remarks>
        /// <param name="referenceSequenceList">reference sequences</param>
        /// <param name="querySequenceList">list of input sequences</param>
        /// <returns>A list of sequence alignments</returns>
        private IList<IPairwiseSequenceAlignment> Alignment(
            IList<ISequence> referenceSequenceList,
            IList<ISequence> querySequenceList)
        {
            // The consensus resolver works on the alphabet of the first reference sequence.
            if (referenceSequenceList.Count > 0)
            {
                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(referenceSequenceList[0].Alphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = referenceSequenceList[0].Alphabet;
                }
            }

            // Validate the input
            Validate(referenceSequenceList, querySequenceList);

            // Step 1: several reference sequences are concatenated into one.
            ISequence referenceSequence = referenceSequenceList.Count > 1
                ? ConcatSequence(referenceSequenceList)
                : referenceSequenceList[0];

            _referenceSequence = referenceSequence;

            // Step 2: build the suffix tree from the reference sequence.
            _suffixTree = BuildSuffixTree(_referenceSequence);

            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

            foreach (ISequence sequence in querySequenceList)
            {
                if (sequence.Equals(referenceSequence))
                {
                    continue;
                }

                IPairwiseSequenceAlignment sequenceAlignment =
                    new PairwiseSequenceAlignment(referenceSequence, sequence);

                // Step 3: stream the query sequence through the suffix tree.
                _mumList = Streaming(_suffixTree, referenceSequence, sequence, LengthOfMUM);

                if (_mumList.Count > 0)
                {
                    // Step 4: group the MUMs into clusters.
                    _clusterList = GetClusters(_mumList);

                    // Step 5: process the clusters into delta alignments.
                    IList<DeltaAlignment> deltaAlignments = ProcessCluster(
                        referenceSequenceList,
                        _clusterList);

                    // Step 6: convert delta alignments to sequence alignments.
                    IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(deltaAlignments);

                    foreach (PairwiseAlignedSequence align in alignments)
                    {
                        // Score and consensus are derived from the two aligned sequences.
                        align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);
                        align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

                        sequenceAlignment.PairwiseAlignedSequences.Add(align);
                    }
                }

                results.Add(sequenceAlignment);
            }

            return results;
        }
Example #22
0
 /// <summary>
 /// Creates a diagnostics helper for the given suffix tree.
 /// </summary>
 /// <param name="tree">Tree stored in <c>Tree</c>; it is not validated here.</param>
 public SuffixTreeDiagnostics(ISuffixTree tree)
 {
     Tree = tree;
 }
            /// <summary>
            /// Creates a diagnostic object for the given suffix tree and runs
            /// <c>build</c> on the tree's root.
            /// </summary>
            /// <param name="suffixTree">Tree to diagnose; must not be null.</param>
            /// <returns>The diagnostic instance after <c>build</c> has run.</returns>
            /// <exception cref="ArgumentNullException">
            /// Thrown when <paramref name="suffixTree"/> is null.
            /// </exception>
            public static SuffixTreeDiagnostic create(ISuffixTree suffixTree)
            {
                if (suffixTree == null)
                {
                    // paramName must be the real parameter name so the caller can
                    // tell which argument was rejected.
                    throw new ArgumentNullException("suffixTree");
                }

                var diag = new SuffixTreeDiagnostic() { Tree = suffixTree };
                diag.build(suffixTree.Root);

                return diag;
            }
Example #24
0
        /// <summary>
        /// Main execute method that defines the step-by-step algorithm; derived
        /// classes follow the flow defined here. The generated MUMs are stored in
        /// <c>_mums</c> (raw) and <c>_finalMums</c> (sorted, then LIS-filtered).
        /// MUMs are first collected for all query sequences, and only then is the
        /// pairwise algorithm run on the gaps to produce alignments.
        /// </summary>
        /// <remarks>
        /// A query whose sorted MUM list is empty is stored in <c>_finalMums</c> with a
        /// <c>null</c> value. The second phase reads the MUMs back through the
        /// <c>MUMs</c> and <c>FinalMUMs</c> members.
        /// NOTE(review): presumably those members expose <c>_mums</c> and
        /// <c>_finalMums</c> - confirm against their definitions.
        /// </remarks>
        /// <param name="referenceSequence">reference sequence</param>
        /// <param name="querySequenceList">list of input sequences</param>
        /// <returns>A list of sequence alignments</returns>
        private IList<IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
            ISequence referenceSequence,
            IList<ISequence> querySequenceList)
        {
            // Phase 1: collect the MUMs of every query sequence.
            _mums = new Dictionary<ISequence, IList<MaxUniqueMatch>>();
            _finalMums = new Dictionary<ISequence, IList<MaxUniqueMatch>>();

            if (Validate(referenceSequence, querySequenceList))
            {
                // Step 1: build the suffix tree from the reference sequence.
                ISuffixTree suffixTree = BuildSuffixTree(referenceSequence);

                foreach (ISequence sequence in querySequenceList)
                {
                    if (sequence.Equals(referenceSequence))
                    {
                        continue;
                    }

                    // Step 2: stream the query sequence through the suffix tree.
                    IList<MaxUniqueMatch> streamed = Streaming(suffixTree, referenceSequence, sequence, LengthOfMUM);
                    _mums.Add(sequence, streamed);

                    // Step 3(a): sort by reference position.
                    IList<MaxUniqueMatch> sorted = SortMum(streamed);

                    // Step 3(b): LIS via greedy cover; an empty list is recorded as null.
                    IList<MaxUniqueMatch> filtered = sorted.Count > 0
                        ? CollectLongestIncreasingSubsequence(sorted)
                        : null;

                    _finalMums.Add(sequence, filtered);
                }
            }

            // Phase 2: turn the accumulated MUMs into alignments.
            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

            if (MUMs == null || FinalMUMs == null)
            {
                return results;
            }

            _referenceSequence = referenceSequence;

            foreach (var finalMum in FinalMUMs)
            {
                var sequence = finalMum.Key;
                _mumList = MUMs[sequence];
                _finalMumList = finalMum.Value;

                IPairwiseSequenceAlignment alignment =
                    new PairwiseSequenceAlignment(referenceSequence, sequence);

                if (_mumList.Count > 0)
                {
                    if (_finalMumList.Count > 0)
                    {
                        // Step 4: get all the gaps in each sequence and call
                        // pairwise alignment.
                        alignment.PairwiseAlignedSequences.Add(ProcessGaps(referenceSequence, sequence));
                    }

                    results.Add(alignment);
                }
                else
                {
                    // No MUMs at all: fall back to a plain pairwise alignment.
                    IList<IPairwiseSequenceAlignment> fallback = RunPairWise(referenceSequence, sequence);

                    foreach (IPairwiseSequenceAlignment pairwiseAlignment in fallback)
                    {
                        results.Add(pairwiseAlignment);
                    }
                }
            }

            return results;
        }
Example #25
0
 /// <summary>
 /// Creates a text matcher that works on the given suffix tree.
 /// </summary>
 /// <param name="suffixTree">Tree stored in <c>Tree</c>; it is not validated here.</param>
 public TextMatcher(ISuffixTree suffixTree)
 {
     Tree = suffixTree;
 }
Example #26
0
 /// <summary>
 /// Traverses the suffix tree using the query sequence and returns the list of
 /// matches. Implemented by derived classes.
 /// </summary>
 /// <param name="suffixTree">Suffix tree</param>
 /// <param name="referenceSequence">Reference sequence</param>
 /// <param name="sequence">Query sequence</param>
 /// <param name="lengthOfMUM">Minimum length of MUM</param>
 /// <returns>List of matches</returns>
 protected abstract IList <MaxUniqueMatch> Streaming(
     ISuffixTree suffixTree,
     ISequence referenceSequence,
     ISequence sequence,
     long lengthOfMUM);
Example #27
0
        /// <summary>
        /// Main execute method that defines the step-by-step algorithm; derived
        /// classes follow the flow defined here. MUMs are not accumulated per query:
        /// each query's MUMs and gaps are processed into an alignment directly.
        /// </summary>
        /// <remarks>
        /// Writes the instance fields <c>_referenceSequence</c>, <c>_suffixTree</c>,
        /// <c>_mumList</c>, <c>_sortedMumList</c> and <c>_finalMumList</c>. When
        /// <c>Validate</c> returns false an empty list is returned.
        /// </remarks>
        /// <param name="referenceSequence">reference sequence</param>
        /// <param name="querySequenceList">list of input sequences</param>
        /// <returns>
        /// A list of sequence alignments, or <c>null</c> when validation passed but
        /// either argument is null.
        /// </returns>
        private IList<IPairwiseSequenceAlignment> AlignmentWithoutAccumulatedMUMs(
            ISequence referenceSequence,
            IList<ISequence> querySequenceList)
        {
            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

            if (!Validate(referenceSequence, querySequenceList))
            {
                return results;
            }

            // Safety check so that null inputs are handled.
            if (referenceSequence == null || querySequenceList == null)
            {
                return null;
            }

            _referenceSequence = referenceSequence;

            // Step 1: build the suffix tree from the reference sequence.
            _suffixTree = BuildSuffixTree(_referenceSequence);

            foreach (ISequence sequence in querySequenceList)
            {
                if (sequence.Equals(referenceSequence))
                {
                    continue;
                }

                IPairwiseSequenceAlignment alignment =
                    new PairwiseSequenceAlignment(referenceSequence, sequence);

                // Step 2: stream the query sequence through the suffix tree.
                _mumList = Streaming(_suffixTree, referenceSequence, sequence, LengthOfMUM);

                // Step 3(a): sort by reference position.
                _sortedMumList = SortMum(_mumList);

                if (_sortedMumList.Count > 0)
                {
                    // Step 3(b): LIS using greedy cover algorithm.
                    _finalMumList = CollectLongestIncreasingSubsequence(_sortedMumList);

                    if (_finalMumList.Count > 0)
                    {
                        // Step 4: get all the gaps in each sequence and call
                        // pairwise alignment.
                        alignment.PairwiseAlignedSequences.Add(ProcessGaps(referenceSequence, sequence));
                    }

                    results.Add(alignment);
                }
                else
                {
                    // No MUMs: fall back to a plain pairwise alignment.
                    IList<IPairwiseSequenceAlignment> fallback = RunPairWise(referenceSequence, sequence);

                    foreach (IPairwiseSequenceAlignment pairwiseAlignment in fallback)
                    {
                        results.Add(pairwiseAlignment);
                    }
                }
            }

            return results;
        }
Example #28
0
        /// <summary>
        /// Finds the matches of a sequence in the suffix tree.
        /// </summary>
        /// <remarks>
        /// The query start positions are split into chunks of roughly
        /// (positions / processor count) and <c>FindMUMs</c> is run on each chunk in
        /// its own task. The per-chunk results are concatenated in chunk order; the
        /// first match of a chunk is dropped when its query range lies completely
        /// inside the query range of the last match merged so far. Finally every
        /// match receives its 1-based position in the merged list as both
        /// <c>FirstSequenceMumOrder</c> and <c>SecondSequenceMumOrder</c>.
        /// This method writes the instance fields <c>_minimumLengthOfMUM</c> and
        /// <c>_suffixTree</c> and blocks until all tasks have completed.
        /// </remarks>
        /// <param name="suffixTree">Suffix tree to search on; must implement <c>IMultiWaySuffixTree</c></param>
        /// <param name="searchSequence">query sequence to find matches</param>
        /// <param name="lengthOfMUM">Minimum length of the match</param>
        /// <returns>Matches found, in query order</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="suffixTree"/> or <paramref name="searchSequence"/>
        /// is null, and also when <paramref name="suffixTree"/> is not an
        /// <c>IMultiWaySuffixTree</c>.
        /// </exception>
        private IList<MaxUniqueMatch> FindMatchWithOption(
            ISuffixTree suffixTree,
            ISequence searchSequence,
            long lengthOfMUM)
        {
            if (suffixTree == null)
            {
                throw new ArgumentNullException("suffixTree");
            }

            if (searchSequence == null)
            {
                throw new ArgumentNullException("searchSequence");
            }

            IMultiWaySuffixTree mwSuffixTree = suffixTree as IMultiWaySuffixTree;

            if (mwSuffixTree == null)
            {
                // NOTE(review): the tree is non-null but of an unsupported type here.
                // The exception type is kept as-is so existing catch blocks keep working.
                throw new ArgumentNullException("suffixTree");
            }

            ValidateSequence(suffixTree.Sequence, searchSequence);

            // Initialize
            _minimumLengthOfMUM = lengthOfMUM;
            _suffixTree = mwSuffixTree;
            InitializeReferenceSequence(suffixTree.Sequence);
            InitializeQuerySequence(searchSequence);

            // Number of query positions at which a match of the minimum length can start.
            var startPositions = _querySequence.Count - (_minimumLengthOfMUM - 1);

            // Chunk size per task; at least one position per chunk.
            int interval = (int)startPositions / Environment.ProcessorCount;

            if (interval < 1)
            {
                interval = 1;
            }

            var pending = new List<Task<List<MaxUniqueMatch>>>();

            for (int index = 0; index < startPositions; index += interval)
            {
                // Copy the loop variable so each task captures its own start index.
                int taskIndex = index;

                // A parameterless lambda selects the (Func<TResult>, TaskCreationOptions)
                // overload, so the options value is used as options and not as task state.
                pending.Add(
                    Task.Factory.StartNew<List<MaxUniqueMatch>>(
                        () => FindMUMs(taskIndex, interval),
                        TaskCreationOptions.None));
            }

            var mergedList = new List<MaxUniqueMatch>();

            foreach (Task<List<MaxUniqueMatch>> task in pending)
            {
                // Result blocks until the task is done; chunks are merged in start order.
                List<MaxUniqueMatch> local = task.Result;

                if (local.Count == 0)
                {
                    continue;
                }

                if (mergedList.Count > 0)
                {
                    // A match that spans a chunk boundary can be reported by both
                    // chunks: drop the later one when the earlier one fully covers it.
                    MaxUniqueMatch previous = mergedList[mergedList.Count - 1];
                    MaxUniqueMatch current = local[0];

                    var previousStart = previous.SecondSequenceStart;
                    var previousEnd = previous.SecondSequenceStart + previous.Length;
                    var currentStart = current.SecondSequenceStart;
                    var currentEnd = current.SecondSequenceStart + current.Length;

                    bool startInside = currentStart >= previousStart && currentStart <= previousEnd;
                    bool endInside = currentEnd >= previousStart && currentEnd <= previousEnd;

                    if (startInside && endInside)
                    {
                        local.RemoveAt(0);
                    }
                }

                mergedList.AddRange(local);
            }

            // Order the mum list with query sequence order
            for (int index = 0; index < mergedList.Count; index++)
            {
                mergedList[index].FirstSequenceMumOrder = index + 1;
                mergedList[index].SecondSequenceMumOrder = index + 1;
            }

            return mergedList;
        }