Пример #1
0
        /// <summary>
        /// Aligns two sequences with the configured pairwise algorithm and returns the
        /// first alignment it produces.
        /// </summary>
        /// <remarks>
        /// When <c>PairWiseAlgorithm</c> has not been set, a <c>NeedlemanWunschAligner</c>
        /// is created and stored on this instance. The similarity matrix, gap open cost
        /// and consensus resolver of this instance are copied onto the algorithm on
        /// every call.
        /// </remarks>
        /// <param name="seq1">Sequence 1.</param>
        /// <param name="seq2">Sequence 2.</param>
        /// <returns>The single pairwise sequence alignment of the two sequences.</returns>
        /// <exception cref="InvalidOperationException">
        /// The algorithm returned no alignment, or an alignment that does not hold
        /// exactly one non-null aligned sequence pair.
        /// </exception>
        private IPairwiseSequenceAlignment RunPairWise(ISequence seq1, ISequence seq2)
        {
            if (this.PairWiseAlgorithm == null)
            {
                this.PairWiseAlgorithm = new NeedlemanWunschAligner();
            }

            this.PairWiseAlgorithm.SimilarityMatrix  = SimilarityMatrix;
            this.PairWiseAlgorithm.GapOpenCost       = this.GapOpenCost;
            this.PairWiseAlgorithm.ConsensusResolver = this.ConsensusResolver;

            IPairwiseSequenceAlignment sequenceAlignment;

            if (this.UseGapExtensionCost)
            {
                // The extension cost is only pushed to the algorithm when it is actually used.
                this.PairWiseAlgorithm.GapExtensionCost = this.GapExtensionCost;
                sequenceAlignment = this.PairWiseAlgorithm.Align(seq1, seq2).FirstOrDefault();
            }
            else
            {
                sequenceAlignment = this.PairWiseAlgorithm.AlignSimple(seq1, seq2).FirstOrDefault();
            }

            // NeedlemanWunsch is a global aligner that should always return exactly one alignment.
            // InvalidOperationException derives from Exception, so callers catching Exception still work.
            if (sequenceAlignment == null
                || sequenceAlignment.PairwiseAlignedSequences == null
                || sequenceAlignment.PairwiseAlignedSequences.Count != 1
                || sequenceAlignment.PairwiseAlignedSequences[0] == null)
            {
                throw new InvalidOperationException("NeedlemanWunsch failed to return an alignment when gaps in mummer were being processed");
            }

            return sequenceAlignment;
        }
Пример #2
0
        /// <summary>
        /// Main execution method: obtains delta alignments from the NUCmer algorithm for
        /// every query sequence and converts them into pairwise sequence alignments
        /// against the concatenated reference.
        /// </summary>
        /// <remarks>
        /// The consensus resolver of this instance is replaced with a
        /// <c>SimpleConsensusResolver</c> built on the alphabet of the first reference
        /// sequence. Delta alignment sets are paired with the query sequences by position.
        /// </remarks>
        /// <param name="referenceSequenceList">Reference sequences.</param>
        /// <param name="querySequenceList">List of input (query) sequences.</param>
        /// <returns>One pairwise sequence alignment per delta alignment set.</returns>
        /// <exception cref="InvalidOperationException">
        /// More delta alignment sets were produced than there are query sequences.
        /// </exception>
        private IList <IPairwiseSequenceAlignment> Alignment(IEnumerable <ISequence> referenceSequenceList, IEnumerable <ISequence> querySequenceList)
        {
            // Resolve the first reference once instead of re-enumerating the input for each use.
            ISequence firstReference = referenceSequenceList.ElementAt(0);

            this.ConsensusResolver = new SimpleConsensusResolver(firstReference.Alphabet);

            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

            // Validate the input
            Validate(firstReference, querySequenceList);

            IList<IEnumerable<DeltaAlignment>> deltaAlignments = this.nucmerAlgo.GetDeltaAlignments(referenceSequenceList, querySequenceList);

            ISequence concatReference = ConcatSequence(referenceSequenceList);

            // Walk the query sequences in lock-step with the delta alignment sets.
            // The enumerator is disposable, so it is released deterministically.
            using (IEnumerator<ISequence> qryEnumerator = querySequenceList.GetEnumerator())
            {
                foreach (var delta in deltaAlignments)
                {
                    // Reading Current after a failed MoveNext is undefined, so fail loudly instead.
                    if (!qryEnumerator.MoveNext())
                    {
                        throw new InvalidOperationException("More delta alignment sets were produced than there are query sequences.");
                    }

                    IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, qryEnumerator.Current);

                    // Convert delta alignments to sequence alignments
                    IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(delta);

                    foreach (PairwiseAlignedSequence align in alignments)
                    {
                        // Calculate the score of alignment
                        align.Score = this.CalculateScore(
                            align.FirstSequence,
                            align.SecondSequence);

                        // Make Consensus
                        align.Consensus = this.MakeConsensus(
                            align.FirstSequence,
                            align.SecondSequence);

                        sequenceAlignment.PairwiseAlignedSequences.Add(align);
                    }

                    // An alignment is reported for every delta set, even when it holds no aligned pairs.
                    results.Add(sequenceAlignment);
                }
            }

            return results;
        }
Пример #3
0
        /// <summary>
        /// Replaces the second (query) sequence of the first pairwise alignment with a
        /// QualitativeSequence built from the same symbols and the supplied quality scores.
        /// Gap positions ('-') receive a quality score of 0; every other position consumes
        /// the next entry of <paramref name="qualScores"/>, in order.
        /// </summary>
        /// <remarks>
        /// The new sequence is always created with <c>DnaAlphabet.Instance</c> and
        /// <c>FastQFormatType.Sanger</c>.
        /// </remarks>
        /// <param name="aln">Alignment whose first aligned pair is updated in place.</param>
        /// <param name="qualScores">Quality scores, one per non-gap symbol of the query.</param>
        /// <exception cref="ArgumentNullException"><paramref name="aln"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The alignment holds no aligned pair, its second sequence is not a Sequence, or
        /// <paramref name="qualScores"/> has fewer entries than the query has non-gap symbols.
        /// </exception>
        public static void ConvertAlignedSequenceToQualSeq(IPairwiseSequenceAlignment aln, int[] qualScores) {
            if (aln == null) {
                throw new ArgumentNullException ("aln");
            }
            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count == 0) {
                throw new ArgumentException ("The alignment must contain at least one pairwise aligned sequence.", "aln");
            }

            var q = aln.PairwiseAlignedSequences [0].SecondSequence as Sequence;
            if (q == null) {
                // The 'as' cast yields null for a missing sequence and for any other ISequence implementation.
                throw new ArgumentException ("The second sequence of the first alignment must be of type Sequence.", "aln");
            }

            var qvs = new int[q.Count];
            int queryPos = 0;
            for (int i = 0; i < qvs.Length; i++) {
                if (q [i] == '-') {
                    // Gaps carry no base call, so they get the lowest score.
                    qvs [i] = 0;
                } else {
                    if (qualScores == null || queryPos >= qualScores.Length) {
                        throw new ArgumentException ("There are fewer quality scores than non-gap symbols in the query sequence.", "qualScores");
                    }
                    qvs [i] = qualScores [queryPos++];
                }
            }
            var qseq = new QualitativeSequence (DnaAlphabet.Instance, FastQFormatType.Sanger, q.ToArray (), qvs, false);

            aln.PairwiseAlignedSequences [0].SecondSequence = qseq;
        }
Пример #4
0
        /// <summary>
        /// Looks up the requested sequence aligner by name, configures its gap costs and
        /// similarity matrix, aligns the two input sequences and publishes the first
        /// aligned pair through the output arguments.
        /// </summary>
        /// <remarks>
        /// <c>AlignSimple</c> is used when the gap open and gap extension costs are equal or
        /// the gap extension cost is 0; otherwise <c>Align</c> is used. When the similarity
        /// matrix name cannot be parsed, the aligner's current matrix is left untouched.
        /// </remarks>
        /// <returns>
        /// The first alignment returned by the aligner.
        /// </returns>
        /// <param name="context">The execution context under which the activity executes.</param>
        /// <exception cref="ArgumentException">No registered aligner has the requested name.</exception>
        protected override ISequenceAlignment Execute(CodeActivityContext context)
        {
            string alignerName = (AlignerName ?? DefaultAligner).ToLowerInvariant();

            // Ordinal, case-insensitive match: avoids lower-casing every registered name
            // and tolerates an aligner whose Name is null.
            var aligner = SequenceAligners.All.FirstOrDefault(
                sa => string.Equals(sa.Name, alignerName, StringComparison.OrdinalIgnoreCase));

            if (aligner == null)
            {
                throw new ArgumentException("Could not find aligner: " + alignerName);
            }

            aligner.GapOpenCost      = GapOpenCost;
            aligner.GapExtensionCost = GapExtensionCost;

            var smName = SimilarityMatrix ?? DefaultMatrix;

            SimilarityMatrix.StandardSimilarityMatrix sm;
            if (Enum.TryParse(smName, true, out sm))
            {
                aligner.SimilarityMatrix = new SimilarityMatrix(sm);
            }

            // Both alignment entry points take the same input, so build it once.
            var sequences = new[] { FirstSequence.Get(context), SecondSequence.Get(context) };

            bool useSimpleAlignment = GapOpenCost == GapExtensionCost || GapExtensionCost == 0;

            ISequenceAlignment result = useSimpleAlignment
                ? aligner.AlignSimple(sequences).First()
                : aligner.Align(sequences).First();

            // The outputs are only populated for pairwise results that hold at least one aligned pair.
            IPairwiseSequenceAlignment pwAlignment = result as IPairwiseSequenceAlignment;

            if (pwAlignment != null && pwAlignment.PairwiseAlignedSequences.Count > 0)
            {
                var firstPair = pwAlignment.PairwiseAlignedSequences[0];

                FirstResult.Set(context, firstPair.FirstSequence);
                SecondResult.Set(context, firstPair.SecondSequence);
                Consensus.Set(context, firstPair.Consensus);
            }

            return result;
        }
Пример #5
0
        /// <summary>
        /// Replaces the second (query) sequence of the first pairwise alignment with a
        /// QualitativeSequence built from the same symbols and the supplied quality scores.
        /// Gap positions ('-') receive a quality score of 0; every other position consumes
        /// the next entry of <paramref name="qualScores"/>, in order.
        /// </summary>
        /// <remarks>
        /// The new sequence is always created with <c>DnaAlphabet.Instance</c> and
        /// <c>FastQFormatType.Sanger</c>.
        /// </remarks>
        /// <param name="aln">Alignment whose first aligned pair is updated in place.</param>
        /// <param name="qualScores">Quality scores, one per non-gap symbol of the query.</param>
        /// <exception cref="ArgumentNullException"><paramref name="aln"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The alignment holds no aligned pair, its second sequence is not a Sequence, or
        /// <paramref name="qualScores"/> has fewer entries than the query has non-gap symbols.
        /// </exception>
        public static void ConvertAlignedSequenceToQualSeq(IPairwiseSequenceAlignment aln, int[] qualScores)
        {
            if (aln == null)
            {
                throw new ArgumentNullException("aln");
            }

            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count == 0)
            {
                throw new ArgumentException("The alignment must contain at least one pairwise aligned sequence.", "aln");
            }

            var q = aln.PairwiseAlignedSequences[0].SecondSequence as Sequence;

            if (q == null)
            {
                // The 'as' cast yields null for a missing sequence and for any other ISequence implementation.
                throw new ArgumentException("The second sequence of the first alignment must be of type Sequence.", "aln");
            }

            var qvs      = new int[q.Count];
            int queryPos = 0;

            for (int i = 0; i < qvs.Length; i++)
            {
                if (q[i] == '-')
                {
                    // Gaps carry no base call, so they get the lowest score.
                    qvs[i] = 0;
                }
                else
                {
                    if (qualScores == null || queryPos >= qualScores.Length)
                    {
                        throw new ArgumentException("There are fewer quality scores than non-gap symbols in the query sequence.", "qualScores");
                    }

                    qvs[i] = qualScores[queryPos++];
                }
            }

            var qseq = new QualitativeSequence(DnaAlphabet.Instance, FastQFormatType.Sanger, q.ToArray(), qvs, false);

            aln.PairwiseAlignedSequences[0].SecondSequence = qseq;
        }
Пример #6
0
        /// <summary>
        /// Main execution method defining the step-by-step flow of the algorithm:
        /// build a suffix tree on the (concatenated) reference, stream each query through
        /// it to find MUMs, cluster the MUMs, turn the clusters into delta alignments and
        /// convert those into scored pairwise alignments.
        /// </summary>
        /// <param name="referenceSequenceList">Reference sequences.</param>
        /// <param name="querySequenceList">List of input (query) sequences.</param>
        /// <returns>
        /// One pairwise sequence alignment per query sequence that is not equal to the
        /// reference; an alignment may hold no aligned pairs.
        /// </returns>
        private IList <IPairwiseSequenceAlignment> Alignment(
            IList <ISequence> referenceSequenceList,
            IList <ISequence> querySequenceList)
        {
            // Point the consensus resolver at the alphabet of the first reference,
            // creating the resolver on first use.
            if (referenceSequenceList.Count > 0)
            {
                var referenceAlphabet = referenceSequenceList[0].Alphabet;

                if (ConsensusResolver != null)
                {
                    ConsensusResolver.SequenceAlphabet = referenceAlphabet;
                }
                else
                {
                    ConsensusResolver = new SimpleConsensusResolver(referenceAlphabet);
                }
            }

            IList<IPairwiseSequenceAlignment> alignmentResults = new List<IPairwiseSequenceAlignment>();

            // Validate the input
            Validate(referenceSequenceList, querySequenceList);

            // Step 1: several references are concatenated into one; a single reference is used as-is.
            ISequence reference = referenceSequenceList.Count > 1
                ? ConcatSequence(referenceSequenceList)
                : referenceSequenceList[0];

            _referenceSequence = reference;

            // Step 2: build the suffix tree on the reference sequence.
            _suffixTree = BuildSuffixTree(_referenceSequence);

            foreach (ISequence query in querySequenceList)
            {
                // A query equal to the reference is not aligned against itself.
                if (query.Equals(reference))
                {
                    continue;
                }

                IPairwiseSequenceAlignment queryAlignment = new PairwiseSequenceAlignment(reference, query);

                // Step 3: stream the query through the suffix tree to collect MUMs.
                _mumList = Streaming(_suffixTree, query, LengthOfMUM);

                if (_mumList.Count > 0)
                {
                    // Step 5: group the MUMs into clusters.
                    _clusterList = GetClusters(_mumList);

                    // Step 7: process the clusters into delta alignments.
                    IList<DeltaAlignment> deltas = ProcessCluster(referenceSequenceList, _clusterList);

                    // Step 8: convert the delta alignments into aligned sequence pairs.
                    IList<PairwiseAlignedSequence> alignedPairs = ConvertDeltaToAlignment(deltas);

                    foreach (PairwiseAlignedSequence pair in alignedPairs)
                    {
                        pair.Score     = CalculateScore(pair.FirstSequence, pair.SecondSequence);
                        pair.Consensus = MakeConsensus(pair.FirstSequence, pair.SecondSequence);

                        queryAlignment.PairwiseAlignedSequences.Add(pair);
                    }
                }

                alignmentResults.Add(queryAlignment);
            }

            return alignmentResults;
        }
Пример #7
0
        /// <summary>
        /// Left-aligns the indels of a pairwise alignment and, optionally, calls variants
        /// from the left-aligned result. Left alignment shifts every deletion so that it
        /// starts as early as possible.  For example:
        ///
        /// <code>
        /// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
        /// TTTTAA--TTTT                     TTTT--AATTTT
        /// </code>
        ///
        /// The first sequence of the alignment is treated as the reference and the second
        /// as the query.  The work is done on copies of the sequence data, and a new
        /// pairwise sequence alignment holding the left-aligned sequences is returned
        /// together with the list of variants.
        /// </summary>
        /// <param name="aln">Aln. Must hold exactly one aligned pair; the second sequence should be of type QualitativeSequence or Sequence.</param>
        /// <param name="callVariants">callVariants.  If true, it will call variants, otherwise the second half of tuple will be null. </param>
        /// <returns>
        /// A tuple of the new left-aligned alignment and the called variants (null when
        /// <paramref name="callVariants"/> is false).
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// <paramref name="aln"/> is null, or either sequence of the aligned pair is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The alignment does not hold exactly one aligned pair, or the query is neither a
        /// Sequence nor a QualitativeSequence.
        /// </exception>
        public static Tuple <IPairwiseSequenceAlignment, List <Variant> > LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true)
        {
            // NullReferenceException is kept for the null checks below so that the
            // exception types seen by existing callers do not change.
            if (aln == null)
            {
                throw new NullReferenceException("aln");
            }
            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
            {
                throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
            }
            var frstAln = aln.PairwiseAlignedSequences.First();
            var seq1    = frstAln.FirstSequence;
            var seq2    = frstAln.SecondSequence;

            if (seq1 == null)
            {
                throw new NullReferenceException("seq1");
            }
            else if (seq2 == null)
            {
                throw new NullReferenceException("seq2");
            }

            //TODO: Might implement an ambiguity check later.
            #if FALSE
            if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
            {
                throw new ArgumentException("Cannot left align sequences with ambiguous symbols.");
            }
            #endif

            // Note we have to copy unless we can guarantee the array will not be mutated.
            byte[]         refseq = seq1.ToArray();
            ISequence      newQuery;
            List <Variant> variants = null;
            // Call variants for a qualitative sequence
            if (seq2 is QualitativeSequence)
            {
                var qs    = seq2 as QualitativeSequence;
                // Pair every base with its quality score so both move together during left alignment.
                var query = Enumerable.Zip(qs, qs.GetQualityScores(), (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();
                AlignmentUtils.LeftAlignIndels(refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
                if (callVariants)
                {
                    variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                var newQueryQS = new QualitativeSequence(qs.Alphabet,
                                                         qs.FormatType,
                                                         query.Select(z => z.BP).ToArray(),
                                                         query.Select(p => p.QV).ToArray(),
                                                         false);
                newQueryQS.Metadata = seq2.Metadata;
                newQuery            = newQueryQS;
            }
            else if (seq2 is Sequence)      // For a sequence with no QV values.
            {
                var qs    = seq2 as Sequence;
                // No quality values are available, so every base gets a QV of 0.
                var query = qs.Select(v => new BPandQV(v, 0, false)).ToArray();
                AlignmentUtils.LeftAlignIndels(refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
                // ISequence does not have a setable metadata
                var newQueryS = new Sequence(qs.Alphabet, query.Select(z => z.BP).ToArray(), false);
                newQueryS.Metadata = seq2.Metadata;
                if (callVariants)
                {
                    variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                newQuery = newQueryS;
            }
            else
            {
                throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
            }

            // variants stays null when callVariants is false, so it must not be enumerated then.
            if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null)
            {
                foreach (var v in variants)
                {
                    v.RefName = aln.FirstSequence.ID;
                }
            }

            var newRef = new Sequence(seq1.Alphabet, refseq, false);
            newRef.ID       = seq1.ID;
            newRef.Metadata = seq1.Metadata;

            newQuery.ID = seq2.ID;

            var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
            var pas    = new PairwiseAlignedSequence();
            pas.FirstSequence  = newRef;
            pas.SecondSequence = newQuery;
            newaln.Add(pas);
            return(new Tuple <IPairwiseSequenceAlignment, List <Variant> > (newaln, variants));
        }
Пример #8
0
        /// <summary>
        /// Calls variants from a pairwise sequence alignment, producing a list of the
        /// SNPs and indels found in it.
        ///
        /// The call is delegated to LeftAlignIndelsAndCallVariants, so all indels are
        /// left-aligned before variants are called, to be consistent with other software.
        /// </summary>
        /// <param name="aln">The pairwise alignment to call variants with.</param>
        /// <returns>The variants reported by LeftAlignIndelsAndCallVariants (second item of its result tuple).</returns>
        public static List <Variant> CallVariants(IPairwiseSequenceAlignment aln)
        {
            var alignedAndVariants = LeftAlignIndelsAndCallVariants(aln);

            return alignedAndVariants.Item2;
        }
Пример #9
0
        /// <summary>
        /// Main execution method defining the step-by-step flow of the algorithm.
        /// For every query sequence it finds the MUMs against the reference, records them
        /// in the MUM store of this instance, and then runs the pairwise algorithm on the
        /// gaps between the selected MUMs to produce the alignment. Queries with no usable
        /// MUMs are aligned with the plain pairwise algorithm instead.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequenceList">List of input sequences.</param>
        /// <returns>
        /// A list of sequence alignments; an empty list when validation fails, or null
        /// when an input is null after validation succeeded.
        /// </returns>
        private IList <IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
            ISequence referenceSequence,
            IEnumerable <ISequence> querySequenceList)
        {
            // Every run starts with an empty MUM store.
            this.mums = new Dictionary<ISequence, IEnumerable<Match>>();
            IList<IPairwiseSequenceAlignment> alignmentResults = new List<IPairwiseSequenceAlignment>();

            if (!this.Validate(referenceSequence, querySequenceList))
            {
                return alignmentResults;
            }

            // Safety check to ensure that null inputs are handled.
            if (referenceSequence == null || querySequenceList == null)
            {
                return null;
            }

            Sequence concreteReference = referenceSequence as Sequence;
            if (concreteReference == null)
            {
                throw new ArgumentException(Properties.Resource.OnlySequenceClassSupported);
            }

            // NOTE(review): NoAmbiguity is fed from AmbigiousMatchesAllowed; the two names
            // read as opposites - confirm the intended polarity.
            MUMmer mummer = new MUMmer(concreteReference)
            {
                LengthOfMUM = this.LengthOfMUM,
                NoAmbiguity = this.AmbigiousMatchesAllowed
            };

            foreach (ISequence query in querySequenceList)
            {
                // A query equal to the reference is not aligned against itself.
                if (query.Equals(referenceSequence))
                {
                    continue;
                }

                IPairwiseSequenceAlignment queryAlignment = new PairwiseSequenceAlignment(referenceSequence, query);

                // Step 2: stream the query through MUMmer to collect the matches.
                IEnumerable<Match> matches;
                if (this.MaximumMatchEnabled)
                {
                    matches = mummer.GetMatches(query);
                }
                else
                {
                    matches = mummer.GetMatchesUniqueInReference(query);
                }

                this.mums.Add(query, matches);

                // Step 3(a): sort the MUMs based on the reference sequence.
                LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
                IList<Match> sortedMatches = lis.SortMum(GetMumsForLIS(matches));

                if (sortedMatches.Count == 0)
                {
                    // Nothing to anchor on: fall back to the plain pairwise algorithm.
                    IList<IPairwiseSequenceAlignment> fallbackAlignments = this.RunPairWise(
                        referenceSequence,
                        query);

                    foreach (IPairwiseSequenceAlignment fallback in fallbackAlignments)
                    {
                        alignmentResults.Add(fallback);
                    }

                    continue;
                }

                // Step 3(b): LIS using the greedy cover algorithm.
                IList<Match> selectedMatches = lis.GetLongestSequence(sortedMatches);

                if (selectedMatches.Count > 0)
                {
                    // Step 4: get all the gaps in each sequence and run pairwise alignment on them.
                    queryAlignment.PairwiseAlignedSequences.Add(
                        this.ProcessGaps(referenceSequence, query, selectedMatches));
                }

                // The alignment is reported even when no aligned pair was added to it.
                alignmentResults.Add(queryAlignment);
            }

            return alignmentResults;
        }
Пример #10
0
        /// <summary>
        /// Left-aligns the indels of a pairwise alignment and, optionally, calls variants
        /// from the left-aligned result. Left alignment shifts every deletion so that it
        /// starts as early as possible.  For example:
        /// 
        /// <code>
        /// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
        /// TTTTAA--TTTT                     TTTT--AATTTT
        /// </code>
        /// 
        /// The first sequence of the alignment is treated as the reference and the second
        /// as the query.  The work is done on copies of the sequence data, and a new
        /// pairwise sequence alignment holding the left-aligned sequences is returned
        /// together with the list of variants.
        /// </summary>
        /// <param name="aln">Aln. Must hold exactly one aligned pair; the second sequence should be of type QualitativeSequence or Sequence.</param>
        /// <param name="callVariants">callVariants.  If true, it will call variants, otherwise the second half of tuple will be null. </param>
        /// <returns>
        /// A tuple of the new left-aligned alignment and the called variants (null when
        /// <paramref name="callVariants"/> is false).
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// <paramref name="aln"/> is null, or either sequence of the aligned pair is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The alignment does not hold exactly one aligned pair, or the query is neither a
        /// Sequence nor a QualitativeSequence.
        /// </exception>
        public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true) {

            // NullReferenceException is kept for the null checks below so that the
            // exception types seen by existing callers do not change.
            if (aln == null) {
                throw new NullReferenceException ("aln");
            }
            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1) {
                throw new ArgumentException ("The pairwise aligned sequence should only have one alignment");
            }
            var frstAln = aln.PairwiseAlignedSequences.First ();
            var seq1 = frstAln.FirstSequence;
            var seq2 = frstAln.SecondSequence;
            if (seq1 == null) {
                throw new NullReferenceException ("seq1");
            } else if (seq2 == null) {
                throw new NullReferenceException ("seq2");
            }

            //TODO: Might implement an ambiguity check later.
            #if FALSE
            if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity) {
                throw new ArgumentException ("Cannot left align sequences with ambiguous symbols.");
            }
            #endif

            // Note we have to copy unless we can guarantee the array will not be mutated.
            byte[] refseq = seq1.ToArray ();
            ISequence newQuery;
            List<Variant> variants = null;
            // Call variants for a qualitative sequence
            if (seq2 is QualitativeSequence) {
                var qs = seq2 as QualitativeSequence;
                // Pair every base with its quality score so both move together during left alignment.
                var query = Enumerable.Zip (qs, qs.GetQualityScores (), (bp, qv) => new BPandQV (bp, (byte)qv, false)).ToArray ();
                AlignmentUtils.LeftAlignIndels (refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds (refseq, query);
                if (callVariants) {
                    variants = VariantCaller.CallVariants (refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                var newQueryQS = new QualitativeSequence (qs.Alphabet, 
                    qs.FormatType,
                    query.Select (z => z.BP).ToArray (),
                    query.Select (p => p.QV).ToArray (),
                    false);
                newQueryQS.Metadata = seq2.Metadata;
                newQuery = newQueryQS;
                
            } else if (seq2 is Sequence) {  // For a sequence with no QV values.
                var qs = seq2 as Sequence;
                // No quality values are available, so every base gets a QV of 0.
                var query = qs.Select (v => new BPandQV (v, 0, false)).ToArray();
                AlignmentUtils.LeftAlignIndels (refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds (refseq, query);
                // ISequence does not have a setable metadata
                var newQueryS = new Sequence(qs.Alphabet, query.Select(z=>z.BP).ToArray(), false);
                newQueryS.Metadata = seq2.Metadata;
                if (callVariants) {
                    variants = VariantCaller.CallVariants (refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                newQuery = newQueryS;
            } else {
                throw new ArgumentException ("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
            }

            // variants stays null when callVariants is false, so it must not be enumerated then.
            if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null) {
                foreach (var v in variants) {
                    v.RefName = aln.FirstSequence.ID;
                }
            }

            var newRef = new Sequence (seq1.Alphabet, refseq, false);
            newRef.ID = seq1.ID;
            newRef.Metadata = seq1.Metadata;

            newQuery.ID = seq2.ID;

            var newaln = new PairwiseSequenceAlignment (aln.FirstSequence, aln.SecondSequence);
            var pas = new PairwiseAlignedSequence ();
            pas.FirstSequence = newRef;
            pas.SecondSequence = newQuery;
            newaln.Add (pas);
            return new Tuple<IPairwiseSequenceAlignment, List<Variant>> (newaln, variants);
        }
Пример #11
0
        /// <summary>
        /// Calls variants from a pairwise sequence alignment, producing a list of the
        /// SNPs and indels found in it.
        /// 
        /// The call is delegated to LeftAlignIndelsAndCallVariants, so all indels are
        /// left-aligned before variants are called, to be consistent with other software.
        /// </summary>
        /// <param name="aln">The pairwise alignment to call variants with.</param>
        /// <returns>The variants reported by LeftAlignIndelsAndCallVariants (second item of its result tuple).</returns>
        public static List<Variant> CallVariants(IPairwiseSequenceAlignment aln) {
            var alignedAndVariants = LeftAlignIndelsAndCallVariants (aln);

            return alignedAndVariants.Item2;
        }
Пример #12
0
        /// <summary>
        /// Main execution method defining the step-by-step flow of the algorithm.
        /// In a first pass it finds the MUMs of every query sequence and stores both the
        /// raw MUMs and the final (longest increasing subsequence) MUMs on this instance.
        /// In a second pass it runs the pairwise algorithm on the gaps between the final
        /// MUMs to produce the alignments.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequenceList">List of input sequences.</param>
        /// <returns>A list of sequence alignments.</returns>
        private IList <IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
            ISequence referenceSequence,
            IList <ISequence> querySequenceList)
        {
            // Every run starts with empty MUM stores.
            _mums      = new Dictionary <ISequence, IList <MaxUniqueMatch> >();
            _finalMums = new Dictionary <ISequence, IList <MaxUniqueMatch> >();

            if (Validate(referenceSequence, querySequenceList))
            {
                IList <MaxUniqueMatch> mumList;

                // Step1 : building suffix trees using reference sequence
                SequenceSuffixTree suffixTree = BuildSuffixTree(referenceSequence);

                // On each query sequence aligned with reference sequence
                foreach (ISequence sequence in querySequenceList)
                {
                    // A query equal to the reference is not aligned against itself.
                    if (sequence.Equals(referenceSequence))
                    {
                        continue;
                    }

                    // Step2 : streaming process is performed with the query sequence
                    mumList = Streaming(suffixTree, sequence, LengthOfMUM);
                    _mums.Add(sequence, mumList);

                    // Step3(a) : sorted mum list based on reference sequence
                    mumList = SortMum(mumList);

                    if (mumList.Count > 0)
                    {
                        // Step3(b) : LIS using greedy cover algorithm
                        mumList = CollectLongestIncreasingSubsequence(mumList);
                    }
                    else
                    {
                        // No sorted MUMs: a null entry marks this query as having no final MUMs.
                        mumList = null;
                    }

                    _finalMums.Add(sequence, mumList);
                }
            }

            IList <IPairwiseSequenceAlignment> results   = new List <IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment         alignment = null;

            // NOTE(review): MUMs / FinalMUMs are presumably the property views over
            // _mums / _finalMums filled above - confirm against the property definitions.
            if (MUMs != null && FinalMUMs != null)
            {
                // Getting reference sequence
                _referenceSequence = referenceSequence;

                // On each query sequence aligned with reference sequence
                foreach (var finalMum in FinalMUMs)
                {
                    var sequence = finalMum.Key;
                    _mumList      = MUMs[sequence];
                    _finalMumList = finalMum.Value;

                    alignment = new PairwiseSequenceAlignment(referenceSequence, sequence);

                    // The final MUM list is null when the first pass found no sorted MUMs.
                    // Such a query takes the plain pairwise path instead of being dereferenced.
                    if (_mumList.Count > 0 && _finalMumList != null)
                    {
                        if (_finalMumList.Count > 0)
                        {
                            // Step 4 : get all the gaps in each sequence and call
                            // pairwise alignment
                            alignment.PairwiseAlignedSequences.Add(ProcessGaps(referenceSequence, sequence));
                        }

                        results.Add(alignment);
                    }
                    else
                    {
                        IList <IPairwiseSequenceAlignment> sequenceAlignment = RunPairWise(
                            referenceSequence,
                            sequence);

                        foreach (IPairwiseSequenceAlignment pairwiseAlignment in sequenceAlignment)
                        {
                            results.Add(pairwiseAlignment);
                        }
                    }
                }
            }

            return(results);
        }
Пример #13
0
        /// <summary>
        /// Main execution method defining the step-by-step flow of the algorithm.
        /// MUMs are not accumulated per query: for each query sequence the MUMs are
        /// found, sorted and reduced, and the gaps between them are aligned straight away.
        /// Queries with no sorted MUMs are aligned with the plain pairwise algorithm.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequenceList">List of input sequences.</param>
        /// <returns>
        /// A list of sequence alignments; an empty list when validation fails, or null
        /// when an input is null after validation succeeded.
        /// </returns>
        private IList <IPairwiseSequenceAlignment> AlignmentWithoutAccumulatedMUMs(
            ISequence referenceSequence,
            IList <ISequence> querySequenceList)
        {
            IList<IPairwiseSequenceAlignment> alignmentResults = new List<IPairwiseSequenceAlignment>();

            if (!Validate(referenceSequence, querySequenceList))
            {
                return alignmentResults;
            }

            // Safety check to ensure that null inputs are handled.
            if (referenceSequence == null || querySequenceList == null)
            {
                return null;
            }

            _referenceSequence = referenceSequence;

            // Step 1: build the suffix tree on the reference sequence.
            _suffixTree = BuildSuffixTree(_referenceSequence);

            foreach (ISequence query in querySequenceList)
            {
                // A query equal to the reference is not aligned against itself.
                if (query.Equals(referenceSequence))
                {
                    continue;
                }

                IPairwiseSequenceAlignment queryAlignment = new PairwiseSequenceAlignment(referenceSequence, query);

                // Step 2: stream the query through the suffix tree to collect MUMs.
                _mumList = Streaming(_suffixTree, query, LengthOfMUM);

                // Step 3(a): sort the MUMs based on the reference sequence.
                _sortedMumList = SortMum(_mumList);

                if (_sortedMumList.Count == 0)
                {
                    // Nothing to anchor on: fall back to the plain pairwise algorithm.
                    IList<IPairwiseSequenceAlignment> fallbackAlignments = RunPairWise(
                        referenceSequence,
                        query);

                    foreach (IPairwiseSequenceAlignment fallback in fallbackAlignments)
                    {
                        alignmentResults.Add(fallback);
                    }

                    continue;
                }

                // Step 3(b): LIS using the greedy cover algorithm.
                _finalMumList = CollectLongestIncreasingSubsequence(_sortedMumList);

                if (_finalMumList.Count > 0)
                {
                    // Step 4: get all the gaps in each sequence and run pairwise alignment on them.
                    queryAlignment.PairwiseAlignedSequences.Add(ProcessGaps(referenceSequence, query));
                }

                // The alignment is reported even when no aligned pair was added to it.
                alignmentResults.Add(queryAlignment);
            }

            return alignmentResults;
        }