Ejemplo n.º 1
0
        /// <summary>
        /// Calculates the pairwise score of two aligned sequences using affine gap penalties.
        /// Columns are scored as follows:
        /// - A column with a gap in both sequences is skipped and leaves the gap state untouched.
        /// - A column with a gap in exactly one sequence adds <paramref name="gapOpenPenalty"/>
        ///   when it starts a new gap run in that sequence, and
        ///   <paramref name="gapExtensionPenalty"/> when it continues the current run.
        /// - A column with a symbol in both sequences adds the similarity matrix entry for
        ///   the two symbols and ends any gap run in progress.
        /// A gap run of length w therefore contributes g + (w - 1) * e, where g is the open
        /// penalty and e is the extension penalty.
        /// </summary>
        /// <param name="sequenceA">aligned sequence</param>
        /// <param name="sequenceB">aligned sequence; must have the same length as <paramref name="sequenceA"/></param>
        /// <param name="similarityMatrix">similarity matrix used to score symbol/symbol columns</param>
        /// <param name="gapOpenPenalty">negative open gap penalty</param>
        /// <param name="gapExtensionPenalty">negative extension gap penalty</param>
        /// <returns>Sum of the scores of all scored columns.</returns>
        /// <exception cref="ArgumentNullException">A sequence argument is null.</exception>
        /// <exception cref="ArgumentException">The two sequences differ in length.</exception>
        public static float PairWiseScoreFunction(ISequence sequenceA, ISequence sequenceB, SimilarityMatrix similarityMatrix,
                                                  int gapOpenPenalty, int gapExtensionPenalty)
        {
            if (sequenceA == null)
            {
                throw new ArgumentNullException("sequenceA");
            }
            if (sequenceB == null)
            {
                throw new ArgumentNullException("sequenceB");
            }
            if (sequenceA.Count != sequenceB.Count)
            {
                throw new ArgumentException("Unaligned sequences");
            }

            float result = 0;

            // True while the previously scored column was part of a gap run in A (resp. B).
            bool isGapA = false;
            bool isGapB = false;

            for (int i = 0; i < sequenceA.Count; ++i)
            {
                bool gapInA = sequenceA[i].IsGap;
                bool gapInB = sequenceB[i].IsGap;

                if (gapInA && gapInB)
                {
                    // Indel-only column: discarded, gap runs on either side stay connected.
                    continue;
                }

                if (gapInA)
                {
                    // A gap in A terminates any run that was open in B.
                    isGapB = false;
                    result += isGapA ? gapExtensionPenalty : gapOpenPenalty;
                    isGapA = true;
                    continue;
                }

                if (gapInB)
                {
                    // A gap in B terminates any run that was open in A.
                    isGapA = false;
                    result += isGapB ? gapExtensionPenalty : gapOpenPenalty;
                    isGapB = true;
                    continue;
                }

                // Symbol in both sequences: any gap run ends here, so the next gap in
                // either sequence must be charged the open penalty again.
                isGapA = false;
                isGapB = false;

                byte[] a = similarityMatrix.ToByteArray(sequenceA[i].Symbol.ToString());
                byte[] b = similarityMatrix.ToByteArray(sequenceB[i].Symbol.ToString());
                result += similarityMatrix[a[0], b[0]];
            }

            return result;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Prepares the aligner state for a simple-gap-penalty alignment run:
        /// initializes and resets the working members, stores the scoring
        /// parameters, validates both inputs and encodes them as byte arrays.
        /// Only the gap open penalty is stored here; a gap extension penalty,
        /// if one is needed, has to be set separately.
        /// </summary>
        /// <param name="similarityMatrix">Scoring matrix.</param>
        /// <param name="gapPenalty">Gap open penalty (by convention, use a negative number for this.)</param>
        /// <param name="aInput">First input sequence.</param>
        /// <param name="bInput">Second input sequence.</param>
        private void SimpleAlignPrimer(SimilarityMatrix similarityMatrix, int gapPenalty, ISequence aInput, ISequence bInput)
        {
            InitializeAlign(aInput);
            ResetSpecificAlgorithmMemberVariables();

            // Scoring parameters. _gapExtensionCost is deliberately left alone:
            // it plays no part in the simple gap penalty model.
            _similarityMatrix = similarityMatrix;
            _gapOpenCost = gapPenalty;

            // Throws an exception if the input is not valid.
            ValidateAlignInput(aInput, bInput);

            // Encode each input as a 0-based array using the similarity matrix mapping.
            string firstSymbols = aInput.ToString();
            _a = similarityMatrix.ToByteArray(firstSymbols);

            string secondSymbols = bInput.ToString();
            _b = similarityMatrix.ToByteArray(secondSymbols);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Aligns the region ("gap") that lies between two consecutive MUMs and appends
        /// the outcome, followed by the second MUM itself, to the result sequences.
        /// - If both the reference and the query have symbols in the gap, the two
        ///   sub-sequences are aligned with RunPairWise and every returned aligned
        ///   sequence is appended.
        /// - If only one of them has symbols in the gap, those symbols are appended
        ///   opposite a default gap of the same length and scored as a single gap run.
        /// - If neither has symbols in the gap, nothing is appended for the gap.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query Sequence</param>
        /// <param name="sequenceResult1">Editable sequence containing alignment first result</param>
        /// <param name="sequenceResult2">Editable sequence containing alignment second result</param>
        /// <param name="consensusResult">Editable sequence containing consensus sequence</param>
        /// <param name="mum1">First MUM of Gap; when null the gap starts at index 0 of both sequences</param>
        /// <param name="mum2">Second MUM of Gap; when null the gap runs to the end of both sequences</param>
        /// <param name="insertions">
        /// Insertions made to the aligned sequences. Always a two element list:
        /// element 0 accumulates insertions into the first result, element 1 insertions
        /// into the second result.
        /// </param>
        /// <returns>Score of alignment</returns>
        private int AlignGap(
            ISequence referenceSequence,
            ISequence querySequence,
            ISequence sequenceResult1,
            ISequence sequenceResult2,
            ISequence consensusResult,
            MaxUniqueMatch mum1,
            MaxUniqueMatch mum2,
            out List <int> insertions)
        {
            int score = 0;
            insertions = new List <int>(2) { 0, 0 };

            // The gap starts right after the first MUM (or at 0 when there is none).
            int referenceGapStartIndex = 0;
            int queryGapStartIndex     = 0;
            if (null != mum1)
            {
                int mum1Length = mum1.Length;
                referenceGapStartIndex = mum1.FirstSequenceStart + mum1Length;
                queryGapStartIndex     = mum1.SecondSequenceStart + mum1Length;
            }

            // The gap ends where the second MUM starts (or at the end of each sequence).
            int mum2ReferenceStartIndex;
            int mum2QueryStartIndex;
            int mum2Length = 0;
            if (null != mum2)
            {
                mum2ReferenceStartIndex = mum2.FirstSequenceStart;
                mum2QueryStartIndex     = mum2.SecondSequenceStart;
                mum2Length = mum2.Length;
            }
            else
            {
                mum2ReferenceStartIndex = referenceSequence.Count;
                mum2QueryStartIndex     = querySequence.Count;
            }

            bool referenceHasGapSymbols = mum2ReferenceStartIndex > referenceGapStartIndex;
            bool queryHasGapSymbols     = mum2QueryStartIndex > queryGapStartIndex;

            if (referenceHasGapSymbols && queryHasGapSymbols)
            {
                ISequence referenceGap = referenceSequence.Range(
                    referenceGapStartIndex,
                    mum2ReferenceStartIndex - referenceGapStartIndex);
                ISequence queryGap = querySequence.Range(
                    queryGapStartIndex,
                    mum2QueryStartIndex - queryGapStartIndex);

                IList <IPairwiseSequenceAlignment> gapAlignments = RunPairWise(referenceGap, queryGap);

                if (gapAlignments != null)
                {
                    foreach (IPairwiseSequenceAlignment pairwiseAlignment in gapAlignments)
                    {
                        foreach (PairwiseAlignedSequence alignment in pairwiseAlignment.PairwiseAlignedSequences)
                        {
                            sequenceResult1.InsertRange(
                                sequenceResult1.Count,
                                alignment.FirstSequence.ToString());
                            sequenceResult2.InsertRange(
                                sequenceResult2.Count,
                                alignment.SecondSequence.ToString());
                            consensusResult.InsertRange(
                                consensusResult.Count,
                                alignment.Consensus.ToString());

                            score += alignment.Score;

                            // Single lookup instead of ContainsKey + indexer.
                            // NOTE(review): assumes Metadata maps string keys to object values - confirm.
                            object storedInsertions;
                            if (alignment.Metadata.TryGetValue("Insertions", out storedInsertions))
                            {
                                List <int> gapInsertions = storedInsertions as List <int>;
                                if (gapInsertions != null)
                                {
                                    if (gapInsertions.Count > 0)
                                    {
                                        insertions[0] += gapInsertions[0];
                                    }

                                    if (gapInsertions.Count > 1)
                                    {
                                        insertions[1] += gapInsertions[1];
                                    }
                                }
                            }
                        }
                    }
                }
            }
            else if (referenceHasGapSymbols)
            {
                // Only the reference has symbols here: pad the second result with gaps.
                ISequence referenceGap = referenceSequence.Range(
                    referenceGapStartIndex,
                    mum2ReferenceStartIndex - referenceGapStartIndex);
                string referenceGapText = referenceGap.ToString();
                int    gapLength        = referenceGap.Count;

                sequenceResult1.InsertRange(sequenceResult1.Count, referenceGapText);
                sequenceResult2.InsertRange(sequenceResult2.Count, CreateDefaultGap(gapLength));
                consensusResult.InsertRange(consensusResult.Count, referenceGapText);

                insertions[1] += gapLength;
                score = ScoreGapOnlyRun(gapLength);
            }
            else if (queryHasGapSymbols)
            {
                // Only the query has symbols here: pad the first result with gaps.
                ISequence queryGap = querySequence.Range(
                    queryGapStartIndex,
                    mum2QueryStartIndex - queryGapStartIndex);
                string queryGapText = queryGap.ToString();
                int    gapLength    = queryGap.Count;

                sequenceResult1.InsertRange(sequenceResult1.Count, CreateDefaultGap(gapLength));
                sequenceResult2.InsertRange(sequenceResult2.Count, queryGapText);
                consensusResult.InsertRange(consensusResult.Count, queryGapText);

                insertions[0] += gapLength;
                score = ScoreGapOnlyRun(gapLength);
            }

            // Add the MUM to the result
            if (0 < mum2Length)
            {
                string mumReferenceText = referenceSequence.Range(
                    mum2ReferenceStartIndex,
                    mum2Length).ToString();
                sequenceResult1.InsertRange(sequenceResult1.Count, mumReferenceText);

                string mumQueryText = querySequence.Range(
                    mum2QueryStartIndex,
                    mum2Length).ToString();
                sequenceResult2.InsertRange(sequenceResult2.Count, mumQueryText);

                // The consensus takes the reference side of the MUM.
                consensusResult.InsertRange(consensusResult.Count, mumReferenceText);

                // Get the byte array (indices of symbol in MUM)
                byte[] indices = SimilarityMatrix.ToByteArray(mumReferenceText);

                // Each MUM position is scored as a symbol matched against itself.
                foreach (byte index in indices)
                {
                    score += SimilarityMatrix[index, index];
                }
            }

            return score;
        }

        /// <summary>
        /// Scores a run of <paramref name="gapLength"/> consecutive gap positions:
        /// one open cost plus an extension cost for every further position when
        /// UseGapExtensionCost is set, otherwise the open cost for every position.
        /// </summary>
        /// <param name="gapLength">Number of gap positions in the run.</param>
        /// <returns>Score of the gap run.</returns>
        private int ScoreGapOnlyRun(int gapLength)
        {
            if (UseGapExtensionCost)
            {
                return GapOpenCost + ((gapLength - 1) * GapExtensionCost);
            }

            return gapLength * GapOpenCost;
        }