Exemple #1
0
 /// <summary>
 /// Initializes a new instance of the MatchExtension class from an existing match.
 /// The offsets and the length are copied from <paramref name="mum"/>; the
 /// IsGood and IsTentative flags both start out as false.
 /// </summary>
 /// <param name="mum">Match whose offsets and length are copied.</param>
 public MatchExtension(Match mum)
 {
     // Positional data taken over from the source match.
     ReferenceSequenceOffset = mum.ReferenceSequenceOffset;
     QuerySequenceOffset = mum.QuerySequenceOffset;
     Length = mum.Length;

     // A freshly created extension is neither good nor tentative.
     IsGood = false;
     IsTentative = false;
 }
        /// <summary>
        /// Runs the longest increasing subsequence search over three matches, the first
        /// two of which cross each other (reference offsets ascend while query offsets
        /// descend), and expects the first and the third match to be returned.
        /// </summary>
        public void TestLISWithCross1()
        {
            // Input matches.
            var input = new List<Match>
            {
                new Match { ReferenceSequenceOffset = 0, Length = 4, QuerySequenceOffset = 4 },
                new Match { ReferenceSequenceOffset = 4, Length = 3, QuerySequenceOffset = 0 },
                new Match { ReferenceSequenceOffset = 10, Length = 3, QuerySequenceOffset = 10 },
            };

            // Sort the matches, then extract the longest increasing subsequence.
            var finder = new LongestIncreasingSubsequence();
            IList<Match> sorted = finder.SortMum(input);
            IList<Match> actual = finder.GetLongestSequence(sorted);

            // The second input match is expected to be missing from the result.
            var expected = new List<Match>
            {
                new Match { ReferenceSequenceOffset = 0, Length = 4, QuerySequenceOffset = 4 },
                new Match { ReferenceSequenceOffset = 10, Length = 3, QuerySequenceOffset = 10 },
            };

            Assert.IsTrue(CompareMumList(actual, expected));
        }
        /// <summary>
        /// Find the longest increasing sub sequence from the given set of MUMs.
        /// Every MUM is scored by chaining it to each MUM that precedes it in
        /// <paramref name="sortedMums"/>; the chain with the highest score is returned,
        /// with every chained MUM trimmed by the overlap it has with its predecessor.
        /// </summary>
        /// <param name="sortedMums">List of sorted MUMs.</param>
        /// <returns>
        /// Longest Increasing Subsequence as newly created matches; null when
        /// <paramref name="sortedMums"/> is null, and an empty list when there are
        /// no MUMs to chain.
        /// </returns>
        public IList<Match> GetLongestSequence(IList<Match> sortedMums)
        {
            if (sortedMums == null)
            {
                return null;
            }

            MatchExtension[] matches = ConvertToMUMExtension(sortedMums);

            // Nothing to chain. Without this guard the "best" lookup further down
            // would index element 0 of an empty array and throw.
            if (matches.Length == 0)
            {
                return new List<Match>();
            }

            for (var counteri = 0; counteri < matches.Length; counteri++)
            {
                MatchExtension current = matches[counteri];

                // Initialize the MUM Extension: a chain made of this MUM alone,
                // with no predecessor (From == -1) and nothing to trim.
                current.Score = current.Length;
                current.WrapScore = current.Length;
                current.Adjacent = 0;
                current.From = -1;

                for (var counterj = 0; counterj < counteri; counterj++)
                {
                    MatchExtension previous = matches[counterj];

                    // Overlap in the query sequence: how far the earlier MUM
                    // reaches past the start of the current one (never negative).
                    var queryOverlap = previous.QuerySequenceOffset + previous.Length;

                    queryOverlap -= current.QuerySequenceOffset;
                    var overlap = queryOverlap > 0 ? queryOverlap : 0;

                    // Wrap score candidate that only accounts for the query overlap.
                    var score = previous.Score
                                + current.Length
                                - overlap;
                    if (score > current.WrapScore)
                    {
                        current.WrapScore = score;
                    }

                    // Overlap in the reference sequence of MUM
                    var referenceOverlap = previous.ReferenceSequenceOffset
                                           + previous.Length
                                           - current.ReferenceSequenceOffset;

                    // From here on the larger of the two overlaps is charged.
                    overlap = overlap > referenceOverlap ? overlap : referenceOverlap;

                    score = previous.Score
                            + current.Length
                            - overlap;
                    if (score > current.Score)
                    {
                        // To remove crosses, mark counteri as next MUM From counterj
                        // without any crosses
                        current.From = counterj;

                        // Set the new score and overlap after removing the cross
                        current.Score = score;
                        current.Adjacent = overlap;
                    }

                    // Wrap score candidate built on the predecessor's wrap score.
                    // Note the non-strict comparison: ties also overwrite.
                    score = previous.WrapScore
                            + current.Length
                            - overlap;
                    if (score >= current.WrapScore)
                    {
                        current.WrapScore = score;
                    }
                }
            }

            // Find the best longest increasing subsequence
            // Sequence with highest score is the longest increasing subsequence;
            // on equal scores the earliest MUM is kept.
            long best = 0;
            long bestScore = matches[best].Score;
            for (long counteri = 1; counteri < matches.Length; counteri++)
            {
                if (matches[counteri].Score > bestScore)
                {
                    best = counteri;
                    bestScore = matches[best].Score;
                }
            }

            // Mark the MUMs in longest increasing subsequence as "Good" by walking
            // the From links backwards until the chain start (From == -1) is reached.
            for (long counteri = best; counteri >= 0; counteri = matches[counteri].From)
            {
                matches[counteri].IsGood = true;
            }

            IList<Match> outputMums = new List<Match>();
            foreach (MatchExtension t in matches)
            {
                if (t.IsGood)
                {
                    // Cut off the leading part that overlaps the predecessor.
                    var adjacent = t.Adjacent;
                    if (0 != adjacent)
                    {
                        t.ReferenceSequenceOffset += adjacent;
                        t.QuerySequenceOffset += adjacent;
                        t.Length -= adjacent;
                    }

                    // A MUM that was trimmed away completely is not reported.
                    if (0 < t.Length)
                    {
                        Match match = new Match();
                        match.Length = t.Length;
                        match.QuerySequenceOffset = t.QuerySequenceOffset;
                        match.ReferenceSequenceOffset = t.ReferenceSequenceOffset;
                        outputMums.Add(match);
                    }
                }
            }

            // Return the list of MUMs that represent the longest increasing subsequence
            return outputMums;
        }
Exemple #4
0
        /// <summary>
        /// Aligns the region lying between two MUMs and then appends the second MUM.
        /// When both sequences have symbols in the gap, the two gap regions are aligned
        /// pairwise; when only one of them has symbols, those symbols are paired with a
        /// default gap of the same length. All output is appended to the result lists.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequence">Query Sequence.</param>
        /// <param name="sequenceResult1">List receiving the aligned reference symbols.</param>
        /// <param name="sequenceResult2">List receiving the aligned query symbols.</param>
        /// <param name="consensusResult">List receiving the consensus symbols.</param>
        /// <param name="mum1">
        /// MUM preceding the gap; a length of zero makes the gap start at offset 0.
        /// </param>
        /// <param name="mum2">
        /// MUM following the gap; a length of zero makes the gap run to the end of both sequences.
        /// </param>
        /// <param name="insertions">
        /// Two counters: index 0 grows by the length of a query-only gap, index 1 by the
        /// length of a reference-only gap, and both grow by the "Insertions" metadata of
        /// the pairwise alignments.
        /// </param>
        /// <returns>Score of alignment.</returns>
        private long AlignGap(
                ISequence referenceSequence,
                ISequence querySequence,
                List<byte> sequenceResult1,
                List<byte> sequenceResult2,
                List<byte> consensusResult,
                Match mum1,
                Match mum2,
                out List<long> insertions)
        {
            long score = 0;
            insertions = new List<long>(2) { 0, 0 };

            // The gap opens right behind the first MUM.
            long referenceGapStart = 0;
            long queryGapStart = 0;
            if (mum1.Length != 0)
            {
                long firstMumLength = mum1.Length;
                referenceGapStart = mum1.ReferenceSequenceOffset + firstMumLength;
                queryGapStart = mum1.QuerySequenceOffset + firstMumLength;
            }

            // The gap closes where the second MUM starts, or at the end of the
            // sequences when there is no second MUM.
            long secondMumLength = 0;
            long referenceGapEnd;
            long queryGapEnd;
            if (mum2.Length != 0)
            {
                referenceGapEnd = mum2.ReferenceSequenceOffset;
                queryGapEnd = mum2.QuerySequenceOffset;
                secondMumLength = mum2.Length;
            }
            else
            {
                referenceGapEnd = referenceSequence.Count;
                queryGapEnd = querySequence.Count;
            }

            bool referenceHasGap = referenceGapEnd > referenceGapStart;
            bool queryHasGap = queryGapEnd > queryGapStart;

            if (referenceHasGap && queryHasGap)
            {
                // Symbols on both sides: let the pairwise aligner handle the region.
                ISequence referenceGap = referenceSequence.GetSubSequence(
                    referenceGapStart,
                    referenceGapEnd - referenceGapStart);
                ISequence queryGap = querySequence.GetSubSequence(
                    queryGapStart,
                    queryGapEnd - queryGapStart);

                IList<IPairwiseSequenceAlignment> gapAlignments = this.RunPairWise(referenceGap, queryGap);

                if (gapAlignments != null)
                {
                    foreach (IPairwiseSequenceAlignment gapAlignment in gapAlignments)
                    {
                        foreach (PairwiseAlignedSequence aligned in gapAlignment.PairwiseAlignedSequences)
                        {
                            sequenceResult1.AddRange(aligned.FirstSequence);
                            sequenceResult2.AddRange(aligned.SecondSequence);
                            consensusResult.AddRange(aligned.Consensus);

                            score += aligned.Score;

                            if (!aligned.Metadata.ContainsKey("Insertions"))
                            {
                                continue;
                            }

                            List<int> gapInsertions = aligned.Metadata["Insertions"] as List<int>;
                            if (gapInsertions == null)
                            {
                                continue;
                            }

                            // Only the first two entries are accumulated.
                            for (int slot = 0; slot < 2 && slot < gapInsertions.Count; slot++)
                            {
                                insertions[slot] += gapInsertions[slot];
                            }
                        }
                    }
                }
            }
            else if (referenceHasGap)
            {
                // Only the reference has symbols here: pair them with gap symbols.
                ISequence referenceGap = referenceSequence.GetSubSequence(
                    referenceGapStart,
                    referenceGapEnd - referenceGapStart);

                sequenceResult1.AddRange(referenceGap);
                sequenceResult2.AddRange(CreateDefaultGap(referenceGap.Count));
                consensusResult.AddRange(referenceGap);

                insertions[1] += referenceGap.Count;

                score = this.UseGapExtensionCost
                    ? this.GapOpenCost + ((referenceGap.Count - 1) * this.GapExtensionCost)
                    : referenceGap.Count * this.GapOpenCost;
            }
            else if (queryHasGap)
            {
                // Only the query has symbols here: pair them with gap symbols.
                ISequence queryGap = querySequence.GetSubSequence(
                    queryGapStart,
                    queryGapEnd - queryGapStart);

                sequenceResult1.AddRange(CreateDefaultGap(queryGap.Count));
                sequenceResult2.AddRange(queryGap);
                consensusResult.AddRange(queryGap);

                insertions[0] += queryGap.Count;

                score = this.UseGapExtensionCost
                    ? this.GapOpenCost + ((queryGap.Count - 1) * this.GapExtensionCost)
                    : queryGap.Count * this.GapOpenCost;
            }

            // Add the MUM to the result
            if (secondMumLength > 0)
            {
                byte[] referenceMum = referenceSequence.GetSubSequence(
                        referenceGapEnd,
                        secondMumLength).ToArray();
                sequenceResult1.AddRange(referenceMum);

                byte[] queryMum = querySequence.GetSubSequence(
                        queryGapEnd,
                        secondMumLength).ToArray();
                sequenceResult2.AddRange(queryMum);

                // The consensus of the MUM is taken from the reference side.
                consensusResult.AddRange(referenceMum);

                // Each MUM symbol is scored as a match of the symbol with itself.
                foreach (byte symbol in referenceMum)
                {
                    score += SimilarityMatrix[symbol, symbol];
                }
            }

            return score;
        }
        /// <summary>
        /// Validates the ToString() output of Match and of MatchExtension against the
        /// expected strings obtained through the XML utility.
        /// </summary>
        public void ValidateMatchAndMatchExtensionToString()
        {
            var match = new Match { Length = 20, QuerySequenceOffset = 33 };
            var matchExtn = new MatchExtension(match) { ID = 1, Length = 20 };

            string actualMatchExtnString = matchExtn.ToString();
            string actualMatchString = match.ToString();
            string expectedMatchExtnString = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.ExpectedMatchExtnStringNode);
            string expectedMatchString = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.ExpectedMatchStringNode);

            // Expected value goes first so that a failure message labels the
            // two values correctly.
            Assert.AreEqual(expectedMatchExtnString, actualMatchExtnString);
            Assert.AreEqual(expectedMatchString, actualMatchString);
        }
        /// <summary>
        /// Validates the ToString() output of a Cluster built from two match extensions
        /// against the expected string obtained through the XML utility.
        /// </summary>
        public void ValidateClusterToString()
        {
            var match = new Match();

            var matchExtn1 = new MatchExtension(match) { ID = 1, Length = 20 };
            var matchExtn2 = new MatchExtension(match) { ID = 2, Length = 30 };
            IList<MatchExtension> extnList = new List<MatchExtension> { matchExtn1, matchExtn2 };

            var clust = new Cluster(extnList);
            string actualString = clust.ToString();
            string expectedString = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.ClusterExpectedNode);

            // The stored expectation carries literal "\r\n" markers; turn them into
            // real line breaks before comparing. Expected value goes first so that a
            // failure message labels the two values correctly.
            Assert.AreEqual(expectedString.Replace("\\r\\n", System.Environment.NewLine), actualString);
        }
Exemple #7
0
        /// <summary>
        ///     Gets the matches where length is greater than or equal to the MinLengthOfMatch.
        /// </summary>
        /// <param name="searchSequence">Query sequence to search.</param>
        /// <returns>Returns IEnumerable of matches.</returns>
        public IEnumerable<Match> SearchMatches(ISequence searchSequence)
        {
            // LastQueryEndIndex ->  (LastQueryStartIndex - LastRefStartIndex )-> LastRefEndIndex -> LastRefStartIndex
            var overlappingMatches = new SortedList<long, Dictionary<long, SortedList<long, SortedSet<long>>>>();
            var edgesFound = new Stack<EdgesFound>();
            long minLengthOfMatch = this.MinLengthOfMatch;
            bool noambiguity = this.NoAmbiguity;
            long queryIndex;
            long querySequenceLength = searchSequence.Count;
            long lengthOfMatchFound = 0;

            var match = new Match();

            if (minLengthOfMatch <= 0)
            {
                throw new ArgumentOutOfRangeException(Resource.MinLengthMustBeGreaterThanZero);
            }

            if (!(searchSequence is Sequence))
            {
                throw new ArgumentException(Resource.OnlySequenceClassSupported);
            }

            // Get base alphabet of the searchSequence.
            IAlphabet searchSeqBaseAlphabet = searchSequence.Alphabet;
            IAlphabet alphabet;
            while (Alphabets.AlphabetToBaseAlphabetMap.TryGetValue(searchSeqBaseAlphabet, out alphabet))
            {
                searchSeqBaseAlphabet = alphabet;
            }

            // If base alphabets are not same then throw the exception.
            if (searchSeqBaseAlphabet != this.supportedBaseAlphabet)
            {
                throw new ArgumentException(Resource.AlphabetMisMatch);
            }

            ISequence convertedSearchSeq = ProcessQuerySequence(searchSequence, noambiguity);

            long lengthOfMatchInEdge = 0;
            long edgeStartIndex = 0;
            long childStartIndexToSkip = -1;

            MultiWaySuffixEdge edge = this.rootEdge;
            MultiWaySuffixEdge previousIntermediateEdge = this.rootEdge;

            for (queryIndex = 0; queryIndex <= querySequenceLength - minLengthOfMatch; queryIndex++)
            {
                // if the previousIntermediateEdge is rootEdge then start from the begining.
                if (previousIntermediateEdge.StartIndex == -1 && lengthOfMatchInEdge > 0)
                {
                    lengthOfMatchInEdge--;
                }

                MultiWaySuffixEdge suffixLink = previousIntermediateEdge.SuffixLink[0];
                MultiWaySuffixEdge childEdgePointToParent = previousIntermediateEdge;
                bool suffixLinkPointsToParentEdge = false;

                // Verify whether SuffixLink points to its parent or not.
                if (suffixLink.StartIndex == previousIntermediateEdge.StartIndex - 1
                    && previousIntermediateEdge.SuffixLink[0].StartIndex != -1)
                {
                    int suffixLinkChildCount = suffixLink.Children.Length;

                    for (int suffixLinkChildIndex = 0;
                         suffixLinkChildIndex < suffixLinkChildCount;
                         suffixLinkChildIndex++)
                    {
                        if (suffixLink.Children[suffixLinkChildIndex].Children == previousIntermediateEdge.Children)
                        {
                            suffixLinkPointsToParentEdge = true;
                            edgesFound.Clear();
                            break;
                        }
                    }
                }

                // Go to the next query index by following the suffix link of the previousintermediate edge.
                // This will reduce the searching from the root. In this case lengthOfMatchFound will be deducted by 1.

                // As suffix link always point to another intermediate edge.
                // Note: suffix link for the root is root ifself.
                previousIntermediateEdge = suffixLink;
                lengthOfMatchFound--;

                if (lengthOfMatchFound < 0)
                {
                    lengthOfMatchFound = 0;
                }

                long searchIndex = queryIndex + lengthOfMatchFound - lengthOfMatchInEdge;
                int childCount = previousIntermediateEdge.Children.Length;
                byte refSymbol, querySymbol;

                if (lengthOfMatchInEdge > 0)
                {
                    querySymbol = convertedSearchSeq[searchIndex];
                    for (int index = 0; index < childCount; index++)
                    {
                        edge = previousIntermediateEdge.Children[index];
                        edgeStartIndex = edge.StartIndex;
                        refSymbol = TerminatingSymbol;

                        if (edgeStartIndex < this.symbolsCount)
                        {
                            refSymbol = this.referenceSequence[edgeStartIndex];
                        }

                        if (refSymbol == querySymbol)
                        {
                            break;
                        }
                    }

                    // When lengthOfMatchInEdge >0 there will be an edge from the previousIntermediateEdge.
                    while (!edge.IsLeaf)
                    {
                        long edgeEndIndex = edge.Children[0].StartIndex - 1;

                        // compare the first symbol of the edge.
                        long edgeSymbolCount = edgeEndIndex - edgeStartIndex + 1;
                        if (lengthOfMatchInEdge == edgeSymbolCount)
                        {
                            searchIndex += lengthOfMatchInEdge;

                            if (searchIndex != querySequenceLength)
                            {
                                lengthOfMatchInEdge = 0;
                                previousIntermediateEdge = edge;
                            }

                            break;
                        }
                        if (lengthOfMatchInEdge > edgeSymbolCount)
                        {
                            lengthOfMatchInEdge -= edgeSymbolCount;
                            searchIndex += edgeSymbolCount;

                            long edgeChildCount = edge.Children.Length;

                            querySymbol = convertedSearchSeq[searchIndex];

                            for (int edgeChildIndex = 0; edgeChildIndex < edgeChildCount; edgeChildIndex++)
                            {
                                if (this.referenceSequence[edge.Children[edgeChildIndex].StartIndex] == querySymbol)
                                {
                                    // get the child of edge and continue searching.
                                    previousIntermediateEdge = edge;
                                    edgeStartIndex = edge.Children[edgeChildIndex].StartIndex;
                                    if (lengthOfMatchFound - lengthOfMatchInEdge >= minLengthOfMatch)
                                    {
                                        edgesFound.Push(
                                            new EdgesFound
                                                {
                                                    Edge = previousIntermediateEdge,
                                                    LengthOfMatch = lengthOfMatchFound - lengthOfMatchInEdge
                                                });
                                        childStartIndexToSkip = edgeStartIndex;
                                    }
                                    edge = edge.Children[edgeChildIndex];
                                    break;
                                }
                            }
                        }
                        else
                        {
                            break;
                        }
                    }
                }

                bool continueSearch = true;

                if (lengthOfMatchInEdge > 0)
                {
                    // no need to continue with search as search is ended inside the edge.
                    continueSearch = false;

                    if (lengthOfMatchFound >= minLengthOfMatch)
                    {
                        // Set -1 so that it wont match with start index of any child edge.
                        edgesFound.Push(new EdgesFound { Edge = edge, LengthOfMatch = lengthOfMatchFound });
                        childStartIndexToSkip = -1;
                    }
                }

                if (queryIndex + lengthOfMatchFound >= querySequenceLength)
                {
                    // no need continue with the seach as entaire query sequence is 
                    // searched and rest of the result can be found using suffix links.
                    continueSearch = false;
                }

                while (continueSearch)
                {
                    querySymbol = 0;
                    if (searchIndex < querySequenceLength)
                    {
                        querySymbol = convertedSearchSeq[searchIndex];
                    }

                    int edgeIndex = -1;

                    childCount = previousIntermediateEdge.Children.Length;
                    for (int childIndex = 0; childIndex < childCount; childIndex++)
                    {
                        edge = previousIntermediateEdge.Children[childIndex];
                        edgeStartIndex = edge.StartIndex;

                        refSymbol = TerminatingSymbol;

                        if (edgeStartIndex < this.symbolsCount)
                        {
                            refSymbol = this.referenceSequence[edgeStartIndex];
                        }

                        if (refSymbol == querySymbol)
                        {
                            edgeIndex = childIndex;
                            break;
                        }
                    }

                    if (edgeIndex == -1)
                    {
                        lengthOfMatchInEdge = 0;
                        continueSearch = false;

                        if (lengthOfMatchFound >= minLengthOfMatch)
                        {
                            // Set -1 so that it wont match with start index of any child edge.
                            edgesFound.Push(
                                new EdgesFound { Edge = previousIntermediateEdge, LengthOfMatch = lengthOfMatchFound });
                            childStartIndexToSkip = -1;
                        }
                    }
                    else
                    {
                        if (lengthOfMatchFound >= minLengthOfMatch)
                        {
                            edgesFound.Push(
                                new EdgesFound { Edge = previousIntermediateEdge, LengthOfMatch = lengthOfMatchFound });
                            childStartIndexToSkip = edge.StartIndex;
                        }

                        searchIndex++;
                        lengthOfMatchFound++;
                        lengthOfMatchInEdge = 1;

                        // Get the endIndex of the edge found.
                        long edgeEndIndex = this.symbolsCount;

                        if (!edge.IsLeaf)
                        {
                            // return the minimum start index of children -1
                            edgeEndIndex = edge.Children[0].StartIndex - 1;
                        }

                        long edgeLength = edgeEndIndex - edgeStartIndex + 1;

                        for (long referenceIndex = edgeStartIndex + 1; referenceIndex <= edgeEndIndex; referenceIndex++)
                        {
                            refSymbol = TerminatingSymbol;
                            if (referenceIndex < this.symbolsCount)
                            {
                                refSymbol = this.referenceSequence[referenceIndex];
                            }

                            querySymbol = 0;
                            if (searchIndex < querySequenceLength)
                            {
                                querySymbol = convertedSearchSeq[searchIndex];
                            }

                            // Stop searching if any one of the following conditions is true.
                            // 1. Reached end of the query sequence
                            // 2. Reached end of the leaf edge.
                            // 3. Symbols are not matching
                            if (refSymbol != querySymbol)
                            {
                                break;
                            }

                            searchIndex++;
                            lengthOfMatchFound++;
                            lengthOfMatchInEdge++;
                        }

                        // Can't continue with search if the following conditions met thus add the edge to the stack.
                        // 1. Edge is a leaf edge regardless of where the search ended.
                        // 2. Edge is an intermediate edge and search ended inside the edge.
                        // 3. searchIndex is equl to the length of the search sequence. (as we increment the searchIndex in advance).
                        if (edge.IsLeaf || lengthOfMatchInEdge < edgeLength || searchIndex == querySequenceLength)
                        {
                            if (lengthOfMatchFound >= minLengthOfMatch)
                            {
                                // Set -1 so that it wont match with start index of any child edge.
                                edgesFound.Push(new EdgesFound { Edge = edge, LengthOfMatch = lengthOfMatchFound });
                                childStartIndexToSkip = -1;
                            }

                            // go to the next queryIndex 
                            continueSearch = false;
                        }
                        else
                        {
                            // if the edge is completly searched and edge is an intemediate edge then continue with the search.
                            previousIntermediateEdge = edge;
                        }
                    }
                }

                // first edge in the stack will be the search ended edge, so process it seperatly.
                if (edgesFound.Count > 0)
                {
                    EdgesFound itemToDisplay = edgesFound.Pop();
                    edge = itemToDisplay.Edge;
                    long matchLength = itemToDisplay.LengthOfMatch;

                    long refIndex;
                    if (edge.IsLeaf)
                    {
                        refIndex = edge.StartIndex + lengthOfMatchInEdge - matchLength;
                        if (ValidateMatch(queryIndex, refIndex, matchLength, overlappingMatches, out match))
                        {
                            yield return match;
                        }
                    }
                    else
                    {
                        childCount = edge.Children.Length;
                        long edgeLength = edge.Children[0].StartIndex - edge.StartIndex;
                        var startIndexes = new List<long>();

                        // suffixLink.Children == edge.Children - reference check to identify the edge having suffix link pointing to its parent.
                        if (suffixLinkPointsToParentEdge && childEdgePointToParent.Children == edge.Children)
                        {
                            startIndexes.Add(edge.StartIndex);
                        }
                        else
                        {
                            for (int childIndex = 0; childIndex < childCount; childIndex++)
                            {
                                if (edge.Children[childIndex].StartIndex == childStartIndexToSkip)
                                {
                                    continue;
                                }

                                DepthFirstIterativeTraversal(edge.Children[childIndex], edgeLength, startIndexes);
                            }

                            startIndexes.Sort();
                        }

                        int listCount = startIndexes.Count;
                        for (int matchIndex = 0; matchIndex < listCount; matchIndex++)
                        {
                            long startIndex = startIndexes[matchIndex];
                            long edgeLengthToAdd = lengthOfMatchInEdge == 0 ? edgeLength : lengthOfMatchInEdge;
                            refIndex = startIndex + edgeLengthToAdd - matchLength;

                            if (ValidateMatch(queryIndex, refIndex, matchLength, overlappingMatches, out match))
                            {
                                yield return match;
                            }
                        }

                        startIndexes.Clear();
                    }

                    // edgesFoundForNextQueryIndex is used for temporary storage and to maintain the order when it pushed to edgesFound stack.
                    var edgesFoundForNextQueryIndex = new Stack<EdgesFound>();

                    EdgesFound previousItemToDisplay = itemToDisplay;

                    // return the output and add the output the list to ignore the outputs that are not required.
                    while (edgesFound.Count > 0)
                    {
                        itemToDisplay = edgesFound.Pop();
                        edge = itemToDisplay.Edge;
                        matchLength = itemToDisplay.LengthOfMatch;

                        if (!edge.IsLeaf && !previousItemToDisplay.Edge.IsLeaf
                            && previousItemToDisplay.Edge.StartIndex
                            != previousItemToDisplay.Edge.SuffixLink[0].StartIndex)
                        {
                            Stack<EdgesFound> tempStack = this.GetIntermediateEdges(
                                edge,
                                previousItemToDisplay.Edge,
                                matchLength,
                                previousItemToDisplay.LengthOfMatch - matchLength,
                                queryIndex + 1,
                                convertedSearchSeq,
                                minLengthOfMatch);
                            if (tempStack.Count > 0)
                            {
                                while (tempStack.Count > 0)
                                {
                                    edgesFoundForNextQueryIndex.Push(tempStack.Pop());
                                }
                            }
                        }

                        childCount = edge.Children.Length;
                        long edgeLength = edge.Children[0].StartIndex - edge.StartIndex;
                        var startIndexes = new List<long>();
                        HashSet<long> overlappingStartIndexes = itemToDisplay.StartIndexesFromPreviousMatchPathEdge;

                        // suffixLink.Children == edge.Children - reference check to identify the edge having suffix link pointing to its parent.
                        if (suffixLinkPointsToParentEdge && childEdgePointToParent.Children == edge.Children)
                        {
                            startIndexes.Add(edge.StartIndex);
                        }
                        else
                        {
                            for (int childIndex = 0; childIndex < childCount; childIndex++)
                            {
                                // if (edge.Children[childIndex].StartIndex == itemToDisplay.ChildStartIndexToSkip)
                                if (edge.Children[childIndex].StartIndex == previousItemToDisplay.Edge.StartIndex)
                                {
                                    continue;
                                }

                                DepthFirstIterativeTraversal(edge.Children[childIndex], edgeLength, startIndexes);
                            }

                            if (overlappingStartIndexes != null)
                            {
                                for (int index = startIndexes.Count - 1; index >= 0; index--)
                                {
                                    if (overlappingStartIndexes.Contains(startIndexes[index]))
                                    {
                                        startIndexes.RemoveAt(index);
                                    }
                                }
                            }

                            startIndexes.Sort();
                        }

                        if (matchLength - 1 >= minLengthOfMatch)
                        {
                            var newEdgeFound = new EdgesFound
                                                   {
                                                       Edge = edge.SuffixLink[0],
                                                       LengthOfMatch = matchLength - 1
                                                   };
                            HashSet<long> overlappingStartIndexesForNextQueryIndex = null;
                            if (edge.StartIndex == edge.SuffixLink[0].StartIndex)
                            {
                                overlappingStartIndexesForNextQueryIndex = new HashSet<long>();
                                if (overlappingStartIndexes != null)
                                {
                                    foreach (long startIndex in overlappingStartIndexes)
                                    {
                                        overlappingStartIndexesForNextQueryIndex.Add(startIndex);
                                    }
                                }

                                for (int index = startIndexes.Count - 1; index >= 0; index--)
                                {
                                    overlappingStartIndexesForNextQueryIndex.Add(startIndexes[index]);
                                }
                            }

                            newEdgeFound.StartIndexesFromPreviousMatchPathEdge =
                                overlappingStartIndexesForNextQueryIndex;

                            // get the suffix link for the edge and add them to the tempstack.
                            edgesFoundForNextQueryIndex.Push(newEdgeFound);
                        }

                        int listCount = startIndexes.Count;
                        for (int matchIndex = 0; matchIndex < listCount; matchIndex++)
                        {
                            long startIndex = startIndexes[matchIndex];
                            refIndex = startIndex + edgeLength - matchLength;

                            if (ValidateMatch(queryIndex, refIndex, matchLength, overlappingMatches, out match))
                            {
                                yield return match;
                            }
                        }

                        startIndexes.Clear();
                        previousItemToDisplay = itemToDisplay;
                    }

                    if (matchLength > minLengthOfMatch && !suffixLinkPointsToParentEdge)
                    {
                        Stack<EdgesFound> tempStack = this.GetIntermediateEdges(
                            this.rootEdge,
                            previousItemToDisplay.Edge,
                            1,
                            previousItemToDisplay.LengthOfMatch,
                            queryIndex + 1,
                            convertedSearchSeq,
                            minLengthOfMatch);
                        while (tempStack.Count > 0)
                        {
                            edgesFoundForNextQueryIndex.Push(tempStack.Pop());
                        }
                    }

                    // push the items in temp stack to the edgesFound stack
                    while (edgesFoundForNextQueryIndex.Count > 0)
                    {
                        edgesFound.Push(edgesFoundForNextQueryIndex.Pop());
                    }
                }
            }
        }
Exemple #8
0
        /// <summary>
        ///     Checks a candidate match against the matches accepted so far. A candidate that is not fully
        ///     contained in an earlier match on the same diagonal is recorded and handed back to the caller.
        /// </summary>
        /// <param name="queryIndex">Start of the candidate in the query sequence.</param>
        /// <param name="referenceIndex">Start of the candidate in the reference sequence.</param>
        /// <param name="matchLength">Number of symbols in the candidate.</param>
        /// <param name="previousMatches">
        ///     Accepted matches, keyed by query end index, then by (query start - reference start), then by
        ///     reference end index, holding the set of reference start indexes. Updated in place: stale
        ///     entries may be dropped and an accepted candidate is added.
        /// </param>
        /// <param name="match">
        ///     Receives the populated match when the candidate is accepted; otherwise a match created with
        ///     the parameterless constructor.
        /// </param>
        /// <returns>True when the candidate was accepted and recorded; false when an earlier match already contains it.</returns>
        private static bool ValidateMatch(
            long queryIndex,
            long referenceIndex,
            long matchLength,
            SortedList<long, Dictionary<long, SortedList<long, SortedSet<long>>>> previousMatches,
            out Match match)
        {
            long candidateQueryEnd = queryIndex + matchLength;
            long candidateRefEnd = referenceIndex + matchLength;
            long diagonal = queryIndex - referenceIndex;

            // Keys are sorted ascending, so the last key is the furthest query end recorded so far.
            // When even that one lies before the candidate's start, nothing recorded can contain it.
            if (previousMatches.Count > 0 && previousMatches.Keys[previousMatches.Count - 1] < queryIndex)
            {
                previousMatches.Clear();
            }

            // Walk the recorded query ends from the largest downwards.
            bool containedInEarlierMatch = false;
            for (int endSlot = previousMatches.Count - 1; endSlot >= 0 && !containedInEarlierMatch; endSlot--)
            {
                long recordedQueryEnd = previousMatches.Keys[endSlot];
                if (recordedQueryEnd < candidateQueryEnd)
                {
                    // This entry (and every smaller one) ends too early to cover the candidate.
                    // Drop it if it is already behind the current query position, then stop looking.
                    if (recordedQueryEnd < queryIndex)
                    {
                        previousMatches.Remove(recordedQueryEnd);
                    }

                    break;
                }

                SortedList<long, SortedSet<long>> byReferenceEnd;
                if (!previousMatches.Values[endSlot].TryGetValue(diagonal, out byReferenceEnd))
                {
                    continue;
                }

                // Same diagonal: the candidate is contained when a recorded match ends at or after the
                // candidate's reference end and starts at or before its reference start.
                for (int refSlot = byReferenceEnd.Count - 1; refSlot >= 0; refSlot--)
                {
                    if (byReferenceEnd.Keys[refSlot] >= candidateRefEnd
                        && byReferenceEnd.Values[refSlot].Any(start => start <= referenceIndex))
                    {
                        containedInEarlierMatch = true;
                        break;
                    }
                }
            }

            if (containedInEarlierMatch)
            {
                match = new Match();
                return false;
            }

            match = new Match
                        {
                            ReferenceSequenceOffset = referenceIndex,
                            QuerySequenceOffset = queryIndex,
                            Length = matchLength
                        };

            // Record the accepted match, creating each level of the nested map on demand.
            Dictionary<long, SortedList<long, SortedSet<long>>> diagonalsAtQueryEnd;
            if (!previousMatches.TryGetValue(candidateQueryEnd, out diagonalsAtQueryEnd))
            {
                diagonalsAtQueryEnd = new Dictionary<long, SortedList<long, SortedSet<long>>>();
                previousMatches.Add(candidateQueryEnd, diagonalsAtQueryEnd);
            }

            SortedList<long, SortedSet<long>> referenceEndsOnDiagonal;
            if (!diagonalsAtQueryEnd.TryGetValue(diagonal, out referenceEndsOnDiagonal))
            {
                referenceEndsOnDiagonal = new SortedList<long, SortedSet<long>>();
                diagonalsAtQueryEnd.Add(diagonal, referenceEndsOnDiagonal);
            }

            SortedSet<long> referenceStarts;
            if (!referenceEndsOnDiagonal.TryGetValue(candidateRefEnd, out referenceStarts))
            {
                referenceStarts = new SortedSet<long>();
                referenceEndsOnDiagonal.Add(candidateRefEnd, referenceStarts);
            }

            referenceStarts.Add(referenceIndex);
            return true;
        }
Exemple #9
0
        /// <summary>
        ///     Gets the matches unique in reference sequence where length is greater than or equal to the MinLengthOfMatch.
        /// </summary>
        /// <remarks>
        ///     This is an iterator method (it uses <c>yield return</c>), so the argument checks and the search itself
        ///     only run once the returned sequence is enumerated.
        ///     A match is yielded only when the search for a query position ends on a leaf edge, its length is at
        ///     least MinLengthOfMatch, and it ends beyond the end of the previously yielded match.
        ///     After each query position the search follows the suffix link of the last fully matched intermediate
        ///     edge instead of restarting from the root.
        /// </remarks>
        /// <param name="searchSequence">
        ///     Sequence to search. Must be a <c>Sequence</c> whose base alphabet is the base alphabet supported by this instance.
        /// </param>
        /// <returns>Returns a lazily evaluated IEnumerable of matches.</returns>
        /// <exception cref="ArgumentOutOfRangeException">MinLengthOfMatch is less than or equal to zero.</exception>
        /// <exception cref="ArgumentException">
        ///     <paramref name="searchSequence"/> is not a <c>Sequence</c>, or its base alphabet differs from the supported one.
        /// </exception>
        public IEnumerable<Match> SearchMatchesUniqueInReference(ISequence searchSequence)
        {
            long minLengthOfMatch = this.MinLengthOfMatch;
            bool noambiguity = this.NoAmbiguity;
            long queryIndex = 0;
            long querySequenceLength = searchSequence.Count;

            // Start and length of the most recently yielded match; used to suppress matches that do not
            // extend past its end.
            long lastMatchQueryStart = 0;
            long lastMatchLength = 0;

            // Number of query symbols matched so far for the current query position (carried over, minus one,
            // to the next query position).
            long lengthOfMatchFound = 0;

            // This initial value is never read: match is reassigned before the only yield below.
            var match = new Match();

            // Get base alphabet of the searchSequence.
            IAlphabet searchSeqBaseAlphabet = searchSequence.Alphabet;
            IAlphabet alphabet;

            if (minLengthOfMatch <= 0)
            {
                // NOTE(review): the single-string constructor of ArgumentOutOfRangeException takes the parameter
                // name, not the message, so the resource text ends up as ParamName - confirm this is intended.
                throw new ArgumentOutOfRangeException(Resource.MinLengthMustBeGreaterThanZero);
            }

            if (!(searchSequence is Sequence))
            {
                throw new ArgumentException(Resource.OnlySequenceClassSupported);
            }

            // Follow the alphabet-to-base-alphabet map until an alphabet with no further mapping is reached.
            while (Alphabets.AlphabetToBaseAlphabetMap.TryGetValue(searchSeqBaseAlphabet, out alphabet))
            {
                searchSeqBaseAlphabet = alphabet;
            }

            // If base alphabets are not same then throw the exception.
            if (searchSeqBaseAlphabet != this.supportedBaseAlphabet)
            {
                throw new ArgumentException(Resource.AlphabetMisMatch);
            }

            // All symbol comparisons below read the query through this converted sequence.
            ISequence convertedSearchSeq = ProcessQuerySequence(searchSequence, noambiguity);

            // Number of symbols matched inside the edge currently being examined.
            long lengthOfMatchInEdge = 0;
            long edgeStartIndex = 0;

            MultiWaySuffixEdge edge = this.rootEdge;

            // Last intermediate edge that was matched completely; the next query position resumes from its suffix link.
            MultiWaySuffixEdge previousIntermediateEdge = this.rootEdge;

            // Only query positions that still leave room for a match of minLengthOfMatch symbols are tried.
            for (queryIndex = 0; queryIndex <= querySequenceLength - minLengthOfMatch; queryIndex++)
            {
                // NOTE(review): presumably StartIndex == -1 identifies the root edge, where dropping the first
                // query symbol shortens the partial edge match by one - confirm against MultiWaySuffixEdge.
                if (previousIntermediateEdge.StartIndex == -1 && lengthOfMatchInEdge > 0)
                {
                    lengthOfMatchInEdge--;
                }

                // As suffix link always point to another intermediate edge.
                // Note: suffix link for the root is root itself.
                previousIntermediateEdge = previousIntermediateEdge.SuffixLink[0];
                int childCount = previousIntermediateEdge.Children.Length;

                // Moving to the next query position drops the first symbol of the previous match (never below zero).
                lengthOfMatchFound--;

                if (lengthOfMatchFound < 0)
                {
                    lengthOfMatchFound = 0;
                }

                // Query position of the first symbol that still has to be located below previousIntermediateEdge.
                long searchIndex = queryIndex + lengthOfMatchFound - lengthOfMatchInEdge;

                // if lengthOfMatchInEdge is greater than zero then instead of searching from the query index
                // try to jump to the edge starting at lengthOfMatchFound - lengthOfMatchInEdge distance from the root.
                // As previousIntermediateEdge is lengthOfMatchFound distance from the root find an edge in the path of
                // match such that lengthOfMatchInEdge will end inside that edge.
                byte refSymbol, querySymbol;
                if (lengthOfMatchInEdge > 0)
                {
                    // Pick the child whose first symbol equals the query symbol at searchIndex.
                    // If no child matches, edge is left at the last child examined.
                    querySymbol = convertedSearchSeq[searchIndex];
                    for (int index = 0; index < childCount; index++)
                    {
                        edge = previousIntermediateEdge.Children[index];

                        edgeStartIndex = edge.StartIndex;

                        // A start index at or beyond symbolsCount is read as the terminating symbol.
                        refSymbol = TerminatingSymbol;
                        if (edgeStartIndex < this.symbolsCount)
                        {
                            refSymbol = this.referenceSequence[edgeStartIndex];
                        }

                        if (refSymbol == querySymbol)
                        {
                            break;
                        }
                    }

                    // When lengthOfMatchInEdge > 0 there will be an edge from the previousIntermediateEdge in the path of match.
                    // Walk down by whole edge lengths (only first symbols are compared) until the remaining
                    // lengthOfMatchInEdge is used up exactly, ends inside an edge, or a leaf is reached.
                    while (!edge.IsLeaf)
                    {
                        // An intermediate edge's label ends just before the start index of its first child.
                        long edgeEndIndex = edge.Children[0].StartIndex - 1;

                        // Number of symbols on this edge.
                        long edgeSymbolCount = edgeEndIndex - edgeStartIndex + 1;
                        if (lengthOfMatchInEdge == edgeSymbolCount)
                        {
                            // The carried-over match ends exactly at the end of this edge: resume symbol
                            // comparison from its children.
                            previousIntermediateEdge = edge;
                            searchIndex += lengthOfMatchInEdge;
                            lengthOfMatchInEdge = 0;
                            break;
                        }
                        if (lengthOfMatchInEdge > edgeSymbolCount)
                        {
                            // The carried-over match runs past this edge: consume it and descend to the child
                            // that starts with the next query symbol.
                            lengthOfMatchInEdge -= edgeSymbolCount;
                            searchIndex += edgeSymbolCount;

                            long edgeChildCount = edge.Children.Length;

                            querySymbol = convertedSearchSeq[searchIndex];

                            for (int edgeChildIndex = 0; edgeChildIndex < edgeChildCount; edgeChildIndex++)
                            {
                                // NOTE(review): unlike the other reference lookups in this method, this one does not
                                // check StartIndex against symbolsCount before indexing referenceSequence - confirm
                                // that a terminating-symbol child can never be reached here.
                                if (this.referenceSequence[edge.Children[edgeChildIndex].StartIndex] == querySymbol)
                                {
                                    // get the child of edge and continue searching.
                                    previousIntermediateEdge = edge;
                                    edge = edge.Children[edgeChildIndex];
                                    edgeStartIndex = edge.StartIndex;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            // The carried-over match ends inside this edge.
                            break;
                        }
                    }

                    if (lengthOfMatchInEdge > 0)
                    {
                        // lengthOfMatchInEdge > 0 means search is not ending in an intermediate edge or at the endIndex of an edge,
                        // so no need to continue with the search as there will be a mismatch.
                        continue;
                    }
                }

                bool continueSearch = true;

                // start searching for the match by comparing the symbols.
                while (continueSearch)
                {
                    // Positions past the end of the query are read as symbol 0.
                    querySymbol = 0;
                    if (searchIndex < querySequenceLength)
                    {
                        querySymbol = convertedSearchSeq[searchIndex];
                    }

                    // Index of the child edge whose first symbol matches; stays -1 when none does.
                    int edgeIndex = -1;

                    childCount = previousIntermediateEdge.Children.Length;
                    for (int childIndex = 0; childIndex < childCount; childIndex++)
                    {
                        edge = previousIntermediateEdge.Children[childIndex];
                        edgeStartIndex = edge.StartIndex;

                        refSymbol = TerminatingSymbol;

                        if (edgeStartIndex < this.symbolsCount)
                        {
                            refSymbol = this.referenceSequence[edgeStartIndex];
                        }

                        if (refSymbol == querySymbol)
                        {
                            // First symbol of this edge matches: count it and continue along the edge below.
                            searchIndex++;
                            edgeIndex = childIndex;
                            lengthOfMatchFound++;
                            lengthOfMatchInEdge = 1;
                            break;
                        }
                    }

                    // if edge not found.
                    if (edgeIndex == -1)
                    {
                        // Since the previous edge is an intermediate edge the match is repeated in the reference sequence.
                        // Thus even though the match length is greater than or equal to the MinLengthOfMatch don't consider the match.

                        // Go to the next query index by following the suffix link of the previous intermediate edge.
                        // This will reduce time required for searching from the root. In this case lengthOfMatchFound will be deducted by 1.
                        break;
                    }

                    // Get the endIndex of the edge found.
                    // A leaf edge runs up to symbolsCount, the index that is read as the terminating symbol.
                    long edgeEndIndex = this.symbolsCount;

                    if (!edge.IsLeaf)
                    {
                        // An intermediate edge ends one position before the start index of its first child.
                        edgeEndIndex = edge.Children[0].StartIndex - 1;
                    }

                    // Compare the remaining symbols of the edge (the first one was matched above) with the query.
                    for (long referenceIndex = edgeStartIndex + 1; referenceIndex <= edgeEndIndex; referenceIndex++)
                    {
                        refSymbol = TerminatingSymbol;
                        if (referenceIndex < this.symbolsCount)
                        {
                            refSymbol = this.referenceSequence[referenceIndex];
                        }

                        querySymbol = 0;
                        if (searchIndex < querySequenceLength)
                        {
                            querySymbol = convertedSearchSeq[searchIndex];
                        }

                        // Stop searching if any one of the following conditions is true.
                        // 1. Reached end of the query sequence
                        // 2. Reached end of the leaf edge.
                        // 3. Symbols are not matching
                        if (refSymbol != querySymbol)
                        {
                            break;
                        }

                        searchIndex++;
                        lengthOfMatchFound++;
                        lengthOfMatchInEdge++;
                    }

                    // if it is a leaf node
                    if (edge.IsLeaf)
                    {
                        // if the match length is greater than or equal to the minLengthOfMatch then yield the match,
                        // but only when it ends beyond the end of the previously yielded match.
                        if (lengthOfMatchFound >= minLengthOfMatch
                            && queryIndex + lengthOfMatchFound > lastMatchQueryStart + lastMatchLength)
                        {
                            // The reference offset is obtained by stepping back from the matched position inside
                            // the leaf edge by the total match length.
                            match = new Match
                                        {
                                            ReferenceSequenceOffset =
                                                edgeStartIndex + lengthOfMatchInEdge - lengthOfMatchFound,
                                            QuerySequenceOffset = queryIndex,
                                            Length = lengthOfMatchFound
                                        };
                            yield return match;

                            if (searchIndex >= querySequenceLength - 1)
                            {
                                // reached the end of the query sequence, no further search needed.
                                // Setting queryIndex to the query length also makes the outer loop condition fail.
                                continueSearch = false;
                                queryIndex = querySequenceLength;
                                break;
                            }

                            lastMatchLength = lengthOfMatchFound;
                            lastMatchQueryStart = queryIndex;
                        }

                        // go to the next queryIndex
                        continueSearch = false;
                    }
                    else
                    {
                        // The search stopped part-way through an intermediate edge:
                        // ignore the match and go to the next queryIndex.
                        if (lengthOfMatchInEdge < (edgeEndIndex - edgeStartIndex + 1))
                        {
                            continueSearch = false;
                        }
                        else
                        {
                            // if the edge is completely searched, then continue with the search.
                            lengthOfMatchInEdge = 0;
                            previousIntermediateEdge = edge;
                        }
                    }
                }
            }
        }
Exemple #10
0
        /// <summary>
        /// Parses MUMs from the input file.
        /// </summary>
        /// <remarks>
        /// Every data line must start with three integer columns separated by spaces or tabs:
        /// reference offset, query offset and match length. Any further columns are ignored.
        /// Blank lines and lines whose first non-whitespace character is '&gt;' (sequence headers) are skipped.
        /// Numbers are read as 64-bit values using the invariant culture, so offsets beyond the 32-bit range
        /// are accepted and the result does not depend on the current culture.
        /// </remarks>
        /// <param name="filename">MUM file name.</param>
        /// <returns>List of MUMs, in file order.</returns>
        /// <exception cref="FileFormatException">
        /// Thrown for any failure while opening, reading or parsing the file (for example a data line with
        /// fewer than three columns or a non-numeric column).
        /// </exception>
        private static IList<Match> ParseMums(string filename)
        {
            // TODO: Parse files with multiple query sequences
            IList<Match> mumList = new List<Match>();
            try
            {
                using (TextReader tr = File.OpenText(filename))
                {
                    string line;
                    while ((line = tr.ReadLine()) != null)
                    {
                        string record = line.Trim();

                        // Skip blank lines and header lines; trimming first also covers indented headers.
                        if (record.Length == 0 || record[0] == '>')
                        {
                            continue;
                        }

                        string[] items = record.Split(new char[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);

                        // Offsets and length are 64-bit in Match, so parse them as such. A short or non-numeric
                        // line throws here and is reported as a format error by the catch below.
                        Match mum = new Match
                        {
                            ReferenceSequenceOffset = Convert.ToInt64(items[0], System.Globalization.CultureInfo.InvariantCulture),
                            QuerySequenceOffset = Convert.ToInt64(items[1], System.Globalization.CultureInfo.InvariantCulture),
                            Length = Convert.ToInt64(items[2], System.Globalization.CultureInfo.InvariantCulture)
                        };
                        mumList.Add(mum);
                    }
                }

                return mumList;
            }
            catch
            {
                // Kept as a catch-all so callers keep seeing a single exception type for every failure.
                throw new FileFormatException(Resources.FileNotInProperFormat);
            }
        }
Exemple #11
0
        /// <summary>
        /// Align the Gap by executing pairwise alignment.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequence">Query Sequence.</param>
        /// <param name="sequenceResult1">Editable sequence containing alignment first result.</param>
        /// <param name="sequenceResult2">Editable sequence containing alignment second result.</param>
        /// <param name="consensusResult">Editable sequence containing consensus sequence.</param>
        /// <param name="mum1">First MUM of Gap.</param>
        /// <param name="mum2">Second MUM of Gap.</param>
        /// <param name="insertions">Insertions made to the aligned sequences.</param>
        /// <returns>Score of alignment.</returns>
        private long AlignGap(
                ISequence referenceSequence,
                ISequence querySequence,
                List<byte> sequenceResult1,
                List<byte> sequenceResult2,
                List<byte> consensusResult,
                Match mum1,
                Match mum2,
                out List<long> insertions)
        {
            long score = 0;
            ISequence sequence1 = null;
            ISequence sequence2 = null;
            byte[] mum1String;
            byte[] mum2String;

            insertions = new List<long>(2);
            insertions.Add(0);
            insertions.Add(0);

            long mum1ReferenceStartIndex = 0;
            long mum1QueryStartIndex = 0;
            long mum1Length = 0;
            long mum2ReferenceStartIndex = 0;
            long mum2QueryStartIndex = 0;
            long mum2Length = 0;

            if (mum1.Length != 0)
            {
                mum1ReferenceStartIndex = mum1.ReferenceSequenceOffset;
                mum1QueryStartIndex = mum1.QuerySequenceOffset;
                mum1Length = mum1.Length;
            }

            if (mum2.Length != 0)
            {
                mum2ReferenceStartIndex = mum2.ReferenceSequenceOffset;
                mum2QueryStartIndex = mum2.QuerySequenceOffset;
                mum2Length = mum2.Length;
            }
            else
            {
                mum2ReferenceStartIndex = referenceSequence.Count;
                mum2QueryStartIndex = querySequence.Count;
            }

            long referenceGapStartIndex = mum1ReferenceStartIndex + mum1Length;
            long queryGapStartIndex = mum1QueryStartIndex + mum1Length;

            /* Stich the exact matches together according to if both sequences have data
             * in the gap (in which case use a global alignment) or if only one does 
             * (in which case just insert gaps).
             */
            if (mum2ReferenceStartIndex > referenceGapStartIndex
                && mum2QueryStartIndex > queryGapStartIndex) // Both sequences have data in the gap.
            {                
                // Get the sequences in between
                sequence1 = referenceSequence.GetSubSequence(
                    referenceGapStartIndex,
                    mum2ReferenceStartIndex - referenceGapStartIndex);
                sequence2 = querySequence.GetSubSequence(
                    queryGapStartIndex,
                    mum2QueryStartIndex - queryGapStartIndex);
                
                // Do a pairwise alignment (must be needleman wunsh)
                var alignment = this.RunPairWiseReturnJustAlignment(sequence1, sequence2);
                sequenceResult1.AddRange(alignment.FirstSequence);
                sequenceResult2.AddRange(alignment.SecondSequence);
                consensusResult.AddRange(alignment.Consensus);

                score += alignment.Score;

                if (!alignment.Metadata.ContainsKey ("Insertions")) {
                    // Should never happen - can remove later.
                    throw new Exception ("NeedlemanWunsch alignment did not have an insertion entry");
                }
                List<long> gapinsertions = alignment.Metadata ["Insertions"] as List<long>;
                if (gapinsertions == null || gapinsertions.Count != 2) {
                    // Should never happen - can remove later
                    throw new Exception("Alignment Insertions were not available as a size 2 list");
                }
                insertions [0] += gapinsertions [0];
                insertions [1] += gapinsertions [1];               
            }
            else if (mum2ReferenceStartIndex > referenceGapStartIndex) // Only the reference has data, insert gaps for the query
            {
                sequence1 = referenceSequence.GetSubSequence(
                    referenceGapStartIndex,
                    mum2ReferenceStartIndex - referenceGapStartIndex);

                sequenceResult1.AddRange(sequence1);
                sequenceResult2.AddRange(CreateDefaultGap(sequence1.Count));
                consensusResult.AddRange(sequence1);

                insertions[1] += sequence1.Count;

                if (this.UseGapExtensionCost)
                {
                    score = this.GapOpenCost + ((sequence1.Count - 1) * this.GapExtensionCost);
                }
                else
                {
                    score = sequence1.Count * this.GapOpenCost;
                }
            }
            else if (mum2QueryStartIndex > queryGapStartIndex) // Only the query has data, insert gaps for the reference
            {
                sequence2 = querySequence.GetSubSequence(
                    queryGapStartIndex,
                    mum2QueryStartIndex - queryGapStartIndex);

                sequenceResult1.AddRange(CreateDefaultGap(sequence2.Count));
                sequenceResult2.AddRange(sequence2);
                consensusResult.AddRange(sequence2);

                insertions[0] += sequence2.Count;

                if (this.UseGapExtensionCost)
                {
                    score = this.GapOpenCost + ((sequence2.Count - 1) * this.GapExtensionCost);
                }
                else
                {
                    score = sequence2.Count * this.GapOpenCost;
                }
            }

            // Add the MUM to the result
            if (0 < mum2Length)
            {
                mum1String = referenceSequence.GetSubSequence(
                        mum2ReferenceStartIndex,
                        mum2Length).ToArray();
                sequenceResult1.AddRange(mum1String);

                mum2String = querySequence.GetSubSequence(
                        mum2QueryStartIndex,
                        mum2Length).ToArray();
                sequenceResult2.AddRange(mum2String);
                consensusResult.AddRange(mum1String);

                foreach (byte index in mum1String)
                {
                    score += SimilarityMatrix[index, index];
                }
            }

            return score;
        }
Exemple #12
0
        /// <summary>
        /// Checks the ToString() output of a Match and of a MatchExtension
        /// constructed from that Match.
        /// </summary>
        /// <remarks>
        /// ReferenceSequenceOffset is never assigned here; the expected strings
        /// therefore contain RefStart=0.
        /// </remarks>
        public void TestMatchAndMatchExtensionToString()
        {
            // Arrange
            Match match = new Match();
            match.Length = 20;
            match.QuerySequenceOffset = 33;

            MatchExtension matchExtn = new MatchExtension(match);
            matchExtn.ID = 1;
            matchExtn.Length = 20;

            const string expectedMatchExtnString = "RefStart=0 QueryStart=33 Length=20 Score=0 WrapScore=0 IsGood=False";
            const string expectedMatchString = "RefStart=0 QueryStart=33 Length=20";

            // Act
            string actualMatchExtnString = matchExtn.ToString();
            string actualMatchString = match.ToString();

            // Assert: expected value goes first so failure messages label the values correctly.
            Assert.AreEqual(expectedMatchExtnString, actualMatchExtnString);
            Assert.AreEqual(expectedMatchString, actualMatchString);
        }
Exemple #13
0
        /// <summary>
        /// Checks that Cluster.ToString() emits one line per contained
        /// MatchExtension, each terminated by the platform newline.
        /// </summary>
        public void TestClusterToString()
        {
            // Arrange: two extensions built from the same default Match, differing only in ID and Length.
            Match match = new Match();

            MatchExtension matchExtn1 = new MatchExtension(match) { ID = 1, Length = 20 };
            MatchExtension matchExtn2 = new MatchExtension(match) { ID = 2, Length = 30 };
            IList<MatchExtension> extnList = new List<MatchExtension> { matchExtn1, matchExtn2 };

            Cluster clust = new Cluster(extnList);

            // The literal uses "\r\n"; normalise it to the current platform's line terminator.
            string expectedString = "RefStart=0 QueryStart=0 Length=20 Score=0 WrapScore=0 IsGood=False\r\nRefStart=0 QueryStart=0 Length=30 Score=0 WrapScore=0 IsGood=False\r\n".Replace("\r\n", Environment.NewLine);

            // Act
            string actualString = clust.ToString();

            // Assert: expected value goes first so failure messages label the values correctly.
            Assert.AreEqual(expectedString, actualString);
        }