public void SemiGlobalAlign()
        {
            // Runs the algorithm in SemiGlobal mode over four word pairs and checks the
            // single expected alignment and its normalized score for each pair.
            var stringScorer = new StringScorer();

            // "car" vs. "bar"
            var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "car", "bar", GetChars);
            aligner.Mode = AlignmentMode.SemiGlobal;
            aligner.Compute();
            Alignment<string, char>[] results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            var expected = CreateAlignment(
                "| c a r |",
                "| b a r |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

            // "cart" vs. "bar": the trailing 't' is expected after the closing '|'
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "bar", GetChars);
            aligner.Mode = AlignmentMode.SemiGlobal;
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            expected = CreateAlignment(
                "| c a r | t",
                "| b a r |  ");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.57).Within(0.01));

            // "cart" vs. "art": the leading 'c' is expected before the opening '|'
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "art", GetChars);
            aligner.Mode = AlignmentMode.SemiGlobal;
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            expected = CreateAlignment(
                "c | a r t |",
                "  | a r t |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.86).Within(0.01));

            // "start" vs. "tan": characters are expected on both sides of the '|' pair
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "start", "tan", GetChars);
            aligner.Mode = AlignmentMode.SemiGlobal;
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            expected = CreateAlignment(
                "s | t a r | t",
                "  | t a n |  ");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));
        }
        /// <summary>
        /// Aligns the two segments with a global pairwise alignment (expansion/compression and
        /// transposition enabled), takes the first alignment returned, and marks every
        /// source/target index pair that shares an alignment column as aligned.
        /// </summary>
        /// <param name="sourceSegment">The source segment.</param>
        /// <param name="targetSegment">The target segment.</param>
        /// <param name="hintMatrix">Not read by this implementation.</param>
        /// <returns>
        /// A matrix of size <c>sourceSegment.Count</c> by <c>targetSegment.Count</c> with the
        /// aligned index pairs set to <see cref="AlignmentType.Aligned"/>.
        /// </returns>
        public WordAlignmentMatrix GetBestAlignment(IReadOnlyList<string> sourceSegment, IReadOnlyList<string> targetSegment,
                                                    WordAlignmentMatrix hintMatrix = null)
        {
            var aligner = new PairwiseAlignmentAlgorithm<IReadOnlyList<string>, int>(_scorer, sourceSegment, targetSegment, GetWordIndices);
            aligner.Mode = AlignmentMode.Global;
            aligner.ExpansionCompressionEnabled = true;
            aligner.TranspositionEnabled = true;
            aligner.Compute();

            Alignment<IReadOnlyList<string>, int> best = aligner.GetAlignments().First();
            var matrix = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count);

            int column = 0;
            while (column < best.ColumnCount)
            {
                // Cross product of the source indices and target indices found in this column.
                foreach (int sourceIndex in best[0, column])
                {
                    foreach (int targetIndex in best[1, column])
                    {
                        matrix[sourceIndex, targetIndex] = AlignmentType.Aligned;
                    }
                }

                column++;
            }

            return matrix;
        }
Esempio n. 3
0
 /// <summary>
 /// Stores the two words, configures a pairwise alignment of them from
 /// <paramref name="settings"/>, and computes the alignment immediately.
 /// </summary>
 /// <param name="wordAligner">Passed through to the base constructor.</param>
 /// <param name="scorer">Scorer handed to the pairwise alignment algorithm.</param>
 /// <param name="settings">Supplies the expansion/compression flag and the alignment mode.</param>
 /// <param name="word1">First word of the pair.</param>
 /// <param name="word2">Second word of the pair.</param>
 public PairwiseWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, WordPairAlignerSettings settings, Word word1, Word word2)
     : base(wordAligner)
 {
     var pair = new Word[] { word1, word2 };
     _words = new ReadOnlyList<Word>(pair);

     var algorithm = new PairwiseAlignmentAlgorithm<Word, ShapeNode>(scorer, word1, word2, GetNodes);
     algorithm.ExpansionCompressionEnabled = settings.ExpansionCompressionEnabled;
     algorithm.Mode = settings.Mode;
     _algorithm = algorithm;

     // The alignment is computed eagerly, at construction time.
     _algorithm.Compute();
 }
Esempio n. 4
0
 /// <summary>
 /// Records the word pair, builds the pairwise alignment algorithm for it using the
 /// mode and expansion/compression flag from <paramref name="settings"/>, and runs it.
 /// </summary>
 /// <param name="wordAligner">Forwarded to the base constructor.</param>
 /// <param name="scorer">Scorer given to the pairwise alignment algorithm.</param>
 /// <param name="settings">Source of the algorithm's mode and expansion/compression flag.</param>
 /// <param name="word1">First word to align.</param>
 /// <param name="word2">Second word to align.</param>
 public PairwiseWordAlignerResult(
     IWordAligner wordAligner,
     IPairwiseAlignmentScorer<Word, ShapeNode> scorer,
     WordPairAlignerSettings settings,
     Word word1,
     Word word2)
     : base(wordAligner)
 {
     Word[] bothWords = { word1, word2 };
     _words = new ReadOnlyList<Word>(bothWords);

     var pairwise = new PairwiseAlignmentAlgorithm<Word, ShapeNode>(scorer, word1, word2, GetNodes);
     pairwise.ExpansionCompressionEnabled = settings.ExpansionCompressionEnabled;
     pairwise.Mode = settings.Mode;

     _algorithm = pairwise;
     _algorithm.Compute();
 }
        public void GlobalAlign()
        {
            // Runs the algorithm without setting Mode or any feature flag and checks the
            // returned alignments and normalized scores for four word pairs.
            var stringScorer = new StringScorer();

            // "car" vs. "bar"
            var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "car", "bar", GetChars);
            aligner.Compute();
            Alignment<string, char>[] results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            var expected = CreateAlignment(
                "| c a r |",
                "| b a r |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

            // "cart" vs. "bar"
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "bar", GetChars);
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            expected = CreateAlignment(
                "| c a r t |",
                "| b a r - |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.25).Within(0.01));

            // "cart" vs. "art"
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "art", GetChars);
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            expected = CreateAlignment(
                "| c a r t |",
                "| - a r t |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));

            // "start" vs. "tan": two alignments are expected, both scoring 0.00
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "start", "tan", GetChars);
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(2));
            expected = CreateAlignment(
                "| s t a r t |",
                "| - t a - n |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.00).Within(0.01));

            expected = CreateAlignment(
                "| s t a r t |",
                "| - t a n - |");
            AssertAlignmentsEqual(results[1], expected);
            Assert.That(results[1].NormalizedScore, Is.EqualTo(0.00).Within(0.01));
        }
        public void GlobalAlign()
        {
            // Checks alignments and normalized scores produced with the algorithm's
            // default configuration (neither Mode nor any feature flag is set here).
            var charScorer = new StringScorer();

            // Case 1: "car" / "bar"
            var algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "car", "bar", GetChars);
            algorithm.Compute();
            Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            var wanted = CreateAlignment("| c a r |", "| b a r |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

            // Case 2: "cart" / "bar"
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "cart", "bar", GetChars);
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            wanted = CreateAlignment("| c a r t |", "| b a r - |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.25).Within(0.01));

            // Case 3: "cart" / "art"
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "cart", "art", GetChars);
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            wanted = CreateAlignment("| c a r t |", "| - a r t |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));

            // Case 4: "start" / "tan" -- two alignments expected
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "start", "tan", GetChars);
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(2));
            wanted = CreateAlignment("| s t a r t |", "| - t a - n |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.00).Within(0.01));

            wanted = CreateAlignment("| s t a r t |", "| - t a n - |");
            AssertAlignmentsEqual(found[1], wanted);
            Assert.That(found[1].NormalizedScore, Is.EqualTo(0.00).Within(0.01));
        }
        public void ExpansionCompressionAlign()
        {
            // Checks alignments and normalized scores when ExpansionCompressionEnabled is
            // switched on; several expected alignments place two characters in one cell.
            var charScorer = new StringScorer();

            // Case 1: "car" / "bar"
            var algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "car", "bar", GetChars);
            algorithm.ExpansionCompressionEnabled = true;
            algorithm.Compute();
            Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            var wanted = CreateAlignment("| c a r |", "| b a r |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

            // Case 2: "cart" / "bar"
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "cart", "bar", GetChars);
            algorithm.ExpansionCompressionEnabled = true;
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            wanted = CreateAlignment("| c a rt |", "| b a r  |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));

            // Case 3: "cart" / "art"
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "cart", "art", GetChars);
            algorithm.ExpansionCompressionEnabled = true;
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            wanted = CreateAlignment("| ca r t |", "| a  r t |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.75).Within(0.01));

            // Case 4: "start" / "tan" -- two alignments expected, both scoring 0.40
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "start", "tan", GetChars);
            algorithm.ExpansionCompressionEnabled = true;
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(2));
            wanted = CreateAlignment("| st ar t |", "| t  a  n |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.40).Within(0.01));

            wanted = CreateAlignment("| st a rt |", "| t  a n  |");
            AssertAlignmentsEqual(found[1], wanted);
            Assert.That(found[1].NormalizedScore, Is.EqualTo(0.40).Within(0.01));
        }
        public void GlobalAlign_EmptySequence()
        {
            // Two empty strings: exactly one alignment is expected, matching "||" / "||",
            // with a normalized score of 0.
            var stringScorer = new StringScorer();
            var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "", "", GetChars);
            aligner.Compute();

            Alignment<string, char>[] results = aligner.GetAlignments().ToArray();
            Assert.That(results.Length, Is.EqualTo(1));

            var expected = CreateAlignment("||", "||");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0));
        }
        public void ZeroMaxScore()
        {
            // Uses ZeroMaxScoreStringScorer with expansion/compression enabled and expects
            // one alignment whose normalized score is exactly 0.
            var zeroScorer = new ZeroMaxScoreStringScorer();
            var aligner = new PairwiseAlignmentAlgorithm<string, char>(zeroScorer, "car", "bar", GetChars);
            aligner.ExpansionCompressionEnabled = true;
            aligner.Compute();

            Alignment<string, char>[] results = aligner.GetAlignments().ToArray();
            Assert.That(results.Length, Is.EqualTo(1));

            var expected = CreateAlignment(
                "| c a r |",
                "| b a r |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0));
        }
        /// <summary>
        /// Builds a word alignment matrix for the two segments. A global pairwise alignment
        /// (expansion/compression and transposition enabled) supplies an initial column-by-column
        /// pairing; for each target index the method then searches nearby source indices for one
        /// whose combined translation/distance score beats the score of the initial pairing.
        /// </summary>
        /// <param name="sourceSegment">The source segment.</param>
        /// <param name="targetSegment">The target segment.</param>
        /// <param name="hintMatrix">Not read by this implementation.</param>
        /// <returns>
        /// A matrix of size <c>sourceSegment.Count</c> by <c>targetSegment.Count</c> with the
        /// selected index pairs set to <see cref="AlignmentType.Aligned"/>.
        /// </returns>
        public WordAlignmentMatrix GetBestAlignment(IReadOnlyList <string> sourceSegment,
                                                    IReadOnlyList <string> targetSegment, WordAlignmentMatrix hintMatrix = null)
        {
            var paa = new PairwiseAlignmentAlgorithm <IReadOnlyList <string>, int>(_scorer, sourceSegment, targetSegment,
                                                                                   GetWordIndices)
            {
                Mode = AlignmentMode.Global,
                ExpansionCompressionEnabled = true,
                TranspositionEnabled        = true
            };

            paa.Compute();
            // Only the first alignment returned by the algorithm is used.
            Alignment <IReadOnlyList <string>, int> alignment = paa.GetAlignments().First();
            var waMatrix = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count);

            for (int c = 0; c < alignment.ColumnCount; c++)
            {
                foreach (int j in alignment[1, c])
                {
                    // bestScore starts as the score of the pairing chosen by the pairwise alignment;
                    // minIndex/maxIndex are the source positions where the outward searches begin.
                    double bestScore;
                    int    minIndex, maxIndex;
                    if (alignment[0, c].IsNull)
                    {
                        // No source index in this column: baseline is the probability of the target
                        // word against a null source word.
                        double prob = _getTranslationProb(null, targetSegment[j]);
                        bestScore = ComputeAlignmentScore(prob, 0);
                        // Walk back to the closest earlier column that has a source cell and anchor
                        // the search at its last index (or at 0 when there is none).
                        int tc = c - 1;
                        while (tc >= 0 && alignment[0, tc].IsNull)
                        {
                            tc--;
                        }
                        int i = tc == -1 ? 0 : alignment[0, tc].Last;
                        minIndex = i;
                        maxIndex = i + 1;
                    }
                    else
                    {
                        // Baseline is the average translation probability over the cell's source indices.
                        double prob = alignment[0, c]
                                      .Average(i => _getTranslationProb(sourceSegment[i], targetSegment[j]));
                        bestScore = ComputeAlignmentScore(prob, 0);
                        // Search starts just outside the cell on either side.
                        minIndex  = alignment[0, c].First - 1;
                        maxIndex  = alignment[0, c].Last + 1;
                    }

                    int bestIndex = -1;
                    // Scan towards the start of the source, no further than _maxDistance below minIndex
                    // and never below 0. The start is clamped to the last valid source index: in the
                    // null-cell branch minIndex is 0 even when sourceSegment is empty, and reading
                    // sourceSegment[0] would then be out of range. For a non-empty source the clamp
                    // never changes the starting index.
                    for (int i = Math.Min(minIndex, sourceSegment.Count - 1); i >= Math.Max(0, minIndex - _maxDistance); i--)
                    {
                        double prob          = _getTranslationProb(sourceSegment[i], targetSegment[j]);
                        double distanceScore = ComputeDistanceScore(i, minIndex + 1, sourceSegment.Count);
                        double score         = ComputeAlignmentScore(prob, distanceScore);
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestIndex = i;
                        }
                    }

                    // Scan towards the end of the source, stopping before maxIndex + _maxDistance
                    // and before the end of the segment.
                    for (int i = maxIndex; i < Math.Min(sourceSegment.Count, maxIndex + _maxDistance); i++)
                    {
                        double prob          = _getTranslationProb(sourceSegment[i], targetSegment[j]);
                        double distanceScore = ComputeDistanceScore(i, maxIndex - 1, sourceSegment.Count);
                        double score         = ComputeAlignmentScore(prob, distanceScore);
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestIndex = i;
                        }
                    }

                    if (bestIndex == -1)
                    {
                        // Nothing beat the baseline. With a source cell, mark its first and last
                        // indices (minIndex + 1 and maxIndex - 1); with a null cell, mark nothing.
                        if (!alignment[0, c].IsNull)
                        {
                            waMatrix[minIndex + 1, j] = AlignmentType.Aligned;
                            waMatrix[maxIndex - 1, j] = AlignmentType.Aligned;
                        }
                    }
                    else
                    {
                        // A neighbouring source index scored strictly higher; align to it instead.
                        waMatrix[bestIndex, j] = AlignmentType.Aligned;
                    }
                }
            }

            return(waMatrix);
        }
        public void ExpansionCompressionAlign()
        {
            // Runs the algorithm with ExpansionCompressionEnabled set for four word pairs and
            // checks the returned alignments and their normalized scores.
            var stringScorer = new StringScorer();

            // "car" vs. "bar"
            var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "car", "bar", GetChars);
            aligner.ExpansionCompressionEnabled = true;
            aligner.Compute();
            Alignment<string, char>[] results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            var expected = CreateAlignment(
                "| c a r |",
                "| b a r |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

            // "cart" vs. "bar": "rt" is expected to share a cell opposite "r"
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "bar", GetChars);
            aligner.ExpansionCompressionEnabled = true;
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            expected = CreateAlignment(
                "| c a rt |",
                "| b a r  |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));

            // "cart" vs. "art": "ca" is expected to share a cell opposite "a"
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "art", GetChars);
            aligner.ExpansionCompressionEnabled = true;
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(1));
            expected = CreateAlignment(
                "| ca r t |",
                "| a  r t |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.75).Within(0.01));

            // "start" vs. "tan": two alignments are expected, both scoring 0.40
            aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "start", "tan", GetChars);
            aligner.ExpansionCompressionEnabled = true;
            aligner.Compute();
            results = aligner.GetAlignments().ToArray();

            Assert.That(results.Length, Is.EqualTo(2));
            expected = CreateAlignment(
                "| st ar t |",
                "| t  a  n |");
            AssertAlignmentsEqual(results[0], expected);
            Assert.That(results[0].NormalizedScore, Is.EqualTo(0.40).Within(0.01));

            expected = CreateAlignment(
                "| st a rt |",
                "| t  a n  |");
            AssertAlignmentsEqual(results[1], expected);
            Assert.That(results[1].NormalizedScore, Is.EqualTo(0.40).Within(0.01));
        }
        public void ZeroMaxScore()
        {
            // With ZeroMaxScoreStringScorer and expansion/compression enabled, a single
            // alignment is expected and its normalized score must equal 0.
            var zeroScorer = new ZeroMaxScoreStringScorer();
            var algorithm = new PairwiseAlignmentAlgorithm<string, char>(zeroScorer, "car", "bar", GetChars);
            algorithm.ExpansionCompressionEnabled = true;
            algorithm.Compute();

            Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
            Assert.That(found.Length, Is.EqualTo(1));

            var wanted = CreateAlignment("| c a r |", "| b a r |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0));
        }
        public void SemiGlobalAlign_EmptySequence()
        {
            // Two empty strings in SemiGlobal mode: one alignment matching "||" / "||"
            // is expected, with a normalized score of 0.
            var charScorer = new StringScorer();
            var algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "", "", GetChars);
            algorithm.Mode = AlignmentMode.SemiGlobal;
            algorithm.Compute();

            Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
            Assert.That(found.Length, Is.EqualTo(1));

            var wanted = CreateAlignment("||", "||");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0));
        }
        public void SemiGlobalAlign()
        {
            // Checks the single expected alignment and its normalized score for four word
            // pairs aligned in SemiGlobal mode.
            var charScorer = new StringScorer();

            // Case 1: "car" / "bar"
            var algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "car", "bar", GetChars);
            algorithm.Mode = AlignmentMode.SemiGlobal;
            algorithm.Compute();
            Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            var wanted = CreateAlignment("| c a r |", "| b a r |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

            // Case 2: "cart" / "bar" -- trailing 't' expected after the closing '|'
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "cart", "bar", GetChars);
            algorithm.Mode = AlignmentMode.SemiGlobal;
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            wanted = CreateAlignment("| c a r | t", "| b a r |  ");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.57).Within(0.01));

            // Case 3: "cart" / "art" -- leading 'c' expected before the opening '|'
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "cart", "art", GetChars);
            algorithm.Mode = AlignmentMode.SemiGlobal;
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            wanted = CreateAlignment("c | a r t |", "  | a r t |");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.86).Within(0.01));

            // Case 4: "start" / "tan" -- characters expected on both sides of the '|' pair
            algorithm = new PairwiseAlignmentAlgorithm<string, char>(charScorer, "start", "tan", GetChars);
            algorithm.Mode = AlignmentMode.SemiGlobal;
            algorithm.Compute();
            found = algorithm.GetAlignments().ToArray();

            Assert.That(found.Length, Is.EqualTo(1));
            wanted = CreateAlignment("s | t a r | t", "  | t a n |  ");
            AssertAlignmentsEqual(found[0], wanted);
            Assert.That(found[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));
        }