/// <summary>
/// Runs the pairwise aligner in <c>AlignmentMode.SemiGlobal</c> over four string pairs and,
/// for each pair, checks the number of alignments returned, the expected alignment layout,
/// and the normalized score (to within 0.01).
/// </summary>
public void SemiGlobalAlign()
{
    var stringScorer = new StringScorer();

    // "car" vs. "bar"
    var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "car", "bar", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    aligner.Compute();
    Alignment<string, char>[] results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(results[0], CreateAlignment(
        "| c a r |",
        "| b a r |"
    ));
    Assert.That(results[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

    // "cart" vs. "bar": the expected layout places the final "t" after the closing bar.
    aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "bar", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    aligner.Compute();
    results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(results[0], CreateAlignment(
        "| c a r | t",
        "| b a r | "
    ));
    Assert.That(results[0].NormalizedScore, Is.EqualTo(0.57).Within(0.01));

    // "cart" vs. "art": the expected layout places the initial "c" before the opening bar.
    aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "art", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    aligner.Compute();
    results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(results[0], CreateAlignment(
        "c | a r t |",
        " | a r t |"
    ));
    Assert.That(results[0].NormalizedScore, Is.EqualTo(0.86).Within(0.01));

    // "start" vs. "tan": material is expected on both sides of the bars.
    aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "start", "tan", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    aligner.Compute();
    results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(results[0], CreateAlignment(
        "s | t a r | t",
        " | t a n | "
    ));
    Assert.That(results[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));
}
/// <summary>
/// Aligns the source and target segments with a global pairwise alignment (expansion/compression
/// and transposition enabled), takes the first alignment returned, and marks every pair of
/// source and target indices that share a column as <c>AlignmentType.Aligned</c>.
/// </summary>
/// <param name="sourceSegment">The source words; its count sets the first matrix dimension.</param>
/// <param name="targetSegment">The target words; its count sets the second matrix dimension.</param>
/// <param name="hintMatrix">Not read by this method.</param>
/// <returns>The populated word alignment matrix.</returns>
public WordAlignmentMatrix GetBestAlignment(IReadOnlyList<string> sourceSegment,
    IReadOnlyList<string> targetSegment, WordAlignmentMatrix hintMatrix = null)
{
    var aligner = new PairwiseAlignmentAlgorithm<IReadOnlyList<string>, int>(_scorer, sourceSegment,
        targetSegment, GetWordIndices)
    {
        Mode = AlignmentMode.Global,
        ExpansionCompressionEnabled = true,
        TranspositionEnabled = true
    };
    aligner.Compute();
    Alignment<IReadOnlyList<string>, int> best = aligner.GetAlignments().First();

    var matrix = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count);
    int column = 0;
    while (column < best.ColumnCount)
    {
        // Link every source index in this column with every target index in the same column.
        foreach (int srcIndex in best[0, column])
        {
            foreach (int trgIndex in best[1, column])
            {
                matrix[srcIndex, trgIndex] = AlignmentType.Aligned;
            }
        }
        column++;
    }
    return matrix;
}
/// <summary>
/// Runs the pairwise aligner without setting a mode (so its default mode applies) over four
/// string pairs, checking the alignment count, the expected layout, and the normalized score
/// of each returned alignment.
/// </summary>
public void GlobalAlign()
{
    var scorer = new StringScorer();

    // "car" vs. "bar"
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "car", "bar", GetChars);
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(1));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| c a r |",
            "| b a r |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));
    }

    // "cart" vs. "bar": a gap is expected opposite the final "t".
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "cart", "bar", GetChars);
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(1));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| c a r t |",
            "| b a r - |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.25).Within(0.01));
    }

    // "cart" vs. "art": a gap is expected opposite the initial "c".
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "cart", "art", GetChars);
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(1));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| c a r t |",
            "| - a r t |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));
    }

    // "start" vs. "tan": two alignments are expected, both scoring 0.
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "start", "tan", GetChars);
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(2));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| s t a r t |",
            "| - t a - n |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.00).Within(0.01));
        AssertAlignmentsEqual(found[1], CreateAlignment(
            "| s t a r t |",
            "| - t a n - |"
        ));
        Assert.That(found[1].NormalizedScore, Is.EqualTo(0.00).Within(0.01));
    }
}
/// <summary>
/// Aligns four string pairs with the aligner's default mode (no <c>Mode</c> is assigned) and
/// asserts, per pair, how many alignments come back, what they look like, and their
/// normalized scores within a 0.01 tolerance.
/// </summary>
public void GlobalAlign()
{
    var stringScorer = new StringScorer();

    var carBar = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "car", "bar", GetChars);
    carBar.Compute();
    Alignment<string, char>[] carBarAlignments = carBar.GetAlignments().ToArray();
    Assert.That(carBarAlignments.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(carBarAlignments[0], CreateAlignment(
        "| c a r |",
        "| b a r |"
    ));
    Assert.That(carBarAlignments[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

    var cartBar = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "bar", GetChars);
    cartBar.Compute();
    Alignment<string, char>[] cartBarAlignments = cartBar.GetAlignments().ToArray();
    Assert.That(cartBarAlignments.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(cartBarAlignments[0], CreateAlignment(
        "| c a r t |",
        "| b a r - |"
    ));
    Assert.That(cartBarAlignments[0].NormalizedScore, Is.EqualTo(0.25).Within(0.01));

    var cartArt = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "art", GetChars);
    cartArt.Compute();
    Alignment<string, char>[] cartArtAlignments = cartArt.GetAlignments().ToArray();
    Assert.That(cartArtAlignments.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(cartArtAlignments[0], CreateAlignment(
        "| c a r t |",
        "| - a r t |"
    ));
    Assert.That(cartArtAlignments[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));

    // This pair is expected to produce two alignments, both with a normalized score of 0.
    var startTan = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "start", "tan", GetChars);
    startTan.Compute();
    Alignment<string, char>[] startTanAlignments = startTan.GetAlignments().ToArray();
    Assert.That(startTanAlignments.Length, Is.EqualTo(2));
    AssertAlignmentsEqual(startTanAlignments[0], CreateAlignment(
        "| s t a r t |",
        "| - t a - n |"
    ));
    Assert.That(startTanAlignments[0].NormalizedScore, Is.EqualTo(0.00).Within(0.01));
    AssertAlignmentsEqual(startTanAlignments[1], CreateAlignment(
        "| s t a r t |",
        "| - t a n - |"
    ));
    Assert.That(startTanAlignments[1].NormalizedScore, Is.EqualTo(0.00).Within(0.01));
}
/// <summary>
/// Runs the pairwise aligner with <c>ExpansionCompressionEnabled</c> set over four string pairs
/// and checks the alignment count, the expected layout (including multi-character cells such
/// as "rt"), and the normalized score of each result.
/// </summary>
public void ExpansionCompressionAlign()
{
    var stringScorer = new StringScorer();

    // "car" vs. "bar"
    var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "car", "bar", GetChars)
    {
        ExpansionCompressionEnabled = true
    };
    aligner.Compute();
    Alignment<string, char>[] results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(1));
    Alignment<string, char> first = results[0];
    AssertAlignmentsEqual(first, CreateAlignment(
        "| c a r |",
        "| b a r |"
    ));
    Assert.That(first.NormalizedScore, Is.EqualTo(0.66).Within(0.01));

    // "cart" vs. "bar": "rt" is expected to share a column with "r".
    aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "bar", GetChars)
    {
        ExpansionCompressionEnabled = true
    };
    aligner.Compute();
    results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(1));
    first = results[0];
    AssertAlignmentsEqual(first, CreateAlignment(
        "| c a rt |",
        "| b a r |"
    ));
    Assert.That(first.NormalizedScore, Is.EqualTo(0.50).Within(0.01));

    // "cart" vs. "art": "ca" is expected to share a column with "a".
    aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "cart", "art", GetChars)
    {
        ExpansionCompressionEnabled = true
    };
    aligner.Compute();
    results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(1));
    first = results[0];
    AssertAlignmentsEqual(first, CreateAlignment(
        "| ca r t |",
        "| a r t |"
    ));
    Assert.That(first.NormalizedScore, Is.EqualTo(0.75).Within(0.01));

    // "start" vs. "tan": two alignments are expected, each scoring 0.40.
    aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "start", "tan", GetChars)
    {
        ExpansionCompressionEnabled = true
    };
    aligner.Compute();
    results = aligner.GetAlignments().ToArray();
    Assert.That(results.Length, Is.EqualTo(2));
    first = results[0];
    AssertAlignmentsEqual(first, CreateAlignment(
        "| st ar t |",
        "| t a n |"
    ));
    Assert.That(first.NormalizedScore, Is.EqualTo(0.40).Within(0.01));
    Alignment<string, char> second = results[1];
    AssertAlignmentsEqual(second, CreateAlignment(
        "| st a rt |",
        "| t a n |"
    ));
    Assert.That(second.NormalizedScore, Is.EqualTo(0.40).Within(0.01));
}
/// <summary>
/// Aligns two empty strings with the aligner's default mode and expects exactly one
/// alignment, matching the "||" layout, with a normalized score of 0.
/// </summary>
public void GlobalAlign_EmptySequence()
{
    var stringScorer = new StringScorer();
    var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "", "", GetChars);
    aligner.Compute();

    Alignment<string, char>[] results = aligner.GetAlignments().ToArray();

    Assert.That(results.Length, Is.EqualTo(1));
    Alignment<string, char> only = results[0];
    AssertAlignmentsEqual(only, CreateAlignment(
        "||",
        "||"
    ));
    Assert.That(only.NormalizedScore, Is.EqualTo(0));
}
/// <summary>
/// Aligns "car" with "bar" using <c>ZeroMaxScoreStringScorer</c> (expansion/compression enabled)
/// and expects a single alignment whose normalized score is 0.
/// </summary>
public void ZeroMaxScore()
{
    var zeroScorer = new ZeroMaxScoreStringScorer();
    var aligner = new PairwiseAlignmentAlgorithm<string, char>(zeroScorer, "car", "bar", GetChars)
    {
        ExpansionCompressionEnabled = true
    };
    aligner.Compute();

    Alignment<string, char>[] results = aligner.GetAlignments().ToArray();

    Assert.That(results.Length, Is.EqualTo(1));
    Alignment<string, char> only = results[0];
    AssertAlignmentsEqual(only, CreateAlignment(
        "| c a r |",
        "| b a r |"
    ));
    Assert.That(only.NormalizedScore, Is.EqualTo(0));
}
/// <summary>
/// Returns the alignments produced by the wrapped <c>_algorithm</c>.
/// </summary>
/// <returns>Whatever <c>_algorithm.GetAlignments()</c> returns, passed through unchanged.</returns>
public override IEnumerable<Alignment<Word, ShapeNode>> GetAlignments()
{
    IEnumerable<Alignment<Word, ShapeNode>> alignments = _algorithm.GetAlignments();
    return alignments;
}
/// <summary>
/// Builds a word alignment matrix for a source/target segment pair.
/// </summary>
/// <remarks>
/// The segments are first aligned with a global pairwise alignment (expansion/compression and
/// transposition enabled) and the first alignment returned is used. For every target index in
/// every column, a baseline score is computed from the column's own source cell, and source
/// words to the left and right of that cell are then scored with a distance-adjusted
/// translation probability. A neighbour is linked only if it strictly beats the baseline;
/// otherwise the first and last source indices of the column's cell are linked (or nothing is
/// linked when the source cell is null).
/// </remarks>
/// <param name="sourceSegment">The source words; may be empty.</param>
/// <param name="targetSegment">The target words.</param>
/// <param name="hintMatrix">Not read by this method.</param>
/// <returns>A matrix of size source count by target count with the chosen links marked aligned.</returns>
public WordAlignmentMatrix GetBestAlignment(IReadOnlyList<string> sourceSegment,
    IReadOnlyList<string> targetSegment, WordAlignmentMatrix hintMatrix = null)
{
    var paa = new PairwiseAlignmentAlgorithm<IReadOnlyList<string>, int>(_scorer, sourceSegment,
        targetSegment, GetWordIndices)
    {
        Mode = AlignmentMode.Global,
        ExpansionCompressionEnabled = true,
        TranspositionEnabled = true
    };
    paa.Compute();
    Alignment<IReadOnlyList<string>, int> alignment = paa.GetAlignments().First();
    var waMatrix = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count);
    for (int c = 0; c < alignment.ColumnCount; c++)
    {
        foreach (int j in alignment[1, c])
        {
            double bestScore;
            int minIndex, maxIndex;
            if (alignment[0, c].IsNull)
            {
                // No source word in this column: the baseline is the probability of the target
                // word translating from null.
                double prob = _getTranslationProb(null, targetSegment[j]);
                bestScore = ComputeAlignmentScore(prob, 0);

                // Anchor the search at the last source index of the nearest preceding column
                // that has a source cell, or at index 0 when there is none.
                int tc = c - 1;
                while (tc >= 0 && alignment[0, tc].IsNull)
                {
                    tc--;
                }
                int i = tc == -1 ? 0 : alignment[0, tc].Last;
                minIndex = i;
                maxIndex = i + 1;
            }
            else
            {
                // The baseline is the average translation probability over the column's source words.
                double prob = alignment[0, c]
                    .Average(i => _getTranslationProb(sourceSegment[i], targetSegment[j]));
                bestScore = ComputeAlignmentScore(prob, 0);
                minIndex = alignment[0, c].First - 1;
                maxIndex = alignment[0, c].Last + 1;
            }

            int bestIndex = -1;

            // Search to the left. The start index is clamped to the last valid source index so
            // that an empty source segment (where the null branch sets minIndex to 0) does not
            // read sourceSegment[0]. For a non-empty source, minIndex is already in range and
            // the clamp has no effect.
            // NOTE(review): this loop reaches minIndex - _maxDistance inclusive, while the
            // rightward loop stops before maxIndex + _maxDistance, so the left window is one
            // element wider. Left unchanged; confirm whether the asymmetry is intended.
            int leftStart = Math.Min(minIndex, sourceSegment.Count - 1);
            for (int i = leftStart; i >= Math.Max(0, minIndex - _maxDistance); i--)
            {
                double prob = _getTranslationProb(sourceSegment[i], targetSegment[j]);
                double distanceScore = ComputeDistanceScore(i, minIndex + 1, sourceSegment.Count);
                double score = ComputeAlignmentScore(prob, distanceScore);
                if (score > bestScore)
                {
                    bestScore = score;
                    bestIndex = i;
                }
            }

            // Search to the right; the upper bound keeps the index inside the source segment.
            for (int i = maxIndex; i < Math.Min(sourceSegment.Count, maxIndex + _maxDistance); i++)
            {
                double prob = _getTranslationProb(sourceSegment[i], targetSegment[j]);
                double distanceScore = ComputeDistanceScore(i, maxIndex - 1, sourceSegment.Count);
                double score = ComputeAlignmentScore(prob, distanceScore);
                if (score > bestScore)
                {
                    bestScore = score;
                    bestIndex = i;
                }
            }

            if (bestIndex == -1)
            {
                // No neighbour beat the baseline: keep the column's own link. minIndex + 1 and
                // maxIndex - 1 are the cell's First and Last source indices.
                if (!alignment[0, c].IsNull)
                {
                    waMatrix[minIndex + 1, j] = AlignmentType.Aligned;
                    waMatrix[maxIndex - 1, j] = AlignmentType.Aligned;
                }
            }
            else
            {
                waMatrix[bestIndex, j] = AlignmentType.Aligned;
            }
        }
    }
    return waMatrix;
}
/// <summary>
/// Checks alignments produced with expansion/compression enabled for four string pairs:
/// the number of alignments, their expected layout, and their normalized scores (±0.01).
/// </summary>
public void ExpansionCompressionAlign()
{
    var scorer = new StringScorer();

    // "car" vs. "bar"
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "car", "bar", GetChars)
        {
            ExpansionCompressionEnabled = true
        };
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(1));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| c a r |",
            "| b a r |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));
    }

    // "cart" vs. "bar"
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "cart", "bar", GetChars)
        {
            ExpansionCompressionEnabled = true
        };
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(1));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| c a rt |",
            "| b a r |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));
    }

    // "cart" vs. "art"
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "cart", "art", GetChars)
        {
            ExpansionCompressionEnabled = true
        };
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(1));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| ca r t |",
            "| a r t |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.75).Within(0.01));
    }

    // "start" vs. "tan": two alignments are expected.
    {
        var algorithm = new PairwiseAlignmentAlgorithm<string, char>(scorer, "start", "tan", GetChars)
        {
            ExpansionCompressionEnabled = true
        };
        algorithm.Compute();
        Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();
        Assert.That(found.Length, Is.EqualTo(2));
        AssertAlignmentsEqual(found[0], CreateAlignment(
            "| st ar t |",
            "| t a n |"
        ));
        Assert.That(found[0].NormalizedScore, Is.EqualTo(0.40).Within(0.01));
        AssertAlignmentsEqual(found[1], CreateAlignment(
            "| st a rt |",
            "| t a n |"
        ));
        Assert.That(found[1].NormalizedScore, Is.EqualTo(0.40).Within(0.01));
    }
}
/// <summary>
/// With a <c>ZeroMaxScoreStringScorer</c> and expansion/compression enabled, aligning "car"
/// with "bar" must yield one alignment and a normalized score of 0.
/// </summary>
public void ZeroMaxScore()
{
    var algorithm = new PairwiseAlignmentAlgorithm<string, char>(
        new ZeroMaxScoreStringScorer(), "car", "bar", GetChars)
    {
        ExpansionCompressionEnabled = true
    };
    algorithm.Compute();
    Alignment<string, char>[] found = algorithm.GetAlignments().ToArray();

    Assert.That(found.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(found[0], CreateAlignment(
        "| c a r |",
        "| b a r |"
    ));
    Assert.That(found[0].NormalizedScore, Is.EqualTo(0));
}
/// <summary>
/// Aligns two empty strings in <c>AlignmentMode.SemiGlobal</c> and expects exactly one
/// alignment, matching the "||" layout, with a normalized score of 0.
/// </summary>
public void SemiGlobalAlign_EmptySequence()
{
    var stringScorer = new StringScorer();
    var aligner = new PairwiseAlignmentAlgorithm<string, char>(stringScorer, "", "", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    aligner.Compute();

    Alignment<string, char>[] results = aligner.GetAlignments().ToArray();

    Assert.That(results.Length, Is.EqualTo(1));
    Alignment<string, char> only = results[0];
    AssertAlignmentsEqual(only, CreateAlignment(
        "||",
        "||"
    ));
    Assert.That(only.NormalizedScore, Is.EqualTo(0));
}
/// <summary>
/// Semi-global alignment checks for four string pairs. Each pair must yield exactly one
/// alignment with the expected layout and a normalized score within 0.01 of the expected value.
/// </summary>
public void SemiGlobalAlign()
{
    var scorer = new StringScorer();

    var carBar = new PairwiseAlignmentAlgorithm<string, char>(scorer, "car", "bar", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    carBar.Compute();
    Alignment<string, char>[] carBarAlignments = carBar.GetAlignments().ToArray();
    Assert.That(carBarAlignments.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(carBarAlignments[0], CreateAlignment(
        "| c a r |",
        "| b a r |"
    ));
    Assert.That(carBarAlignments[0].NormalizedScore, Is.EqualTo(0.66).Within(0.01));

    var cartBar = new PairwiseAlignmentAlgorithm<string, char>(scorer, "cart", "bar", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    cartBar.Compute();
    Alignment<string, char>[] cartBarAlignments = cartBar.GetAlignments().ToArray();
    Assert.That(cartBarAlignments.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(cartBarAlignments[0], CreateAlignment(
        "| c a r | t",
        "| b a r | "
    ));
    Assert.That(cartBarAlignments[0].NormalizedScore, Is.EqualTo(0.57).Within(0.01));

    var cartArt = new PairwiseAlignmentAlgorithm<string, char>(scorer, "cart", "art", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    cartArt.Compute();
    Alignment<string, char>[] cartArtAlignments = cartArt.GetAlignments().ToArray();
    Assert.That(cartArtAlignments.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(cartArtAlignments[0], CreateAlignment(
        "c | a r t |",
        " | a r t |"
    ));
    Assert.That(cartArtAlignments[0].NormalizedScore, Is.EqualTo(0.86).Within(0.01));

    var startTan = new PairwiseAlignmentAlgorithm<string, char>(scorer, "start", "tan", GetChars)
    {
        Mode = AlignmentMode.SemiGlobal
    };
    startTan.Compute();
    Alignment<string, char>[] startTanAlignments = startTan.GetAlignments().ToArray();
    Assert.That(startTanAlignments.Length, Is.EqualTo(1));
    AssertAlignmentsEqual(startTanAlignments[0], CreateAlignment(
        "s | t a r | t",
        " | t a n | "
    ));
    Assert.That(startTanAlignments[0].NormalizedScore, Is.EqualTo(0.50).Within(0.01));
}