/// <summary>
/// Runs <c>TestSupport</c> against each predefined character distribution
/// (digit, lower, upper, letter, letter-or-digit, word character, whitespace).
/// </summary>
/// <remarks>
/// Judging by the data, the third argument lists characters expected to be inside
/// the support and the fourth lists characters expected to be outside it —
/// confirm against <c>TestSupport</c>.
/// </remarks>
public void CommonChars()
{
    DiscreteChar digit = DiscreteChar.Digit();
    TestSupport("digit", digit, "0123456789", "Ab !Ј");

    DiscreteChar lower = DiscreteChar.Lower();
    TestSupport("lower", lower, "abcdefghixyz", "ABC0123, ");

    DiscreteChar upper = DiscreteChar.Upper();
    TestSupport("upper", upper, "ABCDEFGHUXYZ", "abc0123, ");

    DiscreteChar letter = DiscreteChar.Letter();
    TestSupport("letter", letter, "aBcDeFgGhxyzXYZ", "0123! ,");

    DiscreteChar letterOrDigit = DiscreteChar.LetterOrDigit();
    TestSupport("letterOrDigit", letterOrDigit, "abcABC0123xyzXYZ789", " !Ј$,");

    DiscreteChar wordChar = DiscreteChar.WordChar();
    TestSupport("wordChar", wordChar, "abc_ABC_0123s", " !:.,");

    DiscreteChar whitespace = DiscreteChar.Whitespace();
    TestSupport("whitespace", whitespace, " \t", "abcABC0123,:!");
}
/// <summary>
/// Checks the values of the transducer built by
/// <c>StringTransducer.Replace(DiscreteChar.Lower(), DiscreteChar.Digit())</c>
/// on a set of (source, target) string pairs.
/// </summary>
public void ReplaceElements()
{
    const double Accepted = 1.0;
    const double Rejected = 0.0;

    StringTransducer lowerToDigit = StringTransducer.Replace(DiscreteChar.Lower(), DiscreteChar.Digit());

    // Pairs expected to have value 1: lowercase (or empty) source, digit (or empty) target.
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, "hello", "123", Accepted);
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, "w", "1337", Accepted);
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, "w", string.Empty, Accepted);
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, string.Empty, "17", Accepted);
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, string.Empty, string.Empty, Accepted);

    // Pairs expected to have value 0: the source contains digits.
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, "123", "worlds", Rejected);
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, "123", "123", Rejected);
    StringInferenceTestUtilities.TestTransducerValue(lowerToDigit, "1", string.Empty, Rejected);
}
/// <summary>
/// Builds a constant automaton (value 1.0 over <c>DiscreteChar.Lower()</c>), verifies its value
/// on a few strings, then applies <c>StringAutomaton.Repeat</c> and checks that the values
/// on the same strings exceed 1000.
/// </summary>
public void Repeat2()
{
    StringAutomaton lowercase = StringAutomaton.Constant(1.0, DiscreteChar.Lower());
    StringInferenceTestUtilities.TestValue(lowercase, 1.0, string.Empty, "ab", "abcab");

    StringAutomaton repeated = StringAutomaton.Repeat(lowercase);

    // Can't use StringInferenceTestUtilities.TestValue here since the value is not infinite in log-domain
    // due to approximate closure computations for epsilon-loops
    foreach (string probe in new[] { string.Empty, "ab", "abcab" })
    {
        Assert.True(repeated.GetValue(probe) > 1000);
    }
}
/// <summary>
/// Compares a copy transducer with its <c>StringTransducer.Optional</c> counterpart
/// on the same (source, target) pairs. The only pair on which the expected values
/// differ is (empty, empty): 1.0 for the plain copy versus 2.0 for the optional one.
/// </summary>
public void Optional()
{
    StringAutomaton lowercase = StringAutomaton.Constant(1.0, DiscreteChar.Lower());
    StringTransducer plainCopy = StringTransducer.Copy(lowercase);
    StringTransducer optionalCopy = StringTransducer.Optional(plainCopy);

    // Checks the plain transducer first and the optional one second, for one pair.
    Action<string, string, double, double> checkBoth = (source, target, plainValue, optionalValue) =>
    {
        StringInferenceTestUtilities.TestTransducerValue(plainCopy, source, target, plainValue);
        StringInferenceTestUtilities.TestTransducerValue(optionalCopy, source, target, optionalValue);
    };

    checkBoth("abc", "abc", 1.0, 1.0);
    checkBoth(string.Empty, string.Empty, 1.0, 2.0);
    checkBoth("abc", "ab", 0.0, 0.0);
    checkBoth("abc", string.Empty, 0.0, 0.0);
}
/// <summary>
/// Tests <c>StringDistribution.Lower</c> with a bounded length range (1..2) and with
/// only a lower bound (2): properness and the probabilities of sample strings.
/// </summary>
public void Lower()
{
    // Number of characters with non-zero probability under DiscreteChar.Lower().
    int lowerCharCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);

    // Bounded length: expected to be proper.
    var boundedLower = StringDistribution.Lower(minLength: 1, maxLength: 2);
    Assert.True(boundedLower.IsProper());

    var expectedBoundedProb = StringInferenceTestUtilities.StringUniformProbability(1, 2, lowerCharCount);
    StringInferenceTestUtilities.TestProbability(boundedLower, expectedBoundedProb, "a", "bc");
    StringInferenceTestUtilities.TestProbability(boundedLower, 0.0, "abc", "BC", "A", string.Empty);

    // No upper bound on the length: expected to be improper.
    var unboundedLower = StringDistribution.Lower(minLength: 2);
    Assert.False(unboundedLower.IsProper());

    StringInferenceTestUtilities.TestProbability(unboundedLower, 1.0, "bc", "abvhrbfijbr");
    StringInferenceTestUtilities.TestProbability(unboundedLower, 0.0, "a", "BC", "adasdADNdej", string.Empty);
}
/// <summary>
/// Checks the values of <c>StringTransducer.Consume</c> built from an automaton that is the
/// sum of a constant 2.0 over <c>DiscreteChar.Lower()</c> and a constant 3.0 on the element 'a'.
/// </summary>
public void ConsumeAutomaton()
{
    StringAutomaton anyLower = StringAutomaton.Constant(2.0, DiscreteChar.Lower());
    StringAutomaton consumed = anyLower.Sum(StringAutomaton.ConstantOnElement(3.0, 'a'));
    StringTransducer consumer = StringTransducer.Consume(consumed);

    Action<string, string, double> check = (source, target, expected) =>
        StringInferenceTestUtilities.TestTransducerValue(consumer, source, target, expected);

    // Empty target: the expected value is 2.0, or 5.0 (= 2.0 + 3.0) for the source "a".
    check("aaa", string.Empty, 2.0);
    check("bb", string.Empty, 2.0);
    check("a", string.Empty, 5.0);
    check(string.Empty, string.Empty, 2.0);

    // Non-empty target: the expected value is 0.
    check("bb", "aaa", 0.0);
    check("bb", "bb", 0.0);
    check(string.Empty, "bb", 0.0);
    check(string.Empty, "a", 0.0);
}
/// <summary>
/// Checks <c>StringTransducer.Sum</c> of two replace transducers: lowercase-to-digit and
/// lowercase-to-letter-or-digit. The expected value is 2.0, 1.0 or 0.0 depending on the pair.
/// </summary>
public void Sum()
{
    const double BothMatch = 2.0;
    const double OneMatches = 1.0;
    const double NoneMatches = 0.0;

    StringTransducer lowerToDigit = StringTransducer.Replace(DiscreteChar.Lower(), DiscreteChar.Digit());
    StringTransducer lowerToLetterOrDigit = StringTransducer.Replace(DiscreteChar.Lower(), DiscreteChar.LetterOrDigit());
    StringTransducer summed = StringTransducer.Sum(lowerToDigit, lowerToLetterOrDigit);

    // Digit-only (or empty) targets.
    StringInferenceTestUtilities.TestTransducerValue(summed, "hello", "123", BothMatch);
    StringInferenceTestUtilities.TestTransducerValue(summed, "w", "1337", BothMatch);
    StringInferenceTestUtilities.TestTransducerValue(summed, "w", string.Empty, BothMatch);
    StringInferenceTestUtilities.TestTransducerValue(summed, string.Empty, "17", BothMatch);
    StringInferenceTestUtilities.TestTransducerValue(summed, string.Empty, string.Empty, BothMatch);

    // Targets that contain letters.
    StringInferenceTestUtilities.TestTransducerValue(summed, "hello", "worlds", OneMatches);
    StringInferenceTestUtilities.TestTransducerValue(summed, "hello", "WORLDS111", OneMatches);

    // Sources that contain digits.
    StringInferenceTestUtilities.TestTransducerValue(summed, "123", "worlds", NoneMatches);
    StringInferenceTestUtilities.TestTransducerValue(summed, "123", "123", NoneMatches);
    StringInferenceTestUtilities.TestTransducerValue(summed, "1", string.Empty, NoneMatches);
}
/// <summary>
/// Tests <c>StringTransducer.Copy</c> built from an automaton of the form
/// ("prefix1" | "prefix2") + lowercase part + uppercase part + "!":
/// transducer values on identical pairs, rejection of a set of strings, and projections of
/// the automaton itself, of a differently weighted automaton over a narrower string set, and
/// of one over a broader string set.
/// </summary>
public void CopyAutomaton()
{
    StringAutomaton pattern = StringAutomaton.ConstantOn(1.0, "prefix1", "prefix2");
    pattern.AppendInPlace(StringAutomaton.Constant(1.0, DiscreteChar.Lower()));
    pattern.AppendInPlace(StringAutomaton.Constant(1.0, DiscreteChar.Upper()));
    pattern.AppendInPlace("!");

    StringTransducer copier = StringTransducer.Copy(pattern);

    // Strings that match the pattern; reused by several checks below.
    string[] matching =
    {
        "prefix1!",
        "prefix2!",
        "prefix1lower!",
        "prefix2UPPER!",
        "prefix1lowerUPPER!",
    };

    foreach (string word in matching)
    {
        StringInferenceTestUtilities.TestTransducerValue(copier, word, word, 1.0);
    }

    StringInferenceTestUtilities.TestIfTransducerRejects(copier, "prefix1lower", "prefix2UPPER", "!", "prefix1lowerUPPER");

    // Projecting the automaton the transducer was built from.
    foreach (string word in matching)
    {
        StringInferenceTestUtilities.TestTransducerProjection(copier, pattern, word, 1.0);
    }

    // Projecting an automaton with different weights over a narrower set of strings.
    StringAutomaton narrower = StringAutomaton.ConstantOn(2.0, "prefix1");
    narrower.AppendInPlace(StringAutomaton.ConstantOn(3.0, "lll", "mmm"));
    narrower.AppendInPlace(StringAutomaton.ConstantOn(1.5, "!", "U!"));

    StringInferenceTestUtilities.TestTransducerProjection(copier, narrower, "prefix1lll!", 9.0);
    StringInferenceTestUtilities.TestTransducerProjection(copier, narrower, "prefix1mmmU!", 9.0);
    StringInferenceTestUtilities.TestTransducerProjection(copier, narrower, "prefix1!", 0.0);
    StringInferenceTestUtilities.TestTransducerProjection(copier, narrower, "prefix2lower!", 0.0);
    StringInferenceTestUtilities.TestTransducerProjection(copier, narrower, "prefix2U!", 0.0);

    // Projecting an automaton over a broader set of strings: "pr" followed by anything.
    StringAutomaton broader = StringAutomaton.ConstantOn(1.5, "pr");
    broader.AppendInPlace(StringAutomaton.Constant(2.0));

    foreach (string word in matching)
    {
        StringInferenceTestUtilities.TestTransducerProjection(copier, broader, word, 3.0);
    }

    StringInferenceTestUtilities.TestTransducerProjection(copier, broader, "prefix11!", 0.0);
    StringInferenceTestUtilities.TestTransducerProjection(copier, broader, "prefix1lowerUPPERlower!", 0.0);
    StringInferenceTestUtilities.TestTransducerProjection(copier, broader, "prrrrr!", 0.0);
}
/// <summary>
/// Checks the values of <c>StringTransducer.Replace</c> built from two weighted automata:
/// a source automaton (2.0 over lowercase plus 3.0 on 'a') and a target automaton
/// (0.5 over digits plus 2.5 over letters-or-digits).
/// </summary>
public void ReplaceAutomaton()
{
    StringAutomaton lowerPart = StringAutomaton.Constant(2.0, DiscreteChar.Lower());
    StringAutomaton sourceAutomaton = lowerPart.Sum(StringAutomaton.ConstantOnElement(3.0, 'a'));

    StringAutomaton digitPart = StringAutomaton.Constant(0.5, DiscreteChar.Digit());
    StringAutomaton targetAutomaton = digitPart.Sum(StringAutomaton.Constant(2.5, DiscreteChar.LetterOrDigit()));

    StringTransducer replacer = StringTransducer.Replace(sourceAutomaton, targetAutomaton);

    // The expected values match (source weight) * (target weight):
    // e.g. 2.0 * (0.5 + 2.5) = 6.0 and (2.0 + 3.0) * 2.5 = 12.5.
    StringInferenceTestUtilities.TestTransducerValue(replacer, string.Empty, "123", 6.0);
    StringInferenceTestUtilities.TestTransducerValue(replacer, "a", "123", 15.0);
    StringInferenceTestUtilities.TestTransducerValue(replacer, "ax", "AbC", 5.0);
    StringInferenceTestUtilities.TestTransducerValue(replacer, "a", "a", 12.5);
    StringInferenceTestUtilities.TestTransducerValue(replacer, string.Empty, string.Empty, 6.0);

    // Sources that are not purely lowercase are expected to have value 0.
    StringInferenceTestUtilities.TestTransducerValue(replacer, "123", string.Empty, 0.0);
    StringInferenceTestUtilities.TestTransducerValue(replacer, "AbC", "ax", 0.0);
    StringInferenceTestUtilities.TestTransducerValue(replacer, "1", "1", 0.0);
}
/// <summary>
/// Tests <c>StringDistribution.ZeroOrMore</c>: first over <c>DiscreteChar.Lower()</c>, then over a
/// digit distribution whose probability vector has been edited to be non-uniform.
/// </summary>
public void UniformOf()
{
    var anyLowercase = StringDistribution.ZeroOrMore(DiscreteChar.Lower());
    Assert.False(anyLowercase.IsUniform());
    Assert.False(anyLowercase.IsProper());
    StringInferenceTestUtilities.TestProbability(anyLowercase, 1.0, "hello", "a", string.Empty);
    StringInferenceTestUtilities.TestProbability(anyLowercase, 0.0, "123", "!", "Abc");

    // Test if non-uniform element distribution does not affect the outcome
    Vector digitProbs = DiscreteChar.Digit().GetProbs();
    digitProbs['1'] = 0;
    digitProbs['2'] = 0.3;
    digitProbs['3'] = 0.0001;

    var anySkewedDigits = StringDistribution.ZeroOrMore(DiscreteChar.FromVector(digitProbs));
    StringInferenceTestUtilities.TestProbability(anySkewedDigits, 1.0, "0", "234", string.Empty);
    StringInferenceTestUtilities.TestProbability(anySkewedDigits, 0.0, "1", "231", "!", "Abc");
}
/// <summary>
/// Tests <c>StringDistribution.Capitalized</c> with a bounded length range (3..5) and with
/// only a lower bound (3): properness and the probabilities of sample strings.
/// </summary>
public void Capitalized()
{
    // Numbers of characters with non-zero probability in the lower/upper distributions.
    int lowerCharCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);
    int upperCharCount = DiscreteChar.Upper().GetProbs().Count(p => p > 0);

    // Bounded length: expected to be proper.
    var boundedCapitalized = StringDistribution.Capitalized(minLength: 3, maxLength: 5);
    Assert.True(boundedCapitalized.IsProper());

    // One uppercase character followed by 2..4 lowercase characters.
    var expectedBoundedProb =
        StringInferenceTestUtilities.StringUniformProbability(2, 4, lowerCharCount) / upperCharCount;
    StringInferenceTestUtilities.TestProbability(boundedCapitalized, expectedBoundedProb, "Abc", "Bcde", "Abcde");
    StringInferenceTestUtilities.TestProbability(boundedCapitalized, 0.0, "A", "abc", "Ab", "Abcdef", string.Empty);

    // No upper bound on the length: expected to be improper.
    var unboundedCapitalized = StringDistribution.Capitalized(minLength: 3);
    Assert.False(unboundedCapitalized.IsProper());

    StringInferenceTestUtilities.TestProbability(unboundedCapitalized, 1.0, "Abc", "Bcde", "Abcde", "Abfjrhfjlrl");
    StringInferenceTestUtilities.TestProbability(unboundedCapitalized, 0.0, "A", "abc", "Ab", string.Empty);
}
/// <summary>
/// Tests <c>SingleOp.CharacterAverageConditional</c> on several string distributions and checks
/// the resulting character probabilities; the final case expects an <c>AllZeroException</c>.
/// </summary>
public void MessageOperatorsTest()
{
    // Numbers of characters with non-zero probability in the lower/upper distributions.
    int lowerCharCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);
    int upperCharCount = DiscreteChar.Upper().GetProbs().Count(p => p > 0);

    // Case 1: a mixture of point masses.
    StringDistribution mixedStrings = StringDistribution.OneOf("a", "b", "abc", "ab", "bcd", "d", string.Empty);
    DiscreteChar charOfMixed = SingleOp.CharacterAverageConditional(mixedStrings);
    double oneThird = 1.0 / 3.0;
    Assert.Equal(oneThird, charOfMixed['a'], ProbEps);
    Assert.Equal(oneThird, charOfMixed['b'], ProbEps);
    Assert.Equal(oneThird, charOfMixed['d'], ProbEps);

    // Case 2: a mixture of the previous distribution with another one over "b" and "d".
    StringDistribution nestedMixture = StringDistribution.OneOf(mixedStrings, StringDistribution.OneOf("b", "d"));
    DiscreteChar charOfNested = SingleOp.CharacterAverageConditional(nestedMixture);
    Assert.Equal(1.0 / 10.0, charOfNested['a'], ProbEps);
    Assert.Equal(4.5 / 10.0, charOfNested['b'], ProbEps);
    Assert.Equal(4.5 / 10.0, charOfNested['d'], ProbEps);

    // Case 3: letters of any length.
    StringDistribution anyLetters = StringDistribution.Letters(minLength: 0);
    DiscreteChar charOfLetters = SingleOp.CharacterAverageConditional(anyLetters);
    double perLetterProb = 1.0 / (lowerCharCount + upperCharCount);
    Assert.Equal(perLetterProb, charOfLetters['a'], ProbEps);
    Assert.Equal(perLetterProb, charOfLetters['B'], ProbEps);
    Assert.Equal(perLetterProb, charOfLetters['d'], ProbEps);

    // Case 4: letters mixed with lowercase-only strings, so lowercase characters count twice.
    StringDistribution lettersOrLower = StringDistribution.OneOf(anyLetters, StringDistribution.Lower(minLength: 0));
    DiscreteChar charOfLettersOrLower = SingleOp.CharacterAverageConditional(lettersOrLower);
    int weightedCharCount = 2 * lowerCharCount + upperCharCount;
    Assert.Equal(2.0 / weightedCharCount, charOfLettersOrLower['a'], ProbEps);
    Assert.Equal(1.0 / weightedCharCount, charOfLettersOrLower['B'], ProbEps);
    Assert.Equal(2.0 / weightedCharCount, charOfLettersOrLower['d'], ProbEps);

    // Case 5: every string starts with "XX"; the operator is expected to throw.
    StringDistribution prefixedWithXx = StringDistribution.String("XX").Append(lettersOrLower);
    Assert.Throws<AllZeroException>(() => SingleOp.CharacterAverageConditional(prefixedWithXx));
}
/// <summary>
/// Builds a word model by concatenating scaled (non-normalized) character distributions and
/// checks the probability it assigns to each prefix of a fixed word — directly, via products
/// with other distributions, after a copy transducer, and after a re-weighting transducer.
/// </summary>
public void WordModel()
{
    // We want to build a word model as a reasonably simple StringDistribution. It
    // should satisfy the following:
    // (1) The probability of a word of moderate length should not be
    //     significantly less than the probability of a shorter word.
    // (2) The probability of a specific word conditioned on its length matches that of
    //     words in the target language.
    // We achieve this by putting non-normalized character distributions on the edges. The
    // StringDistribution is unaware that these are non-normalized.
    // The StringDistribution itself is non-normalizable.
    const double TargetProb1 = 0.05;
    const double Ratio1 = 0.4;
    const double TargetProb2 = TargetProb1 * Ratio1;
    const double Ratio2 = 0.2;
    const double TargetProb3 = TargetProb2 * Ratio2;
    const double TargetProb4 = TargetProb3 * Ratio2;
    const double TargetProb5 = TargetProb4 * Ratio2;
    const double Ratio3 = 0.999;
    const double TargetProb6 = TargetProb5 * Ratio3;
    const double TargetProb7 = TargetProb6 * Ratio3;
    const double TargetProb8 = TargetProb7 * Ratio3;
    const double Ratio4 = 0.9;
    const double TargetProb9 = TargetProb8 * Ratio4;
    const double TargetProb10 = TargetProb9 * Ratio4;

    // Entry k holds the target probability of a word of length k + 1.
    var expectedByLength = new double[]
    {
        TargetProb1, TargetProb2, TargetProb3, TargetProb4, TargetProb5,
        TargetProb6, TargetProb7, TargetProb8, TargetProb9, TargetProb10
    };

    var charDistUpper = DiscreteChar.Upper();
    var charDistLower = DiscreteChar.Lower();
    var charDistUpperNarrow = DiscreteChar.OneOf('A', 'B');
    var charDistLowerNarrow = DiscreteChar.OneOf('a', 'b');

    // Creates a character distribution from 'support' via SetToPartialUniformOf,
    // passing the logarithm of 'scale' as its second argument.
    Func<DiscreteChar, double, DiscreteChar> scaledOf = (support, scale) =>
    {
        var scaledDist = DiscreteChar.Uniform();
        scaledDist.SetToPartialUniformOf(support, Math.Log(scale));
        return scaledDist;
    };

    var upperScaled = scaledOf(charDistUpper, TargetProb1);
    var lowerScaled1 = scaledOf(charDistLower, Ratio1);
    var lowerScaled2 = scaledOf(charDistLower, Ratio2);
    var lowerScaled3 = scaledOf(charDistLower, Ratio3);
    var lowerScaledEnd = scaledOf(charDistLower, Ratio4);

    var wordModel = StringDistribution.Concatenate(
        new List<DiscreteChar>
        {
            upperScaled,
            lowerScaled1,
            lowerScaled2,
            lowerScaled2,
            lowerScaled2,
            lowerScaled3,
            lowerScaled3,
            lowerScaled3,
            lowerScaledEnd
        },
        true,
        true);

    const string Word = "Abcdefghij";
    const double Eps = 1e-5;

    var broadDist = StringDistribution.Char(charDistUpper);
    var narrowDist = StringDistribution.Char(charDistUpperNarrow);
    var narrowWord = "A";
    var expectedProbForNarrow = 0.5;

    for (var length = 1; length <= expectedByLength.Length; length++)
    {
        var expectedProb = expectedByLength[length - 1];
        var prefix = Word.Substring(0, length);

        // Directly under the word model.
        Assert.Equal(expectedProb, Math.Exp(wordModel.GetLogProb(prefix)), Eps);

        // Average of the word model under the broad distribution of the current length.
        Assert.Equal(expectedProb, Math.Exp(wordModel.GetLogAverageOf(broadDist)), Eps);

        // Product with the broad distribution.
        var product = StringDistribution.Zero();
        product.SetToProduct(broadDist, wordModel);
        Xunit.Assert.True(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
        Assert.Equal(expectedProb, Math.Exp(product.GetLogProb(prefix)), Eps);

        // Product with the narrow distribution.
        product.SetToProduct(narrowDist, wordModel);
        Xunit.Assert.False(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
        Assert.Equal(expectedProbForNarrow, Math.Exp(product.GetLogProb(narrowWord)), Eps);

        // Grow both distributions and the narrow word by one lowercase character.
        broadDist = broadDist.Append(charDistLower);
        narrowDist = narrowDist.Append(charDistLowerNarrow);
        narrowWord += "a";
        expectedProbForNarrow *= 0.5;
    }

    // Copied model
    var copiedModel = StringDistribution.FromWorkspace(
        StringTransducer.Copy().ProjectSource(wordModel.GetWorkspaceOrPoint()));

    // Under transducer.
    for (var length = 1; length <= expectedByLength.Length; length++)
    {
        var prefix = Word.Substring(0, length);
        Assert.Equal(expectedByLength[length - 1], Math.Exp(copiedModel.GetLogProb(prefix)), Eps);
    }

    // Rescaled model
    var scale = 0.5;
    var newTargetProb1 = TargetProb1 * scale;
    var upperRescaled = scaledOf(charDistUpper, newTargetProb1);

    var reWeightingTransducer = StringTransducer
        .Replace(
            StringDistribution.Char(charDistUpper).GetWorkspaceOrPoint(),
            StringDistribution.Char(upperRescaled).GetWorkspaceOrPoint())
        .Append(StringTransducer.Copy());

    var reWeightedWordModel = StringDistribution.FromWorkspace(
        reWeightingTransducer.ProjectSource(wordModel.GetWorkspaceOrPoint()));

    for (var length = 1; length <= expectedByLength.Length; length++)
    {
        var prefix = Word.Substring(0, length);
        Assert.Equal(scale * expectedByLength[length - 1], Math.Exp(reWeightedWordModel.GetLogProb(prefix)), Eps);
    }
}