public void ProductWithLogOverrideNarrow()
{
    // Exercise both argument orders: first the override-carrying broad
    // distribution on the left, then (second pass) swapped to the right.
    foreach (var swapArguments in new[] { false, true })
    {
        var broad = DiscreteChar.LetterOrDigit();
        var narrow = DiscreteChar.OneOf('1', '3', '5', '6');
        broad.SetToPartialUniformOf(broad, Math.Log(0.9));
        Xunit.Assert.True(broad.HasLogProbabilityOverride);
        Xunit.Assert.False(narrow.IsBroad);
        if (swapArguments)
        {
            Util.Swap(ref broad, ref narrow);
        }

        var product = DiscreteChar.Uniform();
        product.SetToProduct(broad, narrow);

        // Multiplying by a narrow distribution must drop the log-probability
        // override: the product falls back to ordinary product semantics.
        Xunit.Assert.False(product.HasLogProbabilityOverride);
        Assert.Equal(Math.Log(0.25), product.GetLogProb('5'), Eps);
        Xunit.Assert.True(double.IsNegativeInfinity(product.GetLogProb('a')));
    }
}
public void PartialUniformWithLogProbabilityOverride()
{
    var source = DiscreteChar.LetterOrDigit();

    // Remember the source probabilities so we can verify it is left untouched.
    var originalProbLetter = Math.Exp(source.GetLogProb('j'));
    var originalProbDigit = Math.Exp(source.GetLogProb('5'));

    var overrideLogProb = Math.Log(0.7);
    var scaled = DiscreteChar.Uniform();
    scaled.SetToPartialUniformOf(source, overrideLogProb);

    // Every supported character now carries the override log-probability.
    Assert.Equal(scaled.GetLogProb('j'), overrideLogProb, Eps);
    Assert.Equal(scaled.GetLogProb('5'), overrideLogProb, Eps);

    // Check that cache has not been compromised.
    Assert.Equal(originalProbLetter, Math.Exp(source.GetLogProb('j')), Eps);
    Assert.Equal(originalProbDigit, Math.Exp(source.GetLogProb('5')), Eps);

    // Check that an exception is thrown if a bad maximumProbability is passed down.
    Xunit.Assert.Throws<ArgumentException>(
        () =>
        {
            var badDist = DiscreteChar.Uniform();
            badDist.SetToPartialUniformOf(source, Math.Log(1.2));
        });
}
// Determinization helper test (builder-based variant): one uniform transition of
// weight 2 out of the start state, queried with a state weight of 3.
// NOTE(review): this declaration appears truncated in this chunk — the
// 'expectedOutgoingTransitions' array initializer and the rest of the body
// continue outside the visible source, so the code is left unchanged here.
public void GetOutgoingTransitionsForDeterminization1() { var builder = new StringAutomaton.Builder(); builder.Start.AddTransition(DiscreteChar.Uniform(), Weight.FromValue(2)); var wrapper = new StringAutomatonWrapper(builder); var outgoingTransitions = wrapper.GetOutgoingTransitionsForDeterminization(0, Weight.FromValue(3)); var expectedOutgoingTransitions = new[]
/// <summary>
/// Creates a string distribution <c>P(s) = \prod_i P_i(s_i)^I[i != j]</c>,
/// where <c>P_i(c)</c> is a given array of character distributions and <c>j</c> is a given position in the array.
/// </summary>
/// <param name="characters">The distributions over individual characters.</param>
/// <param name="excludedPos">The position whose character distribution is skipped (replaced by a uniform one).</param>
/// <returns>The created distribution.</returns>
private static StringDistribution GetCharWeighter(IList<DiscreteChar> characters, int excludedPos)
{
    StringDistribution result = StringDistribution.Empty();
    for (int i = 0; i < characters.Count; ++i)
    {
        // At the excluded position any character is allowed (uniform),
        // which removes that factor's weighting from the product.
        result.AppendInPlace(i == excludedPos ? DiscreteChar.Uniform() : characters[i]);
    }

    return result;
}
public void CharDistribution()
{
    // A distribution restricted to the ranges encoded by "bdgi" (presumably
    // pairs of range endpoints — confirm against UniformInRanges), a fully
    // uniform distribution, and an 80/20 mixture of the two.
    var ranged = DiscreteChar.UniformInRanges("bdgi");
    var uniform = DiscreteChar.Uniform();
    var mixture = new DiscreteChar();
    mixture.SetToSum(0.8, ranged, 0.2, uniform);

    // Run the shared distribution test batteries on the constructed instances.
    DistributionTests.DistributionTest(uniform, mixture, false);
    DistributionTests.PointMassTest(mixture, 'b');
    DistributionTests.UniformTest(ranged, 'b');
}
public void GetOutgoingTransitionsForDeterminization1()
{
    // A single uniform transition of weight 2 out of the start state.
    var automaton = new StringAutomatonWrapper();
    automaton.Start.AddTransition(DiscreteChar.Uniform(), Weight.FromValue(2));

    // Query the outgoing transitions for source state 0 carrying weight 3.
    var weightedSourceStates = new Dictionary<int, Weight> { { 0, Weight.FromValue(3) } };
    var actualTransitions = automaton.GetOutgoingTransitionsForDeterminization(weightedSourceStates);

    // Expect one uniform transition of weight 6 (source weight 3 times
    // transition weight 2) leading to state 1 with weight 1.
    var expectedTransitions = new[]
    {
        new Tuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
            DiscreteChar.Uniform(),
            Weight.FromValue(6),
            new Dictionary<int, Weight> { { 1, Weight.FromValue(1) } })
    };
    AssertCollectionsEqual(expectedTransitions, actualTransitions, TransitionInfoEqualityComparer.Instance);
}
// Builds a non-normalizable word model from rescaled (non-normalized) character
// distributions and verifies its per-length word probabilities directly, under
// products with broad/narrow distributions, under a copy transducer, and under
// a transducer that re-weights the first character.
public void WordModel()
{
    // We want to build a word model as a reasonably simple StringDistribution. It
    // should satisfy the following:
    // (1) The probability of a word of moderate length should not be
    // significantly less than the probability of a shorter word.
    // (2) The probability of a specific word conditioned on its length matches that of
    // words in the target language.
    // We achieve this by putting non-normalized character distributions on the edges. The
    // StringDistribution is unaware that these are non-normalized.
    // The StringDistribution itself is non-normalizable.

    // Target probability for each word length 1..10, built as a chain of ratios.
    const double TargetProb1 = 0.05;
    const double Ratio1 = 0.4;
    const double TargetProb2 = TargetProb1 * Ratio1;
    const double Ratio2 = 0.2;
    const double TargetProb3 = TargetProb2 * Ratio2;
    const double TargetProb4 = TargetProb3 * Ratio2;
    const double TargetProb5 = TargetProb4 * Ratio2;
    const double Ratio3 = 0.999;
    const double TargetProb6 = TargetProb5 * Ratio3;
    const double TargetProb7 = TargetProb6 * Ratio3;
    const double TargetProb8 = TargetProb7 * Ratio3;
    const double Ratio4 = 0.9;
    const double TargetProb9 = TargetProb8 * Ratio4;
    const double TargetProb10 = TargetProb9 * Ratio4;
    var targetProbabilitiesPerLength = new double[]
    {
        TargetProb1, TargetProb2, TargetProb3, TargetProb4, TargetProb5,
        TargetProb6, TargetProb7, TargetProb8, TargetProb9, TargetProb10
    };

    var charDistUpper = DiscreteChar.Upper();
    var charDistLower = DiscreteChar.Lower();
    var charDistUpperNarrow = DiscreteChar.OneOf('A', 'B');
    var charDistLowerNarrow = DiscreteChar.OneOf('a', 'b');
    var charDistUpperScaled = DiscreteChar.Uniform();
    var charDistLowerScaled1 = DiscreteChar.Uniform();
    var charDistLowerScaled2 = DiscreteChar.Uniform();
    var charDistLowerScaled3 = DiscreteChar.Uniform();
    var charDistLowerScaledEnd = DiscreteChar.Uniform();

    // Rescale each per-position character distribution so that the edge masses
    // encode the desired per-length word probabilities.
    charDistUpperScaled.SetToPartialUniformOf(charDistUpper, Math.Log(TargetProb1));
    charDistLowerScaled1.SetToPartialUniformOf(charDistLower, Math.Log(Ratio1));
    charDistLowerScaled2.SetToPartialUniformOf(charDistLower, Math.Log(Ratio2));
    charDistLowerScaled3.SetToPartialUniformOf(charDistLower, Math.Log(Ratio3));
    charDistLowerScaledEnd.SetToPartialUniformOf(charDistLower, Math.Log(Ratio4));

    var wordModel = StringDistribution.Concatenate(
        new List<DiscreteChar>
        {
            charDistUpperScaled,
            charDistLowerScaled1,
            charDistLowerScaled2,
            charDistLowerScaled2,
            charDistLowerScaled2,
            charDistLowerScaled3,
            charDistLowerScaled3,
            charDistLowerScaled3,
            charDistLowerScaledEnd
        },
        true,
        true);

    const string Word = "Abcdefghij";
    const double Eps = 1e-5;
    var broadDist = StringDistribution.Char(charDistUpper);
    var narrowDist = StringDistribution.Char(charDistUpperNarrow);
    var narrowWord = "A";
    var expectedProbForNarrow = 0.5;
    for (var i = 0; i < targetProbabilitiesPerLength.Length; i++)
    {
        // Prefix of length i+1 must have the i-th target probability, both
        // directly and when averaged against the broad distribution.
        var currentWord = Word.Substring(0, i + 1);
        var probCurrentWord = Math.Exp(wordModel.GetLogProb(currentWord));
        Assert.Equal(targetProbabilitiesPerLength[i], probCurrentWord, Eps);
        var logAvg = Math.Exp(wordModel.GetLogAverageOf(broadDist));
        Assert.Equal(targetProbabilitiesPerLength[i], logAvg, Eps);

        // Product with a broad distribution keeps the element log-value
        // overrides; product with a narrow one must drop them.
        var prod = StringDistribution.Zero();
        prod.SetToProduct(broadDist, wordModel);
        Xunit.Assert.True(prod.GetWorkspaceOrPoint().HasElementLogValueOverrides);
        probCurrentWord = Math.Exp(prod.GetLogProb(currentWord));
        Assert.Equal(targetProbabilitiesPerLength[i], probCurrentWord, Eps);
        prod.SetToProduct(narrowDist, wordModel);
        Xunit.Assert.False(prod.GetWorkspaceOrPoint().HasElementLogValueOverrides);
        var probNarrowWord = Math.Exp(prod.GetLogProb(narrowWord));
        Assert.Equal(expectedProbForNarrow, probNarrowWord, Eps);

        // Extend the comparison distributions and the narrow word by one character.
        broadDist = broadDist.Append(charDistLower);
        narrowDist = narrowDist.Append(charDistLowerNarrow);
        narrowWord += "a";
        expectedProbForNarrow *= 0.5;
    }

    // Copied model
    var copiedModel = StringDistribution.FromWorkspace(StringTransducer.Copy().ProjectSource(wordModel.GetWorkspaceOrPoint()));
    // Under transducer.
    // Probabilities must survive projection through a copy transducer unchanged.
    for (var i = 0; i < targetProbabilitiesPerLength.Length; i++)
    {
        var currentWord = Word.Substring(0, i + 1);
        var probCurrentWord = Math.Exp(copiedModel.GetLogProb(currentWord));
        Assert.Equal(targetProbabilitiesPerLength[i], probCurrentWord, Eps);
    }

    // Rescaled model
    // Re-weighting the first character by 'scale' must scale every word
    // probability linearly.
    var scale = 0.5;
    var newTargetProb1 = TargetProb1 * scale;
    var charDistUpperScaled1 = DiscreteChar.Uniform();
    charDistUpperScaled1.SetToPartialUniformOf(charDistUpper, Math.Log(newTargetProb1));
    var reWeightingTransducer =
        StringTransducer.Replace(
            StringDistribution.Char(charDistUpper).GetWorkspaceOrPoint(),
            StringDistribution.Char(charDistUpperScaled1).GetWorkspaceOrPoint())
        .Append(StringTransducer.Copy());
    var reWeightedWordModel = StringDistribution.FromWorkspace(reWeightingTransducer.ProjectSource(wordModel.GetWorkspaceOrPoint()));
    for (var i = 0; i < targetProbabilitiesPerLength.Length; i++)
    {
        var currentWord = Word.Substring(0, i + 1);
        var probCurrentWord = Math.Exp(reWeightedWordModel.GetLogProb(currentWord));
        Assert.Equal(scale * targetProbabilitiesPerLength[i], probCurrentWord, Eps);
    }
}
// Tests the StringFromArrayOp message operators: StrAverageConditional composes
// per-position character distributions into a string distribution, and
// CharactersAverageConditional computes per-position character messages from a
// string distribution and the incoming per-position character messages.
public void MessageOperatorsTest()
{
    // Forward message: product of the three independent character distributions.
    var str1 = StringFromArrayOp.StrAverageConditional(new[] { DiscreteChar.PointMass('a'), DiscreteChar.OneOf('b', 'c'), DiscreteChar.OneOf('d', 'e') });
    Assert.Equal(StringDistribution.OneOf("abd", "abe", "acd", "ace"), str1, Eps);

    // An empty character array yields the empty-string distribution.
    var str2 = StringFromArrayOp.StrAverageConditional(new DiscreteChar[0]);
    Assert.Equal(StringDistribution.Empty(), str2, Eps);

    // Backward messages per position, given a string distribution and the
    // incoming character messages.
    var chars1 = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "cd"),
        new[] { DiscreteChar.PointMass('a'), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.OneOf('a', 'c'), chars1[0], Eps);
    Assert.Equal(DiscreteChar.PointMass('b'), chars1[1], Eps);

    var chars2 = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "ac"),
        new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.PointMass('a'), chars2[0], Eps);
    Assert.Equal(DiscreteChar.OneOf('b', 'c'), chars2[1], Eps);

    // When the posterior mixes several characters at a position, the message
    // carries the corresponding unequal weights (asserted via the indexer).
    var chars3 = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "ac", "bc"),
        new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(2.0 / 3.0, chars3[0]['a'], Eps);
    Assert.Equal(1.0 / 3.0, chars3[0]['b'], Eps);
    Assert.Equal(1.0 / 3.0, chars3[1]['b'], Eps);
    Assert.Equal(2.0 / 3.0, chars3[1]['c'], Eps);

    // Only the length-2 option "ab" is compatible with a two-character array.
    var chars4 = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "cde"),
        new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.PointMass('a'), chars4[0], Eps);
    Assert.Equal(DiscreteChar.PointMass('b'), chars4[1], Eps);

    var chars5 = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "cb", "ae", "ax"),
        new[] { DiscreteChar.PointMass('a'), DiscreteChar.PointMass('b') },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.OneOf('a', 'c'), chars5[0], Eps);
    Assert.Equal(DiscreteChar.OneOf('b', 'e', 'x'), chars5[1], Eps);

    var chars6 =
        StringFromArrayOp.CharactersAverageConditional(
            StringDistribution.OneOf("abcd", "accd", "acce"),
            new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform(), DiscreteChar.Uniform(), DiscreteChar.PointMass('d') },
            new DiscreteChar[4]);
    Assert.Equal(DiscreteChar.PointMass('a'), chars6[0], Eps);
    Assert.Equal(DiscreteChar.OneOf('b', 'c'), chars6[1], Eps);
    Assert.Equal(DiscreteChar.PointMass('c'), chars6[2], Eps);
    Assert.Equal(2.0 / 3.0, chars6[3]['d'], Eps);
    Assert.Equal(1.0 / 3.0, chars6[3]['e'], Eps);
}