/// <summary>
/// Computes the message to the substring given the message from <c>str</c>, a fixed start index
/// and a range of admissible substring lengths.
/// </summary>
/// <param name="str">The message from <c>str</c>. Must not be null.</param>
/// <param name="start">The zero-based index at which the substring starts. Must be non-negative.</param>
/// <param name="minLength">The smallest substring length to consider. Must be non-negative.</param>
/// <param name="maxLength">The largest substring length to consider. Must be non-negative.</param>
/// <returns>A distribution over the substrings of <paramref name="str"/> that start at <paramref name="start"/>.</returns>
public static StringDistribution SubAverageConditional(StringDistribution str, int start, int minLength, int maxLength)
{
    Argument.CheckIfNotNull(str, "str");
    Argument.CheckIfInRange(start >= 0, "start", "Start index must be non-negative.");
    Argument.CheckIfInRange(minLength >= 0, "minLength", "Min length must be non-negative.");
    Argument.CheckIfInRange(maxLength >= 0, "maxLength", "Max length must be non-negative.");

    if (str.IsPointMass)
    {
        string strPoint = str.Point;

        // The substring can never be longer than what is left after 'start'.
        // Clamping against the whole string length (as was done previously) made
        // Substring throw whenever start + length exceeded the string length.
        // NOTE(review): if start > strPoint.Length this is negative and Substring still throws,
        // exactly as before - confirm whether a zero distribution would be preferable there.
        int available = strPoint.Length - start;

        var alts = new HashSet<string>();
        for (int length = minLength; length <= maxLength; length++)
        {
            alts.Add(strPoint.Substring(start, Math.Min(length, available)));

            if (length >= available)
            {
                // Every larger length is clamped to the same substring, so nothing new can be added.
                // Stopping here also avoids overflowing 'length' when maxLength is int.MaxValue.
                break;
            }
        }

        return StringDistribution.OneOf(alts);
    }

    // General case: skip 'start' characters, copy between minLength and maxLength characters,
    // then skip whatever remains of the source string.
    var anyChar = StringAutomaton.ConstantOnElement(1.0, ImmutableDiscreteChar.Any());
    var transducer = StringTransducer.Consume(StringAutomaton.Repeat(anyChar, minTimes: start, maxTimes: start));
    transducer.AppendInPlace(StringTransducer.Copy(StringAutomaton.Repeat(anyChar, minTimes: minLength, maxTimes: maxLength)));
    transducer.AppendInPlace(StringTransducer.Consume(StringAutomaton.Constant(1.0)));

    return StringDistribution.FromWeightFunction(transducer.ProjectSource(str.ToAutomaton()));
}
/// <summary>
/// Samples from a single-state automaton with a self-loop and checks that the empirical mean and
/// variance of the sample lengths match those of a geometric distribution.
/// </summary>
public void SampleGeometric()
{
    Rand.Restart(96);

    const double StopProbability = 0.7;
    const double ContinueProbability = 1 - StopProbability;
    const int DrawCount = 30000;

    // One state that either stops (end weight) or emits another 'a' and loops back to itself,
    // so the length of a sampled sequence must follow a geometric distribution.
    StringAutomaton loopAutomaton = StringAutomaton.Zero();
    loopAutomaton.Start = loopAutomaton.AddState();
    loopAutomaton.Start.SetEndWeight(Weight.FromValue(StopProbability));
    loopAutomaton.Start.AddTransition('a', Weight.FromValue(ContinueProbability), loopAutomaton.Start);

    StringDistribution geometric = StringDistribution.FromWeightFunction(loopAutomaton);

    var lengthStats = new MeanVarianceAccumulator();
    int drawn = 0;
    while (drawn < DrawCount)
    {
        lengthStats.Add(geometric.Sample().Length);
        ++drawn;
    }

    const double MeanOfGeometric = ContinueProbability / StopProbability;
    const double VarianceOfGeometric = ContinueProbability / (StopProbability * StopProbability);

    Assert.Equal(MeanOfGeometric, lengthStats.Mean, 1e-2);
    Assert.Equal(VarianceOfGeometric, lengthStats.Variance, 1e-2);
}
/// <summary>
/// Checks that distributions built in various ways are recognised as point masses
/// and report the expected point.
/// </summary>
public void PointMassDetection()
{
    // Shared check: the distribution must be a point mass on the expected string.
    void AssertIsPoint(string expectedPoint, StringDistribution candidate)
    {
        Assert.True(candidate.IsPointMass);
        Assert.Equal(expectedPoint, candidate.Point);
    }

    // Product of two supports that share exactly one string
    StringDistribution firstSet = StringDistribution.OneOf("hello", "world", "people");
    StringDistribution secondSet = StringDistribution.OneOf("greetings", "people", "animals");
    AssertIsPoint("people", firstSet.Product(secondSet));

    // One of the alternatives has zero weight
    var weightedAlternatives = new Dictionary<string, double> { { "a", 3.0 }, { "b", 0.0 } };
    AssertIsPoint("a", StringDistribution.OneOf(weightedAlternatives));

    // Case-invariant distribution over a string of digits
    AssertIsPoint("123", StringDistribution.CaseInvariant("123"));

    // Single character
    AssertIsPoint("Z", StringDistribution.Char('Z'));

    // Mixture in which the second component has zero weight
    AssertIsPoint("!", StringDistribution.OneOf(1.0, StringDistribution.String("!"), 0.0, StringDistribution.Any()));

    // Fixed number of repetitions
    AssertIsPoint("@@@", StringDistribution.Repeat('@', minTimes: 3, maxTimes: 3));

    // Concatenation of two point masses
    AssertIsPoint("hello world", StringDistribution.String("hello").Append(StringDistribution.String(" world")));

    // Long chain of single-element automata appended one after another
    const string Element = "a";
    const int ChainLength = 22;
    string expectedChain = string.Empty;
    StringAutomaton chainAutomaton = StringAutomaton.Empty();
    for (int link = 0; link < ChainLength; ++link)
    {
        chainAutomaton.AppendInPlace(StringAutomaton.ConstantOn(1.0, Element, Element));
        expectedChain += Element;
    }

    AssertIsPoint(expectedChain, StringDistribution.FromWeightFunction(chainAutomaton));
}
/// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="StringConcatOp"]/message_doc[@name="Str2AverageConditional(StringDistribution, StringDistribution)"]/*'/>
public static StringDistribution Str2AverageConditional(StringDistribution concat, StringDistribution str1)
{
    Argument.CheckIfNotNull(concat, "concat");
    Argument.CheckIfNotNull(str1, "str1");

    // Consume the part accepted by 'str1', then copy whatever follows.
    StringAutomaton prefix = str1.ToAutomaton();
    StringTransducer dropPrefixThenCopy = StringTransducer.Consume(prefix);
    dropPrefixThenCopy.AppendInPlace(StringTransducer.Copy());

    StringAutomaton projected = dropPrefixThenCopy.ProjectSource(concat.ToAutomaton());
    return StringDistribution.FromWeightFunction(projected);
}
/// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="StringOfLengthOp"]/message_doc[@name="StrAverageConditional(DiscreteChar, Discrete)"]/*'/>
public static StringDistribution StrAverageConditional(DiscreteChar allowedChars, Discrete length)
{
    Argument.CheckIfNotNull(length, "length");
    Argument.CheckIfValid(
        allowedChars.IsPartialUniform(),
        "allowedChars",
        "The set of allowed characters must be passed as a partial uniform distribution.");

    // Weight function for a single allowed character, using the log-average of
    // 'allowedChars' with itself as the log-weight.
    double perCharLogWeight = allowedChars.GetLogAverageOf(allowedChars);
    var singleChar = StringAutomaton.ConstantOnElementLog(perCharLogWeight, allowedChars.WrappedDistribution);

    // Repeat the single-character automaton according to the workspace of 'length'.
    var repeated = StringAutomaton.Repeat(singleChar, length.GetWorkspace());

    return StringDistribution.FromWeightFunction(repeated);
}
/// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="SubstringOp"]/message_doc[@name="StrAverageConditional(StringDistribution, int, int)"]/*'/>
public static StringDistribution StrAverageConditional(StringDistribution sub, int start, int length)
{
    Argument.CheckIfNotNull(sub, "sub");
    Argument.CheckIfInRange(start >= 0, "start", "Start index must be non-negative.");
    Argument.CheckIfInRange(length >= 0, "length", "Length must be non-negative.");

    var anyChar = StringAutomaton.ConstantOnElement(1.0, ImmutableDiscreteChar.Any());

    // Produce exactly 'start' arbitrary characters in front of the substring ...
    var leading = StringAutomaton.Repeat(anyChar, minTimes: start, maxTimes: start);
    var substringToString = StringTransducer.Produce(leading);

    // ... copy exactly 'length' characters of the substring itself ...
    var body = StringAutomaton.Repeat(anyChar, minTimes: length, maxTimes: length);
    substringToString.AppendInPlace(StringTransducer.Copy(body));

    // ... and produce an arbitrary tail after it.
    substringToString.AppendInPlace(StringTransducer.Produce(StringAutomaton.Constant(1.0)));

    StringAutomaton projected = substringToString.ProjectSource(sub.ToAutomaton());
    return StringDistribution.FromWeightFunction(projected);
}
/// <summary>
/// Checks that a group set on a transition of one factor is still present in the result of a product.
/// </summary>
public void ProductWithGroups()
{
    StringDistribution ungrouped = StringDistribution.String("ab");

    // Put the first transition of the start state into group 1
    var automaton = ungrouped.GetWorkspaceOrPoint();
    var firstTransition = automaton.Start.GetTransitions()[0];
    firstTransition.Group = 1;
    automaton.Start.SetTransition(0, firstTransition);

    StringDistribution grouped = StringDistribution.FromWeightFunction(automaton);
    StringDistribution plain = StringDistribution.OneOf("ab", "ac");

    // Preconditions: only the left operand carries a group
    Assert.True(grouped.GetWorkspaceOrPoint().HasGroup(1));
    Assert.False(plain.GetWorkspaceOrPoint().UsesGroups());

    var product = StringDistribution.Zero();
    product.SetToProduct(grouped, plain);

    Assert.True(product.GetWorkspaceOrPoint().HasGroup(1));
}
/// <summary>
/// Multiplies two distributions with unnormalized weights over "a", "b" and "c"
/// and checks the probabilities of the product.
/// </summary>
public void Product3()
{
    StringDistribution left = StringDistribution.FromWeightFunction(
        StringAutomaton.Sum(
            StringAutomaton.ConstantOn(1.0, "a"),
            StringAutomaton.ConstantOn(2.0, "b"),
            StringAutomaton.ConstantOn(4.0, "c")));
    StringDistribution right = StringDistribution.FromWeightFunction(
        StringAutomaton.Sum(
            StringAutomaton.ConstantOn(2.0, "a"),
            StringAutomaton.ConstantOn(5.0, "b"),
            StringAutomaton.ConstantOn(7.0, "c")));

    StringDistribution product = left.Product(right);

    // Pointwise weight products are 2, 10 and 28, which sum to 40.
    string[] values = { "a", "b", "c" };
    double[] expectedProbabilities = { 2.0 / 40.0, 10.0 / 40.0, 28.0 / 40.0 };
    for (int i = 0; i < values.Length; ++i)
    {
        StringInferenceTestUtilities.TestProbability(product, expectedProbabilities[i], values[i]);
    }
}
/// <summary>
/// Computes the message to <c>format</c> from the message from <c>str</c> and the messages from the arguments.
/// </summary>
/// <param name="str">The message from <c>str</c>. Must not be null.</param>
/// <param name="args">The messages from the arguments.</param>
/// <param name="argNames">The names of the arguments.</param>
/// <returns>The message to <c>format</c>.</returns>
public static StringDistribution FormatAverageConditional(
    StringDistribution str, IReadOnlyList<StringDistribution> args, IReadOnlyList<string> argNames)
{
    Argument.CheckIfNotNull(str, "str");
    ValidateArguments(args, argNames);

    var allowedArgs = new List<StringAutomaton>();
    foreach (StringDistribution arg in args)
    {
        allowedArgs.Add(arg.ToAutomaton());
    }

    // Special cases are handled by a dedicated implementation
    StringDistribution optimizedResult;
    if (TryOptimizedFormatAverageConditionalImpl(str, allowedArgs, argNames, out optimizedResult))
    {
        return optimizedResult;
    }

    // General case: run the process defined by StrAverageConditional in reverse
    var argsToPlaceholders = GetPlaceholderReplacingTransducer(allowedArgs, argNames, true, false);

    StringAutomaton format;
    if (str.IsPointMass)
    {
        format = argsToPlaceholders.ProjectSource(str.Point);
    }
    else
    {
        format = argsToPlaceholders.ProjectSource(str.ToAutomaton());
    }

    return StringDistribution.FromWeightFunction(GetValidatedFormatString(format, argNames));
}
/// <summary>
/// Checks that a group set on a transition of one factor is still present in the result of a product.
/// </summary>
public void ProductWithGroups()
{
    StringDistribution ungrouped = StringDistribution.String("ab");

    // Rebuild the automaton through a builder so that the transition
    // the start state's iterator points at can be put into group 1
    var builder = StringAutomaton.Builder.FromAutomaton(ungrouped.GetWorkspaceOrPoint());
    var startTransitions = builder.Start.TransitionIterator;
    var groupedTransition = startTransitions.Value;
    groupedTransition.Group = 1;
    startTransitions.Value = groupedTransition;

    StringDistribution grouped = StringDistribution.FromWeightFunction(builder.GetAutomaton());
    StringDistribution plain = StringDistribution.OneOf("ab", "ac");

    // Preconditions: only the left operand carries a group
    Assert.True(grouped.GetWorkspaceOrPoint().HasGroup(1));
    Assert.False(plain.GetWorkspaceOrPoint().UsesGroups);

    var product = StringDistribution.Zero();
    product.SetToProduct(grouped, plain);

    Assert.True(product.GetWorkspaceOrPoint().HasGroup(1));
}
/// <summary>
/// The implementation of <see cref="StrAverageConditional(StringDistribution, IReadOnlyList{StringDistribution}, IReadOnlyList{string})"/>.
/// </summary>
/// <param name="format">The message from <c>format</c>.</param>
/// <param name="allowedArgs">The message from <c>args</c>, truncated to allowed values and converted to automata.</param>
/// <param name="argNames">The names of the arguments.</param>
/// <param name="withGroups">Whether the result should mark different arguments with groups.</param>
/// <param name="noValidation">Whether incorrect format string values should not be pruned.</param>
/// <returns>The message to <c>str</c>.</returns>
private static StringDistribution StrAverageConditionalImpl(
    StringDistribution format,
    IReadOnlyList<StringAutomaton> allowedArgs,
    IReadOnlyList<string> argNames,
    bool withGroups,
    bool noValidation)
{
    // Prefer the specialized implementation whenever it applies
    StringDistribution optimized = TryOptimizedStrAverageConditionalImpl(format, allowedArgs, argNames, withGroups);
    if (optimized != null)
    {
        return optimized;
    }

    // Unless validation is switched off, prune format strings with incorrect braces
    StringAutomaton formatAutomaton;
    if (noValidation)
    {
        formatAutomaton = format.ToAutomaton();
    }
    else
    {
        formatAutomaton = GetValidatedFormatString(format.ToAutomaton(), argNames);
    }

    // Substitute the arguments for their placeholders
    var placeholdersToArgs = GetPlaceholderReplacingTransducer(allowedArgs, argNames, false, withGroups);
    return StringDistribution.FromWeightFunction(placeholdersToArgs.ProjectSource(formatAutomaton));
}
/// <summary>
/// An implementation of <see cref="StrAverageConditional(StringDistribution, IReadOnlyList{StringDistribution}, IReadOnlyList{string})"/>
/// specialized for some cases for performance reasons.
/// </summary>
/// <param name="format">The message from <c>format</c>.</param>
/// <param name="allowedArgs">The message from <c>args</c>, truncated to allowed values and converted to automata.</param>
/// <param name="argNames">The names of the arguments.</param>
/// <param name="withGroups">Whether the result should mark different arguments with groups.</param>
/// <returns>
/// Result distribution if there is an optimized implementation available for the provided parameters.
/// <see langword="null"/> otherwise.
/// </returns>
/// <remarks>
/// Supports the case of point mass <paramref name="format"/>.
/// </remarks>
private static StringDistribution TryOptimizedStrAverageConditionalImpl(
    StringDistribution format,
    IReadOnlyList<StringAutomaton> allowedArgs,
    IReadOnlyList<string> argNames,
    bool withGroups)
{
    if (!format.IsPointMass)
    {
        // Only point mass formats are supported here; the caller handles the general case
        return null;
    }

    string formatText = format.Point;

    // Braces are validated and placeholders are substituted in a single left-to-right pass
    var builder = StringAutomaton.Builder.ConstantOn(Weight.One, string.Empty);
    var usedArguments = new bool[allowedArgs.Count];

    // Index of the first character that has not been copied to the result yet
    // (one past the closing brace of the most recent placeholder)
    int literalStart = 0;
    int braceOpen = formatText.IndexOf("{", StringComparison.Ordinal);
    while (braceOpen != -1)
    {
        // Literal text between the previous placeholder and this one.
        // NOTE(review): a '}' inside this literal text is not rejected by this path -
        // confirm that this matches what the general-case validation accepts.
        string literal = formatText.Substring(literalStart, braceOpen - literalStart);
        builder.Append(StringAutomaton.ConstantOn(1.0, literal));

        int braceClose = formatText.IndexOf("}", braceOpen + 1, StringComparison.Ordinal);
        int followingOpen = formatText.IndexOf("{", braceOpen + 1, StringComparison.Ordinal);

        // The placeholder must be closed, and must be closed before another one is opened
        bool unterminated = braceClose == -1;
        bool openedAgainBeforeClose = followingOpen != -1 && followingOpen < braceClose;
        if (unterminated || openedAgainBeforeClose)
        {
            return StringDistribution.Zero();
        }

        string placeholderName = formatText.Substring(braceOpen + 1, braceClose - braceOpen - 1);
        int argIndex = argNames.IndexOf(placeholderName);

        // The name must be known and must not have been used by an earlier placeholder
        if (argIndex == -1 || usedArguments[argIndex])
        {
            return StringDistribution.Zero();
        }

        // Substitute the argument; group 0 is used when groups are not requested
        int group = withGroups ? argIndex + 1 : 0;
        builder.Append(allowedArgs[argIndex], group);
        usedArguments[argIndex] = true;

        literalStart = braceClose + 1;
        braceOpen = followingOpen;
    }

    // No closing brace may follow the last placeholder
    if (formatText.IndexOf('}', literalStart) != -1)
    {
        return StringDistribution.Zero();
    }

    // A required argument has no placeholder in the format
    if (RequirePlaceholderForEveryArgument && usedArguments.Contains(false))
    {
        return StringDistribution.Zero();
    }

    // Literal text after the last placeholder
    builder.Append(StringAutomaton.ConstantOn(1.0, formatText.Substring(literalStart)));

    return StringDistribution.FromWeightFunction(builder.GetAutomaton());
}
/// <summary>
/// An implementation of <see cref="FormatAverageConditional(StringDistribution, IReadOnlyList{StringDistribution}, IReadOnlyList{string})"/>
/// specialized for some cases for performance reasons.
/// </summary>
/// <param name="str">The message from <c>str</c>.</param>
/// <param name="allowedArgs">The message from <c>args</c>, truncated to allowed values and converted to automata.</param>
/// <param name="argNames">The names of the arguments.</param>
/// <param name="resultDist">The computed result.</param>
/// <returns>
/// <see langword="true"/> if there is an optimized implementation available for the provided parameters,
/// and <paramref name="resultDist"/> has been computed using it.
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// Supports the case of point mass <paramref name="str"/> and <paramref name="allowedArgs"/>,
/// where each of the arguments is present in <paramref name="str"/> at most once and the occurrences
/// are non-overlapping.
/// </remarks>
private static bool TryOptimizedFormatAverageConditionalImpl(
    StringDistribution str,
    IReadOnlyList<StringAutomaton> allowedArgs,
    IReadOnlyList<string> argNames,
    out StringDistribution resultDist)
{
    resultDist = null;

    string[] allowedArgPoints = Util.ArrayInit(allowedArgs.Count, i => allowedArgs[i].TryComputePoint());

    // Everything must be a point mass, and no argument may occur in 'str' more than once
    bool unsupported =
        !str.IsPointMass ||
        allowedArgPoints.Any(argPoint => argPoint == null || SubstringOccurrencesCount(str.Point, argPoint) > 1);
    if (unsupported)
    {
        // The caller falls back to the general case
        return false;
    }

    string text = str.Point;

    // Arguments that occur in 'str', ordered by the position of their occurrence
    var occurrences = allowedArgPoints
        .Select((argPoint, argIndex) => new { ArgIndex = argIndex, Position = text.IndexOf(argPoint, StringComparison.Ordinal) })
        .Where(occurrence => occurrence.Position != -1)
        .OrderBy(occurrence => occurrence.Position)
        .ToList();

    if (RequirePlaceholderForEveryArgument && occurrences.Count != allowedArgs.Count)
    {
        // A required argument does not occur in 'str'
        resultDist = StringDistribution.Zero();
        return true;
    }

    StringAutomaton formatAutomaton = StringAutomaton.ConstantOn(1.0, string.Empty);

    // Index of the first character of 'str' that has not been handled yet
    int consumed = 0;
    for (int i = 0; i < occurrences.Count; ++i)
    {
        int argIndex = occurrences[i].ArgIndex;
        int argPosition = occurrences[i].Position;

        if (i > 0 && argPosition < consumed)
        {
            // Overlapping occurrences are left to the general case
            return false;
        }

        // Text of 'str' that precedes the current argument
        formatAutomaton = formatAutomaton.Append(text.Substring(consumed, argPosition - consumed));

        // The format contained either the placeholder or, when placeholders are optional,
        // possibly the argument text itself
        string placeholder = "{" + argNames[argIndex] + "}";
        StringAutomaton argumentPart = RequirePlaceholderForEveryArgument
            ? StringAutomaton.ConstantOn(1.0, placeholder)
            : StringAutomaton.ConstantOn(1.0, placeholder, allowedArgPoints[argIndex]);
        formatAutomaton = formatAutomaton.Append(argumentPart);

        consumed = argPosition + allowedArgPoints[argIndex].Length;
    }

    // Text of 'str' that follows the last argument
    formatAutomaton = formatAutomaton.Append(text.Substring(consumed));

    resultDist = StringDistribution.FromWeightFunction(formatAutomaton);
    return true;
}