Beispiel #1
0
        /// <summary>
        /// Creates a uniform distribution over capitalized strings: a single upper case letter followed by
        /// a run of letters, where the total string length lies within the given bounds.
        /// The run after the first character has length between <c>minLength - 1</c> and <c>maxLength - 1</c>,
        /// so it may be empty when <paramref name="minLength"/> is 1.
        /// If <paramref name="maxLength"/> is <see langword="null"/>, the length has no upper bound
        /// and the resulting distribution is therefore improper.
        /// </summary>
        /// <param name="minLength">The minimum possible string length; must be 1 or more. Defaults to 2.</param>
        /// <param name="maxLength">
        /// The maximum possible string length, or <see langword="null"/> for no upper bound on length.
        /// When specified, it must not be less than <paramref name="minLength"/>.
        /// Defaults to <see langword="null"/>.
        /// </param>
        /// <param name="allowUpperAfterFirst">
        /// Whether upper case letters are allowed after the initial upper case letter.
        /// If <see langword="false"/>, only lower case letters are allowed there.
        /// </param>
        /// <returns>The created distribution.</returns>
        public static StringDistribution Capitalized(int minLength = 2, int? maxLength = null, bool allowUpperAfterFirst = false)
        {
            Argument.CheckIfInRange(minLength >= 1, "minLength", "The minimum length of a capitalized string should be 1 or more.");
            Argument.CheckIfValid(!maxLength.HasValue || maxLength.Value >= minLength, "The maximum length cannot be less than the minimum length.");

            // The leading upper case letter occupies one position, so the tail is one character shorter
            // than the whole string.
            int minTailLength = minLength - 1;
            var capitalized = StringDistribution.Char(ImmutableDiscreteChar.Upper());

            if (!maxLength.HasValue)
            {
                // The tail is improper here. Rescale it by the inverse of the head's normalizer
                // so that the concatenation evaluates to 1 on its support.
                double headLogNormalizer = capitalized.GetLogAverageOf(capitalized);
                var unboundedTail = allowUpperAfterFirst
                    ? StringDistribution.Letters(minLength: minTailLength)
                    : StringDistribution.Lower(minLength: minTailLength);
                var rescaledTailFunc = unboundedTail.ToNormalizedAutomaton().ScaleLog(-headLogNormalizer);

                capitalized.AppendInPlace(StringDistribution.FromWeightFunction(rescaledTailFunc));
                return capitalized;
            }

            int maxTailLength = maxLength.Value - 1;
            var boundedTail = allowUpperAfterFirst
                ? StringDistribution.Letters(minLength: minTailLength, maxLength: maxTailLength)
                : StringDistribution.Lower(minLength: minTailLength, maxLength: maxTailLength);

            capitalized.AppendInPlace(boundedTail);
            return capitalized;
        }
Beispiel #2
0
        /// <summary>
        /// Creates a distribution, backed by a two-state automaton, that accepts the empty string as well as
        /// any string made of one element matched by <paramref name="startsWith"/> followed by
        /// zero or more elements matched by <paramref name="charsInMainString"/>.
        /// </summary>
        /// <param name="charsInMainString">The character distribution used for every element after the first.</param>
        /// <param name="startsWith">The character distribution used for the first element of a non-empty string.</param>
        /// <returns>The distribution built from the constructed automaton.</returns>
        public static StringDistribution EmptyOrStartsWith(ImmutableDiscreteChar charsInMainString, ImmutableDiscreteChar startsWith)
        {
            // TODO: fix equality and then use factory methods to create this
            var builder = new StringAutomaton.Builder();

            // Accepting at the start state is what admits the empty string.
            var startState = builder.Start;
            startState.SetEndWeight(Weight.One);

            // Each transition weight is the inverse of the character distribution's average of itself.
            // NOTE(review): presumably this cancels the per-character probability so accepted strings
            // get weight 1 for uniform character distributions - confirm.
            var firstCharWeight = Weight.FromLogValue(-startsWith.GetLogAverageOf(startsWith));
            var tailState = startState.AddTransition(startsWith, firstCharWeight);

            var mainCharWeight = Weight.FromLogValue(-charsInMainString.GetLogAverageOf(charsInMainString));
            tailState.AddSelfTransition(charsInMainString, mainCharWeight);
            tailState.SetEndWeight(Weight.One);

            return StringDistribution.FromWeightFunction(builder.GetAutomaton());
        }
Beispiel #3
0
        /// <summary>
        /// Creates a uniform distribution over strings that both start and end with a non-word character.
        /// A string consisting of a single non-word character is accepted as well.
        /// Characters strictly between the first and the last are restricted to those having
        /// non-zero probability under a given distribution.
        /// </summary>
        /// <param name="allowedChars">The distribution representing the characters allowed between the two separators.</param>
        /// <param name="nonWordCharacter">
        /// The word separating characters. When <see langword="null"/>, <c>NonWordCharacter</c> is used instead.
        /// </param>
        /// <returns>The created distribution.</returns>
        public static StringDistribution WordMiddle(ImmutableDiscreteChar allowedChars, ImmutableDiscreteChar? nonWordCharacter = null)
        {
            // TODO: fix equality and then use factory methods to create this
            nonWordCharacter = nonWordCharacter ?? NonWordCharacter;
            var builder = new StringAutomaton.Builder();

            // The opening and the closing separator use the same element distribution and weight.
            var separator = nonWordCharacter.Value;
            var separatorElement = Option.FromNullable(nonWordCharacter);
            var separatorWeight = Weight.FromLogValue(-separator.GetLogAverageOf(separator));

            // Start --separator--> afterOpening; accepting here admits the single-separator string.
            var afterOpening = builder.Start.AddTransition(separatorElement, separatorWeight);
            afterOpening.SetEndWeight(Weight.One);

            // afterOpening --epsilon--> middle, which loops on the allowed characters
            // and leaves through the closing separator.
            var middle = afterOpening.AddEpsilonTransition(Weight.One);
            var allowedWeight = Weight.FromLogValue(-allowedChars.GetLogAverageOf(allowedChars));
            var afterClosing = middle
                .AddSelfTransition(allowedChars, allowedWeight)
                .AddTransition(separatorElement, separatorWeight);
            afterClosing.SetEndWeight(Weight.One);

            return StringDistribution.FromWeightFunction(builder.GetAutomaton());
        }