/// <summary>
        /// Builds a uniform distribution over capitalized strings: a single upper case letter
        /// followed by further letters, with the total length constrained by the given bounds.
        /// When <paramref name="maxLength"/> is <see langword="null"/> the length is unbounded,
        /// so the returned distribution is improper.
        /// </summary>
        /// <param name="minLength">The smallest allowed string length; must be at least 1. Defaults to 2.</param>
        /// <param name="maxLength">
        /// The largest allowed string length, or <see langword="null"/> (the default) for no upper bound.
        /// Must not be less than <paramref name="minLength"/>.
        /// </param>
        /// <param name="allowUpperAfterFirst">
        /// If <see langword="true"/>, the characters after the first are taken from
        /// <c>StringDistribution.Letters</c>; otherwise (the default) from <c>StringDistribution.Lower</c>.
        /// </param>
        /// <returns>The created distribution.</returns>
        public static StringDistribution Capitalized(int minLength = 2, int? maxLength = null, bool allowUpperAfterFirst = false)
        {
            Argument.CheckIfInRange(minLength >= 1, "minLength", "The minimum length of a capitalized string should be 1 or more.");
            Argument.CheckIfValid(maxLength == null || maxLength.Value >= minLength, "The maximum length cannot be less than the minimum length.");

            // The leading upper case letter; the tail built below is appended to it.
            var capitalized = StringDistribution.Char(DiscreteChar.Upper());

            // The first character is already accounted for, so the tail is one shorter.
            int minTailLength = minLength - 1;

            if (!maxLength.HasValue)
            {
                // The unbounded tail is an improper distribution. Rescale it by the inverse of the
                // leading character's normalizer so that the concatenation is 1 on its support.
                double leadingLogNormalizer = capitalized.GetLogAverageOf(capitalized);
                var unboundedTail = allowUpperAfterFirst
                    ? StringDistribution.Letters(minLength: minTailLength)
                    : StringDistribution.Lower(minLength: minTailLength);
                var rescaledTail = unboundedTail.GetNormalizedWorkspaceOrPoint().ScaleLog(-leadingLogNormalizer);

                capitalized.AppendInPlace(StringDistribution.FromWorkspace(rescaledTail));
                return capitalized;
            }

            // Bounded case: a plain concatenation with a length-limited tail.
            int maxTailLength = maxLength.Value - 1;
            var boundedTail = allowUpperAfterFirst
                ? StringDistribution.Letters(minLength: minTailLength, maxLength: maxTailLength)
                : StringDistribution.Lower(minLength: minTailLength, maxLength: maxTailLength);

            capitalized.AppendInPlace(boundedTail);
            return capitalized;
        }
        // Example #2
        // 0
        /// <summary>
        /// Builds a uniform distribution over strings that both begin and end with a non-word character
        /// and whose length lies within the given bounds.
        /// Every character strictly between the first and the last must have non-zero probability
        /// under <paramref name="allowedChars"/>.
        /// </summary>
        /// <param name="minLength">The minimum allowed string length; must be at least one.</param>
        /// <param name="maxLength">The maximum allowed string length; must not be less than <paramref name="minLength"/>.</param>
        /// <param name="allowedChars">The distribution representing allowed characters.</param>
        /// <returns>The created distribution.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="maxLength"/> is less than <paramref name="minLength"/>,
        /// or when <paramref name="minLength"/> is less than one.
        /// </exception>
        public static StringDistribution WordMiddle(int minLength, int maxLength, ImmutableDiscreteChar allowedChars)
        {
            if (minLength > maxLength)
            {
                throw new ArgumentException("The maximum length cannot be less than the minimum length.");
            }

            if (minLength < 1)
            {
                throw new ArgumentException("The minimum length must be at least one.");
            }

            var boundary = StringDistribution.Char(NonWordCharacter);
            bool singleCharAllowed = minLength == 1;

            // Only length one is permitted: the string is a lone non-word character.
            if (singleCharAllowed && maxLength == 1)
            {
                return boundary;
            }

            // Two positions are taken by the boundary characters; the rest come from allowedChars.
            int minInnerLength = Math.Max(minLength - 2, 0);

            // TODO: make a PartialUniform copy of allowedChars
            var tail = StringDistribution.Repeat(allowedChars, minTimes: minInnerLength, maxTimes: maxLength - 2);
            tail.AppendInPlace(boundary);

            if (singleCharAllowed)
            {
                // A one-character string has nothing after the leading boundary, so the empty string
                // is mixed into the tail. Its log-weight is the log-probability of a shortest tail.
                // NOTE(review): presumably this keeps the result uniform, and ' ' is assumed to be
                // accepted by NonWordCharacter - confirm.
                string shortestTail = new string(allowedChars.GetMode(), minInnerLength) + ' ';
                var shortestTailLogProb = tail.GetLogProb(shortestTail);
                tail.SetToSumLog(shortestTailLogProb, StringDistribution.Empty(), 0.0, tail);
            }

            return boundary + tail;
        }