/// <summary>
        /// Gets the configuration registered for a string role in a culture.
        /// </summary>
        /// <param name="stringType">The string type; only its role bits (<c>CleanStringType.RoleMask</c>) are used.</param>
        /// <param name="culture">The culture to look up.</param>
        /// <returns>The configuration registered for the exact role if any, else the generic
        /// (all roles) configuration if any, else <c>Config.NotConfigured</c>.</returns>
        /// <remarks>The default culture is consulted only when nothing at all is registered for
        /// <paramref name="culture"/>; a culture that has configurations, but none matching the
        /// role, yields <c>Config.NotConfigured</c>.</remarks>
        private Config GetConfig(CleanStringType stringType, CultureInfo culture)
        {
            stringType = stringType & CleanStringType.RoleMask;

            // one lookup per dictionary (TryGetValue) rather than ContainsKey followed by the indexer
            Dictionary<CleanStringType, Config> config;
            if (_configs.TryGetValue(culture, out config) == false
                && _configs.TryGetValue(_defaultCulture, out config) == false)
            {
                return Config.NotConfigured;
            }

            Config found;
            if (config.TryGetValue(stringType, out found)) // have we got a config for _that_ role?
            {
                return found;
            }
            if (config.TryGetValue(CleanStringType.RoleMask, out found)) // have we got a generic config for _all_ roles?
            {
                return found;
            }

            return Config.NotConfigured;
        }
Example #2
0
        // Checks that the helper under test returns the input untouched when asked for an
        // Ascii clean-up in the given case; the 'expected' argument is not used by the assertion.
        public void CleanStringToAsciiWithCase(string input, string expected, CleanStringType caseType)
        {
            var requestedType = caseType | CleanStringType.Ascii;
            var actual = _helper.CleanString(input, requestedType);

            // legacy does nothing
            Assert.AreEqual(input, actual);
        }
Example #3
0
        /// <summary>
        /// Gets the configuration registered for a string role in a culture.
        /// </summary>
        /// <param name="stringType">The string type; only its role bits (<c>CleanStringType.RoleMask</c>) are used.</param>
        /// <param name="culture">The culture name; <c>null</c> is treated as the empty string.</param>
        /// <returns>The configuration registered for the exact role if any, else the generic
        /// (all roles) configuration if any, else <c>Config.NotConfigured</c>.</returns>
        /// <remarks>
        /// <para>The default culture is consulted only when nothing at all is registered for
        /// <paramref name="culture"/>.</para>
        /// <para>internal: we don't want ppl to retrieve a config and modify it
        /// (the helper uses a private clone to prevent modifications)</para>
        /// </remarks>
        internal Config For(CleanStringType stringType, string culture)
        {
            culture = culture ?? "";
            stringType = stringType & CleanStringType.RoleMask;

            // one lookup per dictionary (TryGetValue) rather than ContainsKey followed by the indexer
            Dictionary<CleanStringType, Config> config;
            if (_configs.TryGetValue(culture, out config) == false
                && _configs.TryGetValue(DefaultCulture, out config) == false)
            {
                return Config.NotConfigured;
            }

            Config found;
            if (config.TryGetValue(stringType, out found)) // have we got a config for _that_ role?
            {
                return found;
            }
            if (config.TryGetValue(CleanStringType.RoleMask, out found)) // have we got a generic config for _all_ roles?
            {
                return found;
            }

            return Config.NotConfigured;
        }
 /// <summary>
 /// Registers a configuration for a string role, under the default culture.
 /// </summary>
 /// <param name="stringRole">The string role.</param>
 /// <param name="preFilter">An optional pre-filter.</param>
 /// <param name="breakTermsOnUpper">Forwarded to the culture-specific overload.</param>
 /// <param name="allowLeadingDigits">Forwarded to the culture-specific overload.</param>
 /// <param name="allowUnderscoreInTerm">Forwarded to the culture-specific overload.</param>
 /// <returns>Whatever the culture-specific overload returns.</returns>
 public DefaultShortStringHelper WithConfig(CleanStringType stringRole,
                                            Func<string, string> preFilter = null,
                                            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
 {
     // same registration, pinned to the default culture
     return WithConfig(
         _defaultCulture,
         stringRole,
         preFilter,
         breakTermsOnUpper,
         allowLeadingDigits,
         allowUnderscoreInTerm);
 }
 // Checks the helper's LegacyConvertStringCase output against the expected string.
 public void LegacyConvertStringCase(string input, string expected, CleanStringType caseType)
 {
     // NOTE LegacyConvertStringCase has issues with a few cases
     // -> ignore test cases
     // also it removes symbols, etc... except the quote?
     Assert.AreEqual(expected, _helper.LegacyConvertStringCase(input, caseType));
 }
        /// <summary>
        /// Cleans a string in the context of a specified culture, using an optional separator.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <param name="stringType">The requested string type; it is first extended by the matching
        /// configuration, then defaulted to camelCase and Ascii where casing or encoding is missing.</param>
        /// <param name="culture">The culture.</param>
        /// <param name="separator">The separator, or <c>null</c> to use the configured one.</param>
        /// <returns>The clean string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="culture"/> is null.</exception>
        protected virtual string CleanString(string text, CleanStringType stringType, CultureInfo culture, char? separator)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }
            if (culture == null)
            {
                throw new ArgumentNullException("culture");
            }

            // look up the config for this role and culture, and let it extend the requested type
            var config = GetConfig(stringType, culture);
            stringType = config.StringTypeExtend(stringType);

            // fall back to camelCase when no casing is requested...
            var hasNoCase = (stringType & CleanStringType.CaseMask) == CleanStringType.None;
            if (hasNoCase)
            {
                stringType |= CleanStringType.CamelCase;
            }

            // ...and to Ascii when no encoding is requested
            var hasNoCode = (stringType & CleanStringType.CodeMask) == CleanStringType.None;
            if (hasNoCode)
            {
                stringType |= CleanStringType.Ascii;
            }

            // an explicit separator wins over the configured one
            separator = separator ?? config.Separator;

            if (config.PreFilter != null)
            {
                text = config.PreFilter(text);
            }

            // recode: Ascii goes through the converter, anything else only loses surrogate pairs
            if ((stringType & CleanStringType.CodeMask) == CleanStringType.Ascii)
            {
                text = Utf8ToAsciiConverter.ToAsciiString(text);
            }
            else
            {
                text = RemoveSurrogatePairs(text);
            }

            var cleaned = CleanCodeString(text, stringType, separator.Value, culture, config);

            return config.PostFilter == null
                ? cleaned
                : config.PostFilter(cleaned);
        }
        /// <summary>
        /// Cleans a string in the context of a specified culture, using a specified separator and configuration.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <param name="stringType">A flag indicating the target casing and encoding of the string. When no
        /// casing is set camelCase is used, and when no encoding is set Ascii is used.</param>
        /// <param name="separator">The separator.</param>
        /// <param name="culture">The culture.</param>
        /// <param name="config">The configuration.</param>
        /// <returns>The clean string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="culture"/> is null.</exception>
        /// <exception cref="NotImplementedException">The encoding is Unicode.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The encoding is none of Ascii, Utf8, Unicode.</exception>
        private string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture, HelperConfig config)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }
            if (culture == null)
            {
                throw new ArgumentNullException("culture");
            }

            // defaults: camelCase, Ascii
            if ((stringType & CleanStringType.CaseMask) == CleanStringType.None)
            {
                stringType |= CleanStringType.CamelCase;
            }
            if ((stringType & CleanStringType.CodeMask) == CleanStringType.None)
            {
                stringType |= CleanStringType.Ascii;
            }

            // pre-filter, then recode
            var preFilter = config.PreFilter;
            if (preFilter != null)
            {
                text = preFilter(text);
            }

            text = Recode(text, stringType);

            // clean: Ascii is handled by the Utf8 cleaner too (CleanAsciiString is not used)
            var codeType = stringType & CleanStringType.CodeMask;

            if (codeType == CleanStringType.Unicode)
            {
                throw new NotImplementedException("DefaultShortStringHelper does not handle unicode yet.");
            }
            if (codeType != CleanStringType.Ascii && codeType != CleanStringType.Utf8)
            {
                throw new ArgumentOutOfRangeException("stringType");
            }

            return CleanUtf8String(text, stringType, separator, culture, config);
        }
 /// <summary>
 /// Registers a configuration for a string role in a culture, replacing any existing one.
 /// </summary>
 /// <param name="culture">The culture.</param>
 /// <param name="stringRole">The string role.</param>
 /// <param name="preFilter">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <param name="breakTermsOnUpper">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <param name="allowLeadingDigits">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <param name="allowUnderscoreInTerm">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <returns>This helper, so calls can be chained.</returns>
 public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole,
                                            Func<string, string> preFilter = null,
                                            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
 {
     EnsureNotFrozen();

     // get, or create and register, the per-role table of that culture
     Dictionary<CleanStringType, HelperConfig> roleConfigs;
     if (!_configs.TryGetValue(culture, out roleConfigs))
     {
         roleConfigs = new Dictionary<CleanStringType, HelperConfig>();
         _configs[culture] = roleConfigs;
     }

     roleConfigs[stringRole] = new HelperConfig(preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
     return this;
 }
        /// <summary>
        /// Registers a clone of a configuration for a string role in a culture, replacing any existing one.
        /// </summary>
        /// <param name="culture">The culture.</param>
        /// <param name="stringRole">The string role.</param>
        /// <param name="config">The configuration to clone and register.</param>
        /// <returns>This helper, so calls can be chained.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, Config config)
        {
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            EnsureNotFrozen();

            // get, or create and register, the per-role table of that culture
            Dictionary<CleanStringType, Config> roleConfigs;
            if (!_configs.TryGetValue(culture, out roleConfigs))
            {
                roleConfigs = new Dictionary<CleanStringType, Config>();
                _configs[culture] = roleConfigs;
            }

            // store a clone so later changes to the caller's instance do not affect the helper
            roleConfigs[stringRole] = config.Clone();
            return this;
        }
        /// <summary>
        /// Filters a string to convert case, and more.
        /// </summary>
        /// <param name="phrase">The text to filter.</param>
        /// <param name="cases">The string case type; only its case bits (<c>CleanStringType.CaseMask</c>) are used.</param>
        /// <returns>The filtered text.</returns>
        /// <remarks>
        /// <para>This is the legacy method, so we can't really change it, although it has issues (see unit tests).</para>
        /// <para>It does more than "converting the case": the phrase is split on every char that is
        /// not an ASCII letter, a digit or a single quote, and the pieces are glued back together
        /// without those chars.</para>
        /// </remarks>
        public string LegacyConvertStringCase(string phrase, CleanStringType cases)
        {
            // ported from StringExtensions.ConvertCase

            var caseType = cases & CleanStringType.CaseMask;
            var terms = Regex.Split(phrase, @"[^a-zA-Z0-9\']", RegexOptions.Compiled);

            // unchanged: only drop the splitting chars
            if (caseType == CleanStringType.Unchanged)
            {
                return string.Concat(terms);
            }

            // upper-case the first char of each term, leave the rest of the term alone
            var builder = new StringBuilder();
            foreach (var term in terms)
            {
                if (term.Length == 0)
                {
                    continue;
                }

                builder.Append(term.Substring(0, 1).ToUpperInvariant()[0]);
                builder.Append(term, 1, term.Length - 1);
            }

            var result = builder.ToString();

            if (caseType != CleanStringType.CamelCase)
            {
                return result;
            }

            if (result.Length <= 1)
            {
                return result.ToLowerInvariant();
            }

            // camelCase: lower-case the leading run of capitals except its last one,
            // then lower-case the very first char
            var match = Regex.Match(result, "^([A-Z]*)([A-Z].*)$", RegexOptions.Singleline | RegexOptions.Compiled);
            if (match.Success == false)
            {
                return result;
            }

            var lowered = match.Groups[1].Value.ToLowerInvariant() + match.Groups[2].Value;
            return lowered.Substring(0, 1).ToLowerInvariant() + lowered.Substring(1);
        }
Example #11
0
        /// <summary>
        /// Registers a configuration for a string role in a culture, replacing any existing one.
        /// </summary>
        /// <param name="culture">The culture name; <c>null</c> is treated as the empty string.</param>
        /// <param name="stringRole">The string role.</param>
        /// <param name="config">The configuration; the instance itself is stored.</param>
        /// <returns>This configuration object, so calls can be chained.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public DefaultShortStringHelperConfig WithConfig(string culture, CleanStringType stringRole, Config config)
        {
            if (config == null)
            {
                throw new ArgumentNullException(nameof(config));
            }

            var cultureKey = culture ?? "";

            // get, or create and register, the per-role table of that culture
            Dictionary<CleanStringType, Config> roleConfigs;
            if (!_configs.TryGetValue(cultureKey, out roleConfigs))
            {
                roleConfigs = new Dictionary<CleanStringType, Config>();
                _configs[cultureKey] = roleConfigs;
            }

            roleConfigs[stringRole] = config;
            return this;
        }
            // Extends the config: starts from the configured StringType and, for the case bits and
            // the code bits separately, lets a non-zero value in the requested type replace the
            // configured one.
            public CleanStringType StringTypeExtend(CleanStringType stringType)
            {
                var extended = StringType;
                var masks = new[] { CleanStringType.CaseMask, CleanStringType.CodeMask };

                for (var m = 0; m < masks.Length; m++)
                {
                    var requested = stringType & masks[m];
                    if (requested != 0)
                    {
                        // clear what we have, then set the requested value
                        extended = (extended & ~masks[m]) | requested;
                    }
                }

                return extended;
            }
        /// <summary>
        /// Returns a new string containing only characters within the specified code type.
        /// </summary>
        /// <param name="text">The string to filter.</param>
        /// <param name="stringType">The string type; only its code bits (<c>CleanStringType.CodeMask</c>) are used.</param>
        /// <returns>The filtered string.</returns>
        /// <remarks>For <c>Unicode</c> the text is returned as is. For <c>Utf8</c> it goes through
        /// <c>RemoveNonUtf8</c>. For anything else it goes through
        /// <c>Utf8ToAsciiConverter.ToAsciiString</c>.</remarks>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public virtual string Recode(string text, CleanStringType stringType)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            switch (stringType & CleanStringType.CodeMask)
            {
                case CleanStringType.Unicode:
                    // nothing to remove or replace
                    return text;

                case CleanStringType.Utf8:
                    return RemoveNonUtf8(text);

                default:
                    return Utf8ToAsciiConverter.ToAsciiString(text);
            }
        }
 // Pass-through implementation: ignores the string type and separator, returns the text as is.
 public string CleanString(string text, CleanStringType stringType, char separator)
 {
     return text;
 }
 // Pass-through implementation: ignores the string type, separator and culture, returns the text as is.
 public string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
 {
     return text;
 }
 /// <summary>
 /// Cleans a string in the context of a specified culture.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <param name="culture">The culture.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>No separator is specified: <c>null</c> is passed to the separator-aware overload.</remarks>
 public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
 {
     return CleanString(text, stringType, culture, null);
 }
        /// <summary>
        /// Copies one term of the input string into the output buffer, applying the requested casing.
        /// </summary>
        /// <param name="input">The input string.</param>
        /// <param name="ipos">The index in <paramref name="input"/> at which the term starts.</param>
        /// <param name="output">The output buffer.</param>
        /// <param name="opos">The write position in <paramref name="output"/>; advanced by the number of chars written.</param>
        /// <param name="len">The length of the term in <paramref name="input"/>.</param>
        /// <param name="caseType">The target casing.</param>
        /// <param name="culture">The culture used for case conversions.</param>
        /// <param name="isAcronym">Whether the term is an acronym.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="caseType"/> is not one of the handled casings.</exception>
        /// <remarks>
        /// <para>note: supports surrogate pairs in input string</para>
        /// <para>An acronym is copied unchanged for UmbracoCase, for PascalCase when it is at most
        /// two chars long, and for CamelCase when it is at most two chars long and is not the
        /// first thing written to the output.</para>
        /// </remarks>
        internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len,
                               CleanStringType caseType, CultureInfo culture, bool isAcronym)
        {
            // from here on every index into 'term' is relative to the term, not to 'input'
            var term = input.Substring(ipos, len);

            if (isAcronym)
            {
                if ((caseType == CleanStringType.CamelCase && len <= 2 && opos > 0) ||
                    (caseType == CleanStringType.PascalCase && len <= 2) ||
                    (caseType == CleanStringType.UmbracoCase))
                {
                    caseType = CleanStringType.Unchanged;
                }
            }

            // note: MSDN seems to imply that ToUpper or ToLower preserve the length
            // of the string, but that this behavior is not guaranteed and could change.
            // so always advance opos by the length of what was actually copied.

            char   c;
            int    i;
            string s;

            switch (caseType)
            {
            case CleanStringType.Unchanged:
                term.CopyTo(0, output, opos, len);
                opos += len;
                break;

            case CleanStringType.LowerCase:
                term = term.ToLower(culture);
                term.CopyTo(0, output, opos, term.Length);
                opos += term.Length;
                break;

            case CleanStringType.UpperCase:
                term = term.ToUpper(culture);
                term.CopyTo(0, output, opos, term.Length);
                opos += term.Length;
                break;

            case CleanStringType.CamelCase:
                // first char (or surrogate pair): lower at the very start of the output, upper otherwise
                c = term[0];
                i = 1;
                if (char.IsSurrogatePair(term, 0))
                {
                    // the pair sits at the start of the term: offset 0, not ipos
                    s = term.Substring(0, 2);
                    s = opos == 0 ? s.ToLower(culture) : s.ToUpper(culture);
                    s.CopyTo(0, output, opos, s.Length);
                    opos += s.Length;
                    i++;     // surrogate pair len is 2
                }
                else
                {
                    output[opos] = opos++ == 0 ? char.ToLower(c, culture) : char.ToUpper(c, culture);
                }
                // remainder of the term: lower
                if (len > i)
                {
                    term = term.Substring(i).ToLower(culture);
                    term.CopyTo(0, output, opos, term.Length);
                    opos += term.Length;
                }
                break;

            case CleanStringType.PascalCase:
                // first char (or surrogate pair): upper
                c = term[0];
                i = 1;
                if (char.IsSurrogatePair(term, 0))
                {
                    // the pair sits at the start of the term: offset 0, not ipos
                    s = term.Substring(0, 2);
                    s = s.ToUpper(culture);
                    s.CopyTo(0, output, opos, s.Length);
                    opos += s.Length;
                    i++;     // surrogate pair len is 2
                }
                else
                {
                    output[opos++] = char.ToUpper(c, culture);
                }
                // remainder of the term: lower
                if (len > i)
                {
                    term = term.Substring(i).ToLower(culture);
                    term.CopyTo(0, output, opos, term.Length);
                    opos += term.Length;
                }
                break;

            case CleanStringType.UmbracoCase:
                // first char (or surrogate pair): unchanged at the very start of the output, upper otherwise
                c = term[0];
                i = 1;
                if (char.IsSurrogatePair(term, 0))
                {
                    // the pair sits at the start of the term: offset 0, not ipos
                    s = term.Substring(0, 2);
                    s = opos == 0 ? s : s.ToUpper(culture);
                    s.CopyTo(0, output, opos, s.Length);
                    opos += s.Length;
                    i++;     // surrogate pair len is 2
                }
                else
                {
                    output[opos] = opos++ == 0 ? c : char.ToUpper(c, culture);
                }
                // remainder of the term: unchanged
                if (len > i)
                {
                    term = term.Substring(i);
                    term.CopyTo(0, output, opos, term.Length);
                    opos += term.Length;
                }
                break;

            default:
                throw new ArgumentOutOfRangeException("caseType");
            }
        }
 /// <summary>
 /// Cleans a string, using a specified separator.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <param name="separator">The separator.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>Delegates to the culture-aware overload with the default culture.</remarks>
 public string CleanString(string text, CleanStringType stringType, char separator)
 {
     var culture = _defaultCulture;
     return CleanString(text, stringType, separator, culture);
 }
 /// <summary>
 /// Cleans a string.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>Delegates to the separator-aware overload with the default culture and a
 /// <c>null</c> separator.</remarks>
 public string CleanString(string text, CleanStringType stringType)
 {
     return CleanString(text, stringType, _defaultCulture, null);
 }
 // Stub implementation: ignores the string type and separator, and returns the text
 // prefixed with a fixed marker identifying this overload.
 public string CleanString(string text, CleanStringType stringType, char separator)
 {
     return string.Concat("CLEAN-STRING-B::", text);
 }
 /// <summary>
 /// Cleans a string in the context of a specified culture, using a specified separator.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <param name="separator">The separator.</param>
 /// <param name="culture">The culture.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>The configuration is looked up from the role bits of <paramref name="stringType"/>
 /// and <paramref name="culture"/>.</remarks>
 public virtual string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
 {
     var role = stringType & CleanStringType.RoleMask;
     return CleanString(text, stringType, separator, culture, GetConfig(role, culture));
 }
        /// <summary>
        /// Returns a new string containing only characters within the specified code type.
        /// </summary>
        /// <param name="text">The string to filter.</param>
        /// <param name="stringType">The string type; only its code bits (<c>CleanStringType.CodeMask</c>) are used.</param>
        /// <returns>The filtered string.</returns>
        /// <remarks>For <c>Unicode</c> the text is returned as is. For <c>Utf8</c> it goes through
        /// <c>RemoveNonUtf8</c>. For anything else it goes through
        /// <c>Utf8ToAsciiConverter.ToAsciiString</c>.</remarks>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public virtual string Recode(string text, CleanStringType stringType)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            var codeType = stringType & CleanStringType.CodeMask;

            // unicode target: nothing to do
            if (codeType == CleanStringType.Unicode)
            {
                return text;
            }

            // utf8 target: strip what RemoveNonUtf8 strips
            if (codeType == CleanStringType.Utf8)
            {
                return RemoveNonUtf8(text);
            }

            // anything else: convert to ascii
            return Utf8ToAsciiConverter.ToAsciiString(text);
        }
 /// <summary>
 /// Registers a configuration for a string role in a culture, replacing any existing one.
 /// </summary>
 /// <param name="culture">The culture.</param>
 /// <param name="stringRole">The string role.</param>
 /// <param name="preFilter">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <param name="breakTermsOnUpper">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <param name="allowLeadingDigits">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <param name="allowUnderscoreInTerm">Passed to the <c>HelperConfig</c> constructor.</param>
 /// <returns>This helper, so calls can be chained.</returns>
 public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole,
     Func<string, string> preFilter = null,
     bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
 {
     EnsureNotFrozen();

     // reuse the culture's table when there is one, else create and register it
     Dictionary<CleanStringType, HelperConfig> perRole;
     if (_configs.TryGetValue(culture, out perRole) == false)
     {
         perRole = new Dictionary<CleanStringType, HelperConfig>();
         _configs[culture] = perRole;
     }

     perRole[stringRole] = new HelperConfig(preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
     return this;
 }
        /// <summary>
        /// Copies one term of the input string into the output buffer, applying the requested casing.
        /// </summary>
        /// <param name="input">The input string.</param>
        /// <param name="ipos">The index in <paramref name="input"/> at which the term starts.</param>
        /// <param name="output">The output buffer.</param>
        /// <param name="opos">The write position in <paramref name="output"/>; advanced by <paramref name="len"/>.</param>
        /// <param name="len">The length of the term.</param>
        /// <param name="caseType">The target casing.</param>
        /// <param name="culture">The culture used for case conversions.</param>
        /// <param name="isAcronym">Whether the term is an acronym.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="caseType"/> is not one of the handled casings.</exception>
        /// <remarks>Exactly <paramref name="len"/> chars are copied, i.e. case conversions are
        /// assumed to preserve the length of the term.</remarks>
        internal void CopyUtf8Term(string input, int ipos, char[] output, ref int opos, int len,
            CleanStringType caseType, CultureInfo culture, /*Func<string, string> termFilter,*/ bool isAcronym)
        {
            var term = input.Substring(ipos, len);

            // an acronym is copied unchanged for UmbracoCase, for PascalCase when at most two chars
            // long, and for CamelCase when at most two chars long and not first in the output
            var copyAcronymAsIs = isAcronym
                && ((caseType == CleanStringType.CamelCase && len <= 2 && opos > 0)
                    || (caseType == CleanStringType.PascalCase && len <= 2)
                    || (caseType == CleanStringType.UmbracoCase));
            if (copyAcronymAsIs)
            {
                caseType = CleanStringType.Unchanged;
            }

            switch (caseType)
            {
                case CleanStringType.Unchanged:
                {
                    term.CopyTo(0, output, opos, len);
                    opos += len;
                    break;
                }

                case CleanStringType.LowerCase:
                {
                    var lowered = term.ToLower(culture);
                    lowered.CopyTo(0, output, opos, len);
                    opos += len;
                    break;
                }

                case CleanStringType.UpperCase:
                {
                    var uppered = term.ToUpper(culture);
                    uppered.CopyTo(0, output, opos, len);
                    opos += len;
                    break;
                }

                case CleanStringType.CamelCase:
                {
                    // first char: lower at the very start of the output, upper otherwise
                    var head = term[0];
                    output[opos] = opos++ == 0 ? char.ToLower(head, culture) : char.ToUpper(head, culture);
                    // remaining chars: lower
                    if (len > 1)
                    {
                        term.ToLower(culture).CopyTo(1, output, opos, len - 1);
                    }
                    opos += len - 1;
                    break;
                }

                case CleanStringType.PascalCase:
                {
                    // first char: upper
                    var head = term[0];
                    output[opos++] = char.ToUpper(head, culture);
                    // remaining chars: lower
                    if (len > 1)
                    {
                        term.ToLower(culture).CopyTo(1, output, opos, len - 1);
                    }
                    opos += len - 1;
                    break;
                }

                case CleanStringType.UmbracoCase:
                {
                    // first char: unchanged at the very start of the output, upper otherwise
                    var head = term[0];
                    output[opos] = opos++ == 0 ? head : char.ToUpper(head, culture);
                    // remaining chars: unchanged
                    if (len > 1)
                    {
                        term.CopyTo(1, output, opos, len - 1);
                    }
                    opos += len - 1;
                    break;
                }

                default:
                    throw new ArgumentOutOfRangeException("caseType");
            }
        }
        /// <summary>
        /// Splits a string into terms and copies them to a new string, each term cased per
        /// <paramref name="caseType"/> and terms joined by <paramref name="separator"/>.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <param name="caseType">The string type; only its case bits (<c>CleanStringType.CaseMask</c>) are used.</param>
        /// <param name="separator">The separator written between terms; <c>char.MinValue</c> means no separator.</param>
        /// <param name="culture">The culture, passed on to <c>CopyUtf8Term</c>.</param>
        /// <param name="config">The configuration; <c>AllowUnderscoreInTerm</c>, <c>AllowLeadingDigits</c>,
        /// <c>BreakTermsOnUpper</c> and <c>GreedyAcronyms</c> are read.</param>
        /// <returns>The clean string.</returns>
        /// <remarks>Implemented as a single pass over the input driven by a four-state machine
        /// (<c>StateBreak</c>, <c>StateUp</c>, <c>StateWord</c>, <c>StateAcronym</c>); chars that are
        /// neither letters, digits nor (when allowed) underscores end the current term and are not copied.</remarks>
        internal string CleanUtf8String(string text, CleanStringType caseType, char separator, CultureInfo culture, HelperConfig config)
        {
            // opos: write position in output; ipos: start of the current term in input
            int opos = 0, ipos = 0;
            var state = StateBreak;

            caseType &= CleanStringType.CaseMask;

            // if we apply global ToUpper or ToLower to text here
            // then we cannot break words on uppercase chars
            var input = text;

            // it's faster to use an array than a StringBuilder
            var ilen = input.Length;
            var output = new char[ilen * 2]; // twice the length should be OK in all cases

            //var termFilter = config.TermFilter;

            for (var i = 0; i < ilen; i++)
            {
                // classify the current char
                var c = input[i];
                var isDigit = char.IsDigit(c);
                var isUpper = char.IsUpper(c); // false for digits, symbols...
                var isLower = char.IsLower(c); // false for digits, symbols...
                var isUnder = config.AllowUnderscoreInTerm && c == '_';
                var isTerm = char.IsLetterOrDigit(c) || isUnder;

                switch (state)
                {
                    // between terms: wait for a char that can start a term
                    case StateBreak:
                        // once something has been written any term char starts a term; before that,
                        // an underscore never does and a digit does only if leading digits are allowed
                        if (isTerm && (opos > 0 || (isUnder == false && (config.AllowLeadingDigits || isDigit == false))))
                        {
                            ipos = i;
                            if (opos > 0 && separator != char.MinValue)
                                output[opos++] = separator;
                            state = isUpper ? StateUp : StateWord;
                        }
                        break;

                    // inside a word: ends on a non-term char, or on an uppercase char if configured so
                    case StateWord:
                        if (isTerm == false || (config.BreakTermsOnUpper && isUpper))
                        {
                            CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ false);
                            ipos = i;
                            // a term char here can only be the uppercase char that broke the word
                            state = isTerm ? StateUp : StateBreak;
                            if (state != StateBreak && separator != char.MinValue)
                                output[opos++] = separator;
                        }
                        break;

                    // inside a run of uppercase chars: ends on a non-term, lowercase or digit char
                    case StateAcronym:
                        if (isTerm == false || isLower || isDigit)
                        {
                            // non-greedy: step back one char so that the last uppercase char is left
                            // out of the acronym and becomes the start of the next term
                            if (isLower && config.GreedyAcronyms == false)
                                i -= 1;
                            CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ true);
                            ipos = i;
                            state = isTerm ? StateWord : StateBreak;
                            if (state != StateBreak && separator != char.MinValue)
                                output[opos++] = separator;
                        }
                        break;

                    // a term has just started with one uppercase char: the next char decides what it is
                    case StateUp:
                        if (isTerm)
                        {
                            state = isUpper ? StateAcronym : StateWord;
                        }
                        else
                        {
                            // the term is that single uppercase char
                            CopyUtf8Term(input, ipos, output, ref opos, 1, caseType, culture, /*termFilter,*/ false);
                            state = StateBreak;
                        }
                        break;

                    default:
                        throw new Exception("Invalid state.");
                }
            }

            // end of input: flush the term that was being read, if any
            switch (state)
            {
                case StateBreak:
                    break;

                case StateWord:
                    CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ false);
                    break;

                // a trailing run of uppercase chars, even a single one, is copied as an acronym
                case StateAcronym:
                case StateUp:
                    CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ true);
                    break;

                default:
                    throw new Exception("Invalid state.");
            }

            return new string(output, 0, opos);
        }
        /// <summary>
        /// Cleans a string in the context of a specified culture, using a specified separator and configuration.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <param name="stringType">A flag indicating the target casing and encoding of the string. When no
        /// casing is set camelCase is used, and when no encoding is set Ascii is used.</param>
        /// <param name="separator">The separator.</param>
        /// <param name="culture">The culture.</param>
        /// <param name="config">The configuration.</param>
        /// <returns>The clean string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="culture"/> is null.</exception>
        /// <exception cref="NotImplementedException">The encoding is Unicode.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The encoding is none of Ascii, Utf8, Unicode.</exception>
        private string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture, HelperConfig config)
        {
            // be safe
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }
            if (culture == null)
            {
                throw new ArgumentNullException("culture");
            }

            // apply defaults
            var caseMissing = (stringType & CleanStringType.CaseMask) == CleanStringType.None;
            if (caseMissing)
            {
                stringType |= CleanStringType.CamelCase;
            }
            var codeMissing = (stringType & CleanStringType.CodeMask) == CleanStringType.None;
            if (codeMissing)
            {
                stringType |= CleanStringType.Ascii;
            }

            // apply pre-filter
            if (config.PreFilter != null)
            {
                text = config.PreFilter(text);
            }

            // recode
            var recoded = Recode(text, stringType);

            // clean
            switch (stringType & CleanStringType.CodeMask)
            {
                // Ascii shares the Utf8 cleaner (CleanAsciiString is not used)
                case CleanStringType.Ascii:
                case CleanStringType.Utf8:
                    return CleanUtf8String(recoded, stringType, separator, culture, config);

                case CleanStringType.Unicode:
                    throw new NotImplementedException("DefaultShortStringHelper does not handle unicode yet.");

                default:
                    throw new ArgumentOutOfRangeException("stringType");
            }
        }
 /// <summary>
 /// Cleans a string using the helper's default culture.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">Flags describing the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>Forwards to the four-argument overload with <c>char.MinValue</c> as the separator
 /// and the default culture.</remarks>
 public string CleanString(string text, CleanStringType stringType)
 {
     var separator = char.MinValue;
     var cleaned = CleanString(text, stringType, separator, _defaultCulture);
     return cleaned;
 }
 /// <summary>
 /// Registers a configuration for a string role, for the default culture.
 /// </summary>
 /// <param name="stringRole">The string role, forwarded as-is.</param>
 /// <param name="config">The configuration, forwarded as-is.</param>
 /// <returns>Whatever the culture-aware <c>WithConfig</c> overload returns.</returns>
 public DefaultShortStringHelper WithConfig(CleanStringType stringRole, Config config)
 {
     // same call as the culture-aware overload, with the default culture filled in
     var helper = WithConfig(_defaultCulture, stringRole, config);
     return helper;
 }
 /// <summary>
 /// Stub implementation: tags the input with a fixed marker instead of cleaning it.
 /// </summary>
 /// <param name="text">The text to tag.</param>
 /// <param name="stringType">Ignored.</param>
 /// <returns>The marker <c>CLEAN-STRING-A::</c> followed by <paramref name="text"/>.</returns>
 public string CleanString(string text, CleanStringType stringType)
 {
     var tagged = "CLEAN-STRING-A::" + text;
     return tagged;
 }
 /// <summary>
 /// Cleans a string for a given culture.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">Flags describing the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <param name="culture">The culture to clean the string for.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>Forwards to the four-argument overload with <c>char.MinValue</c> as the separator.</remarks>
 public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
 {
     var separator = char.MinValue;
     var cleaned = CleanString(text, stringType, separator, culture);
     return cleaned;
 }
 /// <summary>
 /// Stub implementation: tags the input with a fixed marker instead of cleaning it.
 /// </summary>
 /// <param name="text">The text to tag.</param>
 /// <param name="stringType">Ignored.</param>
 /// <param name="separator">Ignored.</param>
 /// <param name="culture">Ignored.</param>
 /// <returns>The marker <c>CLEAN-STRING-D::</c> followed by <paramref name="text"/>.</returns>
 public string CleanString(string text, CleanStringType stringType, char separator, System.Globalization.CultureInfo culture)
 {
     var tagged = "CLEAN-STRING-D::" + text;
     return tagged;
 }
 /// <summary>
 /// Registers a configuration for a string role, for the default culture.
 /// </summary>
 /// <param name="stringRole">The string role, forwarded as-is.</param>
 /// <param name="preFilter">Optional pre-filter, forwarded as-is.</param>
 /// <param name="breakTermsOnUpper">Forwarded as-is.</param>
 /// <param name="allowLeadingDigits">Forwarded as-is.</param>
 /// <param name="allowUnderscoreInTerm">Forwarded as-is.</param>
 /// <returns>Whatever the culture-aware <c>WithConfig</c> overload returns.</returns>
 public DefaultShortStringHelper WithConfig(CleanStringType stringRole,
     Func<string, string> preFilter = null,
     bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
 {
     // same call as the culture-aware overload, with the default culture filled in
     var helper = WithConfig(
         _defaultCulture,
         stringRole,
         preFilter,
         breakTermsOnUpper,
         allowLeadingDigits,
         allowUnderscoreInTerm);

     return helper;
 }
 public void CleanStringWithTypeAndCulture(string input, string expected, string culture, CleanStringType stringType)
 {
     // a null culture name stands for the invariant culture
     CultureInfo cinfo;
     if (culture == null)
     {
         cinfo = CultureInfo.InvariantCulture;
     }
     else
     {
         cinfo = new CultureInfo(culture);
     }

     // url strings are cleaned with a dash separator, everything else with char.MinValue
     var separator = char.MinValue;
     if ((stringType & CleanStringType.Url) == CleanStringType.Url)
     {
         separator = '-';
     }

     var actual = _helper.CleanString(input, stringType, separator, cinfo);
     Assert.AreEqual(expected, actual);
 }
Example #34
0
        // the new methods to clean a string (to alias, url segment...)

        /// <summary>
        /// Cleans a string by delegating to <c>ShortStringHelper.CleanString</c>.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <param name="stringType">Flags describing the target casing and encoding of the string. By default,
        /// strings are cleaned up to camelCase and Ascii.</param>
        /// <returns>The clean string.</returns>
        /// <remarks>The string is cleaned in the context of the IShortStringHelper default culture.</remarks>
        public static string ToCleanString(this string text, CleanStringType stringType)
        {
            var cleaned = ShortStringHelper.CleanString(text, stringType);
            return cleaned;
        }
 public void LegacyConvertStringCase(string input, string expected, CleanStringType caseType)
 {
     // NOTE: LegacyConvertStringCase misbehaves on a few inputs, so those
     // test cases are ignored. It also strips symbols etc. -- but apparently
     // not the quote?
     var actual = _helper.LegacyConvertStringCase(input, caseType);

     Assert.AreEqual(expected, actual);
 }
 public void CleanStringToAsciiWithTypeAndSeparator(string input, string expected, char separator, CleanStringType caseType)
 {
     // always request Ascii on top of the casing under test
     var asciiType = caseType | CleanStringType.Ascii;

     var actual = _helper.CleanString(input, asciiType, separator);

     Assert.AreEqual(expected, actual);
 }
 /// <summary>
 /// Cleans a string with an explicit separator, using the default culture.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">Flags describing the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <param name="separator">The separator.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>Forwards to the culture-aware overload, passing the default culture.</remarks>
 public string CleanString(string text, CleanStringType stringType, char separator)
 {
     var cleaned = CleanString(text, stringType, _defaultCulture, separator);
     return cleaned;
 }
 /// <summary>
 /// No-op implementation: hands the text back untouched.
 /// </summary>
 /// <param name="text">The text to return.</param>
 /// <param name="stringType">Ignored.</param>
 /// <param name="separator">Ignored.</param>
 /// <returns><paramref name="text"/>, unchanged.</returns>
 public string CleanString(string text, CleanStringType stringType, char separator)
 {
     var unchanged = text;
     return unchanged;
 }
        // here was a subtle, ascii-optimized version of the cleaning code, and I was
        // very proud of it until benchmarking showed it was an order of magnitude slower
        // than the utf8 version. Micro-optimizing sometimes isn't such a good idea.

        // note: does NOT support surrogate pairs in text
        /// <summary>
        /// Cleans a string by walking it one char at a time through a small state machine
        /// (break / word / uppercase / acronym) that detects terms, and copying each term
        /// into an output buffer via <c>CopyTerm</c>.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <param name="caseType">The string type; only its <c>CaseMask</c> bits are kept and
        /// forwarded to <c>CopyTerm</c>.</param>
        /// <param name="separator">The char written between terms; <c>char.MinValue</c> means
        /// that no separator is written.</param>
        /// <param name="culture">The culture, forwarded to <c>CopyTerm</c>.</param>
        /// <param name="config">The configuration; supplies <c>IsTerm</c>, <c>BreakTermsOnUpper</c>,
        /// <c>CutAcronymOnNonUpper</c> and <c>GreedyAcronyms</c>.</param>
        /// <returns>A new string made of the chars written to the output buffer.</returns>
        /// <exception cref="NotSupportedException">The text contains a surrogate char.</exception>
        internal string CleanCodeString(string text, CleanStringType caseType, char separator, CultureInfo culture, Config config)
        {
            // opos: next write index in output
            // ipos: index in input where the current term starts
            int opos = 0, ipos = 0;
            var state = StateBreak;

            caseType &= CleanStringType.CaseMask;

            // if we apply global ToUpper or ToLower to text here
            // then we cannot break words on uppercase chars
            var input = text;

            // it's faster to use an array than a StringBuilder
            var ilen   = input.Length;
            var output = new char[ilen * 2]; // twice the length should be OK in all cases

            for (var i = 0; i < ilen; i++)
            {
                var c = input[i];
                // leading as long as StateBreak and ipos still zero
                var leading = state == StateBreak && ipos == 0;
                var isTerm  = config.IsTerm(c, leading);

                //var isDigit = char.IsDigit(c);
                var isUpper = char.IsUpper(c); // false for digits, symbols...
                //var isLower = char.IsLower(c); // false for digits, symbols...

                // what should I do with surrogates?
                // no idea, really, so they are not supported at the moment
                var isPair = char.IsSurrogate(c);
                if (isPair)
                {
                    throw new NotSupportedException("Surrogate pairs are not supported.");
                }

                switch (state)
                {
                // within a break
                case StateBreak:
                    // begin a new term if char is a term char,
                    // and ( pos > 0 or it's also a valid leading char )
                    if (isTerm)
                    {
                        ipos = i;
                        // only separate from a previous term: nothing is written
                        // before the very first term (opos is still zero then)
                        if (opos > 0 && separator != char.MinValue)
                        {
                            output[opos++] = separator;
                        }
                        state = isUpper ? StateUp : StateWord;
                    }
                    break;

                // within a term / word
                case StateWord:
                    // end a term if char is not a term char,
                    // or ( it's uppercase and we break terms on uppercase)
                    if (isTerm == false || (config.BreakTermsOnUpper && isUpper))
                    {
                        // flush the chars from ipos up to (not including) the current char
                        CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, false);
                        ipos  = i;
                        // a term char can only get here when it is uppercase, so it
                        // starts a new term in StateUp; anything else is a break
                        state = isTerm ? StateUp : StateBreak;
                        if (state != StateBreak && separator != char.MinValue)
                        {
                            output[opos++] = separator;
                        }
                    }
                    break;

                // within a term / acronym
                case StateAcronym:
                    // end an acronym if char is not a term char,
                    // or if it's not uppercase / config
                    if (isTerm == false || (config.CutAcronymOnNonUpper && isUpper == false))
                    {
                        // whether it's part of the acronym depends on whether we're greedy
                        if (isTerm && config.GreedyAcronyms == false)
                        {
                            i -= 1;       // handle that char again, in another state - not part of the acronym
                        }
                        // note: the length below uses i after the possible rewind above
                        if (i - ipos > 1) // single-char can't be an acronym
                        {
                            // last argument is true here and false on the word paths
                            // NOTE(review): presumably flags the term as an acronym - confirm against CopyTerm
                            CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, true);
                            ipos  = i;
                            state = isTerm ? StateWord : StateBreak;
                            if (state != StateBreak && separator != char.MinValue)
                            {
                                output[opos++] = separator;
                            }
                        }
                        else if (isTerm)
                        {
                            // too short for an acronym: nothing is copied and ipos is kept,
                            // so the chars seen so far stay part of the current term
                            state = StateWord;
                        }
                    }
                    else if (isUpper == false)     // isTerm == true
                    {
                        // it's a term char and we don't cut...
                        // keep moving forward as a word
                        state = StateWord;
                    }
                    break;

                // within a term / uppercase = could be a word or an acronym
                case StateUp:
                    if (isTerm)
                    {
                        // add that char to the term and pick word or acronym
                        state = isUpper ? StateAcronym : StateWord;
                    }
                    else
                    {
                        // single char, copy then break
                        CopyTerm(input, ipos, output, ref opos, 1, caseType, culture, false);
                        state = StateBreak;
                    }
                    break;

                default:
                    throw new Exception("Invalid state.");
                }
            }

            // end of input: flush the term that is still pending, if any
            switch (state)
            {
            case StateBreak:
                // not within a term, nothing left to copy
                break;

            case StateWord:
                CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, false);
                break;

            case StateAcronym:
            case StateUp:
                // both states pass true as the last CopyTerm argument
                CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, true);
                break;

            default:
                throw new Exception("Invalid state.");
            }

            // only the first opos chars of the buffer have been written
            return(new string(output, 0, opos));
        }
 /// <summary>
 /// No-op implementation: hands the text back untouched.
 /// </summary>
 /// <param name="text">The text to return.</param>
 /// <param name="stringType">Ignored.</param>
 /// <param name="separator">Ignored.</param>
 /// <param name="culture">Ignored.</param>
 /// <returns><paramref name="text"/>, unchanged.</returns>
 public string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
 {
     var unchanged = text;
     return unchanged;
 }
        // legacy does not implement these

        /// <summary>
        /// No-op implementation: hands the text back untouched.
        /// </summary>
        /// <param name="text">The text to return.</param>
        /// <param name="stringType">Ignored.</param>
        /// <returns><paramref name="text"/>, unchanged.</returns>
        public string CleanString(string text, CleanStringType stringType)
        {
            var unchanged = text;
            return unchanged;
        }
 public void CleanStringToAsciiWithType(string input, string expected, CleanStringType caseType)
 {
     // always request Ascii on top of the casing under test
     var asciiType = caseType | CleanStringType.Ascii;

     var actual = _helper.CleanString(input, asciiType);

     Assert.AreEqual(expected, actual);
 }
 /// <summary>
 /// No-op implementation: hands the text back untouched.
 /// </summary>
 /// <param name="text">The text to return.</param>
 /// <param name="stringType">Ignored.</param>
 /// <param name="culture">Ignored.</param>
 /// <returns><paramref name="text"/>, unchanged.</returns>
 public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
 {
     var unchanged = text;
     return unchanged;
 }
Example #44
0
 /// <summary>
 /// Stub implementation: tags the input with a fixed marker instead of cleaning it.
 /// </summary>
 /// <param name="text">The text to tag.</param>
 /// <param name="stringType">Ignored.</param>
 /// <param name="separator">Ignored.</param>
 /// <param name="culture">Ignored.</param>
 /// <returns>The marker <c>CLEAN-STRING-D::</c> followed by <paramref name="text"/>.</returns>
 public string CleanString(string text, CleanStringType stringType, char separator, string culture)
 {
     var tagged = "CLEAN-STRING-D::" + text;
     return tagged;
 }
Example #45
0
 /// <summary>
 /// Cleans a string with an explicit separator, by delegating to
 /// <c>ShortStringHelper.CleanString</c>.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">Flags describing the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <param name="separator">The separator.</param>
 /// <returns>The clean string.</returns>
 /// <remarks>The string is cleaned in the context of the IShortStringHelper default culture.</remarks>
 public static string ToCleanString(this string text, CleanStringType stringType, char separator)
 {
     var cleaned = ShortStringHelper.CleanString(text, stringType, separator);
     return cleaned;
 }
        /// <summary>
        /// Gets the configuration to use for a string type and a culture.
        /// </summary>
        /// <param name="stringType">The string type to look up; used as-is as the dictionary key.</param>
        /// <param name="culture">The culture to look up.</param>
        /// <returns>
        /// The configuration registered for <paramref name="stringType"/>, else the one registered
        /// for <c>CleanStringType.RoleMask</c> (generic, all roles), else <c>HelperConfig.Empty</c>.
        /// </returns>
        /// <remarks>
        /// The default culture is consulted only when nothing at all is registered for
        /// <paramref name="culture"/>: a culture that is registered but has no matching entry
        /// yields <c>HelperConfig.Empty</c>, not the default culture's configuration.
        /// </remarks>
        private HelperConfig GetConfig(CleanStringType stringType, CultureInfo culture)
        {
            // pick the per-culture table: the requested culture wins, the default
            // culture is the fallback, and no table at all means no configuration
            Dictionary<CleanStringType, HelperConfig> roles;
            if (_configs.TryGetValue(culture, out roles) == false
                && _configs.TryGetValue(_defaultCulture, out roles) == false)
            {
                return HelperConfig.Empty;
            }

            HelperConfig config;

            // have we got a config for _that_ role?
            if (roles.TryGetValue(stringType, out config))
                return config;

            // have we got a generic config for _all_ roles?
            if (roles.TryGetValue(CleanStringType.RoleMask, out config))
                return config;

            return HelperConfig.Empty;
        }
 public void CleanStringToAsciiWithCaseAndSeparator(string input, string expected, char separator, CleanStringType caseType)
 {
     // always request Ascii on top of the casing under test
     var asciiType = caseType | CleanStringType.Ascii;

     var actual = _helper.CleanString(input, asciiType, separator);

     // the legacy helper does nothing, so the input must come back as-is
     Assert.AreEqual(input, actual);
 }
Example #48
0
 /// <summary>
 /// Cleans a string for a given culture and with an explicit separator, by delegating to
 /// <c>ShortStringHelper.CleanString</c>.
 /// </summary>
 /// <param name="text">The text to clean.</param>
 /// <param name="stringType">Flags describing the target casing and encoding of the string. By default,
 /// strings are cleaned up to camelCase and Ascii.</param>
 /// <param name="separator">The separator.</param>
 /// <param name="culture">The culture.</param>
 /// <returns>The clean string.</returns>
 public static string ToCleanString(this string text, CleanStringType stringType, char separator, CultureInfo culture)
 {
     var cleaned = ShortStringHelper.CleanString(text, stringType, separator, culture);
     return cleaned;
 }
        // legacy does not implement these

        /// <summary>
        /// No-op implementation: hands the text back untouched.
        /// </summary>
        /// <param name="text">The text to return.</param>
        /// <param name="stringType">Ignored.</param>
        /// <returns><paramref name="text"/>, unchanged.</returns>
        public string CleanString(string text, CleanStringType stringType)
        {
            var unchanged = text;
            return unchanged;
        }
Example #50
0
        public void CleanStringWithTypeAndCulture(string input, string expected, string culture, CleanStringType stringType)
        {
            // the helper picks the proper config for the culture, and that
            // config overrides some of the stringType params (ascii...)
            var actual = ShortStringHelper.CleanString(input, stringType, culture);
            Assert.AreEqual(expected, actual);
        }
 /// <summary>
 /// No-op implementation: hands the text back untouched.
 /// </summary>
 /// <param name="text">The text to return.</param>
 /// <param name="stringType">Ignored.</param>
 /// <param name="culture">Ignored.</param>
 /// <returns><paramref name="text"/>, unchanged.</returns>
 public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
 {
     var unchanged = text;
     return unchanged;
 }
Example #52
0
 /// <summary>
 /// Stub implementation: tags the input with a fixed marker instead of cleaning it.
 /// </summary>
 /// <param name="text">The text to tag.</param>
 /// <param name="stringType">Ignored.</param>
 /// <returns>The marker <c>CLEAN-STRING-A::</c> followed by <paramref name="text"/>.</returns>
 public string CleanString(string text, CleanStringType stringType)
 {
     var tagged = "CLEAN-STRING-A::" + text;
     return tagged;
 }
        /// <summary>
        /// Filters a string to convert case, and more.
        /// </summary>
        /// <param name="phrase">The text to filter.</param>
        /// <param name="cases">The string case type; only its <c>CaseMask</c> bits are considered.</param>
        /// <returns>The filtered text.</returns>
        /// <remarks>
        /// <para>This is the legacy method, so we can't really change it, although it has issues (see unit tests).</para>
        /// <para>It does more than "converting the case": every char that is not an ASCII letter,
        /// an ASCII digit or a single quote is treated as a separator and removed.</para>
        /// <para>For <c>Unchanged</c> the remaining parts are simply joined. Otherwise the first char of each
        /// part is uppercased, and for <c>CamelCase</c> the leading run of uppercase letters is then lowercased.</para>
        /// </remarks>
        public string LegacyConvertStringCase(string phrase, CleanStringType cases)
        {
            // ported from StringExtensions.ConvertCase

            cases &= CleanStringType.CaseMask;

            // split on anything that is not [a-zA-Z0-9']: separators are dropped,
            // and consecutive separators produce empty parts
            var parts = Regex.Split(phrase, @"[^a-zA-Z0-9\']", RegexOptions.Compiled);

            if (cases == CleanStringType.Unchanged)
                return string.Join("", parts);

            // uppercase the first char of each part and glue the parts together
            var sb = new StringBuilder();
            foreach (var part in parts)
            {
                if (part.Length == 0)
                    continue; // empty parts contribute nothing

                sb.Append(char.ToUpperInvariant(part[0]));
                sb.Append(part, 1, part.Length - 1);
            }

            var result = sb.ToString();

            if (cases != CleanStringType.CamelCase)
                return result;

            // zero or one char: nothing to match, just lowercase it
            if (result.Length <= 1)
                return result.ToLowerInvariant();

            // the static Regex.Match goes through the regex cache, so the compiled
            // pattern is built once instead of on every call
            var match = Regex.Match(result, "^([A-Z]*)([A-Z].*)$", RegexOptions.Singleline | RegexOptions.Compiled);
            if (match.Success == false)
                return result;

            // group 1 is the leading uppercase run minus its last letter: lowercase it,
            // then lowercase the very first char of the recombined string too
            result = match.Groups[1].Value.ToLowerInvariant() + match.Groups[2].Value;

            return result.Substring(0, 1).ToLowerInvariant() + result.Substring(1);
        }
Example #54
0
 /// <summary>
 /// Stub implementation: tags the input with a fixed marker instead of cleaning it.
 /// </summary>
 /// <param name="text">The text to tag.</param>
 /// <param name="stringType">Ignored.</param>
 /// <param name="culture">Ignored.</param>
 /// <returns>The marker <c>CLEAN-STRING-C::</c> followed by <paramref name="text"/>.</returns>
 public string CleanString(string text, CleanStringType stringType, string culture)
 {
     var tagged = "CLEAN-STRING-C::" + text;
     return tagged;
 }