/// <summary>
/// Resolves the configuration to use for a string role in a culture.
/// </summary>
/// <param name="stringType">The string type; only its role bits (<c>RoleMask</c>) are used.</param>
/// <param name="culture">The culture whose configuration table is looked up first.</param>
/// <returns>The role-specific configuration when one exists, else the generic (all roles)
/// configuration, else <c>Config.NotConfigured</c>.</returns>
/// <remarks>The default culture table is only consulted when no table at all exists for
/// <paramref name="culture"/>. A culture table that has no matching entry yields
/// <c>Config.NotConfigured</c> without falling back to the default culture.</remarks>
private Config GetConfig(CleanStringType stringType, CultureInfo culture)
{
    stringType = stringType & CleanStringType.RoleMask;

    // pick the culture table, or the default culture table when the culture has none
    // (TryGetValue: one lookup per dictionary instead of ContainsKey + indexer)
    Dictionary<CleanStringType, Config> config;
    if (_configs.TryGetValue(culture, out config) == false
        && _configs.TryGetValue(_defaultCulture, out config) == false)
    {
        return Config.NotConfigured;
    }

    Config found;

    // have we got a config for _that_ role?
    if (config.TryGetValue(stringType, out found))
    {
        return found;
    }

    // have we got a generic config for _all_ roles?
    if (config.TryGetValue(CleanStringType.RoleMask, out found))
    {
        return found;
    }

    return Config.NotConfigured;
}
public void CleanStringToAsciiWithCase(string input, string expected, CleanStringType caseType)
{
    var asciiType = caseType | CleanStringType.Ascii;
    var actual = _helper.CleanString(input, asciiType);

    // legacy does nothing: the result is compared with the input itself,
    // so the 'expected' argument is deliberately not used here
    Assert.AreEqual(input, actual);
}
/// <summary>
/// Gets the configuration registered for a string role in a culture.
/// </summary>
/// <param name="stringType">The string type; only its role bits (<c>RoleMask</c>) are used.</param>
/// <param name="culture">The culture name; <c>null</c> is treated as the empty string.</param>
/// <returns>The role-specific configuration when one exists, else the generic (all roles)
/// configuration, else <c>Config.NotConfigured</c>.</returns>
/// <remarks>
/// <para>Internal: we don't want people to retrieve a config and modify it
/// (the helper uses a private clone to prevent modifications).</para>
/// <para>The <c>DefaultCulture</c> table is only consulted when no table at all exists for
/// <paramref name="culture"/>.</para>
/// </remarks>
internal Config For(CleanStringType stringType, string culture)
{
    culture = culture ?? "";
    stringType = stringType & CleanStringType.RoleMask;

    // pick the culture table, or the default culture table when the culture has none
    // (TryGetValue: one lookup per dictionary instead of ContainsKey + indexer)
    Dictionary<CleanStringType, Config> config;
    if (_configs.TryGetValue(culture, out config) == false
        && _configs.TryGetValue(DefaultCulture, out config) == false)
    {
        return Config.NotConfigured;
    }

    Config found;

    // have we got a config for _that_ role?
    if (config.TryGetValue(stringType, out found))
    {
        return found;
    }

    // have we got a generic config for _all_ roles?
    if (config.TryGetValue(CleanStringType.RoleMask, out found))
    {
        return found;
    }

    return Config.NotConfigured;
}
/// <summary>
/// Forwards to the culture-specific <c>WithConfig</c> overload, using the default culture.
/// </summary>
/// <param name="stringRole">The string role the configuration applies to.</param>
/// <param name="preFilter">An optional pre-filter; passed through unchanged.</param>
/// <param name="breakTermsOnUpper">Passed through unchanged.</param>
/// <param name="allowLeadingDigits">Passed through unchanged.</param>
/// <param name="allowUnderscoreInTerm">Passed through unchanged.</param>
/// <returns>Whatever the culture-specific overload returns.</returns>
public DefaultShortStringHelper WithConfig(CleanStringType stringRole, Func<string, string> preFilter = null, bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
    => WithConfig(_defaultCulture, stringRole, preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
public void LegacyConvertStringCase(string input, string expected, CleanStringType caseType)
{
    // NOTE LegacyConvertStringCase has issues with a few cases
    // -> ignore test cases
    // also it removes symbols, etc... except the quote?

    var actual = _helper.LegacyConvertStringCase(input, caseType);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Cleans a string in the context of a specified culture, optionally overriding the configured separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. When neither
/// the flag nor the configuration specifies them, camelCase and Ascii are used.</param>
/// <param name="culture">The culture.</param>
/// <param name="separator">The separator, or <c>null</c> to use the configured separator.</param>
/// <returns>The clean string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="culture"/> is null.</exception>
protected virtual string CleanString(string text, CleanStringType stringType, CultureInfo culture, char? separator)
{
    // be safe
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    if (culture == null)
    {
        throw new ArgumentNullException("culture");
    }

    // get the config, and let it extend the requested string type
    var config = GetConfig(stringType, culture);
    stringType = config.StringTypeExtend(stringType);

    // apply defaults: camelCase when no case is set, Ascii when no code is set
    var caseMissing = (stringType & CleanStringType.CaseMask) == CleanStringType.None;
    if (caseMissing)
    {
        stringType |= CleanStringType.CamelCase;
    }
    var codeMissing = (stringType & CleanStringType.CodeMask) == CleanStringType.None;
    if (codeMissing)
    {
        stringType |= CleanStringType.Ascii;
    }

    // use the configured separator unless one was specified
    if (separator.HasValue == false)
    {
        separator = config.Separator;
    }

    // apply pre-filter
    if (config.PreFilter != null)
    {
        text = config.PreFilter(text);
    }

    // recode
    var codeType = stringType & CleanStringType.CodeMask;
    if (codeType == CleanStringType.Ascii)
    {
        text = Utf8ToAsciiConverter.ToAsciiString(text);
    }
    else
    {
        text = RemoveSurrogatePairs(text);
    }

    // clean
    text = CleanCodeString(text, stringType, separator.Value, culture, config);

    // apply post-filter
    if (config.PostFilter != null)
    {
        text = config.PostFilter(text);
    }

    return text;
}
/// <summary>
/// Cleans a string in the context of a specified culture, using a specified separator and configuration.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="separator">The separator.</param>
/// <param name="culture">The culture.</param>
/// <param name="config">The configuration.</param>
/// <returns>The clean string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="culture"/> is null.</exception>
/// <exception cref="NotImplementedException">The code type is <c>Unicode</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException">The code type is not a known value.</exception>
private string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture, HelperConfig config)
{
    // be safe
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    if (culture == null)
    {
        throw new ArgumentNullException("culture");
    }

    // apply defaults: camelCase when no case is set, Ascii when no code is set
    var caseMissing = (stringType & CleanStringType.CaseMask) == CleanStringType.None;
    if (caseMissing)
    {
        stringType |= CleanStringType.CamelCase;
    }
    var codeMissing = (stringType & CleanStringType.CodeMask) == CleanStringType.None;
    if (codeMissing)
    {
        stringType |= CleanStringType.Ascii;
    }

    var codeType = stringType & CleanStringType.CodeMask;

    // apply pre-filter
    if (config.PreFilter != null)
    {
        text = config.PreFilter(text);
    }

    // recode (runs before the code type is validated below, as it always did)
    text = Recode(text, stringType);

    // clean
    if (codeType == CleanStringType.Unicode)
    {
        throw new NotImplementedException("DefaultShortStringHelper does not handle unicode yet.");
    }
    if (codeType != CleanStringType.Ascii && codeType != CleanStringType.Utf8)
    {
        throw new ArgumentOutOfRangeException("stringType");
    }

    // Ascii deliberately shares the Utf8 cleaner - don't use CleanAsciiString
    return CleanUtf8String(text, stringType, separator, culture, config);
}
/// <summary>
/// Sets the configuration for a string role in a culture, built from the supplied options.
/// </summary>
/// <param name="culture">The culture.</param>
/// <param name="stringRole">The string role used as the key within the culture table.</param>
/// <param name="preFilter">Passed to the <c>HelperConfig</c> constructor.</param>
/// <param name="breakTermsOnUpper">Passed to the <c>HelperConfig</c> constructor.</param>
/// <param name="allowLeadingDigits">Passed to the <c>HelperConfig</c> constructor.</param>
/// <param name="allowUnderscoreInTerm">Passed to the <c>HelperConfig</c> constructor.</param>
/// <returns>This instance, so calls can be chained.</returns>
public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, Func<string, string> preFilter = null, bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
{
    EnsureNotFrozen();

    // create the culture table on first use
    if (!_configs.ContainsKey(culture))
    {
        _configs[culture] = new Dictionary<CleanStringType, HelperConfig>();
    }

    var roleConfigs = _configs[culture];
    roleConfigs[stringRole] = new HelperConfig(preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);

    return this;
}
/// <summary>
/// Sets the configuration for a string role in a culture.
/// </summary>
/// <param name="culture">The culture.</param>
/// <param name="stringRole">The string role used as the key within the culture table.</param>
/// <param name="config">The configuration; a clone of it is stored.</param>
/// <returns>This instance, so calls can be chained.</returns>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, Config config)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    EnsureNotFrozen();

    // create the culture table on first use
    if (!_configs.ContainsKey(culture))
    {
        _configs[culture] = new Dictionary<CleanStringType, Config>();
    }

    // clone so it can't be changed
    var roleConfigs = _configs[culture];
    roleConfigs[stringRole] = config.Clone();

    return this;
}
/// <summary>
/// Filters a string to convert case, and more.
/// </summary>
/// <param name="phrase">The text to filter.</param>
/// <param name="cases">The string case type; only its case bits (<c>CaseMask</c>) are used.</param>
/// <returns>The filtered text.</returns>
/// <remarks>
/// <para>This is the legacy method, so we can't really change it, although it has issues (see unit tests).</para>
/// <para>It does more than "converting the case": every character other than an ASCII letter, an ASCII
/// digit or a single quote is removed, and acts as a word boundary.</para>
/// </remarks>
public string LegacyConvertStringCase(string phrase, CleanStringType cases)
{
    // ported from StringExtensions.ConvertCase

    cases &= CleanStringType.CaseMask;

    var splittedPhrase = Regex.Split(phrase, @"[^a-zA-Z0-9\']", RegexOptions.Compiled);

    if (cases == CleanStringType.Unchanged)
    {
        return string.Join("", splittedPhrase);
    }

    // upper-case the first char of each part; parts only contain [a-zA-Z0-9'] because
    // of the split pattern above, so a per-char invariant upper-casing is exact
    var sb = new StringBuilder();
    foreach (var part in splittedPhrase)
    {
        if (part.Length == 0)
        {
            continue;
        }
        sb.Append(char.ToUpperInvariant(part[0]));
        sb.Append(part, 1, part.Length - 1);
    }

    var result = sb.ToString();

    if (cases != CleanStringType.CamelCase)
    {
        return result;
    }

    if (result.Length <= 1)
    {
        return result.ToLowerInvariant();
    }

    // the static Regex.Match goes through the Regex cache, so the compiled pattern
    // is reused across calls instead of being rebuilt by 'new Regex(...)' every time
    var match = Regex.Match(result, "^([A-Z]*)([A-Z].*)$", RegexOptions.Singleline | RegexOptions.Compiled);
    if (match.Success == false)
    {
        return result;
    }

    // lower-case the leading run of uppercase chars, except the last one of the run
    result = match.Groups[1].Value.ToLowerInvariant() + match.Groups[2].Value;
    return result.Substring(0, 1).ToLowerInvariant() + result.Substring(1);
}
/// <summary>
/// Sets the configuration for a string role in a culture.
/// </summary>
/// <param name="culture">The culture name; <c>null</c> is treated as the empty string.</param>
/// <param name="stringRole">The string role used as the key within the culture table.</param>
/// <param name="config">The configuration; the instance itself is stored.</param>
/// <returns>This instance, so calls can be chained.</returns>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public DefaultShortStringHelperConfig WithConfig(string culture, CleanStringType stringRole, Config config)
{
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    if (culture == null)
    {
        culture = "";
    }

    // create the culture table on first use
    if (!_configs.ContainsKey(culture))
    {
        _configs[culture] = new Dictionary<CleanStringType, Config>();
    }

    var roleConfigs = _configs[culture];
    roleConfigs[stringRole] = config;

    return this;
}
/// <summary>
/// Extends the configured string type with a requested string type.
/// </summary>
/// <param name="stringType">The requested string type.</param>
/// <returns>The configured <c>StringType</c>, in which the case part and the code part are each
/// replaced by the corresponding part of <paramref name="stringType"/> when that part is not zero.</returns>
public CleanStringType StringTypeExtend(CleanStringType stringType)
{
    var extended = StringType;

    CleanStringType[] masks = { CleanStringType.CaseMask, CleanStringType.CodeMask };
    foreach (var mask in masks)
    {
        var requested = stringType & mask;
        if (requested != 0)
        {
            // clear what we have, then set the new value
            extended = (extended & ~mask) | requested;
        }
    }

    return extended;
}
/// <summary>
/// Returns a new string containing only characters within the specified code type.
/// </summary>
/// <param name="text">The string to filter.</param>
/// <param name="stringType">The string type; only its code bits (<c>CodeMask</c>) are used.</param>
/// <returns>The text itself for <c>Unicode</c>, the result of <c>RemoveNonUtf8</c> for <c>Utf8</c>,
/// and the result of <c>Utf8ToAsciiConverter.ToAsciiString</c> for anything else.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public virtual string Recode(string text, CleanStringType stringType)
{
    // be safe
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    var codeType = stringType & CleanStringType.CodeMask;

    // unicode: nothing to remove
    if (codeType == CleanStringType.Unicode)
    {
        return text;
    }

    // utf8: just remove the non-utf8 chars
    if (codeType == CleanStringType.Utf8)
    {
        return RemoveNonUtf8(text);
    }

    // ascii (or unspecified): try to be clever and replace some chars
    return Utf8ToAsciiConverter.ToAsciiString(text);
}
/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="separator">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType, char separator) => text;
/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="separator">Ignored.</param>
/// <param name="culture">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture) => text;
/// <summary>
/// Cleans a string in the context of a specified culture.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="culture">The culture.</param>
/// <returns>The clean string.</returns>
/// <remarks>No separator is specified (<c>null</c> is passed to the separator-aware overload).</remarks>
public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
    => CleanString(text, stringType, culture, null);
/// <summary>
/// Copies one term of the input string to the output buffer, applying the requested casing.
/// </summary>
/// <param name="input">The input string.</param>
/// <param name="ipos">The position of the term in <paramref name="input"/>.</param>
/// <param name="output">The output buffer.</param>
/// <param name="opos">The write position in <paramref name="output"/>; advanced by the number of chars written.</param>
/// <param name="len">The length of the term.</param>
/// <param name="caseType">The casing to apply.</param>
/// <param name="culture">The culture used for case conversions.</param>
/// <param name="isAcronym">Whether the term is an acronym. Acronyms are copied unchanged in UmbracoCase,
/// in PascalCase when at most 2 chars long, and in camelCase when at most 2 chars long and not first in the output.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="caseType"/> is not a supported casing.</exception>
/// <remarks>Supports surrogate pairs in the input string: a term starting with a surrogate pair has
/// the pair case-converted as a whole.</remarks>
internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len, CleanStringType caseType, CultureInfo culture, bool isAcronym)
{
    var term = input.Substring(ipos, len);

    if (isAcronym)
    {
        if ((caseType == CleanStringType.CamelCase && len <= 2 && opos > 0) ||
            (caseType == CleanStringType.PascalCase && len <= 2) ||
            (caseType == CleanStringType.UmbracoCase))
        {
            caseType = CleanStringType.Unchanged;
        }
    }

    // note: MSDN seems to imply that ToUpper or ToLower preserve the length
    // of the string, but that this behavior is not guaranteed and could change.
    // so the converted string's own length is used whenever it is copied.

    char c;
    int i;
    string s;
    switch (caseType)
    {
        //case CleanStringType.LowerCase:
        //case CleanStringType.UpperCase:
        case CleanStringType.Unchanged:
            term.CopyTo(0, output, opos, len);
            opos += len;
            break;

        case CleanStringType.LowerCase:
            term = term.ToLower(culture);
            term.CopyTo(0, output, opos, term.Length);
            opos += term.Length;
            break;

        case CleanStringType.UpperCase:
            term = term.ToUpper(culture);
            term.CopyTo(0, output, opos, term.Length);
            opos += term.Length;
            break;

        case CleanStringType.CamelCase:
            c = term[0];
            i = 1;
            if (char.IsSurrogate(c) && len > 1)
            {
                // 'term' has already been cut out of 'input', so the pair is at index 0
                // of the term (indexing with ipos read the wrong chars or threw)
                s = term.Substring(0, 2);
                s = opos == 0 ? s.ToLower(culture) : s.ToUpper(culture);
                s.CopyTo(0, output, opos, s.Length);
                opos += s.Length;
                i++; // surrogate pair len is 2
            }
            else
            {
                // first term of the output starts lowercase, other terms start uppercase
                output[opos] = opos == 0 ? char.ToLower(c, culture) : char.ToUpper(c, culture);
                opos++;
            }
            if (len > i)
            {
                term = term.Substring(i).ToLower(culture);
                term.CopyTo(0, output, opos, term.Length);
                opos += term.Length;
            }
            break;

        case CleanStringType.PascalCase:
            c = term[0];
            i = 1;
            if (char.IsSurrogate(c) && len > 1)
            {
                // the pair is at index 0 of the term, not at ipos
                s = term.Substring(0, 2);
                s = s.ToUpper(culture);
                s.CopyTo(0, output, opos, s.Length);
                opos += s.Length;
                i++; // surrogate pair len is 2
            }
            else
            {
                output[opos++] = char.ToUpper(c, culture);
            }
            if (len > i)
            {
                term = term.Substring(i).ToLower(culture);
                term.CopyTo(0, output, opos, term.Length);
                opos += term.Length;
            }
            break;

        case CleanStringType.UmbracoCase:
            c = term[0];
            i = 1;
            if (char.IsSurrogate(c) && len > 1)
            {
                // the pair is at index 0 of the term, not at ipos
                s = term.Substring(0, 2);
                s = opos == 0 ? s : s.ToUpper(culture);
                s.CopyTo(0, output, opos, s.Length);
                opos += s.Length;
                i++; // surrogate pair len is 2
            }
            else
            {
                // first term of the output keeps its first char, other terms start uppercase
                output[opos] = opos == 0 ? c : char.ToUpper(c, culture);
                opos++;
            }
            if (len > i)
            {
                // the rest of the term is copied unchanged
                term = term.Substring(i);
                term.CopyTo(0, output, opos, term.Length);
                opos += term.Length;
            }
            break;

        default:
            throw new ArgumentOutOfRangeException("caseType");
    }
}
/// <summary>
/// Cleans a string, using a specified separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="separator">The separator.</param>
/// <returns>The clean string.</returns>
/// <remarks>The string is cleaned in the context of the default culture.</remarks>
public string CleanString(string text, CleanStringType stringType, char separator)
    => CleanString(text, stringType, separator, _defaultCulture);
/// <summary>
/// Cleans a string.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <returns>The clean string.</returns>
/// <remarks>The string is cleaned in the context of the default culture, and no separator is
/// specified (<c>null</c> is passed to the separator-aware overload).</remarks>
public string CleanString(string text, CleanStringType stringType)
    => CleanString(text, stringType, _defaultCulture, null);
/// <summary>
/// Returns the text prefixed with the literal <c>CLEAN-STRING-B::</c> marker; no cleaning is performed.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="separator">Ignored.</param>
/// <returns>The marker followed by <paramref name="text"/>.</returns>
public string CleanString(string text, CleanStringType stringType, char separator)
    => string.Concat("CLEAN-STRING-B::", text);
/// <summary>
/// Cleans a string in the context of a specified culture, using a specified separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="separator">The separator.</param>
/// <param name="culture">The culture.</param>
/// <returns>The clean string.</returns>
/// <remarks>The configuration is looked up from the role bits of <paramref name="stringType"/> and the culture.</remarks>
public virtual string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
{
    // only the role part of the string type selects the configuration
    var role = stringType & CleanStringType.RoleMask;
    return CleanString(text, stringType, separator, culture, GetConfig(role, culture));
}
/// <summary>
/// Returns a new string containing only characters within the specified code type.
/// </summary>
/// <param name="text">The string to filter.</param>
/// <param name="stringType">The string type; only its code bits (<c>CodeMask</c>) are used.</param>
/// <returns>The text itself for <c>Unicode</c>, the result of <c>RemoveNonUtf8</c> for <c>Utf8</c>,
/// and the result of <c>Utf8ToAsciiConverter.ToAsciiString</c> for anything else.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public virtual string Recode(string text, CleanStringType stringType)
{
    // be safe
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    var codeType = stringType & CleanStringType.CodeMask;

    // unicode: nothing to remove
    if (codeType == CleanStringType.Unicode)
    {
        return text;
    }

    // utf8: just remove the non-utf8 chars
    if (codeType == CleanStringType.Utf8)
    {
        return RemoveNonUtf8(text);
    }

    // ascii (or unspecified): try to be clever and replace some chars
    return Utf8ToAsciiConverter.ToAsciiString(text);
}
/// <summary>
/// Sets the configuration for a string role in a culture, built from the supplied options.
/// </summary>
/// <param name="culture">The culture.</param>
/// <param name="stringRole">The string role used as the key within the culture table.</param>
/// <param name="preFilter">Passed to the <c>HelperConfig</c> constructor.</param>
/// <param name="breakTermsOnUpper">Passed to the <c>HelperConfig</c> constructor.</param>
/// <param name="allowLeadingDigits">Passed to the <c>HelperConfig</c> constructor.</param>
/// <param name="allowUnderscoreInTerm">Passed to the <c>HelperConfig</c> constructor.</param>
/// <returns>This instance, so calls can be chained.</returns>
public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, Func<string, string> preFilter = null, bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
{
    EnsureNotFrozen();

    // create the culture table on first use
    if (!_configs.ContainsKey(culture))
    {
        _configs[culture] = new Dictionary<CleanStringType, HelperConfig>();
    }

    var roleConfigs = _configs[culture];
    roleConfigs[stringRole] = new HelperConfig(preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);

    return this;
}
/// <summary>
/// Copies one term of the input string to the output buffer, applying the requested casing.
/// </summary>
/// <param name="input">The input string.</param>
/// <param name="ipos">The position of the term in <paramref name="input"/>.</param>
/// <param name="output">The output buffer.</param>
/// <param name="opos">The write position in <paramref name="output"/>; advanced by <paramref name="len"/>.</param>
/// <param name="len">The length of the term.</param>
/// <param name="caseType">The casing to apply.</param>
/// <param name="culture">The culture used for case conversions.</param>
/// <param name="isAcronym">Whether the term is an acronym. Acronyms are copied unchanged in UmbracoCase,
/// in PascalCase when at most 2 chars long, and in camelCase when at most 2 chars long and not first in the output.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="caseType"/> is not a supported casing.</exception>
/// <remarks>Case-converted text is copied using the original term length.</remarks>
internal void CopyUtf8Term(string input, int ipos, char[] output, ref int opos, int len, CleanStringType caseType, CultureInfo culture, /*Func<string, string> termFilter,*/ bool isAcronym)
{
    var term = input.Substring(ipos, len);

    var keepAcronymAsIs =
        (caseType == CleanStringType.CamelCase && len <= 2 && opos > 0) ||
        (caseType == CleanStringType.PascalCase && len <= 2) ||
        (caseType == CleanStringType.UmbracoCase);
    if (isAcronym && keepAcronymAsIs)
    {
        caseType = CleanStringType.Unchanged;
    }

    char first;
    bool atStart;
    switch (caseType)
    {
        case CleanStringType.Unchanged:
            term.CopyTo(0, output, opos, len);
            opos += len;
            break;

        case CleanStringType.LowerCase:
            term.ToLower(culture).CopyTo(0, output, opos, len);
            opos += len;
            break;

        case CleanStringType.UpperCase:
            term.ToUpper(culture).CopyTo(0, output, opos, len);
            opos += len;
            break;

        case CleanStringType.CamelCase:
            // first term of the output starts lowercase, other terms start uppercase
            first = term[0];
            atStart = opos == 0;
            output[opos++] = atStart ? char.ToLower(first, culture) : char.ToUpper(first, culture);
            if (len > 1)
            {
                term.ToLower(culture).CopyTo(1, output, opos, len - 1);
            }
            opos += len - 1;
            break;

        case CleanStringType.PascalCase:
            first = term[0];
            output[opos++] = char.ToUpper(first, culture);
            if (len > 1)
            {
                term.ToLower(culture).CopyTo(1, output, opos, len - 1);
            }
            opos += len - 1;
            break;

        case CleanStringType.UmbracoCase:
            // first term of the output keeps its first char, other terms start uppercase;
            // the rest of the term is copied unchanged
            first = term[0];
            atStart = opos == 0;
            output[opos++] = atStart ? first : char.ToUpper(first, culture);
            if (len > 1)
            {
                term.CopyTo(1, output, opos, len - 1);
            }
            opos += len - 1;
            break;

        default:
            throw new ArgumentOutOfRangeException("caseType");
    }
}
/// <summary>
/// Splits a string into terms and rebuilds it, term by term, with the requested casing and separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="caseType">The string type; only its case bits (<c>CaseMask</c>) are used.</param>
/// <param name="separator">The separator written between terms; <c>char.MinValue</c> means no separator.</param>
/// <param name="culture">The culture used for case conversions.</param>
/// <param name="config">The configuration driving how terms are detected.</param>
/// <returns>The clean string.</returns>
/// <remarks>Letters, digits and (when allowed by the configuration) underscores are term chars;
/// anything else ends the current term and is dropped.</remarks>
internal string CleanUtf8String(string text, CleanStringType caseType, char separator, CultureInfo culture, HelperConfig config)
{
    int opos = 0, ipos = 0;
    var state = StateBreak;

    caseType &= CleanStringType.CaseMask;

    // if we apply global ToUpper or ToLower to text here
    // then we cannot break words on uppercase chars
    var input = text;

    // it's faster to use an array than a StringBuilder
    var inputLength = input.Length;
    var output = new char[inputLength * 2]; // twice the length should be OK in all cases

    var hasSeparator = separator != char.MinValue;

    for (var i = 0; i < inputLength; i++)
    {
        var ch = input[i];
        var isDigit = char.IsDigit(ch);
        var isUpper = char.IsUpper(ch); // false for digits, symbols...
        var isLower = char.IsLower(ch); // false for digits, symbols...
        var isUnderscore = config.AllowUnderscoreInTerm && ch == '_';
        var isTerm = char.IsLetterOrDigit(ch) || isUnderscore;

        switch (state)
        {
            // between terms
            case StateBreak:
                // a term char begins a new term, but the very first term cannot begin
                // with an underscore, nor with a digit unless leading digits are allowed
                if (isTerm && (opos > 0 || (isUnderscore == false && (config.AllowLeadingDigits || isDigit == false))))
                {
                    ipos = i;
                    if (opos > 0 && hasSeparator)
                    {
                        output[opos++] = separator;
                    }
                    state = isUpper ? StateUp : StateWord;
                }
                break;

            // within a word
            case StateWord:
                // a word ends on a non-term char, or on an uppercase char when configured
                if (isTerm == false || (config.BreakTermsOnUpper && isUpper))
                {
                    CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, false);
                    ipos = i;
                    state = isTerm ? StateUp : StateBreak;
                    if (state != StateBreak && hasSeparator)
                    {
                        output[opos++] = separator;
                    }
                }
                break;

            // within an acronym
            case StateAcronym:
                // an acronym ends on a non-term char, a lowercase char or a digit
                if (isTerm == false || isLower || isDigit)
                {
                    // not greedy: step back one char, so the last uppercase char
                    // begins the next term instead of ending the acronym
                    if (isLower && config.GreedyAcronyms == false)
                    {
                        i -= 1;
                    }
                    CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, true);
                    ipos = i;
                    state = isTerm ? StateWord : StateBreak;
                    if (state != StateBreak && hasSeparator)
                    {
                        output[opos++] = separator;
                    }
                }
                break;

            // after one uppercase char: could become a word or an acronym
            case StateUp:
                if (isTerm)
                {
                    state = isUpper ? StateAcronym : StateWord;
                }
                else
                {
                    // single char, copy then break
                    CopyUtf8Term(input, ipos, output, ref opos, 1, caseType, culture, false);
                    state = StateBreak;
                }
                break;

            default:
                throw new Exception("Invalid state.");
        }
    }

    // flush the term that was in progress when the input ended
    if (state == StateWord)
    {
        CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, false);
    }
    else if (state == StateAcronym || state == StateUp)
    {
        CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, true);
    }
    else if (state != StateBreak)
    {
        throw new Exception("Invalid state.");
    }

    return new string(output, 0, opos);
}
/// <summary>
/// Cleans a string in the context of a specified culture, using a specified separator and configuration.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="separator">The separator.</param>
/// <param name="culture">The culture.</param>
/// <param name="config">The configuration.</param>
/// <returns>The clean string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="culture"/> is null.</exception>
/// <exception cref="NotImplementedException">The code type is <c>Unicode</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException">The code type is not a known value.</exception>
private string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture, HelperConfig config)
{
    // be safe
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    if (culture == null)
    {
        throw new ArgumentNullException("culture");
    }

    // apply defaults: camelCase when no case is set, Ascii when no code is set
    var caseMissing = (stringType & CleanStringType.CaseMask) == CleanStringType.None;
    if (caseMissing)
    {
        stringType |= CleanStringType.CamelCase;
    }
    var codeMissing = (stringType & CleanStringType.CodeMask) == CleanStringType.None;
    if (codeMissing)
    {
        stringType |= CleanStringType.Ascii;
    }

    var codeType = stringType & CleanStringType.CodeMask;

    // apply pre-filter
    if (config.PreFilter != null)
    {
        text = config.PreFilter(text);
    }

    // recode (runs before the code type is validated below, as it always did)
    text = Recode(text, stringType);

    // clean
    if (codeType == CleanStringType.Unicode)
    {
        throw new NotImplementedException("DefaultShortStringHelper does not handle unicode yet.");
    }
    if (codeType != CleanStringType.Ascii && codeType != CleanStringType.Utf8)
    {
        throw new ArgumentOutOfRangeException("stringType");
    }

    // Ascii deliberately shares the Utf8 cleaner - don't use CleanAsciiString
    return CleanUtf8String(text, stringType, separator, culture, config);
}
/// <summary>
/// Cleans a string.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <returns>The clean string.</returns>
/// <remarks>The string is cleaned in the context of the default culture, with
/// <c>char.MinValue</c> as the separator.</remarks>
public string CleanString(string text, CleanStringType stringType)
    => CleanString(text, stringType, char.MinValue, _defaultCulture);
/// <summary>
/// Forwards to the culture-specific <c>WithConfig</c> overload, using the default culture.
/// </summary>
/// <param name="stringRole">The string role the configuration applies to.</param>
/// <param name="config">The configuration; passed through unchanged.</param>
/// <returns>Whatever the culture-specific overload returns.</returns>
public DefaultShortStringHelper WithConfig(CleanStringType stringRole, Config config)
    => WithConfig(_defaultCulture, stringRole, config);
/// <summary>
/// Returns the text prefixed with the literal <c>CLEAN-STRING-A::</c> marker; no cleaning is performed.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <returns>The marker followed by <paramref name="text"/>.</returns>
public string CleanString(string text, CleanStringType stringType)
    => string.Concat("CLEAN-STRING-A::", text);
/// <summary>
/// Cleans a string in the context of a specified culture.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="culture">The culture.</param>
/// <returns>The clean string.</returns>
/// <remarks><c>char.MinValue</c> is used as the separator.</remarks>
public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
    => CleanString(text, stringType, char.MinValue, culture);
/// <summary>
/// Returns the text prefixed with the literal <c>CLEAN-STRING-D::</c> marker; no cleaning is performed.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="separator">Ignored.</param>
/// <param name="culture">Ignored.</param>
/// <returns>The marker followed by <paramref name="text"/>.</returns>
public string CleanString(string text, CleanStringType stringType, char separator, System.Globalization.CultureInfo culture)
    => string.Concat("CLEAN-STRING-D::", text);
/// <summary>
/// Forwards to the culture-specific <c>WithConfig</c> overload, using the default culture.
/// </summary>
/// <param name="stringRole">The string role the configuration applies to.</param>
/// <param name="preFilter">An optional pre-filter; passed through unchanged.</param>
/// <param name="breakTermsOnUpper">Passed through unchanged.</param>
/// <param name="allowLeadingDigits">Passed through unchanged.</param>
/// <param name="allowUnderscoreInTerm">Passed through unchanged.</param>
/// <returns>Whatever the culture-specific overload returns.</returns>
public DefaultShortStringHelper WithConfig(CleanStringType stringRole, Func<string, string> preFilter = null, bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
    => WithConfig(_defaultCulture, stringRole, preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
public void CleanStringWithTypeAndCulture(string input, string expected, string culture, CleanStringType stringType)
{
    // no culture name means the invariant culture
    CultureInfo cinfo;
    if (culture == null)
    {
        cinfo = CultureInfo.InvariantCulture;
    }
    else
    {
        cinfo = new CultureInfo(culture);
    }

    // url strings are separated with dashes, anything else has no separator
    var isUrl = (stringType & CleanStringType.Url) == CleanStringType.Url;
    var separator = isUrl ? '-' : char.MinValue;

    var actual = _helper.CleanString(input, stringType, separator, cinfo);

    Assert.AreEqual(expected, actual);
}
// the new methods to clean a string (to alias, url segment...)

/// <summary>
/// Cleans a string.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <returns>The clean string.</returns>
/// <remarks>The string is cleaned in the context of the IShortStringHelper default culture.</remarks>
public static string ToCleanString(this string text, CleanStringType stringType)
    => ShortStringHelper.CleanString(text, stringType);
public void CleanStringToAsciiWithTypeAndSeparator(string input, string expected, char separator, CleanStringType caseType)
{
    // always clean to ascii, whatever the requested case
    var asciiType = caseType | CleanStringType.Ascii;

    var actual = _helper.CleanString(input, asciiType, separator);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Cleans a string, using a specified separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="separator">The separator.</param>
/// <returns>The clean string.</returns>
/// <remarks>The string is cleaned in the context of the default culture.</remarks>
public string CleanString(string text, CleanStringType stringType, char separator)
    => CleanString(text, stringType, _defaultCulture, separator);
/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="separator">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType, char separator) => text;
// here was a subtle, ascii-optimized version of the cleaning code, and I was
// very proud of it until benchmarking showed it was an order of magnitude slower
// than the utf8 version. Micro-optimizing sometimes isn't such a good idea.

/// <summary>
/// Splits a string into terms and rebuilds it, term by term, with the requested casing and separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="caseType">The string type; only its case bits (<c>CaseMask</c>) are used.</param>
/// <param name="separator">The separator written between terms; <c>char.MinValue</c> means no separator.</param>
/// <param name="culture">The culture used for case conversions.</param>
/// <param name="config">The configuration deciding which chars are term chars, and how terms break.</param>
/// <returns>The clean string.</returns>
/// <exception cref="NotSupportedException">The text contains a surrogate char.</exception>
/// <remarks>Does NOT support surrogate pairs in text.</remarks>
internal string CleanCodeString(string text, CleanStringType caseType, char separator, CultureInfo culture, Config config)
{
    int opos = 0, ipos = 0;
    var state = StateBreak;

    caseType &= CleanStringType.CaseMask;

    // if we apply global ToUpper or ToLower to text here
    // then we cannot break words on uppercase chars
    var input = text;

    // it's faster to use an array than a StringBuilder
    var inputLength = input.Length;
    var output = new char[inputLength * 2]; // twice the length should be OK in all cases

    var hasSeparator = separator != char.MinValue;

    for (var i = 0; i < inputLength; i++)
    {
        var ch = input[i];

        // leading as long as StateBreak and ipos still zero
        // NOTE(review): ipos is not advanced when a single char is copied from StateUp,
        // so this can stay true after a one-char first term - confirm that is intended
        var leading = state == StateBreak && ipos == 0;
        var isTerm = config.IsTerm(ch, leading);

        var isUpper = char.IsUpper(ch); // false for digits, symbols...

        // what should I do with surrogates?
        // no idea, really, so they are not supported at the moment
        if (char.IsSurrogate(ch))
        {
            throw new NotSupportedException("Surrogate pairs are not supported.");
        }

        switch (state)
        {
            // within a break
            case StateBreak:
                // begin a new term if char is a term char
                // (whether it is a valid leading char is decided by config.IsTerm)
                if (isTerm)
                {
                    ipos = i;
                    if (opos > 0 && hasSeparator)
                    {
                        output[opos++] = separator;
                    }
                    state = isUpper ? StateUp : StateWord;
                }
                break;

            // within a term / word
            case StateWord:
                // end a term if char is not a term char,
                // or ( it's uppercase and we break terms on uppercase)
                if (isTerm == false || (config.BreakTermsOnUpper && isUpper))
                {
                    CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, false);
                    ipos = i;
                    state = isTerm ? StateUp : StateBreak;
                    if (state != StateBreak && hasSeparator)
                    {
                        output[opos++] = separator;
                    }
                }
                break;

            // within a term / acronym
            case StateAcronym:
                // end an acronym if char is not a term char,
                // or if it's not uppercase / config
                if (isTerm == false || (config.CutAcronymOnNonUpper && isUpper == false))
                {
                    // whether it's part of the acronym depends on whether we're greedy
                    if (isTerm && config.GreedyAcronyms == false)
                    {
                        i -= 1; // handle that char again, in another state - not part of the acronym
                    }

                    if (i - ipos > 1) // single-char can't be an acronym
                    {
                        CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, true);
                        ipos = i;
                        state = isTerm ? StateWord : StateBreak;
                        if (state != StateBreak && hasSeparator)
                        {
                            output[opos++] = separator;
                        }
                    }
                    else if (isTerm)
                    {
                        state = StateWord;
                    }
                }
                else if (isUpper == false) // isTerm == true
                {
                    // it's a term char and we don't cut...
                    // keep moving forward as a word
                    state = StateWord;
                }
                break;

            // within a term / uppercase = could be a word or an acronym
            case StateUp:
                if (isTerm)
                {
                    // add that char to the term and pick word or acronym
                    state = isUpper ? StateAcronym : StateWord;
                }
                else
                {
                    // single char, copy then break
                    CopyTerm(input, ipos, output, ref opos, 1, caseType, culture, false);
                    state = StateBreak;
                }
                break;

            default:
                throw new Exception("Invalid state.");
        }
    }

    // flush the term that was in progress when the input ended
    if (state == StateWord)
    {
        CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, false);
    }
    else if (state == StateAcronym || state == StateUp)
    {
        CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, true);
    }
    else if (state != StateBreak)
    {
        throw new Exception("Invalid state.");
    }

    return new string(output, 0, opos);
}
/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="separator">Ignored.</param>
/// <param name="culture">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture) => text;
// legacy does not implement these

/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType) => text;
public void CleanStringToAsciiWithType(string input, string expected, CleanStringType caseType)
{
    // always clean to ascii, whatever the requested case
    var asciiType = caseType | CleanStringType.Ascii;

    var actual = _helper.CleanString(input, asciiType);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="culture">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType, CultureInfo culture) => text;
/// <summary>
/// Returns the text prefixed with the literal <c>CLEAN-STRING-D::</c> marker; no cleaning is performed.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="separator">Ignored.</param>
/// <param name="culture">Ignored.</param>
/// <returns>The marker followed by <paramref name="text"/>.</returns>
public string CleanString(string text, CleanStringType stringType, char separator, string culture)
    => string.Concat("CLEAN-STRING-D::", text);
/// <summary>
/// Cleans a string, using a specified separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="separator">The separator.</param>
/// <returns>The clean string.</returns>
/// <remarks>The string is cleaned in the context of the IShortStringHelper default culture.</remarks>
public static string ToCleanString(this string text, CleanStringType stringType, char separator)
    => ShortStringHelper.CleanString(text, stringType, separator);
/// <summary>
/// Resolves the configuration to use for a string role in a culture.
/// </summary>
/// <param name="stringType">The string role, used as is as the lookup key.</param>
/// <param name="culture">The culture whose configuration table is looked up first.</param>
/// <returns>The role-specific configuration when one exists, else the generic (all roles)
/// configuration, else <c>HelperConfig.Empty</c>.</returns>
/// <remarks>The default culture table is only consulted when no table at all exists for
/// <paramref name="culture"/>. A culture table that has no matching entry yields
/// <c>HelperConfig.Empty</c> without falling back to the default culture.</remarks>
private HelperConfig GetConfig(CleanStringType stringType, CultureInfo culture)
{
    // pick the culture table, or the default culture table when the culture has none
    // (TryGetValue: one lookup per dictionary instead of ContainsKey + indexer)
    Dictionary<CleanStringType, HelperConfig> config;
    if (_configs.TryGetValue(culture, out config) == false
        && _configs.TryGetValue(_defaultCulture, out config) == false)
    {
        return HelperConfig.Empty;
    }

    HelperConfig found;

    // have we got a config for _that_ role?
    if (config.TryGetValue(stringType, out found))
    {
        return found;
    }

    // have we got a generic config for _all_ roles?
    if (config.TryGetValue(CleanStringType.RoleMask, out found))
    {
        return found;
    }

    return HelperConfig.Empty;
}
public void CleanStringToAsciiWithCaseAndSeparator(string input, string expected, char separator, CleanStringType caseType)
{
    var asciiType = caseType | CleanStringType.Ascii;
    var actual = _helper.CleanString(input, asciiType, separator);

    // legacy does nothing: the result is compared with the input itself,
    // so the 'expected' argument is deliberately not used here
    Assert.AreEqual(input, actual);
}
/// <summary>
/// Cleans a string in the context of a specified culture, using a specified separator.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">A flag indicating the target casing and encoding of the string. By default,
/// strings are cleaned up to camelCase and Ascii.</param>
/// <param name="separator">The separator.</param>
/// <param name="culture">The culture.</param>
/// <returns>The clean string.</returns>
public static string ToCleanString(this string text, CleanStringType stringType, char separator, CultureInfo culture)
    => ShortStringHelper.CleanString(text, stringType, separator, culture);
// legacy does not implement these

/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType) => text;
public void CleanStringWithTypeAndCulture(string input, string expected, string culture, CleanStringType stringType)
{
    // picks the proper config per culture
    // and overrides some stringType params (ascii...)
    var actual = ShortStringHelper.CleanString(input, stringType, culture);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Returns the text unchanged; this implementation performs no cleaning.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="culture">Ignored.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString(string text, CleanStringType stringType, CultureInfo culture) => text;
/// <summary>
/// Returns the text prefixed with the literal <c>CLEAN-STRING-A::</c> marker; no cleaning is performed.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <returns>The marker followed by <paramref name="text"/>.</returns>
public string CleanString(string text, CleanStringType stringType)
    => string.Concat("CLEAN-STRING-A::", text);
/// <summary>
/// Filters a string to convert case, and more.
/// </summary>
/// <param name="phrase">The text to filter.</param>
/// <param name="cases">The string case type; only its case bits (<c>CaseMask</c>) are used.</param>
/// <returns>The filtered text.</returns>
/// <remarks>
/// <para>This is the legacy method, so we can't really change it, although it has issues (see unit tests).</para>
/// <para>It does more than "converting the case": every character other than an ASCII letter, an ASCII
/// digit or a single quote is removed, and acts as a word boundary.</para>
/// </remarks>
public string LegacyConvertStringCase(string phrase, CleanStringType cases)
{
    // ported from StringExtensions.ConvertCase

    cases &= CleanStringType.CaseMask;

    var splittedPhrase = Regex.Split(phrase, @"[^a-zA-Z0-9\']", RegexOptions.Compiled);

    if (cases == CleanStringType.Unchanged)
    {
        return string.Join("", splittedPhrase);
    }

    // upper-case the first char of each part; parts only contain [a-zA-Z0-9'] because
    // of the split pattern above, so a per-char invariant upper-casing is exact
    var sb = new StringBuilder();
    foreach (var part in splittedPhrase)
    {
        if (part.Length == 0)
        {
            continue;
        }
        sb.Append(char.ToUpperInvariant(part[0]));
        sb.Append(part, 1, part.Length - 1);
    }

    var result = sb.ToString();

    if (cases != CleanStringType.CamelCase)
    {
        return result;
    }

    if (result.Length <= 1)
    {
        return result.ToLowerInvariant();
    }

    // the static Regex.Match goes through the Regex cache, so the compiled pattern
    // is reused across calls instead of being rebuilt by 'new Regex(...)' every time
    var match = Regex.Match(result, "^([A-Z]*)([A-Z].*)$", RegexOptions.Singleline | RegexOptions.Compiled);
    if (match.Success == false)
    {
        return result;
    }

    // lower-case the leading run of uppercase chars, except the last one of the run
    result = match.Groups[1].Value.ToLowerInvariant() + match.Groups[2].Value;
    return result.Substring(0, 1).ToLowerInvariant() + result.Substring(1);
}
/// <summary>
/// Returns the text prefixed with the literal <c>CLEAN-STRING-C::</c> marker; no cleaning is performed.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="stringType">Ignored.</param>
/// <param name="culture">Ignored.</param>
/// <returns>The marker followed by <paramref name="text"/>.</returns>
public string CleanString(string text, CleanStringType stringType, string culture)
    => string.Concat("CLEAN-STRING-C::", text);