/// <summary>
/// Cleans a string using the configuration resolved for the given string type and culture.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">Flags describing the requested case and code handling.</param>
/// <param name="culture">The culture used to resolve the configuration; also forwarded to the cleaning step.</param>
/// <param name="separator">The separator to use, or <c>null</c> to fall back to the configured separator.</param>
/// <returns>The cleaned string, after the optional pre-filter, recoding, cleaning and optional post-filter.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="text"/> or <paramref name="culture"/> is <c>null</c>.
/// </exception>
protected virtual string CleanString(string text, CleanStringType stringType, CultureInfo culture, char? separator)
{
    // Guard clauses: both the text and the culture are mandatory.
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    if (culture == null)
    {
        throw new ArgumentNullException("culture");
    }

    // Resolve the configuration, then let it extend the requested string type.
    var config = GetConfig(stringType, culture);
    stringType = config.StringTypeExtend(stringType);

    // Fill in defaults: camel case when no case flag is set, ASCII when no code flag is set.
    var caseFlags = stringType & CleanStringType.CaseMask;
    if (caseFlags == CleanStringType.None)
    {
        stringType |= CleanStringType.CamelCase;
    }

    var codeFlags = stringType & CleanStringType.CodeMask;
    if (codeFlags == CleanStringType.None)
    {
        stringType |= CleanStringType.Ascii;
    }

    // An explicitly supplied separator wins over the configured one.
    char? effectiveSeparator = separator ?? config.Separator;

    // Optional pre-filter.
    if (config.PreFilter != null)
    {
        text = config.PreFilter(text);
    }

    // NOTE: a replacements step (config.Replacements via ReplaceMany) used to sit here
    // and is currently disabled.

    // Recode: ASCII conversion when requested, otherwise only strip surrogate pairs.
    var targetCode = stringType & CleanStringType.CodeMask;
    if (targetCode == CleanStringType.Ascii)
    {
        text = Utf8ToAsciiConverter.ToAsciiString(text);
    }
    else
    {
        text = RemoveSurrogatePairs(text);
    }

    // Main cleaning step.
    text = CleanCodeString(text, stringType, effectiveSeparator.Value, culture, config);

    // Optional post-filter.
    if (config.PostFilter != null)
    {
        text = config.PostFilter(text);
    }

    return text;
}
/// <summary>
/// Filters a string down to the characters allowed by the requested code type.
/// </summary>
/// <param name="text">The string to recode.</param>
/// <param name="stringType">The string type; only its code flags (selected with <c>CodeMask</c>) are considered.</param>
/// <returns>
/// The input unchanged when the code type is <c>Unicode</c>; the result of <c>RemoveNonUtf8</c> when it
/// is <c>Utf8</c>; otherwise the result of <c>Utf8ToAsciiConverter.ToAsciiString</c>.
/// </returns>
/// <remarks>Any code type other than <c>Unicode</c> or <c>Utf8</c> is handled through the ASCII conversion.</remarks>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
public virtual string Recode(string text, CleanStringType stringType)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    var requestedCode = stringType & CleanStringType.CodeMask;

    // Unicode is the widest target: there is nothing to strip.
    if (requestedCode == CleanStringType.Unicode)
    {
        return text;
    }

    // Utf8 target: drop whatever RemoveNonUtf8 considers out of range.
    if (requestedCode == CleanStringType.Utf8)
    {
        return RemoveNonUtf8(text);
    }

    // Everything else goes through the ASCII converter.
    return Utf8ToAsciiConverter.ToAsciiString(text);
}
/// <summary>
/// Cleans a string using the configuration resolved for the given string type and culture name.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">Flags describing the requested case and code handling.</param>
/// <param name="culture">The culture name; <c>null</c> is treated as an empty string.</param>
/// <param name="separator">The separator to use, or <c>null</c> to fall back to the configured separator.</param>
/// <returns>The cleaned string, after the optional pre-filter, recoding, cleaning and optional post-filter.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
protected virtual string CleanString(string text, CleanStringType stringType, string culture, char? separator)
{
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    // A missing culture is normalized to the empty string.
    if (culture == null)
    {
        culture = "";
    }

    // Resolve the configuration, then let it extend the requested string type.
    var config = _config.For(stringType, culture);
    stringType = config.StringTypeExtend(stringType);

    // Fill in defaults: camel case when no case flag is set, ASCII when no code flag is set.
    var caseFlags = stringType & CleanStringType.CaseMask;
    if (caseFlags == CleanStringType.None)
    {
        stringType |= CleanStringType.CamelCase;
    }

    var codeFlags = stringType & CleanStringType.CodeMask;
    if (codeFlags == CleanStringType.None)
    {
        stringType |= CleanStringType.Ascii;
    }

    // An explicitly supplied separator wins over the configured one.
    char? effectiveSeparator = separator ?? config.Separator;

    // Optional pre-filter.
    if (config.PreFilter != null)
    {
        text = config.PreFilter(text);
    }

    // NOTE: a replacements step (config.Replacements via ReplaceMany) used to sit here
    // and is currently disabled.

    // Recode according to the requested code type.
    var targetCode = stringType & CleanStringType.CodeMask;
    if (targetCode == CleanStringType.Ascii)
    {
        text = Utf8ToAsciiConverter.ToAsciiString(text);
    }
    else if (targetCode == CleanStringType.TryAscii)
    {
        // The ESC character (27) is handed to the converter as a marker; presumably it is
        // emitted for characters that cannot be converted (confirm against the converter).
        // The converted text is kept only when the marker does not appear in it;
        // otherwise the text is left exactly as it was.
        const char Marker = (char)27;
        var asciiCandidate = Utf8ToAsciiConverter.ToAsciiString(text, Marker);
        if (!asciiCandidate.Contains(Marker))
        {
            text = asciiCandidate;
        }
    }
    else
    {
        text = RemoveSurrogatePairs(text);
    }

    // Main cleaning step.
    text = CleanCodeString(text, stringType, effectiveSeparator.Value, culture, config);

    // Optional post-filter.
    if (config.PostFilter != null)
    {
        text = config.PostFilter(text);
    }

    return text;
}