/// <summary>
/// Reports whether <paramref name="c"/> is a visible, non-whitespace character.
/// </summary>
/// <param name="c">The UTF-16 code unit to classify.</param>
/// <returns>
/// <c>false</c> for any whitespace character (including U+2028 and U+2029) and for
/// characters in the Control, OtherNotAssigned, Surrogate, LineSeparator or
/// ParagraphSeparator categories; <c>true</c> for everything else.
/// </returns>
private static bool IsPrintableCharacter(char c)
{
    // See https://stackoverflow.com/questions/3253247/how-do-i-detect-non-printable-characters-in-net
    // Whitespace is rejected up front so that new lines and similar never count as printable.
    if (c == '\u2028' || c == '\u2029' || char.IsWhiteSpace(c))
    {
        return false;
    }

    // See https://docs.microsoft.com/en-us/dotnet/api/system.globalization.unicodecategory?view=netframework-4.8
    switch (Char.GetUnicodeCategory(c))
    {
        case UnicodeCategory.Control:
        case UnicodeCategory.OtherNotAssigned:
        case UnicodeCategory.Surrogate:
        case UnicodeCategory.LineSeparator:
        case UnicodeCategory.ParagraphSeparator:
            return false;
        default:
            return true;
    }
}
/// <summary>
/// Returns a copy of <paramref name="s"/> containing only the characters whose Unicode
/// category is LowercaseLetter, UppercaseLetter or DecimalDigitNumber.
/// </summary>
/// <param name="s">The text to filter.</param>
/// <returns>The retained characters, in their original order.</returns>
/// <remarks>
/// Letters in any other category (for example OtherLetter or TitlecaseLetter) are removed too.
/// </remarks>
public static string RemoveNonAlphanumericCharacters(this string s)
{
    var retained =
        from ch in s
        let category = CharUnicodeInfo.GetUnicodeCategory(ch)
        where category == UnicodeCategory.LowercaseLetter
            || category == UnicodeCategory.UppercaseLetter
            || category == UnicodeCategory.DecimalDigitNumber
        select ch;

    return new string(retained.ToArray());
}
/// <summary>
/// Removes non-printable characters and caller-specified invalid characters from
/// <paramref name="name"/>.
/// </summary>
/// <param name="name">The text to clean. Null or empty input is returned unchanged.</param>
/// <param name="invalid">
/// Characters to strip in addition to the non-printable ones. When null or empty, the
/// <c>nremove</c> set (declared elsewhere, not visible in this block) is used instead.
/// </param>
/// <returns>
/// <paramref name="name"/> without: characters below U+0020; non-whitespace characters in the
/// Control, OtherNotAssigned or Surrogate categories; and characters contained in the effective
/// invalid set.
/// </returns>
/// <remarks>
/// Every surrogate code unit is dropped, so characters encoded as surrogate pairs are removed entirely.
/// </remarks>
public static string clean(string name, HashSet<char> invalid)
{
    if (string.IsNullOrEmpty(name))
    {
        return name;
    }

    // A null set used to throw NullReferenceException; treat it like an empty one
    // and fall back to the default set.
    if (invalid == null || invalid.Count <= 0)
    {
        invalid = nremove;
    }

    // https://stackoverflow.com/questions/146134/how-to-remove-illegal-characters-from-path-and-filenames
    // https://stackoverflow.com/questions/3253247/how-do-i-detect-non-printable-characters-in-net
    var sb = new StringBuilder(name.Length);
    foreach (var c in name)
    {
        // Characters below U+0020 are always dropped, including ASCII whitespace such as tab and newline.
        if (c < 32)
        {
            continue;
        }

        // Char.IsWhiteSpace() covers whitespace that is categorized as a control character
        // (e.g. U+0085). Any other character is printable unless it falls into a
        // non-rendering category.
        // !! UnicodeCategory.Format and UnicodeCategory.PrivateUse can NOT be treated as
        // !! non-rendering, because they may (private-use) or do (format) contain at
        // !! least *some* rendering characters.
        if (!Char.IsWhiteSpace(c))
        {
            switch (Char.GetUnicodeCategory(c))
            {
                case UnicodeCategory.Control:
                case UnicodeCategory.OtherNotAssigned:
                case UnicodeCategory.Surrogate:
                    continue;
            }
        }

        // An empty set contains nothing, so no separate Count check is required.
        if (invalid != null && invalid.Contains(c))
        {
            continue;
        }

        sb.Append(c);
    }

    return sb.ToString();
}
/// <summary>
/// Reports whether <paramref name="character"/> is considered printable.
/// </summary>
/// <param name="character">The UTF-16 code unit to classify.</param>
/// <returns>
/// <c>true</c> for any whitespace character, and for any other character that is not in the
/// Control, OtherNotAssigned or Surrogate categories; <c>false</c> otherwise.
/// </returns>
public static bool CharIsPrintable(char character)
{
    // Char.IsWhiteSpace() accepts the ASCII whitespace characters even though
    // they are categorized as control characters.
    if (Char.IsWhiteSpace(character))
    {
        return true;
    }

    // Non-rendering, unknown, or incomplete characters.
    // !! UnicodeCategory.Format and UnicodeCategory.PrivateUse must NOT be rejected here.
    UnicodeCategory category = Char.GetUnicodeCategory(character);
    return category != UnicodeCategory.Control
        && category != UnicodeCategory.OtherNotAssigned
        && category != UnicodeCategory.Surrogate;
}
/// <summary>
/// Strips garbled characters: trims surrounding whitespace, then removes every character
/// whose Unicode category is OtherSymbol, Surrogate or Control.
/// </summary>
/// <param name="content">The text to clean; may be null.</param>
/// <returns>
/// The cleaned text, or <c>null</c> when <paramref name="content"/> is null, empty or
/// whitespace-only, or when no character survives the filter.
/// </returns>
public static string TrimMessyChars(this string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return null;
    }

    content = content.Trim();

    // Accumulate in a builder rather than concatenating strings inside the loop,
    // which copied the whole partial result for every kept character.
    var kept = new System.Text.StringBuilder(content.Length);
    foreach (var c in content)
    {
        switch (CharUnicodeInfo.GetUnicodeCategory(c))
        {
            case UnicodeCategory.OtherSymbol:
            case UnicodeCategory.Surrogate:
            case UnicodeCategory.Control:
                break;
            default:
                kept.Append(c);
                break;
        }
    }

    // The result stays null, not empty, when every character was filtered out.
    return kept.Length > 0 ? kept.ToString() : null;
}
/// <summary>
/// Collects every character from U+0001 through U+FFFE whose Unicode category is Format.
/// </summary>
/// <returns>The matching characters in ascending code-unit order.</returns>
public static char[] InvisibleChars()
{
    // Range(1, 0xFFFE) yields 1..0xFFFE, the same bounds as a loop running while i < 0xFFFF.
    return Enumerable.Range(1, 0xFFFE)
        .Select(code => (char)code)
        .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.Format)
        .ToArray();
}
/// <summary>
/// Determines whether every character of a string is printable
/// (extension).
/// </summary>
/// <param name="text">The string to inspect.</param>
/// <returns>
/// <c>true</c> if each character is whitespace or lies outside the Control, OtherNotAssigned
/// and Surrogate categories (an empty string therefore yields <c>true</c>); <c>false</c> as soon
/// as one character fails that test.
/// </returns>
public static bool IsPrintable(this string text)
{
    foreach (char current in text)
    {
        // Whitespace is accepted even when it is categorized as a control character.
        if (char.IsWhiteSpace(current))
        {
            continue;
        }

        switch (char.GetUnicodeCategory(current))
        {
            case UnicodeCategory.Control:
            case UnicodeCategory.OtherNotAssigned:
            case UnicodeCategory.Surrogate:
                return false;
        }
    }

    return true;
}