/// <summary>
/// Performs a "Ctrl+Backspace" style deletion in <paramref name="textBox"/>: when nothing is
/// selected, removes the text between a computed start position and the caret. The start is
/// either the opening of a <c>&lt;...&gt;</c> tag or <c>{\...}</c> block that ends right before
/// the caret (ignoring trailing whitespace), or the previous word/break-character boundary.
/// </summary>
/// <param name="textBox">The text box whose text before the caret is deleted.</param>
public static void ApplyControlBackspace(TextBox textBox)
{
    // Nothing to do when the user has an active selection.
    if (textBox.SelectionLength != 0)
    {
        return;
    }

    var content = textBox.Text;
    var caret = textBox.SelectionStart;
    if (caret <= 0 || caret > content.Length)
    {
        return;
    }

    // Only the text in front of the caret is relevant.
    content = content.Substring(0, caret);
    var elementStarts = StringInfo.ParseCombiningCharacters(content);
    var element = elementStarts.Length;
    var position = caret;

    // Sentinels: -1 = no start found yet, -2 = currently inside a run of break characters.
    var removeFrom = -1;

    // Walk backwards over trailing whitespace text elements.
    while (element > 0)
    {
        element--;
        position = elementStarts[element];
        if (!IsSpaceCategory(CharUnicodeInfo.GetUnicodeCategory(content, position)))
        {
            break;
        }
    }

    // HTML tag or {\...} block ending at the last non-space element? Then delete it as a whole.
    if (element > 0)
    {
        var last = content[position];
        if (last == '>')
        {
            var tagStart = content.LastIndexOf('<', position - 1);
            if (tagStart >= 0 && content.IndexOf('>', tagStart + 1) == position)
            {
                removeFrom = tagStart; // delete whole tag
            }
        }
        else if (last == '}')
        {
            var blockStart = content.LastIndexOf(@"{\", position - 1, StringComparison.Ordinal);
            if (blockStart >= 0 && content.IndexOf('}', blockStart + 1) == position)
            {
                removeFrom = blockStart;
            }
        }
    }

    if (removeFrom < 0)
    {
        if (BreakChars.Contains(content[position]))
        {
            removeFrom = -2;
        }

        while (element > 0)
        {
            element--;
            position = elementStarts[element];

            var isSpace = IsSpaceCategory(CharUnicodeInfo.GetUnicodeCategory(content, position));
            var isBreak = !isSpace && BreakChars.Contains(content[position]);

            if (!isSpace && !isBreak)
            {
                // Ordinary character: we are (back) inside a word.
                removeFrom = -1;
                continue;
            }

            var following = elementStarts[element + 1];
            if (removeFrom > -2)
            {
                // A boundary was reached while inside a word (or a start is already known).
                if (removeFrom < 0)
                {
                    removeFrom = following;
                }

                break;
            }

            if (isSpace)
            {
                // Space in front of a run of break characters: stop here unless the run
                // starts with ':', '!' or '?', in which case keep scanning backwards.
                removeFrom = following;
                if (!":!?".Contains(content[removeFrom]))
                {
                    break;
                }
            }
        }
    }

    if (removeFrom < caret)
    {
        if (removeFrom < 0)
        {
            removeFrom = 0;
        }

        // Replace the computed range with nothing.
        textBox.Select(removeFrom, caret - removeFrom);
        textBox.Paste(string.Empty);
    }
}
/// <summary>
/// Returns the numeric value that <see cref="CharUnicodeInfo.GetNumericValue(char)"/> reports for <paramref name="c"/>.
/// </summary>
/// <param name="c">The character to look up.</param>
/// <returns>The value returned by <see cref="CharUnicodeInfo"/> for the character.</returns>
public static double GetNumericValue(char c) => CharUnicodeInfo.GetNumericValue(c);
/// <summary>
///     Maps <paramref name="identityNumber" /> to a
///     <see cref="StatementBruteForce.Core.SouthAfricanIdentityNumberModel" /> object.
/// </summary>
/// <param name="identityNumber">13 digit South African identity number, defined as YYMMDDSSSSCAZ.</param>
/// <returns>
///     A <see cref="StatementBruteForce.Core.SouthAfricanIdentityNumberModel" /> object. Every component whose
///     source character(s) are not digits is passed to the model as <c>-1</c>; the year is passed as
///     <c>1900 + YY</c>.
/// </returns>
public static SouthAfricanIdentityNumberModel ParseIdentityNumberStringToModel(string identityNumber)
{
    var chars = identityNumber.ToCharArray();

    // Value of the single digit at the given position, or -1 when that character is not a digit.
    int DigitAt(int position)
    {
        return char.IsDigit(c: chars[position])
            ? CharUnicodeInfo.GetDigitValue(ch: chars[position])
            : -1;
    }

    // Value of the two-digit number starting at the given position, or -1 when either character is not a digit.
    int TwoDigitsAt(int position)
    {
        if (!char.IsDigit(c: chars[position]) || !char.IsDigit(c: chars[position + 1]))
        {
            return -1;
        }

        return 10 * CharUnicodeInfo.GetDigitValue(ch: chars[position])
               + CharUnicodeInfo.GetDigitValue(ch: chars[position + 1]);
    }

    // The year is always placed in the 1900s.
    var yearInCentury = TwoDigitsAt(0);
    var year = yearInCentury < 0 ? -1 : 1900 + yearInCentury;

    return new SouthAfricanIdentityNumberModel(
        yearOfBirth: year,
        monthOfBirth: TwoDigitsAt(2),
        dayOfBirth: TwoDigitsAt(4),
        gender: DigitAt(6),
        genderSequence1: DigitAt(7),
        genderSequence2: DigitAt(8),
        genderSequence3: DigitAt(9),
        citizenship: DigitAt(10),
        obsolete: DigitAt(11),
        checksum: DigitAt(12));
}
/// <summary>
/// Writes one key/value string element (<c>ElemString</c> containing <c>ElemKey</c> and
/// <c>ElemValue</c>) to the XML writer. Protected values are written XOR-encoded as Base64
/// unless the output format is plain XML; all other values are written as text.
/// </summary>
/// <param name="name">Key of the string; written through <c>StrUtil.SafeXmlString</c>.</param>
/// <param name="value">Value to write. Must not be <c>null</c>.</param>
/// <param name="bIsEntryString">
/// When <c>true</c>, the protection flag of the standard fields (title, user name, password,
/// URL, notes) is taken from the database's memory protection settings instead of from
/// <paramref name="value"/>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is <c>null</c>.</exception>
private void WriteObject(string name, ProtectedString value, bool bIsEntryString)
{
    Debug.Assert(name != null);
    Debug.Assert(value != null);
    if (value == null)
    {
        throw new ArgumentNullException("value");
    }

    m_xmlWriter.WriteStartElement(ElemString);

    m_xmlWriter.WriteStartElement(ElemKey);
    m_xmlWriter.WriteString(StrUtil.SafeXmlString(name));
    m_xmlWriter.WriteEndElement();

    m_xmlWriter.WriteStartElement(ElemValue);

    bool bProtect = value.IsProtected;
    if (bIsEntryString)
    {
        bProtect = GetEntryStringProtection(name, bProtect);
    }

    bool bPlainXml = (m_format == KdbxFormat.PlainXml);
    if (bProtect && !bPlainXml)
    {
        m_xmlWriter.WriteAttributeString(AttrProtected, ValTrue);

        byte[] pbXorred = value.ReadXorredString(m_randomStream);
        if (pbXorred.Length > 0)
        {
            m_xmlWriter.WriteBase64(pbXorred, 0, pbXorred.Length);
        }
    }
    else
    {
        string strPlain = value.ReadString();

        // With localized names enabled, the per-character mapping below is applied
        // to the value before it is written.
        if (m_bLocalizedNames)
        {
            strPlain = MapLocalizedChars(strPlain);
        }

        // In plain XML the value is not encrypted; only mark it as protected-in-memory.
        if (bPlainXml && bProtect)
        {
            m_xmlWriter.WriteAttributeString(AttrProtectedInMemPlainXml, ValTrue);
        }

        m_xmlWriter.WriteString(StrUtil.SafeXmlString(strPlain));
    }

    m_xmlWriter.WriteEndElement(); // ElemValue
    m_xmlWriter.WriteEndElement(); // ElemString
}

// Returns the database-wide memory protection setting for the standard entry fields
// (which might differ from the value's own flag, e.g. after an import that did not
// specify the correct setting); any other field keeps bCurrent.
private bool GetEntryStringProtection(string name, bool bCurrent)
{
    if (name == PwDefs.TitleField)
    {
        return m_pwDatabase.MemoryProtection.ProtectTitle;
    }

    if (name == PwDefs.UserNameField)
    {
        return m_pwDatabase.MemoryProtection.ProtectUserName;
    }

    if (name == PwDefs.PasswordField)
    {
        return m_pwDatabase.MemoryProtection.ProtectPassword;
    }

    if (name == PwDefs.UrlField)
    {
        return m_pwDatabase.MemoryProtection.ProtectUrl;
    }

    if (name == PwDefs.NotesField)
    {
        return m_pwDatabase.MemoryProtection.ProtectNotes;
    }

    return bCurrent;
}

// Character mapping applied to values when localized names are enabled:
// symbols and surrogates are shifted by 32 * (their Unicode category value);
// control characters >= 256 are lower-cased; everything else is copied unchanged.
private static string MapLocalizedChars(string strSource)
{
    StringBuilder sbMapped = new StringBuilder();

    foreach (char ch in strSource)
    {
        char chOut = ch;

        if (char.IsSymbol(ch) || char.IsSurrogate(ch))
        {
            System.Globalization.UnicodeCategory cat = CharUnicodeInfo.GetUnicodeCategory(ch);
            chOut = (char)((int)cat * 32 + ch);
        }
        else if (char.IsControl(ch) && (ch >= 256))
        {
            // Some of the control characters map to corresponding ones in the low
            // range (up to 255) when lower-casing them with invariant culture (see
            // http://lists.ximian.com/pipermail/mono-patches/2002-February/086106.html )
#if !KeePassLibSD
            chOut = char.ToLowerInvariant(ch);
#else
            chOut = char.ToLower(ch);
#endif
        }

        sbMapped.Append(chOut);
    }

    return sbMapped.ToString();
}
/// <summary>
/// Returns true if the Unicode character is a formatting character (Unicode class Cf).
/// </summary>
/// <param name="ch">The Unicode character.</param>
/// <returns>
/// <c>false</c> for every character up to 127; otherwise the result of the category-based
/// overload for the character's Unicode category.
/// </returns>
internal static bool IsFormattingChar(char ch)
{
    // There are no formatting characters in the ASCII range, so skip the category lookup.
    if (ch <= 127)
    {
        return false;
    }

    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);
    return IsFormattingChar(category);
}
/// <summary>
/// Creates a URL and SEO friendly slug.
/// </summary>
/// <param name="text">Text to slugify; <c>null</c> yields an empty string.</param>
/// <param name="maxLength">Maximum length of the slug; values of 0 or less mean "no limit".</param>
/// <returns>URL and SEO friendly string.</returns>
public static string UrlFriendly(string text, int maxLength = 0)
{
    if (text == null)
    {
        return "";
    }

    // Lower-case first, then decompose so that accents become separate (ignored) marks.
    string decomposed = text.ToLowerInvariant().Normalize(NormalizationForm.FormD);

    var slug = new StringBuilder();
    bool lastWasDash = false;

    foreach (char c in decomposed)
    {
        switch (CharUnicodeInfo.GetUnicodeCategory(c))
        {
            // Letters and digits are kept; non-ASCII ones are remapped to an ASCII replacement.
            case UnicodeCategory.LowercaseLetter:
            case UnicodeCategory.UppercaseLetter:
            case UnicodeCategory.DecimalDigitNumber:
                if (c < 128)
                {
                    slug.Append(c);
                }
                else
                {
                    slug.Append(ConstHelper.RemapInternationalCharToAscii(c));
                }

                lastWasDash = false;
                break;

            // Separators and punctuation become a single hyphen (never two in a row).
            case UnicodeCategory.SpaceSeparator:
            case UnicodeCategory.ConnectorPunctuation:
            case UnicodeCategory.DashPunctuation:
            case UnicodeCategory.OtherPunctuation:
            case UnicodeCategory.MathSymbol:
                if (!lastWasDash)
                {
                    slug.Append('-');
                    lastWasDash = true;
                }

                break;

            // Every other category is dropped.
        }

        // Stop parsing as soon as the requested length has been reached.
        if (maxLength > 0 && slug.Length >= maxLength)
        {
            break;
        }
    }

    // Trim excess hyphens.
    string result = slug.ToString().Trim('-');

    // A multi-character replacement may have overshot the limit; cut it back.
    if (maxLength > 0 && result.Length > maxLength)
    {
        return result.Substring(0, maxLength);
    }

    return result;
}
// private methods
/// <summary>
/// Returns <paramref name="value"/> with quotes, backslashes and common control characters
/// replaced by their backslash escapes, and with every character outside the letter, number,
/// space-separator, punctuation and symbol categories written as <c>\uXXXX</c>.
/// </summary>
/// <param name="value">The string to escape.</param>
/// <returns>The original instance when no character needs escaping; otherwise the escaped copy.</returns>
private string EscapedString(string value)
{
    // Fast path: nothing to escape, hand back the same instance.
    if (!value.Any(ch => NeedsEscaping(ch)))
    {
        return value;
    }

    var escaped = new StringBuilder(value.Length);
    foreach (char ch in value)
    {
        // Characters that have a dedicated short escape sequence.
        switch (ch)
        {
            case '"':
                escaped.Append("\\\"");
                continue;
            case '\\':
                escaped.Append("\\\\");
                continue;
            case '\b':
                escaped.Append("\\b");
                continue;
            case '\f':
                escaped.Append("\\f");
                continue;
            case '\n':
                escaped.Append("\\n");
                continue;
            case '\r':
                escaped.Append("\\r");
                continue;
            case '\t':
                escaped.Append("\\t");
                continue;
        }

        // Everything else: copy the categories listed below verbatim, hex-escape the rest.
        switch (CharUnicodeInfo.GetUnicodeCategory(ch))
        {
            case UnicodeCategory.UppercaseLetter:
            case UnicodeCategory.LowercaseLetter:
            case UnicodeCategory.TitlecaseLetter:
            case UnicodeCategory.OtherLetter:
            case UnicodeCategory.DecimalDigitNumber:
            case UnicodeCategory.LetterNumber:
            case UnicodeCategory.OtherNumber:
            case UnicodeCategory.SpaceSeparator:
            case UnicodeCategory.ConnectorPunctuation:
            case UnicodeCategory.DashPunctuation:
            case UnicodeCategory.OpenPunctuation:
            case UnicodeCategory.ClosePunctuation:
            case UnicodeCategory.InitialQuotePunctuation:
            case UnicodeCategory.FinalQuotePunctuation:
            case UnicodeCategory.OtherPunctuation:
            case UnicodeCategory.MathSymbol:
            case UnicodeCategory.CurrencySymbol:
            case UnicodeCategory.ModifierSymbol:
            case UnicodeCategory.OtherSymbol:
                escaped.Append(ch);
                break;
            default:
                escaped.AppendFormat("\\u{0:x4}", (int)ch);
                break;
        }
    }

    return escaped.ToString();
}
/// <summary>
/// Returns the <see cref="UnicodeCharacterClass"/> of the character at the specified offset in the given string.
/// </summary>
/// <param name="value">A <see cref="string"/>.</param>
/// <param name="index">The character position in <paramref name="value"/>.</param>
/// <returns>
/// The <see cref="UnicodeCharacterClass"/> that corresponds to the Unicode category of the character at
/// position <paramref name="index"/> in <paramref name="value"/>.
/// </returns>
public static UnicodeCharacterClass GetCharacterClass(string value, int index)
{
    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(value, index);
    return GetCharacterClassFromCategory(category);
}
/// <summary>
/// Normalizes <paramref name="part"/> to form KC and rejects it when it contains a character
/// that is not allowed: non-ASCII space separators, U+0000 and non-ASCII control characters,
/// private-use and surrogate code units, non-character code points and a fixed set of
/// directional/formatting/deprecated characters. When unassigned code points are not allowed,
/// those are rejected as well.
/// </summary>
/// <param name="part">The label to prepare.</param>
/// <param name="start">Offset of <paramref name="part"/> in the full input; only used for error positions.</param>
/// <returns>The normalized label.</returns>
/// <exception cref="ArgumentException">A prohibited or (if disallowed) unassigned character was found.</exception>
private string NamePrep(string part, int start)
{
    part = part.Normalize(NormalizationForm.FormKC);

    for (int i = 0; i < part.Length; i++)
    {
        char c = part[i];
        bool prohibited;

        switch (CharUnicodeInfo.GetUnicodeCategory(part, i))
        {
            case UnicodeCategory.SpaceSeparator:
                // Only ASCII spaces are valid.
                prohibited = c >= '\x80';
                break;

            case UnicodeCategory.Control:
                // ASCII control characters other than NUL are valid.
                prohibited = c == '\x0' || c >= '\x80';
                break;

            case UnicodeCategory.PrivateUse:
            case UnicodeCategory.Surrogate:
                // NOTE(review): a low surrogate looked up by index appears to be reported as
                // Surrogate, which would reject every supplementary character - confirm intent.
                prohibited = true;
                break;

            default:
                prohibited =
                    // Non-character code points: the full U+FDD0..U+FDEF block and U+FFFE..U+FFFF.
                    // (The previous check started at U+FDDF and missed U+FFFF.)
                    ('\uFDD0' <= c && c <= '\uFDEF') ||
                    c >= '\uFFFE' ||
                    ('\uFFF9' <= c && c <= '\uFFFD') ||
                    ('\u2FF0' <= c && c <= '\u2FFB') ||
                    ('\u202A' <= c && c <= '\u202E') ||
                    ('\u206A' <= c && c <= '\u206F') ||
                    c == '\u0340' || c == '\u0341' ||
                    c == '\u200E' || c == '\u200F' ||
                    c == '\u2028' || c == '\u2029';
                break;
        }

        if (prohibited)
        {
            throw new ArgumentException(string.Format("Not allowed character was in the input string, at {0}", start + i));
        }
    }

    if (!this.allowUnassigned)
    {
        for (int i = 0; i < part.Length; i++)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(part, i) == UnicodeCategory.OtherNotAssigned)
            {
                throw new ArgumentException(string.Format("Use of unassigned Unicode character is prohibited at {0}", start + i));
            }
        }
    }

    return part;
}
/// <summary>
/// Returns <c>true</c> for the percent sign and for any character in the Unicode
/// currency-symbol category.
/// </summary>
/// <param name="character">The character to test.</param>
private static bool IsInvalidPunctuationSymbol(char character)
{
    if (character == '%')
    {
        return true;
    }

    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(character);
    return category == UnicodeCategory.CurrencySymbol;
}
/// <summary>
/// Removes diacritics from <paramref name="text"/>: the text is decomposed (form D), all
/// non-spacing marks are dropped, and the remainder is recomposed (form C).
/// </summary>
/// <param name="text">The text to strip.</param>
/// <returns>The text without non-spacing marks.</returns>
public static string RemoveDiacritics(this string text)
{
    string decomposed = text.Normalize(NormalizationForm.FormD);

    var kept = new StringBuilder(decomposed.Length);
    foreach (char ch in decomposed)
    {
        if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark)
        {
            continue; // the accent itself
        }

        kept.Append(ch);
    }

    return kept.ToString().Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Determines whether <paramref name="source"/>, once decomposed to normalization form D,
/// contains at least one non-spacing mark.
/// </summary>
/// <param name="source">The text to inspect.</param>
/// <returns><c>true</c> when a non-spacing mark is present; otherwise <c>false</c>.</returns>
public static bool HasAccents(this string source)
{
    foreach (char ch in source.Normalize(NormalizationForm.FormD))
    {
        if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Formats <paramref name="value"/> as a string literal according to <paramref name="options"/>.
/// </summary>
/// <param name="value">The string to format.</param>
/// <param name="options">
/// <c>UseQuotes</c> wraps the result in double quotes (as a verbatim <c>@"..."</c> literal when the
/// value contains a new line and escaping is off); <c>EscapeNonPrintableCharacters</c> replaces
/// characters that need escaping, including unpaired surrogates and surrogate pairs.
/// </param>
/// <returns>The formatted literal text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is <c>null</c>.</exception>
public static string FormatLiteral(string value, ObjectDisplayOptions options)
{
    if (value == null)
    {
        throw new ArgumentNullException(nameof(value));
    }

    const char quote = '"';

    var pooledBuilder = PooledStringBuilder.GetInstance();
    var builder = pooledBuilder.Builder;

    var useQuotes = options.IncludesOption(ObjectDisplayOptions.UseQuotes);
    var escapeNonPrintable = options.IncludesOption(ObjectDisplayOptions.EscapeNonPrintableCharacters);

    // Verbatim form is only chosen for quoted, unescaped, multi-line values.
    var isVerbatim = useQuotes && !escapeNonPrintable && ContainsNewLine(value);

    if (useQuotes)
    {
        if (isVerbatim)
        {
            builder.Append('@');
        }

        builder.Append(quote);
    }

    for (int i = 0; i < value.Length; i++)
    {
        char c = value[i];

        if (escapeNonPrintable && char.IsSurrogate(c))
        {
            // Look at the code point that starts here (a pair, or a lone surrogate).
            var category = CharUnicodeInfo.GetUnicodeCategory(value, i);
            if (category == UnicodeCategory.Surrogate)
            {
                // an unpaired surrogate
                builder.Append("\\u").Append(((int)c).ToString("x4"));
            }
            else if (NeedsEscaping(category))
            {
                // a surrogate pair that needs to be escaped
                var unicode = char.ConvertToUtf32(value, i);
                builder.Append("\\U").Append(unicode.ToString("x8"));
                i++; // skip the already-encoded second surrogate of the pair
            }
            else
            {
                // copy a printable surrogate pair directly
                builder.Append(c);
                i++;
                builder.Append(value[i]);
            }

            continue;
        }

        if (escapeNonPrintable && TryReplaceChar(c, out var replaceWith))
        {
            builder.Append(replaceWith);
            continue;
        }

        if (useQuotes && c == quote)
        {
            // Verbatim literals double the quote; regular literals backslash-escape it.
            builder.Append(isVerbatim ? quote : '\\');
            builder.Append(quote);
            continue;
        }

        builder.Append(c);
    }

    if (useQuotes)
    {
        builder.Append(quote);
    }

    return pooledBuilder.ToStringAndFree();
}
/// <summary>
/// Computes the type of the given character
/// </summary>
/// <param name="ch">
/// Character whose type is to be determined. Values above <see cref="char.MaxValue"/> up to
/// U+10FFFF are treated as supplementary Unicode code points.
/// </param>
/// <returns> Type of the character </returns>
public static byte GetType(int ch)
{
    // A plain (char) cast would truncate supplementary code points to an unrelated BMP
    // character, so those are classified through their UTF-16 surrogate pair instead.
    // Everything else (BMP values, and out-of-range values as before) uses the char lookup.
    UnicodeCategory category = (ch > char.MaxValue && ch <= 0x10FFFF)
        ? CharUnicodeInfo.GetUnicodeCategory(char.ConvertFromUtf32(ch), 0)
        : CharUnicodeInfo.GetUnicodeCategory((char)ch);

    switch (category)
    {
        case UnicodeCategory.UppercaseLetter:
            return WordDelimiterFilter.UPPER;

        case UnicodeCategory.LowercaseLetter:
            return WordDelimiterFilter.LOWER;

        case UnicodeCategory.TitlecaseLetter:
        case UnicodeCategory.ModifierLetter:
        case UnicodeCategory.OtherLetter:
        case UnicodeCategory.NonSpacingMark:
        case UnicodeCategory.EnclosingMark: // depends what it encloses?
        case UnicodeCategory.SpacingCombiningMark:
            return WordDelimiterFilter.ALPHA;

        case UnicodeCategory.DecimalDigitNumber:
        case UnicodeCategory.LetterNumber:
        case UnicodeCategory.OtherNumber:
            return WordDelimiterFilter.DIGIT;

        case UnicodeCategory.Surrogate: // prevent splitting
            return WordDelimiterFilter.ALPHA | WordDelimiterFilter.DIGIT;

        // Separators, control, format, private-use, punctuation and symbol
        // categories all fall through to the delimiter type.
        default:
            return WordDelimiterFilter.SUBWORD_DELIM;
    }
}
/// <summary>
/// Returns the flag for the Unicode category of <paramref name="ch"/>: a value with the single
/// bit set whose position equals the numeric value of the character's <see cref="UnicodeCategory"/>.
/// </summary>
/// <param name="ch">The character to classify.</param>
private static UniCatFlags GetCatFlags(char ch)
{
    int categoryIndex = (int)CharUnicodeInfo.GetUnicodeCategory(ch);
    uint bit = 1u << categoryIndex;
    return (UniCatFlags)bit;
}
/// <summary>
/// Looks up the Unicode category of a non-ASCII <see cref="Rune"/> from its scalar value.
/// </summary>
/// <param name="value">The rune to classify; asserted (in debug builds) to be non-ASCII.</param>
/// <returns>The <see cref="UnicodeCategory"/> of the rune.</returns>
private static UnicodeCategory GetUnicodeCategoryNonAscii(Rune value)
{
    Debug.Assert(!value.IsAscii, "Shouldn't use this non-optimized code path for ASCII characters.");

    int scalar = value.Value;
    return CharUnicodeInfo.GetUnicodeCategory(scalar);
}
/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> lies at or beyond the length of the
/// <c>_rgchi</c> table and belongs to the Unicode format category.
/// </summary>
/// <param name="ch">The character to test.</param>
public static bool IsFormat(char ch)
{
    // Characters covered by the table are never reported as format characters here.
    if (ch < _rgchi.Length)
    {
        return false;
    }

    return CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.Format;
}
/// <summary>
/// Is the character a valid first identifier character, i.e. a letter or a
/// connector-punctuation character such as the underscore?
/// </summary>
/// <param name="c">The character to test.</param>
private static bool IsValidIdFirstChar(char c)
{
    if (char.IsLetter(c))
    {
        return true;
    }

    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(c);
    return category == UnicodeCategory.ConnectorPunctuation;
}
/// <summary>
/// An extension method to generate slugs from any language
/// </summary>
/// <remarks>
/// The text is lower-cased with the invariant culture (so the result does not depend on the
/// current culture, e.g. Turkish dotless i) and decomposed to form KD. Lowercase/other letters,
/// decimal digits, '-', '_' and '~' are kept, diacritics (non-spacing marks) are removed and
/// every other character becomes a dash; consecutive dashes are collapsed into one.
/// </remarks>
/// <param name="textToSlug">Text to convert to slug</param>
/// <param name="maxLength">Maximum number of characters of the generated slug</param>
/// <returns>human-readable and SEO friendly string</returns>
public static string Slugify(this string textToSlug, int maxLength = 1000)
{
    string decomposed = textToSlug.ToLowerInvariant().Normalize(NormalizationForm.FormKD);

    var sb = new StringBuilder(decomposed.Length);
    foreach (char t in decomposed)
    {
        // Allowed symbols that are copied as-is
        if (t == '_' || t == '~')
        {
            sb.Append(t);
            continue;
        }

        if (t != '-')
        {
            switch (CharUnicodeInfo.GetUnicodeCategory(t))
            {
                case UnicodeCategory.LowercaseLetter:
                case UnicodeCategory.OtherLetter:
                case UnicodeCategory.DecimalDigitNumber:
                    // Keep letters and digits
                    sb.Append(t);
                    continue;

                case UnicodeCategory.NonSpacingMark:
                    // Remove diacritics
                    continue;
            }
        }

        // A literal dash or any other character: emit one dash, collapsing dash groups
        // on the fly instead of repeatedly rewriting the string afterwards.
        if (sb.Length == 0 || sb[sb.Length - 1] != '-')
        {
            sb.Append('-');
        }
    }

    string slug = sb.ToString().Normalize(NormalizationForm.FormC);

    // Limit resultant string length to maxLength
    if (slug.Length > maxLength)
    {
        slug = slug.Substring(0, maxLength);
    }

    // Remove any leading and trailing hyphen, underscore or dot
    return slug.Trim('-', '_', '.');
}
/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> is an underscore or its Unicode category
/// is accepted by <c>IsLetterChar</c>.
/// </summary>
static bool IsIdentifierStartCharacter(char ch)
{
    if (ch == '_')
    {
        return true;
    }

    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);
    return IsLetterChar(category);
}
/// <summary>
/// Verifies that <c>\w</c> and <c>\d</c> agree with Unicode categories. Random strings are built
/// from characters the test classifies as matching ("valid") and non-matching ("invalid"), and
/// the regex must match exactly the valid run.
/// </summary>
public static void RegexUnicodeChar()
{
    const int ValidRunLength = 10;
    const int InvalidRunLength = 15;

    // Categories this test expects \w to accept:
    // Lu, Ll, Lt, Lm, Lo, Nd, Mn and Pc (LetterNumber, OtherNumber and Mc are not included).
    bool IsWordCategory(UnicodeCategory category)
    {
        switch (category)
        {
            case UnicodeCategory.UppercaseLetter:      // Lu
            case UnicodeCategory.LowercaseLetter:      // Ll
            case UnicodeCategory.TitlecaseLetter:      // Lt
            case UnicodeCategory.ModifierLetter:       // Lm
            case UnicodeCategory.OtherLetter:          // Lo
            case UnicodeCategory.DecimalDigitNumber:   // Nd
            case UnicodeCategory.NonSpacingMark:       // Mn
            case UnicodeCategory.ConnectorPunctuation: // Pc
                return true;
            default:
                return false;
        }
    }

    // Category this test expects \d to accept: Nd only.
    bool IsDigitCategory(UnicodeCategory category)
    {
        return category == UnicodeCategory.DecimalDigitNumber;
    }

    // Splits every char below MaxUnicodeRange into the two lists by its category.
    void Partition(Func<UnicodeCategory, bool> accepts, List<char> valid, List<char> invalid)
    {
        for (int codeUnit = 0; codeUnit < MaxUnicodeRange; codeUnit++)
        {
            char c = (char)codeUnit;
            if (accepts(CharUnicodeInfo.GetUnicodeCategory(c)))
            {
                valid.Add(c);
            }
            else
            {
                invalid.Add(c);
            }
        }
    }

    // Appends 'count' random chars from 'pool' to 'input' (and to 'expected', when given).
    void AppendRandom(Random rng, List<char> pool, int count, StringBuilder input, StringBuilder expected)
    {
        int poolSize = pool.Count;
        for (int j = 0; j < count; j++)
        {
            char c = pool[rng.Next(poolSize)];
            input.Append(c);
            if (expected != null)
            {
                expected.Append(c);
            }
        }
    }

    List<char> validChars = new List<char>();
    List<char> invalidChars = new List<char>();
    Partition(IsWordCategory, validChars, invalidChars);

    // \w* : valid characters followed by invalid characters.
    Random random = new Random(-55);
    Regex regex = new Regex(@"\w*");
    for (int i = 0; i < 100; i++)
    {
        StringBuilder input = new StringBuilder();
        StringBuilder expected = new StringBuilder();
        AppendRandom(random, validChars, ValidRunLength, input, expected);
        AppendRandom(random, invalidChars, InvalidRunLength, input, null);

        Match match = regex.Match(input.ToString());
        Assert.True(match.Success);
        Assert.Equal(expected.ToString(), match.Value);
        Assert.Equal(0, match.Index);
        Assert.Equal(ValidRunLength, match.Length);

        // Because the pattern uses '*', every following non-matching position
        // is reported as an empty match.
        match = match.NextMatch();
        do
        {
            Assert.Equal(string.Empty, match.Value);
            Assert.Equal(0, match.Length);
            match = match.NextMatch();
        }
        while (match.Success);
    }

    // \w+ : invalid characters, then valid characters, then invalid characters again.
    random = new Random(-55);
    regex = new Regex(@"\w+");
    for (int i = 0; i < 500; i++)
    {
        StringBuilder input = new StringBuilder();
        StringBuilder expected = new StringBuilder();
        AppendRandom(random, invalidChars, InvalidRunLength, input, null);
        AppendRandom(random, validChars, ValidRunLength, input, expected);
        AppendRandom(random, invalidChars, InvalidRunLength, input, null);

        Match match = regex.Match(input.ToString());
        Assert.True(match.Success);
        Assert.Equal(expected.ToString(), match.Value);
        Assert.Equal(InvalidRunLength, match.Index);
        Assert.Equal(ValidRunLength, match.Length);

        match = match.NextMatch();
        Assert.False(match.Success);
    }

    // Re-classify for \d; the random generator deliberately continues from its current state.
    validChars = new List<char>();
    invalidChars = new List<char>();
    Partition(IsDigitCategory, validChars, invalidChars);

    // \d+ : valid characters followed by invalid characters.
    regex = new Regex(@"\d+");
    for (int i = 0; i < 100; i++)
    {
        StringBuilder input = new StringBuilder();
        StringBuilder expected = new StringBuilder();
        AppendRandom(random, validChars, ValidRunLength, input, expected);
        AppendRandom(random, invalidChars, InvalidRunLength, input, null);

        Match match = regex.Match(input.ToString());
        Assert.Equal(expected.ToString(), match.Value);
        Assert.Equal(0, match.Index);
        Assert.Equal(ValidRunLength, match.Length);

        match = match.NextMatch();
        Assert.False(match.Success);
    }

    // \d+ : invalid characters, then valid characters, then invalid characters again.
    regex = new Regex(@"\d+");
    for (int i = 0; i < 100; i++)
    {
        StringBuilder input = new StringBuilder();
        StringBuilder expected = new StringBuilder();
        AppendRandom(random, invalidChars, InvalidRunLength, input, null);
        AppendRandom(random, validChars, ValidRunLength, input, expected);
        AppendRandom(random, invalidChars, InvalidRunLength, input, null);

        Match match = regex.Match(input.ToString());
        Assert.True(match.Success);
        Assert.Equal(expected.ToString(), match.Value);
        Assert.Equal(InvalidRunLength, match.Index);
        Assert.Equal(ValidRunLength, match.Length);

        match = match.NextMatch();
        Assert.False(match.Success);
    }
}
/// <summary>
/// Returns <c>true</c> when the Unicode category of <paramref name="c"/> is contained in
/// <c>s_firstIdentifierCharCategories</c>.
/// </summary>
private static bool IsValidFirstIdentifierChar(char c)
    => s_firstIdentifierCharCategories.Contains(CharUnicodeInfo.GetUnicodeCategory(c));
/// <summary>
/// Generates a slug from <paramref name="text"/>.
/// </summary>
/// <remarks>
/// The text is trimmed, lower-cased with the invariant culture (so the result does not depend
/// on the current culture, e.g. Turkish dotless i) and decomposed to form KD. Lowercase/other
/// letters, decimal digits, '-', '_' and '~' are kept, diacritics (non-spacing marks) are
/// removed and every other character becomes a dash; consecutive dashes are collapsed into
/// one. The result is limited to 1000 characters.
/// </remarks>
/// <param name="text">Text to convert; <c>null</c> or empty input is returned unchanged.</param>
/// <returns>The slug.</returns>
public string Slugify(string text)
{
    const int MaxSlugLength = 1000;

    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    var decomposed = text.Trim().ToLowerInvariant().Normalize(NormalizationForm.FormKD);

    var sb = new StringBuilder(decomposed.Length);
    foreach (var t in decomposed)
    {
        // Allowed symbols that are copied as-is
        if (t == '_' || t == '~')
        {
            sb.Append(t);
            continue;
        }

        if (t != '-')
        {
            switch (CharUnicodeInfo.GetUnicodeCategory(t))
            {
                case UnicodeCategory.LowercaseLetter:
                case UnicodeCategory.OtherLetter:
                case UnicodeCategory.DecimalDigitNumber:
                    // Keep letters and digits
                    sb.Append(t);
                    continue;

                case UnicodeCategory.NonSpacingMark:
                    // Remove diacritics
                    continue;
            }
        }

        // A literal dash or any other character: emit one dash, collapsing dash groups
        // on the fly instead of repeatedly rewriting the string afterwards.
        if (sb.Length == 0 || sb[sb.Length - 1] != '-')
        {
            sb.Append('-');
        }
    }

    var slug = sb.ToString().Normalize(NormalizationForm.FormC);

    if (slug.Length > MaxSlugLength)
    {
        slug = slug.Substring(0, MaxSlugLength);
    }

    // Remove any leading and trailing hyphen, underscore or dot
    return slug.Trim('-', '_', '.');
}
/// <summary>
/// Returns <c>true</c> when <paramref name="character"/> may start an identifier:
/// an underscore, any letter, or a character in the letter-number category.
/// </summary>
/// <param name="character">The character to test.</param>
private static bool IsIdentifierStart(char character)
{
    if (character == '_' || char.IsLetter(character))
    {
        return true;
    }

    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(character);
    return category == UnicodeCategory.LetterNumber;
}
/// <summary>
/// Creates http url friendly text from input
/// </summary>
/// <param name="input">Text to convert; null, empty or whitespace-only input yields an empty string.</param>
/// <param name="maxLength">Maximum length of the result; values of 0 or less mean "no limit".</param>
/// <returns>The url friendly text.</returns>
public static string ToSlug(this string input, int maxLength = 0)
{
    if (String.IsNullOrWhiteSpace(input))
    {
        return String.Empty;
    }

    // Lower-case first, then decompose so that accents become separate (ignored) marks.
    string decomposed = input.ToLowerInvariant().Normalize(NormalizationForm.FormD);

    var slug = new StringBuilder();
    bool lastWasDash = false;

    foreach (char c in decomposed)
    {
        switch (CharUnicodeInfo.GetUnicodeCategory(c))
        {
            // Letters and digits are kept; non-ASCII ones are swapped for their replacement.
            case UnicodeCategory.LowercaseLetter:
            case UnicodeCategory.UppercaseLetter:
            case UnicodeCategory.DecimalDigitNumber:
                if (c < 128)
                {
                    slug.Append(c);
                }
                else
                {
                    slug.Append(FindReplacement(c));
                }

                lastWasDash = false;
                break;

            // Separators and punctuation become a single hyphen (never two in a row).
            case UnicodeCategory.SpaceSeparator:
            case UnicodeCategory.ConnectorPunctuation:
            case UnicodeCategory.DashPunctuation:
            case UnicodeCategory.OtherPunctuation:
            case UnicodeCategory.MathSymbol:
                if (!lastWasDash)
                {
                    slug.Append('-');
                    lastWasDash = true;
                }

                break;

            // Every other category is dropped.
        }

        // If we are at max length, stop parsing
        if (maxLength > 0 && slug.Length >= maxLength)
        {
            break;
        }
    }

    string result = slug.ToString().Trim('-');

    // A multi-character replacement may have overshot the limit; cut it back.
    if (maxLength > 0 && result.Length > maxLength)
    {
        return result.Substring(0, maxLength);
    }

    return result;
}
/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> is a word character: any letter category
/// (Ll, Lu, Lt, Lo, Lm), a decimal digit, or connector punctuation such as the underscore.
/// </summary>
/// <param name="ch">The character to test.</param>
private static bool IsWordChar(char ch)
{
    switch (CharUnicodeInfo.GetUnicodeCategory(ch))
    {
        case UnicodeCategory.LowercaseLetter:
        case UnicodeCategory.UppercaseLetter:
        case UnicodeCategory.TitlecaseLetter:
        case UnicodeCategory.OtherLetter:
        case UnicodeCategory.ModifierLetter:
        case UnicodeCategory.DecimalDigitNumber:
        case UnicodeCategory.ConnectorPunctuation:
            return true;
        default:
            return false;
    }
}