Exemplo n.º 1
0
 /// <summary>
 /// Deletes text immediately to the left of the caret in <paramref name="textBox"/>.
 /// Trailing whitespace before the caret is always included; beyond that the deletion covers
 /// either a complete <c>&lt;...&gt;</c> tag, a complete <c>{\...}</c> block, or a run of
 /// characters delimited by whitespace or by <c>BreakChars</c>.
 /// Nothing happens when the text box currently has a selection.
 /// </summary>
 /// <param name="textBox">The text box whose content is edited in place.</param>
 public static void ApplyControlBackspace(TextBox textBox)
 {
     if (textBox.SelectionLength == 0)
     {
         var text       = textBox.Text;
         var deleteUpTo = textBox.SelectionStart;
         if (deleteUpTo > 0 && deleteUpTo <= text.Length)
         {
             // Only the text in front of the caret is examined. It is split into text elements
             // so that combining sequences and surrogate pairs are stepped over as one unit.
             text = text.Substring(0, deleteUpTo);
             var textElementIndices = StringInfo.ParseCombiningCharacters(text);
             var index      = textElementIndices.Length;
             var textIndex  = deleteUpTo;
             // deleteFrom states: -1 = start not decided yet, -2 = marker set below when the
             // element in front of the caret is one of BreakChars, >= 0 = index to delete from.
             var deleteFrom = -1;
             // Step backwards over the text elements until one is found that IsSpaceCategory
             // rejects (helper not visible here; presumably it matches whitespace categories).
             while (index > 0)
             {
                 index--;
                 textIndex = textElementIndices[index];
                 if (!IsSpaceCategory(CharUnicodeInfo.GetUnicodeCategory(text, textIndex)))
                 {
                     break;
                 }
             }
             if (index > 0) // HTML tag?
             {
                 if (text[textIndex] == '>')
                 {
                     // Accept the nearest '<' only when the first '>' after it is the one at
                     // textIndex, i.e. no other '>' lies in between.
                     var openingBracketIndex = text.LastIndexOf('<', textIndex - 1);
                     if (openingBracketIndex >= 0 && text.IndexOf('>', openingBracketIndex + 1) == textIndex)
                     {
                         deleteFrom = openingBracketIndex; // delete whole tag
                     }
                 }
                 else if (text[textIndex] == '}')
                 {
                     // Same check for a block opened by "{\" (looks like a subtitle override
                     // tag - TODO confirm with the caller's format).
                     var startIdx = text.LastIndexOf(@"{\", textIndex - 1, StringComparison.Ordinal);
                     if (startIdx >= 0 && text.IndexOf('}', startIdx + 1) == textIndex)
                     {
                         deleteFrom = startIdx; // delete whole block
                     }
                 }
             }
             if (deleteFrom < 0)
             {
                 // No tag/block matched: search backwards for the start of the run to delete.
                 if (BreakChars.Contains(text[textIndex]))
                 {
                     deleteFrom = -2;
                 }
                 while (index > 0)
                 {
                     index--;
                     textIndex = textElementIndices[index];
                     if (IsSpaceCategory(CharUnicodeInfo.GetUnicodeCategory(text, textIndex)))
                     {
                         if (deleteFrom > -2)
                         {
                             // Whitespace ends the run; delete from the element that follows it
                             // unless a start index was already recorded.
                             if (deleteFrom < 0)
                             {
                                 deleteFrom = textElementIndices[index + 1];
                             }
                             break;
                         }
                         // Still in the -2 state: record the element after this whitespace as
                         // the start, and keep scanning only when that element is ':', '!' or '?'.
                         deleteFrom = textElementIndices[index + 1];
                         if (!":!?".Contains(text[deleteFrom]))
                         {
                             break;
                         }
                     }
                     else if (BreakChars.Contains(text[textIndex]))
                     {
                         // A break character ends the run unless the scan is still in the -2
                         // state, in which case consecutive break characters are skipped.
                         if (deleteFrom > -2)
                         {
                             if (deleteFrom < 0)
                             {
                                 deleteFrom = textElementIndices[index + 1];
                             }
                             break;
                         }
                     }
                     else
                     {
                         // Any other character resets the state to "not decided yet".
                         deleteFrom = -1;
                     }
                 }
             }
             if (deleteFrom < deleteUpTo)
             {
                 // A still-negative state means the scan reached the start of the text.
                 if (deleteFrom < 0)
                 {
                     deleteFrom = 0;
                 }
                 // Select the range and replace it with an empty string via Paste.
                 textBox.Select(deleteFrom, deleteUpTo - deleteFrom);
                 textBox.Paste(string.Empty);
             }
         }
     }
 }
Exemplo n.º 2
0
 /// <summary>
 /// Looks up the numeric value associated with a Unicode character.
 /// </summary>
 /// <param name="c">The character to look up.</param>
 /// <returns>The value reported by <c>CharUnicodeInfo.GetNumericValue</c> for <paramref name="c"/>.</returns>
 public static double GetNumericValue(char c)
 {
     double numericValue = CharUnicodeInfo.GetNumericValue(c);
     return numericValue;
 }
        /// <summary>
        ///     Converts <paramref name="identityNumber" /> into a
        ///     <see cref="StatementBruteForce.Core.SouthAfricanIdentityNumberModel" />.
        /// </summary>
        /// <param name="identityNumber">South African identity number laid out as YYMMDDSSSSCAZ (13 characters).</param>
        /// <returns>
        ///     A <see cref="StatementBruteForce.Core.SouthAfricanIdentityNumberModel" /> built from the
        ///     characters of the input; every component whose source characters are not digits is passed as -1.
        /// </returns>
        public static SouthAfricanIdentityNumberModel ParseIdentityNumberStringToModel(string identityNumber)
        {
            var symbols = identityNumber.ToCharArray();

            // True when the characters at position and position + 1 are both digits.
            bool IsDigitPair(int position)
            {
                return !(!char.IsDigit(symbols[position]) || !char.IsDigit(symbols[position + 1]));
            }

            // Decimal value of the two characters starting at position.
            int PairValue(int position)
            {
                int tens  = CharUnicodeInfo.GetDigitValue(symbols[position]);
                int units = CharUnicodeInfo.GetDigitValue(symbols[position + 1]);
                return 10 * tens + units;
            }

            // Value of the single digit at position, or -1 when it is not a digit.
            int DigitAt(int position)
            {
                if (!char.IsDigit(symbols[position]))
                {
                    return -1;
                }

                return CharUnicodeInfo.GetDigitValue(symbols[position]);
            }

            // The two-digit year is always placed in the 1900s.
            int year  = IsDigitPair(0) ? 1900 + PairValue(0) : -1;
            int month = IsDigitPair(2) ? PairValue(2) : -1;
            int day   = IsDigitPair(4) ? PairValue(4) : -1;

            return new SouthAfricanIdentityNumberModel(
                yearOfBirth: year,
                monthOfBirth: month,
                dayOfBirth: day,
                gender: DigitAt(6),
                genderSequence1: DigitAt(7),
                genderSequence2: DigitAt(8),
                genderSequence3: DigitAt(9),
                citizenship: DigitAt(10),
                obsolete: DigitAt(11),
                checksum: DigitAt(12));
        }
Exemplo n.º 4
0
        /// <summary>
        /// Writes one named string to <c>m_xmlWriter</c> as an <c>ElemString</c> element that holds
        /// an <c>ElemKey</c> child (the name) and an <c>ElemValue</c> child (the value).
        /// When the value is treated as protected and the format is not <c>KdbxFormat.PlainXml</c>,
        /// the value is written as Base64 of <c>value.ReadXorredString(m_randomStream)</c> together
        /// with the <c>AttrProtected</c> attribute; otherwise it is written as text.
        /// </summary>
        /// <param name="name">Key of the string; written through <c>StrUtil.SafeXmlString</c>.</param>
        /// <param name="value">Value to write; must not be null.</param>
        /// <param name="bIsEntryString">When true, the protection flag for the standard field names
        /// (title, user name, password, URL, notes) is taken from <c>m_pwDatabase.MemoryProtection</c>
        /// instead of from <paramref name="value"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        private void WriteObject(string name, ProtectedString value, bool bIsEntryString)
        {
            Debug.Assert(name != null);
            Debug.Assert(value != null); if (value == null)
            {
                throw new ArgumentNullException("value");
            }

            // <ElemString><ElemKey>name</ElemKey><ElemValue ...>
            m_xmlWriter.WriteStartElement(ElemString);
            m_xmlWriter.WriteStartElement(ElemKey);
            m_xmlWriter.WriteString(StrUtil.SafeXmlString(name));
            m_xmlWriter.WriteEndElement();
            m_xmlWriter.WriteStartElement(ElemValue);

            // Start from the value's own flag; it may be overridden below for standard fields.
            bool bProtected = value.IsProtected;

            if (bIsEntryString)
            {
                // Adjust memory protection setting (which might be different
                // from the database default, e.g. due to an import which
                // didn't specify the correct setting)
                if (name == PwDefs.TitleField)
                {
                    bProtected = m_pwDatabase.MemoryProtection.ProtectTitle;
                }
                else if (name == PwDefs.UserNameField)
                {
                    bProtected = m_pwDatabase.MemoryProtection.ProtectUserName;
                }
                else if (name == PwDefs.PasswordField)
                {
                    bProtected = m_pwDatabase.MemoryProtection.ProtectPassword;
                }
                else if (name == PwDefs.UrlField)
                {
                    bProtected = m_pwDatabase.MemoryProtection.ProtectUrl;
                }
                else if (name == PwDefs.NotesField)
                {
                    bProtected = m_pwDatabase.MemoryProtection.ProtectNotes;
                }
            }

            if (bProtected && (m_format != KdbxFormat.PlainXml))
            {
                // Protected value in a non-plain format: mark it and emit the XOR-ed bytes as Base64.
                m_xmlWriter.WriteAttributeString(AttrProtected, ValTrue);

                byte[] pbEncoded = value.ReadXorredString(m_randomStream);
                if (pbEncoded.Length > 0)
                {
                    m_xmlWriter.WriteBase64(pbEncoded, 0, pbEncoded.Length);
                }
            }
            else
            {
                string strValue = value.ReadString();

                // If names should be localized, we need to apply the language-dependent
                // string transformation here. By default, language-dependent conversions
                // should be applied, otherwise characters could be rendered incorrectly
                // (code page problems).
                if (m_bLocalizedNames)
                {
                    // Rebuild the string one UTF-16 code unit at a time, remapping some of them.
                    StringBuilder sb = new StringBuilder();
                    foreach (char ch in strValue)
                    {
                        char chMapped = ch;

                        // Symbols and surrogates must be moved into the correct code
                        // page area
                        if (char.IsSymbol(ch) || char.IsSurrogate(ch))
                        {
                            System.Globalization.UnicodeCategory cat =
                                CharUnicodeInfo.GetUnicodeCategory(ch);
                            // Map character to correct position in code page
                            chMapped = (char)((int)cat * 32 + ch);
                        }
                        else if (char.IsControl(ch))
                        {
                            if (ch >= 256) // Control character in high ANSI code page
                            {
                                // Some of the control characters map to corresponding ones
                                // in the low ANSI range (up to 255) when calling
                                // ToLower on them with invariant culture (see
                                // http://lists.ximian.com/pipermail/mono-patches/2002-February/086106.html )
#if !KeePassLibSD
                                chMapped = char.ToLowerInvariant(ch);
#else
                                chMapped = char.ToLower(ch);
#endif
                            }
                        }

                        sb.Append(chMapped);
                    }

                    strValue = sb.ToString(); // Correct string for current code page
                }

                // In plain XML the value is written as text, but a separate attribute records
                // that it was flagged as protected.
                if ((m_format == KdbxFormat.PlainXml) && bProtected)
                {
                    m_xmlWriter.WriteAttributeString(AttrProtectedInMemPlainXml, ValTrue);
                }

                m_xmlWriter.WriteString(StrUtil.SafeXmlString(strValue));
            }

            m_xmlWriter.WriteEndElement(); // ElemValue
            m_xmlWriter.WriteEndElement(); // ElemString
        }
Exemplo n.º 5
0
        /// <summary>
        /// Determines whether a character is a Unicode formatting character (class Cf).
        /// </summary>
        /// <param name="ch">The character to classify.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="ch"/> lies above the ASCII range and the category-based
        /// overload of <c>IsFormattingChar</c> accepts its Unicode category; otherwise <c>false</c>.
        /// </returns>
        internal static bool IsFormattingChar(char ch)
        {
            // ASCII holds no formatting characters, so the category lookup is skipped for it.
            if (ch <= 127)
            {
                return false;
            }

            var category = CharUnicodeInfo.GetUnicodeCategory(ch);
            return IsFormattingChar(category);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a lower-case, hyphen-separated slug that is safe to use in URLs.
        /// </summary>
        /// <param name="text">Text to convert; <c>null</c> produces an empty string.</param>
        /// <param name="maxLength">Maximum slug length; zero or a negative value disables the limit.</param>
        /// <returns>The slug, without leading or trailing hyphens.</returns>
        public static string UrlFriendly(string text, int maxLength = 0)
        {
            if (text == null)
            {
                return "";
            }

            // FormD splits accented letters into a base letter plus combining marks; the marks
            // match none of the categories handled below and are therefore dropped.
            string decomposed = text.ToLowerInvariant().Normalize(NormalizationForm.FormD);
            var slug = new StringBuilder();
            bool lastWasHyphen = false;

            foreach (char current in decomposed)
            {
                switch (CharUnicodeInfo.GetUnicodeCategory(current))
                {
                    // Letters and digits are kept; non-ASCII ones go through the remapping helper.
                    case UnicodeCategory.LowercaseLetter:
                    case UnicodeCategory.UppercaseLetter:
                    case UnicodeCategory.DecimalDigitNumber:
                        if (current >= 128)
                        {
                            slug.Append(ConstHelper.RemapInternationalCharToAscii(current));
                        }
                        else
                        {
                            slug.Append(current);
                        }
                        lastWasHyphen = false;
                        break;

                    // Separators and punctuation collapse into a single hyphen.
                    case UnicodeCategory.SpaceSeparator:
                    case UnicodeCategory.ConnectorPunctuation:
                    case UnicodeCategory.DashPunctuation:
                    case UnicodeCategory.OtherPunctuation:
                    case UnicodeCategory.MathSymbol:
                        if (!lastWasHyphen)
                        {
                            slug.Append('-');
                            lastWasHyphen = true;
                        }
                        break;
                }

                // Stop consuming input once the requested length has been reached.
                if (maxLength > 0 && slug.Length >= maxLength)
                {
                    break;
                }
            }

            string trimmed = slug.ToString().Trim('-');

            // A remapped character may have pushed the slug past the limit; cut it back.
            if (maxLength > 0 && trimmed.Length > maxLength)
            {
                return trimmed.Substring(0, maxLength);
            }

            return trimmed;
        }
Exemplo n.º 7
0
        // private methods
        /// <summary>
        /// Returns <paramref name="value"/> with quotes, backslashes, the common control characters
        /// and every character outside the letter, number, space-separator, punctuation and symbol
        /// categories replaced by an escape sequence.
        /// </summary>
        /// <param name="value">The string to escape.</param>
        /// <returns>
        /// <paramref name="value"/> itself when <c>NeedsEscaping</c> flags none of its characters;
        /// otherwise a new, escaped string.
        /// </returns>
        private string EscapedString(string value)
        {
            // Fast path: nothing to escape, hand back the original instance.
            if (!value.Any(ch => NeedsEscaping(ch)))
            {
                return value;
            }

            var escaped = new StringBuilder(value.Length);

            for (int position = 0; position < value.Length; position++)
            {
                char current = value[position];

                if (current == '"')
                {
                    escaped.Append("\\\"");
                }
                else if (current == '\\')
                {
                    escaped.Append("\\\\");
                }
                else if (current == '\b')
                {
                    escaped.Append("\\b");
                }
                else if (current == '\f')
                {
                    escaped.Append("\\f");
                }
                else if (current == '\n')
                {
                    escaped.Append("\\n");
                }
                else if (current == '\r')
                {
                    escaped.Append("\\r");
                }
                else if (current == '\t')
                {
                    escaped.Append("\\t");
                }
                else
                {
                    // Decide from the Unicode category whether the character can be copied as is.
                    bool copyVerbatim;
                    switch (CharUnicodeInfo.GetUnicodeCategory(current))
                    {
                        case UnicodeCategory.UppercaseLetter:
                        case UnicodeCategory.LowercaseLetter:
                        case UnicodeCategory.TitlecaseLetter:
                        case UnicodeCategory.OtherLetter:
                        case UnicodeCategory.DecimalDigitNumber:
                        case UnicodeCategory.LetterNumber:
                        case UnicodeCategory.OtherNumber:
                        case UnicodeCategory.SpaceSeparator:
                        case UnicodeCategory.ConnectorPunctuation:
                        case UnicodeCategory.DashPunctuation:
                        case UnicodeCategory.OpenPunctuation:
                        case UnicodeCategory.ClosePunctuation:
                        case UnicodeCategory.InitialQuotePunctuation:
                        case UnicodeCategory.FinalQuotePunctuation:
                        case UnicodeCategory.OtherPunctuation:
                        case UnicodeCategory.MathSymbol:
                        case UnicodeCategory.CurrencySymbol:
                        case UnicodeCategory.ModifierSymbol:
                        case UnicodeCategory.OtherSymbol:
                            copyVerbatim = true;
                            break;

                        default:
                            copyVerbatim = false;
                            break;
                    }

                    if (copyVerbatim)
                    {
                        escaped.Append(current);
                    }
                    else
                    {
                        // Everything else becomes a four-digit lower-case hex \u escape.
                        escaped.AppendFormat("\\u{0:x4}", (int)current);
                    }
                }
            }

            return escaped.ToString();
        }
Exemplo n.º 8
0
 /// <summary>
 /// Classifies the character found at a given position of a string.
 /// </summary>
 /// <param name="value">The <see cref="string"/> to inspect.</param>
 /// <param name="index">Position of the character inside <paramref name="value"/>.</param>
 /// <returns>
 /// The <see cref="UnicodeCharacterClass"/> that <c>GetCharacterClassFromCategory</c> assigns to the
 /// Unicode category of the character at <paramref name="index"/>.
 /// </returns>
 public static UnicodeCharacterClass GetCharacterClass(string value, int index)
 {
     var category = CharUnicodeInfo.GetUnicodeCategory(value, index);
     return GetCharacterClassFromCategory(category);
 }
Exemplo n.º 9
0
        /// <summary>
        /// Normalizes <paramref name="part"/> to form KC and verifies that it contains no
        /// prohibited characters and, unless <c>allowUnassigned</c> is set, no unassigned ones.
        /// </summary>
        /// <param name="part">The piece of the input string to prepare.</param>
        /// <param name="start">Offset added to the character position reported in error messages.</param>
        /// <returns>The normalized string.</returns>
        /// <exception cref="ArgumentException">A prohibited or unassigned character was found.</exception>
        private string NamePrep(string part, int start)
        {
            part = part.Normalize(NormalizationForm.FormKC);

            // Pass 1: reject prohibited characters.
            for (int pos = 0; pos < part.Length; pos++)
            {
                char unit = part[pos];
                bool rejected;

                switch (CharUnicodeInfo.GetUnicodeCategory(part, pos))
                {
                    case UnicodeCategory.SpaceSeparator:
                        // Only ASCII space separators are accepted.
                        rejected = unit >= '\x80';
                        break;

                    case UnicodeCategory.Control:
                        // ASCII control characters other than NUL are accepted.
                        rejected = unit == '\x0' || unit >= '\x80';
                        break;

                    case UnicodeCategory.PrivateUse:
                    case UnicodeCategory.Surrogate:
                        rejected = true;
                        break;

                    default:
                        rejected =
                            ('\uFDDF' <= unit && unit <= '\uFDEF') ||
                            unit == '\uFFFE' ||
                            ('\uFFF9' <= unit && unit <= '\uFFFD') ||
                            ('\u2FF0' <= unit && unit <= '\u2FFB') ||
                            ('\u202A' <= unit && unit <= '\u202E') ||
                            ('\u206A' <= unit && unit <= '\u206F') ||
                            unit == '\u0340' ||
                            unit == '\u0341' ||
                            unit == '\u200E' ||
                            unit == '\u200F' ||
                            unit == '\u2028' ||
                            unit == '\u2029';
                        break;
                }

                if (rejected)
                {
                    throw new ArgumentException(string.Format("Not allowed character was in the input string, at {0}", start + pos));
                }
            }

            if (this.allowUnassigned)
            {
                return part;
            }

            // Pass 2: reject code points that Unicode has not assigned.
            for (int pos = 0; pos < part.Length; pos++)
            {
                UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(part, pos);
                if (category == UnicodeCategory.OtherNotAssigned)
                {
                    throw new ArgumentException(string.Format("Use of unassigned Unicode character is prohibited at {0}", start + pos));
                }
            }

            return part;
        }
 /// <summary>
 /// Reports whether a character is the percent sign or any Unicode currency symbol.
 /// </summary>
 /// <param name="character">The character to test.</param>
 /// <returns><c>true</c> for <c>'%'</c> and for characters in the currency-symbol category.</returns>
 private static bool IsInvalidPunctuationSymbol(char character)
 {
     if (character == '%')
     {
         return true;
     }

     UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(character);
     return category == UnicodeCategory.CurrencySymbol;
 }
Exemplo n.º 11
0
 /// <summary>
 /// Strips combining diacritical marks from a string.
 /// </summary>
 /// <param name="text">The text to process.</param>
 /// <returns>
 /// <paramref name="text"/> decomposed (form D), with every non-spacing mark removed, and then
 /// recomposed (form C).
 /// </returns>
 public static string RemoveDiacritics(this string text)
 {
     string decomposed = text.Normalize(NormalizationForm.FormD);
     var kept = new StringBuilder(decomposed.Length);

     foreach (char ch in decomposed)
     {
         // After decomposition the accents are separate non-spacing marks; skip them.
         if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark)
         {
             continue;
         }

         kept.Append(ch);
     }

     return kept.ToString().Normalize(NormalizationForm.FormC);
 }
Exemplo n.º 12
0
 /// <summary>
 /// Reports whether a string contains any combining diacritical mark once decomposed.
 /// </summary>
 /// <param name="source">The text to inspect.</param>
 /// <returns>
 /// <c>true</c> when the form-D decomposition of <paramref name="source"/> contains at least one
 /// non-spacing mark; otherwise <c>false</c>.
 /// </returns>
 public static bool HasAccents(this string source)
 {
     string decomposed = source.Normalize(NormalizationForm.FormD);

     foreach (char ch in decomposed)
     {
         if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark)
         {
             return true;
         }
     }

     return false;
 }
Exemplo n.º 13
0
        /// <summary>
        /// Formats a string for display, optionally wrapping it in double quotes and escaping
        /// non-printable characters.
        /// </summary>
        /// <param name="value">The string to format; must not be null.</param>
        /// <param name="options">
        /// Flags consulted for <c>UseQuotes</c> and <c>EscapeNonPrintableCharacters</c>.
        /// </param>
        /// <returns>The formatted text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        public static string FormatLiteral(string value, ObjectDisplayOptions options)
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }

            const char quote = '"';

            var pooled = PooledStringBuilder.GetInstance();
            var output = pooled.Builder;

            var quoted   = options.IncludesOption(ObjectDisplayOptions.UseQuotes);
            var escaping = options.IncludesOption(ObjectDisplayOptions.EscapeNonPrintableCharacters);

            // A verbatim (@"...") literal is used only for quoted, unescaped text containing a new line.
            var verbatim = quoted && !escaping && ContainsNewLine(value);

            if (verbatim)
            {
                output.Append('@');
            }
            if (quoted)
            {
                output.Append(quote);
            }

            for (int pos = 0; pos < value.Length; pos++)
            {
                char current = value[pos];

                if (escaping && CharUnicodeInfo.GetUnicodeCategory(current) == UnicodeCategory.Surrogate)
                {
                    // Classify the code point that starts at this position.
                    var category = CharUnicodeInfo.GetUnicodeCategory(value, pos);
                    if (category == UnicodeCategory.Surrogate)
                    {
                        // Unpaired surrogate: emit it as a four-digit \u escape.
                        output.Append("\\u").Append(((int)current).ToString("x4"));
                    }
                    else if (NeedsEscaping(category))
                    {
                        // Surrogate pair that must be escaped: emit one eight-digit \U escape
                        // and step over the second half of the pair.
                        int codePoint = char.ConvertToUtf32(value, pos);
                        output.Append("\\U").Append(codePoint.ToString("x8"));
                        pos++;
                    }
                    else
                    {
                        // Printable surrogate pair: copy both halves unchanged.
                        output.Append(current);
                        pos++;
                        output.Append(value[pos]);
                    }
                    continue;
                }

                if (escaping && TryReplaceChar(current, out var replacement))
                {
                    output.Append(replacement);
                    continue;
                }

                if (quoted && current == quote)
                {
                    // A quote is doubled inside a verbatim literal and backslash-escaped otherwise.
                    output.Append(verbatim ? quote : '\\');
                    output.Append(quote);
                    continue;
                }

                output.Append(current);
            }

            if (quoted)
            {
                output.Append(quote);
            }

            return pooled.ToStringAndFree();
        }
Exemplo n.º 14
0
        /// <summary>
        /// Computes the <c>WordDelimiterFilter</c> type flags of the given character.
        /// </summary>
        /// <param name="ch">
        /// Character whose type is to be determined, given as a UTF-16 code unit or as a full
        /// Unicode code point. Supplementary code points (0x10000-0x10FFFF) are classified by their
        /// own Unicode category instead of being truncated to 16 bits.
        /// </param>
        /// <returns>
        /// <c>UPPER</c> or <c>LOWER</c> for cased letters, <c>ALPHA</c> for other letters and marks,
        /// <c>DIGIT</c> for numbers, <c>ALPHA | DIGIT</c> for surrogates, and <c>SUBWORD_DELIM</c>
        /// for everything else.
        /// </returns>
        public static byte GetType(int ch)
        {
            UnicodeCategory category;
            if (ch >= 0x10000 && ch <= 0x10FFFF)
            {
                // A cast to char would drop the high bits and classify an unrelated BMP character,
                // so look the code point up through its UTF-16 string form instead.
                category = CharUnicodeInfo.GetUnicodeCategory(char.ConvertFromUtf32(ch), 0);
            }
            else
            {
                category = CharUnicodeInfo.GetUnicodeCategory((char)ch);
            }

            switch (category)
            {
            case UnicodeCategory.UppercaseLetter:
                return(WordDelimiterFilter.UPPER);

            case UnicodeCategory.LowercaseLetter:
                return(WordDelimiterFilter.LOWER);

            case UnicodeCategory.TitlecaseLetter:
            case UnicodeCategory.ModifierLetter:
            case UnicodeCategory.OtherLetter:
            case UnicodeCategory.NonSpacingMark:
            case UnicodeCategory.EnclosingMark:     // depends what it encloses?
            case UnicodeCategory.SpacingCombiningMark:
                return(WordDelimiterFilter.ALPHA);

            case UnicodeCategory.DecimalDigitNumber:
            case UnicodeCategory.LetterNumber:
            case UnicodeCategory.OtherNumber:
                return(WordDelimiterFilter.DIGIT);

            case UnicodeCategory.Surrogate:
                // Flagged as both alpha and digit.
                return(WordDelimiterFilter.ALPHA | WordDelimiterFilter.DIGIT);

            // Separators, control, format and private-use characters, as well as all
            // punctuation and symbol categories, are sub-word delimiters.
            default:
                return(WordDelimiterFilter.SUBWORD_DELIM);
            }
        }
Exemplo n.º 15
0
 /// <summary>
 /// Converts the Unicode category of a character into a single-bit <c>UniCatFlags</c> value.
 /// </summary>
 /// <param name="ch">The character to classify.</param>
 /// <returns>A value with only the bit numbered by the character's <c>UnicodeCategory</c> set.</returns>
 private static UniCatFlags GetCatFlags(char ch)
 {
     int categoryIndex = (int)CharUnicodeInfo.GetUnicodeCategory(ch);
     uint flagBit = 1u << categoryIndex;
     return (UniCatFlags)flagBit;
 }
Exemplo n.º 16
0
 /// <summary>
 /// Looks up the Unicode category of a non-ASCII <see cref="Rune"/>.
 /// </summary>
 /// <param name="value">The rune to classify; a debug assertion checks that it is not ASCII.</param>
 /// <returns>The category <c>CharUnicodeInfo</c> reports for the rune's code point.</returns>
 private static UnicodeCategory GetUnicodeCategoryNonAscii(Rune value)
 {
     Debug.Assert(!value.IsAscii, "Shouldn't use this non-optimized code path for ASCII characters.");

     int codePoint = value.Value;
     return CharUnicodeInfo.GetUnicodeCategory(codePoint);
 }
Exemplo n.º 17
0
 /// <summary>
 /// Reports whether a character is a Unicode format character.
 /// </summary>
 /// <param name="ch">The character to test.</param>
 /// <returns>
 /// <c>true</c> when the code of <paramref name="ch"/> is at least <c>_rgchi.Length</c> and its
 /// Unicode category is <c>Format</c>; characters below that length are never reported.
 /// </returns>
 public static bool IsFormat(char ch)
 {
     if (ch < _rgchi.Length)
     {
         return false;
     }

     return CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.Format;
 }
Exemplo n.º 18
0
 /// <summary>
 /// Checks whether a character may start an identifier: any letter, or a connector
 /// punctuation character such as the underscore.
 /// </summary>
 /// <param name="c">The candidate first character.</param>
 /// <returns><c>true</c> when <paramref name="c"/> is acceptable as a first character.</returns>
 private static bool IsValidIdFirstChar(char c)
 {
     if (char.IsLetter(c))
     {
         return true;
     }

     UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(c);
     return category == UnicodeCategory.ConnectorPunctuation;
 }
Exemplo n.º 19
0
        /// <summary>
        /// An extension method to generate slugs from any language.
        /// Letters and digits are kept (lower-cased, without diacritics), <c>-</c>, <c>_</c> and
        /// <c>~</c> are preserved, and every other character becomes a dash; runs of dashes are
        /// collapsed into one.
        /// </summary>
        /// <param name="textToSlug">Text to convert to slug</param>
        /// <param name="maxLength">Maximum number of characters of the generated slug</param>
        /// <returns>human-readable and SEO friendly string</returns>
        public static string Slugify(this string textToSlug, int maxLength = 1000)
        {
            // Lower-case with the invariant culture so the slug does not depend on the culture of
            // the current thread (e.g. under tr-TR "I" would otherwise become a dotless i).
            string stFormKD = textToSlug.ToLowerInvariant().Normalize(NormalizationForm.FormKD);
            var    sb       = new StringBuilder(stFormKD.Length);

            foreach (char t in stFormKD)
            {
                // Allowed symbols other than the dash are copied as they are
                if (t == '_' || t == '~')
                {
                    sb.Append(t);
                    continue;
                }

                if (t != '-')
                {
                    UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(t);
                    if (uc == UnicodeCategory.LowercaseLetter ||
                        uc == UnicodeCategory.OtherLetter ||
                        uc == UnicodeCategory.DecimalDigitNumber)
                    {
                        // Keep letters and digits
                        sb.Append(t);
                        continue;
                    }

                    if (uc == UnicodeCategory.NonSpacingMark)
                    {
                        // Remove diacritics
                        continue;
                    }
                }

                // A literal dash or any other character: emit one dash, collapsing dash groups
                // while building instead of removing them from the finished string afterwards.
                if (sb.Length == 0 || sb[sb.Length - 1] != '-')
                {
                    sb.Append('-');
                }
            }

            var slug = sb.ToString().Normalize(NormalizationForm.FormC);

            // Limit resultant string length to maxLength
            if (slug.Length > maxLength)
            {
                slug = slug.Substring(0, maxLength);
            }

            // Remove any leading and trailing hyphen, underscore or dot
            return slug.Trim('-', '_', '.');
        }
Exemplo n.º 20
0
 /// <summary>
 /// Reports whether a character may begin an identifier: an underscore, or any character
 /// whose Unicode category is accepted by <c>IsLetterChar</c>.
 /// </summary>
 /// <param name="ch">The candidate first character.</param>
 static bool IsIdentifierStartCharacter(char ch)
 {
     if (ch == '_')
     {
         return true;
     }

     var category = CharUnicodeInfo.GetUnicodeCategory(ch);
     return IsLetterChar(category);
 }
Exemplo n.º 21
0
    /// <summary>
    /// Checks that the regex classes <c>\w</c> and <c>\d</c> agree with the Unicode categories
    /// reported by <see cref="CharUnicodeInfo"/> for every character below <c>MaxUnicodeRange</c>.
    /// </summary>
    /// <remarks>
    /// Characters are first partitioned into "valid" (expected to match the class) and "invalid"
    /// sets. Random strings are then assembled from both sets with a fixed seed, so any failure is
    /// reproducible, and the regex is expected to match exactly the run of valid characters.
    /// Only <c>\w</c> and <c>\d</c> are exercised here; <c>\s</c> is not covered by this test.
    /// </remarks>
    public static void RegexUnicodeChar()
    {
        const int validCharLength   = 10;
        const int invalidCharLength = 15;

        List<char> validChars;
        List<char> invalidChars;

        // \w is expected to be [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nd}\p{Mn}\p{Pc}].
        PartitionCharsByCategory(IsRegexWordCategory, out validChars, out invalidChars);

        // \w* against "valid run + invalid run": the first match is the whole valid run.
        Random random = new Random(-55);
        Regex  regex  = new Regex(@"\w*");

        for (int i = 0; i < 100; i++)
        {
            string expected;
            string input = BuildRandomInput(random, validChars, invalidChars, 0, validCharLength, invalidCharLength, out expected);

            Match match = regex.Match(input);
            Assert.True(match.Success);

            Assert.Equal(expected, match.Value);
            Assert.Equal(0, match.Index);
            Assert.Equal(validCharLength, match.Length);

            // Because the pattern uses '*', every remaining position yields an empty match;
            // each of them must have zero length until the matches run out.
            match = match.NextMatch();
            do
            {
                Assert.Equal(string.Empty, match.Value);
                Assert.Equal(0, match.Length);
                match = match.NextMatch();
            } while (match.Success);
        }

        // \w+ against "invalid + valid + invalid": exactly one match, the valid run.
        random = new Random(-55);
        regex  = new Regex(@"\w+");

        for (int i = 0; i < 500; i++)
        {
            AssertSingleRunMatch(regex, random, validChars, invalidChars, invalidCharLength, validCharLength, invalidCharLength);
        }

        // \d is expected to be exactly \p{Nd}.
        PartitionCharsByCategory(IsRegexDigitCategory, out validChars, out invalidChars);

        // \d+ against "valid + invalid". The random generator is intentionally NOT reseeded here,
        // so the sequence continues from the \w+ section above.
        regex = new Regex(@"\d+");

        for (int i = 0; i < 100; i++)
        {
            AssertSingleRunMatch(regex, random, validChars, invalidChars, 0, validCharLength, invalidCharLength);
        }

        // \d+ against "invalid + valid + invalid".
        regex = new Regex(@"\d+");

        for (int i = 0; i < 100; i++)
        {
            AssertSingleRunMatch(regex, random, validChars, invalidChars, invalidCharLength, validCharLength, invalidCharLength);
        }
    }

    // Unicode categories that this test expects \w to match. LetterNumber, OtherNumber and
    // SpacingCombiningMark are deliberately left out.
    private static bool IsRegexWordCategory(UnicodeCategory category)
    {
        switch (category)
        {
        case UnicodeCategory.UppercaseLetter:            // Lu
        case UnicodeCategory.LowercaseLetter:            // Ll
        case UnicodeCategory.TitlecaseLetter:            // Lt
        case UnicodeCategory.ModifierLetter:             // Lm
        case UnicodeCategory.OtherLetter:                // Lo
        case UnicodeCategory.DecimalDigitNumber:         // Nd
        case UnicodeCategory.NonSpacingMark:             // Mn
        case UnicodeCategory.ConnectorPunctuation:       // Pc
            return true;

        default:
            return false;
        }
    }

    // Unicode categories that this test expects \d to match (decimal digits only).
    private static bool IsRegexDigitCategory(UnicodeCategory category)
    {
        return category == UnicodeCategory.DecimalDigitNumber;     // Nd
    }

    // Splits every char below MaxUnicodeRange into those whose category satisfies isValid and the rest.
    private static void PartitionCharsByCategory(Func<UnicodeCategory, bool> isValid, out List<char> validChars, out List<char> invalidChars)
    {
        validChars   = new List<char>();
        invalidChars = new List<char>();

        for (int i = 0; i < MaxUnicodeRange; i++)
        {
            char c = (char)i;
            if (isValid(CharUnicodeInfo.GetUnicodeCategory(c)))
            {
                validChars.Add(c);
            }
            else
            {
                invalidChars.Add(c);
            }
        }
    }

    // Builds "leading invalid chars + valid chars + trailing invalid chars" from random picks and
    // returns it; expectedMatch receives just the valid run. Random draws happen in that order.
    private static string BuildRandomInput(Random random, List<char> validChars, List<char> invalidChars, int leadingInvalidLength, int validLength, int trailingInvalidLength, out string expectedMatch)
    {
        StringBuilder inputBuilder    = new StringBuilder();
        StringBuilder expectedBuilder = new StringBuilder();

        for (int j = 0; j < leadingInvalidLength; j++)
        {
            inputBuilder.Append(invalidChars[random.Next(invalidChars.Count)]);
        }
        for (int j = 0; j < validLength; j++)
        {
            char c = validChars[random.Next(validChars.Count)];
            inputBuilder.Append(c);
            expectedBuilder.Append(c);
        }
        for (int j = 0; j < trailingInvalidLength; j++)
        {
            inputBuilder.Append(invalidChars[random.Next(invalidChars.Count)]);
        }

        expectedMatch = expectedBuilder.ToString();
        return inputBuilder.ToString();
    }

    // Builds one random input and asserts that regex matches the valid run exactly once and nothing else.
    private static void AssertSingleRunMatch(Regex regex, Random random, List<char> validChars, List<char> invalidChars, int leadingInvalidLength, int validLength, int trailingInvalidLength)
    {
        string expected;
        string input = BuildRandomInput(random, validChars, invalidChars, leadingInvalidLength, validLength, trailingInvalidLength, out expected);

        Match match = regex.Match(input);
        Assert.True(match.Success);

        Assert.Equal(expected, match.Value);
        Assert.Equal(leadingInvalidLength, match.Index);
        Assert.Equal(validLength, match.Length);

        match = match.NextMatch();
        Assert.False(match.Success);
    }
Exemplo n.º 22
0
        /// <summary>
        /// Checks whether the Unicode category of <paramref name="c"/> is one of the categories
        /// held in <c>s_firstIdentifierCharCategories</c>.
        /// </summary>
        /// <param name="c">The candidate first character.</param>
        /// <returns><c>true</c> when the character's category is in that collection.</returns>
        private static bool IsValidFirstIdentifierChar(char c)
            => s_firstIdentifierCharCategories.Contains(CharUnicodeInfo.GetUnicodeCategory(c));
Exemplo n.º 23
0
        /// <summary>
        /// Converts <paramref name="text"/> into a URL-friendly slug.
        /// </summary>
        /// <remarks>
        /// The text is trimmed, lower-cased and compatibility-decomposed. Lowercase letters, other
        /// letters, decimal digits and the symbols '-', '_' and '~' are kept; non-spacing marks
        /// (diacritics) are dropped; every other character becomes a dash. Runs of dashes are then
        /// collapsed to one, the result is cut to 1000 characters, and leading/trailing '-', '_'
        /// and '.' are removed.
        /// </remarks>
        /// <param name="text">The text to convert.</param>
        /// <returns>
        /// The slug, or <paramref name="text"/> itself when it is <c>null</c> or empty.
        /// </returns>
        public string Slugify(string text)
        {
            const int maxSlugLength = 1000;

            if (string.IsNullOrEmpty(text))
            {
                return text;
            }

            // Lower-case with the invariant culture: a slug must not depend on the thread's current
            // culture (under tr-TR, culture-sensitive lower-casing turns 'I' into dotless 'ı').
            var decomposed = text.Trim().ToLowerInvariant().Normalize(NormalizationForm.FormKD);

            var sb = new StringBuilder(decomposed.Length);

            foreach (var t in decomposed)
            {
                // Allowed symbols
                if (t == '-' || t == '_' || t == '~')
                {
                    sb.Append(t);
                    continue;
                }

                switch (CharUnicodeInfo.GetUnicodeCategory(t))
                {
                case UnicodeCategory.LowercaseLetter:
                case UnicodeCategory.OtherLetter:
                case UnicodeCategory.DecimalDigitNumber:
                    // Keep letters and digits
                    sb.Append(t);
                    break;

                case UnicodeCategory.NonSpacingMark:
                    // Remove diacritics (split off from their base letter by FormKD)
                    break;

                default:
                    // Replace all other chars with dash
                    sb.Append('-');
                    break;
                }
            }

            var recomposed = sb.ToString().Normalize(NormalizationForm.FormC);

            // Collapse every run of dashes to a single dash in one pass, instead of repeatedly
            // rebuilding the string with Remove.
            var collapsed       = new StringBuilder(recomposed.Length);
            var previousWasDash = false;
            foreach (var ch in recomposed)
            {
                var isDash = ch == '-';
                if (!(isDash && previousWasDash))
                {
                    collapsed.Append(ch);
                }
                previousWasDash = isDash;
            }

            var slug = collapsed.ToString();

            if (slug.Length > maxSlugLength)
            {
                slug = slug.Substring(0, maxSlugLength);
            }

            // Trim after truncating so a cut that lands on a separator does not leave it dangling.
            return slug.Trim('-', '_', '.');
        }
 /// <summary>
 /// Tells whether <paramref name="character"/> may begin an identifier: a letter, an
 /// underscore, or a character in the Unicode LetterNumber category.
 /// </summary>
 /// <param name="character">The character to classify.</param>
 /// <returns><c>true</c> when the character can start an identifier; otherwise <c>false</c>.</returns>
 private static bool IsIdentifierStart(char character)
 {
     if (character == '_' || char.IsLetter(character))
     {
         return true;
     }

     // char.IsLetter does not cover letter-numbers, so they are checked by category.
     UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(character);
     return category == UnicodeCategory.LetterNumber;
 }
Exemplo n.º 25
0
        /// <summary>
        /// Creates HTTP URL friendly text (a "slug") from <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// The input is lower-cased (invariant culture) and canonically decomposed. ASCII letters
        /// and decimal digits are copied as-is; non-ASCII letters and digits are passed through
        /// <c>FindReplacement</c>. Space separators, connector/dash/other punctuation and math
        /// symbols become a single '-' (consecutive ones are merged). Characters in any other
        /// Unicode category, including the combining marks split off by the decomposition, are dropped.
        /// </remarks>
        /// <param name="input">The text to convert.</param>
        /// <param name="maxLength">
        /// Maximum length of the returned slug; zero or a negative value means no limit.
        /// </param>
        /// <returns>
        /// The slug without leading or trailing dashes, or an empty string when
        /// <paramref name="input"/> is <c>null</c>, empty or whitespace only.
        /// </returns>
        public static string ToSlug(this string input, int maxLength = 0)
        {
            if (String.IsNullOrWhiteSpace(input))
            {
                return String.Empty;
            }

            var normalizedString = input
                                   .ToLowerInvariant()
                                   .Normalize(NormalizationForm.FormD);

            var stringBuilder = new StringBuilder();
            var prevdash      = false;

            foreach (var c in normalizedString)
            {
                switch (CharUnicodeInfo.GetUnicodeCategory(c))
                {
                case UnicodeCategory.LowercaseLetter:
                case UnicodeCategory.UppercaseLetter:
                case UnicodeCategory.DecimalDigitNumber:
                    if (c < 128)
                    {
                        stringBuilder.Append(c);
                    }
                    else
                    {
                        stringBuilder.Append(FindReplacement(c));
                    }

                    prevdash = false;
                    break;

                // Separator-like characters turn into a hyphen, but only one per run.
                case UnicodeCategory.SpaceSeparator:
                case UnicodeCategory.ConnectorPunctuation:
                case UnicodeCategory.DashPunctuation:
                case UnicodeCategory.OtherPunctuation:
                case UnicodeCategory.MathSymbol:
                    // Never start the slug with a hyphen: it would be trimmed below anyway, yet
                    // it would count against maxLength and make the result shorter than allowed.
                    if (!prevdash && stringBuilder.Length > 0)
                    {
                        stringBuilder.Append('-');
                        prevdash = true;
                    }
                    break;
                }

                // If we are at max length, stop parsing
                if (maxLength > 0 && stringBuilder.Length >= maxLength)
                {
                    break;
                }
            }

            var result = stringBuilder.ToString().Trim('-');

            // A multi-character replacement can overshoot the limit; cut any excess.
            if (maxLength > 0 && result.Length > maxLength)
            {
                result = result.Substring(0, maxLength);
            }

            return result;
        }
Exemplo n.º 26
0
        /// <summary>
        /// Tells whether <paramref name="ch"/> counts as a word character: any letter category
        /// (lowercase, uppercase, titlecase, other, modifier), a decimal digit, or connector
        /// punctuation such as the underscore.
        /// </summary>
        /// <param name="ch">The character to classify.</param>
        /// <returns><c>true</c> for word characters; otherwise <c>false</c>.</returns>
        private static bool IsWordChar(char ch)
        {
            switch (CharUnicodeInfo.GetUnicodeCategory(ch))
            {
            case UnicodeCategory.LowercaseLetter:
            case UnicodeCategory.UppercaseLetter:
            case UnicodeCategory.TitlecaseLetter:
            case UnicodeCategory.OtherLetter:
            case UnicodeCategory.ModifierLetter:
            case UnicodeCategory.DecimalDigitNumber:
            case UnicodeCategory.ConnectorPunctuation:
                return true;

            default:
                return false;
            }
        }