/// <summary>
/// Tells whether <paramref name="ch"/> is accepted as a character at the end of a URI.
/// </summary>
/// <param name="ch">The character to classify.</param>
/// <returns>
/// For ASCII input: true for letters, digits and the listed punctuation characters.
/// For non-ASCII input: false for open/close punctuation, paragraph and space separators,
/// format characters, U+3001 and U+3002; true for everything else.
/// </returns>
static bool GetUriEnd_ValidChar(char ch)
{
    if (ch > 0x7f)
    {
        // Non-ASCII: reject by Unicode category first, then the two explicitly listed characters.
        switch (Char.GetUnicodeCategory(ch))
        {
            case UnicodeCategory.ClosePunctuation:
            case UnicodeCategory.OpenPunctuation:
            case UnicodeCategory.ParagraphSeparator:
            case UnicodeCategory.SpaceSeparator:
            case UnicodeCategory.Format:
                return false;

            default:
                return "\x3001\x3002".IndexOf(ch) < 0;
        }
    }

    // ASCII: letters, digits, or one of the explicitly allowed symbols.
    bool isAlpha = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
    bool isDigit = '0' <= ch && ch <= '9';
    return isAlpha || isDigit || "./_-?&=#%~!$*+,:;@\\^|".IndexOf(ch) >= 0;
}
/// <summary>
/// Returns a <see cref="System.String"/> that represents the current <see cref="Hime.SDK.UnicodeCodePoint"/>.
/// </summary>
/// <returns>
/// "U+" followed by the upper-case hexadecimal value when the code point is outside plane 0
/// or belongs to one of the categories listed below; otherwise the character itself.
/// </returns>
public override string ToString()
{
    // Outside plane 0: always use the hexadecimal notation.
    if (value >= 0x10000)
    {
        return "U+" + value.ToString("X");
    }

    // Inside plane 0: give the character itself only if it is printable.
    char c = (char)value;
    bool printable;
    switch (char.GetUnicodeCategory(c))
    {
        case System.Globalization.UnicodeCategory.ModifierLetter:
        case System.Globalization.UnicodeCategory.NonSpacingMark:
        case System.Globalization.UnicodeCategory.SpacingCombiningMark:
        case System.Globalization.UnicodeCategory.EnclosingMark:
        case System.Globalization.UnicodeCategory.SpaceSeparator:
        case System.Globalization.UnicodeCategory.LineSeparator:
        case System.Globalization.UnicodeCategory.ParagraphSeparator:
        case System.Globalization.UnicodeCategory.Control:
        case System.Globalization.UnicodeCategory.Format:
        case System.Globalization.UnicodeCategory.Surrogate:
        case System.Globalization.UnicodeCategory.PrivateUse:
        case System.Globalization.UnicodeCategory.OtherNotAssigned:
            printable = false;
            break;

        default:
            printable = true;
            break;
    }

    if (printable)
    {
        return c.ToString();
    }
    return "U+" + Convert.ToUInt16(c).ToString("X");
}
/// <summary>
/// Scratch entry point: strips combining marks from a fixed list of names and builds two
/// quoted, comma-separated strings from it. Nothing is printed or returned.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string t = @"Tri Tôn Tịnh Biên Thoại Sơn Tân Châu Phú Tân Long Xuyên Chợ Mới Châu Thành Châu Phú Châu Đốc An Phú";

    // Decompose, then keep everything that is not a non-spacing (combining) mark.
    StringBuilder stripped = new StringBuilder();
    foreach (char c in t.Normalize(NormalizationForm.FormD))
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) == System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            continue;
        }
        stripped.Append(c);
    }

    // D with stroke is not affected by the mark removal above, so it is mapped by hand.
    stripped.Replace('Đ', 'D').Replace('đ', 'd');

    string s = stripped.ToString().Normalize(NormalizationForm.FormD).ToLower();

    // v: every '\r'-separated part of the stripped text, trimmed and reversed, wrapped in quotes.
    string[] reversedParts = s.Split('\r')
                              .Select(x => new string(x.Trim().Reverse().ToArray()))
                              .ToArray();
    string v = @"""" + string.Join(@""", """, reversedParts) + @"""";

    // a: every '\r'-separated part of the original text, trimmed, wrapped in quotes.
    string[] originalParts = t.Split('\r').Select(x => x.Trim()).ToArray();
    string a = @"""" + string.Join(@""", """, originalParts) + @"""";
}
/// <summary>
/// Removes non-spacing (combining) marks from <paramref name="s"/> and maps Đ/đ to D/d.
/// </summary>
/// <param name="s">Text to process; may be null or empty.</param>
/// <returns>
/// An empty string for null or empty input; otherwise the stripped text in FormD.
/// If any exception occurs the input is returned unchanged.
/// </returns>
public static string UnsignCharacter(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return "";
    }

    try
    {
        StringBuilder kept = new StringBuilder();
        foreach (char c in s.Normalize(NormalizationForm.FormD))
        {
            bool isMark = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c)
                          == System.Globalization.UnicodeCategory.NonSpacingMark;
            if (!isMark)
            {
                kept.Append(c);
            }
        }

        // D with stroke has no decomposition, so it is replaced explicitly.
        kept.Replace('Đ', 'D');
        kept.Replace('đ', 'd');

        return kept.ToString().Normalize(NormalizationForm.FormD);
    }
    catch (Exception)
    {
        // Best effort: fall back to the untouched input.
        return s;
    }
}
/// <summary>
/// Creates a new document type for the given label and stores it through <c>daoTypeDocument</c>.
/// </summary>
/// <remarks>
/// The reference is derived from the label: the label is decomposed (FormD), only upper-case
/// letters, lower-case letters and space separators are kept, each whitespace run becomes a
/// single '-', and the result is upper-cased. Upper-casing uses the invariant culture so the
/// stored reference does not depend on the culture of the executing thread.
/// The workflow type is the first one whose reference is "NO"; <c>First()</c> throws if none exists.
/// </remarks>
/// <param name="label">Display label of the document type.</param>
/// <param name="entityId">Identifier stored in <c>EntityId</c>.</param>
/// <param name="userId">Identifier stored in <c>UserCreateData</c>.</param>
/// <returns>The value returned by <c>daoTypeDocument.Add</c>.</returns>
public TakeDocModel.TypeDocument Add(string label, Guid entityId, Guid userId)
{
    StringBuilder resultBuilder = new StringBuilder();
    foreach (char character in label.Normalize(NormalizationForm.FormD))
    {
        System.Globalization.UnicodeCategory category = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(character);
        if (category == System.Globalization.UnicodeCategory.LowercaseLetter
            || category == System.Globalization.UnicodeCategory.UppercaseLetter
            || category == System.Globalization.UnicodeCategory.SpaceSeparator)
        {
            resultBuilder.Append(character);
        }
    }

    string reference = System.Text.RegularExpressions.Regex.Replace(resultBuilder.ToString(), @"\s+", "-");

    TakeDocModel.TypeDocument toAdd = new TakeDocModel.TypeDocument();
    toAdd.TypeDocumentId = System.Guid.NewGuid();
    toAdd.TypeDocumentLabel = label;
    // Invariant casing: the reference is an identifier, not user-facing text.
    toAdd.TypeDocumentReference = reference.ToUpperInvariant();
    toAdd.TypeDocumentPageNeed = false;
    toAdd.UserCreateData = userId;
    toAdd.DateCreateData = System.DateTimeOffset.UtcNow;
    toAdd.EntityId = entityId;
    toAdd.EtatDeleteData = false;
    toAdd.TypeDocumentWorkflowTypeId = daoWorkflowType.GetBy(x => x.WorkflowTypeReference == "NO").First().WorkflowTypeId;

    return daoTypeDocument.Add(toAdd);
}
/// <summary>
/// Converts a Unicode string into a lower-case string without diacritics.
/// </summary>
/// <remarks>
/// The input string itself is not modified. Lower-casing uses the invariant culture so the
/// result does not depend on the culture of the executing thread (for example, under a Turkish
/// culture a culture-sensitive lower-casing turns 'I' into the non-ASCII dotless 'ı').
/// </remarks>
/// <param name="unicodestring">Text to convert.</param>
/// <param name="includeSpace">When true, space characters are kept; when false (default) they are removed.</param>
/// <returns>The converted text.</returns>
public static string Convertor_UNICODE_ASCII(string unicodestring, bool includeSpace = false)
{
    string decomposed = unicodestring.Normalize(NormalizationForm.FormD);

    // Scan every character and keep only those that are not combining marks.
    StringBuilder stringBuilder = new StringBuilder(decomposed.Length);
    foreach (char c in decomposed)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            stringBuilder.Append(c);
        }
    }

    // The decomposition cannot convert the two characters đ / Đ, so they are mapped by hand.
    stringBuilder.Replace("Đ", "D");
    stringBuilder.Replace("đ", "d");

    // Keep spaces only when the caller asked for them.
    if (!includeSpace)
    {
        stringBuilder.Replace(" ", "");
    }

    return stringBuilder.ToString().Normalize(NormalizationForm.FormD).ToLowerInvariant();
}
/// <summary>
/// Returns true iff the char at s[p] is not allowed inside a (simple) token.
/// </summary>
/// <param name="s">The text being scanned.</param>
/// <param name="p">Index of the character to test.</param>
/// <returns>
/// True for space/paragraph/line separators, open/close punctuation, initial/final quote
/// punctuation and math symbols, as well as for '/', '\' and anything that
/// <c>Core.CharacterProperties</c> reports as a colon or semicolon; false otherwise.
/// </returns>
private static bool IsHardTokenTerminator(string s, int p)
{
    switch (Char.GetUnicodeCategory(s, p))
    {
        case System.Globalization.UnicodeCategory.SpaceSeparator:
        case System.Globalization.UnicodeCategory.ParagraphSeparator:
        case System.Globalization.UnicodeCategory.LineSeparator:
        case System.Globalization.UnicodeCategory.OpenPunctuation:
        case System.Globalization.UnicodeCategory.ClosePunctuation:
        case System.Globalization.UnicodeCategory.FinalQuotePunctuation:
        case System.Globalization.UnicodeCategory.InitialQuotePunctuation:
        case System.Globalization.UnicodeCategory.MathSymbol:
            return true;
    }

    // TODO switch to respective Unicode Properties?
    // Core.CharacterProperties.IsApostrophe, Core.CharacterProperties.IsColon, Core.CharacterProperties.IsSemicolon,
    char c = s[p];
    return c == '/'
           || c == '\\'
           || Core.CharacterProperties.IsColon(c)
           || Core.CharacterProperties.IsSemicolon(c);
}
/// <summary>
/// Turns <paramref name="text"/> into a lower-case, dash-separated path segment.
/// </summary>
/// <remarks>
/// Steps: normalise through <c>Util.normalizeString</c>, drop non-spacing marks, normalise
/// again, trim and lower-case (invariant culture), replace the listed separators with '-'
/// ('@' becomes "-at-"), delete the listed symbols, collapse every run of dashes into one and
/// strip a leading/trailing dash.
/// The dash collapse is a loop: the previous fixed sequence of three replacements left a double
/// dash behind for long runs (for example 13 consecutive dashes).
/// </remarks>
/// <param name="text">Source text.</param>
/// <returns>The path-friendly form of <paramref name="text"/>.</returns>
public static String toPath(String text)
{
    String stFormD = Util.normalizeString(text);

    StringBuilder sb = new StringBuilder(stFormD.Length);
    foreach (char c in stFormD)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            sb.Append(c);
        }
    }

    // Invariant casing keeps the result independent of the current thread culture.
    String output = Util.normalizeString(sb.ToString()).Trim().ToLowerInvariant();

    // Separators become dashes.
    output = output.Replace(":", "-").Replace("=", "-").Replace("(", "-").Replace(")", "-")
                   .Replace("{", "-").Replace("}", "-").Replace("!", "-").Replace(";", "-")
                   .Replace(" ", "-").Replace("+", "-").Replace("&", "-").Replace("_", "-")
                   .Replace("/", "-").Replace("@", "-at-").Replace("–", "-").Replace(".", "-")
                   .Replace(",", "-").Replace("?", "-");

    // Symbols that are dropped entirely.
    output = output.Replace("\\", "").Replace("\"", "").Replace("°", "").Replace("„", "")
                   .Replace("“", "").Replace("`", "").Replace("ʻ", "").Replace("*", "")
                   .Replace("¨", "").Replace("™", "").Replace("®", "").Replace("§", "");

    // Collapse dash runs of any length down to a single dash.
    while (output.Contains("--"))
    {
        output = output.Replace("--", "-");
    }

    // After the collapse there is at most one dash at either end.
    return output.Trim('-');
}
// NOTE: two disabled helpers (ReportSectionsLocalization and ReportSectionsIncreaseFontSize)
// used to be kept here as commented-out code; they were removed as dead code.

/// <summary>
/// Checks that <paramref name="input"/> contains no character of Unicode category
/// <see cref="System.Globalization.UnicodeCategory.OtherLetter"/>.
/// </summary>
/// <param name="input">Text to inspect.</param>
/// <returns>
/// False as soon as one character of category OtherLetter is found; true otherwise
/// (including for an empty string).
/// </returns>
private static bool IsEnglish(string input)
{
    for (int i = 0; i < input.Length; i++)
    {
        bool isOtherLetter = char.GetUnicodeCategory(input[i]) == System.Globalization.UnicodeCategory.OtherLetter;
        if (isOtherLetter)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Decomposes <paramref name="text"/> (FormD) and removes every non-spacing mark.
/// </summary>
/// <param name="text">Text to clean.</param>
/// <returns>The decomposed text without combining marks; it is not recomposed afterwards.</returns>
private static string ClearString(string text)
{
    StringBuilder cleaned = new StringBuilder();

    foreach (char c in text.Normalize(NormalizationForm.FormD))
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) == System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            continue;
        }
        cleaned.Append(c);
    }

    return cleaned.ToString();
}
/// <summary>
/// Removes the diacritical (non-spacing) marks from a string.
/// </summary>
/// <param name="inputString">Input string.</param>
/// <returns>The input without combining marks, recomposed to FormC.</returns>
public static string RemoveDiacritics(this string inputString)
{
    string decomposed = inputString.Normalize(System.Text.NormalizationForm.FormD);
    System.Text.StringBuilder result = new System.Text.StringBuilder();

    int index = 0;
    while (index < decomposed.Length)
    {
        char current = decomposed[index];
        index++;

        System.Globalization.UnicodeCategory category = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(current);
        if (category != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            result.Append(current);
        }
    }

    string stripped = result.ToString();
    return stripped.Normalize(System.Text.NormalizationForm.FormC);
}
/// <summary>
/// Tells whether the character at <paramref name="pos"/> in <paramref name="s"/> is a
/// punctuation mark that is separated from the surrounding character sequence.
/// </summary>
/// <remarks>
/// Full stops '.' are OtherPunctuation but get special treatment: they may or may not be
/// separated from a char sequence depending on whether an abbreviation is seen, so they
/// are excluded here.
/// </remarks>
/// <param name="s">The text being scanned.</param>
/// <param name="pos">Index of the character to test.</param>
/// <returns>
/// True for open/close punctuation, initial/final quote punctuation, math symbols and any
/// other punctuation except '.'; false otherwise.
/// </returns>
private bool IsSeparablePunct(string s, int pos)
{
    switch (System.Char.GetUnicodeCategory(s, pos))
    {
        case System.Globalization.UnicodeCategory.OpenPunctuation:
        case System.Globalization.UnicodeCategory.ClosePunctuation:
        case System.Globalization.UnicodeCategory.FinalQuotePunctuation:
        case System.Globalization.UnicodeCategory.InitialQuotePunctuation:
        case System.Globalization.UnicodeCategory.MathSymbol:
            return true;

        case System.Globalization.UnicodeCategory.OtherPunctuation:
            return s[pos] != '.';

        default:
            return false;
    }
}
/// <summary>
/// Strips non-spacing (combining) marks from <paramref name="input"/>.
/// </summary>
/// <param name="input">Text to process.</param>
/// <returns>The text without combining marks, recomposed to FormC.</returns>
public string RemoveDiacritics(string input)
{
    StringBuilder withoutMarks = new StringBuilder();

    foreach (char c in input.Normalize(NormalizationForm.FormD))
    {
        bool keep = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c)
                    != System.Globalization.UnicodeCategory.NonSpacingMark;
        if (keep)
        {
            withoutMarks.Append(c);
        }
    }

    return withoutMarks.ToString().Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Removes non-spacing (combining) marks from <paramref name="s"/> and maps Đ/đ to D/d.
/// </summary>
/// <param name="s">Text to convert.</param>
/// <returns>The converted text in FormD.</returns>
public static string convertToUnSign2(string s)
{
    string decomposed = s.Normalize(NormalizationForm.FormD);
    StringBuilder buffer = new StringBuilder();

    foreach (char c in decomposed)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) == System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            continue;
        }
        buffer.Append(c);
    }

    // D with stroke has no decomposition, so it is replaced explicitly.
    buffer.Replace('Đ', 'D').Replace('đ', 'd');

    string result = buffer.ToString();
    return result.Normalize(NormalizationForm.FormD);
}
/// <summary>
/// Removes non-spacing (combining) marks from <paramref name="s"/> and maps Đ/đ to D/d.
/// </summary>
/// <param name="s">Text to convert.</param>
/// <returns>The converted text in FormD.</returns>
public static string ConvertToUnsign(this string s)
{
    string decomposed = s.Normalize(NormalizationForm.FormD);
    StringBuilder output = new StringBuilder();

    for (int i = 0; i < decomposed.Length; i++)
    {
        char current = decomposed[i];
        System.Globalization.UnicodeCategory category = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(current);
        if (category != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            output.Append(current);
        }
    }

    // D with stroke has no decomposition, so it is replaced explicitly.
    output.Replace('Đ', 'D');
    output.Replace('đ', 'd');

    return output.ToString().Normalize(NormalizationForm.FormD);
}
/// <summary>
/// Strips non-spacing (combining) marks from <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to process.</param>
/// <returns>The text without combining marks, recomposed to FormC.</returns>
static string RemoveDiacritics(string text)
{
    string decomposed = text.Normalize(System.Text.NormalizationForm.FormD);
    System.Text.StringBuilder kept = new System.Text.StringBuilder();

    for (int i = 0; i < decomposed.Length; i++)
    {
        char c = decomposed[i];
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) == System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            // Combining mark: drop it.
            continue;
        }
        kept.Append(c);
    }

    string stripped = kept.ToString();
    return stripped.Normalize(System.Text.NormalizationForm.FormC);
}
/// <summary>
/// Helper that makes a URL friendlier: every whitespace run becomes a single '-' and
/// non-spacing (combining) marks are removed.
/// </summary>
/// <param name="url">URL to sanitize.</param>
/// <returns>The sanitized URL, left in decomposed (FormD) form.</returns>
static string UrlSanitize(string url)
{
    string dashed = System.Text.RegularExpressions.Regex.Replace(url, @"\s+", "-");

    StringBuilder sanitized = new StringBuilder();
    foreach (char c in dashed.Normalize(NormalizationForm.FormD))
    {
        bool isMark = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c)
                      == System.Globalization.UnicodeCategory.NonSpacingMark;
        if (!isMark)
        {
            sanitized.Append(c);
        }
    }

    return sanitized.ToString();
}
/// <summary>
/// Converts Vietnamese text into lower-case text without diacritics, with every space
/// replaced by a dash.
/// </summary>
/// <remarks>
/// Lower-casing uses the invariant culture so the result does not depend on the culture of
/// the executing thread (a culture-sensitive lower-casing turns 'I' into dotless 'ı' under
/// Turkish cultures).
/// </remarks>
/// <param name="chuoi">Text to convert.</param>
/// <returns>The converted text in FormD.</returns>
public static string ConvertToUnsign2(string chuoi)
{
    string str = chuoi.ToLowerInvariant().Replace(" ", "-");
    string strFormD = str.Normalize(NormalizationForm.FormD);

    StringBuilder sb = new StringBuilder(strFormD.Length);
    foreach (char c in strFormD)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            sb.Append(c);
        }
    }

    // D with stroke has no decomposition, so it is replaced explicitly.
    sb.Replace('Đ', 'D');
    sb.Replace('đ', 'd');

    return sb.ToString().Normalize(NormalizationForm.FormD);
}
/// <summary>
/// Lower-cases <paramref name="s"/> and strips the Vietnamese diacritics from it so the
/// text is easier to search.
/// </summary>
/// <remarks>
/// Special characters are not removed here (a regex-based clean-up step that used to precede
/// the diacritic removal is disabled). Lower-casing uses the invariant culture so the result
/// does not depend on the culture of the executing thread.
/// </remarks>
/// <param name="s">Text to convert.</param>
/// <returns>The lower-case text without combining marks, in FormD.</returns>
public string convertToLatin(string s)
{
    string stFormD = s.ToLowerInvariant().Normalize(NormalizationForm.FormD);

    StringBuilder sb = new StringBuilder(stFormD.Length);
    foreach (char c in stFormD)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            sb.Append(c);
        }
    }

    // D with stroke has no decomposition, so it is replaced explicitly.
    sb.Replace('Đ', 'D');
    sb.Replace('đ', 'd');

    return sb.ToString().Normalize(NormalizationForm.FormD);
}
/// <summary>
/// Removes non-spacing (combining) marks from <paramref name="str"/> and maps Đ/đ to D/d.
/// </summary>
/// <param name="str">Text to convert; may be null or empty.</param>
/// <returns>An empty string for null or empty input; otherwise the converted text in FormD.</returns>
public static String ToUnsign(string str)
{
    if (str == null || str == string.Empty)
    {
        return "";
    }

    StringBuilder kept = new StringBuilder();
    foreach (char c in str.Normalize(NormalizationForm.FormD))
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) == System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            continue;
        }
        kept.Append(c);
    }

    // D with stroke has no decomposition, so it is replaced explicitly.
    kept.Replace('Đ', 'D').Replace('đ', 'd');

    return kept.ToString().Normalize(NormalizationForm.FormD);
}
/// <summary>
/// Strips non-spacing (combining) marks from <paramref name="input"/>.
/// </summary>
/// <remarks>
/// Exceptions propagate to the caller untouched. The former <c>catch</c> block did nothing
/// but <c>throw Error;</c>, which only reset the stack trace of the original exception.
/// </remarks>
/// <param name="input">Text to process.</param>
/// <returns>The text without combining marks, recomposed to FormC.</returns>
public static string RemoveAccents(string input)
{
    string stFormD = input.Normalize(NormalizationForm.FormD);

    StringBuilder sb = new StringBuilder(stFormD.Length);
    foreach (char c in stFormD)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            sb.Append(c);
        }
    }

    return sb.ToString().Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Gets a user-friendly representation of the character
/// </summary>
/// <param name="c">A character</param>
/// <returns>
/// The result of <c>CharToString_NonPrintable</c> for the categories listed below;
/// otherwise the character itself as a string.
/// </returns>
private static string CharToString(char c)
{
    bool printable;
    switch (char.GetUnicodeCategory(c))
    {
        case System.Globalization.UnicodeCategory.ModifierLetter:
        case System.Globalization.UnicodeCategory.NonSpacingMark:
        case System.Globalization.UnicodeCategory.SpacingCombiningMark:
        case System.Globalization.UnicodeCategory.EnclosingMark:
        case System.Globalization.UnicodeCategory.SpaceSeparator:
        case System.Globalization.UnicodeCategory.LineSeparator:
        case System.Globalization.UnicodeCategory.ParagraphSeparator:
        case System.Globalization.UnicodeCategory.Control:
        case System.Globalization.UnicodeCategory.Format:
        case System.Globalization.UnicodeCategory.Surrogate:
        case System.Globalization.UnicodeCategory.PrivateUse:
        case System.Globalization.UnicodeCategory.OtherNotAssigned:
            printable = false;
            break;

        default:
            printable = true;
            break;
    }

    if (!printable)
    {
        return CharToString_NonPrintable(c);
    }
    return c.ToString();
}
} // End Function HandleAllowedAccentCharacters

/// <summary>
/// Strips non-spacing (combining) marks from <paramref name="stIn"/> after it has been
/// pre-processed by <c>HandleAllowedAccentCharacters</c> (special treatment for German and
/// French accents).
/// </summary>
/// <example>
/// <code>string str = ApertureSucks.Latinize("(æøå âôû?aè");</code>
/// </example>
/// <param name="stIn">Text to latinize.</param>
/// <returns>The stripped text, normalized to FormKC (not FormC).</returns>
public static string Latinize(string stIn)
{
    // Special treatment for German + French accents
    string prepared = HandleAllowedAccentCharacters(stIn);

    System.Text.StringBuilder kept = new System.Text.StringBuilder();
    foreach (char c in prepared.Normalize(System.Text.NormalizationForm.FormD))
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) == System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            continue;
        }
        kept.Append(c);
    }

    string stripped = kept.ToString();
    return stripped.Normalize(System.Text.NormalizationForm.FormKC);
} // End Function Latinize
/// <summary>
/// Strips non-spacing (combining) marks from <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to process; may be null, empty or whitespace.</param>
/// <returns>
/// The input itself when it is null, empty or whitespace only; otherwise the text without
/// combining marks, recomposed to FormC.
/// </returns>
public static string RemoveDiacritics(this string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return text;
    }

    StringBuilder withoutMarks = new StringBuilder();
    foreach (char c in text.Normalize(NormalizationForm.FormD))
    {
        System.Globalization.UnicodeCategory category = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c);
        if (category != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            withoutMarks.Append(c);
        }
    }

    return withoutMarks.ToString().Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Builds a lower-case, dash-separated slug from <paramref name="s"/>.
/// </summary>
/// <remarks>
/// Steps: remove non-spacing marks, map Đ/đ to D/d, delete everything that is not an ASCII
/// letter, digit, whitespace or '-', collapse whitespace runs, trim, turn spaces into dashes
/// and lower-case. Lower-casing uses the invariant culture: a culture-sensitive lower-casing
/// would turn 'I' into the non-ASCII dotless 'ı' under Turkish cultures, reintroducing a
/// character the filter had just guaranteed to be absent.
/// </remarks>
/// <param name="s">Text to convert.</param>
/// <returns>The slug.</returns>
public static string convertToUnSign2(string s)
{
    string stFormD = s.Normalize(NormalizationForm.FormD);

    StringBuilder sb = new StringBuilder(stFormD.Length);
    foreach (char c in stFormD)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            sb.Append(c);
        }
    }

    // D with stroke has no decomposition, so it is replaced explicitly.
    sb.Replace('Đ', 'D');
    sb.Replace('đ', 'd');

    string str = sb.ToString().Normalize(NormalizationForm.FormD);
    str = System.Text.RegularExpressions.Regex.Replace(str, @"[^A-Za-z0-9\s-]", ""); // remove all non valid chars
    str = System.Text.RegularExpressions.Regex.Replace(str, @"\s+", " ").Trim();      // convert multiple spaces into one space
    str = System.Text.RegularExpressions.Regex.Replace(str, @" ", "-");               // replace spaces by dashes

    return str.ToLowerInvariant();
}
/// <summary>
/// Creates a parser with a grammar that matches each character within the specified
/// <paramref name="category"/>.
/// </summary>
/// <param name="category">The Unicode category in which to match characters.</param>
/// <returns>The parser obtained from the wrapped <c>parser</c> for <paramref name="category"/>.</returns>
public IParser<char, char> Character(System.Globalization.UnicodeCategory category)
{
    // Pure delegation to the wrapped parser.
    IParser<char, char> categoryParser = parser.Character(category);
    return categoryParser;
}
/// <summary>
/// Initializes the instance with the Unicode category it stores in its <c>category</c> field.
/// </summary>
/// <param name="category">The Unicode category to store.</param>
public LiteralCharCategory(System.Globalization.UnicodeCategory category)
{
    this.category = category;
}
/// <summary>
/// Removes non-spacing (combining) marks from <paramref name="s"/> and maps Đ/đ to D/d.
/// </summary>
/// <remarks>
/// The text is first decomposed (FormD), which splits every precomposed accented letter
/// into its base letter plus combining marks; the marks are then dropped. A long table of
/// per-letter replacements (á→a, Ắ→A, …) used to follow, but none of those precomposed
/// letters can still be present after decomposition, so the table never matched and each
/// entry only cost a full scan of the buffer. Only Đ/đ, which have no decomposition,
/// need explicit handling.
/// </remarks>
/// <param name="s">Text to convert.</param>
/// <returns>The converted text in FormD.</returns>
public string ConvertToUnSign(string s)
{
    string stFormD = s.Normalize(NormalizationForm.FormD);

    StringBuilder sb = new StringBuilder(stFormD.Length);
    foreach (char c in stFormD)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            sb.Append(c);
        }
    }

    sb.Replace('Đ', 'D');
    sb.Replace('đ', 'd');

    return sb.ToString().Normalize(NormalizationForm.FormD);
}
/// <summary>
/// Explicit <c>IStringObservableParser&lt;char&gt;</c> implementation that forwards to this
/// type's own <c>Character</c> method for the given Unicode category.
/// </summary>
/// <param name="category">The Unicode category in which to match characters.</param>
/// <returns>The parser returned by <c>Character(category)</c>.</returns>
IObservableParser<char, char> IStringObservableParser<char>.Character(System.Globalization.UnicodeCategory category)
{
    IObservableParser<char, char> categoryParser = Character(category);
    return categoryParser;
}
/// <summary>
/// Preparses a regular expression text returning a ParsedRegex class
/// that can be used for further regular expressions.
/// </summary>
/// <param name="pattern">The regular expression text to rewrite.</param>
/// <returns>
/// A <c>ParsedRegex</c> created from the original text whose <c>Options</c> reflect the
/// inline flags that were found and whose <c>Pattern</c> holds the rewritten text.
/// </returns>
private static ParsedRegex PreParseRegex(string pattern)
{
    ParsedRegex res = new ParsedRegex(pattern);

    int searchFrom = 0;
    int pos;
    int curGroup = 0; // counted but not read anywhere in this method
    bool containsNamedGroup = false;

    // Pass 1: walk every '(' and handle "(?x" extension syntax and group naming.
    while (true)
    {
        pos = pattern.IndexOf("(", searchFrom);
        if (pos == -1 || pos == pattern.Length - 1)
        {
            break;
        }

        pos++;
        if (pattern[pos] == '?')
        {
            // extension syntax
            if (pos == pattern.Length - 1)
            {
                throw ExceptionConverter.CreateThrowable(error, "unexpected end of regex");
            }

            pos++;
            switch (pattern[pos])
            {
                case 'P':
                    // named regex, .NET doesn't expect the P so we'll remove it;
                    // also, once we see a named group i.e. ?P then we need to start artificially
                    // naming all unnamed groups from then on---this is to get around the fact that
                    // the CLR RegEx support orders all the unnamed groups before all the named
                    // groups, even if the named groups are before the unnamed ones in the pattern;
                    // the artificial naming preserves the order of the groups and thus the order of
                    // the matches
                    containsNamedGroup = true;
                    pattern = pattern.Remove(pos, 1);
                    break;

                case 'i':
                    res.Options |= RegexOptions.IgnoreCase;
                    break;

                case 'L':
                    res.Options &= ~(RegexOptions.CultureInvariant);
                    break;

                case 'm':
                    res.Options |= RegexOptions.Multiline;
                    break;

                case 's':
                    res.Options |= RegexOptions.Singleline;
                    break;

                case 'x':
                    res.Options |= RegexOptions.IgnorePatternWhitespace;
                    break;

                case 'u': // accepted, no option change
                case ':': // non-capturing
                case '=': // look ahead assertion
                case '<': // positive look behind assertion
                case '!': // negative look ahead assertion
                    break;

                default:
                    // Includes '(' (yes/no if group exists), which is not supported.
                    throw ExceptionConverter.CreateThrowable(error, "Unrecognized extension " + pattern[pos]);
            }
        }
        else
        {
            // just another group
            curGroup++;
            if (containsNamedGroup)
            {
                // need to name this unnamed group
                pattern = pattern.Insert(pos, "?<Named" + GetRandomString() + ">");
            }
        }

        searchFrom = pos;
    }

    // Pass 2: drop the backslash in front of word characters that are not known escapes.
    // NOTE(review): the search position advances by one per iteration rather than past the
    // escape just handled, so the second backslash of an escaped backslash is examined as an
    // escape start as well; digits are also unescaped. Confirm this is intended.
    searchFrom = 0;
    while (true)
    {
        pos = pattern.IndexOf('\\', searchFrom);
        if (pos == -1 || pos == pattern.Length - 1)
        {
            break;
        }

        pos++;
        char escaped = pattern[pos];

        // Known escape sequences are left escaped.
        bool knownEscape = "xuabefnrtvcsWwpPSdD".IndexOf(escaped) >= 0;
        if (!knownEscape)
        {
            switch (Char.GetUnicodeCategory(escaped))
            {
                // recognized word characters, always unescape.
                case System.Globalization.UnicodeCategory.ModifierLetter:
                case System.Globalization.UnicodeCategory.LowercaseLetter:
                case System.Globalization.UnicodeCategory.UppercaseLetter:
                case System.Globalization.UnicodeCategory.TitlecaseLetter:
                case System.Globalization.UnicodeCategory.OtherLetter:
                case System.Globalization.UnicodeCategory.DecimalDigitNumber:
                case System.Globalization.UnicodeCategory.LetterNumber:
                case System.Globalization.UnicodeCategory.OtherNumber:
                case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                    pattern = pattern.Remove(pos - 1, 1);
                    break;
            }
        }

        searchFrom++;
    }

    res.Pattern = pattern;
    return res;
}