/// <summary>
/// Copies one line of text, replacing every character that sits between
/// unescaped double-quotes with the escape sequence produced by
/// <c>EscapeString</c>. The double-quote delimiters themselves, and all text
/// outside of them, are copied through unchanged.
/// </summary>
/// <param name="lineIn">
/// The line to process. A null or empty line yields an empty string.
/// </param>
/// <param name="replacement">
/// The escape form passed to <c>EscapeString</c> for each character found
/// inside the double-quotes.
/// </param>
/// <param name="endedIrregular">
/// On entry, true when this line starts out already inside double-quotes
/// (i.e. a previous line left a literal open). On exit, true when the end of
/// the line was reached while still inside double-quotes. It is left
/// untouched when <paramref name="lineIn"/> is null or empty.
/// </param>
/// <returns>The line with the contents of its string literals escaped.</returns>
public string EscStringLiterals(string lineIn, EscapeStringType replacement, ref bool endedIrregular)
{
    if (string.IsNullOrEmpty(lineIn))
    {
        // Nothing to scan, so the open/closed state carries over as-is.
        return string.Empty;
    }

    var encoded = new StringBuilder(lineIn.Length);
    var inDoubleQuotes = endedIrregular;

    for (var j = 0; j < lineIn.Length; j++)
    {
        var current = lineIn[j];

        var isDelimiter = false;
        if (current == '"')
        {
            // A quote is escaped only when it is preceded by an odd number of
            // consecutive backslashes; an even run is just escaped backslashes.
            // Counting the run also covers quotes at index 1 and 2, which a
            // fixed two-character look-behind gets wrong.
            var backslashes = 0;
            for (var k = j - 1; k >= 0 && lineIn[k] == '\\'; k--)
            {
                backslashes++;
            }

            isDelimiter = backslashes % 2 == 0;
        }

        if (isDelimiter)
        {
            encoded.Append('"');
            inDoubleQuotes = !inDoubleQuotes;
            continue;
        }

        if (inDoubleQuotes)
        {
            encoded.Append(current.ToString(CultureInfo.InvariantCulture).EscapeString(replacement));
        }
        else
        {
            encoded.Append(current);
        }
    }

    endedIrregular = inDoubleQuotes;
    return encoded.ToString();
}
/// <summary>
/// Returns <paramref name="value"/> rewritten as an escape sequence of the
/// form selected by <paramref name="escapeType"/>.
/// </summary>
/// <remarks>
/// DECIMAL, DECIMAL_LONG, HEXDECIMAL, HTML and BLANK work on the ISO-8859-1
/// bytes of <paramref name="value"/>; REGEX works on its UTF-8 bytes; UNICODE
/// and XML work on its chars; URI delegates to <c>HttpUtility.UrlEncode</c>.
/// An escape type that is not handled here produces an empty string.
/// </remarks>
/// <param name="value">
/// Any string which is to be escaped. Null or empty yields an empty string.
/// </param>
/// <param name="escapeType">
/// The kind of escape sequence to encode <paramref name="value"/> into.
/// The default is REGEX.
/// </param>
/// <example>
/// <![CDATA[
/// Etc.EscapeString("Hi", EscapeStringType.DECIMAL);      // "&#72;&#105;"
/// Etc.EscapeString("Hi", EscapeStringType.DECIMAL_LONG); // "&#0000072&#0000105" (no trailing ';')
/// Etc.EscapeString("Hi", EscapeStringType.HEXDECIMAL);   // "&#x48&#x69" (no trailing ';')
/// Etc.EscapeString("Hi", EscapeStringType.UNICODE);      // "\u0048\u0069"
/// Etc.EscapeString("[regex]", EscapeStringType.REGEX);   // "\x5b\x72\x65\x67\x65\x78\x5d"
/// Etc.EscapeString("Hi", EscapeStringType.BLANK);        // "  "
/// ]]>
/// </example>
/// <returns>The escaped form of <paramref name="value"/>.</returns>
public static string EscapeString(this string value, EscapeStringType escapeType = EscapeStringType.REGEX)
{
    // Guard before touching the encoder: Encoding.GetBytes throws on null.
    if (String.IsNullOrEmpty(value))
    {
        return String.Empty;
    }

    var data = Encoding.GetEncoding("ISO-8859-1").GetBytes(value);
    var dataOut = new StringBuilder();

    switch (escapeType)
    {
        case EscapeStringType.DECIMAL:
            foreach (var dex in data)
            {
                dataOut.AppendFormat("&#{0};", dex.ToString("G"));
            }
            break;

        case EscapeStringType.DECIMAL_LONG:
            foreach (var dex in data)
            {
                // Seven zero-padded digits, deliberately without a closing ';'.
                dataOut.AppendFormat("&#{0:0000000}", (int)dex);
            }
            break;

        case EscapeStringType.HEXDECIMAL:
            foreach (var dex in data)
            {
                dataOut.AppendFormat("&#x{0}", dex.ToString("X2"));
            }
            break;

        case EscapeStringType.REGEX:
            // REGEX is the one byte-oriented form that uses UTF-8 rather than ISO-8859-1.
            foreach (var dex in Encoding.UTF8.GetBytes(value))
            {
                dataOut.AppendFormat(@"\x{0}", dex.ToString("x2"));
            }
            break;

        case EscapeStringType.UNICODE:
            foreach (var c in value)
            {
                dataOut.AppendFormat(@"\u{0}", Convert.ToUInt16(c).ToString("X4"));
            }
            break;

        case EscapeStringType.HTML:
        {
            var htmlEsc = HtmlEscStrings;
            foreach (var dex in data)
            {
                // Single pass over the table: use its entry when the byte is
                // listed, otherwise fall back to a numeric character reference.
                var matched = false;
                foreach (var entry in htmlEsc)
                {
                    if (entry.Item1 == dex)
                    {
                        dataOut.Append(entry.Item2);
                        matched = true;
                        break;
                    }
                }

                if (!matched)
                {
                    dataOut.AppendFormat("&#{0};", dex);
                }
            }
            break;
        }

        case EscapeStringType.XML:
        {
            var xmlEsc = XmlEscStrings;

            // Turn any existing escape sequences back into char literals first,
            // so that text which is already escaped is not escaped twice.
            var literal = value;
            foreach (var entry in xmlEsc)
            {
                literal = literal.Replace(entry.Item2, entry.Item1.ToString());
            }

            foreach (var c in literal)
            {
                var matched = false;
                foreach (var entry in xmlEsc)
                {
                    if (entry.Item1 == c)
                    {
                        dataOut.Append(entry.Item2);
                        matched = true;
                        break;
                    }
                }

                if (!matched)
                {
                    dataOut.Append(c);
                }
            }
            break;
        }

        case EscapeStringType.URI:
            return HttpUtility.UrlEncode(value);

        case EscapeStringType.BLANK:
            // One space per ISO-8859-1 byte of the input.
            return new string(' ', data.Length);
    }

    return dataOut.ToString();
}
/// <summary>
/// Returns <paramref name="value"/> as an escape sequence of one of various
/// forms found in HTML; which form is chosen by <paramref name="escapeType"/>.
/// </summary>
/// <remarks>
/// DECIMAL, DECIMAL_LONG, HEXDECIMAL, HTML and BLANK work on the ISO-8859-1
/// bytes of <paramref name="value"/>; REGEX works on its UTF-8 bytes; UNICODE
/// and XML work on its chars. An escape type that is not handled here
/// produces an empty string.
/// </remarks>
/// <param name="value">
/// Any string which is to be escaped. Null or empty yields an empty string.
/// </param>
/// <param name="escapeType">
/// The kind of escape sequence to encode <paramref name="value"/> into.
/// The default is REGEX.
/// </param>
/// <example>
/// <![CDATA[
/// Etc.EscapeString("Hi", EscapeStringType.DECIMAL);      // "&#72;&#105;"
/// Etc.EscapeString("Hi", EscapeStringType.DECIMAL_LONG); // "&#0000072&#0000105" (no trailing ';')
/// Etc.EscapeString("Hi", EscapeStringType.HEXDECIMAL);   // "&#x48&#x69" (no trailing ';')
/// Etc.EscapeString("Him", EscapeStringType.UNICODE);     // "\u0048\u0069\u006d" (lowercase hex)
/// Etc.EscapeString("[regex]", EscapeStringType.REGEX);   // "\x5b\x72\x65\x67\x65\x78\x5d"
/// Etc.EscapeString("Hi", EscapeStringType.BLANK);        // "  "
/// ]]>
/// </example>
/// <returns>The escaped form of <paramref name="value"/>.</returns>
public static string EscapeString(this string value, EscapeStringType escapeType = EscapeStringType.REGEX)
{
    // Guard before touching the encoder: Encoding.GetBytes throws on null.
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    var data = Encoding.GetEncoding("ISO-8859-1").GetBytes(value);
    var dataOut = new StringBuilder();

    switch (escapeType)
    {
        case EscapeStringType.DECIMAL:
            foreach (var dex in data)
            {
                dataOut.AppendFormat("&#{0};", dex.ToString("G"));
            }
            break;

        case EscapeStringType.DECIMAL_LONG:
            foreach (var dex in data)
            {
                // Seven zero-padded digits, deliberately without a closing ';'.
                dataOut.AppendFormat("&#{0:0000000}", (int)dex);
            }
            break;

        case EscapeStringType.HEXDECIMAL:
            foreach (var dex in data)
            {
                dataOut.AppendFormat("&#x{0}", dex.ToString("X2"));
            }
            break;

        case EscapeStringType.REGEX:
            // REGEX is the one byte-oriented form that uses UTF-8 rather than ISO-8859-1.
            foreach (var dex in Encoding.UTF8.GetBytes(value))
            {
                dataOut.AppendFormat(@"\x{0}", dex.ToString("x2"));
            }
            break;

        case EscapeStringType.UNICODE:
            foreach (var c in value)
            {
                dataOut.AppendFormat(@"\u{0}", Convert.ToUInt16(c).ToString("x4"));
            }
            break;

        case EscapeStringType.HTML:
        {
            var htmlEsc = Net.HtmlEscStrings;
            foreach (var dex in data)
            {
                // Single pass over the table: use its entry when the byte is
                // listed, otherwise fall back to a numeric character reference.
                var matched = false;
                foreach (var entry in htmlEsc)
                {
                    if (entry.Item1 == dex)
                    {
                        dataOut.Append(entry.Item2);
                        matched = true;
                        break;
                    }
                }

                if (!matched)
                {
                    dataOut.AppendFormat("&#{0};", dex);
                }
            }
            break;
        }

        case EscapeStringType.XML:
        {
            var xmlEsc = Net.XmlEscStrings;

            // Turn any existing escape sequences back into char literals first,
            // so that text which is already escaped is not escaped twice.
            var literal = value;
            foreach (var entry in xmlEsc)
            {
                literal = literal.Replace(entry.Item2, entry.Item1.ToString());
            }

            foreach (var c in literal)
            {
                var matched = false;
                foreach (var entry in xmlEsc)
                {
                    if (entry.Item1 == c)
                    {
                        dataOut.Append(entry.Item2);
                        matched = true;
                        break;
                    }
                }

                if (!matched)
                {
                    dataOut.Append(c);
                }
            }
            break;
        }

        case EscapeStringType.BLANK:
            // One space per ISO-8859-1 byte of the input.
            return new string(' ', data.Length);
    }

    return dataOut.ToString();
}