/**
 * Read the name of a character entity from the stream; the caller has
 * already consumed the leading '&'.
 * Letters (as decided by IsLetter) are collected one at a time. If the first
 * non-letter is ';' it is appended and consumed; otherwise Reset() is called
 * so that, presumably, the stream returns to the mark set just before that
 * character and the caller reads it again.
 * @param r the stream to read from.
 * @return the collected letters, followed by ';' when the entity was terminated.
 */
private static string GetSpecial(StringGetter r)
{
    StringBuilder entity = new StringBuilder();
    int next;
    for (;;)
    {
        r.Mark(1); // Mark the present position in the stream before every read
        next = r.Read();
        if (!IsLetter((char)next))
        {
            break;
        }
        entity.Append((char)next);
    }

    if (next != ';')
    {
        // Not a terminated entity: go back to the mark taken before this read.
        r.Reset();
    }
    else
    {
        entity.Append(';');
    }
    return entity.ToString();
}
/**
 * Read the remainder of a tag from the stream; the caller has already
 * consumed the opening '<'.
 * Every further '<' raises the nesting depth and every '>' lowers it; reading
 * stops once the depth reaches zero or the stream reports EOF (-1).
 * @param r the stream to read from.
 * @return the tag text, starting with '<' and including everything read.
 */
private static string GetTag(StringGetter r)
{
    StringBuilder tag = new StringBuilder("<");
    int depth = 1;
    do
    {
        int ch = r.Read();
        if (ch == -1)
        {
            break; // EOF
        }
        tag.Append((char)ch);
        switch (ch)
        {
            case '<':
                depth++;
                break;
            case '>':
                depth--;
                break;
        }
    }
    while (depth > 0);
    return tag.ToString();
}
////////////////////////////////////////////////////////////////////////////
//--------------------------------- REVISIONS ------------------------------
// Date       Name                 Tracking #         Description
// ---------  -------------------  -------------      ----------------------
// 04JAN2009  James Shen                              Initial Creation
////////////////////////////////////////////////////////////////////////////
/**
 * Convert \uXXXX (four hex digits) and \xXX (two hex digits) escape
 * sequences to the characters they denote.
 * Any other backslash sequence is copied through unchanged, and a backslash
 * that is the last character of the input is kept as a single backslash.
 * Conversion is best-effort: if an escape is truncated or does not contain
 * valid hex digits, conversion stops and the text converted so far is
 * returned. The result has leading and trailing white space trimmed.
 * @param source string to Convert.
 * @return result string.
 */
public static string ConvertUTF8(string source)
{
    StringBuilder result = new StringBuilder();
    StringGetter input = new StringGetter(source);
    try
    {
        int c = input.Read();
        while (c != -1) // Convert until EOF
        {
            if (c != '\\')
            {
                result.Append((char)c);
            }
            else
            {
                c = input.Read();
                if (c == -1)
                {
                    // Trailing backslash: keep it, and do not append (char)-1.
                    result.Append('\\');
                    break;
                }

                if (c == 'u' || c == 'x')
                {
                    int digitCount = c == 'u' ? 4 : 2;
                    StringBuilder hex = new StringBuilder(digitCount);
                    for (int i = 0; i < digitCount; i++)
                    {
                        // At EOF this appends U+FFFF, which makes the parse
                        // below fail and ends the conversion.
                        hex.Append((char)input.Read());
                    }

                    int codeUnit = int.Parse(hex.ToString(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    // Append the character itself, not the decimal text of its code.
                    result.Append((char)codeUnit);
                }
                else
                {
                    result.Append('\\').Append((char)c);
                }
            }
            c = input.Read();
        }
    }
    catch
    {
        // Deliberately best-effort: a malformed escape (or any other failure)
        // ends the conversion and whatever was converted so far is returned.
    }
    finally
    {
        // Close the reader on every path, not only after a failure.
        input.Close();
    }
    return result.ToString().Trim();
}
////////////////////////////////////////////////////////////////////////////
//--------------------------------- REVISIONS ------------------------------
// Date       Name                 Tracking #         Description
// ---------  -------------------  -------------      ----------------------
// 04JAN2009  James Shen                              Initial Creation
////////////////////////////////////////////////////////////////////////////
/**
 * Convert \uXXXX (four hex digits) and \xXX (two hex digits) escape
 * sequences to the characters they denote.
 * Any other backslash sequence is copied through unchanged, and a backslash
 * that is the last character of the input is kept as a single backslash.
 * Conversion is best-effort: if an escape is truncated or does not contain
 * valid hex digits, conversion stops and the text converted so far is
 * returned. The result has leading and trailing white space trimmed.
 * @param source string to Convert.
 * @return result string.
 */
public static string ConvertUTF8(string source)
{
    StringBuilder result = new StringBuilder();
    StringGetter input = new StringGetter(source);
    try
    {
        int c = input.Read();
        while (c != -1) // Convert until EOF
        {
            if (c != '\\')
            {
                result.Append((char)c);
            }
            else
            {
                c = input.Read();
                if (c == -1)
                {
                    // Trailing backslash: keep it, and do not append (char)-1.
                    result.Append('\\');
                    break;
                }

                if (c == 'u' || c == 'x')
                {
                    int digitCount = c == 'u' ? 4 : 2;
                    StringBuilder hex = new StringBuilder(digitCount);
                    for (int i = 0; i < digitCount; i++)
                    {
                        // At EOF this appends U+FFFF, which makes the parse
                        // below fail and ends the conversion.
                        hex.Append((char)input.Read());
                    }

                    int codeUnit = int.Parse(hex.ToString(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    // Append the character itself, not the decimal text of its code.
                    result.Append((char)codeUnit);
                }
                else
                {
                    result.Append('\\').Append((char)c);
                }
            }
            c = input.Read();
        }
    }
    catch
    {
        // Deliberately best-effort: a malformed escape (or any other failure)
        // ends the conversion and whatever was converted so far is returned.
    }
    finally
    {
        // Close the reader on every path, not only after a failure.
        input.Close();
    }
    return result.ToString().Trim();
}
////////////////////////////////////////////////////////////////////////////
//--------------------------------- REVISIONS ------------------------------
// Date       Name                 Tracking #         Description
// ---------  -------------------  -------------      ----------------------
// 04JAN2009  James Shen                              Initial Creation
////////////////////////////////////////////////////////////////////////////
/**
 * Replace every pair of backslashes with a single backslash.
 * A backslash followed by any other character is copied through together
 * with that character, and a backslash that is the last character of the
 * input is kept as a single backslash. If reading fails, the text processed
 * so far is returned. The result has leading and trailing white space trimmed.
 * @param source the string need to change
 * @return the result string.
 */
public static string RemoveDoubleBackSlash(string source)
{
    StringBuilder result = new StringBuilder();
    StringGetter input = new StringGetter(source);
    try
    {
        int c = input.Read();
        while (c != -1) // Convert until EOF
        {
            if (c == '\\')
            {
                c = input.Read();
                result.Append('\\');
                if (c == -1)
                {
                    // Trailing backslash: nothing follows, so do not append (char)-1.
                    break;
                }
                if (c != '\\')
                {
                    // Not a doubled backslash: keep the character that followed.
                    result.Append((char)c);
                }
            }
            else
            {
                result.Append((char)c);
            }
            c = input.Read();
        }
    }
    catch
    {
        // Deliberately best-effort: on any failure return what was processed so far.
    }
    finally
    {
        // Close the reader on every path, not only after a failure.
        input.Close();
    }
    return result.ToString().Trim();
}
/**
 * Read the remainder of a tag from the stream; the caller has already
 * consumed the opening '<'.
 * Every further '<' raises the nesting depth and every '>' lowers it; reading
 * stops once the depth reaches zero or the stream reports EOF (-1).
 * @param r the stream to read from.
 * @return the tag text, starting with '<' and including everything read.
 */
private static string GetTag(StringGetter r)
{
    StringBuilder tag = new StringBuilder("<");
    int depth = 1;
    do
    {
        int ch = r.Read();
        if (ch == -1)
        {
            break; // EOF
        }
        tag.Append((char)ch);
        switch (ch)
        {
            case '<':
                depth++;
                break;
            case '>':
                depth--;
                break;
        }
    }
    while (depth > 0);
    return tag.ToString();
}
/**
 * Read the name of a character entity from the stream; the caller has
 * already consumed the leading '&'.
 * Letters (as decided by IsLetter) are collected one at a time. If the first
 * non-letter is ';' it is appended and consumed; otherwise Reset() is called
 * so that, presumably, the stream returns to the mark set just before that
 * character and the caller reads it again.
 * @param r the stream to read from.
 * @return the collected letters, followed by ';' when the entity was terminated.
 */
private static string GetSpecial(StringGetter r)
{
    StringBuilder entity = new StringBuilder();
    int next;
    for (;;)
    {
        r.Mark(1); // Mark the present position in the stream before every read
        next = r.Read();
        if (!IsLetter((char)next))
        {
            break;
        }
        entity.Append((char)next);
    }

    if (next != ';')
    {
        // Not a terminated entity: go back to the mark taken before this read.
        r.Reset();
    }
    else
    {
        entity.Append(';');
    }
    return entity.ToString();
}
//[------------------------------ PUBLIC METHODS --------------------------]
////////////////////////////////////////////////////////////////////////////
//--------------------------------- REVISIONS ------------------------------
// Date       Name                 Tracking #         Description
// ---------  -------------------  -------------      ----------------------
// 04JAN2009  James Shen                              Initial Creation
////////////////////////////////////////////////////////////////////////////
/**
 * Convert html to plain text.
 * Tags are handed to ConvertTag and replaced by whatever it returns; the
 * entities lt, gt, amp, nbsp, quot, copy, reg and trade are replaced by
 * their plain-text equivalents; unless _pre is set, a run of white space is
 * collapsed to a single space. Text is collected in one buffer while _inBody
 * is set and in another otherwise; the first is returned when _bodyFound is
 * set, the second when it is not. Conversion is best-effort: any exception
 * ends it and the text gathered so far is returned.
 * @param source HTML string.
 * @return plain text, with leading and trailing white space trimmed.
 */
public string Convert(string source)
{
    StringBuilder result = new StringBuilder();  // receives text while _inBody is set
    StringBuilder result2 = new StringBuilder(); // receives text while _inBody is not set
    StringGetter input = new StringGetter(source);
    try
    {
        int c = input.Read();
        while (c != -1) // Convert until EOF
        {
            string text;
            if (c == '<') // It's a tag!!
            {
                string currentTag = GetTag(input); // Get the rest of the tag
                text = ConvertTag(currentTag);
            }
            else if (c == '&')
            {
                string specialchar = GetSpecial(input);
                // NOTE(review): GetSpecial collects only characters accepted by
                // IsLetter plus an optional ';', so the numeric "#NN" labels
                // below look unreachable unless IsLetter accepts '#' and
                // digits - confirm. They are kept so behavior is unchanged.
                switch (specialchar)
                {
                    case "lt;":
                    case "#60":
                        text = "<";
                        break;
                    case "gt;":
                    case "#62":
                        text = ">";
                        break;
                    case "amp;":
                    case "#38":
                        text = "&";
                        break;
                    case "nbsp;":
                        text = " ";
                        break;
                    case "quot;":
                    case "#34":
                        text = "\"";
                        break;
                    case "copy;":
                    case "#169":
                        text = "[Copyright]";
                        break;
                    case "reg;":
                    case "#174":
                        text = "[Registered]";
                        break;
                    case "trade;":
                    case "#153":
                        text = "[Trademark]";
                        break;
                    default:
                        // Unknown entity: emit it as it appeared in the input.
                        text = "&" + specialchar;
                        break;
                }
            }
            else if (!_pre && IsWhitespace((char)c))
            {
                // Collapse white space: emit a space only if the active buffer
                // does not already end in white space.
                StringBuilder current = _inBody ? result : result2;
                bool endsWithWhitespace = current.Length > 0 && IsWhitespace(current[current.Length - 1]);
                text = endsWithWhitespace ? "" : " ";
            }
            else
            {
                text = ((char)c).ToString();
            }

            // The target buffer is picked only after the text was produced, as
            // in the original; ConvertTag is not visible here and may change
            // _inBody - keep this ordering.
            StringBuilder target = _inBody ? result : result2;
            target.Append(text);
            c = input.Read();
        }
    }
    catch
    {
        // Deliberately best-effort: on any failure return what was converted so far.
    }
    finally
    {
        // Close the reader on every path, not only after a failure.
        input.Close();
    }

    StringBuilder output = _bodyFound ? result : result2;
    return output.ToString().Trim();
}
////////////////////////////////////////////////////////////////////////////
//--------------------------------- REVISIONS ------------------------------
// Date       Name                 Tracking #         Description
// ---------  -------------------  -------------      ----------------------
// 04JAN2009  James Shen                              Initial Creation
////////////////////////////////////////////////////////////////////////////
/**
 * Replace every pair of backslashes with a single backslash.
 * A backslash followed by any other character is copied through together
 * with that character, and a backslash that is the last character of the
 * input is kept as a single backslash. If reading fails, the text processed
 * so far is returned. The result has leading and trailing white space trimmed.
 * @param source the string need to change
 * @return the result string.
 */
public static string RemoveDoubleBackSlash(string source)
{
    StringBuilder result = new StringBuilder();
    StringGetter input = new StringGetter(source);
    try
    {
        int c = input.Read();
        while (c != -1) // Convert until EOF
        {
            if (c == '\\')
            {
                c = input.Read();
                result.Append('\\');
                if (c == -1)
                {
                    // Trailing backslash: nothing follows, so do not append (char)-1.
                    break;
                }
                if (c != '\\')
                {
                    // Not a doubled backslash: keep the character that followed.
                    result.Append((char)c);
                }
            }
            else
            {
                result.Append((char)c);
            }
            c = input.Read();
        }
    }
    catch
    {
        // Deliberately best-effort: on any failure return what was processed so far.
    }
    finally
    {
        // Close the reader on every path, not only after a failure.
        input.Close();
    }
    return result.ToString().Trim();
}
//[------------------------------ PUBLIC METHODS --------------------------]
////////////////////////////////////////////////////////////////////////////
//--------------------------------- REVISIONS ------------------------------
// Date       Name                 Tracking #         Description
// ---------  -------------------  -------------      ----------------------
// 04JAN2009  James Shen                              Initial Creation
////////////////////////////////////////////////////////////////////////////
/**
 * Convert html to plain text.
 * Tags are handed to ConvertTag and replaced by whatever it returns; the
 * entities lt, gt, amp, nbsp, quot, copy, reg and trade are replaced by
 * their plain-text equivalents; unless _pre is set, a run of white space is
 * collapsed to a single space. Text is collected in one buffer while _inBody
 * is set and in another otherwise; the first is returned when _bodyFound is
 * set, the second when it is not. Conversion is best-effort: any exception
 * ends it and the text gathered so far is returned.
 * @param source HTML string.
 * @return plain text, with leading and trailing white space trimmed.
 */
public string Convert(string source)
{
    StringBuilder result = new StringBuilder();  // receives text while _inBody is set
    StringBuilder result2 = new StringBuilder(); // receives text while _inBody is not set
    StringGetter input = new StringGetter(source);
    try
    {
        int c = input.Read();
        while (c != -1) // Convert until EOF
        {
            string text;
            if (c == '<') // It's a tag!!
            {
                string currentTag = GetTag(input); // Get the rest of the tag
                text = ConvertTag(currentTag);
            }
            else if (c == '&')
            {
                string specialchar = GetSpecial(input);
                // NOTE(review): GetSpecial collects only characters accepted by
                // IsLetter plus an optional ';', so the numeric "#NN" labels
                // below look unreachable unless IsLetter accepts '#' and
                // digits - confirm. They are kept so behavior is unchanged.
                switch (specialchar)
                {
                    case "lt;":
                    case "#60":
                        text = "<";
                        break;
                    case "gt;":
                    case "#62":
                        text = ">";
                        break;
                    case "amp;":
                    case "#38":
                        text = "&";
                        break;
                    case "nbsp;":
                        text = " ";
                        break;
                    case "quot;":
                    case "#34":
                        text = "\"";
                        break;
                    case "copy;":
                    case "#169":
                        text = "[Copyright]";
                        break;
                    case "reg;":
                    case "#174":
                        text = "[Registered]";
                        break;
                    case "trade;":
                    case "#153":
                        text = "[Trademark]";
                        break;
                    default:
                        // Unknown entity: emit it as it appeared in the input.
                        text = "&" + specialchar;
                        break;
                }
            }
            else if (!_pre && IsWhitespace((char)c))
            {
                // Collapse white space: emit a space only if the active buffer
                // does not already end in white space.
                StringBuilder current = _inBody ? result : result2;
                bool endsWithWhitespace = current.Length > 0 && IsWhitespace(current[current.Length - 1]);
                text = endsWithWhitespace ? "" : " ";
            }
            else
            {
                text = ((char)c).ToString();
            }

            // The target buffer is picked only after the text was produced, as
            // in the original; ConvertTag is not visible here and may change
            // _inBody - keep this ordering.
            StringBuilder target = _inBody ? result : result2;
            target.Append(text);
            c = input.Read();
        }
    }
    catch
    {
        // Deliberately best-effort: on any failure return what was converted so far.
    }
    finally
    {
        // Close the reader on every path, not only after a failure.
        input.Close();
    }

    StringBuilder output = _bodyFound ? result : result2;
    return output.ToString().Trim();
}