/// <summary>
/// Runs an HTML string through an <c>SgmlReader</c> and returns whatever
/// the reader produces from <c>ReadOuterXml()</c>.
/// </summary>
/// <param name="html">The HTML markup to feed to the reader.</param>
/// <returns>The string returned by the reader's <c>ReadOuterXml()</c> call.</returns>
public static String GetXmlFromHtmlString (String html)
{
    using (SgmlReader sgmlReader = new SgmlReader())
    {
        // The reader consumes the markup through a TextReader over the string.
        StringReader markupSource = new StringReader(html);
        sgmlReader.InputStream = markupSource;

        String outerXml = sgmlReader.ReadOuterXml();
        return outerXml;
    }
}
//private static Value getDocXml (String uri, String path, HttpContext context)
//{
//    try
//    {
//        return Value.asValue(getXdmNode(uri, path, context).Unwrap());
//    }
//    catch (Exception e)
//    {
//        throw;
//    }
//}

/// <summary>
/// Returns the XML produced by an <c>SgmlReader</c> for the document at
/// <paramref name="uri"/>, consulting the in-process cache first and,
/// when enabled via <c>context.Application["usememcached"]</c>, memcached.
/// </summary>
/// <param name="uri">URL-encoded address of the document; it is URL-decoded before use.</param>
/// <param name="path">Not used by this method.</param>
/// <param name="context">
/// Current HTTP context; its <c>Application</c> state supplies the
/// <c>"usememcached"</c> flag and the <c>"memcached"</c> client.
/// </param>
/// <returns>
/// The XML text read for the document, or a fixed "No Readable HTML" XHTML
/// page when reading fails.
/// </returns>
private static String getXmlReader (String uri, String path, HttpContext context)
{
    string decodedUri = HttpUtility.UrlDecode(uri);
    string eTag = Context.GenerateETag(decodedUri, Nuxleus.Cryptography.HashAlgorithm.SHA1);

    try
    {
        // Single lookup instead of ContainsKey followed by the indexer.
        String cachedXhtml;
        if (m_CacheDictionary.TryGetValue(eTag, out cachedXhtml))
        {
            return cachedXhtml;
        }

        using (SgmlReader sr = new SgmlReader())
        {
            MemcachedClient memcachedClient = null;
            bool storeInMemcached = false;
            bool sourceConfigured = false;

            try
            {
                if ((bool)context.Application["usememcached"])
                {
                    memcachedClient = (MemcachedClient)context.Application["memcached"];
                    if (memcachedClient.KeyExists(eTag))
                    {
                        sr.InputStream = GetStreamFromHtmlString((string)memcachedClient.Get(eTag));
                        sourceConfigured = true;
                    }
                    else
                    {
                        // Defer the memcached write until the document has been read
                        // once; reading it here and again below would drain the reader
                        // before the value that is actually returned gets produced.
                        storeInMemcached = true;
                    }
                }
            }
            catch (Exception e)
            {
                // Memcached is best-effort: log and fall through to a direct fetch.
                Console.WriteLine(e.Message);
                Console.WriteLine(e.StackTrace);
            }

            if (!sourceConfigured)
            {
                sr.Href = decodedUri;
            }

            String xhtml = sr.ReadOuterXml();

            if (storeInMemcached)
            {
                try
                {
                    memcachedClient.Add(eTag, xhtml, DateTime.Now.AddMinutes(60));
                }
                catch (Exception e)
                {
                    // A failed memcached write must not discard a successfully read document.
                    Console.WriteLine(e.Message);
                    Console.WriteLine(e.StackTrace);
                }
            }

            // Indexer assignment rather than Add: Add throws if the key was inserted
            // by another request between the lookup above and this point.
            m_CacheDictionary[eTag] = xhtml;
            return xhtml;
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
        Console.WriteLine(e.StackTrace);
    }

    // Reached only when reading the document failed.
    return @"<html xmlns='http://www.w3.org/1999/xhtml'> <head> <title>No Readable HTML</title> </head> <body> <h1>No readable HTML was located at the specified URL</h1> </body> </html>";
}
/// <summary>
/// Wraps an HTML fragment in an XHTML-namespaced <c>div</c>, runs it through
/// an <c>SgmlReader</c>, and parses the reader's output into an element.
/// </summary>
/// <param name="html">The HTML fragment to convert.</param>
/// <returns>
/// The <c>XElement</c> parsed from the reader's output, or, if any step of
/// the conversion throws, an <c>XText</c> holding the HTML-encoded input.
/// </returns>
public static XNode GetXmlFromHtmlString(String html)
{
    using (SgmlReader sgmlReader = new SgmlReader())
    {
        string wrappedMarkup = String.Format("<div xmlns=\"http://www.w3.org/1999/xhtml\">{0}</div>", html);

        XNode converted;
        try
        {
            sgmlReader.InputStream = new StringReader(wrappedMarkup);
            string outerXml = sgmlReader.ReadOuterXml();
            converted = XElement.Parse(outerXml, LoadOptions.None);
        }
        catch
        {
            // Any failure falls back to returning the input as encoded text.
            string encodedText = HttpUtility.HtmlEncode(html);
            converted = new XText(encodedText);
        }

        return converted;
    }
}