示例#1
0
 /// <summary>
 /// Parses HTML from <paramref name="reader"/> into an <c>XDoc</c> using an SGML reader set up
 /// with the "HTML" doctype, lower-case folding, <c>WhitespaceHandling.All</c> and the cached <c>_dtd</c>.
 /// </summary>
 /// <param name="reader">Text source handed to the SGML reader as its input stream.</param>
 /// <returns>
 /// The loaded document, or <c>XDoc.Empty</c> when loading yields no root element or an
 /// exception is thrown while loading.
 /// </returns>
 private static XDoc FromHtml(TextReader reader)
 {
     Sgml.SgmlReader htmlReader = new Sgml.SgmlReader(XDoc.XmlNameTable);
     htmlReader.Dtd = _dtd;
     htmlReader.DocType = "HTML";
     htmlReader.WhitespaceHandling = WhitespaceHandling.All;
     htmlReader.CaseFolding = Sgml.CaseFolding.ToLower;
     htmlReader.InputStream = reader;

     try {
         XmlDocument parsed = XDoc.NewXmlDocument();
         parsed.Load(htmlReader);
         if (parsed.DocumentElement != null)
         {
             // keep the DTD the reader ended up with; it is fed back into the reader on later calls
             if (_dtd == null)
             {
                 _dtd = htmlReader.Dtd;
             }
             return new XDoc(parsed);
         }
     } catch (Exception) {
         // parsing is best-effort: any failure falls through to the empty result
     }
     return XDoc.Empty;
 }
        /// <summary>
        /// Obtains text through <c>WebText</c>, wraps it in <c>&lt;html&gt;&lt;body&gt;</c> tags, parses it with an
        /// SGML reader, and returns the outcome after passing it through <c>CleanseHtmlDocument</c>.
        /// </summary>
        /// <param name="source">HTML source text or source uri.</param>
        /// <param name="xpath">xpath to value.</param>
        /// <param name="namespaces">namespaces forwarded to <c>WebText</c>.</param>
        /// <param name="ttl">caching duration in seconds, forwarded to <c>WebText</c>.</param>
        /// <param name="nilIfMissing">forwarded to <c>WebText</c>; see the parameter description attribute.</param>
        /// <returns>
        /// <c>null</c> when <c>WebText</c> yields <c>null</c>; otherwise the cleansed document, which starts
        /// from <c>XDoc.Empty</c> when parsing throws or produces no root element.
        /// </returns>
        public static XDoc WebHtml(
            [DekiScriptParam("HTML source text or source uri (default: none)", true)] string source,
            [DekiScriptParam("xpath to value (default: none)", true)] string xpath,
            [DekiScriptParam("namespaces (default: none)", true)] Hashtable namespaces,
            [DekiScriptParam("caching duration in seconds (range: 60 - 86400; default: 300)", true)] double? ttl,
            [DekiScriptParam("return nil if source could not be loaded (default: text with error message)", true)] bool? nilIfMissing
        )
        {
            string fragment = WebText(source, xpath, namespaces, true, ttl, nilIfMissing);
            if (fragment == null)
            {
                return null;
            }

            // stays empty unless the fragment parses into a document with a root element
            XDoc html = XDoc.Empty;
            using (TextReader input = new StringReader(string.Concat("<html><body>", fragment, "</body></html>")))
            {
                // load the HTML entities DTD the first time it is needed and keep it for later calls
                if (_htmlEntitiesDtd == null)
                {
                    using (StreamReader dtdReader = new StreamReader(Plug.New("resource://mindtouch.deki.script/MindTouch.Deki.Script.Resources.HtmlEntities.dtd").Get().AsStream()))
                    {
                        _htmlEntitiesDtd = Sgml.SgmlDtd.Parse(null, "HTML", dtdReader, null, null, XDoc.XmlNameTable);
                    }
                }

                // NOTE (steveb): the sgml reader is built by hand so that no full HTML DTD gets attached to it;
                //                such a DTD would force a potentially unwanted HTML structure. Only the entities DTD is set.
                Sgml.SgmlReader parser = new Sgml.SgmlReader(XDoc.XmlNameTable)
                {
                    Dtd = _htmlEntitiesDtd,
                    DocType = "HTML",
                    WhitespaceHandling = WhitespaceHandling.All,
                    CaseFolding = Sgml.CaseFolding.ToLower,
                    InputStream = input
                };

                try
                {
                    XmlDocument dom = new XmlDocument(XDoc.XmlNameTable);
                    dom.PreserveWhitespace = true;
                    dom.XmlResolver = null;
                    dom.Load(parser);

                    // only accept the result when a root element exists
                    if (dom.DocumentElement != null)
                    {
                        html = new XDoc(dom);
                    }
                }
                catch
                {
                    // parsing failures are intentionally ignored; the empty document is used instead
                }
            }
            return CleanseHtmlDocument(html);
        }
        /// <summary>
        /// Obtains text through <c>WebText</c>, wraps it in <c>&lt;html&gt;&lt;body&gt;</c> tags, parses it with an
        /// SGML reader, and returns the outcome of <c>DekiScriptLibrary.CleanseHtmlDocument</c>.
        /// </summary>
        /// <param name="source">HTML source text or source uri.</param>
        /// <param name="xpath">xpath to value.</param>
        /// <param name="namespaces">namespaces forwarded to <c>WebText</c>.</param>
        /// <param name="ttl">caching duration in seconds, forwarded to <c>WebText</c>.</param>
        /// <returns>
        /// The cleansed document; cleansing is applied to <c>XDoc.Empty</c> when parsing throws or
        /// produces no root element.
        /// </returns>
        public XDoc WebHtml(
            [DekiExtParam("HTML source text or source uri (default: none)", true)] string source,
            [DekiExtParam("xpath to value (default: none)", true)] string xpath,
            [DekiExtParam("namespaces (default: none)", true)] Hashtable namespaces,
            [DekiExtParam("caching duration in seconds (range: 300+; default: 300)", true)] double? ttl
        ) {
            string fragment = WebText(source, xpath, namespaces, true, ttl);

            // remains empty unless the fragment parses into a document with a root element
            XDoc parsed = XDoc.Empty;
            using (TextReader markup = new StringReader("<html><body>" + fragment + "</body></html>")) {

                // lazily load the HTML entities DTD and keep it for subsequent calls
                if (_htmlEntitiesDtd == null) {
                    using (StreamReader dtdReader = new StreamReader(Plug.New("resource://mindtouch.deki.script/MindTouch.Deki.Script.HtmlEntities.dtd").Get().AsStream())) {
                        _htmlEntitiesDtd = Sgml.SgmlDtd.Parse(null, "HTML", dtdReader, null, null, XDoc.XmlNameTable);
                    }
                }

                // NOTE (steveb): the sgml reader is constructed by hand so that no full HTML DTD is associated with it;
                //                such a DTD would force a potentially unwanted HTML structure. Only the entities DTD is set.
                Sgml.SgmlReader parser = new Sgml.SgmlReader(XDoc.XmlNameTable) {
                    Dtd = _htmlEntitiesDtd,
                    DocType = "HTML",
                    WhitespaceHandling = WhitespaceHandling.All,
                    CaseFolding = Sgml.CaseFolding.ToLower,
                    InputStream = markup
                };
                try {
                    XmlDocument dom = new XmlDocument(XDoc.XmlNameTable) {
                        PreserveWhitespace = true,
                        XmlResolver = null
                    };
                    dom.Load(parser);

                    // accept the result only when a root element exists
                    if (dom.DocumentElement != null) {
                        parsed = new XDoc(dom);
                    }
                } catch (Exception) {

                    // parsing failures are intentionally ignored; the empty document is used instead
                }
            }
            return DekiScriptLibrary.CleanseHtmlDocument(parsed);
        }
示例#4
0
 /// <summary>
 /// Loads HTML from <paramref name="reader"/> through an SGML reader ("HTML" doctype, lower-case
 /// folding, <c>WhitespaceHandling.All</c>, cached <c>_dtd</c>) and wraps the result in an <c>XDoc</c>.
 /// </summary>
 /// <param name="reader">Text source assigned to the SGML reader's input stream.</param>
 /// <returns>
 /// The loaded document, or <c>XDoc.Empty</c> if there is no root element or loading throws.
 /// </returns>
 private static XDoc FromHtml(TextReader reader)
 {
     Sgml.SgmlReader sgml = new Sgml.SgmlReader(XDoc.XmlNameTable) {
         Dtd = _dtd,
         DocType = "HTML",
         WhitespaceHandling = WhitespaceHandling.All,
         CaseFolding = Sgml.CaseFolding.ToLower,
         InputStream = reader
     };
     try {
         XmlDocument document = XDoc.NewXmlDocument();
         document.Load(sgml);
         bool hasRoot = document.DocumentElement != null;

         // after a successful load with a root, remember the reader's DTD if none was cached yet
         if(hasRoot && (_dtd == null)) {
             _dtd = sgml.Dtd;
         }
         return hasRoot ? new XDoc(document) : XDoc.Empty;
     } catch(Exception) {

         // best-effort parse: any failure yields the empty document
         return XDoc.Empty;
     }
 }