예제 #1
0
        /// <summary>Parses an HTML fragment and returns the resulting nodes.</summary>
        /// <remarks>
        /// A new <c>HtmlTreeBuilder</c> is created for every call, and parsing runs with
        /// <c>ParseErrorList.NoTracking()</c> as the error list.
        /// </remarks>
        /// <param name="fragmentHtml">the fragment of HTML to parse</param>
        /// <param name="context">
        /// (optional) the element this fragment is being parsed for (i.e. for inner HTML); it provides
        /// stack context (for implicit element creation).
        /// </param>
        /// <param name="baseUri">base URI of the document (i.e. original fetch location), for resolving relative URLs</param>
        /// <returns>
        /// the list of nodes parsed from the input HTML. The context element, if supplied, is not modified.
        /// </returns>
        public static IList<iText.StyledXmlParser.Jsoup.Nodes.Node> ParseFragment(
            String fragmentHtml,
            iText.StyledXmlParser.Jsoup.Nodes.Element context,
            String baseUri)
        {
            // The builder is created first and the untracked error list second, then both are used once.
            return new HtmlTreeBuilder().ParseFragment(fragmentHtml, context, baseUri, ParseErrorList.NoTracking());
        }
예제 #2
0
        /// <summary>Parses an XML fragment and returns the resulting nodes.</summary>
        /// <remarks>
        /// A new <c>XmlTreeBuilder</c> is created for every call, and parsing runs with
        /// <c>ParseErrorList.NoTracking()</c> as the error list.
        /// </remarks>
        /// <param name="fragmentXml">the fragment of XML to parse</param>
        /// <param name="baseUri">base URI of the document (i.e. original fetch location), for resolving relative URLs</param>
        /// <returns>the list of nodes parsed from the input XML</returns>
        public static IList<iText.StyledXmlParser.Jsoup.Nodes.Node> ParseXmlFragment(
            String fragmentXml,
            String baseUri)
        {
            return new XmlTreeBuilder().ParseFragment(fragmentXml, baseUri, ParseErrorList.NoTracking());
        }
예제 #3
0
 /// <summary>
 /// Runs the base initialisation, then puts the document on the element stack and switches the
 /// document's output syntax to XML.
 /// </summary>
 /// <param name="input">text to parse; forwarded to the base implementation</param>
 /// <param name="baseUri">base URI; forwarded to the base implementation</param>
 /// <param name="errors">error list; forwarded to the base implementation</param>
 internal override void InitialiseParse(String input, String baseUri, ParseErrorList errors)
 {
     base.InitialiseParse(input, baseUri, errors);

     // The document itself goes onto the stack here; this differs from HtmlTreeBuilder,
     // where the document is not on the stack.
     stack.Add(doc);

     var outputSettings = doc.OutputSettings();
     outputSettings.Syntax(iText.StyledXmlParser.Jsoup.Nodes.Syntax.xml);
 }
예제 #4
0
 // current doc we are building into
 // the stack of open elements
 // current base uri, for creating new elements
 // currentToken is used only for error tracking.
 // null when not tracking errors
 // start tag to process
 /// <summary>Prepares this builder for a new parse run.</summary>
 /// <remarks>
 /// Creates a new document, character reader, tokeniser and element stack, and stores the supplied
 /// error list and base URI on the builder.
 /// </remarks>
 /// <param name="input">text to parse; checked with <c>Validate.NotNull</c></param>
 /// <param name="baseUri">base URI for the new document; checked with <c>Validate.NotNull</c></param>
 /// <param name="errors">error list handed to the tokeniser and kept on the builder</param>
 internal virtual void InitialiseParse(String input, String baseUri, ParseErrorList errors)
 {
     Validate.NotNull(input, "String input must not be null");
     Validate.NotNull(baseUri, "BaseURI must not be null");

     doc = new Document(baseUri);

     // The same reader instance is stored on the builder and handed to the tokeniser.
     CharacterReader inputReader = new CharacterReader(input);
     reader = inputReader;
     this.errors = errors;
     tokeniser = new Tokeniser(inputReader, errors);

     // Element stack starts empty with an initial capacity of 32.
     stack = new List<iText.StyledXmlParser.Jsoup.Nodes.Element>(32);
     this.baseUri = baseUri;
 }
예제 #5
0
 /// <summary>Creates a tokeniser that stores the given reader and error list.</summary>
 /// <param name="reader">character reader kept on this tokeniser</param>
 /// <param name="errors">error list kept on this tokeniser</param>
 internal Tokeniser(CharacterReader reader, ParseErrorList errors)
 {
     // NOTE(review): the notes below look like descriptions of Tokeniser members that are declared
     // outside this constructor -- confirm against the field declarations:
     //   - html input
     //   - errors found while tokenising
     //   - current tokenisation state
     //   - the token we are about to emit on next read
     //   - characters pending an emit; will fall to charsBuilder if more than one
     //   - buffers characters to output as one token, if more than one emit per read
     //   - buffers data looking for </script>
     //   - tag we are building up
     //   - doctype building up
     //   - comment building up
     //   - the last start tag emitted, to test appropriate end tag
     this.errors = errors;
     this.reader = reader;
 }
예제 #6
0
 /// <summary>Initialises the builder for the given input, runs the parser and returns the document.</summary>
 /// <param name="input">text to parse</param>
 /// <param name="baseUri">base URI passed to the initialisation step</param>
 /// <param name="errors">error list passed to the initialisation step</param>
 /// <returns>the builder's document after the parser has run</returns>
 internal virtual Document Parse(String input, String baseUri, ParseErrorList errors)
 {
     InitialiseParse(input, baseUri, errors);
     RunParser();

     Document parsed = doc;
     return parsed;
 }
예제 #7
0
 /// <summary>Parses the input with <c>ParseErrorList.NoTracking()</c> as the error list.</summary>
 /// <param name="input">text to parse</param>
 /// <param name="baseUri">base URI forwarded to the three-argument overload</param>
 /// <returns>the document returned by the three-argument overload</returns>
 internal virtual Document Parse(String input, String baseUri)
 {
     ParseErrorList untracked = ParseErrorList.NoTracking();
     return Parse(input, baseUri, untracked);
 }
예제 #8
0
 /// <summary>Parses the given markup with this parser's tree builder.</summary>
 /// <remarks>
 /// The <c>errors</c> member is replaced on every call: a tracking list sized by <c>maxErrors</c> when
 /// <c>IsTrackErrors()</c> is true, otherwise a non-tracking list.
 /// </remarks>
 /// <param name="html">markup to parse</param>
 /// <param name="baseUri">base URI forwarded to the tree builder</param>
 /// <returns>the document produced by the tree builder</returns>
 public virtual Document ParseInput(String html, String baseUri)
 {
     if (IsTrackErrors())
     {
         errors = ParseErrorList.Tracking(maxErrors);
     }
     else
     {
         errors = ParseErrorList.NoTracking();
     }

     return treeBuilder.Parse(html, baseUri, errors);
 }
예제 #9
0
        /// <summary>Utility method to unescape HTML entities in a string.</summary>
        /// <remarks>
        /// A throw-away <c>Tokeniser</c> is built over the string, with error tracking disabled, and its
        /// <c>UnescapeEntities</c> method does the actual work.
        /// </remarks>
        /// <param name="string">HTML escaped string</param>
        /// <param name="inAttribute">if the string is to be escaped in strict mode (as attributes are)</param>
        /// <returns>an unescaped string</returns>
        public static String UnescapeEntities(String @string, bool inAttribute)
        {
            CharacterReader source = new CharacterReader(@string);
            Tokeniser unescaper = new Tokeniser(source, ParseErrorList.NoTracking());

            return unescaper.UnescapeEntities(inAttribute);
        }
예제 #10
0
        /// <summary>Parses XML into a Document.</summary>
        /// <remarks>
        /// Uses a new <c>XmlTreeBuilder</c> and <c>ParseErrorList.NoTracking()</c> as the error list.
        /// </remarks>
        /// <param name="xml">XML to parse</param>
        /// <param name="baseUri">base URI of the document (i.e. original fetch location), for resolving relative URLs</param>
        /// <returns>the parsed Document</returns>
        public static Document ParseXml(String xml, String baseUri)
        {
            // Kept typed as the base TreeBuilder, as in the original call site.
            TreeBuilder xmlBuilder = new XmlTreeBuilder();
            ParseErrorList untracked = ParseErrorList.NoTracking();

            return xmlBuilder.Parse(xml, baseUri, untracked);
        }
예제 #11
0
 /// <summary>Parses a fragment and returns the child nodes of the resulting document.</summary>
 /// <param name="inputFragment">fragment text to parse</param>
 /// <param name="baseUri">base URI passed to the initialisation step</param>
 /// <param name="errors">error list passed to the initialisation step</param>
 /// <returns>the child nodes of the builder's document after the parser has run</returns>
 internal virtual IList<iText.StyledXmlParser.Jsoup.Nodes.Node> ParseFragment(
     String inputFragment,
     String baseUri,
     ParseErrorList errors)
 {
     InitialiseParse(inputFragment, baseUri, errors);
     RunParser();

     IList<iText.StyledXmlParser.Jsoup.Nodes.Node> parsedNodes = doc.ChildNodes();
     return parsedNodes;
 }
예제 #12
0
 /// <summary>Creates a tokeniser that stores the given reader and error list.</summary>
 /// <param name="reader">character reader kept on this tokeniser</param>
 /// <param name="errors">error list kept on this tokeniser</param>
 internal Tokeniser(CharacterReader reader, ParseErrorList errors)
 {
     this.errors = errors;
     this.reader = reader;
 }
예제 #13
0
 /// <summary>Parses an HTML fragment, optionally in the context of an existing element.</summary>
 /// <remarks>
 /// When <paramref name="context"/> is non-null: the new document takes the quirks mode of the context's
 /// owner document (if it has one), the tokeniser's starting state is picked from the context's tag name,
 /// a new <c>html</c> root element is appended to the document and pushed onto the stack, the insertion
 /// mode is reset, and the form element is set to the first <c>FormElement</c> found when walking the
 /// context followed by <c>context.Parents()</c>.
 /// </remarks>
 /// <param name="inputFragment">fragment text to parse</param>
 /// <param name="context">element the fragment is parsed for; may be null</param>
 /// <param name="baseUri">base URI used for initialisation and for the new root element</param>
 /// <param name="errors">error list passed to the initialisation step</param>
 /// <returns>
 /// the child nodes of the new root element when a context was supplied; otherwise the child nodes of
 /// the document
 /// </returns>
 internal virtual IList<iText.StyledXmlParser.Jsoup.Nodes.Node> ParseFragment(
     String inputFragment,
     iText.StyledXmlParser.Jsoup.Nodes.Element context,
     String baseUri,
     ParseErrorList errors)
 {
     state = HtmlTreeBuilderState.Initial;
     InitialiseParse(inputFragment, baseUri, errors);
     contextElement  = context;
     fragmentParsing = true;

     // Without a context there is nothing to set up: parse straight into the document.
     if (context == null)
     {
         RunParser();
         return doc.ChildNodes();
     }

     // Quirks setup: inherit from the context's owner document when there is one.
     if (context.OwnerDocument() != null)
     {
         doc.QuirksMode(context.OwnerDocument().QuirksMode());
     }

     // Initialise the tokeniser state from the context tag.
     String contextTag = context.TagName();
     if (iText.StyledXmlParser.Jsoup.Helper.StringUtil.In(contextTag, "title", "textarea"))
     {
         tokeniser.Transition(TokeniserState.Rcdata);
     }
     else if (iText.StyledXmlParser.Jsoup.Helper.StringUtil.In(contextTag, "iframe", "noembed", "noframes",
                                                               "style", "xmp"))
     {
         tokeniser.Transition(TokeniserState.Rawtext);
     }
     else if (contextTag.Equals("script"))
     {
         tokeniser.Transition(TokeniserState.ScriptData);
     }
     else
     {
         // Default. "noscript" (which would be rawtext if scripting were enabled) and "plaintext"
         // also start in the Data state, exactly like every other tag.
         // NOTE(review): the HTML fragment parsing algorithm appears to call for a PLAINTEXT
         // tokeniser state when the context is "plaintext" -- confirm whether Data is intentional.
         tokeniser.Transition(TokeniserState.Data);
     }

     iText.StyledXmlParser.Jsoup.Nodes.Element root = new iText.StyledXmlParser.Jsoup.Nodes.Element(
         iText.StyledXmlParser.Jsoup.Parser.Tag.ValueOf("html"), baseUri);
     doc.AppendChild(root);
     stack.Add(root);
     ResetInsertionMode();

     // Set the form element to the nearest form on the context (up the ancestor chain), so that form
     // controls are associated with the form correctly.
     Elements contextChain = context.Parents();
     contextChain.Add(0, context);
     foreach (iText.StyledXmlParser.Jsoup.Nodes.Element candidate in contextChain)
     {
         FormElement form = candidate as FormElement;
         if (form != null)
         {
             formElement = form;
             break;
         }
     }

     RunParser();
     return root.ChildNodes();
 }
예제 #14
0
 // tag searches
 // the current state
 // original / marked state
 // the current head element
 // the current form element
 // fragment parse context -- could be null even if fragment parsing
 // active (open) formatting elements
 // chars in table to be shifted out
 // reused empty end tag
 // if ok to go into frameset
 // if next inserts should be fostered
 // if parsing a fragment of html
 /// <summary>Resets the HTML builder's state, then delegates to the base parse.</summary>
 /// <remarks>
 /// Before delegating, <c>state</c> is set to <c>HtmlTreeBuilderState.Initial</c> and
 /// <c>baseUriSetFromDoc</c> is cleared.
 /// </remarks>
 /// <param name="input">text to parse</param>
 /// <param name="baseUri">base URI forwarded to the base implementation</param>
 /// <param name="errors">error list forwarded to the base implementation</param>
 /// <returns>the document returned by the base implementation</returns>
 internal override Document Parse(String input, String baseUri, ParseErrorList errors)
 {
     baseUriSetFromDoc = false;
     state = HtmlTreeBuilderState.Initial;

     Document parsed = base.Parse(input, baseUri, errors);
     return parsed;
 }