/// <summary>
        /// Processes one token while raw text content (e.g. the body of a script element) is being
        /// collected, and returns the state that should handle the next token.
        /// NOTE(review): the branches match the HTML5 "text" insertion mode — confirm against the enclosing class.
        /// </summary>
        /// <param name="tokenizer">The tokenizer that produced the token (not used here).</param>
        /// <param name="queue">Token queue; used to re-enqueue the end-of-file token for reprocessing.</param>
        /// <param name="token">The token to process.</param>
        /// <param name="doc">Document passed to the character insertion helper.</param>
        /// <returns>
        /// This state after inserting a character; otherwise the original insertion mode state
        /// reported by <c>TreeConstruction.Instance</c>.
        /// </returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            CharacterToken characterToken = token as CharacterToken;
            if (characterToken != null)
            {
                InsertCharacter(characterToken, doc);
                return this;
            }

            if (token is EndOfFileToken)
            {
                ReportParseError();
                //TODO - If the current node is a script element, mark the script element as "already started".

                // Pop the current node off the stack of open elements so the unfinished element
                // does not remain the insertion target.
                TreeConstruction.Instance.StackOfOpenElements.Pop();

                // Switch to the original insertion mode and reprocess the token, so that mode
                // gets to see the end-of-file token as well.
                queue.EnqueueTokenForReprocessing(token);
                return TreeConstruction.Instance.GetOriginalInsertionModeState();
            }

            EndTagToken endTagToken = token as EndTagToken;
            if (endTagToken != null && endTagToken.TagName == "script")
            {
                /* TODO:
                Perform a microtask checkpoint. (http://www.w3.org/TR/html5/webappapis.html#perform-a-microtask-checkpoint)
                Provide a stable state. (http://www.w3.org/TR/html5/webappapis.html#provide-a-stable-state)
                Let script be the current node (which will be a script element).
              v Pop the current node off the stack of open elements.
              v Switch the insertion mode to the original insertion mode.
                Let the old insertion point have the same value as the current insertion point. Let the insertion point be just before the next input character.
                Increment the parser's script nesting level (http://www.w3.org/TR/html5/syntax.html#script-nesting-level) by one.
                Prepare the script. This might cause some script to execute, which might cause new characters to be inserted into the tokenizer, and might cause the tokenizer to output more tokens, resulting in a reentrant invocation of the parser.
                Decrement the parser's script nesting level by one. If the parser's script nesting level is zero, then set the parser pause flag to false.
                Let the insertion point have the value of the old insertion point. (In other words, restore the insertion point to its previous value. This value might be the "undefined" value.)
                At this stage, if there is a pending parsing-blocking script, then:

                    If the script nesting level is not zero:
                        Set the parser pause flag to true, and abort the processing of any nested invocations of the tokenizer, yielding control back to the caller. (Tokenization will resume when the caller returns to the "outer" tree construction stage.)
                        NOTE: The tree construction stage of this particular parser is being called reentrantly, say from a call to document.write().

                    Otherwise:
                        Run these steps:
                            1. Let the script be the pending parsing-blocking script. There is no longer a pending parsing-blocking script.
                            2. Block the tokenizer for this instance of the HTML parser, such that the event loop will not run tasks that invoke the tokenizer.
                            3. If the parser's Document has a style sheet that is blocking scripts or the script's "ready to be parser-executed" flag is not set: spin the event loop until the parser's Document has no style sheet that is blocking scripts and the script's "ready to be parser-executed" flag is set.
                            4. If this parser has been aborted in the meantime, abort these steps.
                               NOTE: This could happen if, e.g., while the spin the event loop algorithm is running, the browsing context gets closed, or the document.open() method gets invoked on the Document.
                            5. Unblock the tokenizer for this instance of the HTML parser, such that tasks that invoke the tokenizer can again be run.
                            6. Let the insertion point be just before the next input character.
                            7. Increment the parser's script nesting level by one (it should be zero before this step, so this sets it to one).
                            8. Execute the script.
                            9. Decrement the parser's script nesting level by one. If the parser's script nesting level is zero (which it always should be at this point), then set the parser pause flag to false.
                            10. Let the insertion point be undefined again.
                            11. If there is once again a pending parsing-blocking script, then repeat these steps from step 1.
                    */

                // Even without script execution, the finished script element must leave the stack
                // of open elements; otherwise later nodes would be inserted as its children.
                TreeConstruction.Instance.StackOfOpenElements.Pop();
                return TreeConstruction.Instance.GetOriginalInsertionModeState();
            }

            // Any other token: pop the current node and go back to the original insertion mode.
            TreeConstruction.Instance.StackOfOpenElements.Pop();
            return TreeConstruction.Instance.GetOriginalInsertionModeState();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a streaming parser that reads tokens from <paramref name="reader"/> through an
        /// NTriples tokeniser with its NQuads mode switched on.
        /// </summary>
        /// <param name="factory">Graph factory stored for later use by the parser.</param>
        /// <param name="reader">Reader supplying the input to tokenise.</param>
        public StreamingNQuadsParser(GraphFactory factory, StreamReader reader)
        {
            _factory = factory;

            // Same tokeniser as NTriples, with the NQuads flag set at construction time.
            NTriplesTokeniser quadTokeniser = new NTriplesTokeniser(reader) { NQuadsMode = true };

            _tokens = new BufferedTokenQueue(quadTokeniser);
            _tokens.InitialiseBuffer();
        }
 /// <summary>
 /// Lets the in-body state process the token on behalf of this state.
 /// </summary>
 /// <returns>
 /// This state when the in-body state reports itself as the next state; otherwise whatever
 /// state the in-body rules returned.
 /// </returns>
 private BaseInsertionModeState ProcessUsingRulesOf(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
 {
     BaseInsertionModeState outcome = InBodyInsertionModeState.Instance.ProcessToken(tokenizer, queue, token, doc);

     // "Stay where you are" from the in-body rules means staying in *this* state, not in-body.
     bool inBodyAskedToStay = outcome == InBodyInsertionModeState.Instance;
     return inBodyAskedToStay ? this : outcome;
 }
        /// <summary>
        /// Processes one token before a head element has been created and returns the state that
        /// should handle the next token.
        /// NOTE(review): the branches match the HTML5 "before head" insertion mode — confirm against the enclosing class.
        /// </summary>
        /// <param name="tokenizer">The tokenizer that produced the token (not used here).</param>
        /// <param name="queue">Token queue; used to re-enqueue the token once an implied head has been inserted.</param>
        /// <param name="token">The token to process.</param>
        /// <param name="doc">Document receiving inserted nodes; its head pointer is set when a head element is created.</param>
        /// <returns>
        /// This state when the token is consumed or ignored; the in-head state once a head element
        /// (explicit or implied) has been inserted.
        /// </returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            // Whitespace is ignored.
            if (IsWhitespace(token))
            {
                return this;
            }

            CommentToken commentToken = token as CommentToken;
            if (commentToken != null)
            {
                InsertComment(commentToken, doc);
                return this;
            }

            if (token is DocTypeToken)
            {
                ReportParseError();
                return this;
            }

            StartTagToken startTagToken = token as StartTagToken;
            if (startTagToken != null)
            {
                if (startTagToken.TagName == "html")
                {
                    //TODO - Process the token using the rules for the "in body" insertion mode.
                    // Until that is wired up, keep the previous observable behaviour for this
                    // token (parse error, token ignored) instead of falling into the implied-head path.
                    ReportParseError();
                    return this;
                }

                if (startTagToken.TagName == "head")
                {
                    ((Document)doc).head = (IHTMLHeadElement)base.InsertHtmlElement(startTagToken, doc);
                    return InHeadInsertionModeState.Instance;
                }
            }

            // Only end tags OTHER than head/body/html/br are a parse error to be ignored.
            // Every remaining token (other start tags, non-whitespace characters, end-of-file and
            // the four end tags listed here) must reach the "anything else" handling below.
            EndTagToken endTagToken = token as EndTagToken;
            if (endTagToken != null &&
                endTagToken.TagName != "head" &&
                endTagToken.TagName != "body" &&
                endTagToken.TagName != "html" &&
                endTagToken.TagName != "br")
            {
                ReportParseError();
                return this;
            }

            //Insert an HTML element for a "head" start tag token with no attributes.
            //Set the head element pointer to the newly created head element.
            StartTagToken dummyToken = new StartTagToken(){ TagName = "head" };
            ((Document)doc).head = (IHTMLHeadElement)InsertHtmlElement(dummyToken, doc);

            //Switch the insertion mode to "in head".
            //Reprocess the current token.
            queue.EnqueueTokenForReprocessing(token);
            return InHeadInsertionModeState.Instance;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Creates a streaming parser over <paramref name="reader"/>, consumes the leading
        /// beginning-of-file token and records whether the input is already at end-of-file.
        /// </summary>
        /// <param name="g">Graph stored for later use by the parser.</param>
        /// <param name="reader">Reader supplying the input to tokenise.</param>
        public StreamingNTriplesParser(IGraph g, StreamReader reader)
        {
            _g = g;
            _tokens = new BufferedTokenQueue(new NTriplesTokeniser(reader));
            _tokens.InitialiseBuffer();

            // The stream has to open with a BOF token; anything else is a parser error.
            IToken first = _tokens.Dequeue();
            if (first.TokenType != Token.BOF)
            {
                throw ParserHelper.Error("Unexpected Token '" + first.GetType().ToString() + "' encountered, expected a Beginning of File Token", first);
            }

            // Look ahead without consuming: an immediate EOF means there is nothing to parse.
            if (_tokens.Peek().TokenType == Token.EOF)
            {
                _eof = true;
            }
        }
        /// <summary>
        /// Processes one token for this state and returns the state that should handle the next one.
        /// Whitespace and "html" start tags are delegated to <c>ProcessUsingRulesOf</c>; an "html"
        /// end tag moves on to the after-after-body state; end-of-file stops parsing; any other
        /// token is a parse error that is handed back to the in-body state.
        /// </summary>
        /// <returns>The next insertion mode state, or null once parsing has been stopped.</returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            if (IsWhitespace(token))
            {
                return ProcessUsingRulesOf(tokenizer, queue, token, doc);
            }

            if (token is CommentToken)
            {
                //TODO - Insert a comment as the last child of the first element in the stack of open elements (the html element).
                return this;
            }

            if (token is DocTypeToken)
            {
                ReportParseError();
                return this;
            }

            StartTagToken openingTag = token as StartTagToken;
            bool isHtmlStartTag = openingTag != null && openingTag.TagName == "html";
            if (isHtmlStartTag)
            {
                return ProcessUsingRulesOf(tokenizer, queue, token, doc);
            }

            EndTagToken closingTag = token as EndTagToken;
            bool isHtmlEndTag = closingTag != null && closingTag.TagName == "html";
            if (isHtmlEndTag)
            {
                //TODO - If the parser was originally created as part of the HTML fragment parsing algorithm,
                //TODO - this is a parse error; ignore the token. (fragment case)
                //TODO - Otherwise, switch the insertion mode to "after after body".
                return AfterAfterBodyInsertionModeState.Instance;
            }

            if (!(token is EndOfFileToken))
            {
                // Anything else: parse error, then let the in-body state see the same token again.
                ReportParseError();
                queue.EnqueueTokenForReprocessing(token);
                return InBodyInsertionModeState.Instance;
            }

            // End of file: nothing more to process.
            base.StopParsing();
            return null;
        }
Exemplo n.º 7
0
 /// <summary>
 /// Creates a Tokenising Parser Context whose token queue implementation and tracing
 /// behaviour are chosen by the caller
 /// </summary>
 /// <param name="handler">Results Handler</param>
 /// <param name="tokeniser">Tokeniser the queue reads from</param>
 /// <param name="queueMode">Selects which token queue implementation is created</param>
 /// <param name="traceParsing">Whether to trace parsing</param>
 /// <param name="traceTokeniser">Whether to trace tokenisation</param>
 public TokenisingResultParserContext(ISparqlResultsHandler handler, ITokeniser tokeniser, TokenQueueMode queueMode, bool traceParsing, bool traceTokeniser)
     : base(handler, traceParsing)
 {
     if (queueMode == TokenQueueMode.AsynchronousBufferDuringParsing)
     {
         _queue = new AsynchronousBufferedTokenQueue(tokeniser);
     }
     else if (queueMode == TokenQueueMode.SynchronousBufferDuringParsing)
     {
         _queue = new BufferedTokenQueue(tokeniser);
     }
     else
     {
         // QueueAllBeforeParsing and any unrecognised mode use the plain queue.
         _queue = new TokenQueue(tokeniser);
     }

     _traceTokeniser = traceTokeniser;
     _queue.Tracing = _traceTokeniser;
 }
Exemplo n.º 8
0
 /// <summary>
 /// Creates a Tokenising Parser Context that reads from the given tokeniser through a
 /// plain <c>TokenQueue</c>, leaving all tracing options untouched
 /// </summary>
 /// <param name="handler">Results Handler passed to the base context</param>
 /// <param name="tokeniser">Tokeniser the queue reads from</param>
 public TokenisingResultParserContext(ISparqlResultsHandler handler, ITokeniser tokeniser)
     : base(handler)
 {
     _queue = new TokenQueue(tokeniser);
 }
        /// <summary>
        /// Processes a single token for this insertion-mode state and returns the state that should
        /// handle the next token. Branches are checked strictly in order, so the earlier, more
        /// specific tag checks win over the generic start-tag / end-tag fallbacks near the end.
        /// Many branches are still placeholders (marked TODO) that consume the token and return
        /// this state without doing anything else.
        /// NOTE(review): the TODO references suggest this is the "in body" insertion mode — confirm
        /// against the enclosing class.
        /// </summary>
        /// <param name="tokenizer">Tokenizer; its ScriptingEnabled flag is read and it is switched to the PLAINTEXT state on a "plaintext" start tag.</param>
        /// <param name="queue">Token queue used to re-enqueue a token for reprocessing.</param>
        /// <param name="token">The token to process.</param>
        /// <param name="doc">Document passed to the insertion helpers.</param>
        /// <returns>
        /// This state for most tokens; the state returned by the in-head state when that is not the
        /// in-head instance itself; the result of ProcessEndTagBodyOrHtml for "body" / "html" end
        /// tags; null after StopParsing has been called for an end-of-file token.
        /// </returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            //TODO - PERFORMANCE!
            CharacterToken charToken = token as CharacterToken;
            if (charToken != null)
            {
                // A character with value 0 is reported as a parse error and dropped.
                if (charToken.Character == 0)
                {
                    ReportParseError();
                    return this;
                }

                // NOTE: until the TODOs are implemented, this whitespace branch does exactly the
                // same as the non-whitespace path below it.
                if (IsWhitespace(charToken))
                {
                    //TODO - Reconstruct the active formatting elements, if any. (http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements)
                    //       Insert the token's character. (http://www.w3.org/TR/html5/syntax.html#insert-a-character)
                    InsertCharacter(charToken, doc);
                    return this;
                }

                //TODO - Reconstruct the active formatting elements, if any. (http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements)
                //       Insert the token's character. (http://www.w3.org/TR/html5/syntax.html#insert-a-character)
                InsertCharacter(charToken, doc);
                //TODO - Set the frameset-ok flag to "not ok". (http://www.w3.org/TR/html5/syntax.html#frameset-ok-flag)
                return this;
            }

            CommentToken comment = token as CommentToken;
            if (comment != null)
            {
                InsertComment(comment, doc);
                return this;
            }

            if (token is DocTypeToken)
            {
                ReportParseError();
                return this;
            }

            // At most one of these is non-null; every tag check below tests one of the two.
            StartTagToken startTag = token as StartTagToken;
            EndTagToken endTag = token as EndTagToken;

            if (startTag != null && startTag.TagName == "html")
            {
                ReportParseError();
                //TODO - If there is a template element on the stack of open elements, then ignore the token.
                //TODO - Otherwise, for each attribute on the token, check to see if the attribute is already
                //TODO - present on the top element of the stack of open elements. If it is not, add the attribute
                //TODO - and its corresponding value to that element.
                return this;
            }

            if ((startTag != null && (
                    startTag.TagName == "base" ||
                    startTag.TagName == "basefont" ||
                    startTag.TagName == "bgsound" ||
                    startTag.TagName == "link" ||
                    startTag.TagName == "meta" ||
                    startTag.TagName == "noframes" ||
                    startTag.TagName == "script" ||
                    startTag.TagName == "style" ||
                    startTag.TagName == "template" ||
                    startTag.TagName == "title")) ||
                (endTag != null && endTag.TagName == "template"))
            {
                // Delegate to the in-head state; remain in this state unless it asks for a
                // state other than itself.
                BaseInsertionModeState nextState = InHeadInsertionModeState.Instance.ProcessToken(tokenizer, queue, token, doc);
                if (nextState != InHeadInsertionModeState.Instance)
                {
                    return nextState;
                }
                return this;
            }

            if (startTag != null && startTag.TagName == "body")
            {
                ReportParseError();
                //TODO - If the second element on the stack of open elements is not a body element, if the stack of open elements
                //TODO - has only one node on it, or if there is a template element on the stack of open elements, then ignore the
                //TODO - token. (fragment case)
                //TODO - Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute on the token, check to see if
                //TODO - the attribute is already present on the body element (the second element) on the stack of open elements,
                //TODO - and if it is not, add the attribute and its corresponding value to that element.
                return this;
            }

            if (startTag != null && startTag.TagName == "frameset")
            {
                ReportParseError();
                //TODO - If the stack of open elements has only one node on it, or if the second element on the stack of open
                //TODO - elements is not a body element, then ignore the token. (fragment case)

                //TODO - If the frameset-ok flag is set to "not ok", ignore the token.
                //TODO - Otherwise, run the following steps:
                //TODO -     1. Remove the second element on the stack of open elements from its parent node, if it has one.
                //TODO -     2. Pop all the nodes from the bottom of the stack of open elements, from the current node up to,
                //TODO -        but not including, the root html element.
                //TODO -     3. Insert an HTML element for the token.
                //TODO -     4. Switch the insertion mode to "in frameset".
                return this;
            }

            if (token is EndOfFileToken)
            {
                //TODO - If there is a node in the stack of open elements that is not either a dd element, a dt element,
                //TODO - an li element, a p element, a tbody element, a td element, a tfoot element, a th element,
                //TODO - a thead element, a tr element, the body element, or the html element, then this is a parse error.

                //TODO - If the stack of template insertion modes is not empty, then process the token using the rules for
                //TODO - the "in template" insertion mode.

                //       Otherwise, stop parsing.
                base.StopParsing();
                return null;
            }

            if (endTag != null && endTag.TagName == "body")
            {
                return ProcessEndTagBodyOrHtml();
            }

            if (endTag != null && endTag.TagName == "html")
            {
                // Handled like a "body" end tag, after which the "html" end tag is queued again.
                // NOTE(review): the token is re-enqueued regardless of what ProcessEndTagBodyOrHtml
                // returned — confirm that is intended when the end tag was ignored.
                BaseInsertionModeState nextState = ProcessEndTagBodyOrHtml();
                queue.EnqueueTokenForReprocessing(token);
                return nextState;
            }

            if (startTag != null && (
                    startTag.TagName == "address" ||
                    startTag.TagName == "article" ||
                    startTag.TagName == "aside" ||
                    startTag.TagName == "blockquote" ||
                    startTag.TagName == "center" ||
                    startTag.TagName == "details" ||
                    startTag.TagName == "dialog" ||
                    startTag.TagName == "dir" ||
                    startTag.TagName == "div" ||
                    startTag.TagName == "dl" ||
                    startTag.TagName == "fieldset" ||
                    startTag.TagName == "figcaption" ||
                    startTag.TagName == "figure" ||
                    startTag.TagName == "footer" ||
                    startTag.TagName == "header" ||
                    startTag.TagName == "hgroup" ||
                    startTag.TagName == "main" ||
                    startTag.TagName == "nav" ||
                    startTag.TagName == "ol" ||
                    startTag.TagName == "p" ||
                    startTag.TagName == "section" ||
                    startTag.TagName == "summary" ||
                    startTag.TagName == "ul"))
            {
                //TODO - If the stack of open elements has a p element in button scope, then close a p element. (http://www.w3.org/TR/html5/syntax.html#close-a-p-element)
                //       Insert an HTML element for the token.
                InsertHtmlElement(startTag, doc);
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "h1" ||
                    startTag.TagName == "h2" ||
                    startTag.TagName == "h3" ||
                    startTag.TagName == "h4" ||
                    startTag.TagName == "h5" ||
                    startTag.TagName == "h6"))
            {
                //TODO - If the stack of open elements has a p element in button scope, then close a p element. (http://www.w3.org/TR/html5/syntax.html#close-a-p-element)
                //TODO - If the current node is an HTML element whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6",
                //TODO - then this is a parse error; pop the current node off the stack of open elements.
                InsertHtmlElement(startTag, doc);
                return this;
            }

            // From here on, a branch containing only "//TODO" is a placeholder: the token is
            // consumed and nothing is inserted.
            if (startTag != null && (
                    startTag.TagName == "pre" ||
                    startTag.TagName == "listing"))
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "form")
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "li")
            {
                //TODO
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "dd" ||
                    startTag.TagName == "dt"))
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "plaintext")
            {
                //TODO - If the stack of open elements has a p element in button scope, then close a p element. (http://www.w3.org/TR/html5/syntax.html#close-a-p-element)
                //       Insert an HTML element for the token.
                InsertHtmlElement(startTag, doc);
                //       Switch the tokenizer to the PLAINTEXT state.
                tokenizer.SetNextState(WebEngineSharp.Tokenizer.States.PLAINTEXTState.Instance);
                //TODO - NOTE: Once a start tag with the tag name "plaintext" has been seen, that will be the last
                //TODO - token ever seen other than character tokens (and the end-of-file token), because there is
                //TODO - no way to switch out of the PLAINTEXT state.
                return this;
            }

            if (startTag != null && startTag.TagName == "button")
            {
                //TODO
                return this;
            }

            if (endTag != null && (
                    endTag.TagName == "address" ||
                    endTag.TagName == "article" ||
                    endTag.TagName == "aside" ||
                    endTag.TagName == "blockquote" ||
                    endTag.TagName == "button" ||
                    endTag.TagName == "center" ||
                    endTag.TagName == "details" ||
                    endTag.TagName == "dialog" ||
                    endTag.TagName == "dir" ||
                    endTag.TagName == "div" ||
                    endTag.TagName == "dl" ||
                    endTag.TagName == "fieldset" ||
                    endTag.TagName == "figcaption" ||
                    endTag.TagName == "figure" ||
                    endTag.TagName == "footer" ||
                    endTag.TagName == "header" ||
                    endTag.TagName == "hgroup" ||
                    endTag.TagName == "listing" ||
                    endTag.TagName == "main" ||
                    endTag.TagName == "nav" ||
                    endTag.TagName == "ol" ||
                    endTag.TagName == "pre" ||
                    endTag.TagName == "section" ||
                    endTag.TagName == "summary" ||
                    endTag.TagName == "ul"))
            {
                //TODO
                return this;
            }

            if (endTag != null && endTag.TagName == "form")
            {
                //TODO
                return this;
            }

            if (endTag != null && endTag.TagName == "p")
            {
                //TODO
                return this;
            }

            if (endTag != null && endTag.TagName == "li")
            {
                //TODO
                return this;
            }

            if (endTag != null && (
                    endTag.TagName == "dd" ||
                    endTag.TagName == "dt"))
            {
                //TODO
                return this;
            }

            if (endTag != null && (
                    endTag.TagName == "h1" ||
                    endTag.TagName == "h2" ||
                    endTag.TagName == "h3" ||
                    endTag.TagName == "h4" ||
                    endTag.TagName == "h5" ||
                    endTag.TagName == "h6"))
            {
                //TODO
                return this;
            }

            if (endTag != null && endTag.TagName == "sarcasm")
            {
                //TODO - Take a deep breath, then act as described in the "any other end tag" entry below.
                return this;
            }

            if (startTag != null && startTag.TagName == "a")
            {
                //TODO
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "b" ||
                    startTag.TagName == "big" ||
                    startTag.TagName == "code" ||
                    startTag.TagName == "em" ||
                    startTag.TagName == "font" ||
                    startTag.TagName == "i" ||
                    startTag.TagName == "s" ||
                    startTag.TagName == "small" ||
                    startTag.TagName == "strike" ||
                    startTag.TagName == "strong" ||
                    startTag.TagName == "tt" ||
                    startTag.TagName == "u"))
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "nobr")
            {
                //TODO
                return this;
            }

            if (endTag != null && (
                    endTag.TagName == "a" ||
                    endTag.TagName == "b" ||
                    endTag.TagName == "big" ||
                    endTag.TagName == "code" ||
                    endTag.TagName == "em" ||
                    endTag.TagName == "font" ||
                    endTag.TagName == "i" ||
                    endTag.TagName == "nobr" ||
                    endTag.TagName == "s" ||
                    endTag.TagName == "small" ||
                    endTag.TagName == "strike" ||
                    endTag.TagName == "strong" ||
                    endTag.TagName == "tt" ||
                    endTag.TagName == "u"))
            {
                //TODO - Run the adoption agency algorithm (http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm) for the token's tag name.
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "applet" ||
                    startTag.TagName == "marquee" ||
                    startTag.TagName == "object"))
            {
                //TODO
                return this;
            }

            if (endTag != null && (
                    endTag.TagName == "applet" ||
                    endTag.TagName == "marquee" ||
                    endTag.TagName == "object"))
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "table")
            {
                //TODO
                return this;
            }

            if (endTag != null && endTag.TagName == "br")
            {
                //TODO
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "area" ||
                    startTag.TagName == "br" ||
                    startTag.TagName == "embed" ||
                    startTag.TagName == "img" ||
                    startTag.TagName == "keygen" ||
                    startTag.TagName == "wbr"))
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "input")
            {
                //TODO
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "param" ||
                    startTag.TagName == "source" ||
                    startTag.TagName == "track"))
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "hr")
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "image")
            {
                //Parse error. Change the token's tag name to "img" and reprocess it. (Don't ask.)
                // The token object itself is renamed in place before being queued again.
                ReportParseError();
                startTag.TagName = "img";
                queue.EnqueueTokenForReprocessing(startTag);
                return this;
            }

            if (startTag != null && startTag.TagName == "isindex")
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "textarea")
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "xmp")
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "iframe")
            {
                //TODO - Set the frameset-ok flag (http://www.w3.org/TR/html5/syntax.html#frameset-ok-flag) to "not ok".
                //TODO - Follow the generic raw text element parsing algorithm. (http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm)
                return this;
            }

            // "noscript" only takes this branch when the tokenizer reports scripting as enabled;
            // otherwise it falls through to the generic start-tag handling further down.
            if (startTag != null &&
                (startTag.TagName == "noembed" ||
                (startTag.TagName == "noscript" && tokenizer.ScriptingEnabled)))
            {
                //TODO - Follow the generic raw text element parsing algorithm. (http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm)
                return this;
            }

            if (startTag != null && startTag.TagName == "select")
            {
                //TODO
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "optgroup" ||
                    startTag.TagName == "option"))
            {
                //TODO
                return this;
            }

            if (startTag != null && (
                    startTag.TagName == "rb" ||
                    startTag.TagName == "rp" ||
                    startTag.TagName == "rtc"))
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "rt")
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "math")
            {
                //TODO
                return this;
            }

            if (startTag != null && startTag.TagName == "svg")
            {
                //TODO
                return this;
            }

            // These start tags are reported as a parse error and ignored; the check must stay
            // ahead of the generic start-tag branch below, which would otherwise insert them.
            if (startTag != null && (
                    startTag.TagName == "caption" ||
                    startTag.TagName == "col" ||
                    startTag.TagName == "colgroup" ||
                    startTag.TagName == "frame" ||
                    startTag.TagName == "head" ||
                    startTag.TagName == "tbody" ||
                    startTag.TagName == "td" ||
                    startTag.TagName == "tfoot" ||
                    startTag.TagName == "th" ||
                    startTag.TagName == "thead" ||
                    startTag.TagName == "tr"))
            {
                ReportParseError();
                return this;
            }

            // Any start tag not matched above is inserted as an element.
            if (startTag != null)
            {
                //TODO - Reconstruct the active formatting elements, if any.
                //       Insert an HTML element for the token.
                InsertHtmlElement(startTag, doc);
                //NOTE: This element will be an ordinary element.
                return this;
            }

            // Any end tag not matched above: not implemented yet, the token is consumed.
            if (endTag != null)
            {
                //TODO
                return this;
            }

            // Tokens that matched none of the branches above are ignored.
            return this;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Reads the optional context term of a statement along with the dot that terminates it
        /// </summary>
        /// <param name="tokens">Token Queue to read from</param>
        /// <returns>
        /// Null when the next token is already the terminating dot, otherwise a URI standing for the context
        /// </returns>
        private Uri TryParseContext(ITokenQueue tokens)
        {
            IToken current = tokens.Dequeue();

            //A dot straight away means no context term was supplied
            if (current.TokenType == Token.DOT) return null;

            //Turn the context token into a node
            INode contextNode;
            switch (current.TokenType)
            {
                case Token.BLANKNODEWITHID:
                    //Skips the first two characters of the token value (presumably the "_:" prefix - confirm against the tokeniser)
                    contextNode = new BlankNode(null, current.Value.Substring(2));
                    break;

                case Token.URI:
                    contextNode = new UriNode(null, new Uri(current.Value));
                    break;

                case Token.LITERAL:
                    //A literal may be followed by a language specifier or a datatype
                    IToken qualifier = tokens.Peek();
                    switch (qualifier.TokenType)
                    {
                        case Token.LANGSPEC:
                            tokens.Dequeue();
                            contextNode = new LiteralNode(null, current.Value, qualifier.Value);
                            break;

                        case Token.DATATYPE:
                            tokens.Dequeue();
                            //First and last characters of the datatype token value are dropped before building the URI
                            contextNode = new LiteralNode(null, current.Value, new Uri(qualifier.Value.Substring(1, qualifier.Value.Length - 2)));
                            break;

                        default:
                            contextNode = new LiteralNode(null, current.Value);
                            break;
                    }
                    break;

                default:
                    throw ParserHelper.Error("Unexpected Token '" + current.GetType().ToString() + "' encountered, expected a Blank Node/Literal/URI as the Context of the Triple", current);
            }

            //The context must be followed by the dot that ends the Quad
            current = tokens.Dequeue();
            if (current.TokenType != Token.DOT)
            {
                throw ParserHelper.Error("Unexpected Token '" + current.GetType().ToString() + "' encountered, expected a Dot Token (Line Terminator) to terminate a Triple", current);
            }

            //Map the node onto a URI; blank nodes and literals get a URI built from their hash code
            switch (contextNode.NodeType)
            {
                case NodeType.Uri:
                    return ((IUriNode)contextNode).Uri;

                case NodeType.Blank:
                    return new Uri("nquads:bnode:" + contextNode.GetHashCode());

                case NodeType.Literal:
                    return new Uri("nquads:literal:" + contextNode.GetHashCode());

                default:
                    throw ParserHelper.Error("Cannot turn a Node of type '" + contextNode.GetType().ToString() + "' into a Context URI for a Triple", current);
            }
        }
Exemplo n.º 11
0
 /// <summary>
 /// Takes the next token from the queue and requires it to be a URI, which is returned as the predicate
 /// </summary>
 /// <param name="tokens">Token Queue to read from</param>
 /// <returns>The dequeued URI token</returns>
 private IToken TryParsePredicate(ITokenQueue tokens)
 {
     IToken candidate = tokens.Dequeue();

     //Only URIs are acceptable as predicates
     if (candidate.TokenType != Token.URI)
     {
         throw ParserHelper.Error("Unexpected Token '" + candidate.GetType().ToString() + "' encountered, expected a URI as the Predicate of a Triple", candidate);
     }

     return candidate;
 }
Exemplo n.º 12
0
        /// <summary>
        /// Reads statements from the token queue until EOF and passes each one to the handler
        /// </summary>
        /// <param name="handler">Handler that receives the start/end notifications and the parsed statements</param>
        /// <param name="tokens">Token Queue to read from; must begin with a BOF token</param>
        /// <remarks>
        /// Every call to <c>StartRdf()</c> is matched by exactly one <c>EndRdf()</c> on every exit path:
        /// <c>EndRdf(true)</c> on normal completion (including input that contains no statements) and when
        /// the handler asks for parsing to stop, <c>EndRdf(false)</c> before any other exception is rethrown
        /// </remarks>
        private void Parse(IRdfHandler handler, ITokenQueue tokens)
        {
            try
            {
                handler.StartRdf();

                //Expect a BOF token at start
                IToken next = tokens.Dequeue();
                if (next.TokenType != Token.BOF)
                {
                    throw ParserHelper.Error("Unexpected Token '" + next.GetType().ToString() + "' encountered, expected a BOF token at the start of the input", next);
                }

                //Pre-tested loop: if EOF follows BOF directly we fall through to EndRdf(true)
                //instead of returning early and leaving the handler unterminated
                while (tokens.Peek().TokenType != Token.EOF)
                {
                    IToken s = this.TryParseSubject(tokens);
                    IToken p = this.TryParsePredicate(tokens);
                    IToken o = this.TryParseObject(tokens);
                    Uri context = this.TryParseContext(tokens);

                    this.TryParseTriple(handler, s, p, o, context);
                }

                handler.EndRdf(true);
            }
            catch (RdfParsingTerminatedException)
            {
                //Not an error - it just means the Handler told us to stop
                handler.EndRdf(true);
            }
            catch
            {
                handler.EndRdf(false);
                throw;
            }
        }
        /// <summary>
        /// Processes one token for this insertion mode. Judging by the comments and the states it hands
        /// over to, this is the HTML5 "in head" insertion mode (NOTE(review): class name not visible here - confirm).
        /// </summary>
        /// <param name="tokenizer">Tokenizer; its next state is set to RCDATA when a title start tag is seen.</param>
        /// <param name="queue">Token queue used to hand a token back for reprocessing by the next mode.</param>
        /// <param name="token">The token to process.</param>
        /// <param name="doc">Document passed to the insertion helpers.</param>
        /// <returns>The insertion mode state that should handle the next token.</returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            if (IsWhitespace(token))
            {
                InsertCharacter((CharacterToken)token, doc);
                return this;
            }

            CommentToken commentToken = token as CommentToken;
            if (commentToken != null)
            {
                InsertComment(commentToken, doc);
                return this;
            }

            if (token is DocTypeToken)
            {
                // Parse error. Ignore the token.
                ReportParseError();
                return this;
            }

            StartTagToken startTagToken = token as StartTagToken;
            if (startTagToken != null)
            {
                // NOTE: cases that only "break" are not implemented yet and currently fall through
                //       to the "anything else" handling at the bottom of the method.
                switch (startTagToken.TagName)
                {
                    case "html":
                        //TODO - Process the token using the rules for the "in body" insertion mode.
                        break;

                    case "base":
                    case "basefont":
                    case "bgsound":
                    case "link":
                        //TODO - Insert an HTML element for the token. Immediately pop the current node off the stack of open elements.
                        //TODO - Acknowledge the token's self-closing flag, if it is set.
                        break;

                    case "meta":
                        //TODO - Insert an HTML element for the token. Immediately pop the current node off the stack of open elements.
                        //TODO - Acknowledge the token's self-closing flag, if it is set.
                        //TODO - If the element has a charset attribute, and getting an encoding from its value results in a supported ASCII-compatible character encoding or a UTF-16 encoding, and the confidence is currently tentative, then change the encoding to the resulting encoding.
                        //TODO - Otherwise, if the element has an http-equiv attribute whose value is an ASCII case-insensitive match for the string "Content-Type", and the element has a content attribute, and applying the algorithm for extracting a character encoding from a meta element to that attribute's value returns a supported ASCII-compatible character encoding or a UTF-16 encoding, and the confidence is currently tentative, then change the encoding to the extracted encoding.
                        break;

                    case "title":
                        //Follow the generic RCDATA element parsing algorithm. (http://www.w3.org/TR/html5/syntax.html#generic-rcdata-element-parsing-algorithm)
                        //IHTMLTitleElement titleElement = (IHTMLTitleElement)HtmlElementFactory.Instance.CreateElement("title", doc);
                        InsertHtmlElement(startTagToken, doc);
                        //doc.appendChild(titleElement); // TODO - Follow insert HTML element algorithm (http://www.w3.org/TR/html5/syntax.html#insert-an-html-element)
                        tokenizer.SetNextState(RCDATAState.Instance);
                        TreeConstruction.Instance.SaveCurrentInsertionModeState();
                        return TextInsertionModeState.Instance;

                    case "noframes":
                    case "style":
                        //TODO - Follow the generic raw text element parsing algorithm.
                        break;

                    case "noscript":
                        //TODO - If scripting flag (http://www.w3.org/TR/html5/syntax.html#scripting-flag) is enabled:
                        //TODO -     Follow the generic raw text element parsing algorithm.
                        //TODO - Else, if the flag is disabled:
                        //TODO -     Insert an HTML element for the token.
                        //TODO -     return InHeadNoScriptInsertionModeState.Instance;
                        break;

                    case "script":
                        //TODO - Run these steps:
                        //TODO - 1. Let the adjusted insertion location be the appropriate place for inserting a node.
                        //TODO - 2. Create an element for the token in the HTML namespace, with the intended parent being the element in which the adjusted insertion location finds itself.
                        //TODO - 3. Mark the element as being "parser-inserted" and unset the element's "force-async" flag.
                        //TODO -
                        //TODO -    NOTE: This ensures that, if the script is external, any document.write() calls in the script will execute in-line, instead of blowing the document away,
                        //TODO -          as would happen in most other cases. It also prevents the script from executing until the end tag is seen.
                        //TODO -
                        //TODO - 4. If the parser was originally created for the HTML fragment parsing algorithm, then mark the script element as "already started". (fragment case)
                        //TODO - 5. Insert the newly created element at the adjusted insertion location.
                        //TODO - 6. Push the element onto the stack of open elements so that it is the new current node.
                        //TODO - 7. Switch the tokenizer to the script data state.
                        //TODO - 8. Let the original insertion mode be the current insertion mode.
                        //       9. Switch the insertion mode to "text".
                        return TextInsertionModeState.Instance;

                    case "template":
                        //TODO - Insert an HTML element for the token.
                        //TODO - Insert a marker at the end of the list of active formatting elements.
                        //TODO - Set the frameset-ok flag to "not ok".
                        //TODO - Switch the insertion mode to "in template".
                        //TODO - Push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode.
                        break;

                    case "head":
                        // Parse error. Ignore the token.
                        ReportParseError();
                        return this;
                }
            }

            EndTagToken endTagToken = token as EndTagToken;
            if (endTagToken != null)
            {
                switch (endTagToken.TagName)
                {
                    case "head":
                        // Pop the current node (which will be the head element) off the stack of open elements
                        // and switch the insertion mode to "after head". The end tag is fully handled here,
                        // so it is NOT queued for reprocessing.
                        TreeConstruction.Instance.StackOfOpenElements.Pop();
                        return AfterHeadInsertionModeState.Instance;

                    case "body":
                    case "html":
                    case "br":
                        // Act as described in the "anything else" entry below.
                        break;

                    case "template":
                        //TODO - If there is no template element on the stack of open elements, then this is a parse error; ignore the token.
                        //TODO - Otherwise, run these steps:
                        //TODO - 1. Generate implied end tags.
                        //TODO - 2. If the current node is not a template element, then this is a parse error.
                        //TODO - 3. Pop elements from the stack of open elements until a template element has been popped from the stack.
                        //TODO - 4. Clear the list of active formatting elements up to the last marker.
                        //TODO - 5. Pop the current template insertion mode off the stack of template insertion modes.
                        //TODO - 6. Reset the insertion mode appropriately.
                        break;

                    default:
                        // Any other end tag: parse error. Ignore the token (the head element stays open).
                        ReportParseError();
                        return this;
                }
            }

            // Anything else:
            // Pop the current node (which will be the head element) off the stack of open elements.
            // Switch the insertion mode to "after head".
            // Reprocess the token.
            TreeConstruction.Instance.StackOfOpenElements.Pop();
            queue.EnqueueTokenForReprocessing(token);
            return AfterHeadInsertionModeState.Instance;
        }
 /// <summary>
 /// Initialises a Query Parser Context that reads its tokens from an existing Token Queue
 /// </summary>
 /// <param name="tokens">Token Queue the context should read from</param>
 /// <remarks>
 /// The base context is given a <see cref="NullHandler"/> and a null second argument, and a fresh
 /// <see cref="SparqlQuery"/> is created to hold the query being parsed
 /// </remarks>
 protected internal SparqlQueryParserContext(ITokenQueue tokens)
     : base(new NullHandler(), null)
 {
     _query = new SparqlQuery(true);
     _queue = tokens;
 }
        /// <summary>
        /// Processes one token for this insertion mode. Judging by the comments and the state it hands
        /// over to, this is the HTML5 "before html" insertion mode (NOTE(review): class name not visible here - confirm).
        /// </summary>
        /// <param name="tokenizer">Tokenizer that produced the token (not used by this mode).</param>
        /// <param name="queue">Token queue used to hand the token back for reprocessing by the next mode.</param>
        /// <param name="token">The token to process.</param>
        /// <param name="doc">Document that receives comment nodes and the root html element.</param>
        /// <returns>
        /// This state when the token was consumed or ignored; otherwise the "before head" state after the
        /// root html element has been created.
        /// </returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            // * A DOCTYPE token: parse error, ignore the token.
            if (token is DocTypeToken)
            {
                ReportParseError();
                return this;
            }

            // * A comment token: append a Comment node to the Document object.
            CommentToken commentToken = token as CommentToken;
            if (commentToken != null)
            {
                IComment commentNode = new Comment(doc, commentToken.Comment);
                doc.appendChild(commentNode);
                return this;
            }

            // * Whitespace: ignore the token.
            if (IsWhitespace(token))
            {
                return this;
            }

            // * A start tag whose tag name is "html":
            //   Create an element for the token (http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token)
            //   in the HTML namespace, with the Document as the intended parent.
            //   Append it to the Document object. Put this element in the stack of open elements.
            //
            //   If the Document is being loaded as part of navigation of a browsing context,
            //   then: if the newly created element has a manifest attribute whose value is not the empty string,
            //   then resolve the value of that attribute to an absolute URL, relative to the newly created element,
            //   and if that is successful, run the application cache selection algorithm with the result of applying
            //   the URL serializer algorithm to the resulting parsed URL with the exclude fragment flag set; otherwise,
            //   if there is no such attribute, or its value is the empty string, or resolving its value fails, run the
            //   application cache selection algorithm with no manifest. The algorithm must be passed the Document object.
            StartTagToken startTagToken = token as StartTagToken;
            bool isHtmlStartTag = startTagToken != null && startTagToken.TagName == "html";

            // * An end tag whose tag name is one of: "head", "body", "html", "br"
            //   Act as described in the "anything else" entry below.
            //
            // * Any other end tag
            //   Parse error. Ignore the token.
            EndTagToken endTagToken = token as EndTagToken;
            if (endTagToken != null &&
                endTagToken.TagName != "head" &&
                endTagToken.TagName != "body" &&
                endTagToken.TagName != "html" &&
                endTagToken.TagName != "br")
            {
                ReportParseError();
                return this;
            }

            // * Anything else
            //   Create an html element whose ownerDocument is the Document object. Append it to the Document object.
            //   Put this element in the stack of open elements.
            //
            //   If the Document is being loaded as part of navigation of a browsing context,
            //   then: run the application cache selection algorithm with no manifest, passing it the Document object.
            //
            //   Switch the insertion mode to "before head", then reprocess the token.
            //
            // The root element can end up being removed from the Document object, e.g. by scripts; nothing in particular
            // happens in such cases, content continues being appended to the nodes as described in the next section.

            // Both the explicit <html> start tag and the "anything else" case create the root element.
            // The name is always "html": the triggering token may be a character, EOF or some other tag,
            // so it must not be cast to a tag token or used as the element name.
            // TODO - For an explicit <html> start tag, copy the token's attributes onto the element.
            HTMLElement htmlElement = new HTMLElement(doc, "html");
            doc.appendChild(htmlElement);
            TreeConstruction.Instance.StackOfOpenElements.Push(htmlElement);

            // If the Document is being loaded as part of navigation of a browsing context:
            // RunApplicationCacheSelectionAlgorithm();

            // An explicit <html> start tag is consumed by creating the element. Any other token only
            // implied the element and still has to be handled by the next insertion mode.
            if (!isHtmlStartTag)
            {
                queue.EnqueueTokenForReprocessing(token);
            }

            return BeforeHeadInsertionModeState.Instance;
        }
        /// <summary>
        /// Public identifier prefixes that select quirks mode regardless of the system identifier.
        /// </summary>
        private static readonly string[] s_quirksPublicIdPrefixes =
        {
            @"+//Silmaril//dtd html Pro v0r11 19970101//",
            @"-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
            @"-//AS//DTD HTML 3.0 asWedit + extensions//",
            @"-//IETF//DTD HTML 2.0 Level 1//",
            @"-//IETF//DTD HTML 2.0 Level 2//",
            @"-//IETF//DTD HTML 2.0 Strict Level 1//",
            @"-//IETF//DTD HTML 2.0 Strict Level 2//",
            @"-//IETF//DTD HTML 2.0 Strict//",
            @"-//IETF//DTD HTML 2.0//",
            @"-//IETF//DTD HTML 2.1E//",
            @"-//IETF//DTD HTML 3.0//",
            @"-//IETF//DTD HTML 3.2 Final//",
            @"-//IETF//DTD HTML 3.2//",
            @"-//IETF//DTD HTML 3//",
            @"-//IETF//DTD HTML Level 0//",
            @"-//IETF//DTD HTML Level 1//",
            @"-//IETF//DTD HTML Level 2//",
            @"-//IETF//DTD HTML Level 3//",
            @"-//IETF//DTD HTML Strict Level 0//",
            @"-//IETF//DTD HTML Strict Level 1//",
            @"-//IETF//DTD HTML Strict Level 2//",
            @"-//IETF//DTD HTML Strict Level 3//",
            @"-//IETF//DTD HTML Strict//",
            @"-//IETF//DTD HTML//",
            @"-//Metrius//DTD Metrius Presentational//",
            @"-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
            @"-//Microsoft//DTD Internet Explorer 2.0 HTML//",
            @"-//Microsoft//DTD Internet Explorer 2.0 Tables//",
            @"-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
            @"-//Microsoft//DTD Internet Explorer 3.0 HTML//",
            @"-//Microsoft//DTD Internet Explorer 3.0 Tables//",
            @"-//Netscape Comm. Corp.//DTD HTML//",
            @"-//Netscape Comm. Corp.//DTD Strict HTML//",
            @"-//O'Reilly and Associates//DTD HTML 2.0//",
            @"-//O'Reilly and Associates//DTD HTML Extended 1.0//",
            @"-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
            @"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
            @"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
            @"-//Spyglass//DTD HTML 2.0 Extended//",
            @"-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
            @"-//Sun Microsystems Corp.//DTD HotJava HTML//",
            @"-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
            @"-//W3C//DTD HTML 3 1995-03-24//",
            @"-//W3C//DTD HTML 3.2 Draft//",
            @"-//W3C//DTD HTML 3.2 Final//",
            @"-//W3C//DTD HTML 3.2//",
            @"-//W3C//DTD HTML 3.2S Draft//",
            @"-//W3C//DTD HTML 4.0 Frameset//",
            @"-//W3C//DTD HTML 4.0 Transitional//",
            @"-//W3C//DTD HTML Experimental 19960712//",
            @"-//W3C//DTD HTML Experimental 970421//",
            @"-//W3C//DTD W3 HTML//",
            @"-//W3O//DTD W3 HTML 3.0//",
            @"-//WebTechs//DTD Mozilla HTML 2.0//",
            @"-//WebTechs//DTD Mozilla HTML//",
        };

        /// <summary>
        /// Public identifiers that select quirks mode only when matched in full.
        /// </summary>
        private static readonly string[] s_quirksPublicIdExactMatches =
        {
            @"-//W3O//DTD W3 HTML Strict 3.0//EN//",
            @"-/W3C/DTD HTML 4.0 Transitional/EN",
            @"HTML",
        };

        /// <summary>
        /// Public identifier prefixes whose effect depends on the system identifier:
        /// quirks mode when it is missing, limited-quirks mode when it is present.
        /// </summary>
        private static readonly string[] s_html401LoosePublicIdPrefixes =
        {
            @"-//W3C//DTD HTML 4.01 Frameset//",
            @"-//W3C//DTD HTML 4.01 Transitional//",
        };

        /// <summary>
        /// Public identifier prefixes that select limited-quirks mode regardless of the system identifier.
        /// </summary>
        private static readonly string[] s_limitedQuirksPublicIdPrefixes =
        {
            @"-//W3C//DTD XHTML 1.0 Frameset//",
            @"-//W3C//DTD XHTML 1.0 Transitional//",
        };

        /// <summary>
        /// Processes one token for this insertion mode. Judging by the comments and the state it hands
        /// over to, this is the HTML5 "initial" insertion mode (NOTE(review): class name not visible here - confirm).
        /// </summary>
        /// <param name="tokenizer">Tokenizer that produced the token (not used by this mode).</param>
        /// <param name="queue">Token queue used to hand a non-DOCTYPE token back for reprocessing.</param>
        /// <param name="token">The token to process.</param>
        /// <param name="doc">Document that receives comment and DocumentType nodes.</param>
        /// <returns>
        /// This state for whitespace and comments; otherwise the "before html" state.
        /// </returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            // Whitespace: ignore the token.
            if (IsWhitespace(token))
            {
                return this;
            }

            // Comment: append a Comment node to the Document object.
            CommentToken commentToken = token as CommentToken;
            if (commentToken != null)
            {
                IComment commentNode = new Comment(doc, commentToken.Comment);
                doc.appendChild(commentNode);
                return this;
            }

            DocTypeToken docTypeToken = token as DocTypeToken;
            if (docTypeToken != null)
            {
                // Anything other than a plain "<!DOCTYPE html>" (optionally with the about:legacy-compat
                // system identifier) is a parse error unless its identifier pair is explicitly allowed.
                // A parse error is only reported; the DOCTYPE is still appended below.
                if (docTypeToken.Name != "html" ||
                    docTypeToken.PublicIdentifier != null ||
                    (docTypeToken.SystemIdentifier != null && docTypeToken.SystemIdentifier != "about:legacy-compat"))
                {
                    // string.Concat treats a missing (null) identifier as an empty string.
                    string pair = string.Concat("<", docTypeToken.PublicIdentifier, ">-<", docTypeToken.SystemIdentifier, ">");
                    if (docTypeToken.Name != "html" || !s_allowedDoctypeSets.Contains(pair))
                    {
                        ReportParseError();
                    }
                }

                // Append a DocumentType node to the Document node,
                // with the name attribute set to the name given in the DOCTYPE token,
                //      or the empty string if the name was missing;
                // the publicId attribute set to the public identifier given in the DOCTYPE token,
                //      or the empty string if the public identifier was missing;
                // the systemId attribute set to the system identifier given in the DOCTYPE token,
                //      or the empty string if the system identifier was missing;
                IDocumentType docTypeNode = new DocumentType(
                    doc,
                    docTypeToken.Name ?? string.Empty,
                    docTypeToken.PublicIdentifier ?? string.Empty,
                    docTypeToken.SystemIdentifier ?? string.Empty);
                doc.appendChild(docTypeNode);

                // TODO - and the other attributes specific to DocumentType objects set to null and empty lists as appropriate.

                if (doc.nodeName != "iframe")
                {
                    if (IsQuirksDoctype(docTypeToken))
                    {
                        // TODO - set the Document to quirks mode
                    }
                    else if (IsLimitedQuirksDoctype(docTypeToken))
                    {
                        // TODO - set the Document to limited-quirks mode
                    }
                }

                // The DOCTYPE has been consumed: switch to "before html" without reprocessing it,
                // whatever kind of document this is.
                return BeforeHTMLInsertionModeState.Instance;
            }

            // TODO: If the document is not an iframe srcdoc document, then this is a parse error; set the Document to quirks mode.
            //if (this.Document.Type != "srcdoc")
            //{
            // TODO - set the Document to quirks mode
            //}

            // In any case, switch the insertion mode to "before html", then reprocess the token.
            queue.EnqueueTokenForReprocessing(token);
            return BeforeHTMLInsertionModeState.Instance;
        }

        /// <summary>
        /// Tells whether a DOCTYPE token matches the quirks-mode conditions. Missing (null) identifiers
        /// never match; identifier comparisons are case-insensitive.
        /// </summary>
        private static bool IsQuirksDoctype(DocTypeToken docTypeToken)
        {
            if (docTypeToken.ForceQuirks || docTypeToken.Name != "html")
            {
                return true;
            }

            string publicId = docTypeToken.PublicIdentifier ?? string.Empty;
            string systemId = docTypeToken.SystemIdentifier;

            foreach (string exact in s_quirksPublicIdExactMatches)
            {
                if (publicId.Equals(exact, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            if (StartsWithAny(publicId, s_quirksPublicIdPrefixes))
            {
                return true;
            }

            // The static string.Equals overload copes with a null system identifier.
            if (string.Equals(systemId, @"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            return systemId == null && StartsWithAny(publicId, s_html401LoosePublicIdPrefixes);
        }

        /// <summary>
        /// Tells whether a DOCTYPE token matches the limited-quirks conditions. Only meaningful once
        /// <see cref="IsQuirksDoctype"/> has returned false for the same token.
        /// </summary>
        private static bool IsLimitedQuirksDoctype(DocTypeToken docTypeToken)
        {
            string publicId = docTypeToken.PublicIdentifier ?? string.Empty;

            if (StartsWithAny(publicId, s_limitedQuirksPublicIdPrefixes))
            {
                return true;
            }

            return docTypeToken.SystemIdentifier != null && StartsWithAny(publicId, s_html401LoosePublicIdPrefixes);
        }

        /// <summary>
        /// Returns true when <paramref name="value"/> starts with any of the given prefixes, ignoring case.
        /// </summary>
        private static bool StartsWithAny(string value, string[] prefixes)
        {
            foreach (string prefix in prefixes)
            {
                if (value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }
Exemplo n.º 17
0
 /// <summary>
 /// Initialises a Tokenising Parser Context for the given Graph, wrapping the Tokeniser in a new <see cref="TokenQueue"/>
 /// </summary>
 /// <param name="g">Graph passed on to the base context</param>
 /// <param name="tokeniser">Tokeniser the new Token Queue is built around</param>
 public TokenisingParserContext(IGraph g, ITokeniser tokeniser)
     : base(g)
 {
     _queue = new TokenQueue(tokeniser);
 }
        /// <summary>
        /// Processes one token for this insertion mode. Judging by the comments and the states it hands
        /// over to, this is the HTML5 "after head" insertion mode (NOTE(review): class name not visible here - confirm).
        /// </summary>
        /// <param name="tokenizer">Tokenizer that produced the token (not used by this mode).</param>
        /// <param name="queue">Token queue used to hand the token back for reprocessing by the next mode.</param>
        /// <param name="token">The token to process.</param>
        /// <param name="doc">Document passed to the insertion helpers.</param>
        /// <returns>The insertion mode state that should handle the next token.</returns>
        public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
        {
            if (IsWhitespace(token))
            {
                InsertCharacter((CharacterToken)token, doc);
                return this;
            }

            CommentToken commentToken = token as CommentToken;
            if (commentToken != null)
            {
                InsertComment(commentToken, doc);
                return this;
            }

            if (token is DocTypeToken)
            {
                // Parse error. Ignore the token.
                ReportParseError();
                return this;
            }

            StartTagToken startTagToken = token as StartTagToken;
            if (startTagToken != null)
            {
                // NOTE: cases that only "break" are not implemented yet and currently fall through
                //       to the "anything else" handling at the bottom of the method.
                switch (startTagToken.TagName)
                {
                    case "html":
                        //TODO - Process the token using the rules for the "in body" insertion mode.
                        break;

                    case "body":
                        // Insert an HTML element for the token, so an explicit <body> tag creates the
                        // element just like the implied body at the bottom of this method does.
                        InsertHtmlElement(startTagToken, doc);
                        // TODO - Set the frameset-ok flag to "not ok".
                        return InBodyInsertionModeState.Instance;

                    case "frameset":
                        // Insert an HTML element for the token.
                        InsertHtmlElement(startTagToken, doc);
                        return InFramesetInsertionModeState.Instance;

                    case "base":
                    case "basefont":
                    case "bgsound":
                    case "link":
                    case "meta":
                    case "noframes":
                    case "script":
                    case "style":
                    case "template":
                    case "title":
                        ReportParseError();
                        // TODO - Push the node pointed to by the head element pointer onto the stack of open elements.
                        // TODO - Process the token using the rules for the "in head" insertion mode.
                        // TODO - Remove the node pointed to by the head element pointer from the stack of open elements. (It might not be the current node at this point.)
                        // TODO - NOTE: The head element pointer cannot be null at this point.
                        break;

                    case "head":
                        // Parse error. Ignore the token.
                        ReportParseError();
                        return this;
                }
            }

            EndTagToken endTagToken = token as EndTagToken;
            if (endTagToken != null)
            {
                switch (endTagToken.TagName)
                {
                    case "template":
                        // TODO - Process the token using the rules for the "in head" insertion mode.
                        break;

                    case "body":
                    case "html":
                    case "br":
                        // Act as described in the "anything else" entry below.
                        break;

                    default:
                        // Any other end tag: parse error. Ignore the token.
                        ReportParseError();
                        return this;
                }
            }

            // Anything else:
            // Insert an HTML element for a "body" start tag token with no attributes,
            // switch the insertion mode to "in body" and reprocess the current token.
            StartTagToken impliedBodyToken = new StartTagToken() { TagName = "body" };
            InsertHtmlElement(impliedBodyToken, doc);

            queue.EnqueueTokenForReprocessing(token);
            return InBodyInsertionModeState.Instance;
        }
 /// <summary>
 /// Initialises a SPARQL Query Parser Context used to parse a sub-query of an enclosing query
 /// </summary>
 /// <param name="parent">Parser Context of the enclosing query; its tracing, expression factory and syntax settings are copied</param>
 /// <param name="tokens">Queue holding the tokens that make up the sub-query</param>
 protected internal SparqlQueryParserContext(SparqlQueryParserContext parent, ITokenQueue tokens)
     : base(new NullHandler(), null)
 {
     // Settings carried over from the parent context
     _traceParsing = parent.TraceParsing;
     _traceTokeniser = parent.TraceTokeniser;
     _factories = parent.ExpressionFactories;
     _syntax = parent.SyntaxMode;

     // State specific to the sub-query being parsed
     _queue = tokens;
     _subqueryMode = true;
     _query = new SparqlQuery(true);

     // The expression parser follows the syntax mode copied above
     _exprParser.SyntaxMode = _syntax;
 }
 /// <summary>
 /// Initialises a Tokenising Store Parser Context whose tokens are read through a plain Token Queue
 /// </summary>
 /// <param name="store">Triple Store handed to the base parser context</param>
 /// <param name="tokeniser">Tokeniser the Token Queue is created over</param>
 public TokenisingStoreParserContext(ITripleStore store, ITokeniser tokeniser)
     : base(store)
 {
     // Default settings: an ordinary TokenQueue wrapping the supplied tokeniser
     _queue = new TokenQueue(tokeniser);
 }
 /// <summary>
 /// Initialises a Tokenising Store Parser Context with an explicit queue mode and tracing options
 /// </summary>
 /// <param name="store">Triple Store handed to the base parser context</param>
 /// <param name="tokeniser">Tokeniser the Token Queue is created over</param>
 /// <param name="queueMode">Selects which kind of Token Queue is created</param>
 /// <param name="traceParsing">Parsing trace flag, passed on to the base parser context</param>
 /// <param name="traceTokeniser">Tokeniser trace flag, stored on this context and applied to the Token Queue</param>
 public TokenisingStoreParserContext(ITripleStore store, ITokeniser tokeniser, TokenQueueMode queueMode, bool traceParsing, bool traceTokeniser)
     : base(store, traceParsing)
 {
     if (queueMode == TokenQueueMode.AsynchronousBufferDuringParsing)
     {
         _queue = new AsynchronousBufferedTokenQueue(tokeniser);
     }
     else if (queueMode == TokenQueueMode.SynchronousBufferDuringParsing)
     {
         _queue = new BufferedTokenQueue(tokeniser);
     }
     else
     {
         // QueueAllBeforeParsing, and any other value, gets the ordinary queue
         _queue = new TokenQueue(tokeniser);
     }

     _traceTokeniser = traceTokeniser;
     _queue.Tracing = _traceTokeniser;
 }
 /// <summary>
 /// Initialises a Tokenising Store Parser Context over an RDF Handler, reading tokens through a plain Token Queue
 /// </summary>
 /// <param name="handler">RDF Handler handed to the base parser context</param>
 /// <param name="tokeniser">Tokeniser the Token Queue is created over</param>
 public TokenisingStoreParserContext(IRdfHandler handler, ITokeniser tokeniser)
     : base(handler)
 {
     // Default settings: an ordinary TokenQueue wrapping the supplied tokeniser
     _queue = new TokenQueue(tokeniser);
 }
 /// <summary>
 /// Placeholder override: token processing for this state has not been implemented yet.
 /// </summary>
 /// <param name="tokenizer">Unused</param>
 /// <param name="queue">Unused</param>
 /// <param name="token">Unused</param>
 /// <param name="doc">Unused</param>
 /// <returns>Never returns normally</returns>
 /// <exception cref="NotImplementedException">Always thrown</exception>
 public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
 {
     throw new NotImplementedException();
 }
 /// <summary>
 /// Initialises a Tokenising Store Parser Context over an RDF Handler with an explicit queue mode
 /// </summary>
 /// <param name="handler">RDF Handler handed to the base parser context</param>
 /// <param name="tokeniser">Tokeniser the Token Queue is created over</param>
 /// <param name="queueMode">Selects which kind of Token Queue is created</param>
 public TokenisingStoreParserContext(IRdfHandler handler, ITokeniser tokeniser, TokenQueueMode queueMode)
     : base(handler)
 {
     if (queueMode == TokenQueueMode.AsynchronousBufferDuringParsing)
     {
         _queue = new AsynchronousBufferedTokenQueue(tokeniser);
     }
     else if (queueMode == TokenQueueMode.SynchronousBufferDuringParsing)
     {
         _queue = new BufferedTokenQueue(tokeniser);
     }
     else
     {
         // QueueAllBeforeParsing, and any other value, gets the ordinary queue
         _queue = new TokenQueue(tokeniser);
     }
 }
Exemplo n.º 25
0
        /// <summary>
        /// Takes the next token from the queue and checks that it can serve as the Subject of a Triple
        /// </summary>
        /// <param name="tokens">Token queue to read from</param>
        /// <returns>The dequeued token when it is a Blank Node (with ID) or URI token</returns>
        /// <remarks>Any other token type results in the error built by ParserHelper.Error being thrown</remarks>
        private IToken TryParseSubject(ITokenQueue tokens)
        {
            IToken candidate = tokens.Dequeue();
            var kind = candidate.TokenType;

            // Only Blank Nodes and URIs are acceptable in the Subject position
            if (kind == Token.BLANKNODEWITHID || kind == Token.URI)
            {
                return candidate;
            }

            throw ParserHelper.Error("Unexpected Token '" + candidate.GetType().ToString() + "' encountered, expected a Blank Node/URI as the Subject of a Triple", candidate);
        }
Exemplo n.º 26
0
 /// <summary>
 /// Initialises a Tokenising Result Parser Context whose tokens are read through a plain Token Queue
 /// </summary>
 /// <param name="results">Result Set handed to the base parser context</param>
 /// <param name="tokeniser">Tokeniser the Token Queue is created over</param>
 public TokenisingResultParserContext(SparqlResultSet results, ITokeniser tokeniser)
     : base(results)
 {
     // Default settings: an ordinary TokenQueue wrapping the supplied tokeniser
     _queue = new TokenQueue(tokeniser);
 }
Exemplo n.º 27
0
        /// <summary>
        /// Takes the next token from the queue and checks that it can serve as the Object of a Triple
        /// </summary>
        /// <param name="tokens">Token queue to read from</param>
        /// <returns>
        /// The dequeued token for Blank Nodes, URIs and already-qualified literals; for a plain literal
        /// followed by a datatype or language specifier, a combined token (the specifier is consumed);
        /// otherwise the plain literal token itself
        /// </returns>
        /// <remarks>Any other token type results in the error built by ParserHelper.Error being thrown</remarks>
        private IToken TryParseObject(ITokenQueue tokens)
        {
            IToken candidate = tokens.Dequeue();
            var kind = candidate.TokenType;

            // Tokens that are usable as an Object exactly as they are
            if (kind == Token.BLANKNODEWITHID
                || kind == Token.LITERALWITHDT
                || kind == Token.LITERALWITHLANG
                || kind == Token.URI)
            {
                return candidate;
            }

            if (kind != Token.LITERAL)
            {
                throw ParserHelper.Error("Unexpected Token '" + candidate.GetType().ToString() + "' encountered, expected a Blank Node/Literal/URI as the Object of a Triple", candidate);
            }

            // Plain literal: look ahead (without consuming) for a Datatype/Language qualifier
            IToken qualifier = tokens.Peek();
            if (qualifier.TokenType == Token.DATATYPE)
            {
                tokens.Dequeue();
                return new LiteralWithDataTypeToken(candidate, (DataTypeToken)qualifier);
            }

            if (qualifier.TokenType == Token.LANGSPEC)
            {
                tokens.Dequeue();
                return new LiteralWithLanguageSpecifierToken(candidate, (LanguageSpecifierToken)qualifier);
            }

            // Unqualified literal; the peeked token is left in the queue
            return candidate;
        }
Exemplo n.º 28
0
 /// <summary>
 /// Initialises a Tokenising Result Parser Context with an explicit queue mode
 /// </summary>
 /// <param name="results">Result Set handed to the base parser context</param>
 /// <param name="tokeniser">Tokeniser the Token Queue is created over</param>
 /// <param name="queueMode">Selects which kind of Token Queue is created</param>
 public TokenisingResultParserContext(SparqlResultSet results, ITokeniser tokeniser, TokenQueueMode queueMode)
     : base(results)
 {
     if (queueMode == TokenQueueMode.AsynchronousBufferDuringParsing)
     {
         _queue = new AsynchronousBufferedTokenQueue(tokeniser);
     }
     else if (queueMode == TokenQueueMode.SynchronousBufferDuringParsing)
     {
         _queue = new BufferedTokenQueue(tokeniser);
     }
     else
     {
         // QueueAllBeforeParsing, and any other value, gets the ordinary queue
         _queue = new TokenQueue(tokeniser);
     }
 }
Exemplo n.º 29
0
 /// <summary>
 /// Processes a single token according to the rules of the concrete insertion mode state.
 /// </summary>
 /// <param name="tokenizer">The HTML tokenizer supplied to the state</param>
 /// <param name="queue">The token queue supplied to the state</param>
 /// <param name="token">The token to process</param>
 /// <param name="doc">The document supplied to the state</param>
 /// <returns>An insertion mode state; NOTE(review): presumably the state that handles the next token - confirm against the caller</returns>
 public abstract BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc);
Exemplo n.º 30
0
        /// <summary>
        /// Reads the optional context (graph name) that ends a statement, together with the terminating
        /// dot, and turns it into a URI
        /// </summary>
        /// <param name="handler">Handler used to create the node representing the context</param>
        /// <param name="tokens">Token queue to read from</param>
        /// <returns>
        /// Null when the next token is already the terminating dot; the URI itself for a URI context;
        /// otherwise a generated "nquads:bnode:" or "nquads:literal:" URI built from the node's hash code
        /// </returns>
        /// <remarks>
        /// A literal context is only accepted when <c>Syntax</c> is <c>NQuadsSyntax.Original</c>; otherwise an
        /// <c>RdfParseException</c> is thrown. Unexpected tokens result in the error built by
        /// ParserHelper.Error being thrown.
        /// </remarks>
        private Uri TryParseContext(IRdfHandler handler, ITokenQueue tokens)
        {
            IToken first = tokens.Dequeue();
            var kind = first.TokenType;

            // Statement terminated straight away: there is no context
            if (kind == Token.DOT)
            {
                return null;
            }

            INode graphNode;
            if (kind == Token.BLANKNODEWITHID)
            {
                // The first two characters of the token value are dropped (presumably the "_:" prefix)
                graphNode = handler.CreateBlankNode(first.Value.Substring(2));
            }
            else if (kind == Token.URI)
            {
                graphNode = TryParseUri(handler, first.Value);
            }
            else if (kind == Token.LITERAL)
            {
                if (Syntax != NQuadsSyntax.Original)
                {
                    throw new RdfParseException("Only a Blank Node/URI may be used as the graph name in RDF NQuads 1.1");
                }

                // Look ahead (without consuming) for a Language/Datatype qualifier
                IToken qualifier = tokens.Peek();
                var qualifierKind = qualifier.TokenType;
                if (qualifierKind == Token.LANGSPEC)
                {
                    tokens.Dequeue();
                    graphNode = handler.CreateLiteralNode(first.Value, qualifier.Value);
                }
                else if (qualifierKind == Token.DATATYPE)
                {
                    tokens.Dequeue();
                    // The first and last characters of the datatype token value are stripped before parsing it as a URI
                    graphNode = handler.CreateLiteralNode(first.Value, ((IUriNode)TryParseUri(handler, qualifier.Value.Substring(1, qualifier.Value.Length - 2))).Uri);
                }
                else
                {
                    graphNode = handler.CreateLiteralNode(first.Value);
                }
            }
            else
            {
                throw ParserHelper.Error("Unexpected Token '" + first.GetType().ToString() + "' encountered, expected a Blank Node/Literal/URI as the Context of the Triple", first);
            }

            // A dot must follow the context to terminate the Quad
            IToken terminator = tokens.Dequeue();
            if (terminator.TokenType != Token.DOT)
            {
                throw ParserHelper.Error("Unexpected Token '" + terminator.GetType().ToString() + "' encountered, expected a Dot Token (Line Terminator) to terminate a Triple", terminator);
            }

            // Convert the context node into the URI that is returned
            switch (graphNode.NodeType)
            {
                case NodeType.Uri:
                    return ((IUriNode)graphNode).Uri;

                case NodeType.Blank:
                    return UriFactory.Create("nquads:bnode:" + graphNode.GetHashCode());

                case NodeType.Literal:
                    return UriFactory.Create("nquads:literal:" + graphNode.GetHashCode());

                default:
                    throw ParserHelper.Error("Cannot turn a Node of type '" + graphNode.GetType().ToString() + "' into a Context URI for a Triple", terminator);
            }
        }