示例#1
0
 /**
  * Reports the current tag to the document handler and keeps the
  * nesting counter in step with it.
  * @param start true when the tag has just been opened (the counter is incremented and
  *              StartElement is fired with the current attributes); false when the tag
  *              is being closed (the counter is decremented and EndElement is fired).
  */
 private void ProcessTag(bool start)
 {
     // Closing a tag: one level up, then tell the handler.
     if (!start)
     {
         nested--;
         doc.EndElement(tag);
         return;
     }

     // Opening a tag: one level down, then tell the handler.
     nested++;
     doc.StartElement(tag, attributes);
 }
示例#2
0
 /**
  * Reports the current tag to the document handler and keeps the
  * nesting counter in step with it.
  * @param start true when the tag has just been opened (the counter is incremented and
  *              StartElement is fired with the current attributes); false when the tag
  *              is being closed (the counter is decremented and EndElement is fired).
  */
 private void ProcessTag(bool start)
 {
     if (start)
     {
         nested++;
         doc.StartElement(tag, attributes);
         return;
     }

     // In HTML, white space that follows a new line has to be ignored, so
     // closing a tag that the new-line handler recognises clears the nowhite flag.
     bool closesNewLineTag = newLineHandler.IsNewLineTag(tag);
     if (closesNewLineTag)
     {
         nowhite = false;
     }

     nested--;
     doc.EndElement(tag);
 }
示例#3
0
        /**
        * Parses an XML document (or, when html is true, a leniently written HTML
        * document) one character at a time and fires the matching events on the handlers.
        *
        * The parser is a state machine: "mode" is the current state and "st" holds the
        * states to return to once a nested construct (tag, entity, comment, ...) is finished.
        * In XML mode parsing stops as soon as the root element has been closed; in HTML mode
        * the whole input is consumed, tag and attribute names are lower-cased, unquoted and
        * valueless attributes are tolerated, and a missing end tag at end of input is not an error.
        *
        * @param doc the document handler that receives the document, element and text events
        * @param comment the handler that receives comment text; may be null, in which case comments are skipped
        * @param reader the document. The encoding is already resolved. The reader is not closed
        * @param html true to parse leniently as HTML, false to parse as XML
        * Malformed input in XML mode is reported through Exc(...) together with the
        * current line and column.
        */
        public static void Parse(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, TextReader reader, bool html)
        {
            Stack st = new Stack();
            int depth = 0;
            // HTML has no prolog to skip, so it starts directly in text mode.
            int mode = html ? TEXT : PRE;
            int c = 0;
            int quotec = '"';
            StringBuilder sb = new StringBuilder();
            StringBuilder etag = new StringBuilder();
            String tagName = null;
            String lvalue = null;
            String rvalue = null;
            Hashtable attrs = null;
            doc.StartDocument();
            int line=1, col=0;
            bool eol = false;
            int pushBack = -1;
            while (true) {
                // A pushed-back character has already been normalized and counted
                // when it was first read; it must not go through that logic again.
                bool replayed = pushBack != -1;
                if (replayed) {
                    c = pushBack;
                    pushBack = -1;
                }
                else
                    c = reader.Read();
                if (c == -1)
                    break;

                // We need to map \r, \r\n, and \n to \n
                // See XML spec section 2.11
                if (!replayed) {
                    if (c == '\n' && eol) {
                        // second half of a \r\n pair: the \r was already reported as \n
                        eol = false;
                        continue;
                    }
                    // Whatever follows a \r (other than \n) is an ordinary character and
                    // is handled like any other, including another \r.
                    eol = false;
                    if (c == '\n') {
                        line++;
                        col=0;
                    } else if (c == '\r') {
                        eol = true;
                        c = '\n';
                        line++;
                        col=0;
                    } else {
                        col++;
                    }
                }

                if (mode == DONE) {
                    doc.EndDocument();
                    return;

                    // We are between tags collecting text.
                } else if (mode == TEXT) {
                    if (c == '<') {
                        st.Push(mode);
                        mode = START_TAG;
                        if (sb.Length > 0) {
                            doc.Text(sb.ToString());
                            sb.Length = 0;
                        }
                    } else if (c == '&') {
                        st.Push(mode);
                        mode = ENTITY;
                        etag.Length = 0;
                    } else
                        sb.Append((char)c);

                    // we are processing a closing tag: e.g. </foo>
                } else if (mode == CLOSE_TAG) {
                    if (c == '>') {
                        mode = PopMode(st);
                        tagName = sb.ToString();
                        if (html)
                            tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        depth--;
                        // in XML mode closing the root element ends the document
                        if (!html && depth==0)
                            mode = DONE;
                        doc.EndElement(tagName);
                    } else {
                        if (!char.IsWhiteSpace((char)c))
                            sb.Append((char)c);
                    }

                    // we are processing CDATA
                } else if (mode == CDATA) {
                    if (c == '>'
                    && sb.ToString().EndsWith("]]", StringComparison.Ordinal)) {
                        // drop the "]]" of the terminating "]]>"
                        sb.Length = sb.Length-2;
                        doc.Text(sb.ToString());
                        sb.Length = 0;
                        mode = PopMode(st);
                    } else
                        sb.Append((char)c);

                    // we are processing a comment.  We are inside
                    // the <!-- .... --> looking for the -->.
                } else if (mode == COMMENT) {
                    if (c == '>'
                    && sb.ToString().EndsWith("--", StringComparison.Ordinal)) {
                        if (comment != null) {
                            // drop the "--" of the terminating "-->"
                            sb.Length = sb.Length - 2;
                            comment.Comment(sb.ToString());
                        }
                        sb.Length = 0;
                        mode = PopMode(st);
                    } else
                        sb.Append((char)c);

                    // We are outside the root tag element
                } else if (mode == PRE) {
                    if (c == '<') {
                        mode = TEXT;
                        st.Push(mode);
                        mode = START_TAG;
                    }

                    // We are inside one of these <? ... ?>
                    // or one of these <!DOCTYPE ... >
                } else if (mode == DOCTYPE) {
                    if (c == '>') {
                        mode = PopMode(st);
                        if (mode == TEXT) mode = PRE;
                    }

                    // we have just seen a < and
                    // are wondering what we are looking at
                    // <foo>, </foo>, <!-- ... --->, etc.
                } else if (mode == START_TAG) {
                    mode = PopMode(st);
                    if (c == '/') {
                        st.Push(mode);
                        mode = CLOSE_TAG;
                    } else if (c == '?') {
                        mode = DOCTYPE;
                    } else {
                        st.Push(mode);
                        mode = OPEN_TAG;
                        tagName = null;
                        attrs = new Hashtable();
                        sb.Append((char)c);
                    }

                    // we are processing an entity, e.g. &lt;, &#187;, etc.
                } else if (mode == ENTITY) {
                    if (c == ';') {
                        mode = PopMode(st);
                        String cent = etag.ToString();
                        etag.Length = 0;
                        if (cent.StartsWith("#", StringComparison.Ordinal)) {
                            // numeric character reference: &#xHHHH; (hex) or &#DDDD; (decimal)
                            try {
                                int code = cent.StartsWith("#x", StringComparison.Ordinal)
                                    ? int.Parse(cent.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture)
                                    : int.Parse(cent.Substring(1), CultureInfo.InvariantCulture);
                                // code points beyond the BMP need a surrogate pair;
                                // a plain (char) cast would silently truncate them
                                if (code > char.MaxValue)
                                    sb.Append(char.ConvertFromUtf32(code));
                                else
                                    sb.Append((char)code);
                            }
                            catch  {
                                // best effort: an unparsable reference is kept as literal text
                                sb.Append('&').Append(cent).Append(';');
                            }
                        }
                        else {
                            char ce = EntitiesToUnicode.DecodeEntity(cent);
                            if (ce == '\0')
                                sb.Append('&').Append(cent).Append(';');
                            else
                                sb.Append(ce);
                        }
                    } else if ((c != '#' && (c < '0' || c > '9') && (c < 'a' || c > 'z')
                        && (c < 'A' || c > 'Z')) || etag.Length >= 7) {
                        // not an entity after all: emit what was collected as plain text
                        // and let the previous mode look at the current character again
                        mode = PopMode(st);
                        pushBack = c;
                        sb.Append('&').Append(etag.ToString());
                        etag.Length = 0;
                    }
                    else {
                        etag.Append((char)c);
                    }

                    // we have just seen something like this:
                    // <foo a="b"/
                    // and are looking for the final >.
                } else if (mode == SINGLE_TAG) {
                    if (tagName == null)
                        tagName = sb.ToString();
                    if (html)
                        tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                    if (c != '>')
                        Exc("Expected > for tag: <"+tagName+"/>",line,col);
                    doc.StartElement(tagName,attrs);
                    doc.EndElement(tagName);
                    // in XML mode a self-closing root element is the whole document
                    if (!html && depth==0) {
                        doc.EndDocument();
                        return;
                    }
                    sb.Length = 0;
                    attrs = new Hashtable();
                    tagName = null;
                    mode = PopMode(st);

                    // we are processing something
                    // like this <foo ... >.  It could
                    // still be a <!-- ... --> or something.
                } else if (mode == OPEN_TAG) {
                    if (c == '>') {
                        if (tagName == null)
                            tagName = sb.ToString();
                        if (html)
                            tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        depth++;
                        doc.StartElement(tagName,attrs);
                        tagName = null;
                        attrs = new Hashtable();
                        mode = PopMode(st);
                    } else if (c == '/') {
                        mode = SINGLE_TAG;
                    } else if (c == '-' && sb.ToString().Equals("!-")) {
                        mode = COMMENT;
                        sb.Length = 0;
                    } else if (c == '[' && sb.ToString().Equals("![CDATA")) {
                        mode = CDATA;
                        sb.Length = 0;
                    } else if (c == 'E' && sb.ToString().Equals("!DOCTYP")) {
                        sb.Length = 0;
                        mode = DOCTYPE;
                    } else if (char.IsWhiteSpace((char)c)) {
                        tagName = sb.ToString();
                        if (html)
                            tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        mode = IN_TAG;
                    } else {
                        sb.Append((char)c);
                    }

                    // We are processing the quoted right-hand side
                    // of an element's attribute.
                    // In HTML mode quotec == ' ' marks an unquoted value, which
                    // ends at white space or at the closing >.
                } else if (mode == QUOTE) {
                    if (html && quotec == ' ' && c == '>') {
                        rvalue = sb.ToString();
                        sb.Length = 0;
                        attrs[lvalue] = rvalue;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    }
                    else if (html && quotec == ' ' && char.IsWhiteSpace((char)c)) {
                        rvalue = sb.ToString();
                        sb.Length = 0;
                        attrs[lvalue] = rvalue;
                        mode = IN_TAG;
                    }
                    else if (html && quotec == ' ') {
                        sb.Append((char)c);
                    }
                    else if (c == quotec) {
                        rvalue = sb.ToString();
                        sb.Length = 0;
                        attrs[lvalue] = rvalue;
                        mode = IN_TAG;
                        // See section the XML spec, section 3.3.3
                        // on normalization processing.
                    } else if (" \r\n\u0009".IndexOf((char)c)>=0) {
                        sb.Append(' ');
                    } else if (c == '&') {
                        st.Push(mode);
                        mode = ENTITY;
                        etag.Length = 0;
                    } else {
                        sb.Append((char)c);
                    }

                    // We have seen the = of an attribute and are looking
                    // for the start of its value.
                } else if (mode == ATTRIBUTE_RVALUE) {
                    if (c == '"' || c == '\'') {
                        quotec = c;
                        mode = QUOTE;
                    } else if (char.IsWhiteSpace((char)c)) {
                    } else if (html && c == '>') {
                        attrs[lvalue] = sb.ToString();
                        sb.Length = 0;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else if (html) {
                        // HTML only: first character of an unquoted value
                        sb.Append((char)c);
                        quotec = ' ';
                        mode = QUOTE;
                    } else {
                        Exc("Error in attribute processing",line,col);
                    }

                    // We are collecting the name of an attribute.
                } else if (mode == ATTRIBUTE_LVALUE) {
                    if (char.IsWhiteSpace((char)c)) {
                        lvalue = sb.ToString();
                        if (html)
                            lvalue = lvalue.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        mode = ATTRIBUTE_EQUAL;
                    } else if (c == '=') {
                        lvalue = sb.ToString();
                        if (html)
                            lvalue = lvalue.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        mode = ATTRIBUTE_RVALUE;
                    } else if (html && c == '>') {
                        // HTML only: the tag ends inside a valueless attribute name,
                        // which is discarded
                        sb.Length = 0;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else {
                        sb.Append((char)c);
                    }

                    // We have an attribute name followed by white space and
                    // are looking for the =.
                } else if (mode == ATTRIBUTE_EQUAL) {
                    if (c == '=') {
                        mode = ATTRIBUTE_RVALUE;
                    } else if (char.IsWhiteSpace((char)c)) {
                    } else if (html && c == '>') {
                        sb.Length = 0;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else if (html && c == '/') {
                        sb.Length = 0;
                        mode = SINGLE_TAG;
                    } else if (html) {
                        // HTML only: the previous attribute had no value and
                        // this character starts the next attribute name
                        sb.Length = 0;
                        sb.Append((char)c);
                        mode = ATTRIBUTE_LVALUE;
                    } else {
                        Exc("Error in attribute processing.",line,col);
                    }

                    // We are inside an opening tag, after its name or after
                    // a completed attribute.
                } else if (mode == IN_TAG) {
                    if (c == '>') {
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else if (c == '/') {
                        mode = SINGLE_TAG;
                    } else if (char.IsWhiteSpace((char)c)) {
                    } else {
                        mode = ATTRIBUTE_LVALUE;
                        sb.Append((char)c);
                    }
                }
            }
            // End of input: HTML may simply stop (flushing any trailing text),
            // XML must have closed its root element.
            if (html || mode == DONE) {
                if (html && mode == TEXT)
                    doc.Text(sb.ToString());
                doc.EndDocument();
            }
            else
                Exc("missing end tag",line,col);
        }