示例#1
0
 /**
  * Builds a parser bound to the given handlers.
  * The parser starts in the TEXT state when html is true and in the
  * UNKNOWN state otherwise, with an empty state stack.
  * Call Go(...) immediately after creation.
  * @param doc the document handler
  * @param comment the comment handler
  * @param html true to start in HTML mode
  */
 private SimpleXMLParser(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, bool html)
 {
     this.html    = html;
     this.comment = comment;
     this.doc     = doc;

     // HTML input has no prolog to look for, so it begins directly in text.
     if (html)
     {
         state = TEXT;
     }
     else
     {
         state = UNKNOWN;
     }
     stack = new Stack();
 }
示例#2
0
        /**
         * Parses the XML document firing the events to the handler.
         * The first four bytes are handed to XMLUtil.GetEncodingName to guess
         * the encoding. For "UTF-8" and "CP037" the bytes up to the next '>'
         * are additionally read and passed to GetDeclaredEncoding; a non-null
         * result replaces the guessed encoding. The bytes consumed during this
         * detection are not replayed to the parser.
         * @param doc the document handler
         * @param inp the document. The encoding is deduced from the stream. The stream is not closed
         * @throws IOException if fewer than four bytes can be read from the stream
         */
        public static void Parse(ISimpleXMLDocHandler doc, Stream inp)
        {
            byte[] b4    = new byte[4];
            int    count = 0;

            // Stream.Read may return fewer bytes than requested even when the
            // end of the stream has not been reached (network, pipe and
            // compressed streams do this), so keep reading until the four
            // signature bytes are available or the stream is exhausted.
            while (count < b4.Length)
            {
                int read = inp.Read(b4, count, b4.Length - count);
                if (read <= 0)
                {
                    break;
                }
                count += read;
            }

            if (count != 4)
            {
                throw new IOException(MessageLocalization.GetComposedMessage("insufficient.length"));
            }
            String encoding = XMLUtil.GetEncodingName(b4);
            String decl     = null;

            if (encoding.Equals("UTF-8"))
            {
                // Collect the remainder of the first markup construct byte by byte.
                StringBuilder sb = new StringBuilder();
                int           c;
                while ((c = inp.ReadByte()) != -1)
                {
                    if (c == '>')
                    {
                        break;
                    }
                    sb.Append((char)c);
                }
                decl = sb.ToString();
            }
            else if (encoding.Equals("CP037"))
            {
                // Same scan as above, but on raw EBCDIC bytes that are decoded afterwards.
                MemoryStream bi = new MemoryStream();
                int          c;
                while ((c = inp.ReadByte()) != -1)
                {
                    if (c == 0x6e) // that's '>' in ebcdic
                    {
                        break;
                    }
                    bi.WriteByte((byte)c);
                }
                decl = Encoding.GetEncoding(37).GetString(bi.ToArray());//cp037 ebcdic
            }
            if (decl != null)
            {
                // An explicitly declared encoding wins over the guessed one.
                decl = GetDeclaredEncoding(decl);
                if (decl != null)
                {
                    encoding = decl;
                }
            }
            // The reader is intentionally not disposed: disposing it would close the caller's stream.
            Parse(doc, new StreamReader(inp, IanaEncodings.GetEncodingEncoding(encoding)));
        }
示例#3
0
 /**
  * Builds a parser bound to the given handlers.
  * HTML mode installs an HTMLNewLineHandler and starts in the TEXT state;
  * otherwise a NeverNewLineHandler is installed and the parser starts in
  * the UNKNOWN state. The state stack starts out empty.
  * Call Go(...) immediately after creation.
  * @param doc the document handler
  * @param comment the comment handler
  * @param html true to start in HTML mode
  */
 private SimpleXMLParser(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, bool html)
 {
     this.html    = html;
     this.comment = comment;
     this.doc     = doc;

     // Everything that depends on the html flag is decided in one place.
     if (!html)
     {
         this.newLineHandler = new NeverNewLineHandler();
         state               = UNKNOWN;
     }
     else
     {
         this.newLineHandler = new HTMLNewLineHandler();
         state               = TEXT;
     }
     stack = new Stack <int>();
 }
示例#4
0
 /**
  * Parses the document with no comment handler and with HTML mode off.
  * @param doc the document handler
  * @param r the document reader
  */
 public static void Parse(ISimpleXMLDocHandler doc, TextReader r)
 {
     ISimpleXMLDocHandlerComment noCommentHandler = null;
     const bool                  asHtml           = false;

     Parse(doc, noCommentHandler, r, asHtml);
 }
示例#5
0
        /**
         * Parses the XML document firing the events to the handler.
         * A fresh SimpleXMLParser is created for every call and run over the reader.
         * @param doc the document handler
         * @param comment the comment handler
         * @param r the document. The encoding is already resolved. The reader is not closed
         * @param html true to parse in HTML mode
         * @throws IOException on error
         */
        public static void Parse(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, TextReader r, bool html)
        {
            new SimpleXMLParser(doc, comment, html).Go(r);
        }
 /**
 * Parses the document with no comment handler and with HTML mode off.
 * @param doc the document handler
 * @param r the document reader
 */
 public static void Parse(ISimpleXMLDocHandler doc, TextReader r) {
     const bool htmlMode = false;
     ISimpleXMLDocHandlerComment commentHandler = null;
     Parse(doc, commentHandler, r, htmlMode);
 }
 /**
 * Parses the XML document firing the events to the handler.
 * The first four bytes are handed to XMLUtil.GetEncodingName to guess the
 * encoding. For "UTF-8" and "CP037" the bytes up to the next '>' are also
 * read and passed to GetDeclaredEncoding; a non-null result replaces the
 * guessed encoding. Bytes consumed during detection are not replayed to
 * the parser.
 * @param doc the document handler
 * @param inp the document. The encoding is deduced from the stream. The stream is not closed
 * @throws IOException if fewer than four bytes can be read from the stream
 */
 public static void Parse(ISimpleXMLDocHandler doc, Stream inp) {
     byte[] b4 = new byte[4];
     int count = 0;
     // A single Stream.Read call may deliver fewer bytes than requested
     // without being at end of stream, so loop until all four signature
     // bytes have arrived or the stream reports EOF.
     while (count < b4.Length) {
         int read = inp.Read(b4, count, b4.Length - count);
         if (read <= 0) {
             break;
         }
         count += read;
     }
     if (count != 4) {
         throw new IOException(MessageLocalization.GetComposedMessage("insufficient.length"));
     }
     String encoding = XMLUtil.GetEncodingName(b4);
     String decl = null;
     if (encoding.Equals("UTF-8")) {
         // Collect the remainder of the first markup construct byte by byte.
         StringBuilder sb = new StringBuilder();
         int c;
         while ((c = inp.ReadByte()) != -1) {
             if (c == '>') {
                 break;
             }
             sb.Append((char)c);
         }
         decl = sb.ToString();
     }
     else if (encoding.Equals("CP037")) {
         // Same scan on raw EBCDIC bytes, decoded once the terminator is found.
         MemoryStream bi = new MemoryStream();
         int c;
         while ((c = inp.ReadByte()) != -1) {
             if (c == 0x6e) { // that's '>' in ebcdic
                 break;
             }
             bi.WriteByte((byte)c);
         }
         decl = Encoding.GetEncoding(37).GetString(bi.ToArray());//cp037 ebcdic
     }
     if (decl != null) {
         // An explicitly declared encoding wins over the guessed one.
         decl = GetDeclaredEncoding(decl);
         if (decl != null) {
             encoding = decl;
         }
     }
     // The reader is intentionally not disposed: disposing it would close the caller's stream.
     Parse(doc, new StreamReader(inp, IanaEncodings.GetEncodingEncoding(encoding)));
 }
 /**
 * Parses the XML document firing the events to the handler.
 * A fresh SimpleXMLParser is created for every call and run over the reader.
 * @param doc the document handler
 * @param comment the comment handler
 * @param r the document. The encoding is already resolved. The reader is not closed
 * @param html true to parse in HTML mode
 * @throws IOException on error
 */
 public static void Parse(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, TextReader r, bool html) {
     new SimpleXMLParser(doc, comment, html).Go(r);
 }
 /**
 * Builds a parser bound to the given handlers.
 * HTML mode installs an HTMLNewLineHandler and starts in the TEXT state;
 * otherwise a NeverNewLineHandler is installed and the parser starts in
 * the UNKNOWN state. The state stack starts out empty.
 * Call Go(...) immediately after creation.
 * @param doc the document handler
 * @param comment the comment handler
 * @param html true to start in HTML mode
 */
 private SimpleXMLParser(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, bool html) {
     this.html = html;
     this.comment = comment;
     this.doc = doc;
     // Everything that depends on the html flag is decided in one place.
     if (!html) {
         this.newLineHandler = new NeverNewLineHandler();
         state = UNKNOWN;
     } else {
         this.newLineHandler = new HTMLNewLineHandler();
         state = TEXT;
     }
     stack = new Stack<int>();
 }
示例#10
0
        /**
        * Parses the XML document firing the events to the handler.
        * The method is a character-at-a-time state machine: the current state
        * is held in <code>mode</code>, states to return to are kept on a stack,
        * and <code>sb</code> accumulates the text, tag name or attribute piece
        * currently being read.
        * @param doc the document handler; receives StartDocument, StartElement,
        *            EndElement, Text and EndDocument calls
        * @param comment the comment handler; may be null, in which case comments are dropped
        * @param reader the document. The encoding is already resolved. The reader is not closed
        * @param html true for lenient HTML parsing: starts in TEXT mode, lower-cases tag and
        *             attribute names, accepts unquoted/valueless attributes and does not stop
        *             at the end of the root element
        * @throws IOException on error
        */
        public static void Parse(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, TextReader reader, bool html)
        {
            // Stack of modes to return to once the current construct is finished.
            Stack st = new Stack();
            // Number of currently open elements.
            int depth = 0;
            // Current state of the state machine.
            int mode = PRE;
            // Character being processed (-1 at end of input).
            int c = 0;
            // Quote character that closes the attribute value being read;
            // a space marks an unquoted HTML attribute value.
            int quotec = '"';
            depth = 0;
            // General purpose accumulator (text, tag names, attribute names/values).
            StringBuilder sb = new StringBuilder();
            // Accumulator for the entity name between '&' and ';'.
            StringBuilder etag = new StringBuilder();
            String tagName = null;
            // Attribute name (lvalue) and attribute value (rvalue) being assembled.
            String lvalue = null;
            String rvalue = null;
            Hashtable attrs = null;
            st = new Stack();
            doc.StartDocument();
            // Position tracking; both values are passed to Exc when reporting an error.
            int line=1, col=0;
            // True when the previous character was a '\r' (already reported as '\n').
            bool eol = false;
            if (html)
                mode = TEXT;
            // One character of look-behind: a character that ended an entity
            // scan and must be processed again in the restored mode.
            int pushBack = -1;
            while (true) {
                if (pushBack != -1) {
                    c = pushBack;
                    pushBack = -1;
                }
                else
                    c = reader.Read();
                if (c == -1)
                    break;

                // We need to map \r, \r\n, and \n to \n
                // See XML spec section 2.11
                // NOTE(review): any character that directly follows a '\r' takes the
                // "else if (eol)" branch, so it is not counted in col, and a second
                // consecutive '\r' is neither mapped to '\n' nor counted as a line.
                if (c == '\n' && eol) {
                    eol = false;
                    continue;
                } else if (eol) {
                    eol = false;
                } else if (c == '\n') {
                    line++;
                    col=0;
                } else if (c == '\r') {
                    eol = true;
                    c = '\n';
                    line++;
                    col=0;
                } else {
                    col++;
                }

                // The root element has been closed and another character was read:
                // finish the document and ignore the rest of the input.
                if (mode == DONE) {
                    doc.EndDocument();
                    return;

                    // We are between tags collecting text.
                } else if (mode == TEXT) {
                    if (c == '<') {
                        st.Push(mode);
                        mode = START_TAG;
                        // Flush the text collected so far before the tag starts.
                        if (sb.Length > 0) {
                            doc.Text(sb.ToString());
                            sb.Length = 0;
                        }
                    } else if (c == '&') {
                        st.Push(mode);
                        mode = ENTITY;
                        etag.Length = 0;
                    } else
                        sb.Append((char)c);

                    // we are processing a closing tag: e.g. </foo>
                } else if (mode == CLOSE_TAG) {
                    if (c == '>') {
                        mode = PopMode(st);
                        tagName = sb.ToString();
                        if (html)
                            tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        depth--;
                        // In XML mode closing the outermost element ends the document.
                        if (!html && depth==0)
                            mode = DONE;
                    doc.EndElement(tagName);
                    } else {
                        // Whitespace inside the closing tag is not part of the name.
                        if (!char.IsWhiteSpace((char)c))
                            sb.Append((char)c);
                    }

                    // we are processing CDATA
                } else if (mode == CDATA) {
                    if (c == '>'
                    && sb.ToString().EndsWith("]]")) {
                        // Strip the trailing "]]" of the "]]>" terminator.
                        sb.Length = sb.Length-2;
                        doc.Text(sb.ToString());
                        sb.Length = 0;
                        mode = PopMode(st);
                    } else
                        sb.Append((char)c);

                    // we are processing a comment.  We are inside
                    // the <!-- .... --> looking for the -->.
                } else if (mode == COMMENT) {
                    if (c == '>'
                    && sb.ToString().EndsWith("--")) {
                        // The comment text is only delivered when a handler was supplied.
                        if (comment != null) {
                            sb.Length = sb.Length - 2;
                            comment.Comment(sb.ToString());
                        }
                        sb.Length = 0;
                        mode = PopMode(st);
                    } else
                        sb.Append((char)c);

                    // We are outside the root tag element
                } else if (mode == PRE) {
                    if (c == '<') {
                        // TEXT is the mode pushed as the one to return to after this tag.
                        mode = TEXT;
                        st.Push(mode);
                        mode = START_TAG;
                    }

                    // We are inside one of these <? ... ?>
                    // or one of these <!DOCTYPE ... >
                } else if (mode == DOCTYPE) {
                    if (c == '>') {
                        mode = PopMode(st);
                        // Still before the root element: go back to skipping the prolog.
                        if (mode == TEXT) mode = PRE;
                    }

                    // we have just seen a < and
                    // are wondering what we are looking at
                    // <foo>, </foo>, <!-- ... --->, etc.
                } else if (mode == START_TAG) {
                    mode = PopMode(st);
                    if (c == '/') {
                        st.Push(mode);
                        mode = CLOSE_TAG;
                    } else if (c == '?') {
                        mode = DOCTYPE;
                    } else {
                        st.Push(mode);
                        mode = OPEN_TAG;
                        tagName = null;
                        attrs = new Hashtable();
                        // First character of the tag name (or of "!--", "![CDATA[", "!DOCTYPE").
                        sb.Append((char)c);
                    }

                    // we are processing an entity, e.g. &lt;, &#187;, etc.
                } else if (mode == ENTITY) {
                    if (c == ';') {
                        mode = PopMode(st);
                        String cent = etag.ToString();
                        etag.Length = 0;
                        // Hexadecimal character reference: &#xHHHH;
                        if (cent.StartsWith("#x")) {
                            try {
                                char ci = (char)int.Parse(cent.Substring(2), NumberStyles.AllowHexSpecifier);
                                sb.Append(ci);
                            }
                            catch  {
                                // Not a valid number: emit the reference verbatim.
                                sb.Append('&').Append(cent).Append(';');
                            }
                        }
                        // Decimal character reference: &#DDDD;
                        else if (cent.StartsWith("#")) {
                            try {
                                char ci = (char)int.Parse(cent.Substring(1));
                                sb.Append(ci);
                            }
                            catch  {
                                // Not a valid number: emit the reference verbatim.
                                sb.Append('&').Append(cent).Append(';');
                            }
                        }
                        // Named entity; a '\0' result is treated as "unknown" and
                        // the reference is emitted verbatim.
                        else {
                            char ce = EntitiesToUnicode.DecodeEntity(cent);
                            if (ce == '\0')
                                sb.Append('&').Append(cent).Append(';');
                            else
                            sb.Append(ce);
                        }
                    // Not an entity after all (illegal character, or 7 characters already
                    // collected): emit what was read literally and reprocess the current
                    // character in the restored mode.
                    } else if ((c != '#' && (c < '0' || c > '9') && (c < 'a' || c > 'z')
                        && (c < 'A' || c > 'Z')) || etag.Length >= 7) {
                        mode = PopMode(st);
                        pushBack = c;
                        sb.Append('&').Append(etag.ToString());
                        etag.Length = 0;
                    }
                    else {
                        etag.Append((char)c);
                    }

                    // we have just seen something like this:
                    // <foo a="b"/
                    // and are looking for the final >.
                } else if (mode == SINGLE_TAG) {
                    if (tagName == null)
                        tagName = sb.ToString();
                    if (html)
                        tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                    // NOTE(review): Exc presumably throws; if it returned, the element
                    // events below would still be fired - confirm.
                    if (c != '>')
                        Exc("Expected > for tag: <"+tagName+"/>",line,col);
                    doc.StartElement(tagName,attrs);
                    doc.EndElement(tagName);
                    // A self-closing root element ends an XML document immediately.
                    if (!html && depth==0) {
                        doc.EndDocument();
                        return;
                    }
                    sb.Length = 0;
                    attrs = new Hashtable();
                    tagName = null;
                    mode = PopMode(st);

                    // we are processing something
                    // like this <foo ... >.  It could
                    // still be a <!-- ... --> or something.
                } else if (mode == OPEN_TAG) {
                    if (c == '>') {
                        if (tagName == null)
                            tagName = sb.ToString();
                        if (html)
                            tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        depth++;
                        doc.StartElement(tagName,attrs);
                        tagName = null;
                        attrs = new Hashtable();
                        mode = PopMode(st);
                    } else if (c == '/') {
                        mode = SINGLE_TAG;
                    // "<!--" recognised: sb holds "!-" and the second '-' just arrived.
                    } else if (c == '-' && sb.ToString().Equals("!-")) {
                        mode = COMMENT;
                        sb.Length = 0;
                    // "<![CDATA[" recognised.
                    } else if (c == '[' && sb.ToString().Equals("![CDATA")) {
                        mode = CDATA;
                        sb.Length = 0;
                    // "<!DOCTYPE" recognised.
                    } else if (c == 'E' && sb.ToString().Equals("!DOCTYP")) {
                        sb.Length = 0;
                        mode = DOCTYPE;
                    // Whitespace ends the tag name; attributes may follow.
                    } else if (char.IsWhiteSpace((char)c)) {
                        tagName = sb.ToString();
                        if (html)
                            tagName = tagName.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        mode = IN_TAG;
                    } else {
                        sb.Append((char)c);
                    }

                    // We are processing the quoted right-hand side
                    // of an element's attribute.
                } else if (mode == QUOTE) {
                    // Unquoted HTML value terminated by the end of the tag.
                    if (html && quotec == ' ' && c == '>') {
                        rvalue = sb.ToString();
                        sb.Length = 0;
                        attrs[lvalue] = rvalue;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    }
                    // Unquoted HTML value terminated by whitespace.
                    else if (html && quotec == ' ' && char.IsWhiteSpace((char)c)) {
                        rvalue = sb.ToString();
                        sb.Length = 0;
                        attrs[lvalue] = rvalue;
                        mode = IN_TAG;
                    }
                    // Inside an unquoted HTML value every other character is taken
                    // literally (no entity or whitespace processing).
                    else if (html && quotec == ' ') {
                        sb.Append((char)c);
                    }
                    // Closing quote: the attribute is complete.
                    else if (c == quotec) {
                        rvalue = sb.ToString();
                        sb.Length = 0;
                        attrs[lvalue] = rvalue;
                        mode = IN_TAG;
                        // See section the XML spec, section 3.3.3
                        // on normalization processing.
                    } else if (" \r\n\u0009".IndexOf((char)c)>=0) {
                        sb.Append(' ');
                    } else if (c == '&') {
                        st.Push(mode);
                        mode = ENTITY;
                        etag.Length = 0;
                    } else {
                        sb.Append((char)c);
                    }

                    // We have seen '=' and are looking for the start of the value.
                } else if (mode == ATTRIBUTE_RVALUE) {
                    if (c == '"' || c == '\'') {
                        // Remember which quote opened the value so the same one closes it.
                        quotec = c;
                        mode = QUOTE;
                    } else if (char.IsWhiteSpace((char)c)) {
                        // Whitespace between '=' and the value is skipped.
                    } else if (html && c == '>') {
                        // HTML: "name=" directly followed by '>' stores the current sb content.
                        attrs[lvalue] = sb.ToString();
                        sb.Length = 0;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else if (html) {
                        // HTML: start of an unquoted value; a space is used as the pseudo quote.
                        sb.Append((char)c);
                        quotec = ' ';
                        mode = QUOTE;
                    } else {
                        Exc("Error in attribute processing",line,col);
                    }

                    // We are reading an attribute name.
                } else if (mode == ATTRIBUTE_LVALUE) {
                    if (char.IsWhiteSpace((char)c)) {
                        lvalue = sb.ToString();
                        if (html)
                            lvalue = lvalue.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        mode = ATTRIBUTE_EQUAL;
                    } else if (c == '=') {
                        lvalue = sb.ToString();
                        if (html)
                            lvalue = lvalue.ToLower(CultureInfo.InvariantCulture);
                        sb.Length = 0;
                        mode = ATTRIBUTE_RVALUE;
                    } else if (html && c == '>') {
                        // HTML: the tag ends inside an attribute name; the partial name
                        // is discarded and the element is started.
                        sb.Length = 0;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else {
                        sb.Append((char)c);
                    }

                    // The attribute name is complete and we are looking for '='.
                } else if (mode == ATTRIBUTE_EQUAL) {
                    if (c == '=') {
                        mode = ATTRIBUTE_RVALUE;
                    } else if (char.IsWhiteSpace((char)c)) {
                        // Whitespace between the name and '=' is skipped.
                    } else if (html && c == '>') {
                        // HTML: attribute without a value at the end of the tag; it is not stored.
                        sb.Length = 0;
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else if (html && c == '/') {
                        sb.Length = 0;
                        mode = SINGLE_TAG;
                    } else if (html) {
                        // HTML: the previous attribute had no value; this character
                        // starts the next attribute name.
                        sb.Length = 0;
                        sb.Append((char)c);
                        mode = ATTRIBUTE_LVALUE;
                    } else {
                        Exc("Error in attribute processing.",line,col);
                    }

                    // We are inside a tag, after the name or after a complete attribute.
                } else if (mode == IN_TAG) {
                    if (c == '>') {
                        mode = PopMode(st);
                        doc.StartElement(tagName,attrs);
                        depth++;
                        tagName = null;
                        attrs = new Hashtable();
                    } else if (c == '/') {
                        mode = SINGLE_TAG;
                    } else if (char.IsWhiteSpace((char)c)) {
                        // Whitespace between attributes is skipped.
                    } else {
                        // Anything else starts a new attribute name.
                        mode = ATTRIBUTE_LVALUE;
                        sb.Append((char)c);
                    }
                }
            }
            // End of input. HTML documents may end anywhere; XML documents must
            // have closed their root element (mode DONE).
            if (html || mode == DONE) {
                // Deliver trailing HTML text (called even when sb is empty).
                if (html && mode == TEXT)
                    doc.Text(sb.ToString());
                doc.EndDocument();
            }
            else
                Exc("missing end tag",line,col);
        }
示例#11
0
 /**
 * Builds a parser bound to the given handlers.
 * The parser starts in the TEXT state when html is true and in the
 * UNKNOWN state otherwise, with an empty state stack.
 * Call Go(...) immediately after creation.
 * @param doc the document handler
 * @param comment the comment handler
 * @param html true to start in HTML mode
 */
 private SimpleXMLParser(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, bool html)
 {
     this.html = html;
     this.comment = comment;
     this.doc = doc;

     // HTML input has no prolog to look for, so it begins directly in text.
     if (html)
     {
         state = TEXT;
     }
     else
     {
         state = UNKNOWN;
     }
     stack = new Stack();
 }