Ejemplo n.º 1
0
 /// <summary>
 /// Start-of-parse notification. This implementation does nothing.
 /// </summary>
 /// <param name="parser">The parser raising the notification; not used here.</param>
 public void OnStartParsing(MiniParser parser) { /* intentionally empty */ }
Ejemplo n.º 2
0
 /// <summary>
 /// End-of-parse notification. This implementation does nothing.
 /// </summary>
 /// <param name="parser">The parser raising the notification; not used here.</param>
 public void OnEndParsing(MiniParser parser) { /* intentionally empty */ }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads markup from <paramref name="reader"/> one character at a time, drives a
        /// table-based state machine (<c>MiniParser.Xlat</c>) and reports start/end of parsing,
        /// elements, attributes and character data to <paramref name="handler"/>.
        /// </summary>
        /// <param name="reader">
        /// Character source. A return value of -1 from <c>Read()</c> is treated as end of input.
        /// </param>
        /// <param name="handler">
        /// Receiver of the parse events. When <c>null</c>, a new <c>HandlerAdapter</c> is used.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
        /// <remarks>
        /// Malformed input is reported through <c>FatalErr</c>.
        /// NOTE(review): FatalErr is presumably a throwing helper; it is defined outside this
        /// block - confirm before relying on code after a FatalErr call being unreachable.
        /// </remarks>
        public void Parse(IReader reader, IHandler handler)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }
            if (handler == null)
            {
                handler = new HandlerAdapter();
            }

            AttrListImpl attrList     = new AttrListImpl();
            string       lastAttrName = null;
            Stack        tagStack     = new Stack();   // names of the currently open elements
            string       elementName  = null;

            // Reset the position tracking; col is pre-incremented for every character read.
            line = 1;
            col  = 0;
            int           currCh        = 0;
            int           stateCode     = 0;
            StringBuilder sbChars       = new StringBuilder();   // accumulates names, values and text
            bool          seenCData     = false;
            bool          isComment     = false;
            bool          isDTD         = false;
            int           bracketSwitch = 0;

            handler.OnStartParsing(this);

            while (true)
            {
                ++this.col;

                currCh = reader.Read();

                if (currCh == -1)
                {
                    // End of input is only legal while the machine is in state 0.
                    if (stateCode != 0)
                    {
                        FatalErr("Unexpected EOF");
                    }
                    break;
                }

                // Classify the character by its position in the literal below; characters that
                // are not listed give IndexOf == -1, which the mask turns into 0xF.
                int charCode = "<>/?=&'\"![ ]\t\r\n".IndexOf((char)currCh) & 0xF;
                if (charCode == (int)CharKind.CR)
                {
                    continue;                               // ignore
                }
                // whitespace ::= (#x20 | #x9 | #xd | #xa)+
                if (charCode == (int)CharKind.TAB)
                {
                    charCode = (int)CharKind.SPACE;                                // tab == space
                }
                if (charCode == (int)CharKind.EOL)
                {
                    // A line feed starts a new line and is otherwise handled like a space.
                    this.col = 0;
                    this.line++;
                    charCode = (int)CharKind.SPACE;
                }

                // Xlat returns a packed value: low 8 bits = next state, remaining bits = action.
                int actionCode = MiniParser.Xlat(charCode, stateCode);
                stateCode = actionCode & 0xFF;
                // Ignore newline inside attribute value.
                if (currCh == '\n' && (stateCode == 0xE || stateCode == 0xF))
                {
                    continue;
                }
                actionCode >>= 8;

                // States with the high bit set signal an error: 0xFF is a dispatch failure,
                // anything else indexes the errors table once the high bit is cleared.
                if (stateCode >= 0x80)
                {
                    if (stateCode == 0xFF)
                    {
                        FatalErr("State dispatch error.");
                    }
                    else
                    {
                        FatalErr(errors[stateCode ^ 0x80]);
                    }
                }

                switch (actionCode)
                {
                case (int)ActionCode.START_ELEM:
                    handler.OnStartElement(elementName, attrList);
                    if (currCh != '/')
                    {
                        tagStack.Push(elementName);
                    }
                    else
                    {
                        // Triggered by '/': the element is closed immediately and never pushed.
                        handler.OnEndElement(elementName);
                    }
                    attrList.Clear();
                    break;

                case (int)ActionCode.END_ELEM:
                    elementName = sbChars.ToString();
                    sbChars     = new StringBuilder();
                    string endName = null;
                    // endName stays null only when the stack is empty (Pop is not evaluated then).
                    if (tagStack.Count == 0 ||
                        elementName != (endName = tagStack.Pop() as string))
                    {
                        if (endName == null)
                        {
                            FatalErr("Tag stack underflow");
                        }
                        else
                        {
                            // endName is the tag that was open (expected); elementName is the
                            // closing tag actually read from the input (found).
                            FatalErr(String.Format("Expected end tag '{0}' but found '{1}'", endName, elementName));
                        }
                    }
                    handler.OnEndElement(elementName);
                    break;

                case (int)ActionCode.END_NAME:
                    elementName = sbChars.ToString();
                    sbChars     = new StringBuilder();
                    if (currCh != '/' && currCh != '>')
                    {
                        break;
                    }
                    // The name was terminated by '/' or '>', so the start tag is complete.
                    goto case (int)ActionCode.START_ELEM;

                case (int)ActionCode.SET_ATTR_NAME:
                    lastAttrName = sbChars.ToString();
                    sbChars      = new StringBuilder();
                    break;

                case (int)ActionCode.SET_ATTR_VAL:
                    if (lastAttrName == null)
                    {
                        FatalErr("Internal error.");
                    }
                    attrList.Add(lastAttrName, sbChars.ToString());
                    sbChars      = new StringBuilder();
                    lastAttrName = null;
                    break;

                case (int)ActionCode.SEND_CHARS:
                    handler.OnChars(sbChars.ToString());
                    sbChars = new StringBuilder();
                    break;

                case (int)ActionCode.START_CDATA:
                    // Distinguishes three constructs by the current character:
                    // '-' => comment, '[' => CDATA section, anything else => DTD-style declaration.
                    string cdata = "CDATA[";
                    isComment = false;
                    isDTD     = false;

                    if (currCh == '-')
                    {
                        // A comment needs a second '-' right after the first one.
                        currCh = reader.Read();

                        if (currCh != '-')
                        {
                            FatalErr("Invalid comment");
                        }

                        this.col++;
                        isComment      = true;
                        // Clear the two-character look-behind used to detect the closing "-->".
                        twoCharBuff[0] = -1;
                        twoCharBuff[1] = -1;
                    }
                    else
                    {
                        if (currCh != '[')
                        {
                            isDTD         = true;
                            bracketSwitch = 0;
                            break;
                        }

                        // Consume the "CDATA[" marker that follows the '['.
                        // NOTE(review): on a mismatch only this for-loop is left; no error is
                        // raised, col is advanced twice and seenCData is still set - confirm
                        // whether that leniency is intended.
                        for (int i = 0; i < cdata.Length; i++)
                        {
                            if (reader.Read() != cdata[i])
                            {
                                this.col += i + 1;
                                break;
                            }
                        }
                        this.col += cdata.Length;
                        seenCData = true;
                    }
                    break;

                case (int)ActionCode.END_CDATA:
                    // n counts the characters read here: any further ']' plus the terminator.
                    int n = 0;
                    currCh = ']';

                    while (currCh == ']')
                    {
                        currCh = reader.Read();
                        n++;
                    }

                    if (currCh != '>')
                    {
                        // Not a terminator: keep the brackets and the following character as
                        // text and force state 0x12.
                        for (int i = 0; i < n; i++)
                        {
                            sbChars.Append(']');
                        }
                        sbChars.Append((char)currCh);
                        stateCode = 0x12;
                    }
                    else
                    {
                        // Terminated by '>': all but two of the brackets are kept as text.
                        for (int i = 0; i < n - 2; i++)
                        {
                            sbChars.Append(']');
                        }
                        seenCData = false;
                    }

                    this.col += n;
                    break;

                case (int)ActionCode.ERROR:
                    FatalErr(String.Format("Error {0}", stateCode));
                    break;

                case (int)ActionCode.STATE_CHANGE:
                    break;

                case (int)ActionCode.FLUSH_CHARS_STATE_CHANGE:
                    // Discard what was accumulated; keep the current character unless it is '<'.
                    sbChars = new StringBuilder();
                    if (currCh != '<')
                    {
                        goto case (int)ActionCode.ACC_CHARS_STATE_CHANGE;
                    }
                    break;

                case (int)ActionCode.ACC_CHARS_STATE_CHANGE:
                    sbChars.Append((char)currCh);
                    break;

                case (int)ActionCode.ACC_CDATA:
                    if (isComment)
                    {
                        // Comment content is dropped; the comment ends at '>' preceded by "--".
                        if (currCh == '>' &&
                            twoCharBuff[0] == '-' &&
                            twoCharBuff[1] == '-')
                        {
                            isComment = false;
                            stateCode = 0;
                        }
                        else
                        {
                            twoCharBuff[0] = twoCharBuff[1];
                            twoCharBuff[1] = currCh;
                        }
                    }
                    else if (isDTD)
                    {
                        // Declaration content is dropped; bracketSwitch flips on every '<' or
                        // '>' and the declaration ends on a '>' that leaves it non-zero.
                        if (currCh == '<' || currCh == '>')
                        {
                            bracketSwitch ^= 1;
                        }
                        if (currCh == '>' && bracketSwitch != 0)
                        {
                            isDTD     = false;
                            stateCode = 0;
                        }
                    }
                    else
                    {
                        // With splitCData set, text gathered before a CDATA section is sent
                        // separately when the first CDATA character arrives.
                        if (this.splitCData &&
                            sbChars.Length > 0 &&
                            seenCData)
                        {
                            handler.OnChars(sbChars.ToString());
                            sbChars = new StringBuilder();
                        }
                        seenCData = false;
                        sbChars.Append((char)currCh);
                    }
                    break;

                case (int)ActionCode.PROC_CHAR_REF:
                    // cl tracks the column locally while the reference is consumed and is
                    // written back to this.col at the end of the case.
                    currCh = reader.Read();
                    int cl = this.col + 1;
                    if (currCh == '#')
                    {        // character reference
                        int r       = 10;   // radix: 10 by default, 16 after an 'x' prefix
                        int chCode  = 0;
                        int nDigits = 0;
                        currCh = reader.Read();
                        cl++;

                        if (currCh == 'x')
                        {
                            currCh = reader.Read();
                            cl++;
                            r = 16;
                        }

                        NumberStyles style = r == 16 ? NumberStyles.HexNumber : NumberStyles.Integer;

                        while (true)
                        {
                            int x = -1;
                            if (Char.IsNumber((char)currCh) || "abcdef".IndexOf(Char.ToLower((char)currCh)) != -1)
                            {
                                // A hex letter in a decimal reference fails to parse and ends
                                // the digit run through the FormatException path.
                                try
                                {
                                    x = Int32.Parse(new string((char)currCh, 1), style);
                                }
                                catch (FormatException) { x = -1; }
                            }
                            if (x == -1)
                            {
                                break;
                            }
                            chCode *= r;
                            chCode += x;
                            nDigits++;
                            currCh = reader.Read();
                            cl++;
                        }

                        if (currCh == ';' && nDigits > 0)
                        {
                            // NOTE(review): the cast keeps only the low 16 bits, so values
                            // above 0xFFFF are truncated rather than rejected - confirm intent.
                            sbChars.Append((char)chCode);
                        }
                        else
                        {
                            FatalErr("Bad char ref");
                        }
                    }
                    else
                    {
                        // entity reference
                        string entityRefChars = "aglmopqstu";     // amp | apos | quot | gt | lt
                        string entities       = "&'\"><";

                        int pos       = 0;
                        int entIdx    = 0xF;
                        int predShift = 0;

                        // Remember where the entity name starts so it can be replaced later.
                        int sbLen = sbChars.Length;

                        // Walks the name letter by letter through a packed table: each entry
                        // holds two candidate next letters (indexes into entityRefChars) and a
                        // value for each that is either an index into entities or a marker.
                        // NOTE(review): the exact table encoding is inferred from the bit
                        // arithmetic below - confirm against the table's origin.
                        while (true)
                        {
                            if (pos != 0xF)
                            {
                                pos = entityRefChars.IndexOf((char)currCh) & 0xF;
                            }
                            if (pos == 0xF)
                            {
                                FatalErr(errors[7]);
                            }
                            sbChars.Append((char)currCh);

                            int path  = "\uFF35\u3F8F\u4F8F\u0F5F\uFF78\uE1F4\u2299\uEEFF\uEEFF\uFF4F"[pos];
                            int lBr   = (path >> 4) & 0xF;
                            int rBr   = path & 0xF;
                            int lPred = path >> 12;
                            int rPred = (path >> 8) & 0xF;
                            currCh = reader.Read();
                            cl++;
                            pos = 0xF;
                            if (lBr != 0xF && currCh == entityRefChars[lBr])
                            {
                                if (lPred < 0xE)
                                {
                                    entIdx = lPred;
                                }
                                predShift = 12;     // left
                            }
                            else if (rBr != 0xF && currCh == entityRefChars[rBr])
                            {
                                if (rPred < 0xE)
                                {
                                    entIdx = rPred;
                                }
                                predShift = 8;     // right
                            }
                            else if (currCh == ';')
                            {
                                if (entIdx != 0xF &&
                                    predShift != 0 &&
                                    ((path >> predShift) & 0xF) == 0xE)
                                {
                                    break;
                                }
                                continue;     // pos == 0xF
                            }

                            pos = 0;
                        }

                        // Number of name characters appended since sbLen.
                        int l = cl - this.col - 1;

                        // Double-check the collected name against the five known entities
                        // before substituting the replacement character.
                        if ((l > 0 && l < 5) &&
                            (StrEquals("amp", sbChars, sbLen, l) ||
                             StrEquals("apos", sbChars, sbLen, l) ||
                             StrEquals("quot", sbChars, sbLen, l) ||
                             StrEquals("lt", sbChars, sbLen, l) ||
                             StrEquals("gt", sbChars, sbLen, l))
                            )
                        {
                            sbChars.Length = sbLen;
                            sbChars.Append(entities[entIdx]);
                        }
                        else
                        {
                            FatalErr(errors[7]);
                        }
                    }

                    this.col = cl;
                    break;

                default:
                    FatalErr(String.Format("Unexpected action code - {0}.", actionCode));
                    break;
                }
            } // while (true)

            handler.OnEndParsing(this);
        } // Parse