/// <summary>
/// Start-of-parsing notification. This implementation deliberately does nothing.
/// </summary>
/// <param name="parser">The parser passed by the caller; not used here.</param>
public void OnStartParsing(MiniParser parser)
{
    // Intentionally empty: no work is performed for this notification.
}
/// <summary>
/// End-of-parsing notification. This implementation deliberately does nothing.
/// </summary>
/// <param name="parser">The parser passed by the caller; not used here.</param>
public void OnEndParsing(MiniParser parser)
{
    // Intentionally empty: no work is performed for this notification.
}
/// <summary>
/// Reads characters from <paramref name="reader"/> one at a time and drives a
/// table-based state machine (<c>MiniParser.Xlat</c>), reporting elements,
/// attributes and character data to <paramref name="handler"/>.
/// </summary>
/// <param name="reader">Character source; <c>Read()</c> returning -1 is treated as end of input.</param>
/// <param name="handler">
/// Receiver of parse events. When <c>null</c>, a new <c>HandlerAdapter</c> is used instead.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
/// <remarks>
/// Errors are reported through <c>FatalErr</c>; the code below is written as if that call
/// does not return (presumably it throws - confirm against its definition).
/// The <c>line</c> and <c>col</c> members are reset at the start and updated while reading.
/// </remarks>
public void Parse(IReader reader, IHandler handler)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }
    if (handler == null)
    {
        handler = new HandlerAdapter();
    }

    AttrListImpl attrList = new AttrListImpl();
    string lastAttrName = null;
    Stack tagStack = new Stack();
    string elementName = null;
    line = 1;
    col = 0;
    int currCh = 0;
    int stateCode = 0;
    StringBuilder sbChars = new StringBuilder();
    bool seenCData = false;
    bool isComment = false;
    bool isDTD = false;
    int bracketSwitch = 0;

    handler.OnStartParsing(this);

    while (true)
    {
        ++this.col;
        currCh = reader.Read();

        if (currCh == -1)
        {
            // End of input is only legal while the machine is in state 0.
            if (stateCode != 0)
            {
                FatalErr("Unexpected EOF");
            }
            break;
        }

        // Classify the character by its position in the literal; characters that
        // are not in the literal yield -1, which the mask turns into 0xF.
        int charCode = "<>/?=&'\"![ ]\t\r\n".IndexOf((char)currCh) & 0xF;
        if (charCode == (int)CharKind.CR)
        {
            continue; // ignore
        }
        // whitespace ::= (#x20 | #x9 | #xd | #xa)+
        if (charCode == (int)CharKind.TAB)
        {
            charCode = (int)CharKind.SPACE; // tab == space
        }
        if (charCode == (int)CharKind.EOL)
        {
            this.col = 0;
            this.line++;
            charCode = (int)CharKind.SPACE;
        }

        // The table entry packs the next state in the low byte and the action above it.
        int actionCode = MiniParser.Xlat(charCode, stateCode);
        stateCode = actionCode & 0xFF;
        // Ignore newline inside attribute value.
        if (currCh == '\n' && (stateCode == 0xE || stateCode == 0xF))
        {
            continue;
        }
        actionCode >>= 8;

        // States with the high bit set are error states; the remaining bits index 'errors'.
        if (stateCode >= 0x80)
        {
            if (stateCode == 0xFF)
            {
                FatalErr("State dispatch error.");
            }
            else
            {
                FatalErr(errors[stateCode ^ 0x80]);
            }
        }

        switch (actionCode)
        {
            case (int)ActionCode.START_ELEM:
                handler.OnStartElement(elementName, attrList);
                if (currCh != '/')
                {
                    tagStack.Push(elementName);
                }
                else
                {
                    // Self-closing element: report the end immediately, nothing to push.
                    handler.OnEndElement(elementName);
                }
                attrList.Clear();
                break;

            case (int)ActionCode.END_ELEM:
                elementName = sbChars.ToString();
                sbChars = new StringBuilder();
                string endName = null;
                if (tagStack.Count == 0 || elementName != (endName = tagStack.Pop() as string))
                {
                    if (endName == null)
                    {
                        FatalErr("Tag stack underflow");
                    }
                    else
                    {
                        // endName (popped from the stack) is the tag that was expected;
                        // elementName is the closing tag actually found in the input.
                        FatalErr(String.Format("Expected end tag '{0}' but found '{1}'", endName, elementName));
                    }
                }
                handler.OnEndElement(elementName);
                break;

            case (int)ActionCode.END_NAME:
                elementName = sbChars.ToString();
                sbChars = new StringBuilder();
                if (currCh != '/' && currCh != '>')
                {
                    break;
                }
                // The name was terminated by '/' or '>': the start tag is complete.
                goto case (int)ActionCode.START_ELEM;

            case (int)ActionCode.SET_ATTR_NAME:
                lastAttrName = sbChars.ToString();
                sbChars = new StringBuilder();
                break;

            case (int)ActionCode.SET_ATTR_VAL:
                if (lastAttrName == null)
                {
                    FatalErr("Internal error.");
                }
                attrList.Add(lastAttrName, sbChars.ToString());
                sbChars = new StringBuilder();
                lastAttrName = null;
                break;

            case (int)ActionCode.SEND_CHARS:
                handler.OnChars(sbChars.ToString());
                sbChars = new StringBuilder();
                break;

            case (int)ActionCode.START_CDATA:
                string cdata = "CDATA[";
                isComment = false;
                isDTD = false;

                if (currCh == '-')
                {
                    // "<!-" must be followed by a second '-' to open a comment.
                    currCh = reader.Read();

                    if (currCh != '-')
                    {
                        FatalErr("Invalid comment");
                    }

                    this.col++;
                    isComment = true;
                    twoCharBuff[0] = -1;
                    twoCharBuff[1] = -1;
                }
                else
                {
                    if (currCh != '[')
                    {
                        // Anything else after "<!" is handled as a DTD-style declaration.
                        isDTD = true;
                        bracketSwitch = 0;
                        break;
                    }

                    // Consume the "CDATA[" marker; a mismatch stops the scan early
                    // but is not reported as an error here.
                    for (int i = 0; i < cdata.Length; i++)
                    {
                        if (reader.Read() != cdata[i])
                        {
                            this.col += i + 1;
                            break;
                        }
                    }
                    this.col += cdata.Length;
                    seenCData = true;
                }
                break;

            case (int)ActionCode.END_CDATA:
                int n = 0;
                currCh = ']';

                // Count the run of ']' characters (including the one already consumed).
                while (currCh == ']')
                {
                    currCh = reader.Read();
                    n++;
                }

                if (currCh != '>')
                {
                    // Not a terminator: the brackets and the following character are content.
                    for (int i = 0; i < n; i++)
                    {
                        sbChars.Append(']');
                    }
                    sbChars.Append((char)currCh);
                    stateCode = 0x12;
                }
                else
                {
                    // "]]>" closes the section; any brackets beyond the final two are content.
                    for (int i = 0; i < n - 2; i++)
                    {
                        sbChars.Append(']');
                    }
                    seenCData = false;
                }

                this.col += n;
                break;

            case (int)ActionCode.ERROR:
                FatalErr(String.Format("Error {0}", stateCode));
                break;

            case (int)ActionCode.STATE_CHANGE:
                break;

            case (int)ActionCode.FLUSH_CHARS_STATE_CHANGE:
                sbChars = new StringBuilder();
                if (currCh != '<')
                {
                    goto case (int)ActionCode.ACC_CHARS_STATE_CHANGE;
                }
                break;

            case (int)ActionCode.ACC_CHARS_STATE_CHANGE:
                sbChars.Append((char)currCh);
                break;

            case (int)ActionCode.ACC_CDATA:
                if (isComment)
                {
                    // A comment ends at '>' when the two preceding characters were "--".
                    if (currCh == '>' && twoCharBuff[0] == '-' && twoCharBuff[1] == '-')
                    {
                        isComment = false;
                        stateCode = 0;
                    }
                    else
                    {
                        twoCharBuff[0] = twoCharBuff[1];
                        twoCharBuff[1] = currCh;
                    }
                }
                else if (isDTD)
                {
                    // bracketSwitch is toggled on every '<' or '>'; the declaration
                    // ends on a '>' that leaves the toggle non-zero.
                    if (currCh == '<' || currCh == '>')
                    {
                        bracketSwitch ^= 1;
                    }

                    if (currCh == '>' && bracketSwitch != 0)
                    {
                        isDTD = false;
                        stateCode = 0;
                    }
                }
                else
                {
                    // When splitCData is set, text gathered before the CDATA section
                    // is sent to the handler separately from the section's content.
                    if (this.splitCData && sbChars.Length > 0 && seenCData)
                    {
                        handler.OnChars(sbChars.ToString());
                        sbChars = new StringBuilder();
                    }
                    seenCData = false;
                    sbChars.Append((char)currCh);
                }
                break;

            case (int)ActionCode.PROC_CHAR_REF:
                currCh = reader.Read();
                int cl = this.col + 1;
                if (currCh == '#')
                {
                    // character reference
                    int r = 10;
                    int chCode = 0;
                    int nDigits = 0;
                    currCh = reader.Read();
                    cl++;

                    if (currCh == 'x')
                    {
                        currCh = reader.Read();
                        cl++;
                        r = 16;
                    }

                    NumberStyles style = r == 16 ? NumberStyles.HexNumber : NumberStyles.Integer;

                    // Accumulate digits one at a time in radix r until a non-digit is read.
                    while (true)
                    {
                        int x = -1;
                        if (Char.IsNumber((char)currCh) || "abcdef".IndexOf(Char.ToLower((char)currCh)) != -1)
                        {
                            try
                            {
                                x = Int32.Parse(new string((char)currCh, 1), style);
                            }
                            catch (FormatException)
                            {
                                // Not a valid digit for the selected radix.
                                x = -1;
                            }
                        }
                        if (x == -1)
                        {
                            break;
                        }
                        chCode *= r;
                        chCode += x;
                        nDigits++;
                        currCh = reader.Read();
                        cl++;
                    }

                    if (currCh == ';' && nDigits > 0)
                    {
                        sbChars.Append((char)chCode);
                    }
                    else
                    {
                        FatalErr("Bad char ref");
                    }
                }
                else
                {
                    // entity reference
                    string entityRefChars = "aglmopqstu"; // amp | apos | quot | gt | lt
                    string entities = "&'\"><";

                    int pos = 0;
                    int entIdx = 0xF;
                    int predShift = 0;

                    int sbLen = sbChars.Length;

                    // Each character of the literal below packs four nibbles for the letter at
                    // the same index in entityRefChars: lPred, rPred, lBr, rBr (high to low).
                    // lBr/rBr are the letters allowed next; lPred/rPred select the entity index.
                    while (true)
                    {
                        if (pos != 0xF)
                        {
                            pos = entityRefChars.IndexOf((char)currCh) & 0xF;
                        }
                        if (pos == 0xF)
                        {
                            FatalErr(errors[7]);
                        }
                        sbChars.Append((char)currCh);

                        int path = "\uFF35\u3F8F\u4F8F\u0F5F\uFF78\uE1F4\u2299\uEEFF\uEEFF\uFF4F"[pos];
                        int lBr = (path >> 4) & 0xF;
                        int rBr = path & 0xF;
                        int lPred = path >> 12;
                        int rPred = (path >> 8) & 0xF;
                        currCh = reader.Read();
                        cl++;
                        pos = 0xF;
                        if (lBr != 0xF && currCh == entityRefChars[lBr])
                        {
                            if (lPred < 0xE)
                            {
                                entIdx = lPred;
                            }
                            predShift = 12; // left
                        }
                        else if (rBr != 0xF && currCh == entityRefChars[rBr])
                        {
                            if (rPred < 0xE)
                            {
                                entIdx = rPred;
                            }
                            predShift = 8; // right
                        }
                        else if (currCh == ';')
                        {
                            if (entIdx != 0xF
                                && predShift != 0
                                && ((path >> predShift) & 0xF) == 0xE)
                            {
                                break;
                            }
                            continue; // pos == 0xF
                        }

                        pos = 0;
                    }

                    // Re-check the collected name against the five supported entities
                    // before replacing it with the corresponding character.
                    int l = cl - this.col - 1;

                    if ((l > 0 && l < 5)
                        && (StrEquals("amp", sbChars, sbLen, l)
                            || StrEquals("apos", sbChars, sbLen, l)
                            || StrEquals("quot", sbChars, sbLen, l)
                            || StrEquals("lt", sbChars, sbLen, l)
                            || StrEquals("gt", sbChars, sbLen, l)))
                    {
                        sbChars.Length = sbLen;
                        sbChars.Append(entities[entIdx]);
                    }
                    else
                    {
                        FatalErr(errors[7]);
                    }
                }

                this.col = cl;
                break;

            default:
                FatalErr(String.Format("Unexpected action code - {0}.", actionCode));
                break;
        }
    } // while (true)

    handler.OnEndParsing(this);
} // Parse