/// <summary>
/// Creates a <c>Document</c> from a scanner state by forwarding the state's
/// <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public Document(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Creates a <c>CData</c> node from a scanner state by forwarding the state's
/// <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public CData(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Creates a <c>Text</c> node from a scanner state by forwarding the state's
/// <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public Text(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Creates an <c>Element</c> from a scanner state by forwarding the state's
/// <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public Element(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Creates a <c>BaseElement</c> from a scanner state, pairing the state's
/// <c>Context</c> with a freshly constructed <c>BasicData</c> payload.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public BaseElement(ScannerState state)
    : this(state.Context, new BasicData()) {
}
/// <summary>
/// Runs the Ragel-generated Hpricot state machine over <paramref name="source"/>,
/// reading it chunk by chunk into <c>buf</c> and emitting tokens through the
/// <c>ELE</c>/<c>EBLK</c>/<c>TEXT_PASS</c> helpers.
/// </summary>
/// <param name="source">
/// Either an object responding to <c>read</c> (called with the free buffer space)
/// or an object responding to <c>to_str</c> (converted to a string once up front).
/// </param>
/// <param name="options">
/// Option hash; consulted for the xml / xhtml-strict / fixup-tags flags and stored
/// on the document as <c>@options</c>. Only used when no block parameter is set.
/// </param>
/// <param name="elementContent">Element-content table stored on the scanner state as <c>EC</c>.</param>
/// <returns><c>_state.Doc</c> when a scanner state exists; otherwise <c>null</c>.</returns>
/// <remarks>
/// Throws a Ruby ArgumentError when <paramref name="source"/> responds to neither
/// <c>read</c> nor <c>to_str</c>, and a <c>ParserException</c> when the state machine
/// enters its error state.
/// </remarks>
public IHpricotDataContainer Scan(Object/*!*/ source, Hash/*!*/ options, Hash/*!*/ elementContent) {
    tag = new Object[1];
    akey = new Object[1];
    aval = new Object[1];

    taint = _context.IsObjectTainted(source);

    bool sourceRespondsToRead = _context.RespondTo(source, "read");
    RubyIOReadCallSite readIOCallSite = null;

    if (sourceRespondsToRead) {
        readIOCallSite = _readIOStorage.GetCallSite("read", 1);
    } else if (_context.RespondTo(source, "to_str")) {
        source = Protocols.CastToString(_toMutableString, source);
    } else {
        throw RubyExceptions.CreateArgumentError("bad Hpricot argument, String or IO only please.");
    }

    // A document tree is only built when no block parameter was supplied.
    if (_blockParam == null) {
        var state = new ScannerState(_context);
        state.Doc = new Document(state);
        state.Focus = state.Doc;
        state.Xml = OPT(options, _optXml);
        state.Strict = OPT(options, _optXhtmlStrict);
        state.Fixup = state.Strict ? true : OPT(options, _optFixupTags);
        state.EC = elementContent;
        _context.SetInstanceVariable(state.Doc, "@options", options);
        _state = state;
    }

    Int32? rubyBufferSize = Utilities.GetBufferSize(_context);
    buffer_size = rubyBufferSize ?? DEFAULT_BUFFER_SIZE;
    buf = new char[buffer_size];

    // Ragel state machine initialisation.
    {
        cs = hpricot_scan_start;
        ts = -1;
        te = -1;
        act = 0;
    }

    while (!done) {
        p = have;
        int pe;
        int space = buffer_size - have;

        // The retained token fills the whole buffer: grow it by one default increment.
        if (space == 0) {
            buffer_size += DEFAULT_BUFFER_SIZE;
            Array.Resize<char>(ref buf, buffer_size);
            space = buffer_size - have;
        }

        char[] chars;
        if (sourceRespondsToRead) {
            // A null result (e.g. nil at end of stream) or a non-string result is
            // treated as an empty chunk, which ends the scan, instead of being
            // dereferenced.
            MutableString chunk = readIOCallSite.Target(readIOCallSite, source, space) as MutableString;
            chars = chunk != null ? BinaryEncoding.Instance.GetChars(chunk.ToByteArray()) : new char[0];
        } else {
            MutableString str = source as MutableString;
            int end = Math.Min(str.Length, nread + space);
            chars = str.Encoding.Encoding.GetChars(str.GetBinarySlice(nread, end - nread));
        }

        Array.Copy(chars, 0, buf, p, chars.Length);
        int len = chars.Length;
        nread += len;

        // A short read marks the last chunk; the machine is run one position past the data.
        if (len < space) {
            len++;
            done = true;
        }

        pe = p + len;
        char[] data = buf;

        #region code generated by ragel
        {
            sbyte _klen;
            short _trans;
            byte _acts;
            sbyte _nacts;
            short _keys;

            if (p == pe) goto _test_eof;
        _resume:
            _acts = _hpricot_scan_from_state_actions[cs];
            _nacts = _hpricot_scan_actions[_acts++];
            while (_nacts-- > 0) {
                switch (_hpricot_scan_actions[_acts++]) {
                    case 21: { ts = p; } break;
                    default: break;
                }
            }

            _keys = _hpricot_scan_key_offsets[cs];
            _trans = (short)_hpricot_scan_index_offsets[cs];

            _klen = _hpricot_scan_single_lengths[cs];
            if (_klen > 0) {
                short _lower = _keys;
                short _mid;
                short _upper = (short)(_keys + _klen - 1);
                while (true) {
                    if (_upper < _lower) break;

                    _mid = (short)(_lower + ((_upper - _lower) >> 1));
                    if (data[p] < _hpricot_scan_trans_keys[_mid]) _upper = (short)(_mid - 1);
                    else if (data[p] > _hpricot_scan_trans_keys[_mid]) _lower = (short)(_mid + 1);
                    else {
                        _trans += (short)(_mid - _keys);
                        goto _match;
                    }
                }
                _keys += (short)_klen;
                _trans += (short)_klen;
            }

            _klen = _hpricot_scan_range_lengths[cs];
            if (_klen > 0) {
                short _lower = _keys;
                short _mid;
                short _upper = (short)(_keys + (_klen << 1) - 2);
                while (true) {
                    if (_upper < _lower) break;

                    _mid = (short)(_lower + (((_upper - _lower) >> 1) & ~1));
                    if (data[p] < _hpricot_scan_trans_keys[_mid]) _upper = (short)(_mid - 2);
                    else if (data[p] > _hpricot_scan_trans_keys[_mid + 1]) _lower = (short)(_mid + 2);
                    else {
                        _trans += (short)((_mid - _keys) >> 1);
                        goto _match;
                    }
                }
                _trans += (short)_klen;
            }

        _match:
        _eof_trans:
            cs = _hpricot_scan_trans_targs[_trans];

            if (_hpricot_scan_trans_actions[_trans] == 0) goto _again;

            _acts = _hpricot_scan_trans_actions[_trans];
            _nacts = _hpricot_scan_actions[_acts++];
            while (_nacts-- > 0) {
                switch (_hpricot_scan_actions[_acts++]) {
                    case 0: {
                        if (text) {
                            CAT(tag, p);
                            ELE(sym_text);
                            text = false;
                        }
                        attr = null;
                        tag[0] = null;
                        mark_tag = -1;
                        ele_open = true;
                    } break;
                    case 1: { mark_tag = p; } break;
                    case 2: { mark_aval = p; } break;
                    case 3: { mark_akey = p; } break;
                    case 4: { SET(tag, p); } break;
                    case 5: { SET(aval, p); } break;
                    case 6: {
                        if (buf[p - 1] == '"' || buf[p - 1] == '\'') { SET(aval, p - 1); } else { SET(aval, p); }
                    } break;
                    case 7: { SET(akey, p); } break;
                    case 8: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("version"), aval); } break;
                    case 9: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("encoding"), aval); } break;
                    case 10: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("standalone"), aval); } break;
                    case 11: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("public_id"), aval); } break;
                    case 12: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("system_id"), aval); } break;
                    case 13: {
                        akey[0] = null;
                        aval[0] = null;
                        mark_akey = -1;
                        mark_aval = -1;
                    } break;
                    case 14: { ATTR(akey, aval); } break;
                    case 15: { curline += 1; } break;
                    case 16: { TEXT_PASS(); } break;
                    case 17: { EBLK(sym_comment, 3); { cs = 204; if (true) goto _again; } } break;
                    case 18: { EBLK(sym_cdata, 3); { cs = 204; if (true) goto _again; } } break;
                    case 19: { EBLK(sym_procins, 2); { cs = 204; if (true) goto _again; } } break;
                    case 22: { te = p + 1; } break;
                    case 23: { te = p + 1; } break;
                    case 24: { te = p + 1; { TEXT_PASS(); } } break;
                    case 25: { te = p; p--; { TEXT_PASS(); } } break;
                    case 26: { { p = ((te)) - 1; } { TEXT_PASS(); } } break;
                    case 27: { te = p + 1; } break;
                    case 28: { te = p + 1; { TEXT_PASS(); } } break;
                    case 29: { te = p; p--; { TEXT_PASS(); } } break;
                    case 30: { { p = ((te)) - 1; } { TEXT_PASS(); } } break;
                    case 31: { te = p + 1; } break;
                    case 32: { te = p + 1; { TEXT_PASS(); } } break;
                    case 33: { te = p; p--; { TEXT_PASS(); } } break;
                    case 34: { act = 8; } break;
                    case 35: { act = 10; } break;
                    case 36: { act = 12; } break;
                    case 37: { act = 15; } break;
                    case 38: { te = p + 1; { ELE(sym_xmldecl); } } break;
                    case 39: { te = p + 1; { ELE(sym_doctype); } } break;
                    case 40: { te = p + 1; { ELE(sym_stag); } } break;
                    case 41: { te = p + 1; { ELE(sym_etag); } } break;
                    case 42: { te = p + 1; { ELE(sym_emptytag); } } break;
                    case 43: { te = p + 1; { { cs = 214; if (true) goto _again; } } } break;
                    case 44: { te = p + 1; { { cs = 216; if (true) goto _again; } } } break;
                    case 45: { te = p + 1; { TEXT_PASS(); } } break;
                    case 46: { te = p; p--; { ELE(sym_doctype); } } break;
                    case 47: { te = p; p--; { { cs = 218; if (true) goto _again; } } } break;
                    case 48: { te = p; p--; { TEXT_PASS(); } } break;
                    case 49: { { p = ((te)) - 1; } { { cs = 218; if (true) goto _again; } } } break;
                    case 50: { { p = ((te)) - 1; } { TEXT_PASS(); } } break;
                    case 51: {
                        switch (act) {
                            case 8: { { p = ((te)) - 1; } ELE(sym_doctype); } break;
                            case 10: { { p = ((te)) - 1; } ELE(sym_stag); } break;
                            case 12: { { p = ((te)) - 1; } ELE(sym_emptytag); } break;
                            case 15: { { p = ((te)) - 1; } TEXT_PASS(); } break;
                        }
                    } break;
                    default: break;
                }
            }

        _again:
            _acts = _hpricot_scan_to_state_actions[cs];
            _nacts = _hpricot_scan_actions[_acts++];
            while (_nacts-- > 0) {
                switch (_hpricot_scan_actions[_acts++]) {
                    case 20: { ts = -1; } break;
                    default: break;
                }
            }

            if (++p != pe) goto _resume;
        _test_eof: { }
            if (p == eof) {
                if (_hpricot_scan_eof_trans[cs] > 0) {
                    _trans = (short)(_hpricot_scan_eof_trans[cs] - 1);
                    goto _eof_trans;
                }
            }
        }
        #endregion

        if (cs == hpricot_scan_error) {
            String exceptionMessage;
            if (tag[0] != null) {
                // tag is a one-slot holder: format the held value, not the array itself.
                exceptionMessage = String.Format("parse error on element <{0}>, starting on line {1}.\n{2}", tag[0], curline, NO_WAY_SERIOUSLY);
            } else {
                exceptionMessage = String.Format("parse error on line {0}.\n{1}", curline, NO_WAY_SERIOUSLY);
            }
            throw new ParserException(exceptionMessage);
        }

        // Input ended inside an open element: reinterpret the pending token as text.
        if (done && ele_open) {
            ele_open = false;
            if (ts > -1) {
                mark_tag = ts;
                ts = -1;
                text = true;
            }
        }

        if (ts == -1) {
            have = 0;
            /* text nodes have no ts because each byte is parsed alone */
            if (mark_tag != -1 && text) {
                if (done) {
                    if (mark_tag < p - 1) {
                        CAT(tag, p - 1);
                        ELE(sym_text);
                    }
                } else {
                    CAT(tag, p);
                }
            }
            mark_tag = 0;
        } else {
            // A token is still in progress: move it to the front of the buffer and
            // rebase the marks before reading the next chunk.
            have = pe - ts;
            Array.Copy(buf, ts, buf, 0, have);
            SLIDE(tag);
            SLIDE(akey);
            SLIDE(aval);
            te = (te - ts);
            ts = 0;
        }
    }

    if (_state != null) {
        return _state.Doc;
    }

    return null;
}
/// <summary>
/// Creates a <c>ProcedureInstruction</c> from a scanner state by forwarding the
/// state's <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public ProcedureInstruction(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Creates an <c>XmlDeclaration</c> from a scanner state by forwarding the
/// state's <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public XmlDeclaration(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Creates a <c>BogusETag</c> from a scanner state by forwarding the state's
/// <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public BogusETag(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Creates a <c>BaseElement</c> from a scanner state and a caller-supplied data
/// payload by forwarding the state's <c>Context</c> and <paramref name="data"/>
/// to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
/// <param name="data">Data payload handed to the delegated constructor unchanged.</param>
public BaseElement(ScannerState state, BasicData data)
    : this(state.Context, data) {
}
/// <summary>
/// Creates a <c>DocumentType</c> from a scanner state by forwarding the state's
/// <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public DocumentType(ScannerState state)
    : this(state.Context) {
}
/// <summary>
/// Fills in the data payload of a freshly created node according to the node's
/// runtime type, records the node as <c>state.Last</c>, and hands it back.
/// </summary>
/// <param name="ele">Node whose payload is populated.</param>
/// <param name="state">Scanner state; its <c>Last</c> is set to <paramref name="ele"/>.</param>
/// <param name="sym">Token kind; decides whether an element keeps its raw text.</param>
/// <param name="tag">Stored as the payload's <c>Tag</c>.</param>
/// <param name="attr">Stored as <c>Attr</c> for elements and for non-end-tag attribute nodes.</param>
/// <param name="ec">Stored as <c>EC</c> for elements.</param>
/// <param name="raw">Start offset of the raw text in <c>buf</c>; values of -1 or less mean "no raw text".</param>
/// <param name="rawlen">Length of the raw text in <c>buf</c>.</param>
/// <returns>The same <paramref name="ele"/> instance.</returns>
private IHpricotDataContainer H_ELE(IHpricotDataContainer ele, ScannerState state, RubySymbol sym, MutableString tag, Object attr, Object ec, Int32 raw, Int32 rawlen) {
    bool isEndTag = ele is ETag || ele is BogusETag;

    if (ele is Element) {
        ElementData elementData = ele.GetData<ElementData>();
        elementData.Name = 0;
        elementData.Tag = tag;
        elementData.Attr = attr;
        elementData.EC = ec;

        // Raw source text is kept only for tag-like tokens that actually carry it.
        if (raw > -1) {
            bool tagLike = sym_emptytag.Equals(sym)
                || sym_stag.Equals(sym)
                || sym_etag.Equals(sym)
                || sym_doctype.Equals(sym);
            if (tagLike) {
                elementData.Raw = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
            }
        }
    } else if (isEndTag || ele is DocumentType || ele is ProcedureInstruction || ele is XmlDeclaration) {
        AttributeData attributeData = ele.GetData<AttributeData>();
        attributeData.Tag = tag;

        if (!isEndTag) {
            attributeData.Attr = attr;
        } else if (raw > -1) {
            // End tags store their raw source text in the Attr slot.
            attributeData.Attr = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
        }
    } else {
        BasicData basicData = ele.GetData<BasicData>();
        basicData.Tag = tag;
    }

    state.Last = ele;
    return ele;
}
/// <summary>
/// Turns one scanned token into a node and attaches it to the document tree held
/// by <paramref name="state"/>, moving <c>state.Focus</c> for start and end tags.
/// </summary>
/// <param name="state">Scanner state carrying the document, focus, last node and mode flags.</param>
/// <param name="sym">Token kind (start tag, end tag, empty tag, text, cdata, comment, ...).</param>
/// <param name="tag">Tag name or text content of the token.</param>
/// <param name="attr">Attributes collected for the token, if any.</param>
/// <param name="raw">Start offset of the token's raw text in <c>buf</c>.</param>
/// <param name="rawlen">Length of the token's raw text in <c>buf</c>.</param>
/// <param name="taint">Not read by this method.</param>
private void rb_hpricot_token(ScannerState state, RubySymbol sym, MutableString tag, Object attr, int raw, int rawlen, bool taint) {
    Object ec = null;

    if (!state.Xml) {
        ElementData last = state.Focus.GetData<ElementData>();

        if (sym_emptytag.Equals(sym) || sym_stag.Equals(sym) || sym_etag.Equals(sym)) {
            Debug.Assert(state.EC is Hash, "state.EC is not an instance of Hash");
            if (state.EC.ContainsKey(tag)) {
                ec = rb_hash_lookup(state.EC, tag);
            } else {
                // Unknown as written: retry the lookup with the lower-cased name.
                tag = MutableStringOps.DownCase(tag);
                ec = rb_hash_aref(state.EC, tag);
            }
        }

        // Inside a CDATA-content element, anything other than its own end tag
        // (or procins / comment / cdata / text) is demoted to plain text.
        // TODO: tag.GetHashCode() == last.name.GetHashCode() ??
        if (sym_CDATA.Equals(last.EC) &&
            (!sym_procins.Equals(sym) && !sym_comment.Equals(sym) && !sym_cdata.Equals(sym) && !sym_text.Equals(sym)) &&
            !(sym_etag.Equals(sym) && tag.GetHashCode() == last.Name.GetHashCode())) {
            sym = sym_text;
            tag = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
        }

        // Reconcile the tag form with the element-content table.
        if (ec != null) {
            if (sym_emptytag.Equals(sym)) {
                if (!sym_EMPTY.Equals(ec)) {
                    sym = sym_stag;
                }
            } else if (sym_stag.Equals(sym)) {
                if (sym_EMPTY.Equals(ec)) {
                    sym = sym_emptytag;
                }
            }
        }
    }

    if (sym_emptytag.Equals(sym) || sym_stag.Equals(sym)) {
        var ele = H_ELE(new Element(state), state, sym, tag, attr, ec, raw, rawlen);
        ElementData he = ele.GetData<ElementData>();
        he.Name = tag.GetHashCode();

        if (!state.Xml) {
            // Walk up from the focus to pick the ancestor this element attaches to.
            IHpricotDataContainer match = null;
            IHpricotDataContainer e = state.Focus;
            while (e != state.Doc) {
                ElementData hee = e.GetData<ElementData>();
                if (hee.EC is Hash) {
                    Object has;
                    if ((hee.EC as Hash).TryGetValue(he.Name, out has)) {
                        if (has is bool && (bool)has == true) {
                            if (match == null) {
                                match = e;
                            }
                        } else if (symAllow.Equals(has)) {
                            match = state.Focus;
                        } else if (symDeny.Equals(has)) {
                            match = null;
                        }
                    }
                }
                e = hee.Parent;
            }

            if (match == null) {
                match = state.Focus;
            }
            state.Focus = match;
        }

        rb_hpricot_add(state.Focus, ele);

        //
        // in the case of a start tag that should be empty, just
        // skip the step that focuses the element.  focusing moves
        // us deeper into the document.
        //
        if (sym_stag.Equals(sym)) {
            if (state.Xml || !sym_EMPTY.Equals(ec)) {
                state.Focus = ele;
                state.Last = null;
            }
        }
    } else if (sym_etag.Equals(sym)) {
        int name;
        IHpricotDataContainer match = null;
        IHpricotDataContainer e = state.Focus;

        if (state.Strict) {
            Debug.Assert(state.EC is Hash, "state.EC is not an instance of Hash");
            if (!state.EC.ContainsKey(tag)) {
                tag = MutableString.CreateAscii("div");
            }
        }

        //
        // another optimization will be to improve this very simple
        // O(n) tag search, where n is the depth of the focused tag.
        //
        // (see also: the search above for fixups)
        //
        name = tag.GetHashCode();
        while (e != state.Doc) {
            ElementData he = e.GetData<ElementData>();
            if (he == null) {
                // No element data means no parent link to follow: stop searching
                // and fall through to the bogus end-tag path.
                break;
            }
            if (he.Name == name) {
                match = e;
                break;
            }
            e = he.Parent;
        }

        if (match == null) {
            rb_hpricot_add(state.Focus, H_ELE(new BogusETag(state), state, sym, tag, attr, ec, raw, rawlen));
        } else {
            var ele = H_ELE(new ETag(state), state, sym, tag, attr, ec, raw, rawlen);
            ElementData he = match.GetData<ElementData>();
            // TODO: couldn't find this in the original implementation but it still sounds right.
            he.ETag = ele;
            state.Focus = he.Parent;
            state.Last = null;
        }
    } else if (sym_cdata.Equals(sym)) {
        rb_hpricot_add(state.Focus, H_ELE(new CData(state), state, sym, tag, attr, ec, raw, rawlen));
    } else if (sym_comment.Equals(sym)) {
        rb_hpricot_add(state.Focus, H_ELE(new Comment(state), state, sym, tag, attr, ec, raw, rawlen));
    } else if (sym_doctype.Equals(sym)) {
        if (state.Strict) {
            // TODO: need to check if attr is really an Hash instance
            Debug.Assert(attr is Hash, "attr is not an instance of Hash");
            Hash doctypeAttrs = attr as Hash;
            // Indexer assignment overwrites ids already collected by the scanner;
            // Add would throw on an existing key.
            doctypeAttrs[state.Context.CreateAsciiSymbol("system_id")] = MutableString.CreateAscii("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
            doctypeAttrs[state.Context.CreateAsciiSymbol("public_id")] = MutableString.CreateAscii("-//W3C//DTD XHTML 1.0 Strict//EN");
        }
        rb_hpricot_add(state.Focus, H_ELE(new DocumentType(state), state, sym, tag, attr, ec, raw, rawlen));
    } else if (sym_procins.Equals(sym)) {
        Debug.Assert(tag is MutableString, "tag is not an instance of MutableString");
        MatchData match = Utilities.ProcessInstructionParser.Match(RubyEncoding.Binary, tag);
        Debug.Assert(match.GroupSuccess(0) && match.GroupCount == 3, "ProcInsParse failed to parse procins");

        // Group 1 becomes the tag, group 2 the attribute payload.
        tag = match.GetGroupValue(1);
        attr = match.GetGroupValue(2);
        rb_hpricot_add(state.Focus, H_ELE(new ProcedureInstruction(state), state, sym, tag, attr, ec, raw, rawlen));
    } else if (sym_text.Equals(sym)) {
        // TODO: add raw_string as well?
        if (state.Last != null && state.Last is Text) {
            // Consecutive text tokens are merged into the previous text node.
            BasicData he = state.Last.GetData<BasicData>();
            Debug.Assert(tag is MutableString, "tag is not an instance of MutableString");
            Debug.Assert(he.Tag is MutableString, "he.Tag is not an instance of MutableString");
            (he.Tag as MutableString).Append(tag);
        } else {
            rb_hpricot_add(state.Focus, H_ELE(new Text(state), state, sym, tag, attr, ec, raw, rawlen));
        }
    } else if (sym_xmldecl.Equals(sym)) {
        rb_hpricot_add(state.Focus, H_ELE(new XmlDeclaration(state), state, sym, tag, attr, ec, raw, rawlen));
    }
}
/// <summary>
/// Creates an <c>ETag</c> from a scanner state by forwarding the state's
/// <c>Context</c> to the context-taking constructor.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public ETag(ScannerState state)
    : this(state.Context) {
}