/// <summary>
/// Creates a <c>Document</c> for the given scanner state by forwarding the
/// state's <c>Context</c> to the context-based constructor overload.
/// </summary>
/// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
public Document(ScannerState state) : this(state.Context) { }
Beispiel #2
0
 /// <summary>
 /// Creates a <c>CData</c> node for the given scanner state by forwarding the
 /// state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public CData(ScannerState state) : this(state.Context) { }
Beispiel #3
0
 /// <summary>
 /// Creates a <c>Text</c> node for the given scanner state by forwarding the
 /// state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public Text(ScannerState state) : this(state.Context) { }
Beispiel #4
0
 /// <summary>
 /// Creates an <c>Element</c> for the given scanner state by forwarding the
 /// state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public Element(ScannerState state) : this(state.Context) { }
 /// <summary>
 /// Creates a <c>BaseElement</c> for the given scanner state, pairing the
 /// state's <c>Context</c> with a freshly allocated <c>BasicData</c> instance.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public BaseElement(ScannerState state) : this(state.Context, new BasicData()) { }
Beispiel #6
0
        /// <summary>
        /// Feeds <paramref name="source"/> through the ragel-generated Hpricot state machine,
        /// chunk by chunk, emitting tokens through the ELE / TEXT_PASS / EBLK helpers.
        /// </summary>
        /// <param name="source">
        /// Either an object responding to <c>read</c> (consumed through the "read" call site)
        /// or an object responding to <c>to_str</c> (converted to a string up front).
        /// </param>
        /// <param name="options">
        /// Option hash; consulted for the xml, xhtml-strict and fixup-tags flags and stored on
        /// the document as <c>@options</c> when no block parameter is present.
        /// </param>
        /// <param name="elementContent">Hash stored as the scanner state's <c>EC</c> table.</param>
        /// <returns>
        /// The document held by the current scanner state, or <c>null</c> when there is no
        /// scanner state (a new state is only created here when no block parameter is set).
        /// </returns>
        /// <exception cref="ParserException">The state machine reached its error state.</exception>
        /// <remarks>
        /// An argument error (created through <c>RubyExceptions.CreateArgumentError</c>) is thrown
        /// when <paramref name="source"/> responds to neither <c>read</c> nor <c>to_str</c>.
        /// </remarks>
        public IHpricotDataContainer Scan(Object/*!*/ source, Hash/*!*/ options, Hash/*!*/ elementContent)
        {
            // single-slot holders so the SET/CAT/SLIDE helpers can update the current values
            tag = new Object[1];
            akey = new Object[1];
            aval = new Object[1];

            taint = _context.IsObjectTainted(source);

            bool sourceRespondsToRead = _context.RespondTo(source, "read");

            RubyIOReadCallSite readIOCallSite = null;
            if (sourceRespondsToRead) {
                readIOCallSite = _readIOStorage.GetCallSite("read", 1);
            }
            else if (_context.RespondTo(source, "to_str")) {
                source = Protocols.CastToString(_toMutableString, source);
            }
            else {
                throw RubyExceptions.CreateArgumentError("bad Hpricot argument, String or IO only please.");
            }

            // without a block the scanner builds a document tree rooted at state.Doc
            if (_blockParam == null) {
                var state = new ScannerState(_context);
                state.Doc = new Document(state);
                state.Focus = state.Doc;
                state.Xml = OPT(options, _optXml);
                state.Strict = OPT(options, _optXhtmlStrict);
                state.Fixup = state.Strict ? true : OPT(options, _optFixupTags);
                state.EC = elementContent;
                _context.SetInstanceVariable(state.Doc, "@options", options);
                _state = state;
            }

            Int32? rubyBufferSize = Utilities.GetBufferSize(_context);
            buffer_size = rubyBufferSize ?? DEFAULT_BUFFER_SIZE;
            buf = new char[buffer_size];

            {
                cs = hpricot_scan_start;
                ts = -1;
                te = -1;
                act = 0;
            }

            while (!done) {
                p = have;
                int pe;
                int space = buffer_size - have;

                // the unconsumed token fills the whole buffer: grow it before reading more
                if (space == 0) {
                    buffer_size += DEFAULT_BUFFER_SIZE;
                    Array.Resize<char>(ref buf, buffer_size);
                    space = buffer_size - have;
                }

                char[] chars;
                if (sourceRespondsToRead) {
                    // a read that yields no string (e.g. nil at end of stream) is treated as an
                    // empty chunk instead of dereferencing null
                    MutableString chunk = readIOCallSite.Target(readIOCallSite, source, space) as MutableString;
                    chars = chunk != null
                        ? BinaryEncoding.Instance.GetChars(chunk.ToByteArray())
                        : new char[0];
                }
                else {
                    MutableString str = source as MutableString;
                    int end = Math.Min(str.Length, nread + space);
                    chars = str.Encoding.Encoding.GetChars(str.GetBinarySlice(nread, end - nread));
                }

                Array.Copy(chars, 0, buf, p, chars.Length);

                int len = chars.Length;
                nread += len;

                // a short read marks the last chunk; one extra position is scanned past the data
                if (len < space) {
                    len++;
                    done = true;
                }

                pe = p + len;
                char[] data = buf;

                #region code generated by ragel

                {
                    sbyte _klen;
                    short _trans;
                    byte _acts;
                    sbyte _nacts;
                    short _keys;

                    if (p == pe)
                        goto _test_eof;
                _resume:
                    _acts = _hpricot_scan_from_state_actions[cs];
                    _nacts = _hpricot_scan_actions[_acts++];
                    while (_nacts-- > 0) {
                        switch (_hpricot_scan_actions[_acts++]) {
                            case 21: { ts = p; }
                                break;

                            default:
                                break;
                        }
                    }

                    _keys = _hpricot_scan_key_offsets[cs];
                    _trans = (short)_hpricot_scan_index_offsets[cs];

                    _klen = _hpricot_scan_single_lengths[cs];
                    if (_klen > 0) {
                        short _lower = _keys;
                        short _mid;
                        short _upper = (short)(_keys + _klen - 1);
                        while (true) {
                            if (_upper < _lower)
                                break;

                            _mid = (short)(_lower + ((_upper - _lower) >> 1));
                            if (data[p] < _hpricot_scan_trans_keys[_mid])
                                _upper = (short)(_mid - 1);
                            else if (data[p] > _hpricot_scan_trans_keys[_mid])
                                _lower = (short)(_mid + 1);
                            else {
                                _trans += (short)(_mid - _keys);
                                goto _match;
                            }
                        }
                        _keys += (short)_klen;
                        _trans += (short)_klen;
                    }

                    _klen = _hpricot_scan_range_lengths[cs];
                    if (_klen > 0) {
                        short _lower = _keys;
                        short _mid;
                        short _upper = (short)(_keys + (_klen << 1) - 2);
                        while (true) {
                            if (_upper < _lower)
                                break;

                            _mid = (short)(_lower + (((_upper - _lower) >> 1) & ~1));
                            if (data[p] < _hpricot_scan_trans_keys[_mid])
                                _upper = (short)(_mid - 2);
                            else if (data[p] > _hpricot_scan_trans_keys[_mid + 1])
                                _lower = (short)(_mid + 2);
                            else {
                                _trans += (short)((_mid - _keys) >> 1);
                                goto _match;
                            }
                        }
                        _trans += (short)_klen;
                    }

                _match:
                _eof_trans:
                    cs = _hpricot_scan_trans_targs[_trans];

                    if (_hpricot_scan_trans_actions[_trans] == 0)
                        goto _again;

                    _acts = _hpricot_scan_trans_actions[_trans];
                    _nacts = _hpricot_scan_actions[_acts++];
                    while (_nacts-- > 0) {
                        switch (_hpricot_scan_actions[_acts++]) {
                            case 0: {
                                    if (text) {
                                        CAT(tag, p);
                                        ELE(sym_text);
                                        text = false;
                                    }
                                    attr = null;
                                    tag[0] = null;
                                    mark_tag = -1;
                                    ele_open = true;
                                }
                                break;
                            case 1: { mark_tag = p; }
                                break;
                            case 2: { mark_aval = p; }
                                break;
                            case 3: { mark_akey = p; }
                                break;
                            case 4: { SET(tag, p); }
                                break;
                            case 5: { SET(aval, p); }
                                break;
                            case 6: {
                                    if (buf[p - 1] == '"' || buf[p - 1] == '\'') { SET(aval, p - 1); }
                                    else { SET(aval, p); }
                                }
                                break;
                            case 7: { SET(akey, p); }
                                break;
                            case 8: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("version"), aval); }
                                break;
                            case 9: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("encoding"), aval); }
                                break;
                            case 10: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("standalone"), aval); }
                                break;
                            case 11: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("public_id"), aval); }
                                break;
                            case 12: { SET(aval, p); ATTR(_state.Context.CreateAsciiSymbol("system_id"), aval); }
                                break;
                            case 13: {
                                    akey[0] = null;
                                    aval[0] = null;
                                    mark_akey = -1;
                                    mark_aval = -1;
                                }
                                break;
                            case 14: {
                                    ATTR(akey, aval);
                                }
                                break;
                            case 15: { curline += 1; }
                                break;
                            case 16: { TEXT_PASS(); }
                                break;
                            case 17: { EBLK(sym_comment, 3); { cs = 204; if (true) goto _again; } }
                                break;
                            case 18: { EBLK(sym_cdata, 3); { cs = 204; if (true) goto _again; } }
                                break;
                            case 19: { EBLK(sym_procins, 2); { cs = 204; if (true) goto _again; } }
                                break;
                            case 22: { te = p + 1; }
                                break;
                            case 23: { te = p + 1; }
                                break;
                            case 24: { te = p + 1; { TEXT_PASS(); } }
                                break;
                            case 25: { te = p; p--; { TEXT_PASS(); } }
                                break;
                            case 26: { { p = ((te)) - 1; } { TEXT_PASS(); } }
                                break;
                            case 27: { te = p + 1; }
                                break;
                            case 28: { te = p + 1; { TEXT_PASS(); } }
                                break;
                            case 29: { te = p; p--; { TEXT_PASS(); } }
                                break;
                            case 30: { { p = ((te)) - 1; } { TEXT_PASS(); } }
                                break;
                            case 31: { te = p + 1; }
                                break;
                            case 32: { te = p + 1; { TEXT_PASS(); } }
                                break;
                            case 33: { te = p; p--; { TEXT_PASS(); } }
                                break;
                            case 34: { act = 8; }
                                break;
                            case 35: { act = 10; }
                                break;
                            case 36: { act = 12; }
                                break;
                            case 37: { act = 15; }
                                break;
                            case 38: { te = p + 1; { ELE(sym_xmldecl); } }
                                break;
                            case 39: { te = p + 1; { ELE(sym_doctype); } }
                                break;
                            case 40: { te = p + 1; { ELE(sym_stag); } }
                                break;
                            case 41: { te = p + 1; { ELE(sym_etag); } }
                                break;
                            case 42: { te = p + 1; { ELE(sym_emptytag); } }
                                break;
                            case 43: { te = p + 1; { { cs = 214; if (true) goto _again; } } }
                                break;
                            case 44: { te = p + 1; { { cs = 216; if (true) goto _again; } } }
                                break;
                            case 45: { te = p + 1; { TEXT_PASS(); } }
                                break;
                            case 46: { te = p; p--; { ELE(sym_doctype); } }
                                break;
                            case 47: { te = p; p--; { { cs = 218; if (true) goto _again; } } }
                                break;
                            case 48: { te = p; p--; { TEXT_PASS(); } }
                                break;
                            case 49: { { p = ((te)) - 1; } { { cs = 218; if (true) goto _again; } } }
                                break;
                            case 50: { { p = ((te)) - 1; } { TEXT_PASS(); } }
                                break;
                            case 51: {
                                    switch (act) {
                                        case 8: { { p = ((te)) - 1; } ELE(sym_doctype); }
                                            break;
                                        case 10: { { p = ((te)) - 1; } ELE(sym_stag); }
                                            break;
                                        case 12: { { p = ((te)) - 1; } ELE(sym_emptytag); }
                                            break;
                                        case 15: { { p = ((te)) - 1; } TEXT_PASS(); }
                                            break;
                                    }
                                }
                                break;

                            default:
                                break;
                        }
                    }

                _again:
                    _acts = _hpricot_scan_to_state_actions[cs];
                    _nacts = _hpricot_scan_actions[_acts++];
                    while (_nacts-- > 0) {
                        switch (_hpricot_scan_actions[_acts++]) {
                            case 20: { ts = -1; }
                                break;

                            default:
                                break;
                        }
                    }

                    if (++p != pe)
                        goto _resume;
                _test_eof: { }
                    if (p == eof) {
                        if (_hpricot_scan_eof_trans[cs] > 0) {
                            _trans = (short)(_hpricot_scan_eof_trans[cs] - 1);
                            goto _eof_trans;
                        }
                    }

                }

                #endregion

                if (cs == hpricot_scan_error) {
                    String exceptionMessage;
                    if (tag[0] != null) {
                        // report the tag held in the slot, not the Object[] holder itself
                        exceptionMessage = String.Format("parse error on element <{0}>, starting on line {1}.\n{2}", tag[0], curline, NO_WAY_SERIOUSLY);
                    }
                    else {
                        exceptionMessage = String.Format("parse error on line {0}.\n{1}", curline, NO_WAY_SERIOUSLY);
                    }
                    throw new ParserException(exceptionMessage);
                }

                // input ended inside an open element: hand the pending token over as text
                if (done && ele_open) {
                    ele_open = false;
                    if (ts > -1) {
                        mark_tag = ts;
                        ts = -1;
                        text = true;
                    }
                }

                if (ts == -1) {
                    have = 0;
                    /* text nodes have no ts because each byte is parsed alone */
                    if (mark_tag != -1 && text) {
                        if (done) {
                            if (mark_tag < p - 1) {
                                CAT(tag, p - 1);
                                ELE(sym_text);
                            }
                        }
                        else {
                            CAT(tag, p);
                        }
                    }
                    mark_tag = 0;
                }
                else {
                    // a token is still in progress: move it to the front of the buffer and
                    // rebase the marks so the next chunk is appended after it
                    have = pe - ts;
                    Array.Copy(buf, ts, buf, 0, have);
                    SLIDE(tag);
                    SLIDE(akey);
                    SLIDE(aval);
                    te = (te - ts);
                    ts = 0;
                }
            }

            if (_state != null) {
                return _state.Doc;
            }

            return null;
        }
 /// <summary>
 /// Creates a <c>ProcedureInstruction</c> for the given scanner state by forwarding
 /// the state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public ProcedureInstruction(ScannerState state) : this(state.Context) { }
Beispiel #8
0
 /// <summary>
 /// Creates an <c>XmlDeclaration</c> for the given scanner state by forwarding
 /// the state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public XmlDeclaration(ScannerState state) : this(state.Context) { }
Beispiel #9
0
 /// <summary>
 /// Creates a <c>BogusETag</c> for the given scanner state by forwarding
 /// the state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public BogusETag(ScannerState state) : this(state.Context) { }
Beispiel #10
0
 /// <summary>
 /// Creates an <c>XmlDeclaration</c> for the given scanner state by forwarding
 /// the state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public XmlDeclaration(ScannerState state) : this(state.Context) { }
Beispiel #11
0
 /// <summary>
 /// Creates an <c>Element</c> for the given scanner state by forwarding the
 /// state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public Element(ScannerState state) : this(state.Context) { }
 /// <summary>
 /// Creates a <c>ProcedureInstruction</c> for the given scanner state by forwarding
 /// the state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public ProcedureInstruction(ScannerState state) : this(state.Context) { }
Beispiel #13
0
 /// <summary>
 /// Creates a <c>BaseElement</c> for the given scanner state and data object by
 /// forwarding the state's <c>Context</c> together with <paramref name="data"/>.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 /// <param name="data">Data object handed unchanged to the context-based overload.</param>
 public BaseElement(ScannerState state, BasicData data) : this(state.Context, data) { }
Beispiel #14
0
 /// <summary>
 /// Creates a <c>DocumentType</c> for the given scanner state by forwarding
 /// the state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public DocumentType(ScannerState state) : this(state.Context) { }
Beispiel #15
0
        /// <summary>
        /// Populates the data object of a freshly created node and records the node as the
        /// scanner state's most recent one.
        /// </summary>
        /// <param name="ele">Node to fill in; its concrete type selects which data object is written.</param>
        /// <param name="state">Scanner state whose <c>Last</c> is set to <paramref name="ele"/>.</param>
        /// <param name="sym">Token kind; decides whether an <c>Element</c> keeps its raw text.</param>
        /// <param name="tag">Value stored as the node's <c>Tag</c>.</param>
        /// <param name="attr">Value stored as <c>Attr</c> (not used for end tags).</param>
        /// <param name="ec">Value stored as <c>EC</c> on <c>Element</c> nodes.</param>
        /// <param name="raw">Start offset of the raw text in <c>buf</c>; values below zero mean "no raw text".</param>
        /// <param name="rawlen">Length of the raw text in <c>buf</c>.</param>
        /// <returns>The same <paramref name="ele"/> instance.</returns>
        private IHpricotDataContainer H_ELE(IHpricotDataContainer ele, ScannerState state, RubySymbol sym, MutableString tag, Object attr, Object ec, Int32 raw, Int32 rawlen)
        {
            bool isClosingTag = ele is ETag || ele is BogusETag;
            bool carriesAttributes = isClosingTag || ele is DocumentType || ele is ProcedureInstruction || ele is XmlDeclaration;

            if (ele is Element) {
                ElementData elementData = ele.GetData<ElementData>();

                elementData.Name = 0;
                elementData.Tag = tag;
                elementData.Attr = attr;
                elementData.EC = ec;

                // only tag-like tokens and doctypes retain their raw source text
                if (raw > -1) {
                    bool keepsRawText = sym_emptytag.Equals(sym) || sym_stag.Equals(sym) || sym_etag.Equals(sym) || sym_doctype.Equals(sym);
                    if (keepsRawText) {
                        elementData.Raw = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
                    }
                }
            }
            else if (carriesAttributes) {
                AttributeData attributeData = ele.GetData<AttributeData>();
                attributeData.Tag = tag;

                if (!isClosingTag) {
                    attributeData.Attr = attr;
                }
                else if (raw > -1) {
                    // end tags store their raw text in the Attr slot
                    attributeData.Attr = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
                }
            }
            else {
                BasicData basicData = ele.GetData<BasicData>();
                basicData.Tag = tag;
            }

            state.Last = ele;
            return ele;
        }
Beispiel #16
0
 /// <summary>
 /// Creates a <c>CData</c> node for the given scanner state by forwarding the
 /// state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public CData(ScannerState state) : this(state.Context) { }
Beispiel #17
0
        /// <summary>
        /// Turns one scanned token into a node and attaches it to the document tree held by
        /// <paramref name="state"/>, moving the state's focus for start and end tags.
        /// </summary>
        /// <param name="state">Scanner state providing the document, the current focus and the parse flags.</param>
        /// <param name="sym">Token kind (start/end/empty tag, text, comment, cdata, doctype, procins, xmldecl).</param>
        /// <param name="tag">Tag name or text content of the token; may be replaced while processing.</param>
        /// <param name="attr">Attributes collected for the token; expected to be a <c>Hash</c> for doctypes in strict mode.</param>
        /// <param name="raw">Start offset of the token's raw text in <c>buf</c>.</param>
        /// <param name="rawlen">Length of the token's raw text in <c>buf</c>.</param>
        /// <param name="taint">Not read by this method.</param>
        private void rb_hpricot_token(ScannerState state, RubySymbol sym, MutableString tag, Object attr, int raw, int rawlen, bool taint)
        {
            Object ec = null;

            if (!state.Xml) {
                ElementData last = state.Focus.GetData<ElementData>();

                // look up the tag in the element-content table, retrying with the down-cased name
                if (sym_emptytag.Equals(sym) || sym_stag.Equals(sym) || sym_etag.Equals(sym)) {
                    Debug.Assert(state.EC is Hash, "state.EC is not an instance of Hash");
                    if (state.EC.ContainsKey(tag)) {
                        ec = rb_hash_lookup(state.EC, tag);
                    }
                    else {
                        tag = MutableStringOps.DownCase(tag as MutableString);
                        ec = rb_hash_aref(state.EC, tag);
                    }
                }

                // inside a CDATA-content element everything except its own end tag
                // (and comments/cdata/procins/text) is demoted to plain text
                // TODO: tag.GetHashCode() == last.name.GetHashCode() ??
                if (sym_CDATA.Equals(last.EC) &&
                        (!sym_procins.Equals(sym) && !sym_comment.Equals(sym) && !sym_cdata.Equals(sym) && !sym_text.Equals(sym)) &&
                        !(sym_etag.Equals(sym) && tag.GetHashCode() == last.Name.GetHashCode())) {

                    sym = sym_text;
                    tag = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
                }

                // reconcile the token kind with the element-content table entry
                if (ec != null) {
                    if (sym_emptytag.Equals(sym)) {
                        if (!sym_EMPTY.Equals(ec)) {
                            sym = sym_stag;
                        }
                    }
                    else if (sym_stag.Equals(sym)) {
                        if (sym_EMPTY.Equals(ec)) {
                            sym = sym_emptytag;
                        }
                    }
                }
            }

            if (sym_emptytag.Equals(sym) || sym_stag.Equals(sym)) {
                var ele = H_ELE(new Element(state), state, sym, tag, attr, ec, raw, rawlen);
                ElementData he = ele.GetData<ElementData>();
                he.Name = tag.GetHashCode();

                if (!state.Xml) {
                    IHpricotDataContainer match = null;
                    IHpricotDataContainer e = state.Focus;

                    // walk up from the focus looking for an ancestor whose EC table mentions this tag
                    while (e != state.Doc) {
                        ElementData hee = e.GetData<ElementData>();

                        if (hee.EC is Hash) {
                            Object has;
                            if ((hee.EC as Hash).TryGetValue(he.Name, out has)) {
                                if (has is bool && (bool) has == true) {
                                    if (match == null) {
                                        match = e;
                                    }
                                }
                                else if (symAllow.Equals(has)) {
                                    match = state.Focus;
                                }
                                else if (symDeny.Equals(has)) {
                                    match = null;
                                }
                            }
                        }

                        e = hee.Parent;
                    }

                    if (match == null) {
                        match = state.Focus;
                    }

                    state.Focus = match;
                }

                rb_hpricot_add(state.Focus, ele);

                //
                // in the case of a start tag that should be empty, just
                // skip the step that focuses the element.  focusing moves
                // us deeper into the document.
                //
                if (sym_stag.Equals(sym)) {
                    if (state.Xml || !sym_EMPTY.Equals(ec)) {
                        state.Focus = ele;
                        state.Last = null;
                    }
                }
            }
            else if (sym_etag.Equals(sym)) {
                int name;
                IHpricotDataContainer match = null;
                IHpricotDataContainer e = state.Focus;

                if (state.Strict) {
                    Debug.Assert(state.EC is Hash, "state.EC is not an instance of Hash");
                    if (!state.EC.ContainsKey(tag)) {
                        tag = MutableString.CreateAscii("div");
                    }
                }

                //
                // another optimization will be to improve this very simple
                // O(n) tag search, where n is the depth of the focused tag.
                //
                // (see also: the search above for fixups)
                //
                name = tag.GetHashCode();

                while (e != state.Doc) {
                    ElementData he = e.GetData<ElementData>();
                    if (he == null) {
                        // nothing to compare against and no parent link to follow:
                        // stop here and treat the end tag as unmatched
                        break;
                    }
                    if (he.Name == name) {
                        match = e;
                        break;
                    }
                    e = he.Parent;
                }

                if (match == null) {
                    rb_hpricot_add(state.Focus, H_ELE(new BogusETag(state), state, sym, tag, attr, ec, raw, rawlen));
                }
                else {
                    var ele = H_ELE(new ETag(state), state, sym, tag, attr, ec, raw, rawlen);
                    ElementData he = match.GetData<ElementData>();

                    // TODO: couldn't find this in the original implementation but it still sounds right.
                    he.ETag = ele;

                    state.Focus = he.Parent;
                    state.Last = null;
                }

            }
            else if (sym_cdata.Equals(sym)) {
                rb_hpricot_add(state.Focus, H_ELE(new CData(state), state, sym, tag, attr, ec, raw, rawlen));
            }
            else if (sym_comment.Equals(sym)) {
                rb_hpricot_add(state.Focus, H_ELE(new Comment(state), state, sym, tag, attr, ec, raw, rawlen));
            }
            else if (sym_doctype.Equals(sym)) {
                if (state.Strict) {
                    // TODO: need to check if attr is really an Hash instance
                    Debug.Assert(attr is Hash, "attr is not an instance of Hash");
                    Hash doctypeAttributes = attr as Hash;

                    // assign through the indexer so ids already present on the doctype are
                    // overwritten rather than rejected as duplicate keys
                    doctypeAttributes[state.Context.CreateAsciiSymbol("system_id")] = MutableString.CreateAscii("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
                    doctypeAttributes[state.Context.CreateAsciiSymbol("public_id")] = MutableString.CreateAscii("-//W3C//DTD XHTML 1.0 Strict//EN");
                }
                rb_hpricot_add(state.Focus, H_ELE(new DocumentType(state), state, sym, tag, attr, ec, raw, rawlen));
            }
            else if (sym_procins.Equals(sym)) {
                Debug.Assert(tag is MutableString, "tag is not an instance of MutableString");

                // split the processing instruction into its target (group 1) and content (group 2)
                MatchData match = Utilities.ProcessInstructionParser.Match(RubyEncoding.Binary, tag as MutableString);
                Debug.Assert(match.GroupSuccess(0) && match.GroupCount == 3, "ProcInsParse failed to parse procins");

                tag = match.GetGroupValue(1);
                attr = match.GetGroupValue(2);
                rb_hpricot_add(state.Focus, H_ELE(new ProcedureInstruction(state), state, sym, tag, attr, ec, raw, rawlen));
            }
            else if (sym_text.Equals(sym)) {
                // consecutive text tokens are merged into the previous Text node
                // TODO: add raw_string as well?
                if (state.Last != null && state.Last is Text) {
                    BasicData he = state.Last.GetData<BasicData>();

                    Debug.Assert(tag is MutableString, "tag is not an instance of MutableString");
                    Debug.Assert(he.Tag is MutableString, "he.Tag is not an instance of MutableString");

                    (he.Tag as MutableString).Append(tag as MutableString);
                }
                else {
                    rb_hpricot_add(state.Focus, H_ELE(new Text(state), state, sym, tag, attr, ec, raw, rawlen));
                }
            }
            else if (sym_xmldecl.Equals(sym)) {
                rb_hpricot_add(state.Focus, H_ELE(new XmlDeclaration(state), state, sym, tag, attr, ec, raw, rawlen));
            }
        }
Beispiel #18
0
 /// <summary>
 /// Creates an <c>ETag</c> for the given scanner state by forwarding the
 /// state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public ETag(ScannerState state) : this(state.Context) { }
Beispiel #19
0
 /// <summary>
 /// Creates a <c>Text</c> node for the given scanner state by forwarding the
 /// state's <c>Context</c> to the context-based constructor overload.
 /// </summary>
 /// <param name="state">Scanner state whose <c>Context</c> is passed on.</param>
 public Text(ScannerState state) : this(state.Context) { }