/**
         * Transfers the compiler's flags and the parsed program's precomputed
         * values (minimum length, first character, first-character set, prefix)
         * into this regexp's fields, then fills in the group-name table.
         *
         * @param prog the parsed program
         * @param comp the compiler that parsed it; supplies the flags, the group
         *             and loop counts, and the group-name map
         */
        private void compile(RegexpNode prog, Regcomp comp)
        {
            _ignoreCase    = (comp._flags & Regcomp.IGNORE_CASE) != 0;
            _isGlobal      = (comp._flags & Regcomp.GLOBAL) != 0;
            _isAnchorBegin = (comp._flags & Regcomp.ANCHORED) != 0;
            _isUtf8        = (comp._flags & Regcomp.UTF8) != 0;

            // The begin anchor can come from the flag above or from the
            // program itself; either source is enough to set it.
            _isAnchorBegin |= prog.isAnchorBegin();

            // The RegOptim passes (ignoreCase, eliminateBacktrack, linkLoops)
            // are intentionally not applied here; the program is used as parsed.

            _minLength = prog.minLength();
            _firstChar = prog.firstChar();
            _firstSet  = prog.firstSet(new boolean[256]);
            _prefix    = new CharBuffer(prog.prefix());

            _nGroup = comp._maxGroup;
            _nLoop  = comp._nLoop;

            // One slot per group number, 0 through _nGroup inclusive.
            _groupNames = new StringValue[_nGroup + 1];

            for (Map.Entry<Integer, StringValue> named : comp._groupNameMap.entrySet())
            {
                StringValue name = named.getValue();

                if (!_isUnicode && isUTF8())
                {
                    // NOTE(review): the value returned by toBinaryValue is
                    // discarded; if it does not convert in place this call has
                    // no effect on the stored name -- confirm intent.
                    name.toBinaryValue("UTF-8");
                }

                int groupIndex = named.getKey();

                _groupNames[groupIndex] = name;
            }
        }
        /**
         * Creates a regexp from its raw source text.
         *
         * <p>The raw text is stored as both {@code _rawRegexp} and the initial
         * {@code _pattern}. {@code init()} runs first, then the pattern is parsed
         * and the resulting program is passed to {@code compile}. An
         * {@code IllegalRegexpException} raised by any of these steps is not
         * propagated; it is stored in {@code _exception} instead.
         *
         * @param rawRegexp the regexp source text
         */
        public Regexp(StringValue rawRegexp)
        {
            _rawRegexp = rawRegexp;
            _pattern   = rawRegexp;

            try {
                init();

                Regcomp compiler = new Regcomp(_flags);

                // Declared as PeekStream, exactly as before, so the same
                // parse(...) overload is selected.
                PeekStream patternSource = new PeekString(_pattern);

                _prog = compiler.parse(patternSource);

                compile(_prog, compiler);
            }
            catch (IllegalRegexpException badPattern) {
                // Kept for later inspection rather than rethrown.
                _exception = badPattern;
            }
        }
Example #3
0
        /**
         * Searches {@code subject} for a match, trying each candidate offset in
         * order beginning at {@code start}.
         *
         * <p>The subject is first passed through {@code _regexp.convertSubject};
         * when that returns {@code null} the search is abandoned. Otherwise the
         * group state is cleared, the search position and subject are recorded,
         * and offsets from {@code start} up to {@code subject.length() - minLength}
         * are tried (only {@code start} itself when the regexp is anchored at the
         * beginning). On the first successful offset the bounds of group 0 are
         * recorded.
         *
         * XXX: not proper behaviour with /g
         *
         * @param env     environment handed to the subject conversion
         * @param subject the string to search
         * @param start   the first offset to try
         * @return the offset at which the program matched, or -1 when no offset
         *         matched or the subject could not be converted
         * @throws QuercusRuntimeException if matching raises a
         *         {@code StackOverflowError}; the error is attached as the cause
         */
        public int exec(Env env, StringValue subject, int start)
        {
            try {
                if (log.isLoggable(Level.FINEST))
                {
                    log.finest(this + " exec(" + subject + ")");
                }

                subject = _regexp.convertSubject(env, subject);

                if (subject == null)
                {
                    if (log.isLoggable(Level.FINE))
                    {
                        log.fine(L.l("error converting subject to utf8"));
                    }

                    return -1;
                }

                clearGroup();

                _start = start;
                _first = start;

                _subject = subject;

                // subject is non-null past the early return above, so its
                // length can be read directly.
                int subjectLength = subject.length();

                _subjectLength = subjectLength;

                int        minLength = _regexp._minLength;
                boolean[]  firstSet  = _regexp._firstSet;
                int        end       = subjectLength - minLength;
                RegexpNode prog      = _regexp._prog;

                if (_regexp._isAnchorBegin)
                {
                    // Anchored: only the starting offset is a candidate.
                    end = start;
                }

                for (; start <= end; start++)
                {
                    // Pre-filter: an offset whose first character (below 256)
                    // is not flagged in firstSet is skipped without running
                    // the program. The explicit bounds check matters for
                    // anchored patterns, where end is forced to start and may
                    // therefore sit at or past the end of the subject.
                    if (firstSet != null
                        && start < subjectLength
                        && (start < end || minLength > 0))
                    {
                        char firstChar = subject.charAt(start);

                        if (firstChar < 256 && !firstSet[firstChar])
                        {
                            continue;
                        }
                    }

                    int value = prog.match(subject, subjectLength, start, this);

                    if (value >= 0)
                    {
                        // Group 0 spans from the matching offset to the
                        // position the program returned.
                        _groupBegin[0] = start;
                        _groupEnd[0]   = value;

                        return start;
                    }
                }

                return -1;
            } catch (StackOverflowError e) {
                log.warning(L.l("regexp '{0}' produces a StackOverflowError for\n{1}",
                                _regexp, subject));

                throw new QuercusRuntimeException(
                          L.l("regexp '{0}' produces a StackOverflowError", _regexp), e);
            }
        }