private void compile(RegexpNode prog, Regcomp comp) { _ignoreCase = (comp._flags & Regcomp.IGNORE_CASE) != 0; _isGlobal = (comp._flags & Regcomp.GLOBAL) != 0; _isAnchorBegin = (comp._flags & Regcomp.ANCHORED) != 0; _isUtf8 = (comp._flags & Regcomp.UTF8) != 0; if (prog.isAnchorBegin()) { _isAnchorBegin = true; } /* * if (_ignoreCase) * RegOptim.ignoreCase(prog); * * if (! _ignoreCase) * RegOptim.eliminateBacktrack(prog, null); */ _minLength = prog.minLength(); _firstChar = prog.firstChar(); _firstSet = prog.firstSet(new boolean[256]); _prefix = new CharBuffer(prog.prefix()); //this._prog = RegOptim.linkLoops(prog); _nGroup = comp._maxGroup; _nLoop = comp._nLoop; _groupNames = new StringValue[_nGroup + 1]; for (Map.Entry <Integer, StringValue> entry : comp._groupNameMap.entrySet()) { StringValue groupName = entry.getValue(); if (_isUnicode) { } else if (isUTF8()) { groupName.toBinaryValue("UTF-8"); } _groupNames[entry.getKey().intValue()] = groupName; } }
/**
 * Creates a regexp from its raw source text.
 *
 * The raw text is stored as both the raw regexp and the pattern, then
 * {@code init()} runs, the pattern is parsed and the resulting program is
 * compiled. An {@link IllegalRegexpException} raised along the way is not
 * propagated; it is stored in {@code _exception} instead.
 *
 * @param rawRegexp the regexp source
 */
public Regexp(StringValue rawRegexp) {
  _rawRegexp = rawRegexp;
  _pattern = rawRegexp;

  try {
    // init() must run before _flags is read below.
    init();

    Regcomp compiler = new Regcomp(_flags);
    PeekStream source = new PeekString(_pattern);

    _prog = compiler.parse(source);

    compile(_prog, compiler);
  } catch (IllegalRegexpException e) {
    // Remember the failure rather than throwing from the constructor.
    _exception = e;
  }
}
/**
 * Searches {@code subject} for the first position at or after {@code start}
 * where the compiled program matches.
 *
 * On success the bounds of group 0 are recorded in {@code _groupBegin[0]}
 * and {@code _groupEnd[0]}. The subject is first passed through
 * {@code _regexp.convertSubject}; if that yields {@code null} the search
 * is abandoned.
 *
 * XXX: not proper behaviour with /g
 *
 * @param env the environment handed to the subject conversion
 * @param subject the text to search
 * @param start the first offset to try
 * @return the offset where the match begins, or {@code -1} if the subject
 *         could not be converted or no match was found
 * @throws QuercusRuntimeException if matching overflows the Java stack
 */
public int exec(Env env, StringValue subject, int start) {
  try {
    if (log.isLoggable(Level.FINEST)) {
      log.finest(this + " exec(" + subject + ")");
    }

    subject = _regexp.convertSubject(env, subject);

    if (subject == null) {
      if (log.isLoggable(Level.FINE)) {
        log.fine(L.l("error converting subject to utf8"));
      }

      return -1;
    }

    clearGroup();

    _start = start;
    _first = start;
    _subject = subject;

    // subject is known to be non-null past the early return above.
    int subjectLength = subject.length();
    _subjectLength = subjectLength;

    int minLength = _regexp._minLength;
    boolean[] firstSet = _regexp._firstSet;
    RegexpNode prog = _regexp._prog;

    // Last offset that still leaves room for a match of minLength chars.
    int end = subjectLength - minLength;

    if (_regexp._isAnchorBegin) {
      // An anchored pattern is tried at the starting offset only. Clamp
      // rather than overwrite, so a start too close to the end of the subject
      // skips the loop instead of probing a character past the subject.
      end = Math.min(end, start);
    }

    for (; start <= end; start++) {
      // Cheap rejection using the set of possible first characters. At
      // start == end with minLength == 0 there may be no character to read,
      // so the probe is skipped in that case.
      if (firstSet != null && (start < end || minLength > 0)) {
        char firstChar = subject.charAt(start);

        // The set only covers the first 256 code units.
        if (firstChar < 256 && !firstSet[firstChar]) {
          continue;
        }
      }

      int value = prog.match(subject, subjectLength, start, this);

      if (value >= 0) {
        _groupBegin[0] = start;
        _groupEnd[0] = value;

        return start;
      }
    }

    return -1;
  } catch (StackOverflowError e) {
    log.warning(L.l("regexp '{0}' produces a StackOverflowError for\n{1}",
                    _regexp, subject));

    throw new QuercusRuntimeException(
        L.l("regexp '{0}' produces a StackOverflowError", _regexp), e);
  }
}