/// <summary>
/// Reads a run of octal digits at <c>state.cp</c> and returns the character they encode.
/// Accumulation stops at the first non-octal character, at <c>state.cpend</c>, or before
/// the digit that would push the value above 0xff; that digit is left unconsumed so the
/// caller parses it as an ordinary literal.
/// </summary>
/// <param name="state">Compiler state whose <c>cp</c> cursor is advanced past the accepted digits.</param>
/// <param name="src">Pattern characters being parsed.</param>
/// <returns>The accumulated value (0 when no digit was accepted) as a character.</returns>
private static char ParseLegacyOctalEscape(CompilerState state, char[] src)
{
    int value = 0;
    while (state.cp < state.cpend)
    {
        char digit = src[state.cp];
        if (digit < '0' || digit > '7')
        {
            break;
        }
        int widened = 8 * value + (digit - '0');
        if (widened > 0xff)
        {
            // Do not consume the digit that overflows; it belongs to the next term.
            break;
        }
        value = widened;
        state.cp++;
    }
    return (char)value;
}

/// <summary>
/// Parses a single term of the pattern starting at <c>state.cp</c>: one atom (anchor,
/// escape, group, character class, dot or literal) followed by an optional quantifier
/// (<c>*</c>, <c>+</c>, <c>?</c> or <c>{min,max}</c>) and an optional trailing <c>?</c>
/// that clears the quantifier's <c>greedy</c> flag.
/// </summary>
/// <param name="state">
/// Compiler state. On success <c>state.result</c> holds the node built for the term,
/// <c>state.cp</c> points past it and <c>state.progLength</c> has been increased by the
/// space reserved for the term.
/// </param>
/// <returns>
/// <c>true</c> when a term was parsed; <c>false</c> after an error has been passed to
/// <c>ReportError</c> or a nested parse failed.
/// </returns>
private static bool ParseTerm(CompilerState state)
{
    char[] src = state.cpbegin;
    char c = src[state.cp++];
    int nDigits = 2;
    int parenBaseCount = state.parenCount;
    int num;
    RENode term;
    int termStart;
    switch (c)
    {
        case '^':
        {
            // Anchors return immediately: no quantifier is parsed for them here.
            state.result = new RENode(REOP_BOL);
            state.progLength++;
            return true;
        }

        case '$':
        {
            state.result = new RENode(REOP_EOL);
            state.progLength++;
            return true;
        }

        case '\\':
        {
            if (state.cp >= state.cpend)
            {
                // A backslash as the very last pattern character.
                ReportError("msg.trail.backslash", string.Empty);
                return false;
            }
            c = src[state.cp++];
            switch (c)
            {
                case 'b':
                {
                    state.result = new RENode(REOP_WBDRY);
                    state.progLength++;
                    return true;
                }

                case 'B':
                {
                    state.result = new RENode(REOP_WNONBDRY);
                    state.progLength++;
                    return true;
                }

                case '0':
                {
                    // "\0" followed by octal digits is accepted as an octal escape, with a warning.
                    ReportWarning(state.cx, "msg.bad.backref", string.Empty);
                    DoFlat(state, ParseLegacyOctalEscape(state, src));
                    break;
                }

                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                {
                    termStart = state.cp - 1;
                    num = GetDecimalValue(c, state, 0xFFFF, "msg.overlarge.backref");
                    if (num > state.parenCount)
                    {
                        ReportWarning(state.cx, "msg.bad.backref", string.Empty);
                    }
                    if ((num > 9) && (num > state.parenCount))
                    {
                        // A multi-digit number that names no group seen so far:
                        // rewind to the first digit and reinterpret it as an octal escape.
                        state.cp = termStart;
                        DoFlat(state, ParseLegacyOctalEscape(state, src));
                        break;
                    }
                    state.result = new RENode(REOP_BACKREF);
                    state.result.parenIndex = num - 1;
                    state.progLength += 3;
                    break;
                }

                case 'f':
                {
                    DoFlat(state, (char)0xC);
                    break;
                }

                case 'n':
                {
                    DoFlat(state, (char)0xA);
                    break;
                }

                case 'r':
                {
                    DoFlat(state, (char)0xD);
                    break;
                }

                case 't':
                {
                    DoFlat(state, (char)0x9);
                    break;
                }

                case 'v':
                {
                    DoFlat(state, (char)0xB);
                    break;
                }

                case 'c':
                {
                    if ((state.cp < state.cpend) && IsControlLetter(src[state.cp]))
                    {
                        c = (char)(src[state.cp++] & 0x1F);
                    }
                    else
                    {
                        // Not a control escape: emit the backslash itself and let
                        // the 'c' be parsed again as a literal.
                        --state.cp;
                        c = '\\';
                    }
                    DoFlat(state, c);
                    break;
                }

                case 'u':
                {
                    // \u takes four hex digits, \x takes two.
                    nDigits += 2;
                    goto case 'x';
                }

                case 'x':
                {
                    int n = 0;
                    int i;
                    for (i = 0; (i < nDigits) && (state.cp < state.cpend); i++)
                    {
                        c = src[state.cp++];
                        n = Kit.XDigitToInt(c, n);
                        if (n < 0)
                        {
                            // Back off to accepting the original
                            // 'u' or 'x' as a literal
                            state.cp -= (i + 2);
                            n = src[state.cp++];
                            break;
                        }
                    }
                    c = (char)(n);
                    DoFlat(state, c);
                    break;
                }

                case 'd':
                {
                    state.result = new RENode(REOP_DIGIT);
                    state.progLength++;
                    break;
                }

                case 'D':
                {
                    state.result = new RENode(REOP_NONDIGIT);
                    state.progLength++;
                    break;
                }

                case 's':
                {
                    state.result = new RENode(REOP_SPACE);
                    state.progLength++;
                    break;
                }

                case 'S':
                {
                    state.result = new RENode(REOP_NONSPACE);
                    state.progLength++;
                    break;
                }

                case 'w':
                {
                    state.result = new RENode(REOP_ALNUM);
                    state.progLength++;
                    break;
                }

                case 'W':
                {
                    state.result = new RENode(REOP_NONALNUM);
                    state.progLength++;
                    break;
                }

                default:
                {
                    // Any other escaped character stands for itself.
                    state.result = new RENode(REOP_FLAT);
                    state.result.chr = c;
                    state.result.length = 1;
                    state.result.flatIndex = state.cp - 1;
                    state.progLength += 3;
                    break;
                }
            }
            break;
        }

        case '(':
        {
            // 'result' stays null for "(?:" groups: they add no wrapper node.
            RENode result = null;
            if (state.cp + 1 < state.cpend && src[state.cp] == '?'
                && ((c = src[state.cp + 1]) == '=' || c == '!' || c == ':'))
            {
                state.cp += 2;
                if (c == '=')
                {
                    result = new RENode(REOP_ASSERT);
                    state.progLength += 4;
                }
                else if (c == '!')
                {
                    result = new RENode(REOP_ASSERT_NOT);
                    state.progLength += 4;
                }
            }
            else
            {
                result = new RENode(REOP_LPAREN);
                state.progLength += 6;
                result.parenIndex = state.parenCount++;
            }
            ++state.parenNesting;
            if (!ParseDisjunction(state))
            {
                return false;
            }
            if (state.cp == state.cpend || src[state.cp] != ')')
            {
                ReportError("msg.unterm.paren", string.Empty);
                return false;
            }
            ++state.cp;
            --state.parenNesting;
            if (result != null)
            {
                result.kid = state.result;
                state.result = result;
            }
            break;
        }

        case ')':
        {
            ReportError("msg.re.unmatched.right.paren", string.Empty);
            return false;
        }

        case '[':
        {
            state.result = new RENode(REOP_CLASS);
            termStart = state.cp;
            state.result.startIndex = termStart;
            while (true)
            {
                // '>=' rather than '==': a backslash advances cp by two, so a class
                // ending in '\' would otherwise step over cpend and index past the array.
                if (state.cp >= state.cpend)
                {
                    ReportError("msg.unterm.class", string.Empty);
                    return false;
                }
                if (src[state.cp] == '\\')
                {
                    // Skip the escaped character so "\]" does not close the class.
                    state.cp++;
                }
                else if (src[state.cp] == ']')
                {
                    state.result.kidlen = state.cp - termStart;
                    break;
                }
                state.cp++;
            }
            state.result.index = state.classCount++;
            // state.cp++ passes the index of ']' as the end and then steps past it.
            if (!CalculateBitmapSize(state, state.result, src, termStart, state.cp++))
            {
                return false;
            }
            state.progLength += 3;
            break;
        }

        case '.':
        {
            state.result = new RENode(REOP_DOT);
            state.progLength++;
            break;
        }

        case '*':
        case '+':
        case '?':
        {
            // A quantifier with nothing in front of it.
            ReportError("msg.bad.quant", src[state.cp - 1].ToString());
            return false;
        }

        default:
        {
            state.result = new RENode(REOP_FLAT);
            state.result.chr = c;
            state.result.length = 1;
            state.result.flatIndex = state.cp - 1;
            state.progLength += 3;
            break;
        }
    }

    // The atom is parsed; look for a quantifier that applies to it.
    term = state.result;
    if (state.cp == state.cpend)
    {
        return true;
    }
    bool hasQ = false;
    switch (src[state.cp])
    {
        case '+':
        {
            state.result = new RENode(REOP_QUANT);
            state.result.min = 1;
            state.result.max = -1;
            state.progLength += 8;
            hasQ = true;
            break;
        }

        case '*':
        {
            state.result = new RENode(REOP_QUANT);
            state.result.min = 0;
            state.result.max = -1;
            state.progLength += 8;
            hasQ = true;
            break;
        }

        case '?':
        {
            state.result = new RENode(REOP_QUANT);
            state.result.min = 0;
            state.result.max = 1;
            state.progLength += 8;
            hasQ = true;
            break;
        }

        case '{':
        {
            int min = 0;
            int max = -1;
            int leftCurl = state.cp;
            // If the text does not form a complete "{digits}", "{digits,}" or
            // "{digits,digits}" the cursor is restored to the '{' so that it is
            // parsed as a literal on the next call.
            if (++state.cp < src.Length && IsDigit(c = src[state.cp]))
            {
                ++state.cp;
                min = GetDecimalValue(c, state, 0xFFFF, "msg.overlarge.min");
                // Every read below is bounds-checked: running off the end of the
                // pattern means the brace expression is unterminated, not a quantifier.
                bool inBounds = state.cp < src.Length;
                if (inBounds)
                {
                    c = src[state.cp];
                    if (c == ',')
                    {
                        inBounds = ++state.cp < src.Length;
                        if (inBounds)
                        {
                            c = src[state.cp];
                            if (IsDigit(c))
                            {
                                ++state.cp;
                                max = GetDecimalValue(c, state, 0xFFFF, "msg.overlarge.max");
                                inBounds = state.cp < src.Length;
                                if (inBounds)
                                {
                                    c = src[state.cp];
                                    if (min > max)
                                    {
                                        ReportError("msg.max.lt.min", src[state.cp].ToString());
                                        return false;
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        max = min;
                    }
                }
                if (inBounds && c == '}')
                {
                    state.result = new RENode(REOP_QUANT);
                    state.result.min = min;
                    state.result.max = max;
                    // QUANT, <min>, <max>, <parencount>,
                    // <parenindex>, <next> ... <ENDCHILD>
                    state.progLength += 12;
                    hasQ = true;
                }
            }
            if (!hasQ)
            {
                state.cp = leftCurl;
            }
            break;
        }
    }
    if (!hasQ)
    {
        return true;
    }

    // Step over the quantifier's last character and attach the atom as its child.
    ++state.cp;
    state.result.kid = term;
    state.result.parenIndex = parenBaseCount;
    state.result.parenCount = state.parenCount - parenBaseCount;
    if ((state.cp < state.cpend) && (src[state.cp] == '?'))
    {
        ++state.cp;
        state.result.greedy = false;
    }
    else
    {
        state.result.greedy = true;
    }
    return true;
}
/// <summary>
/// Parses one alternative and, when it is followed by <c>|</c>, recursively parses the
/// rest of the disjunction and joins both sides under an alternation node.
/// </summary>
/// <param name="state">
/// Compiler state; on success <c>state.result</c> holds the parsed node and
/// <c>state.progLength</c> has grown by the space reserved for the alternation.
/// </param>
/// <returns><c>false</c> as soon as a nested parse fails, otherwise <c>true</c>.</returns>
private static bool ParseDisjunction(CompilerState state)
{
    if (!ParseAlternative(state))
    {
        return false;
    }

    char[] pattern = state.cpbegin;
    int pos = state.cp;
    if (pos == pattern.Length || pattern[pos] != '|')
    {
        // No '|' follows: the single alternative is the whole result.
        return true;
    }

    ++state.cp;
    RENode alt = new RENode(REOP_ALT);
    alt.kid = state.result;
    if (!ParseDisjunction(state))
    {
        return false;
    }
    alt.kid2 = state.result;
    state.result = alt;

    // Pick a specialised opcode when each side is a single FLAT node, or a
    // FLAT paired with a CLASS whose index is below 256.
    if (alt.kid.op == REOP_FLAT && alt.kid2.op == REOP_FLAT)
    {
        if ((state.flags & JSREG_FOLD) == 0)
        {
            alt.op = REOP_ALTPREREQ;
        }
        else
        {
            alt.op = REOP_ALTPREREQi;
        }
        alt.chr = alt.kid.chr;
        alt.index = alt.kid2.chr;
        state.progLength += 13;
    }
    else if (alt.kid.op == REOP_CLASS && alt.kid.index < 256
             && alt.kid2.op == REOP_FLAT && (state.flags & JSREG_FOLD) == 0)
    {
        alt.op = REOP_ALTPREREQ2;
        alt.chr = alt.kid2.chr;
        alt.index = alt.kid.index;
        state.progLength += 13;
    }
    else if (alt.kid.op == REOP_FLAT && alt.kid2.op == REOP_CLASS
             && alt.kid2.index < 256 && (state.flags & JSREG_FOLD) == 0)
    {
        alt.op = REOP_ALTPREREQ2;
        alt.chr = alt.kid.chr;
        alt.index = alt.kid2.index;
        state.progLength += 13;
    }
    else
    {
        // Plain alternation.
        state.progLength += 9;
    }
    return true;
}
/// <summary>
/// Scans the body of a character class, <c>src[index..end)</c>, to find the largest
/// character value it can refer to and stores that value plus one in
/// <c>target.bmsize</c>. A leading <c>^</c> sets <c>target.sense</c> to <c>false</c>.
/// </summary>
/// <param name="state">Compiler state; only <c>state.flags</c> is read (for <c>JSREG_FOLD</c>).</param>
/// <param name="target">Class node that receives <c>bmsize</c> and <c>sense</c>.</param>
/// <param name="src">Pattern characters.</param>
/// <param name="index">Index of the first character after <c>[</c>.</param>
/// <param name="end">Index of the closing <c>]</c> (exclusive end of the class body).</param>
/// <returns>
/// <c>false</c> after reporting <c>msg.bad.range</c> for a range whose start exceeds its
/// end or whose end is a class escape; otherwise <c>true</c>.
/// </returns>
private static bool CalculateBitmapSize(CompilerState state, RENode target, char[] src, int index, int end)
{
    char rangeStart = '\0';
    char c;
    int n;
    int nDigits;
    int i;
    int max = 0;
    bool inRange = false;
    target.bmsize = 0;
    target.sense = true;
    if (index == end)
    {
        return true;
    }
    if (src[index] == '^')
    {
        ++index;
        target.sense = false;
    }
    while (index != end)
    {
        // Value of the class atom parsed in this iteration.
        int localMax = 0;
        nDigits = 2;
        switch (src[index])
        {
            case '\\':
            {
                ++index;
                c = src[index++];
                switch (c)
                {
                    case 'b':
                    {
                        localMax = 0x8;
                        break;
                    }

                    case 'f':
                    {
                        localMax = 0xC;
                        break;
                    }

                    case 'n':
                    {
                        localMax = 0xA;
                        break;
                    }

                    case 'r':
                    {
                        localMax = 0xD;
                        break;
                    }

                    case 't':
                    {
                        localMax = 0x9;
                        break;
                    }

                    case 'v':
                    {
                        localMax = 0xB;
                        break;
                    }

                    case 'c':
                    {
                        if ((index < end) && IsControlLetter(src[index]))
                        {
                            localMax = (char)(src[index++] & 0x1F);
                        }
                        else
                        {
                            // Not a control escape: the backslash counts as a literal
                            // and the 'c' is scanned again on the next iteration.
                            // The '\\' value must only be used on this path; applying
                            // it after a valid \cX would corrupt range-start checks.
                            --index;
                            localMax = '\\';
                        }
                        break;
                    }

                    case 'u':
                    {
                        // \u takes four hex digits, \x takes two.
                        nDigits += 2;
                        goto case 'x';
                    }

                    case 'x':
                    {
                        n = 0;
                        for (i = 0; (i < nDigits) && (index < end); i++)
                        {
                            c = src[index++];
                            n = Kit.XDigitToInt(c, n);
                            if (n < 0)
                            {
                                // Back off to accepting the original
                                // '\' as a literal
                                index -= (i + 1);
                                n = '\\';
                                break;
                            }
                        }
                        localMax = n;
                        break;
                    }

                    case 'd':
                    {
                        if (inRange)
                        {
                            ReportError("msg.bad.range", string.Empty);
                            return false;
                        }
                        localMax = '9';
                        break;
                    }

                    case 'D':
                    case 's':
                    case 'S':
                    case 'w':
                    case 'W':
                    {
                        if (inRange)
                        {
                            ReportError("msg.bad.range", string.Empty);
                            return false;
                        }
                        // These escapes are sized for the full 16-bit character range;
                        // no further scanning is needed.
                        target.bmsize = 65536;
                        return true;
                    }

                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    {
                        // Octal escape of up to three digits, capped at 0xff.
                        n = (c - '0');
                        c = src[index];
                        if ('0' <= c && c <= '7')
                        {
                            index++;
                            n = 8 * n + (c - '0');
                            c = src[index];
                            if ('0' <= c && c <= '7')
                            {
                                index++;
                                i = 8 * n + (c - '0');
                                if (i <= 0xff)
                                {
                                    n = i;
                                }
                                else
                                {
                                    // Third digit would overflow: give it back.
                                    index--;
                                }
                            }
                        }
                        localMax = n;
                        break;
                    }

                    default:
                    {
                        localMax = c;
                        break;
                    }
                }
                break;
            }

            default:
            {
                localMax = src[index++];
                break;
            }
        }
        if (inRange)
        {
            if (rangeStart > localMax)
            {
                ReportError("msg.bad.range", string.Empty);
                return false;
            }
            inRange = false;
        }
        else
        {
            // A '-' starts a range only when at least one more character follows it
            // before the closing bracket.
            if (index < (end - 1))
            {
                if (src[index] == '-')
                {
                    ++index;
                    inRange = true;
                    rangeStart = (char)localMax;
                    continue;
                }
            }
        }
        if ((state.flags & JSREG_FOLD) != 0)
        {
            // Case-insensitive: size for whichever case variant is larger.
            char cu = Upcase((char)localMax);
            char cd = Downcase((char)localMax);
            localMax = (cu >= cd) ? cu : cd;
        }
        if (localMax > max)
        {
            max = localMax;
        }
    }
    target.bmsize = max + 1;
    return true;
}
/// <summary>
/// Writes the bytecode for the node list starting at <paramref name="t"/> into
/// <c>re.program</c>, beginning at offset <paramref name="pc"/>, following each node's
/// <c>next</c> link and recursing into child nodes.
/// </summary>
/// <param name="state">Compiler state; <c>state.flags</c> is consulted for <c>JSREG_FOLD</c>.</param>
/// <param name="re">Compiled regexp that owns the <c>program</c> buffer and <c>classList</c>.</param>
/// <param name="pc">Offset in the program at which to start writing.</param>
/// <param name="t">First node to emit; may be <c>null</c>, in which case nothing is written.</param>
/// <returns>The offset just past the last byte written.</returns>
private static int EmitREBytecode(CompilerState state, RECompiled re, int pc, RENode t)
{
    byte[] program = re.program;
    for (RENode node = t; node != null; node = node.next)
    {
        // Write the node's own opcode first; several cases below patch it in place.
        program[pc++] = node.op;
        switch (node.op)
        {
            case REOP_EMPTY:
            {
                // Empty nodes occupy no space: take the opcode back.
                --pc;
                break;
            }

            case REOP_ALTPREREQ:
            case REOP_ALTPREREQi:
            case REOP_ALTPREREQ2:
            {
                // Two prerequisite operands precede the ordinary alternation layout.
                bool ignoreCase = node.op == REOP_ALTPREREQi;
                AddIndex(program, pc, ignoreCase ? Upcase(node.chr) : node.chr);
                pc += INDEX_LEN;
                AddIndex(program, pc, ignoreCase ? Upcase((char)node.index) : node.index);
                pc += INDEX_LEN;
                goto case REOP_ALT;
            }

            case REOP_ALT:
            {
                RENode secondBranch = node.kid2;

                // Reserve the jump to the second branch, then emit the first one.
                int toSecondBranch = pc;
                pc += INDEX_LEN;
                pc = EmitREBytecode(state, re, pc, node.kid);

                program[pc++] = REOP_JUMP;
                int firstBranchExit = pc;
                pc += INDEX_LEN;
                ResolveForwardJump(program, toSecondBranch, pc);

                pc = EmitREBytecode(state, re, pc, secondBranch);
                program[pc++] = REOP_JUMP;
                int secondBranchExit = pc;
                pc += INDEX_LEN;

                // Both branches leave through the same point.
                ResolveForwardJump(program, firstBranchExit, pc);
                ResolveForwardJump(program, secondBranchExit, pc);
                break;
            }

            case REOP_FLAT:
            {
                bool hasSourceIndex = node.flatIndex != -1;
                if (hasSourceIndex)
                {
                    // Absorb following FLAT nodes that continue exactly where this one ends.
                    while ((node.next != null)
                           && (node.next.op == REOP_FLAT)
                           && ((node.flatIndex + node.length) == node.next.flatIndex))
                    {
                        node.length += node.next.length;
                        node.next = node.next.next;
                    }
                }

                bool foldCase = (state.flags & JSREG_FOLD) != 0;
                if (hasSourceIndex && (node.length > 1))
                {
                    // Multi-character literal: stored as (flatIndex, length).
                    program[pc - 1] = foldCase ? REOP_FLATi : REOP_FLAT;
                    pc = AddIndex(program, pc, node.flatIndex);
                    pc = AddIndex(program, pc, node.length);
                }
                else if (node.chr < 256)
                {
                    // Single character that fits in one byte.
                    program[pc - 1] = foldCase ? REOP_FLAT1i : REOP_FLAT1;
                    program[pc++] = unchecked((byte)(node.chr));
                }
                else
                {
                    // Single character that needs a full index operand.
                    program[pc - 1] = foldCase ? REOP_UCFLAT1i : REOP_UCFLAT1;
                    pc = AddIndex(program, pc, node.chr);
                }
                break;
            }

            case REOP_LPAREN:
            {
                pc = AddIndex(program, pc, node.parenIndex);
                pc = EmitREBytecode(state, re, pc, node.kid);
                program[pc++] = REOP_RPAREN;
                pc = AddIndex(program, pc, node.parenIndex);
                break;
            }

            case REOP_BACKREF:
            {
                pc = AddIndex(program, pc, node.parenIndex);
                break;
            }

            case REOP_ASSERT:
            case REOP_ASSERT_NOT:
            {
                // Layout: <forward jump> <child> <test opcode>.
                int afterAssert = pc;
                pc += INDEX_LEN;
                pc = EmitREBytecode(state, re, pc, node.kid);
                program[pc++] = (node.op == REOP_ASSERT) ? REOP_ASSERTTEST : REOP_ASSERTNOTTEST;
                ResolveForwardJump(program, afterAssert, pc);
                break;
            }

            case REOP_QUANT:
            {
                bool unbounded = node.max == -1;
                if ((node.min == 0) && unbounded)
                {
                    program[pc - 1] = node.greedy ? REOP_STAR : REOP_MINIMALSTAR;
                }
                else if ((node.min == 0) && (node.max == 1))
                {
                    program[pc - 1] = node.greedy ? REOP_OPT : REOP_MINIMALOPT;
                }
                else if ((node.min == 1) && unbounded)
                {
                    program[pc - 1] = node.greedy ? REOP_PLUS : REOP_MINIMALPLUS;
                }
                else
                {
                    // General {min,max}: the opcode carries explicit bounds.
                    if (!node.greedy)
                    {
                        program[pc - 1] = REOP_MINIMALQUANT;
                    }
                    pc = AddIndex(program, pc, node.min);
                    // max may be -1, which AddIndex does not accept, so it is stored as max + 1
                    pc = AddIndex(program, pc, node.max + 1);
                }
                pc = AddIndex(program, pc, node.parenCount);
                pc = AddIndex(program, pc, node.parenIndex);

                int afterChild = pc;
                pc += INDEX_LEN;
                pc = EmitREBytecode(state, re, pc, node.kid);
                program[pc++] = REOP_ENDCHILD;
                ResolveForwardJump(program, afterChild, pc);
                break;
            }

            case REOP_CLASS:
            {
                if (!node.sense)
                {
                    program[pc - 1] = REOP_NCLASS;
                }
                pc = AddIndex(program, pc, node.index);
                // Register the class so that its set can be looked up by index.
                re.classList[node.index] = new RECharSet(node.bmsize, node.startIndex, node.kidlen, node.sense);
                break;
            }

            default:
            {
                // Every other opcode has no operands.
                break;
            }
        }
    }
    return pc;
}