/*
 * Test whether ch is accepted by the character class.
 *
 * The class bitmap is built on first use (when charSet.converted is not
 * yet set). A character counts as "absent" when the class is empty
 * (length == 0), when it lies beyond charSet.length, or when its bit in
 * charSet.bits is clear. With charSet.sense set the character must be
 * present to match; with it cleared (the '^' form) the character must be
 * absent.
 */
private static bool classMatcher(REGlobalData gData, RECharSet charSet, char ch)
{
    if (!charSet.converted) {
        processCharSet (gData, charSet);
    }

    // Short-circuit order matters: the bitmap is only indexed once the
    // length checks have passed.
    bool absent = (charSet.length == 0)
        || (ch > charSet.length)
        || ((charSet.bits [ch / 8] & (1 << (ch & 0x7))) == 0);

    // sense == true  -> match when present (absent == false)
    // sense == false -> match when absent  (absent == true)
    return absent != charSet.sense;
}
/*
 * Add a single character to the RECharSet.
 *
 * The bitmap stores one bit per character: character c lives in byte
 * c / 8 at bit position c & 7.
 *
 * Throws ApplicationException when c is larger than cs.length.
 */
private static void addCharacterToCharSet(RECharSet cs, char c)
{
    if (c > cs.length)
        throw new ApplicationException ("Character lies outside the character set bitmap.");

    int byteIndex = (int)(c / 8);

    // Bit 7 (value 0x80) does not fit in a signed byte. The narrowing is
    // made explicitly unchecked so that it wraps to the intended bit
    // pattern even when the assembly is compiled with overflow checking.
    cs.bits [byteIndex] |= unchecked ((sbyte)(1 << (c & 0x7)));
}
/*
 * Build charSet.bits from the text of the character class.
 *
 * The class text is the charSet.strlength characters of
 * gData.regexp.source that start at charSet.startIndex. A leading '^'
 * clears charSet.sense. Each remaining item is either a class escape
 * (\d \D \s \S \w \W), which is added directly, or a single character
 * (literal or escaped) which is added on its own or, when joined by '-',
 * as a range. With JSREG_FOLD set, both the upcase and downcase forms of
 * characters and range bounds are added.
 */
private static void processCharSetImpl(REGlobalData gData, RECharSet charSet)
{
    int pos = charSet.startIndex;
    int limit = pos + charSet.strlength;
    int k;

    charSet.sense = true;
    charSet.bits = new sbyte [(charSet.length / 8) + 1];

    if (pos == limit)
        return;

    if (gData.regexp.source [pos] == '^') {
        charSet.sense = false;
        pos++;
    }

    bool foldCase = (gData.regexp.flags & JSREG_FOLD) != 0;
    bool pendingRange = false;
    char rangeLow = (char)(0);

    while (pos != limit) {
        char current;
        int hexDigits = 2;
        char head = gData.regexp.source [pos];

        if (head != '\\') {
            // Plain literal character.
            current = head;
            pos++;
        } else {
            pos++;
            char esc = gData.regexp.source [pos++];
            switch (esc) {
            case 'b':
                current = (char)(0x8);
                break;
            case 'f':
                current = (char)(0xC);
                break;
            case 'n':
                current = (char)(0xA);
                break;
            case 'r':
                current = (char)(0xD);
                break;
            case 't':
                current = (char)(0x9);
                break;
            case 'v':
                current = (char)(0xB);
                break;
            case 'c':
                // NOTE(review): the test inspects the character one past
                // the one that is consumed; kept exactly as it was.
                if (((pos + 1) < limit) && isWord (gData.regexp.source [pos + 1])) {
                    current = (char)(gData.regexp.source [pos++] & 0x1F);
                } else {
                    // Not a control escape: emit the backslash as a literal
                    // and re-read the 'c' on the next pass.
                    pos--;
                    current = '\\';
                }
                break;
            case 'u':
                // \u takes four hex digits, \x takes two.
                hexDigits += 2;
                goto case 'x';
            case 'x': {
                int code = 0;
                for (k = 0; (k < hexDigits) && (pos < limit); k++) {
                    int digit = toASCIIHexDigit (gData.regexp.source [pos++]);
                    if (digit < 0) {
                        // Back off to accepting the original '\' as a
                        // literal.
                        pos -= (k + 1);
                        code = '\\';
                        break;
                    }
                    code = (code << 4) | digit;
                }
                current = (char)(code);
                break;
            }
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7': {
                // Non-ECMA extension: octal escapes are supposed to be an
                // error inside class ranges, but are supported here for
                // backwards compatibility. Up to three digits are taken;
                // the third is dropped again if the value would exceed 255.
                int octal = (esc - '0');
                char next = gData.regexp.source [pos];
                if ('0' <= next && next <= '7') {
                    pos++;
                    octal = 8 * octal + (next - '0');
                    next = gData.regexp.source [pos];
                    if ('0' <= next && next <= '7') {
                        pos++;
                        int widened = 8 * octal + (next - '0');
                        if (widened <= 255)
                            octal = widened;
                        else
                            pos--;
                    }
                }
                current = (char)(octal);
                break;
            }
            case 'd':
                addCharacterRangeToCharSet (charSet, '0', '9');
                continue; // class escapes take no part in range processing
            case 'D':
                addCharacterRangeToCharSet (charSet, (char)0, (char)('0' - 1));
                addCharacterRangeToCharSet (charSet, (char)('9' + 1), (char)(charSet.length));
                continue;
            case 's':
                for (k = (int)(charSet.length); k >= 0; k--) {
                    if (isREWhiteSpace (k))
                        addCharacterToCharSet (charSet, (char)(k));
                }
                continue;
            case 'S':
                for (k = (int)(charSet.length); k >= 0; k--) {
                    if (!isREWhiteSpace (k))
                        addCharacterToCharSet (charSet, (char)(k));
                }
                continue;
            case 'w':
                for (k = (int)(charSet.length); k >= 0; k--) {
                    if (isWord ((char)k))
                        addCharacterToCharSet (charSet, (char)(k));
                }
                continue;
            case 'W':
                for (k = (int)(charSet.length); k >= 0; k--) {
                    if (!isWord ((char)k))
                        addCharacterToCharSet (charSet, (char)(k));
                }
                continue;
            default:
                // Any other escaped character stands for itself.
                current = esc;
                break;
            }
        }

        if (pendingRange) {
            // 'current' closes a range opened by the previous character.
            if (foldCase) {
                addCharacterRangeToCharSet (charSet, upcase (rangeLow), upcase (current));
                addCharacterRangeToCharSet (charSet, downcase (rangeLow), downcase (current));
            } else {
                addCharacterRangeToCharSet (charSet, rangeLow, current);
            }
            pendingRange = false;
            continue;
        }

        if (foldCase) {
            addCharacterToCharSet (charSet, upcase (current));
            addCharacterToCharSet (charSet, downcase (current));
        } else {
            addCharacterToCharSet (charSet, current);
        }

        // A '-' that is not the last character of the class opens a range
        // whose lower bound is the character just added.
        if ((pos < (limit - 1)) && (gData.regexp.source [pos] == '-')) {
            pos++;
            pendingRange = true;
            rangeLow = current;
        }
    }
}
/*
 * Add a character range, c1 to c2 (inclusive), to the RECharSet.
 *
 * The bitmap stores one bit per character: character c lives in byte
 * c / 8 at bit position c & 7. A range is written as a partial first
 * byte, zero or more completely set middle bytes, and a partial last
 * byte (or a single partial byte when both ends share one).
 *
 * Throws ApplicationException when c2 is larger than cs.length or when
 * c1 is greater than c2.
 */
private static void addCharacterRangeToCharSet(RECharSet cs, char c1, char c2)
{
    if (c2 > cs.length)
        throw new ApplicationException ("Range end lies outside the character set bitmap.");
    if (c1 > c2)
        throw new ApplicationException ("Range start is greater than range end.");

    int firstByte = (int)(c1 / 8);
    int lastByte = (int)(c2 / 8);
    int lowBit = c1 & 0x7;
    int highBit = c2 & 0x7;

    // The masks below can reach 0xFF (and 0xFF << 7 before truncation),
    // which does not fit in a signed byte. Every narrowing is explicitly
    // unchecked so the low eight bits are kept even when the assembly is
    // compiled with overflow checking.
    if (firstByte == lastByte) {
        // (highBit - lowBit + 1) one-bits, moved up to start at lowBit.
        cs.bits [firstByte] |= unchecked ((sbyte)((0xFF >> (7 - (highBit - lowBit))) << lowBit));
    } else {
        // First byte: lowBit and everything above it.
        cs.bits [firstByte] |= unchecked ((sbyte)(0xFF << lowBit));

        // Bytes strictly between the two ends are fully covered.
        for (int i = firstByte + 1; i < lastByte; i++)
            cs.bits [i] = unchecked ((sbyte)0xFF);

        // Last byte: bit 0 up to and including highBit.
        cs.bits [lastByte] |= unchecked ((sbyte)(0xFF >> (7 - highBit)));
    }
}
/*
 * Compile the source text of the class into the RECharSet bitmap.
 *
 * Runs under a lock on charSet and re-tests charSet.converted inside it,
 * so processCharSetImpl is invoked only while the flag is still clear;
 * the flag is set once the bitmap has been built.
 */
private static void processCharSet(REGlobalData gData, RECharSet charSet)
{
    lock (charSet) {
        if (charSet.converted)
            return;

        processCharSetImpl (gData, charSet);
        charSet.converted = true;
    }
}