예제 #1
0
        /*
        *   Initialize the character set if this is the first call.
        *   Test the bit - if the ^ flag was specified, non-inclusion is a success
        */
        private static bool classMatcher(REGlobalData gData, RECharSet charSet, char ch)
        {
            // Build the bitmap lazily the first time this class is matched against.
            if (!charSet.converted)
                processCharSet (gData, charSet);

            // ch is a member when the set is non-empty, ch does not exceed the
            // set's length, and the bit for ch is set in the bitmap. The checks
            // short-circuit so the bitmap is never indexed for an out-of-range ch.
            int bitMask = 1 << (ch & 0x7);
            bool isMember = (charSet.length != 0)
                && (ch <= charSet.length)
                && ((charSet.bits [ch / 8] & bitMask) != 0);

            // sense == true  : membership is a match.
            // sense == false : (class started with '^') non-membership is a match.
            return charSet.sense == isMember;
        }
예제 #2
0
 /* Add a single character to the RECharSet */
 private static void addCharacterToCharSet(RECharSet cs, char c)
 {
     // Characters above cs.length have no slot in the bitmap; throws
     // ApplicationException in that case.
     if (c > cs.length)
         throw new ApplicationException ();

     // Bit (c & 7) of byte (c / 8) represents c. For bit 7 the mask is 0x80,
     // which does not fit a signed byte as a positive value; the cast is marked
     // unchecked so the bit pattern is kept regardless of whether the assembly
     // is compiled with overflow checking enabled.
     cs.bits [c / 8] |= unchecked ((sbyte)(1 << (c & 0x7)));
 }
예제 #3
0
        private static void processCharSetImpl(REGlobalData gData, RECharSet charSet)
        {
            /*
            *  Scans the character-class source text held in
            *  gData.regexp.source [charSet.startIndex .. startIndex + strlength)
            *  and fills charSet.bits (one bit per character value) and
            *  charSet.sense (false when the class starts with '^').
            *  Handles backslash escapes, class escapes (\d \D \s \S \w \W) and
            *  'a-z' style ranges; when JSREG_FOLD is set, the upcase and downcase
            *  forms of each character / range endpoint are added.
            */
            int src = charSet.startIndex;
            int end = src + charSet.strlength;

            char rangeStart = (char)(0), thisCh;
            int byteLength;
            char c;
            int n;
            int nDigits;
            int i;
            bool inRange = false;

            charSet.sense = true;
            // Room for bit indices 0 .. charSet.length inclusive.
            byteLength = (charSet.length / 8) + 1;
            charSet.bits = new sbyte [byteLength];

            // Empty class: leave the freshly allocated (all-zero) bitmap as is.
            if (src == end)
                return;

            // A leading '^' flips the sense of the class.
            if (gData.regexp.source [src] == '^') {
                charSet.sense = false;
                ++src;
            }

            // Each iteration decodes one class element into thisCh (or, for the
            // class escapes, adds characters directly and continues).
            while (src != end) {
                // Number of hex digits for \x; bumped to 4 below for \u.
                nDigits = 2;
                switch (gData.regexp.source [src]) {

                    case '\\':
                        ++src;
                        c = gData.regexp.source [src++];
                        switch (c) {

                            // Inside a class, \b maps to 0x8 rather than a word boundary.
                            case 'b':
                                thisCh = (char)(0x8);
                                break;

                            case 'f':
                                thisCh = (char)(0xC);
                                break;

                            case 'n':
                                thisCh = (char)(0xA);
                                break;

                            case 'r':
                                thisCh = (char)(0xD);
                                break;

                            case 't':
                                thisCh = (char)(0x9);
                                break;

                            case 'v':
                                thisCh = (char)(0xB);
                                break;

                            // \cX control escape: the consumed character is masked with 0x1F.
                            // NOTE(review): the guard inspects source [src + 1] while the
                            // character actually consumed is source [src] - confirm this
                            // offset is intended.
                            case 'c':
                                if (((src + 1) < end) && isWord (gData.regexp.source [src + 1]))
                                    thisCh = (char)(gData.regexp.source [src++] & 0x1F);
                                else {
                                    // Not a control escape: step back onto the 'c' so it is
                                    // processed next, and take the backslash as a literal.
                                    --src;
                                    thisCh = '\\';
                                }
                                break;

                            // \u takes 4 hex digits (2 + 2), then shares the \x code.
                            case 'u':
                                nDigits += 2;
                                // fall thru
                                goto case 'x';

                            case 'x':
                                n = 0;
                                // Reads up to nDigits hex digits, stopping early at 'end'.
                                for (i = 0; (i < nDigits) && (src < end); i++) {
                                    c = gData.regexp.source [src++];
                                    int digit = toASCIIHexDigit (c);
                                    if (digit < 0) {
                                        /* back off to accepting the original '\'
                                        * as a literal
                                        */
                                        // Rewind the i + 1 characters consumed by this loop.
                                        src -= (i + 1);
                                        n = '\\';
                                        break;
                                    }
                                    n = (n << 4) | digit;
                                }
                                thisCh = (char)(n);
                                break;

                            case '0':
                            case '1':
                            case '2':
                            case '3':
                            case '4':
                            case '5':
                            case '6':
                            case '7':
                                /*
                                *  This is a non-ECMA extension - decimal escapes (in this
                                *  case, octal!) are supposed to be an error inside class
                                *  ranges, but supported here for backwards compatibility.
                                *
                                */
                                // Up to three octal digits; a third digit is only accepted
                                // when the resulting value stays <= 255.
                                n = (c - '0');
                                c = gData.regexp.source [src];
                                if ('0' <= c && c <= '7') {
                                    src++;
                                    n = 8 * n + (c - '0');
                                    c = gData.regexp.source [src];
                                    if ('0' <= c && c <= '7') {
                                        src++;
                                        i = 8 * n + (c - '0');
                                        if (i <= 255)
                                            n = i;
                                        else
                                            // Value too large: un-consume the third digit.
                                            src--;
                                    }
                                }
                                thisCh = (char)(n);
                                break;

                            // Class escapes add their characters directly and skip the
                            // single-character / range handling after the switch.
                            case 'd':
                                addCharacterRangeToCharSet (charSet, '0', '9');
                                continue; /* don't need range processing */

                            // \D: everything from 0 up to charSet.length except '0'..'9'.
                            case 'D':
                                addCharacterRangeToCharSet (charSet, (char)0, (char)('0' - 1));
                                addCharacterRangeToCharSet (charSet, (char)('9' + 1), (char)(charSet.length));
                                continue;

                            // \s, \S, \w, \W: test every value from charSet.length down to 0.
                            case 's':
                                for (i = (int)(charSet.length); i >= 0; i--)
                                    if (isREWhiteSpace (i))
                                        addCharacterToCharSet (charSet, (char)(i));
                                continue;

                            case 'S':
                                for (i = (int)(charSet.length); i >= 0; i--)
                                    if (!isREWhiteSpace (i))
                                        addCharacterToCharSet (charSet, (char)(i));
                                continue;

                            case 'w':
                                for (i = (int)(charSet.length); i >= 0; i--)
                                    if (isWord ((char)i))
                                        addCharacterToCharSet (charSet, (char)(i));
                                continue;

                            case 'W':
                                for (i = (int)(charSet.length); i >= 0; i--)
                                    if (!isWord ((char)i))
                                        addCharacterToCharSet (charSet, (char)(i));
                                continue;

                            // Any other escaped character stands for itself.
                            default:
                                thisCh = c;
                                break;

                        }
                        break;

                    // Unescaped character: taken literally.
                    default:
                        thisCh = gData.regexp.source [src++];
                        break;

                }
                if (inRange) {
                    // thisCh closes a range opened by a preceding 'x-'.
                    // NOTE(review): with JSREG_FOLD only the two endpoints are
                    // case-mapped; addCharacterRangeToCharSet throws when its first
                    // argument exceeds its second - confirm mapped endpoints cannot
                    // end up reversed.
                    if ((gData.regexp.flags & JSREG_FOLD) != 0) {
                        addCharacterRangeToCharSet (charSet, upcase (rangeStart), upcase (thisCh));
                        addCharacterRangeToCharSet (charSet, downcase (rangeStart), downcase (thisCh));
                    }
                    else {
                        addCharacterRangeToCharSet (charSet, rangeStart, thisCh);
                    }
                    inRange = false;
                }
                else {
                    // Single character (it is added even if it turns out to start a range).
                    if ((gData.regexp.flags & JSREG_FOLD) != 0) {
                        addCharacterToCharSet (charSet, upcase (thisCh));
                        addCharacterToCharSet (charSet, downcase (thisCh));
                    }
                    else {
                        addCharacterToCharSet (charSet, thisCh);
                    }
                    // A following '-' opens a range only when it is not the last
                    // character of the class; a trailing '-' is handled as a literal
                    // on the next iteration.
                    if (src < (end - 1)) {
                        if (gData.regexp.source [src] == '-') {
                            ++src;
                            inRange = true;
                            rangeStart = thisCh;
                        }
                    }
                }
            }
        }
예제 #4
0
        /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
        private static void addCharacterRangeToCharSet(RECharSet cs, char c1, char c2)
        {
            // Reject reversed ranges and ranges reaching past the bitmap; throws
            // ApplicationException in either case.
            if ((c2 > cs.length) || (c1 > c2))
                throw new ApplicationException ();

            // Byte index and bit-within-byte of each endpoint.
            int firstByte = c1 / 8;
            int lastByte = c2 / 8;
            int firstBit = c1 & 0x7;
            int lastBit = c2 & 0x7;

            // The masks below are int values of which only the low eight bits are
            // wanted (e.g. 0xFF << 3 == 0x7F8, stored as 0xF8). The narrowing to
            // sbyte must therefore truncate, so it is done in an explicit unchecked
            // block rather than relying on the assembly's overflow-checking setting.
            unchecked {
                if (firstByte == lastByte) {
                    // Both endpoints share a byte: a run of (lastBit - firstBit + 1)
                    // one-bits shifted up to start at firstBit.
                    cs.bits [firstByte] |= (sbyte)((0xFF >> (7 - (lastBit - firstBit))) << firstBit);
                }
                else {
                    // First byte: bits firstBit..7.
                    cs.bits [firstByte] |= (sbyte)(0xFF << firstBit);
                    // Bytes strictly between the endpoints are fully covered.
                    for (int i = firstByte + 1; i < lastByte; i++)
                        cs.bits [i] = (sbyte)0xFF;
                    // Last byte: bits 0..lastBit.
                    cs.bits [lastByte] |= (sbyte)(0xFF >> (7 - lastBit));
                }
            }
        }
예제 #5
0
 /* Compile the source of the class into a RECharSet */
 private static void processCharSet(REGlobalData gData, RECharSet charSet)
 {
     // Conversion is serialized on the char set itself; the converted flag is
     // examined while holding the lock and set only after the bitmap is built.
     lock (charSet) {
         if (charSet.converted)
             return;

         processCharSetImpl (gData, charSet);
         charSet.converted = true;
     }
 }