Наследование: UnicodeConstants
Пример #1
0
 /// <summary>
 /// Intersects this set with <paramref name="bs"/>. The work is delegated to
 /// the two-argument <c>Subtract</c> with the inversion flag flipped.
 /// </summary>
 /// <param name="bs">The other set.</param>
 /// <param name="inverse">Inversion flag; its negation is forwarded to <c>Subtract</c>.</param>
 internal void Intersect(Bitset bs, bool inverse)
 {
     bool subtractInverse = !inverse;
     Subtract(bs, subtractInverse);
 }
Пример #2
0
 /// <summary>
 /// Subtracts <paramref name="bs"/> from this set, forwarding to the
 /// two-argument overload with the inversion flag set to <c>false</c>.
 /// </summary>
 /// <param name="bs">The set to subtract.</param>
 internal void Subtract(Bitset bs)
 {
     const bool notInverted = false;
     Subtract(bs, notInverted);
 }
Пример #3
0
 /// <summary>
 /// Adds <paramref name="bs"/> to this set and adjusts <c>weight</c> by the
 /// value returned from <c>AddImpl</c>.
 /// </summary>
 /// <param name="bs">The set to add.</param>
 /// <param name="inverse">
 /// Extra inversion flag; it is XOR-ed with the negation of <c>bs.positive</c>
 /// to obtain the flag handed to <c>AddImpl</c>.
 /// </param>
 internal void Add(Bitset bs, bool inverse)
 {
     bool effectiveInverse = (!bs.positive) ^ inverse;
     weight += AddImpl(this, bs, effectiveInverse);
 }
Пример #4
0
 /// <summary>
 /// Intersects this set with <paramref name="bs"/>, forwarding to the
 /// two-argument overload with the inversion flag set to <c>false</c>.
 /// </summary>
 /// <param name="bs">The other set.</param>
 internal void Intersect(Bitset bs)
 {
     const bool notInverted = false;
     Intersect(bs, notInverted);
 }
Пример #5
0
 /// <summary>
 /// Parses a character class from <paramref name="data"/> into a fresh
 /// <c>Bitset</c> and then transfers the result into <paramref name="term"/>
 /// via <c>Bitset.Unify</c>.
 /// </summary>
 /// <param name="data">Pattern characters.</param>
 /// <param name="i">Index at which parsing starts.</param>
 /// <param name="_out">Exclusive upper bound for parsing.</param>
 /// <param name="term">Term that receives the parsed class.</param>
 /// <param name="icase">Forwarded to the Bitset-based overload.</param>
 /// <param name="skipspaces">Forwarded to the Bitset-based overload.</param>
 /// <param name="unicode">Forwarded to the Bitset-based overload.</param>
 /// <param name="xml">Forwarded to the Bitset-based overload.</param>
 /// <returns>The index returned by the Bitset-based overload.</returns>
 internal static int ParseClass(char[] data, int i, int _out, Term term, bool icase, bool skipspaces, bool unicode, bool xml)
 {
     Bitset parsed = new Bitset();
     int next = ParseClass(data, i, _out, parsed, icase, skipspaces, unicode, xml);
     Bitset.Unify(parsed, term);
     return next;
 }
Пример #6
0
 /// <summary>
 /// Adds <paramref name="bs"/> to this set, forwarding to the two-argument
 /// overload with the inversion flag set to <c>false</c>.
 /// </summary>
 /// <param name="bs">The set to add.</param>
 internal void Add(Bitset bs)
 {
     const bool notInverted = false;
     Add(bs, notInverted);
 }
Пример #7
0
        /// <summary>
        /// Builds the ASCII-based POSIX character classes and registers each of
        /// them under its name in the <c>posixClasses</c> realm.
        /// </summary>
        /// <remarks>
        /// "Print" is registered as its own set: the "Graph" characters plus the
        /// space character, which is how POSIX defines the <c>print</c> class.
        /// </remarks>
        private static void InitPosixClasses()
        {
            Bitset lower = new Bitset();
            lower.SetRange('a','z');
            RegisterClass("Lower", lower, posixClasses);

            Bitset upper = new Bitset();
            upper.SetRange('A','Z');
            RegisterClass("Upper",upper,posixClasses);

            Bitset ascii = new Bitset();
            ascii.SetRange((char)0,(char)0x7f);
            RegisterClass("ASCII",ascii,posixClasses);

            // Alpha = Lower + Upper
            Bitset alpha = new Bitset();
            alpha.Add(lower);
            alpha.Add(upper);
            RegisterClass("Alpha",alpha,posixClasses);

            Bitset digit = new Bitset();
            digit.SetRange('0','9');
            RegisterClass("Digit",digit,posixClasses);

            // Alnum = Alpha + Digit
            Bitset alnum = new Bitset();
            alnum.Add(alpha);
            alnum.Add(digit);
            RegisterClass("Alnum",alnum,posixClasses);

            Bitset punct = new Bitset();
            punct.SetChars("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
            RegisterClass("Punct",punct,posixClasses);

            // Graph = Alnum + Punct
            Bitset graph = new Bitset();
            graph.Add(alnum);
            graph.Add(punct);
            RegisterClass("Graph",graph,posixClasses);

            // Print = Graph + space; a separate instance so that "Graph" is not affected.
            Bitset print = new Bitset();
            print.Add(graph);
            print.SetChar(' ');
            RegisterClass("Print",print,posixClasses);

            Bitset blank = new Bitset();
            blank.SetChars(" \t");
            RegisterClass("Blank",blank,posixClasses);

            // Control characters: 0x00-0x1f plus DEL (0x7f).
            Bitset cntrl = new Bitset();
            cntrl.SetRange((char)0,(char)0x1f);
            cntrl.SetChar((char)0x7f);
            RegisterClass("Cntrl",cntrl,posixClasses);

            Bitset xdigit = new Bitset();
            xdigit.SetRange('0','9');
            xdigit.SetRange('a','f');
            xdigit.SetRange('A','F');
            RegisterClass("XDigit",xdigit,posixClasses);

            Bitset space = new Bitset();
            space.SetChars(" \t\n\r\f\u000b");
            RegisterClass("Space",space,posixClasses);
        }
Пример #8
0
 /// <summary>
 /// Removes the contents of <paramref name="bs2"/> from <paramref name="bs1"/>.
 /// When neither set is in large mode and <paramref name="inv"/> is false the
 /// operation is done on the single <c>block0</c> arrays; otherwise both sets
 /// are switched to large mode and the block tables are processed.
 /// </summary>
 /// <param name="bs1">Set that is modified.</param>
 /// <param name="bs2">Set whose contents are subtracted.</param>
 /// <param name="inv">Inversion flag forwarded to <c>Block.Subtract</c>.</param>
 /// <returns>
 /// The value reported by the underlying subtract routine, or 0 when either
 /// <c>block0</c> array is missing in the small-mode path.
 /// </returns>
 private static int SubtractImpl(Bitset bs1, Bitset bs2, bool inv)
 {
     bool needsLargeMode = bs1.isLarge || bs2.isLarge || inv;
     if(needsLargeMode){
         if(!bs1.isLarge) bs1.EnableLargeMode();
         if(!bs2.isLarge) bs2.EnableLargeMode();
         return Block.Subtract(bs1.blocks,bs2.blocks,0,BLOCK_COUNT-1,inv);
     }

     // Small mode: nothing to do unless both sides actually have a block.
     bool[] subtrahend = bs2.block0;
     if(subtrahend==null) return 0;
     bool[] target = bs1.block0;
     if(target==null) return 0;
     return Subtract(target,subtrahend,0,BLOCK_SIZE-1,false);
 }
Пример #9
0
 /// <summary>
 /// Builds a set covering every category in <paramref name="cats"/> and stores
 /// it in <c>namedClasses</c> under <paramref name="name"/>.
 /// </summary>
 /// <remarks>
 /// Unlike the single-category overload, this one writes to
 /// <c>namedClasses</c> directly and does not add the name to any realm list.
 /// </remarks>
 /// <param name="name">Key under which the set is stored.</param>
 /// <param name="cats">Category codes passed one by one to <c>SetCategory</c>.</param>
 private static void InitNamedCategory(string name, int[] cats)
 {
     Bitset combined = new Bitset();
     for(int k = 0; k < cats.Length; k++){
         combined.SetCategory(cats[k]);
     }
     namedClasses[name] = combined;
 }
Пример #10
0
        /// <summary>
        /// Populates the named-class tables: the Unicode general categories
        /// (each family as a combined class followed by its individual members),
        /// the UNASSIGNED / ASSIGNED classes, the blocks described by
        /// <c>blockData</c>, and the catch-all "ALL" block. Sets
        /// <c>namesInitialized</c> when done.
        /// </summary>
        private static void InitNames()
        {
            InitCategoryGroup("C",
                new string[]{"Cn","Cc","Cf","Co","Cs"},
                new int[]{UnicodeConstants.Cn,UnicodeConstants.Cc,UnicodeConstants.Cf,UnicodeConstants.Co,UnicodeConstants.Cs});

            InitCategoryGroup("L",
                new string[]{"Lu","Ll","Lt","Lm","Lo"},
                new int[]{UnicodeConstants.Lu,UnicodeConstants.Ll,UnicodeConstants.Lt,UnicodeConstants.Lm,UnicodeConstants.Lo});

            InitCategoryGroup("M",
                new string[]{"Mn","Me","Mc"},
                new int[]{UnicodeConstants.Mn,UnicodeConstants.Me,UnicodeConstants.Mc});

            InitCategoryGroup("N",
                new string[]{"Nd","Nl","No"},
                new int[]{UnicodeConstants.Nd,UnicodeConstants.Nl,UnicodeConstants.No});

            InitCategoryGroup("Z",
                new string[]{"Zs","Zl","Zp"},
                new int[]{UnicodeConstants.Zs,UnicodeConstants.Zl,UnicodeConstants.Zp});

            InitCategoryGroup("P",
                new string[]{"Pd","Ps","Pi","Pe","Pf","Pc","Po"},
                new int[]{UnicodeConstants.Pd,UnicodeConstants.Ps,UnicodeConstants.Pi,UnicodeConstants.Pe,UnicodeConstants.Pf,UnicodeConstants.Pc,UnicodeConstants.Po});

            InitCategoryGroup("S",
                new string[]{"Sm","Sc","Sk","So"},
                new int[]{UnicodeConstants.Sm,UnicodeConstants.Sc,UnicodeConstants.Sk,UnicodeConstants.So});

            Bitset unassigned = new Bitset();
            unassigned.SetCategory(UnicodeConstants.Cn);
            RegisterClass("UNASSIGNED",unassigned,unicodeCategories);

            // ASSIGNED is the same category set with its polarity flipped.
            Bitset assigned = new Bitset();
            assigned.SetCategory(UnicodeConstants.Cn);
            assigned.SetPositive(false);
            RegisterClass("ASSIGNED",assigned,unicodeCategories);

            // blockData is consumed as consecutive (hex first, hex last, name) triples.
            char[] separators = new char[]{'.', ',', ':', ';'};
            string[] fields = blockData.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);
            for(int ix = 0; ix < fields.Length; ix += 3) {
                int first = Convert.ToInt32(fields[ix], 16);
                int last = Convert.ToInt32(fields[ix+1], 16);
                string name = fields[ix+2];
                InitNamedBlock(name,first,last);
            }

            InitNamedBlock("ALL",0,0xffff);

            namesInitialized=true;
        }

        /// <summary>
        /// Registers one category family: first the combined class under
        /// <paramref name="groupName"/>, then each member category under the
        /// name at the same position in <paramref name="memberNames"/>.
        /// </summary>
        private static void InitCategoryGroup(string groupName, string[] memberNames, int[] memberCats)
        {
            InitNamedCategory(groupName, memberCats);
            for(int k = 0; k < memberCats.Length; k++) {
                InitNamedCategory(memberNames[k], memberCats[k]);
            }
        }
Пример #11
0
 /// <summary>
 /// Builds a set for the single category <paramref name="cat"/> and registers
 /// it under <paramref name="name"/> in the <c>unicodeCategories</c> realm.
 /// </summary>
 /// <param name="name">Name under which the set is registered.</param>
 /// <param name="cat">Category code passed to <c>SetCategory</c>.</param>
 private static void InitNamedCategory(string name, int cat)
 {
     Bitset categorySet = new Bitset();
     categorySet.SetCategory(cat);
     RegisterClass(name, categorySet, unicodeCategories);
 }
Пример #12
0
        /// <summary>
        /// Adds the code range [<paramref name="first"/>, <paramref name="last"/>]
        /// to the class named <paramref name="name"/>. If no class with that name
        /// exists in <c>namedClasses</c>, a new one is created and registered in
        /// the <c>unicodeBlocks</c> realm first.
        /// </summary>
        /// <param name="name">Block name.</param>
        /// <param name="first">First code of the range; must fit in a <c>char</c>.</param>
        /// <param name="last">Last code of the range; must fit in a <c>char</c> and not precede <paramref name="first"/>.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when either bound is outside the <c>char</c> range or the bounds are reversed.
        /// </exception>
        private static void InitNamedBlock(string name, int first, int last)
        {
            if(first<char.MinValue || first>char.MaxValue) throw new ArgumentException("wrong start code ("+first+") in block "+name);
            if(last<char.MinValue || last>char.MaxValue) throw new ArgumentException("wrong end code ("+last+") in block "+name);
            if(last<first) throw new ArgumentException("end code < start code in block "+name);

            // Single lookup instead of ContainsKey followed by the indexer.
            Bitset bs;
            if(!namedClasses.TryGetValue(name, out bs)) {
                bs = new Bitset();
                RegisterClass(name,bs,unicodeBlocks);
            }
            bs.SetRange((char)first,(char)last);
        }
Пример #13
0
 /// <summary>
 /// Parses a class group: a sequence of bracketed classes combined with the
 /// operators '+', '-' and '&amp;'. Each operator selects how the next class is
 /// merged into the running result (add, subtract, intersect); the initial
 /// mode is add. The final set is transferred into <paramref name="term"/>.
 /// </summary>
 /// <param name="data">Pattern characters.</param>
 /// <param name="i">Index at which parsing starts.</param>
 /// <param name="_out">Exclusive upper bound for parsing.</param>
 /// <param name="term">Term that receives the combined class.</param>
 /// <param name="icase">Forwarded to <c>ParseClass</c>.</param>
 /// <param name="skipspaces">Forwarded to <c>ParseClass</c>.</param>
 /// <param name="unicode">Forwarded to <c>ParseClass</c>.</param>
 /// <param name="xml">Forwarded to <c>ParseClass</c>.</param>
 /// <exception cref="PatternSyntaxException">Thrown when a ')' is encountered.</exception>
 internal static void ParseGroup(char[] data, int i, int _out, Term term, bool icase, bool skipspaces, bool unicode, bool xml)
 {
     Bitset result = new Bitset();
     Bitset operand = new Bitset();
     int op = ADD;
     while(i<_out){
         char ch = data[i++];
         if(ch=='+'){
             op=ADD;
         }
         else if(ch=='-'){
             op=SUBTRACT;
         }
         else if(ch=='&'){
             op=INTERSECT;
         }
         else if(ch=='['){
             // The operand set is reused for every bracketed class.
             operand.Reset();
             i=ParseClass(data,i,_out,operand,icase,skipspaces,unicode,xml);
             if(op==ADD){
                 result.Add(operand);
             }
             else if(op==SUBTRACT){
                 result.Subtract(operand);
             }
             else if(op==INTERSECT){
                 result.Intersect(operand);
             }
         }
         else if(ch==')'){
             throw new  PatternSyntaxException("unbalanced class group");
         }
         // Any other character is skipped.
     }
     Bitset.Unify(result,term);
 }
Пример #14
0
 /// <summary>
 /// Subtracts <paramref name="bs"/> from this set and adjusts <c>weight</c>
 /// by the value returned from <c>SubtractImpl</c>.
 /// </summary>
 /// <param name="bs">The set to subtract.</param>
 /// <param name="inverse">
 /// Extra inversion flag; it is XOR-ed with the negation of <c>bs.positive</c>
 /// to obtain the flag handed to <c>SubtractImpl</c>.
 /// </param>
 internal void Subtract(Bitset bs, bool inverse)
 {
     bool effectiveInverse = (!bs.positive) ^ inverse;
     weight += SubtractImpl(this, bs, effectiveInverse);
 }
Пример #15
0
        /// <summary>
        /// Parses the body of a bracketed character class, starting just after
        /// the opening '[', and accumulates the described characters in
        /// <paramref name="bs"/>.
        /// </summary>
        /// <remarks>
        /// Handles a leading '^' (negation), literal characters, ranges
        /// (<c>a-z</c>), backslash escapes (control characters, hex / octal /
        /// decimal codes, predefined classes and <c>\p{..}</c> / <c>\P{..}</c>
        /// named classes) and, when <paramref name="xml"/> is set, the
        /// <c>-[...]</c> subtraction form.
        /// </remarks>
        /// <param name="data">Pattern characters.</param>
        /// <param name="i">Index of the first character after the opening '['.</param>
        /// <param name="_out">Exclusive upper bound for parsing.</param>
        /// <param name="bs">Set that receives the parsed characters.</param>
        /// <param name="icase">When true, literals and ranges are added in both lower and upper case.</param>
        /// <param name="skipspaces">When true, unescaped whitespace inside the class is ignored.</param>
        /// <param name="unicode">Selects the Unicode variants of the predefined classes.</param>
        /// <param name="xml">Enables the <c>-[...]</c> subtraction syntax.</param>
        /// <returns>The index just past the closing ']'.</returns>
        /// <exception cref="PatternSyntaxException">
        /// Thrown for an unterminated class, an illegal range, an incomplete
        /// escape sequence or an unknown named class.
        /// </exception>
        private static int ParseClass(char[] data, int i, int _out, Bitset bs, bool icase, bool skipspaces, bool unicode, bool xml)
        {
            char c;
            int prev=-1;   // pending literal not yet committed to bs; -1 when there is none
            bool isFirst=true, setFirst=false, inRange=false;
            Bitset bs1=null;
            StringBuilder sb=null;
            // isFirst stays true for the next iteration only when the current one set setFirst (leading '^').
            for(;i<_out;isFirst=setFirst,setFirst=false){
                switch(c=data[i++]){
                case ']':
                    if(isFirst) break; //treat as normal char
                    if(inRange){
                        // A dangling '-' right before ']' is a literal dash.
                        bs.SetChar('-');
                    }
                    if(prev>=0){
                        char c1=(char)prev;
                        if(icase){
                            bs.SetChar(char.ToLower(c1));
                            bs.SetChar(char.ToUpper(c1));
                        }
                        else bs.SetChar(c1);
                    }
                    return i;

                case '-':
                    if(isFirst) break;
                    if(inRange) break;
                    inRange=true;
                    continue;

                case '[':
                    if(inRange && xml) {
                        // "-[...]" form: subtract the nested class from what has been collected.
                        if(prev>=0) bs.SetChar((char)prev);
                        if(bs1==null) bs1 = new Bitset();
                        else bs1.Reset();
                        i=ParseClass(data,i,_out,bs1,icase,skipspaces,unicode,xml);
                        bs.Subtract(bs1);
                        inRange=false;
                        prev=-1;
                        continue;
                    }
                    else break;

                case '^':
                    if(isFirst){
                        bs.SetPositive(false);
                        setFirst=true;
                        continue;
                    }
                    break;

                case ' ':
                case '\r':
                case '\n':
                case '\t':
                case '\f':
                    if(skipspaces) continue;
                    else break;
                case '\\':
                    Bitset negativeClass = null;
                    bool inv = false;
                    // True when the escape resolved to a single character in c,
                    // which is then handled like any other literal below.
                    bool handle_special = false;
                    switch(c=data[i++]){
                    case 'r':
                        c='\r';
                        handle_special = true;
                        break;

                    case 'n':
                        c='\n';
                        handle_special = true;
                        break;

                    case 'e':
                        c='\u001B';
                        handle_special = true;
                        break;

                    case 't':
                        c='\t';
                        handle_special = true;
                        break;

                    case 'f':
                        c='\f';
                        handle_special = true;
                        break;

                    case 'u':   // \uXXXX: four hex digits
                        if(i>=_out-4) throw  new PatternSyntaxException("incomplete escape sequence \\uXXXX");
                        // All four digits come from the input after the 'u'
                        // (the escape letter itself is not a hex digit).
                        c=(char)((ToHexDigit(data[i++])<<12)
                                 +(ToHexDigit(data[i++])<<8)
                                 +(ToHexDigit(data[i++])<<4)
                                 +ToHexDigit(data[i++]));
                        handle_special = true;
                        break;

                    case 'v':   // \vXXXXXX: six hex digits
                        if(i>=_out-6) throw new PatternSyntaxException("incomplete escape sequence \\vXXXXXX");
                        // All six digits come from the input after the 'v'.
                        // NOTE(review): the result is cast to char, so only the low
                        // 16 bits survive (assuming an unchecked context) - confirm intent.
                        c=(char)((ToHexDigit(data[i++])<<20)+
                                 (ToHexDigit(data[i++])<<16)+
                                 (ToHexDigit(data[i++])<<12)+
                                 (ToHexDigit(data[i++])<<8)+
                                 (ToHexDigit(data[i++])<<4)+
                                 ToHexDigit(data[i++]));
                        handle_special = true;
                        break;

                    case 'b':
                        c=(char)8; // backspace
                        handle_special = true;
                        break;

                    case 'x':{   // hex: two digits, or any number of digits inside {...}
                        int hex=0;
                        char d;
                        if((d=data[i++])=='{'){
                            while((d=data[i++])!='}'){
                                hex=(hex<<4)+ToHexDigit(d);
                            }
                            if(hex>0xffff) throw new PatternSyntaxException("\\x{<out of range>}");
                        }
                        else{
                            hex=(ToHexDigit(d)<<4)+ToHexDigit(data[i++]);
                        }
                        c=(char)hex;
                        handle_special = true;
                        break;
                    }
                    case '0':   // octal number
                    case 'o':   // octal number
                        int oct=0;
                        for(;;){
                            char d=data[i++];
                            if(d>='0' && d<='7'){
                                oct*=8;
                                oct+=d-'0';
                                if(oct>0xffff) break;
                            }
                            else {
                                // Not an octal digit: push it back for the outer loop.
                                i--;
                                break;
                            }
                        }
                        c=(char)oct;
                        handle_special = true;
                        break;

                    case 'm':   // decimal number -> char
                        int dec=0;
                        for(;;){
                            char d=data[i++];
                            if(d>='0' && d<='9'){
                                dec*=10;
                                dec+=d-'0';
                                if(dec>0xffff) break;
                            } else {
                                // Not a decimal digit: push it back for the outer loop.
                                i--;
                                break;
                            }
                        }
                        c=(char)dec;
                        handle_special = true;
                        break;

                    case 'c':   // ctrl-char
                        c=(char)(data[i++]&0x1f);
                        handle_special = true;
                        break;

                    case 'D':   // non-digit
                        negativeClass = unicode ? UNONDIGIT : NONDIGIT;
                    break;

                    case 'S':   // non-space
                        negativeClass =unicode ? UNONSPACE : NONSPACE;
                    break;

                    case 'W':   // non-word char
                        negativeClass = unicode ? UNONWORDCHAR : NONWORDCHAR;
                    break;

                    case 'd':   // digit
                        if(inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\d...]");
                        bs.SetDigit(unicode);
                        continue;

                    case 's':   // space
                        if(inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\s...]");
                        bs.SetSpace(unicode);
                        continue;

                    case 'w':   // word char
                        if(inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\w...]");
                        bs.SetWordChar(unicode);
                        continue;

                    case 'P':   // \\P{..}
                        inv=true;
                        goto case 'p';
                    case 'p':   // \\p{..}
                        // c is still 'p' or 'P' here, so the message names the actual escape.
                        if(inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\"+c+"...]");
                        if(sb==null) sb = new StringBuilder();
                        else sb.Length = 0;
                        i=ParseName(data,i,_out,sb,skipspaces);
                        Bitset nc=GetNamedClass(sb.ToString());
                        if(nc==null) throw new PatternSyntaxException("unknown named class: {"+sb+"}");
                        bs.Add(nc,inv);
                        continue;

                    default:
                        // Any other escaped character stands for itself.
                        handle_special = true;
                        break;
                    }
                    if(handle_special) break;
                    // Only \D, \S and \W reach this point.
                    if(inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\"+c+"...]");
                    bs.Add(negativeClass);
                    continue;
                default:
                    break;
                }
                // c is a literal character at this point.
                if(prev<0){
                    prev=c;
                    inRange=false;
                    continue;
                }
                if(!inRange){
                    // Commit the previously pending literal and keep c pending.
                    char c1=(char)prev;
                    if(icase){
                        bs.SetChar(char.ToLower(c1));
                        bs.SetChar(char.ToUpper(c1));
                    }
                    else bs.SetChar(c1);
                    prev=c;
                }
                else {
                    // Range prev-c.
                    if(prev>c) throw new PatternSyntaxException("illegal range: "+prev+">"+c);
                    char c0=(char)prev;
                    inRange=false;
                    prev=-1;
                    if(icase){
                        bs.SetRange(char.ToLower(c0),char.ToLower(c));
                        bs.SetRange(char.ToUpper(c0),char.ToUpper(c));
                    }
                    else bs.SetRange(c0,c);
                }
            }
            throw new PatternSyntaxException("unbalanced brackets in a class def");
        }
Пример #16
0
 /// <summary>
 /// Merges the contents of <paramref name="bs2"/> into <paramref name="bs1"/>.
 /// When neither set is in large mode and <paramref name="inv"/> is false the
 /// operation is done on the single <c>block0</c> arrays (allocating the
 /// target's array on demand); otherwise both sets are switched to large mode
 /// and the block tables are processed.
 /// </summary>
 /// <param name="bs1">Set that is modified.</param>
 /// <param name="bs2">Set whose contents are added.</param>
 /// <param name="inv">Inversion flag forwarded to <c>Block.Add</c>.</param>
 /// <returns>
 /// The value reported by the underlying add routine, or 0 when
 /// <paramref name="bs2"/> has no <c>block0</c> in the small-mode path.
 /// </returns>
 private static int AddImpl(Bitset bs1, Bitset bs2, bool inv)
 {
     bool needsLargeMode = bs1.isLarge || bs2.isLarge || inv;
     if(needsLargeMode){
         if(!bs1.isLarge) bs1.EnableLargeMode();
         if(!bs2.isLarge) bs2.EnableLargeMode();
         return Block.Add(bs1.blocks,bs2.blocks,0,BLOCK_COUNT-1,inv);
     }

     // Small mode: nothing to add when the source has no block.
     bool[] source = bs2.block0;
     if(source==null) return 0;

     bool[] target = bs1.block0;
     if(target==null){
         target = new bool[BLOCK_SIZE];
         bs1.block0 = target;
     }
     return Add(target,source,0,BLOCK_SIZE-1,false);
 }
Пример #17
0
 /// <summary>
 /// Stores <paramref name="cls"/> in <c>namedClasses</c> under
 /// <paramref name="name"/> (replacing any existing entry) and makes sure the
 /// name is listed in <paramref name="realm"/> exactly once.
 /// </summary>
 /// <param name="name">Class name.</param>
 /// <param name="cls">Set to register.</param>
 /// <param name="realm">List of names the class belongs to.</param>
 private static void RegisterClass(string name, Bitset cls, IList<string> realm)
 {
     namedClasses[name] = cls;

     bool alreadyListed = realm.Contains(name);
     if(alreadyListed) return;
     realm.Add(name);
 }
Пример #18
0
 /// <summary>
 /// Copies the state of <paramref name="bs"/> into <paramref name="term"/>:
 /// the term type and bit data (large or single-block form), the inversion
 /// flag and the weight.
 /// </summary>
 /// <param name="bs">Source set.</param>
 /// <param name="term">Term that receives the data.</param>
 internal static void Unify(Bitset bs, Term term)
 {
     if(!bs.isLarge){
         term.type = Term.TermType.BITSET;
         // A set without an allocated block is represented by the shared empty block.
         term.bitset = bs.block0 ?? emptyBlock0;
     }
     else{
         term.type = Term.TermType.BITSET2;
         term.bitset2 = Block.ToBitset2(bs.blocks);
     }

     term.inverse = !bs.positive;
     if(bs.positive){
         term.weight = bs.weight;
     }
     else{
         // For a negated set the weight is the complement with respect to MAX_WEIGHT.
         term.weight = MAX_WEIGHT-bs.weight;
     }
 }
Пример #19
0
 /// <summary>
 /// Turns <paramref name="term"/> into a class that contains both the lower-
 /// and upper-case forms of <paramref name="c"/>.
 /// </summary>
 /// <param name="term">Term that receives the two-character class.</param>
 /// <param name="c">Character to match case-insensitively.</param>
 internal static void MakeICase(Term term, char c)
 {
     char lowerForm = char.ToLower(c);
     char upperForm = char.ToUpper(c);

     Bitset caseSet = new Bitset();
     caseSet.SetChar(lowerForm);
     caseSet.SetChar(upperForm);
     Bitset.Unify(caseSet,term);
 }