/// <summary>
/// Parses the body of a bracketed character class from <paramref name="data"/>,
/// starting at index <paramref name="i"/>, and records the matched characters in
/// <paramref name="bs"/>. Parsing stops at the first <c>]</c> that is not in the
/// leading position.
/// </summary>
/// <param name="data">Pattern characters.</param>
/// <param name="i">Index of the first character of the class body to read.</param>
/// <param name="_out">Exclusive upper bound for <paramref name="i"/>.</param>
/// <param name="bs">Bitset that receives the characters, ranges and sub-classes.</param>
/// <param name="icase">When true, literals and ranges are added in both lower and upper case.</param>
/// <param name="skipspaces">When true, unescaped space, CR, LF, TAB and FF are ignored.</param>
/// <param name="unicode">Selects the Unicode variants of <c>\d \s \w \D \S \W</c>.</param>
/// <param name="xml">When true, <c>-[...]</c> subtracts the nested class from the current one.</param>
/// <returns>The index just past the closing <c>]</c>.</returns>
/// <exception cref="PatternSyntaxException">
/// Thrown for an illegal range, an unknown named class, an incomplete or out-of-range
/// <c>\u</c>/<c>\v</c>/<c>\x{}</c> escape, or when no closing bracket is found before
/// <paramref name="_out"/>.
/// </exception>
private static int ParseClass(char[] data, int i, int _out, Bitset bs, bool icase, bool skipspaces, bool unicode, bool xml)
{
    char c;
    int prev = -1;          // literal read but not yet written to bs; -1 when there is none
    bool isFirst = true;    // true at the first position and right after a leading '^'
    bool setFirst = false;  // request to keep isFirst true for the next iteration
    bool inRange = false;   // a '-' has been read after prev
    Bitset bs1 = null;      // scratch bitset for nested (subtracted) classes
    StringBuilder sb = null;

    for (; i < _out; isFirst = setFirst, setFirst = false) {
        switch (c = data[i++]) {
            case ']':
                if (isFirst) {
                    break; // treat as normal char
                }
                if (inRange) {
                    // Trailing '-' with no upper bound is a literal dash.
                    bs.SetChar('-');
                }
                if (prev >= 0) {
                    char c1 = (char)prev;
                    if (icase) {
                        bs.SetChar(char.ToLower(c1));
                        bs.SetChar(char.ToUpper(c1));
                    }
                    else {
                        bs.SetChar(c1);
                    }
                }
                return i;

            case '-':
                if (isFirst) {
                    break;
                }
                if (inRange) {
                    break;
                }
                inRange = true;
                continue;

            case '[':
                if (inRange && xml) {
                    // "-[...]": flush the pending literal, then subtract the nested class.
                    if (prev >= 0) {
                        char pending = (char)prev;
                        // Honour icase here exactly like the other flush sites do.
                        if (icase) {
                            bs.SetChar(char.ToLower(pending));
                            bs.SetChar(char.ToUpper(pending));
                        }
                        else {
                            bs.SetChar(pending);
                        }
                    }
                    if (bs1 == null) {
                        bs1 = new Bitset();
                    }
                    else {
                        bs1.Reset();
                    }
                    i = ParseClass(data, i, _out, bs1, icase, skipspaces, unicode, xml);
                    bs.Subtract(bs1);
                    inRange = false;
                    prev = -1;
                    continue;
                }
                else {
                    break;
                }

            case '^':
                if (isFirst) {
                    bs.SetPositive(false);
                    setFirst = true; // a ']' or '-' right after '^' is still "first"
                    continue;
                }
                break;

            case ' ':
            case '\r':
            case '\n':
            case '\t':
            case '\f':
                if (skipspaces) {
                    continue;
                }
                else {
                    break;
                }

            case '\\':
                Bitset negatedClass = null;
                bool inv = false;
                bool isLiteral = false; // true when the escape denotes a single character in c
                switch (c = data[i++]) {
                    case 'r':
                        c = '\r';
                        isLiteral = true;
                        break;

                    case 'n':
                        c = '\n';
                        isLiteral = true;
                        break;

                    case 'e':
                        c = '\u001B';
                        isLiteral = true;
                        break;

                    case 't':
                        c = '\t';
                        isLiteral = true;
                        break;

                    case 'f':
                        c = '\f';
                        isLiteral = true;
                        break;

                    case 'u': { // \uXXXX: four hex digits
                        if (i >= _out - 4) {
                            throw new PatternSyntaxException("incomplete escape sequence \\uXXXX");
                        }
                        // All four digits come from the input, not from the 'u' itself.
                        c = (char)((ToHexDigit(data[i++]) << 12)
                                 + (ToHexDigit(data[i++]) << 8)
                                 + (ToHexDigit(data[i++]) << 4)
                                 + ToHexDigit(data[i++]));
                        isLiteral = true;
                        break;
                    }

                    case 'v': { // \vXXXXXX: six hex digits
                        if (i >= _out - 6) {
                            throw new PatternSyntaxException("incomplete escape sequence \\vXXXXXX");
                        }
                        int code = (ToHexDigit(data[i++]) << 20)
                                 + (ToHexDigit(data[i++]) << 16)
                                 + (ToHexDigit(data[i++]) << 12)
                                 + (ToHexDigit(data[i++]) << 8)
                                 + (ToHexDigit(data[i++]) << 4)
                                 + ToHexDigit(data[i++]);
                        // The value is stored in a char, so reject anything that would be
                        // truncated, as \x{...} does.
                        if (code > 0xffff) {
                            throw new PatternSyntaxException("\\v<out of range>");
                        }
                        c = (char)code;
                        isLiteral = true;
                        break;
                    }

                    case 'b':
                        c = (char)8; // backspace
                        isLiteral = true;
                        break;

                    case 'x': { // \xHH (two hex digits) or \x{H...}
                        int hex = 0;
                        char d;
                        if ((d = data[i++]) == '{') {
                            while ((d = data[i++]) != '}') {
                                hex = (hex << 4) + ToHexDigit(d);
                            }
                            if (hex > 0xffff) {
                                throw new PatternSyntaxException("\\x{<out of range>}");
                            }
                        }
                        else {
                            hex = (ToHexDigit(d) << 4) + ToHexDigit(data[i++]);
                        }
                        c = (char)hex;
                        isLiteral = true;
                        break;
                    }

                    case '0': // octal number
                    case 'o': // octal number
                        int oct = 0;
                        for (;;) {
                            char d = data[i++];
                            if (d >= '0' && d <= '7') {
                                oct *= 8;
                                oct += d - '0';
                                if (oct > 0xffff) {
                                    break;
                                }
                            }
                            else {
                                i--; // not an octal digit: leave it for the outer loop
                                break;
                            }
                        }
                        c = (char)oct;
                        isLiteral = true;
                        break;

                    case 'm': // decimal number -> char
                        int dec = 0;
                        for (;;) {
                            char d = data[i++];
                            if (d >= '0' && d <= '9') {
                                dec *= 10;
                                dec += d - '0';
                                if (dec > 0xffff) {
                                    break;
                                }
                            }
                            else {
                                i--; // not a decimal digit: leave it for the outer loop
                                break;
                            }
                        }
                        c = (char)dec;
                        isLiteral = true;
                        break;

                    case 'c': // ctrl-char
                        c = (char)(data[i++] & 0x1f);
                        isLiteral = true;
                        break;

                    case 'D': // non-digit
                        negatedClass = unicode ? UNONDIGIT : NONDIGIT;
                        break;

                    case 'S': // non-space
                        negatedClass = unicode ? UNONSPACE : NONSPACE;
                        break;

                    case 'W': // non-word character
                        negatedClass = unicode ? UNONWORDCHAR : NONWORDCHAR;
                        break;

                    case 'd': // digit
                        if (inRange) {
                            throw new PatternSyntaxException("illegal range: [..." + prev + "-\\d...]");
                        }
                        bs.SetDigit(unicode);
                        continue;

                    case 's': // space
                        if (inRange) {
                            throw new PatternSyntaxException("illegal range: [..." + prev + "-\\s...]");
                        }
                        bs.SetSpace(unicode);
                        continue;

                    case 'w': // word character
                        if (inRange) {
                            throw new PatternSyntaxException("illegal range: [..." + prev + "-\\w...]");
                        }
                        bs.SetWordChar(unicode);
                        continue;

                    case 'P': // \P{..}
                        inv = true;
                        goto case 'p';

                    case 'p': // \p{..}
                        if (inRange) {
                            // c still holds the escape letter ('p' or 'P').
                            throw new PatternSyntaxException("illegal range: [..." + prev + "-\\" + c + "...]");
                        }
                        if (sb == null) {
                            sb = new StringBuilder();
                        }
                        else {
                            sb.Length = 0;
                        }
                        i = ParseName(data, i, _out, sb, skipspaces);
                        Bitset nc = GetNamedClass(sb.ToString());
                        if (nc == null) {
                            throw new PatternSyntaxException("unknown named class: {" + sb + "}");
                        }
                        bs.Add(nc, inv);
                        continue;

                    default:
                        // Any other escaped character stands for itself.
                        isLiteral = true;
                        break;
                }
                if (isLiteral) {
                    break; // fall through to the literal/range handling below
                }
                if (inRange) {
                    throw new PatternSyntaxException("illegal range: [..." + prev + "-\\" + c + "...]");
                }
                bs.Add(negatedClass);
                continue;

            default:
                break;
        }

        // c is a literal character at this point.
        if (prev < 0) {
            prev = c;
            inRange = false;
            continue;
        }
        if (!inRange) {
            // Two literals in a row: commit the older one, keep the newer one pending.
            char c1 = (char)prev;
            if (icase) {
                bs.SetChar(char.ToLower(c1));
                bs.SetChar(char.ToUpper(c1));
            }
            else {
                bs.SetChar(c1);
            }
            prev = c;
        }
        else {
            // prev-c closes a range.
            if (prev > c) {
                throw new PatternSyntaxException("illegal range: " + prev + ">" + c);
            }
            char c0 = (char)prev;
            inRange = false;
            prev = -1;
            if (icase) {
                bs.SetRange(char.ToLower(c0), char.ToLower(c));
                bs.SetRange(char.ToUpper(c0), char.ToUpper(c));
            }
            else {
                bs.SetRange(c0, c);
            }
        }
    }
    throw new PatternSyntaxException("unbalanced brackets in a class def");
}
/// <summary>
/// Builds the POSIX character classes (Lower, Upper, ASCII, Alpha, Digit, Alnum,
/// Punct, Graph, Print, Blank, Cntrl, XDigit, Space) and registers each one in
/// <c>posixClasses</c> through <c>RegisterClass</c>.
/// </summary>
private static void InitPosixClasses()
{
    Bitset lower = new Bitset();
    lower.SetRange('a', 'z');
    RegisterClass("Lower", lower, posixClasses);

    Bitset upper = new Bitset();
    upper.SetRange('A', 'Z');
    RegisterClass("Upper", upper, posixClasses);

    Bitset ascii = new Bitset();
    ascii.SetRange((char)0, (char)0x7f);
    RegisterClass("ASCII", ascii, posixClasses);

    Bitset alpha = new Bitset();
    alpha.Add(lower);
    alpha.Add(upper);
    RegisterClass("Alpha", alpha, posixClasses);

    Bitset digit = new Bitset();
    digit.SetRange('0', '9');
    RegisterClass("Digit", digit, posixClasses);

    Bitset alnum = new Bitset();
    alnum.Add(alpha);
    alnum.Add(digit);
    RegisterClass("Alnum", alnum, posixClasses);

    Bitset punct = new Bitset();
    punct.SetChars("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
    RegisterClass("Punct", punct, posixClasses);

    Bitset graph = new Bitset();
    graph.Add(alnum);
    graph.Add(punct);
    RegisterClass("Graph", graph, posixClasses);

    // Print is Graph plus the space character, so it needs its own bitset
    // rather than sharing the Graph one.
    Bitset print = new Bitset();
    print.Add(graph);
    print.SetChar(' ');
    RegisterClass("Print", print, posixClasses);

    Bitset blank = new Bitset();
    blank.SetChars(" \t");
    RegisterClass("Blank", blank, posixClasses);

    Bitset cntrl = new Bitset();
    cntrl.SetRange((char)0, (char)0x1f);
    cntrl.SetChar((char)0x7f);
    RegisterClass("Cntrl", cntrl, posixClasses);

    Bitset xdigit = new Bitset();
    xdigit.SetRange('0', '9');
    xdigit.SetRange('a', 'f');
    xdigit.SetRange('A', 'F');
    RegisterClass("XDigit", xdigit, posixClasses);

    Bitset space = new Bitset();
    space.SetChars(" \t\n\r\f\u000b");
    RegisterClass("Space", space, posixClasses);
}