/// <summary>
/// Parses the body of a bracketed character class and records its members in <paramref name="bs"/>.
/// Scanning starts at <paramref name="i"/> (the recursive call below passes the index just past a '[')
/// and stops at the matching ']'.
/// </summary>
/// <param name="data">Pattern characters being parsed.</param>
/// <param name="i">Index of the first character of the class body.</param>
/// <param name="_out">Exclusive upper bound of the scan.</param>
/// <param name="bs">Bitset that receives the parsed characters, ranges and classes.</param>
/// <param name="icase">When true, literals and ranges are added in both char.ToLower and char.ToUpper form.</param>
/// <param name="skipspaces">When true, unescaped ' ', '\r', '\n', '\t' and '\f' are ignored.</param>
/// <param name="unicode">Selects the Unicode variants of the \d, \s, \w, \D, \S and \W classes.</param>
/// <param name="xml">When true, "-[...]" after a member subtracts the nested class from this one.</param>
/// <returns>The index just past the closing ']'.</returns>
/// <exception cref="PatternSyntaxException">
/// Thrown for an unterminated class, an illegal range, an incomplete \uXXXX escape,
/// an out-of-range \x{...} value or an unknown named class.
/// </exception>
private static int ParseClass(char[] data, int i, int _out, Bitset bs, bool icase, bool skipspaces, bool unicode, bool xml)
{
    char c;
    int prev = -1; // literal seen but not yet committed to bs; -1 when there is none
    bool isFirst = true, setFirst = false, inRange = false;
    Bitset bs1 = null;
    StringBuilder sb = null;
    // isFirst stays true for the iteration following a leading '^' (via setFirst),
    // so that "[^]...]" and "[^-...]" treat ']' / '-' as literals.
    for (; i < _out; isFirst = setFirst, setFirst = false)
    {
        switch (c = data[i++])
        {
            case ']':
                if (isFirst) break; // treat as normal char
                if (inRange)
                {
                    // a trailing '-' right before ']' is a literal dash
                    bs.SetChar('-');
                }
                if (prev >= 0)
                {
                    char c1 = (char)prev;
                    if (icase)
                    {
                        bs.SetChar(char.ToLower(c1));
                        bs.SetChar(char.ToUpper(c1));
                    }
                    else bs.SetChar(c1);
                }
                return i;

            case '-':
                if (isFirst) break;
                if (inRange) break;
                inRange = true;
                continue;

            case '[':
                if (inRange && xml)
                {
                    // [..-[..]] : subtract the nested class
                    if (prev >= 0) bs.SetChar((char)prev);
                    if (bs1 == null) bs1 = new Bitset();
                    else bs1.Reset();
                    i = ParseClass(data, i, _out, bs1, icase, skipspaces, unicode, xml);
                    bs.Subtract(bs1);
                    inRange = false;
                    prev = -1;
                    continue;
                }
                else break;

            case '^':
                if (isFirst)
                {
                    bs.SetPositive(false);
                    setFirst = true;
                    continue;
                }
                break;

            case ' ':
            case '\r':
            case '\n':
            case '\t':
            case '\f':
                if (skipspaces) continue;
                else break;

            case '\\':
                Bitset negatigeClass = null;
                bool inv = false;
                // true when the escape resolved to a single character in c,
                // which must then go through the literal/range handling below
                bool handle_special = false;
                switch (c = data[i++])
                {
                    case 'r':
                        c = '\r';
                        handle_special = true;
                        break;

                    case 'n':
                        c = '\n';
                        handle_special = true;
                        break;

                    case 'e':
                        c = '\u001B';
                        handle_special = true;
                        break;

                    case 't':
                        c = '\t';
                        handle_special = true;
                        break;

                    case 'f':
                        c = '\f';
                        handle_special = true;
                        break;

                    case 'u':
                        if (i >= _out - 4) throw new PatternSyntaxException("incomplete escape sequence \\uXXXX");
                        // All four hex digits come from the input; previously the escape
                        // letter 'u' itself was passed as the most significant digit.
                        c = (char)((ToHexDigit(data[i++]) << 12)
                            + (ToHexDigit(data[i++]) << 8)
                            + (ToHexDigit(data[i++]) << 4)
                            + ToHexDigit(data[i++]));
                        handle_special = true;
                        break;

                    case 'v':
                        // Six hex digits, all read from the input (previously the escape
                        // letter 'v' itself was passed as the first digit). The cast to
                        // char keeps only the low 16 bits unless compiled as checked.
                        c = (char)((ToHexDigit(data[i++]) << 20) +
                            (ToHexDigit(data[i++]) << 16) +
                            (ToHexDigit(data[i++]) << 12) +
                            (ToHexDigit(data[i++]) << 8) +
                            (ToHexDigit(data[i++]) << 4) +
                            ToHexDigit(data[i++]));
                        handle_special = true;
                        break;

                    case 'b':
                        c = (char)8; // backspace
                        handle_special = true;
                        break;

                    case 'x':
                        { // hex 2-digit number, or \x{...} with any number of digits
                            int hex = 0;
                            char d;
                            if ((d = data[i++]) == '{')
                            {
                                while ((d = data[i++]) != '}')
                                {
                                    hex = (hex << 4) + ToHexDigit(d);
                                }
                                if (hex > 0xffff) throw new PatternSyntaxException("\\x{<out of range>}");
                            }
                            else
                            {
                                hex = (ToHexDigit(d) << 4) + ToHexDigit(data[i++]);
                            }
                            c = (char)hex;
                            handle_special = true;
                            break;
                        }

                    case '0': // octal number
                    case 'o': // octal number
                        int oct = 0;
                        for (;;)
                        {
                            char d = data[i++];
                            if (d >= '0' && d <= '7')
                            {
                                oct *= 8;
                                oct += d - '0';
                                if (oct > 0xffff) break;
                            }
                            else
                            {
                                i--; // push back the first non-octal character
                                break;
                            }
                        }
                        c = (char)oct;
                        handle_special = true;
                        break;

                    case 'm': // decimal number -> char
                        int dec = 0;
                        for (;;)
                        {
                            char d = data[i++];
                            if (d >= '0' && d <= '9')
                            {
                                dec *= 10;
                                dec += d - '0';
                                if (dec > 0xffff) break;
                            }
                            else
                            {
                                i--; // push back the first non-decimal character
                                break;
                            }
                        }
                        c = (char)dec;
                        handle_special = true;
                        break;

                    case 'c': // ctrl-char
                        c = (char)(data[i++] & 0x1f);
                        handle_special = true;
                        break;

                    case 'D': // non-digit
                        negatigeClass = unicode ? UNONDIGIT : NONDIGIT;
                        break;

                    case 'S': // non-space
                        negatigeClass = unicode ? UNONSPACE : NONSPACE;
                        break;

                    case 'W': // non-word char
                        negatigeClass = unicode ? UNONWORDCHAR : NONWORDCHAR;
                        break;

                    case 'd': // digit
                        if (inRange) throw new PatternSyntaxException("illegal range: [..." + prev + "-\\d...]");
                        bs.SetDigit(unicode);
                        continue;

                    case 's': // space
                        if (inRange) throw new PatternSyntaxException("illegal range: [..." + prev + "-\\s...]");
                        bs.SetSpace(unicode);
                        continue;

                    case 'w': // word char
                        if (inRange) throw new PatternSyntaxException("illegal range: [..." + prev + "-\\w...]");
                        bs.SetWordChar(unicode);
                        continue;

                    case 'P': // \P{..}
                        inv = true;
                        goto case 'p';

                    case 'p': // \p{..}
                        // c is still 'p' or 'P' here, so the message names the escape actually used
                        if (inRange) throw new PatternSyntaxException("illegal range: [..." + prev + "-\\" + c + "...]");
                        if (sb == null) sb = new StringBuilder();
                        else sb.Length = 0;
                        i = ParseName(data, i, _out, sb, skipspaces);
                        Bitset nc = GetNamedClass(sb.ToString());
                        if (nc == null) throw new PatternSyntaxException("unknown named class: {" + sb + "}");
                        bs.Add(nc, inv);
                        continue;

                    default:
                        // any other escaped character stands for itself
                        handle_special = true;
                        break;
                }
                if (handle_special) break;
                // \D, \S or \W: a whole class, which cannot be a range endpoint
                if (inRange) throw new PatternSyntaxException("illegal range: [..." + prev + "-\\" + c + "...]");
                bs.Add(negatigeClass);
                continue;

            default:
                break;
        }

        // c is a literal character at this point
        if (prev < 0)
        {
            prev = c;
            inRange = false;
            continue;
        }
        if (!inRange)
        {
            // commit the previous literal and hold the new one
            char c1 = (char)prev;
            if (icase)
            {
                bs.SetChar(char.ToLower(c1));
                bs.SetChar(char.ToUpper(c1));
            }
            else bs.SetChar(c1);
            prev = c;
        }
        else
        {
            // prev-c closes a range
            if (prev > c) throw new PatternSyntaxException("illegal range: " + prev + ">" + c);
            char c0 = (char)prev;
            inRange = false;
            prev = -1;
            if (icase)
            {
                bs.SetRange(char.ToLower(c0), char.ToLower(c));
                bs.SetRange(char.ToUpper(c0), char.ToUpper(c));
            }
            else bs.SetRange(c0, c);
        }
    }
    throw new PatternSyntaxException("unbalanced brackets in a class def");
}
/// <summary>
/// Registers every named class: the category groups and their members, the
/// "UNASSIGNED" / "ASSIGNED" classes, the blocks described by <c>blockData</c>
/// and the catch-all "ALL" block. Sets <c>namesInitialized</c> when finished.
/// </summary>
private static void InitNames()
{
    // "C" group followed by its members
    int[] groupC = { UnicodeConstants.Cn, UnicodeConstants.Cc, UnicodeConstants.Cf, UnicodeConstants.Co, UnicodeConstants.Cs };
    InitNamedCategory("C", groupC);
    InitNamedCategory("Cn", UnicodeConstants.Cn);
    InitNamedCategory("Cc", UnicodeConstants.Cc);
    InitNamedCategory("Cf", UnicodeConstants.Cf);
    InitNamedCategory("Co", UnicodeConstants.Co);
    InitNamedCategory("Cs", UnicodeConstants.Cs);

    // "L" group followed by its members
    int[] groupL = { UnicodeConstants.Lu, UnicodeConstants.Ll, UnicodeConstants.Lt, UnicodeConstants.Lm, UnicodeConstants.Lo };
    InitNamedCategory("L", groupL);
    InitNamedCategory("Lu", UnicodeConstants.Lu);
    InitNamedCategory("Ll", UnicodeConstants.Ll);
    InitNamedCategory("Lt", UnicodeConstants.Lt);
    InitNamedCategory("Lm", UnicodeConstants.Lm);
    InitNamedCategory("Lo", UnicodeConstants.Lo);

    // "M" group followed by its members
    int[] groupM = { UnicodeConstants.Mn, UnicodeConstants.Me, UnicodeConstants.Mc };
    InitNamedCategory("M", groupM);
    InitNamedCategory("Mn", UnicodeConstants.Mn);
    InitNamedCategory("Me", UnicodeConstants.Me);
    InitNamedCategory("Mc", UnicodeConstants.Mc);

    // "N" group followed by its members
    int[] groupN = { UnicodeConstants.Nd, UnicodeConstants.Nl, UnicodeConstants.No };
    InitNamedCategory("N", groupN);
    InitNamedCategory("Nd", UnicodeConstants.Nd);
    InitNamedCategory("Nl", UnicodeConstants.Nl);
    InitNamedCategory("No", UnicodeConstants.No);

    // "Z" group followed by its members
    int[] groupZ = { UnicodeConstants.Zs, UnicodeConstants.Zl, UnicodeConstants.Zp };
    InitNamedCategory("Z", groupZ);
    InitNamedCategory("Zs", UnicodeConstants.Zs);
    InitNamedCategory("Zl", UnicodeConstants.Zl);
    InitNamedCategory("Zp", UnicodeConstants.Zp);

    // "P" group followed by its members
    int[] groupP = { UnicodeConstants.Pd, UnicodeConstants.Ps, UnicodeConstants.Pi, UnicodeConstants.Pe, UnicodeConstants.Pf, UnicodeConstants.Pc, UnicodeConstants.Po };
    InitNamedCategory("P", groupP);
    InitNamedCategory("Pd", UnicodeConstants.Pd);
    InitNamedCategory("Ps", UnicodeConstants.Ps);
    InitNamedCategory("Pi", UnicodeConstants.Pi);
    InitNamedCategory("Pe", UnicodeConstants.Pe);
    InitNamedCategory("Pf", UnicodeConstants.Pf);
    InitNamedCategory("Pc", UnicodeConstants.Pc);
    InitNamedCategory("Po", UnicodeConstants.Po);

    // "S" group followed by its members
    int[] groupS = { UnicodeConstants.Sm, UnicodeConstants.Sc, UnicodeConstants.Sk, UnicodeConstants.So };
    InitNamedCategory("S", groupS);
    InitNamedCategory("Sm", UnicodeConstants.Sm);
    InitNamedCategory("Sc", UnicodeConstants.Sc);
    InitNamedCategory("Sk", UnicodeConstants.Sk);
    InitNamedCategory("So", UnicodeConstants.So);

    // UNASSIGNED is the Cn category; ASSIGNED is the same set with SetPositive(false)
    Bitset unassigned = new Bitset();
    unassigned.SetCategory(UnicodeConstants.Cn);
    RegisterClass("UNASSIGNED", unassigned, unicodeCategories);

    Bitset assigned = new Bitset();
    assigned.SetCategory(UnicodeConstants.Cn);
    assigned.SetPositive(false);
    RegisterClass("ASSIGNED", assigned, unicodeCategories);

    // blockData is consumed as consecutive triples: hex first code, hex last code, block name
    char[] separators = new char[] { '.', ',', ':', ';' };
    string[] fields = blockData.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);
    for (int pos = 0; pos < fields.Length; pos += 3)
    {
        int firstCode = Convert.ToInt32(fields[pos], 16);
        int lastCode = Convert.ToInt32(fields[pos + 1], 16);
        string blockName = fields[pos + 2];
        InitNamedBlock(blockName, firstCode, lastCode);
    }

    InitNamedBlock("ALL", 0, 0xffff);
    namesInitialized = true;
}