/// <summary>
/// Intersects this set with <paramref name="bs"/>. The work is delegated to
/// the two-argument <c>Subtract</c> with the <paramref name="inverse"/> flag flipped.
/// </summary>
/// <param name="bs">The other set.</param>
/// <param name="inverse">Negated and forwarded to <c>Subtract</c>.</param>
internal void Intersect(Bitset bs, bool inverse)
{
    bool flipped = !inverse;
    Subtract(bs, flipped);
}
/// <summary>
/// Subtracts <paramref name="bs"/> from this set; shorthand for the
/// two-argument <c>Subtract</c> with <c>inverse == false</c>.
/// </summary>
/// <param name="bs">The set to subtract.</param>
internal void Subtract(Bitset bs)
{
    const bool inverse = false;
    Subtract(bs, inverse);
}
/// <summary>
/// Adds <paramref name="bs"/> to this set and adjusts <c>weight</c> by the
/// amount reported by <c>AddImpl</c>.
/// </summary>
/// <param name="bs">The set to add.</param>
/// <param name="inverse">
/// Combined with the polarity of <paramref name="bs"/> to decide whether
/// <c>AddImpl</c> is asked to work with the inverted operand.
/// </param>
internal void Add(Bitset bs, bool inverse)
{
    // Same truth table as (!bs.positive) ^ inverse.
    bool invertOperand = bs.positive == inverse;
    weight += AddImpl(this, bs, invertOperand);
}
/// <summary>
/// Intersects this set with <paramref name="bs"/>; shorthand for the
/// two-argument <c>Intersect</c> with <c>inverse == false</c>.
/// </summary>
/// <param name="bs">The other set.</param>
internal void Intersect(Bitset bs)
{
    const bool inverse = false;
    Intersect(bs, inverse);
}
/// <summary>
/// Parses a character class from <paramref name="data"/> into a fresh
/// <c>Bitset</c> and stores the result in <paramref name="term"/> via
/// <c>Bitset.Unify</c>.
/// </summary>
/// <param name="data">Pattern characters.</param>
/// <param name="i">Index at which class parsing starts.</param>
/// <param name="_out">Exclusive upper bound for parsing.</param>
/// <param name="term">Term that receives the parsed class.</param>
/// <param name="icase">Forwarded to the Bitset-based overload.</param>
/// <param name="skipspaces">Forwarded to the Bitset-based overload.</param>
/// <param name="unicode">Forwarded to the Bitset-based overload.</param>
/// <param name="xml">Forwarded to the Bitset-based overload.</param>
/// <returns>The index returned by the Bitset-based overload.</returns>
internal static int ParseClass(char[] data, int i, int _out, Term term, bool icase, bool skipspaces, bool unicode, bool xml)
{
    Bitset parsed = new Bitset();
    int next = ParseClass(data, i, _out, parsed, icase, skipspaces, unicode, xml);
    Bitset.Unify(parsed, term);
    return next;
}
/// <summary>
/// Adds <paramref name="bs"/> to this set; shorthand for the two-argument
/// <c>Add</c> with <c>inverse == false</c>.
/// </summary>
/// <param name="bs">The set to add.</param>
internal void Add(Bitset bs)
{
    const bool inverse = false;
    Add(bs, inverse);
}
/// <summary>
/// Builds the POSIX-style named character classes and registers each of them
/// in the <c>posixClasses</c> realm through <c>RegisterClass</c>.
/// </summary>
private static void InitPosixClasses()
{
    // Letters.
    Bitset lowerSet = new Bitset();
    lowerSet.SetRange('a', 'z');
    RegisterClass("Lower", lowerSet, posixClasses);

    Bitset upperSet = new Bitset();
    upperSet.SetRange('A', 'Z');
    RegisterClass("Upper", upperSet, posixClasses);

    // 7-bit ASCII.
    Bitset asciiSet = new Bitset();
    asciiSet.SetRange((char)0, (char)0x7f);
    RegisterClass("ASCII", asciiSet, posixClasses);

    // Alpha = Lower + Upper.
    Bitset alphaSet = new Bitset();
    alphaSet.Add(lowerSet);
    alphaSet.Add(upperSet);
    RegisterClass("Alpha", alphaSet, posixClasses);

    Bitset digitSet = new Bitset();
    digitSet.SetRange('0', '9');
    RegisterClass("Digit", digitSet, posixClasses);

    // Alnum = Alpha + Digit.
    Bitset alnumSet = new Bitset();
    alnumSet.Add(alphaSet);
    alnumSet.Add(digitSet);
    RegisterClass("Alnum", alnumSet, posixClasses);

    Bitset punctSet = new Bitset();
    punctSet.SetChars("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
    RegisterClass("Punct", punctSet, posixClasses);

    // Graph = Alnum + Punct; the very same instance is also registered as "Print".
    Bitset graphSet = new Bitset();
    graphSet.Add(alnumSet);
    graphSet.Add(punctSet);
    RegisterClass("Graph", graphSet, posixClasses);
    RegisterClass("Print", graphSet, posixClasses);

    Bitset blankSet = new Bitset();
    blankSet.SetChars(" \t");
    RegisterClass("Blank", blankSet, posixClasses);

    // Control characters: 0x00..0x1f plus 0x7f.
    Bitset cntrlSet = new Bitset();
    cntrlSet.SetRange((char)0, (char)0x1f);
    cntrlSet.SetChar((char)0x7f);
    RegisterClass("Cntrl", cntrlSet, posixClasses);

    // Hexadecimal digits in both cases.
    Bitset xdigitSet = new Bitset();
    xdigitSet.SetRange('0', '9');
    xdigitSet.SetRange('a', 'f');
    xdigitSet.SetRange('A', 'F');
    RegisterClass("XDigit", xdigitSet, posixClasses);

    Bitset spaceSet = new Bitset();
    spaceSet.SetChars(" \t\n\r\f\u000b");
    RegisterClass("Space", spaceSet, posixClasses);
}
/// <summary>
/// Subtracts <paramref name="bs2"/> from <paramref name="bs1"/> and returns
/// the value reported by the underlying block-level subtraction.
/// </summary>
/// <param name="bs1">Set that is modified.</param>
/// <param name="bs2">Set being subtracted; switched to large mode when the large path is taken.</param>
/// <param name="inv">Forwarded to <c>Block.Subtract</c>; a true value forces the large path.</param>
/// <returns>
/// The accumulated result of the block subtraction, or 0 when, on the small
/// path, either set has no <c>block0</c>.
/// </returns>
private static int SubtractImpl(Bitset bs1, Bitset bs2, bool inv)
{
    int delta = 0;

    bool smallPath = !bs1.isLarge && !bs2.isLarge && !inv;
    if (smallPath)
    {
        bool[] subtrahend = bs2.block0;
        if (subtrahend == null)
        {
            // Nothing to subtract.
            return delta;
        }

        bool[] target = bs1.block0;
        if (target == null)
        {
            // Nothing to subtract from.
            return 0;
        }

        delta += Subtract(target, subtrahend, 0, BLOCK_SIZE - 1, false);
        return delta;
    }

    // Large path: both operands must use the multi-block representation.
    if (!bs1.isLarge)
    {
        bs1.EnableLargeMode();
    }
    if (!bs2.isLarge)
    {
        bs2.EnableLargeMode();
    }
    delta += Block.Subtract(bs1.blocks, bs2.blocks, 0, BLOCK_COUNT - 1, inv);
    return delta;
}
/// <summary>
/// Builds a set covering every category in <paramref name="cats"/> and stores
/// it in <c>namedClasses</c> under <paramref name="name"/>.
/// </summary>
/// <param name="name">Key under which the combined set is stored.</param>
/// <param name="cats">Category codes passed one by one to <c>SetCategory</c>.</param>
private static void InitNamedCategory(string name, int[] cats)
{
    Bitset combined = new Bitset();
    for (int k = 0; k < cats.Length; k++)
    {
        combined.SetCategory(cats[k]);
    }

    // NOTE(review): unlike the single-category overload, this writes to
    // namedClasses directly and does not go through RegisterClass.
    namedClasses[name] = combined;
}
/// <summary>
/// Registers the Unicode general-category classes, the UNASSIGNED/ASSIGNED
/// classes, every block listed in <c>blockData</c> and the catch-all "ALL"
/// block, then sets <c>namesInitialized</c>.
/// </summary>
private static void InitNames()
{
    // C: "other" categories.
    InitNamedCategory("C", new int[]
    {
        UnicodeConstants.Cn, UnicodeConstants.Cc, UnicodeConstants.Cf,
        UnicodeConstants.Co, UnicodeConstants.Cs
    });
    InitNamedCategory("Cn", UnicodeConstants.Cn);
    InitNamedCategory("Cc", UnicodeConstants.Cc);
    InitNamedCategory("Cf", UnicodeConstants.Cf);
    InitNamedCategory("Co", UnicodeConstants.Co);
    InitNamedCategory("Cs", UnicodeConstants.Cs);

    // L: letters.
    InitNamedCategory("L", new int[]
    {
        UnicodeConstants.Lu, UnicodeConstants.Ll, UnicodeConstants.Lt,
        UnicodeConstants.Lm, UnicodeConstants.Lo
    });
    InitNamedCategory("Lu", UnicodeConstants.Lu);
    InitNamedCategory("Ll", UnicodeConstants.Ll);
    InitNamedCategory("Lt", UnicodeConstants.Lt);
    InitNamedCategory("Lm", UnicodeConstants.Lm);
    InitNamedCategory("Lo", UnicodeConstants.Lo);

    // M: marks.
    InitNamedCategory("M", new int[]
    {
        UnicodeConstants.Mn, UnicodeConstants.Me, UnicodeConstants.Mc
    });
    InitNamedCategory("Mn", UnicodeConstants.Mn);
    InitNamedCategory("Me", UnicodeConstants.Me);
    InitNamedCategory("Mc", UnicodeConstants.Mc);

    // N: numbers.
    InitNamedCategory("N", new int[]
    {
        UnicodeConstants.Nd, UnicodeConstants.Nl, UnicodeConstants.No
    });
    InitNamedCategory("Nd", UnicodeConstants.Nd);
    InitNamedCategory("Nl", UnicodeConstants.Nl);
    InitNamedCategory("No", UnicodeConstants.No);

    // Z: separators.
    InitNamedCategory("Z", new int[]
    {
        UnicodeConstants.Zs, UnicodeConstants.Zl, UnicodeConstants.Zp
    });
    InitNamedCategory("Zs", UnicodeConstants.Zs);
    InitNamedCategory("Zl", UnicodeConstants.Zl);
    InitNamedCategory("Zp", UnicodeConstants.Zp);

    // P: punctuation.
    InitNamedCategory("P", new int[]
    {
        UnicodeConstants.Pd, UnicodeConstants.Ps, UnicodeConstants.Pi,
        UnicodeConstants.Pe, UnicodeConstants.Pf, UnicodeConstants.Pc,
        UnicodeConstants.Po
    });
    InitNamedCategory("Pd", UnicodeConstants.Pd);
    InitNamedCategory("Ps", UnicodeConstants.Ps);
    InitNamedCategory("Pi", UnicodeConstants.Pi);
    InitNamedCategory("Pe", UnicodeConstants.Pe);
    InitNamedCategory("Pf", UnicodeConstants.Pf);
    InitNamedCategory("Pc", UnicodeConstants.Pc);
    InitNamedCategory("Po", UnicodeConstants.Po);

    // S: symbols.
    InitNamedCategory("S", new int[]
    {
        UnicodeConstants.Sm, UnicodeConstants.Sc, UnicodeConstants.Sk,
        UnicodeConstants.So
    });
    InitNamedCategory("Sm", UnicodeConstants.Sm);
    InitNamedCategory("Sc", UnicodeConstants.Sc);
    InitNamedCategory("Sk", UnicodeConstants.Sk);
    InitNamedCategory("So", UnicodeConstants.So);

    // UNASSIGNED is category Cn; ASSIGNED is the same set with negative polarity.
    Bitset unassigned = new Bitset();
    unassigned.SetCategory(UnicodeConstants.Cn);
    RegisterClass("UNASSIGNED", unassigned, unicodeCategories);

    Bitset assigned = new Bitset();
    assigned.SetCategory(UnicodeConstants.Cn);
    assigned.SetPositive(false);
    RegisterClass("ASSIGNED", assigned, unicodeCategories);

    // blockData is consumed as consecutive (hex first, hex last, name) triples.
    char[] separators = new char[]{'.', ',', ':', ';'};
    string[] tokens = blockData.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);
    for (int pos = 0; pos < tokens.Length; pos += 3)
    {
        int first = Convert.ToInt32(tokens[pos], 16);
        int last = Convert.ToInt32(tokens[pos + 1], 16);
        string blockName = tokens[pos + 2];
        InitNamedBlock(blockName, first, last);
    }

    InitNamedBlock("ALL", 0, 0xffff);
    namesInitialized = true;
}
/// <summary>
/// Builds a set for the single category <paramref name="cat"/> and registers
/// it under <paramref name="name"/> in the <c>unicodeCategories</c> realm.
/// </summary>
/// <param name="name">Name of the class.</param>
/// <param name="cat">Category code passed to <c>SetCategory</c>.</param>
private static void InitNamedCategory(string name, int cat)
{
    Bitset categorySet = new Bitset();
    categorySet.SetCategory(cat);
    RegisterClass(name, categorySet, unicodeCategories);
}
/// <summary>
/// Adds the code range [<paramref name="first"/>, <paramref name="last"/>] to
/// the class named <paramref name="name"/>. If no class with that name exists
/// in <c>namedClasses</c>, a new one is created and registered in the
/// <c>unicodeBlocks</c> realm first.
/// </summary>
/// <param name="name">Block name.</param>
/// <param name="first">First code of the range; must fit in a <c>char</c>.</param>
/// <param name="last">Last code of the range; must fit in a <c>char</c> and be >= <paramref name="first"/>.</param>
/// <exception cref="ArgumentException">If a bound is outside the char range or the range is reversed.</exception>
private static void InitNamedBlock(string name, int first, int last)
{
    bool firstOutOfRange = first < char.MinValue || first > char.MaxValue;
    if (firstOutOfRange)
    {
        throw new ArgumentException("wrong start code ("+first+") in block "+name);
    }

    bool lastOutOfRange = last < char.MinValue || last > char.MaxValue;
    if (lastOutOfRange)
    {
        throw new ArgumentException("wrong end code ("+last+") in block "+name);
    }

    if (last < first)
    {
        throw new ArgumentException("end code < start code in block "+name);
    }

    Bitset block;
    if (!namedClasses.ContainsKey(name))
    {
        block = new Bitset();
        RegisterClass(name, block, unicodeBlocks);
    }
    else
    {
        // An existing class with this name is extended rather than replaced.
        block = namedClasses[name];
    }

    block.SetRange((char)first, (char)last);
}
/// <summary>
/// Parses a class group: a sequence of bracketed classes combined with the
/// operators '+', '-' and '&amp;'. The combined set is stored in
/// <paramref name="term"/> via <c>Bitset.Unify</c>.
/// </summary>
/// <param name="data">Pattern characters.</param>
/// <param name="i">Index at which parsing starts.</param>
/// <param name="_out">Exclusive upper bound for parsing.</param>
/// <param name="term">Term that receives the combined set.</param>
/// <param name="icase">Forwarded to <c>ParseClass</c>.</param>
/// <param name="skipspaces">Forwarded to <c>ParseClass</c>.</param>
/// <param name="unicode">Forwarded to <c>ParseClass</c>.</param>
/// <param name="xml">Forwarded to <c>ParseClass</c>.</param>
/// <exception cref="PatternSyntaxException">If a ')' is met inside the scanned range.</exception>
internal static void ParseGroup(char[] data, int i, int _out, Term term, bool icase, bool skipspaces, bool unicode, bool xml)
{
    Bitset sum = new Bitset();
    Bitset operand = new Bitset();
    int mode = ADD; // operator applied to the next bracketed class

    while (i < _out)
    {
        char ch = data[i++];

        if (ch == '+')
        {
            mode = ADD;
        }
        else if (ch == '-')
        {
            mode = SUBTRACT;
        }
        else if (ch == '&')
        {
            mode = INTERSECT;
        }
        else if (ch == '[')
        {
            // The operand set is reused for every class in the group.
            operand.Reset();
            i = ParseClass(data, i, _out, operand, icase, skipspaces, unicode, xml);

            if (mode == ADD)
            {
                sum.Add(operand);
            }
            else if (mode == SUBTRACT)
            {
                sum.Subtract(operand);
            }
            else if (mode == INTERSECT)
            {
                sum.Intersect(operand);
            }
        }
        else if (ch == ')')
        {
            throw new PatternSyntaxException("unbalanced class group");
        }
        // Any other character is skipped.
    }

    Bitset.Unify(sum, term);
}
/// <summary>
/// Subtracts <paramref name="bs"/> from this set and adjusts <c>weight</c> by
/// the amount reported by <c>SubtractImpl</c>.
/// </summary>
/// <param name="bs">The set to subtract.</param>
/// <param name="inverse">
/// Combined with the polarity of <paramref name="bs"/> to decide whether
/// <c>SubtractImpl</c> is asked to work with the inverted operand.
/// </param>
internal void Subtract(Bitset bs, bool inverse)
{
    // Same truth table as (!bs.positive) ^ inverse.
    bool invertOperand = bs.positive == inverse;
    weight += SubtractImpl(this, bs, invertOperand);
}
/// <summary>
/// Parses the body of a bracketed character class (the text following the
/// opening '[') and accumulates its members into <paramref name="bs"/>.
/// </summary>
/// <param name="data">Pattern characters.</param>
/// <param name="i">Index of the first character after the opening '['.</param>
/// <param name="_out">Exclusive upper bound for parsing.</param>
/// <param name="bs">Set that receives the parsed characters, ranges and named classes.</param>
/// <param name="icase">When true, literal characters and ranges are added in both lower and upper case.</param>
/// <param name="skipspaces">When true, unescaped space, CR, LF, TAB and FF characters are ignored.</param>
/// <param name="unicode">Selects the Unicode variants of the \d \s \w \D \S \W classes.</param>
/// <param name="xml">When true, a "-[" sequence starts a nested class that is subtracted from the current one.</param>
/// <returns>The index just past the closing ']'.</returns>
/// <exception cref="PatternSyntaxException">
/// On an illegal range, an unknown named class, an incomplete or out-of-range
/// escape, or when the closing ']' is not found before <paramref name="_out"/>.
/// </exception>
private static int ParseClass(char[] data, int i, int _out, Bitset bs, bool icase, bool skipspaces, bool unicode, bool xml)
{
    char c;
    int prev = -1;              // pending literal not yet added to bs; -1 when none
    bool isFirst = true;        // true for the first class char (and for the one right after a leading '^')
    bool setFirst = false;
    bool inRange = false;       // a '-' has been seen and a range end is expected
    Bitset bs1 = null;          // scratch set for XML-style subtraction
    StringBuilder sb = null;    // scratch buffer for \p{...} names

    for (; i < _out; isFirst = setFirst, setFirst = false)
    {
        switch (c = data[i++])
        {
            case ']':
                if (isFirst) break; // treat as normal char
                if (inRange)
                {
                    // Dangling '-' right before ']' is a literal dash.
                    bs.SetChar('-');
                }
                if (prev >= 0)
                {
                    char c1 = (char)prev;
                    if (icase)
                    {
                        bs.SetChar(char.ToLower(c1));
                        bs.SetChar(char.ToUpper(c1));
                    }
                    else bs.SetChar(c1);
                }
                return i;

            case '-':
                if (isFirst) break;  // leading '-' is a literal
                if (inRange) break;  // second '-' in a row is the range end itself
                inRange = true;
                continue;

            case '[':
                if (inRange && xml)
                {
                    // XML-style subtraction: [...-[...]]
                    if (prev >= 0) bs.SetChar((char)prev);
                    if (bs1 == null) bs1 = new Bitset();
                    else bs1.Reset();
                    i = ParseClass(data, i, _out, bs1, icase, skipspaces, unicode, xml);
                    bs.Subtract(bs1);
                    inRange = false;
                    prev = -1;
                    continue;
                }
                else break;

            case '^':
                if (isFirst)
                {
                    bs.SetPositive(false);
                    setFirst = true; // the next char still counts as "first"
                    continue;
                }
                break;

            case ' ':
            case '\r':
            case '\n':
            case '\t':
            case '\f':
                if (skipspaces) continue;
                else break;

            case '\\':
                Bitset negativeClass = null;
                bool inv = false;
                bool isLiteral = false; // the escape produced a single char stored in c
                switch (c = data[i++])
                {
                    case 'r':
                        c = '\r';
                        isLiteral = true;
                        break;

                    case 'n':
                        c = '\n';
                        isLiteral = true;
                        break;

                    case 'e':
                        c = '\u001B';
                        isLiteral = true;
                        break;

                    case 't':
                        c = '\t';
                        isLiteral = true;
                        break;

                    case 'f':
                        c = '\f';
                        isLiteral = true;
                        break;

                    case 'u': // 4-digit hex code
                        if (i >= _out - 4) throw new PatternSyntaxException("incomplete escape sequence \\uXXXX");
                        // All four digits come from the input; the escape letter
                        // itself must not be decoded as a digit.
                        c = (char)((ToHexDigit(data[i++]) << 12)
                                 + (ToHexDigit(data[i++]) << 8)
                                 + (ToHexDigit(data[i++]) << 4)
                                 + ToHexDigit(data[i++]));
                        isLiteral = true;
                        break;

                    case 'v': // 6-digit hex code
                        if (i >= _out - 6) throw new PatternSyntaxException("incomplete escape sequence \\vXXXXXX");
                        // All six digits come from the input. The cast keeps the
                        // original narrowing to char.
                        // NOTE(review): values above 0xffff are narrowed, not
                        // rejected - confirm against the non-class escape parser.
                        c = (char)((ToHexDigit(data[i++]) << 20)
                                 + (ToHexDigit(data[i++]) << 16)
                                 + (ToHexDigit(data[i++]) << 12)
                                 + (ToHexDigit(data[i++]) << 8)
                                 + (ToHexDigit(data[i++]) << 4)
                                 + ToHexDigit(data[i++]));
                        isLiteral = true;
                        break;

                    case 'b':
                        c = (char)8; // backspace
                        isLiteral = true;
                        break;

                    case 'x':
                    {
                        // hex number: \xHH or \x{H...}
                        int hex = 0;
                        char d;
                        if ((d = data[i++]) == '{')
                        {
                            while ((d = data[i++]) != '}')
                            {
                                hex = (hex << 4) + ToHexDigit(d);
                                // Checked per digit so a long digit run cannot
                                // overflow int and wrap back into range.
                                if (hex > 0xffff) throw new PatternSyntaxException("\\x{<out of range>}");
                            }
                        }
                        else
                        {
                            hex = (ToHexDigit(d) << 4) + ToHexDigit(data[i++]);
                        }
                        c = (char)hex;
                        isLiteral = true;
                        break;
                    }

                    case '0': // octal number
                    case 'o': // octal number
                        int oct = 0;
                        for (;;)
                        {
                            char d = data[i++];
                            if (d >= '0' && d <= '7')
                            {
                                oct *= 8;
                                oct += d - '0';
                                if (oct > 0xffff) break;
                            }
                            else
                            {
                                i--; // not an octal digit: give it back
                                break;
                            }
                        }
                        c = (char)oct;
                        isLiteral = true;
                        break;

                    case 'm': // decimal number -> char
                        int dec = 0;
                        for (;;)
                        {
                            char d = data[i++];
                            if (d >= '0' && d <= '9')
                            {
                                dec *= 10;
                                dec += d - '0';
                                if (dec > 0xffff) break;
                            }
                            else
                            {
                                i--; // not a decimal digit: give it back
                                break;
                            }
                        }
                        c = (char)dec;
                        isLiteral = true;
                        break;

                    case 'c': // ctrl-char
                        c = (char)(data[i++] & 0x1f);
                        isLiteral = true;
                        break;

                    case 'D': // non-digit
                        negativeClass = unicode ? UNONDIGIT : NONDIGIT;
                        break;

                    case 'S': // non-space
                        negativeClass = unicode ? UNONSPACE : NONSPACE;
                        break;

                    case 'W': // non-word char
                        negativeClass = unicode ? UNONWORDCHAR : NONWORDCHAR;
                        break;

                    case 'd': // digit
                        if (inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\d...]");
                        bs.SetDigit(unicode);
                        continue;

                    case 's': // space
                        if (inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\s...]");
                        bs.SetSpace(unicode);
                        continue;

                    case 'w': // word char
                        if (inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\w...]");
                        bs.SetWordChar(unicode);
                        continue;

                    case 'P': // \P{..}
                        inv = true;
                        goto case 'p';

                    case 'p': // \p{..}
                        // c is 'p' or 'P' here, so the message names the escape actually used.
                        if (inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\"+c+"...]");
                        if (sb == null) sb = new StringBuilder();
                        else sb.Length = 0;
                        i = ParseName(data, i, _out, sb, skipspaces);
                        Bitset nc = GetNamedClass(sb.ToString());
                        if (nc == null) throw new PatternSyntaxException("unknown named class: {"+sb+"}");
                        bs.Add(nc, inv);
                        continue;

                    default:
                        // Any other escaped character stands for itself.
                        isLiteral = true;
                        break;
                }
                if (isLiteral) break; // handle c as a literal below
                if (inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\"+c+"...]");
                bs.Add(negativeClass);
                continue;

            default:
                break;
        }

        // c is a literal character at this point.
        if (prev < 0)
        {
            // NOTE(review): a pending '-' (inRange) is dropped here when no
            // range start is pending, e.g. after a completed range - confirm intended.
            prev = c;
            inRange = false;
            continue;
        }

        if (!inRange)
        {
            // Flush the previous literal and keep the current one pending.
            char c1 = (char)prev;
            if (icase)
            {
                bs.SetChar(char.ToLower(c1));
                bs.SetChar(char.ToUpper(c1));
            }
            else bs.SetChar(c1);
            prev = c;
        }
        else
        {
            // Range prev-c.
            if (prev > c) throw new PatternSyntaxException("illegal range: "+prev+">"+c);
            char c0 = (char)prev;
            inRange = false;
            prev = -1;
            if (icase)
            {
                bs.SetRange(char.ToLower(c0), char.ToLower(c));
                bs.SetRange(char.ToUpper(c0), char.ToUpper(c));
            }
            else bs.SetRange(c0, c);
        }
    }

    throw new PatternSyntaxException("unbalanced brackets in a class def");
}
/// <summary>
/// Adds <paramref name="bs2"/> to <paramref name="bs1"/> and returns the value
/// reported by the underlying block-level addition.
/// </summary>
/// <param name="bs1">Set that is modified; its <c>block0</c> is allocated on demand on the small path.</param>
/// <param name="bs2">Set being added; switched to large mode when the large path is taken.</param>
/// <param name="inv">Forwarded to <c>Block.Add</c>; a true value forces the large path.</param>
/// <returns>
/// The accumulated result of the block addition, or 0 when, on the small path,
/// <paramref name="bs2"/> has no <c>block0</c>.
/// </returns>
private static int AddImpl(Bitset bs1, Bitset bs2, bool inv)
{
    int delta = 0;

    bool smallPath = !bs1.isLarge && !bs2.isLarge && !inv;
    if (smallPath)
    {
        if (bs2.block0 == null)
        {
            // Nothing to add.
            return delta;
        }

        bool[] target = bs1.block0;
        if (target == null)
        {
            target = new bool[BLOCK_SIZE];
            bs1.block0 = target;
        }
        delta += Add(target, bs2.block0, 0, BLOCK_SIZE - 1, false);
        return delta;
    }

    // Large path: both operands must use the multi-block representation.
    if (!bs1.isLarge)
    {
        bs1.EnableLargeMode();
    }
    if (!bs2.isLarge)
    {
        bs2.EnableLargeMode();
    }
    delta += Block.Add(bs1.blocks, bs2.blocks, 0, BLOCK_COUNT - 1, inv);
    return delta;
}
/// <summary>
/// Stores <paramref name="cls"/> in <c>namedClasses</c> under
/// <paramref name="name"/> (replacing any previous entry) and makes sure the
/// name is listed in <paramref name="realm"/> exactly once.
/// </summary>
/// <param name="name">Class name.</param>
/// <param name="cls">The set to register.</param>
/// <param name="realm">List of names the class belongs to.</param>
private static void RegisterClass(string name, Bitset cls, IList<string> realm)
{
    namedClasses[name] = cls;

    bool alreadyListed = realm.Contains(name);
    if (alreadyListed)
    {
        return;
    }
    realm.Add(name);
}
/// <summary>
/// Copies the contents of <paramref name="bs"/> into <paramref name="term"/>:
/// the term type and bit data according to the set's mode, plus the inverse
/// flag and weight derived from the set's polarity.
/// </summary>
/// <param name="bs">Source set.</param>
/// <param name="term">Term that is filled in.</param>
internal static void Unify(Bitset bs, Term term)
{
    if (!bs.isLarge)
    {
        term.type = Term.TermType.BITSET;
        if (bs.block0 == null)
        {
            // No bits were ever set: use the shared empty block.
            term.bitset = emptyBlock0;
        }
        else
        {
            term.bitset = bs.block0;
        }
    }
    else
    {
        term.type = Term.TermType.BITSET2;
        term.bitset2 = Block.ToBitset2(bs.blocks);
    }

    term.inverse = !bs.positive;

    if (bs.positive)
    {
        term.weight = bs.weight;
    }
    else
    {
        // A negative set's weight is the complement with respect to MAX_WEIGHT.
        term.weight = MAX_WEIGHT - bs.weight;
    }
}
/// <summary>
/// Turns <paramref name="term"/> into a set term holding both the lower-case
/// and the upper-case form of <paramref name="c"/>.
/// </summary>
/// <param name="term">Term that is filled in via <c>Bitset.Unify</c>.</param>
/// <param name="c">The character to match case-insensitively.</param>
internal static void MakeICase(Term term, char c)
{
    char lower = char.ToLower(c);
    char upper = char.ToUpper(c);

    Bitset bothCases = new Bitset();
    bothCases.SetChar(lower);
    bothCases.SetChar(upper);
    Bitset.Unify(bothCases, term);
}