SearchEntry top; //stack entry
#endregion Fields

#region Constructors
/// <summary>
/// Builds a matcher for the given compiled pattern and allocates the working
/// storage sized by that pattern: capture registers, counters and
/// lookahead entries, plus the two initial search entries.
/// </summary>
/// <param name="regex">
/// Compiled pattern; its <c>memregs</c>, <c>counters</c>, <c>lookaheads</c> and
/// <c>stringRepr</c> members are read here.
/// </param>
internal Matcher(Pattern regex)
{
    this.re = regex;

    // Capture registers: allocated only when the pattern declares at least one.
    // Every slot is filled immediately (each created with -1), so later code can
    // index the array without null checks.
    int registerTotal = regex.memregs;
    if (registerTotal > 0)
    {
        MemReg[] registers = new MemReg[registerTotal];
        int slot = 0;
        while (slot < registerTotal)
        {
            registers[slot] = new MemReg(-1);
            slot++;
        }
        this.memregs = registers;
    }

    // Counter registers: plain ints, so the zero-initialised array is enough.
    int counterTotal = regex.counters;
    if (counterTotal > 0)
    {
        this.counters = new int[counterTotal];
    }

    // Lookahead bookkeeping entries, one per lookahead id declared by the pattern.
    int lookaheadTotal = regex.lookaheads;
    if (lookaheadTotal > 0)
    {
        LAEntry[] entries = new LAEntry[lookaheadTotal];
        int slot = 0;
        do
        {
            entries[slot] = new LAEntry();
            slot++;
        }
        while (slot < lookaheadTotal);
        this.lookaheads = entries;
    }

    this.first = new SearchEntry();
    this.defaultEntry = new SearchEntry();

    // Heuristic only: half the length of the pattern's string form.
    this.minQueueLength = regex.stringRepr.Length / 2;
}
/// <summary>
/// Core matching loop. Interprets the term graph of the pattern <c>re</c> over <c>data</c>,
/// either starting a fresh attempt at <c>wOffset</c> (when the field <c>top</c> is null) or
/// resuming from the entry on top of the backtracking stack left in <c>top</c>.
/// Sets <c>called</c> to true on entry.
/// </summary>
/// <param name="anchors">
/// Bit flags, tested against the following constants:
/// ANCHOR_START     - fresh search only: start at <c>re.root0</c> and use <c>startAnchor</c> as the restart term;
/// ANCHOR_LASTMATCH - fresh search only: start at <c>re.root0</c> and use <c>lastMatchAnchor</c> as the restart term;
/// ANCHOR_END       - a SUCCESS term is accepted only when the current index equals <c>end</c>;
/// ACCEPT_INCOMPLETE - when a term fails while the current index equals <c>end</c>, return true at once.
/// Without either start flag the search starts and restarts at <c>re.root</c>.
/// </param>
/// <returns>
/// true when a SUCCESS term is accepted: <c>this.wOffset</c>/<c>memregs[0]._in</c> receive the window
/// start, <c>this.wEnd</c>/<c>memregs[0]._out</c> receive the current index, and <c>this.top</c> keeps the
/// backtracking stack. Also true on the ACCEPT_INCOMPLETE exit, which returns without writing
/// any of those fields. false when the window start has moved past <c>end</c> or a term jumped to
/// <c>breakMain</c>; in that case only <c>this.wOffset</c> and <c>this.top</c> are written back.
/// </returns>
/// <exception cref="System.Exception">A term has a type the switch does not handle.</exception>
//
// Control-flow conventions used inside the loop:
//   * "goto matchHere" and "continue" both proceed to the next iteration of the inner for(;;),
//     i.e. the term now held in 'term' is interpreted next.
//   * "break" inside a case leaves the switch and means "this term failed". The code after the
//     switch then either returns (ACCEPT_INCOMPLETE at end of input), or backtracks by popping
//     'top' (restoring term, index and, if the entry is flagged isState, saved registers and
//     counters), or - when the stack is empty - jumps to breakMatchHere, restores the state
//     saved in defaultEntry, advances wOffset by one and restarts from 'root'.
//   * "goto breakMain" abandons the whole search and returns false.
//   * A choice point is pushed by writing the alternative (term, index and, where relevant,
//     cnt/regLen) into 'actual' and then advancing with "actual=(top=actual).on"; the next
//     SearchEntry is allocated and linked through on/sub only if it does not exist yet.
//   * FIND and FINDREG also set wOffset to the found position, so the window start skips ahead.
//   * 'cnt' and 'regLen' are working copies, initialised from the entry the search starts or
//     resumes with.
// The outer loop condition is wOffset<=end, so an attempt is also made at the end position.
private bool Search(int anchors) { called=true; int end=this.end; int offset=this.offset; char[] data=this.data; int wOffset=this.wOffset; int wEnd=this.wEnd; MemReg[] memregs=this.memregs; int[] counters=this.counters; LAEntry[] lookaheads=this.lookaheads; SearchEntry defaultEntry=this.defaultEntry; SearchEntry first=this.first; SearchEntry top=this.top; SearchEntry actual=null; int cnt,regLen; int i; bool matchEnd=(anchors&ANCHOR_END)>0; bool allowIncomplete=(anchors&ACCEPT_INCOMPLETE)>0; Pattern re=this.re; Term root=re.root; Term term; if(top==null) { if((anchors&ANCHOR_START)>0) { term=re.root0; //raw root root=startAnchor; } else if((anchors&ANCHOR_LASTMATCH)>0) { term=re.root0; //raw root root=lastMatchAnchor; } else { term=root; //optimized root } i=wOffset; actual=first; SearchEntry.PopState(defaultEntry,memregs,counters); } else { top=(actual=top).sub; term=actual.term; i=actual.index; SearchEntry.PopState(actual,memregs,counters); } cnt=actual.cnt; regLen=actual.regLen; while(wOffset<=end){ matchHere: for(;;){ int memreg,cntreg; char c; switch(term.type){ case Term.TermType.FIND:{ int jump=Find(data,i+term.distance,end,term.target); //don't eat the last match if(jump<0) goto breakMain; //return false i+=jump; wOffset=i; //force window to move if(term.eat){ if(i==end) break; i++; } term=term.next; goto matchHere; } case Term.TermType.FINDREG:{ MemReg mr=memregs[term.target.memreg]; int sampleOff=mr._in; int sampleLen=mr._out-sampleOff; if(sampleOff<0 || sampleLen<0) { break; } else if (sampleLen==0) { term=term.next; goto matchHere; } int jump=FindReg(data,i+term.distance,sampleOff,sampleLen,term.target,end); //don't eat the last match if(jump<0) goto breakMain; //return false i+=jump; wOffset=i; //force window to move if(term.eat) { i+=sampleLen; if(i>end) break; } term=term.next; goto matchHere; } case Term.TermType.VOID: term=term.next; goto matchHere; case Term.TermType.CHAR: if(i>=end || data[i]!=term.c) break; i++; term=term.next; goto matchHere; 
case Term.TermType.ANY_CHAR: if(i>=end) break; i++; term=term.next; goto matchHere; case Term.TermType.ANY_CHAR_NE: if(i>=end || data[i]=='\n') break; i++; term=term.next; goto matchHere; case Term.TermType.END: if(i>=end) { //meets term=term.next; goto matchHere; } break; case Term.TermType.END_EOL: //perl's $ if(i>=end) { //meets term=term.next; goto matchHere; } else { bool matches= i>=end | ((i+1)==end && data[i]=='\n'); if(matches) { term=term.next; goto matchHere; } else break; } case Term.TermType.LINE_END: if(i>=end) { //meets term=term.next; goto matchHere; } else { if(data[i]=='\n'){ term=term.next; goto matchHere; } } break; case Term.TermType.START: //Perl's "^" if(i==offset) { //meets term=term.next; goto matchHere; } if(top!=null) break; if(term!=startAnchor) break; else goto breakMain; case Term.TermType.LAST_MATCH_END: if(i==wEnd || wEnd == -1) { //meets term=term.next; goto matchHere; } goto breakMain; //return false case Term.TermType.LINE_START: if(i==offset) { //meets term=term.next; goto matchHere; } else if(i<end) { if((c=data[i-1])=='\n') { term=term.next; goto matchHere; } } break; case Term.TermType.BITSET:{ if(i>=end) break; c=data[i]; if(!(c<=255 && term.bitset[c])^term.inverse) break; i++; term=term.next; goto matchHere; } case Term.TermType.BITSET2:{ if(i>=end) break; c=data[i]; bool[] arr=term.bitset2[c>>8]; if(arr==null || !arr[c&255]^term.inverse) break; i++; term=term.next; goto matchHere; } case Term.TermType.BOUNDARY:{ bool ch1Meets=false,ch2Meets=false; bool[] bitset=term.bitset; { int j=i-1; if(j<offset) goto test1; c = data[j]; ch1Meets= (c<256 && bitset[c]); } test1: { if(i>=end) goto test2; c = data[i]; ch2Meets = (c<256 && bitset[c]); } test2: if(ch1Meets^ch2Meets^term.inverse) { //meets term=term.next; goto matchHere; } else break; } case Term.TermType.UBOUNDARY:{ bool ch1Meets=false,ch2Meets=false; bool[][] bitset2=term.bitset2; { int j=i-1; if(j<offset) goto test1; c= data[j]; bool[] bits=bitset2[c>>8]; ch1Meets= 
bits!=null && bits[c&0xff]; } test1: { if(i>=end) goto test2; c= data[i]; bool[] bits=bitset2[c>>8]; ch2Meets= bits!=null && bits[c&0xff]; } test2: if(ch1Meets^ch2Meets^term.inverse){ //is boundary ^ inv term=term.next; goto matchHere; } else break; } case Term.TermType.DIRECTION:{ bool ch1Meets=false,ch2Meets=false; bool[] bitset=term.bitset; bool inv=term.inverse; int j=i-1; if(j>=offset){ c = data[j]; ch1Meets = c<256 && bitset[c]; } if(ch1Meets^inv) break; if(i<end){ c = data[i]; ch2Meets= c<256 && bitset[c]; } if(!ch2Meets^inv) break; term=term.next; goto matchHere; } case Term.TermType.UDIRECTION:{ bool ch1Meets=false,ch2Meets=false; bool[][] bitset2=term.bitset2; bool inv=term.inverse; int j=i-1; if(j>=offset) { c = data[j]; bool[] bits=bitset2[c>>8]; ch1Meets= bits!=null && bits[c&0xff]; } if(ch1Meets^inv) break; if(i<end) { c= data[i]; bool[] bits=bitset2[c>>8]; ch2Meets= bits!=null && bits[c&0xff]; } if(!ch2Meets^inv) break; term=term.next; goto matchHere; } case Term.TermType.REG:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr._in; int sampleOutside=mr._out; int rLen; if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0) { break; } else if(rLen==0) { term=term.next; goto matchHere; } if((i+rLen)>end) break; if(CompareRegions(data,sampleOffset,i,rLen,end)){ i+=rLen; term=term.next; goto matchHere; } break; } case Term.TermType.REG_I:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr._in; int sampleOutside=mr._out; int rLen; if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){ break; } else if(rLen==0) { term=term.next; goto matchHere; } if((i+rLen)>end) break; if(CompareRegionsI(data,sampleOffset,i,rLen,end)) { i+=rLen; term=term.next; goto matchHere; } break; } case Term.TermType.REPEAT_0_INF:{ if((cnt=Repeat(data,i,end,term.target))<=0){ term=term.next; continue; } i+=cnt; actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } 
term=term.next; continue; } case Term.TermType.REPEAT_MIN_INF:{ cnt=Repeat(data,i,end,term.target); if(cnt<term.minCount) break; i+=cnt; actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.TermType.REPEAT_MIN_MAX:{ int out1=end; int out2=i+term.maxCount; cnt=Repeat(data,i,out1<out2? out1: out2,term.target); if(cnt<term.minCount) break; i+=cnt; actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.TermType.REPEAT_REG_MIN_INF:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr._in; int sampleOutside=mr._out; int bitset; if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0) { break; } else if(bitset==0) { term=term.next; goto matchHere; } cnt=0; while(CompareRegions(data,i,sampleOffset,bitset,end)){ cnt++; i+=bitset; } if(cnt<term.minCount) break; actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual.regLen=bitset; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.TermType.REPEAT_REG_MIN_MAX:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr._in; int sampleOutside=mr._out; int bitset; if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0){ break; } else if(bitset==0) { term=term.next; goto matchHere; } cnt=0; int countBack=term.maxCount; while(countBack>0 && CompareRegions(data,i,sampleOffset,bitset,end)){ cnt++; i+=bitset; countBack--; } if(cnt<term.minCount) break; actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual.regLen=bitset; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.TermType.BACKTRACK_0: cnt=actual.cnt; if(cnt>0){ cnt--; i--; actual.cnt=cnt; 
actual.index=i; actual.term=term; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; case Term.TermType.BACKTRACK_MIN: cnt=actual.cnt; if(cnt>term.minCount) { cnt--; i--; actual.cnt=cnt; actual.index=i; actual.term=term; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; case Term.TermType.BACKTRACK_FIND_MIN:{ cnt=actual.cnt; int minCnt; if(cnt>(minCnt=term.minCount)) { int start=i+term.distance; if(start>end){ int exceed=start-end; cnt-=exceed; if(cnt<=minCnt) break; i-=exceed; start=end; } int back=FindBack(data,i+term.distance,cnt-minCnt,term.target); if(back<0) break; if((cnt-=back)<=minCnt) { i-=back; if(term.eat)i++; term=term.next; continue; } i-=back; actual.cnt=cnt; actual.index=i; if(term.eat)i++; actual.term=term; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; } case Term.TermType.BACKTRACK_FINDREG_MIN:{ cnt=actual.cnt; int minCnt; if(cnt>(minCnt=term.minCount)){ int start=i+term.distance; if(start>end) { int exceed=start-end; cnt-=exceed; if(cnt<=minCnt) break; i-=exceed; start=end; } MemReg mr=memregs[term.target.memreg]; int sampleOff=mr._in; int sampleLen=mr._out-sampleOff; int back; if(sampleOff<0 || sampleLen<0) { cnt--; i--; actual.cnt=cnt; actual.index=i; actual.term=term; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else if(sampleLen==0) { back=1; } else { back=FindBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end); if(back<0) break; } cnt-=back; i-=back; actual.cnt=cnt; actual.index=i; if(term.eat)i+=sampleLen; actual.term=term; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else 
break; } case Term.TermType.BACKTRACK_REG_MIN: cnt=actual.cnt; if(cnt>term.minCount) { regLen=actual.regLen; cnt--; i-=regLen; actual.cnt=cnt; actual.index=i; actual.term=term; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; case Term.TermType.GROUP_IN:{ memreg=term.memreg; if(memreg>0) { memregs[memreg].tmp=i; //assume } term=term.next; continue; } case Term.TermType.GROUP_OUT: memreg=term.memreg; if(memreg>0){ MemReg mr=memregs[memreg]; SearchEntry.SaveMemregState((top!=null)? top: defaultEntry,memreg,mr); mr._in=mr.tmp; //commit mr._out=i; } term=term.next; continue; case Term.TermType.PLOOKBEHIND_IN:{ int tmp=i-term.distance; if(tmp<offset) break; LAEntry le=lookaheads[term.lookaheadId]; le.index=i; i=tmp; le.actual=actual; le.top=top; term=term.next; continue; } case Term.TermType.INDEPENDENT_IN: case Term.TermType.PLOOKAHEAD_IN:{ LAEntry le=lookaheads[term.lookaheadId]; le.index=i; le.actual=actual; le.top=top; term=term.next; continue; } case Term.TermType.LOOKBEHIND_CONDITION_OUT: case Term.TermType.LOOKAHEAD_CONDITION_OUT: case Term.TermType.PLOOKAHEAD_OUT: case Term.TermType.PLOOKBEHIND_OUT:{ LAEntry le=lookaheads[term.lookaheadId]; i=le.index; actual=le.actual; top=le.top; term=term.next; continue; } case Term.TermType.INDEPENDENT_OUT:{ LAEntry le=lookaheads[term.lookaheadId]; actual=le.actual; top=le.top; term=term.next; continue; } case Term.TermType.NLOOKBEHIND_IN:{ int tmp=i-term.distance; if(tmp<offset) { term=term.failNext; continue; } LAEntry le=lookaheads[term.lookaheadId]; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; i=tmp; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.TermType.NLOOKAHEAD_IN:{ LAEntry le=lookaheads[term.lookaheadId]; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; 
if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.TermType.NLOOKBEHIND_OUT: case Term.TermType.NLOOKAHEAD_OUT:{ LAEntry le=lookaheads[term.lookaheadId]; actual=le.actual; top=le.top; break; } case Term.TermType.LOOKBEHIND_CONDITION_IN:{ int tmp=i-term.distance; if(tmp<offset){ term=term.failNext; continue; } LAEntry le=lookaheads[term.lookaheadId]; le.index=i; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } i=tmp; term=term.next; continue; } case Term.TermType.LOOKAHEAD_CONDITION_IN:{ LAEntry le=lookaheads[term.lookaheadId]; le.index=i; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.TermType.MEMREG_CONDITION:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr._in; int sampleOutside=mr._out; if(sampleOffset>=0 && sampleOutside>=0 && sampleOutside>=sampleOffset){ term=term.next; } else { term=term.failNext; } continue; } case Term.TermType.BRANCH_STORE_CNT_AUX1: actual.regLen=regLen; goto case Term.TermType.BRANCH_STORE_CNT; case Term.TermType.BRANCH_STORE_CNT: actual.cnt=cnt; goto case Term.TermType.BRANCH; case Term.TermType.BRANCH: actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null) { actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; case Term.TermType.SUCCESS: if(!matchEnd || i==end) { this.wOffset=memregs[0]._in=wOffset; this.wEnd=memregs[0]._out=i; this.top=top; return true; } else break; case Term.TermType.CNT_SET_0: cnt=0; term=term.next; continue; case Term.TermType.CNT_INC: cnt++; term=term.next; continue; case Term.TermType.CNT_GT_EQ: if(cnt>=term.maxCount) { term=term.next; continue; } else break; case Term.TermType.READ_CNT_LT: 
cnt=actual.cnt; if(cnt<term.maxCount) { term=term.next; continue; } else break; case Term.TermType.CRSTORE_CRINC:{ int cntvalue=counters[cntreg=term.cntreg]; SearchEntry.SaveCntState((top!=null)? top: defaultEntry,cntreg,cntvalue); counters[cntreg]=++cntvalue; term=term.next; continue; } case Term.TermType.CR_SET_0: counters[term.cntreg]=0; term=term.next; continue; case Term.TermType.CR_LT: if(counters[term.cntreg]<term.maxCount) { term=term.next; continue; } else break; case Term.TermType.CR_GT_EQ: if(counters[term.cntreg]>=term.maxCount) { term=term.next; continue; } else break; default: throw new System.Exception("unknown term type: "+term.type); } if(allowIncomplete && i==end) { return true; } if(top==null) { goto breakMatchHere; } top=(actual=top).sub; term=actual.term; i=actual.index; if(actual.isState) { SearchEntry.PopState(actual,memregs,counters); } } breakMatchHere: if(defaultEntry.isState) SearchEntry.PopState(defaultEntry,memregs,counters); term=root; i=++wOffset; } breakMain: this.wOffset=wOffset; this.top=top; return false; }
/// <summary>
/// Discards all state left by a previous search: drops the backtracking stack,
/// resets the two base search entries, marks every capture register except
/// slot 0 as unset (-1/-1) and clears the <c>called</c> flag.
/// </summary>
private void Flush()
{
    top = null;
    defaultEntry.Reset(0);
    first.Reset(minQueueLength);

    // The constructor only allocates the register array when the pattern
    // declares registers, so tolerate its absence here.
    MemReg[] registers = memregs;
    if (registers != null)
    {
        // Slot 0 is deliberately skipped (loop stops at index 1); Search
        // overwrites it on every successful match. A single pass is enough:
        // the reset was previously performed twice with identical effect.
        for (int i = registers.Length - 1; i > 0; i--)
        {
            MemReg mr = registers[i];
            mr._in = mr._out = -1;
        }
    }

    called = false;
}
/// <summary>
/// Records the current bounds of a capture register on <paramref name="entry"/> so that
/// <c>PopState</c> can restore them later, and flags the entry as carrying saved state.
/// </summary>
/// <param name="entry">Search entry whose saved-register chain receives the record.</param>
/// <param name="memreg">Index of the register being saved.</param>
/// <param name="mr">The register; its <c>_in</c> and <c>_out</c> values are copied.</param>
internal static void SaveMemregState(SearchEntry entry, int memreg, MemReg mr)
{
    entry.isState = true;

    MState slot;
    MState last = entry.mCurrent;
    if (last != null)
    {
        // Chain already in use: take the node after the current one,
        // appending a new node if the chain ends here.
        slot = last.next;
        if (slot == null)
        {
            slot = new MState();
            last.next = slot;
            slot.prev = last;
        }
    }
    else
    {
        // Chain not in use: start again from its head, creating it on first use.
        slot = entry.mHead;
        if (slot == null)
        {
            slot = new MState();
            entry.mCurrent = slot;
            entry.mHead = slot;
        }
    }

    slot.index = memreg;
    slot._in = mr._in;
    slot._out = mr._out;
    entry.mCurrent = slot;
}
/// <summary>
/// Records the value of a counter register on <paramref name="entry"/> so that
/// <c>PopState</c> can restore it later, and flags the entry as carrying saved state.
/// </summary>
/// <param name="entry">Search entry whose saved-counter chain receives the record.</param>
/// <param name="cntreg">Index of the counter being saved.</param>
/// <param name="value">Counter value to remember.</param>
internal static void SaveCntState(SearchEntry entry, int cntreg, int value)
{
    entry.isState = true;

    CState slot;
    CState last = entry.cCurrent;
    if (last != null)
    {
        // Chain already in use: take the node after the current one,
        // appending a new node if the chain ends here.
        slot = last.next;
        if (slot == null)
        {
            slot = new CState();
            last.next = slot;
            slot.prev = last;
        }
    }
    else
    {
        // Chain not in use: start again from its head, creating it on first use.
        slot = entry.cHead;
        if (slot == null)
        {
            slot = new CState();
            entry.cCurrent = slot;
            entry.cHead = slot;
        }
    }

    slot.index = cntreg;
    slot.value = value;
    entry.cCurrent = slot;
}
/// <summary>
/// Restores every capture register and counter saved on <paramref name="entry"/>, then
/// marks the entry as holding no saved state. Both chains are walked from the most
/// recent record back to the oldest, so when an index was saved more than once the
/// oldest saved value is the one left in place.
/// </summary>
/// <param name="entry">Search entry whose saved state is applied and cleared.</param>
/// <param name="memregs">Capture registers to write the saved bounds into.</param>
/// <param name="counters">Counter array to write the saved values into.</param>
internal static void PopState(SearchEntry entry, MemReg[] memregs, int[] counters)
{
    for (MState saved = entry.mCurrent; saved != null; saved = saved.prev)
    {
        MemReg register = memregs[saved.index];
        register._in = saved._in;
        register._out = saved._out;
    }

    for (CState saved = entry.cCurrent; saved != null; saved = saved.prev)
    {
        counters[saved.index] = saved.value;
    }

    // Only the "current" cursors are cleared; the chain heads stay linked so the
    // save methods can reuse the already allocated nodes.
    entry.mCurrent = null;
    entry.cCurrent = null;
    entry.isState = false;
}