/// <summary>
/// Adds transitions from NState "this" to NState "nxt" for
/// every symbol selected by the range literal of the leaf.
/// When the NFSA instance is packed the symbols are the
/// equivalence class ordinals recorded on the range literal;
/// otherwise they are the character ordinals of the ranges.
/// </summary>
/// <param name="leaf">The regex leaf node</param>
/// <param name="nxt">The destination state</param>
public void AddClsTrans(Leaf leaf, NState nxt)
{
    BitArray symbols = new BitArray(myNfaInst.MaxSym);
    if (!myNfaInst.Pack)
    {
        // Unpacked: set one bit per character ordinal of every range.
        foreach (CharRange span in leaf.rangeLit.list.Ranges)
        {
            for (int code = span.minChr; code <= span.maxChr; code++)
            {
                symbols[code] = true;
            }
        }
        // An inverted list selects the complement of the listed ranges.
        if (leaf.rangeLit.list.IsInverted)
        {
            symbols = symbols.Not();
        }
    }
    else
    {
        // Packed: set one bit per equivalence class ordinal.
        foreach (int classOrdinal in leaf.rangeLit.equivClasses)
        {
            symbols[classOrdinal] = true;
        }
    }
    AddClsTrans(symbols, nxt);
}
/// <summary>
/// Adds transitions from NState "this" to NState "nxt" for
/// every symbol selected by the range literal of the leaf.
/// If the task is case agnostic the leaf's range list is first
/// replaced by its case agnostic form and canonicalized.
/// When the NFSA instance is packed the symbols are the
/// equivalence class ordinals recorded on the range literal;
/// otherwise they are the character ordinals of the ranges.
/// </summary>
/// <param name="leaf">The regex leaf node</param>
/// <param name="nxt">The destination state</param>
public void AddClsTrans(Leaf leaf, NState nxt)
{
    // Note: this rewrites the list stored on the leaf itself.
    if (myNfaInst.parent.task.CaseAgnostic)
    {
        leaf.rangeLit.list = leaf.rangeLit.list.MakeCaseAgnosticList();
        leaf.rangeLit.list.Canonicalize();
    }

    BitArray symbols = new BitArray(myNfaInst.MaxSym);
    if (!myNfaInst.Pack)
    {
        // Unpacked: set one bit per character ordinal of every range.
        foreach (CharRange span in leaf.rangeLit.list.Ranges)
        {
            for (int code = span.minChr; code <= span.maxChr; code++)
            {
                symbols[code] = true;
            }
        }
        // An inverted list selects the complement of the listed ranges.
        if (leaf.rangeLit.list.IsInverted)
        {
            symbols = symbols.Not();
        }
    }
    else
    {
        // Packed: set one bit per equivalence class ordinal.
        foreach (int classOrdinal in leaf.rangeLit.equivClasses)
        {
            symbols[classOrdinal] = true;
        }
    }
    AddClsTrans(symbols, nxt);
}
//
// For version 1.0.1 recognize any line-end character if /unicode
//
// Creates a fresh state, links it from "endS" by a path over the
// RangeLiteral.RightAnchors character class, marks the fresh state
// as accepting for "rule" and sets its rhCntx to 1.
// NOTE(review): rhCntx is presumably the right-context length — confirm.
//
static void AddAnchorContext(NfsaInstance nInst, NState endS, RuleDesc rule)
{
    NState anchorEnd = nInst.MkState();

    Leaf anchorLeaf = new Leaf(RegOp.charClass);
    anchorLeaf.rangeLit = RangeLiteral.RightAnchors;

    nInst.MakePath(anchorLeaf, endS, anchorEnd);
    nInst.MarkAccept(anchorEnd, rule);
    anchorEnd.rhCntx = 1;
}
// CharClass : "[" ["^"] {code | code "-" code | FilteredClass}+ "]" ;
//
// Parses a bracketed character class starting at the current '['
// and returns a Leaf of kind RegOp.charClass. The leaf's range
// list collects every singleton, range and filtered class found,
// and is canonicalized before the leaf is returned. A leading
// unescaped '^' is passed to the RangeLiteral constructor as the
// invert flag.
internal Leaf CharClass()
{
    // Assert chr == '['
    // Need to build a new string taking into account char escapes
    Leaf leaf = new Leaf( RegOp.charClass );
    bool invert = false;
    scan();                           // read past '['
    if (!esc && chr == '^')
    {
        invert = true;
        scan();                       // read past '^'
    }
    leaf.rangeLit = new RangeLiteral( invert );
    // Special case of '-' at start, taken as ordinary class member.
    // This is correct for LEX specification, but is undocumented
    // behavior for FLEX. GPLEX gives a friendly warning, just in
    // case this is actually a typographical error.
    if (!esc && chr == '-')
    {
        Warn( 113, index - 1, 1, "-" );
        leaf.rangeLit.list.Add( new CharRange( '-' ) );
        scan();                       // read past '-'
    }
    // Consume elements until end of input (NUL) or an unescaped ']'.
    while (chr != NUL && (esc || chr != ']'))
    {
        int lhCodePoint;
        int startIx = index - 1;      // save starting index for error reporting
        lhCodePoint = (esc ? EscapedChar() : CodePoint());
        // An unescaped '-' where an element should start is reported
        // as error 82; parsing then continues with the element.
        if (!esc && lhCodePoint == (int)'-')
            Error( 82, startIx, index - startIx, null );
        //
        // There are three possible elements here:
        //  * a singleton character
        //  * a character range
        //  * a filtered class like [:IsLetter:]
        //
        if (chr == '[' && !esc && peek() == ':') // character category
        {
            // The ranges of the filtered class are added to this leaf.
            Leaf rslt = FilteredClass();
            leaf.Merge( rslt );
        }
        else
        {
            scan();
            if (!esc && chr == '-')             // character range
            {
                scan();
                if (!esc && chr == ']')
                {
                    // Special case of '-' at end, taken as ordinary class member.
                    // This is correct for LEX specification, but is undocumented
                    // behavior for FLEX. GPLEX gives a friendly warning, just in
                    // case this is actually a typographical error.
                    leaf.rangeLit.list.Add( new CharRange( lhCodePoint ) );
                    leaf.rangeLit.list.Add( new CharRange( '-' ) );
                    //Error(81, idx, index - idx - 1);
                    Warn( 114, startIx, index - startIx - 1,
                        String.Format( CultureInfo.InvariantCulture, "'{0}','{1}'", CharacterUtilities.Map( lhCodePoint ), '-' ) );
                }
                else
                {
                    int rhCodePoint = (esc ? EscapedChar() : CodePoint());
                    // A range whose upper bound is below its lower bound is
                    // reported as error 54; the range is still added below.
                    if (rhCodePoint < lhCodePoint)
                        Error( 54, startIx, index - startIx, null );
                    scan();
                    leaf.rangeLit.list.Add( new CharRange( lhCodePoint, rhCodePoint ) );
                }
            }
            else                               // character singleton
            {
                leaf.rangeLit.list.Add( new CharRange( lhCodePoint ) );
            }
        }
    }
    // Expect the closing bracket, then canonicalize the collected ranges.
    checkAndScan( ']' );
    leaf.rangeLit.list.Canonicalize();
    return leaf;
}
// Primitive : CharClassExpr | NamedRegexReference | "." | escapedChar | char ;
//
// Parses one primitive regular expression element at the current
// position and returns its tree. An escaped character never
// matches the '[', '{' or '.' alternatives.
internal RegExTree Primitive()
{
    if (!esc)
    {
        // Character class expression.
        if (chr == '[')
            return CharClassExpr();

        // Named regex reference; '{' followed by a digit is not a reference.
        if (chr == '{' && !Char.IsDigit( peek() ))
            return UseRegexRef();

        // "." is built as an inverted class containing only '\n'.
        if (chr == '.')
        {
            Leaf dot = new Leaf( RegOp.charClass );
            dot.rangeLit = new RangeLiteral( true );
            scan();
            dot.rangeLit.list.Add( new CharRange( '\n' ) );
            return dot;
        }

        // Ordinary unicode character
        // (maybe a surrogate pair in future).
        RegExTree plain = new Leaf( (int)chr );
        scan();
        return plain;
    }

    // Escaped character (maybe beyond ffff limit).
    RegExTree escaped = new Leaf( EscapedChar() );
    scan();
    return escaped;
}
/// <summary>
/// Replaces the range list of this leaf with the result of
/// AND-ing it with the range list of the right hand operand.
/// </summary>
/// <param name="rhOperand">Leaf supplying the right hand range list</param>
internal void Intersect( Leaf rhOperand )
{
    rangeLit.list = rangeLit.list.AND( rhOperand.rangeLit.list );
}
/// <summary>
/// Replaces the range list of this leaf with the result of
/// applying SUB with the range list of the subtrahend.
/// </summary>
/// <param name="subtrahend">Leaf supplying the range list to subtract</param>
internal void Subtract( Leaf subtrahend )
{
    rangeLit.list = rangeLit.list.SUB( subtrahend.rangeLit.list );
}
/// <summary>
/// Adds every range of the addend's range list to the
/// range list of this leaf.
/// </summary>
/// <param name="addend">Leaf supplying the ranges to add</param>
internal void Merge( Leaf addend )
{
    foreach (CharRange extra in addend.rangeLit.list.Ranges)
    {
        rangeLit.list.Add( extra );
    }
}