Наследование: RegExTree
Пример #1
0
 /// <summary>
 /// Builds the set of symbols matched by a character-class leaf and
 /// adds transitions on that set from this state to <paramref name="nxt"/>.
 /// When the automaton packs characters (<c>myNfaInst.Pack</c>) the set
 /// holds the ordinals listed in <c>leaf.rangeLit.equivClasses</c>;
 /// otherwise it holds every code in every range of the leaf's list,
 /// complemented if that list is marked as inverted.
 /// </summary>
 /// <param name="leaf">The regex leaf node supplying the ranges</param>
 /// <param name="nxt">The destination state of the transitions</param>
 public void AddClsTrans(Leaf leaf, NState nxt)
 {
     // One bit per symbol of the automaton's alphabet.
     BitArray symbols = new BitArray(myNfaInst.MaxSym);
     if (!myNfaInst.Pack)
     {
         // Unpacked: mark each individual code covered by the ranges.
         foreach (CharRange range in leaf.rangeLit.list.Ranges)
         {
             for (int code = range.minChr; code <= range.maxChr; code++)
             {
                 symbols[code] = true;
             }
         }
         if (leaf.rangeLit.list.IsInverted)
         {
             symbols = symbols.Not();
         }
     }
     else
     {
         // Packed: the leaf already carries equivalence-class ordinals.
         foreach (int classOrdinal in leaf.rangeLit.equivClasses)
         {
             symbols[classOrdinal] = true;
         }
     }
     // Delegate to the BitArray overload to install the transitions.
     AddClsTrans(symbols, nxt);
 }
Пример #2
0
            /// <summary>
            /// Builds the set of symbols matched by a character-class leaf and
            /// adds transitions on that set from this state to <paramref name="nxt"/>.
            /// If the task is case-agnostic, the list stored on the leaf is first
            /// replaced by its case-agnostic form and canonicalized, so the
            /// caller's leaf is modified.
            /// When the automaton packs characters (<c>myNfaInst.Pack</c>) the set
            /// holds the ordinals listed in <c>leaf.rangeLit.equivClasses</c>;
            /// otherwise it holds every code in every range of the leaf's list,
            /// complemented if that list is marked as inverted.
            /// </summary>
            /// <param name="leaf">The regex leaf node supplying the ranges</param>
            /// <param name="nxt">The destination state of the transitions</param>
            public void AddClsTrans(Leaf leaf, NState nxt)
            {
                bool ignoreCase = myNfaInst.parent.task.CaseAgnostic;
                if (ignoreCase)
                {
                    // Swap in the case-agnostic list, then normalize it.
                    leaf.rangeLit.list = leaf.rangeLit.list.MakeCaseAgnosticList();
                    leaf.rangeLit.list.Canonicalize();
                }

                // One bit per symbol of the automaton's alphabet.
                BitArray symbols = new BitArray(myNfaInst.MaxSym);
                if (!myNfaInst.Pack)
                {
                    // Unpacked: mark each individual code covered by the ranges.
                    foreach (CharRange range in leaf.rangeLit.list.Ranges)
                    {
                        for (int code = range.minChr; code <= range.maxChr; code++)
                        {
                            symbols[code] = true;
                        }
                    }
                    if (leaf.rangeLit.list.IsInverted)
                    {
                        symbols = symbols.Not();
                    }
                }
                else
                {
                    // Packed: the leaf already carries equivalence-class ordinals.
                    foreach (int classOrdinal in leaf.rangeLit.equivClasses)
                    {
                        symbols[classOrdinal] = true;
                    }
                }
                // Delegate to the BitArray overload to install the transitions.
                AddClsTrans(symbols, nxt);
            }
Пример #3
0
 /// <summary>
 /// Appends a right-anchor context to a rule: a fresh state is created,
 /// a path over the <c>RangeLiteral.RightAnchors</c> character class is
 /// made from <paramref name="endS"/> to it, and the fresh state is marked
 /// as accepting for <paramref name="rule"/> with <c>rhCntx</c> set to 1.
 /// (Since version 1.0.1 any line-end character is recognized if /unicode.)
 /// </summary>
 /// <param name="nInst">The NFSA instance that owns the states</param>
 /// <param name="endS">The state the anchor path starts from</param>
 /// <param name="rule">The rule accepted at the new state</param>
 static void AddAnchorContext(NfsaInstance nInst, NState endS, RuleDesc rule)
 {
     NState acceptState = nInst.MkState();

     // Character-class leaf matching the right-anchor characters.
     Leaf anchorLeaf = new Leaf(RegOp.charClass);
     anchorLeaf.rangeLit = RangeLiteral.RightAnchors;

     nInst.MakePath(anchorLeaf, endS, acceptState);
     nInst.MarkAccept(acceptState, rule);

     // NOTE(review): presumably the length of the right context consumed
     // by the anchor (one character) -- confirm against NState.rhCntx.
     acceptState.rhCntx = 1;
 }
Пример #4
0
            // CharClass : "[" ["^"] {code | code "-" code | FilteredClass}+ "]" ;
            /// <summary>
            /// Parses a bracketed character class and returns it as a
            /// <c>RegOp.charClass</c> leaf. The scanner is expected to be
            /// positioned on the opening '['. A leading unescaped '^' marks
            /// the class as inverted. Members may be single code points,
            /// "lo-hi" ranges, or nested filtered classes such as
            /// [:IsLetter:]. The resulting range list is canonicalized
            /// before the leaf is returned.
            /// </summary>
            /// <returns>The leaf holding the parsed range literal</returns>
            internal Leaf CharClass() {
                // Assert chr == '['
                // Need to build a new string taking into account char escapes
                Leaf leaf = new Leaf( RegOp.charClass );
                bool invert = false;
                scan();                           // read past '['
                if (!esc && chr == '^') {
                    invert = true;
                    scan();                       // read past '^'
                }
                leaf.rangeLit = new RangeLiteral( invert );
                // Special case of '-' at start, taken as ordinary class member.
                // This is correct for LEX specification, but is undocumented
                // behavior for FLEX. GPLEX gives a friendly warning, just in
                // case this is actually a typographical error.
                if (!esc && chr == '-') {
                    Warn( 113, index - 1, 1, "-" );
                    leaf.rangeLit.list.Add( new CharRange( '-' ) );
                    scan();                       // read past '-'
                }

                // Consume members until an unescaped ']' or end of input (NUL).
                while (chr != NUL && (esc || chr != ']')) {
                    int lhCodePoint;
                    int startIx = index - 1; // save starting index for error reporting
                    lhCodePoint = (esc ? EscapedChar() : CodePoint());
                    // An unescaped '-' in the left-hand position is reported
                    // as error 82; parsing then continues.
                    if (!esc && lhCodePoint == (int)'-')
                        Error( 82, startIx, index - startIx, null );
                    //
                    // There are three possible elements here:
                    //  * a singleton character
                    //  * a character range
                    //  * a filtered class like [:IsLetter:]
                    //
                    if (chr == '[' && !esc && peek() == ':') // character category
                    {
                        // Fold the ranges of the nested filtered class into this class.
                        Leaf rslt = FilteredClass();
                        leaf.Merge( rslt );
                    }
                    else {
                        scan();
                        if (!esc && chr == '-')             // character range
                        {
                            scan();
                            if (!esc && chr == ']') {
                                // Special case of '-' at end, taken as ordinary class member.
                                // This is correct for LEX specification, but is undocumented
                                // behavior for FLEX. GPLEX gives a friendly warning, just in
                                // case this is actually a typographical error.
                                leaf.rangeLit.list.Add( new CharRange( lhCodePoint ) );
                                leaf.rangeLit.list.Add( new CharRange( '-' ) );
                                //Error(81, idx, index - idx - 1);
                                Warn( 114, startIx, index - startIx - 1, String.Format(
                                    CultureInfo.InvariantCulture,
                                    "'{0}','{1}'",
                                    CharacterUtilities.Map( lhCodePoint ),
                                    '-' ) );
                            }
                            else {
                                // Ordinary "lo-hi" range: read the upper bound.
                                int rhCodePoint = (esc ? EscapedChar() : CodePoint());
                                // A descending range is reported as error 54;
                                // the range is still added as written.
                                if (rhCodePoint < lhCodePoint)
                                    Error( 54, startIx, index - startIx, null );
                                scan();
                                leaf.rangeLit.list.Add( new CharRange( lhCodePoint, rhCodePoint ) );
                            }
                        }
                        else                               // character singleton
                        {
                            leaf.rangeLit.list.Add( new CharRange( lhCodePoint ) );
                        }
                    }
                }
                checkAndScan( ']' );
                leaf.rangeLit.list.Canonicalize();
                return leaf;
            }
Пример #5
0
 // Primitive : CharClassExpr | NamedRegexReference | "." | escapedChar | char ;
 /// <summary>
 /// Parses one primitive regular-expression element at the current
 /// scanner position: a character-class expression, a named regex
 /// reference, the "." wildcard, an escaped character, or a plain
 /// character.
 /// </summary>
 /// <returns>The tree node for the parsed primitive</returns>
 internal RegExTree Primitive() {
     // The three structured forms are only recognized when unescaped.
     if (!esc) {
         if (chr == '[') {
             return CharClassExpr();
         }
         // '{' followed by a digit is not treated as a reference here.
         if (chr == '{' && !Char.IsDigit( peek() )) {
             return UseRegexRef();
         }
         if (chr == '.') {
             // "." is an inverted class whose only listed member is '\n'.
             Leaf dot = new Leaf( RegOp.charClass );
             dot.rangeLit = new RangeLiteral( true );
             scan();
             dot.rangeLit.list.Add( new CharRange( '\n' ) );
             return dot;
         }
     }
     // Remaining cases are:
     //  1. escaped character (maybe beyond ffff limit)
     //  2. ordinary unicode character
     //  3. maybe a surrogate pair in future
     Leaf single;
     if (esc) {
         single = new Leaf( EscapedChar() );
     }
     else {
         single = new Leaf( (int)chr );
     }
     scan();
     return single;
 }
Пример #6
0
 /// <summary>
 /// Replaces this leaf's range list with the result of AND-ing it
 /// with the range list of <paramref name="rhOperand"/>.
 /// </summary>
 /// <param name="rhOperand">The leaf supplying the right-hand list</param>
 internal void Intersect( Leaf rhOperand )
 {
     rangeLit.list = rangeLit.list.AND( rhOperand.rangeLit.list );
 }
Пример #7
0
 /// <summary>
 /// Replaces this leaf's range list with the result of applying SUB
 /// to it with the range list of <paramref name="subtrahend"/>.
 /// </summary>
 /// <param name="subtrahend">The leaf whose list is subtracted</param>
 internal void Subtract( Leaf subtrahend )
 {
     rangeLit.list = rangeLit.list.SUB( subtrahend.rangeLit.list );
 }
Пример #8
0
 /// <summary>
 /// Adds every range from the list of <paramref name="addend"/> to this
 /// leaf's range list. Only the ranges are copied; nothing else is read
 /// from the addend.
 /// </summary>
 /// <param name="addend">The leaf whose ranges are appended</param>
 internal void Merge( Leaf addend )
 {
     foreach (CharRange extra in addend.rangeLit.list.Ranges)
     {
         rangeLit.list.Add( extra );
     }
 }