Esempio n. 1
0
        /**
         * Computes the last set of an expression, i.e. the set of
         * characters a match of the expression can end with
         * (the last-character projection of the language).
         *
         * @param re   the expression to analyse
         *
         * @return the set of possible final characters; the empty set
         *         for an empty string literal
         */
        private static IntCharSet last(RegExp re)
        {
            switch (re.type)
            {
            case sym.BAR:
            {
                RegExp2 alt = (RegExp2)re;
                return last(alt.r1).add(last(alt.r2));
            }

            case sym.CONCAT:
            {
                RegExp2 seq = (RegExp2)re;
                if (!containsEpsilon(seq.r2))
                {
                    // r2 always consumes input, so every match ends inside r2
                    return last(seq.r2);
                }
                // r2 may match the empty word, so a match can also end inside r1
                return last(seq.r1).add(last(seq.r2));
            }

            case sym.STAR:
            case sym.PLUS:
            case sym.QUESTION:
            {
                RegExp inner = (RegExp)((RegExp1)re).content;
                return last(inner);
            }

            case sym.CCLASS:
            {
                ArrayList intervals = (ArrayList)((RegExp1)re).content;
                return new IntCharSet(intervals);
            }

            case sym.CCLASSNOT:
            {
                // start from the full range [0, maxChar] and remove the listed class
                IntCharSet result   = new IntCharSet(new Interval((char)0, maxChar));
                IntCharSet excluded = new IntCharSet((ArrayList)((RegExp1)re).content);
                result.sub(excluded);
                return result;
            }

            case sym.CHAR:
            {
                char letter = (char)((RegExp1)re).content;
                return new IntCharSet(letter);
            }

            case sym.STRING:
            {
                String text = (String)((RegExp1)re).content;
                if (text.Length <= 0)
                {
                    return new IntCharSet();
                }
                return new IntCharSet(text[text.Length - 1]);
            }

            case sym.MACROUSE:
            {
                String macroName = (String)((RegExp1)re).content;
                return last(macros.getDefinition(macroName));
            }

            default:
                throw new Exception("Unkown expression type " + re.type + " in " + re); //$NON-NLS-1$ //$NON-NLS-2$
            }
        }
Esempio n. 2
0
        /**
         * Registers a macro under the given name, replacing any
         * definition stored earlier, and marks the macro as unused.
         *
         * @param name         the name of the macro
         * @param definition   the regular expression the macro stands for
         *
         * @return <code>true</code>, iff no macro with this name was
         *         stored before the call.
         */
        public bool insert(String name, RegExp definition)
        {
#if DEBUG_TRACE
            log.WriteLine("insert(String name = \"{0}\", RegExp definition = {1})", name, definition);
#endif // DEBUG_TRACE

            if (Options.DEBUG)
            {
                Out.debug("inserting macro " + name + " with definition :" + Out.NL + definition); //$NON-NLS-1$ //$NON-NLS-2$
            }

            // a (re)defined macro always starts out as unused
            used[name] = false;

            // remember whether the name is new before the definition is stored
            bool isNew = !macros.ContainsKey(name);
            macros[name] = definition;

            return isNew;
        }
        /**
         * Appends a new rule to the parallel lists holding state lists,
         * regular expressions, actions, BOL flags, lookahead expressions
         * and line numbers.
         *
         * @param line        the line number stored for the rule
         * @param stateList   the state list stored for the rule
         * @param regExp      the regular expression of the rule
         * @param action      the action of the rule (its content is
         *                    printed in debug mode)
         * @param isBOL       the BOL flag stored for the rule
         * @param lookAhead   the lookahead expression stored for the rule
         *
         * @return the index of the newly added entry
         */
        public int insert(int line, ArrayList stateList, RegExp regExp, Action action,
                          Boolean isBOL, RegExp lookAhead)
        {
            if (Options.DEBUG)
            {
                Out.debug("Inserting regular expression with statelist :" + Out.NL + stateList); //$NON-NLS-1$
                Out.debug("and action code :" + Out.NL + action.content + Out.NL);               //$NON-NLS-1$
                Out.debug("expression :" + Out.NL + regExp);                                     //$NON-NLS-1$
            }

            // every list receives exactly one entry per rule
            states.Add(stateList);
            regExps.Add(regExp);
            actions.Add(action);
            BOL.Add(isBOL);
            look.Add(lookAhead);
            lines.Add(line);

            int newIndex = states.Count - 1;
            return newIndex;
        }
Esempio n. 4
0
        /**
         * Returns true iff the matched language contains epsilon,
         * i.e. iff the expression can match the empty word.
         *
         * Case-insensitive characters and strings (CHAR_I, STRING_I)
         * are treated like their case-sensitive counterparts, because
         * case folding does not change whether the empty word matches.
         *
         * @param re   the expression to analyse
         *
         * @return true iff re can match the empty word
         *
         * @throws Exception if re.type is not one of the handled
         *         expression types
         */
        private static bool containsEpsilon(RegExp re)
        {
            switch (re.type)
            {
            case sym.BAR:
            {
                RegExp2 alt = (RegExp2)re;
                return containsEpsilon(alt.r1) || containsEpsilon(alt.r2);
            }

            case sym.CONCAT:
            {
                // a concatenation is empty only if both parts can be empty
                RegExp2 seq = (RegExp2)re;
                return containsEpsilon(seq.r1) && containsEpsilon(seq.r2);
            }

            case sym.STAR:
            case sym.QUESTION:
                return true;

            case sym.PLUS:
                // one or more repetitions are empty only if the body can be empty
                return containsEpsilon((RegExp)((RegExp1)re).content);

            case sym.CCLASS:
            case sym.CCLASSNOT:
            case sym.CHAR:
            case sym.CHAR_I:
                return false;

            case sym.STRING:
            case sym.STRING_I:
                return ((String)((RegExp1)re).content).Length == 0;

            case sym.MACROUSE:
                return containsEpsilon(macros.getDefinition((String)((RegExp1)re).content));
            }

            throw new Exception("Unkown expression type " + re.type + " in " + re); //$NON-NLS-1$ //$NON-NLS-2$
        }
Esempio n. 5
0
        /**
         * Inserts the transitions of a char class regexp between the
         * two given states, so that the resulting two state NFA has
         *
         *   exactly one start state,
         *   exactly one end state,
         *   no transitions leading out of the end state
         *   no transitions leading into the start state
         *
         * Assumes that regExp.isCharClass(macros) == true
         *
         * @param regExp the char class expression to insert
         * @param start  index of the start state of the NFA
         * @param end    index of the end state of the NFA
         *
         * @throws Exception if regExp.type is not a char class
         *         expression type handled here
         */
        private void insertNFA(RegExp regExp, int start, int end)
        {
            switch (regExp.type)
            {
            case sym.BAR:
            {
                // both alternatives share the same pair of states
                RegExp2 alt = (RegExp2)regExp;
                insertNFA(alt.r1, start, end);
                insertNFA(alt.r2, start, end);
                break;
            }

            case sym.CCLASS:
            {
                ArrayList intervals = (ArrayList)((RegExp1)regExp).content;
                insertClassNFA(intervals, start, end);
                break;
            }

            case sym.CCLASSNOT:
            {
                ArrayList intervals = (ArrayList)((RegExp1)regExp).content;
                insertNotClassNFA(intervals, start, end);
                break;
            }

            case sym.CHAR:
            {
                char letter = (char)((RegExp1)regExp).content;
                insertLetterNFA(false, letter, start, end);
                break;
            }

            case sym.CHAR_I:
            {
                char letter = (char)((RegExp1)regExp).content;
                insertLetterNFA(true, letter, start, end);
                break;
            }

            case sym.MACROUSE:
            {
                String macroName = (String)((RegExp1)regExp).content;
                insertNFA(macros.getDefinition(macroName), start, end);
                break;
            }

            default:
                throw new Exception("Unknown expression type " + regExp.type + " in NFA construction");
            }
        }
        /**
         * Adds up the sizes of all stored regular expressions and all
         * stored lookahead expressions.
         *
         * @param macros   handed to RegExp.size for every expression
         *
         * @return the sum of the sizes; null entries are skipped
         */
        public int NFASize(Macros macros)
        {
            int total = 0;

            foreach (RegExp expression in regExps)
            {
                if (expression == null)
                {
                    continue;
                }
                total += expression.size(macros);
            }

            foreach (RegExp lookAhead in look)
            {
                if (lookAhead == null)
                {
                    continue;
                }
                total += lookAhead.size(macros);
            }

            return total;
        }
Esempio n. 7
0
 /**
  * Creates a binary regular expression node.
  *
  * @param type   the expression type, passed on to the base constructor
  * @param r1     the first operand
  * @param r2     the second operand
  */
 public RegExp2(int type, RegExp r1, RegExp r2)
     : base(type)
 {
     this.r2 = r2;
     this.r1 = r1;
 }
Esempio n. 8
0
        /**
         * Constructs an NFA for regExp such that the NFA has
         *
         *   exactly one start state,
         *   exactly one end state,
         *   no transitions leading out of the end state
         *   no transitions leading into the start state
         *
         * Char class expressions get two fresh states and are delegated
         * to insertNFA(regExp, start, end). Every other expression type
         * is built recursively from the NFAs of its sub-expressions,
         * which are connected with epsilon transitions.
         *
         * @param regExp the regular expression to construct the
         *        NFA for
         *
         * @return a pair of integers denoting the index of start
         *         and end state of the NFA.
         *
         * @throws Exception if regExp.type is none of the expression
         *         types handled below
         */
        public IntPair insertNFA(RegExp regExp)
        {
            IntPair nfa1, nfa2;
            int     start, end;
            RegExp2 r;

            if (Options.DEBUG)
            {
                Out.debug("Inserting RegExp : " + regExp);
            }

            // Char classes need no recursion: two new states are reserved and
            // all transitions are inserted directly between them.
            if (regExp.isCharClass(macros))
            {
                start = numStates;
                end   = numStates + 1;

                ensureCapacity(end + 1);
                // NOTE(review): with end == numStates + 1 this condition holds unless
                // ensureCapacity itself changes numStates -- confirm.
                if (end + 1 > numStates)
                {
                    numStates = end + 1;
                }

                insertNFA(regExp, start, end);

                return(new IntPair(start, end));
            }

            switch (regExp.type)
            {
            case sym.BAR:

                r = (RegExp2)regExp;

                nfa1 = insertNFA(r.r1);
                nfa2 = insertNFA(r.r2);

                // The new start/end states are numbered directly after nfa2.end.
                // NOTE(review): presumably nfa2.end is the highest state index in use
                // and addEpsilonTransition grows the state tables as needed -- confirm.
                start = nfa2.end + 1;
                end   = nfa2.end + 2;

                // new start branches into both alternatives, both alternatives
                // lead to the new end
                addEpsilonTransition(start, nfa1.start);
                addEpsilonTransition(start, nfa2.start);
                addEpsilonTransition(nfa1.end, end);
                addEpsilonTransition(nfa2.end, end);

                return(new IntPair(start, end));

            case sym.CONCAT:

                r = (RegExp2)regExp;

                nfa1 = insertNFA(r.r1);
                nfa2 = insertNFA(r.r2);

                // chain the two sub-NFAs; no additional states are introduced
                addEpsilonTransition(nfa1.end, nfa2.start);

                return(new IntPair(nfa1.start, nfa2.end));

            case sym.STAR:
                nfa1 = insertNFA((RegExp)((RegExp1)regExp).content);

                // new start/end states numbered directly after the sub-NFA
                start = nfa1.end + 1;
                end   = nfa1.end + 2;

                addEpsilonTransition(nfa1.end, end);
                addEpsilonTransition(start, nfa1.start);

                // start -> end skips the body (zero repetitions),
                // nfa1.end -> nfa1.start repeats it
                addEpsilonTransition(start, end);
                addEpsilonTransition(nfa1.end, nfa1.start);

                return(new IntPair(start, end));

            case sym.PLUS:
                nfa1 = insertNFA((RegExp)((RegExp1)regExp).content);

                start = nfa1.end + 1;
                end   = nfa1.end + 2;

                addEpsilonTransition(nfa1.end, end);
                addEpsilonTransition(start, nfa1.start);

                // same as STAR but without the start -> end bypass, so the body
                // has to be passed at least once
                addEpsilonTransition(nfa1.end, nfa1.start);

                return(new IntPair(start, end));

            case sym.QUESTION:
                nfa1 = insertNFA((RegExp)((RegExp1)regExp).content);

                // reuse the states of the sub-NFA and add a bypass from its
                // start to its end
                addEpsilonTransition(nfa1.start, nfa1.end);

                return(new IntPair(nfa1.start, nfa1.end));

            case sym.BANG:
                // the result is whatever complement() builds from the sub-NFA
                return(complement(insertNFA((RegExp)((RegExp1)regExp).content)));

            case sym.TILDE:
                nfa1 = insertNFA((RegExp)((RegExp1)regExp).content);

                // four new states numbered directly after the sub-NFA:
                // start, s1, s2, end
                start = nfa1.end + 1;
                int s1 = start + 1;
                int s2 = s1 + 1;
                end = s2 + 1;

                // s1 and s2 loop to themselves on every input in [0, numInput)
                for (int i = 0; i < numInput; i++)
                {
                    addTransition(s1, i, s1);
                    addTransition(s2, i, s2);
                }

                // start -> s1 -> content -> s2 -> end:
                // arbitrary input, then the content, then arbitrary input again
                addEpsilonTransition(start, s1);
                addEpsilonTransition(s1, nfa1.start);
                addEpsilonTransition(nfa1.end, s2);
                addEpsilonTransition(s2, end);

                // complement that automaton, then append a second, freshly built
                // copy of the content
                nfa1 = complement(new IntPair(start, end));
                nfa2 = insertNFA((RegExp)((RegExp1)regExp).content);

                addEpsilonTransition(nfa1.end, nfa2.start);

                return(new IntPair(nfa1.start, nfa2.end));

            case sym.STRING:
                // case-sensitive string literal
                return(insertStringNFA(false, (String)((RegExp1)regExp).content));

            case sym.STRING_I:
                // same helper with the first argument set to true
                return(insertStringNFA(true, (String)((RegExp1)regExp).content));

            case sym.MACROUSE:
                // build the NFA for the definition the macro name refers to
                return(insertNFA(macros.getDefinition((String)((RegExp1)regExp).content)));
            }

            throw new Exception("Unknown expression type " + regExp.type + " in NFA construction");
        }
Esempio n. 9
0
 /**
  * Checks for illegal lookahead expressions.
  *
  * A lookahead is accepted when there is no lookahead expression at
  * all, when the first expression has a fixed length greater than
  * zero, or when the intersection of the last set of the first
  * expression and the first set of the second expression is empty.
  *
  * @param r1   first regexp
  * @param r2   second regexp (the lookahead), may be null
  *
  * @return true iff C# Flex can generate code for the lookahead expression
  */
 private static bool checkLookAhead(RegExp r1, RegExp r2)
 {
     // no lookahead, nothing to check
     if (r2 == null)
     {
         return true;
     }

     // fixed, non-zero length of the first expression
     if (length(r1) > 0)
     {
         return true;
     }

     // otherwise the end of r1 and the beginning of r2 must not overlap
     bool overlap = last(r1).and(first(r2)).containsElements();
     return !overlap;
 }
Esempio n. 10
0
        /**
         * Returns length if expression has fixed length, -1 otherwise.
         *
         * Case-insensitive characters and strings (CHAR_I, STRING_I)
         * have the same length as their case-sensitive counterparts,
         * because case folding does not change the number of
         * characters matched.
         *
         * @param re   the expression to analyse
         *
         * @return the number of characters every match of re has, or
         *         -1 if matches of re can differ in length
         *
         * @throws Exception if re.type is not one of the handled
         *         expression types
         */
        private static int length(RegExp re)
        {
            RegExp2 r;

            switch (re.type)
            {
            case sym.BAR: {
                r = (RegExp2)re;
                int l1 = length(r.r1);
                if (l1 < 0)
                {
                    return(-1);
                }
                int l2 = length(r.r2);

                // both alternatives must agree on the length
                return(l1 == l2 ? l1 : -1);
            }

            case sym.CONCAT: {
                r = (RegExp2)re;
                int l1 = length(r.r1);
                if (l1 < 0)
                {
                    return(-1);
                }
                int l2 = length(r.r2);
                if (l2 < 0)
                {
                    return(-1);
                }
                return(l1 + l2);
            }

            case sym.STAR:
            case sym.PLUS:
            case sym.QUESTION:
                return(-1);

            case sym.CCLASS:
            case sym.CCLASSNOT:
            case sym.CHAR:
            case sym.CHAR_I:
                return(1);

            case sym.STRING:
            case sym.STRING_I: {
                String content = (String)((RegExp1)re).content;
                return(content.Length);
            }

            case sym.MACROUSE:
                return(length(macros.getDefinition((String)((RegExp1)re).content)));
            }

            throw new Exception("Unkown expression type " + re.type + " in " + re); //$NON-NLS-1$ //$NON-NLS-2$
        }