Exemplo n.º 1
0
            /// <summary>
            /// Redirects every transition that ends in <paramref name="from"/> so that it ends in
            /// <paramref name="to"/> instead, for all states reached by walking the outgoing
            /// transitions starting at <paramref name="instat"/>.
            /// </summary>
            /// <remarks>
            /// Redirected transitions are not followed, and the walk does not descend into
            /// <paramref name="to"/> unless it is the start state itself.
            /// </remarks>
            /// <param name="instat">State at which the walk starts.</param>
            /// <param name="from">End state that is being replaced.</param>
            /// <param name="to">State that takes the place of <paramref name="from"/>.</param>
            private static void replaceStates(RegularState instat, RegularState from, RegularState to)
            {
                Stack <RegularState> states  = new Stack <RegularState>();
                Set <RegularState>   visited = new Set <RegularState>();

                // the replacement target is never entered through a transition
                visited.Add(to);
                // mark states when they are pushed (not when they are popped) so that a state
                // reachable along several paths is queued and processed only once
                visited.Add(instat);
                states.Push(instat);
                while (states.Count > 0)
                {
                    RegularState state = states.Pop();
                    foreach (RegularTransition rt in state.getOutTransitions())
                    {
                        RegularState st = rt.getEndState();
                        if (st.Equals(from))
                        {
                            rt.setEndState(to);
                            continue;
                        }

                        if (!visited.Contains(st))
                        {
                            visited.Add(st);
                            states.Push(st);
                        }
                    }
                }
            }
Exemplo n.º 2
0
        /// <summary>
        /// Advances the set of active states by one input word.
        /// </summary>
        /// <remarks>
        /// The current content of <c>states</c> is copied into a work queue and <c>states</c>
        /// is cleared. For every queued state, empty transitions enqueue their end state for
        /// examination, and transitions matching <paramref name="word"/> hand their end state
        /// to <c>addState</c>. <c>hasGreedyEnd</c> is overwritten with the result of
        /// <c>isGreedyEnd()</c> for each examined state; the walk stops at the first state
        /// for which it is true.
        /// </remarks>
        /// <param name="word">Word to test the outgoing transitions against.</param>
        /// <returns>
        /// 0 when a greedy end state was encountered, otherwise the number of entries in
        /// <c>states</c> after the step.
        /// </returns>
        private int traverseStates(string word)
        {
            Set <RegularState>   seen    = new Set <RegularState>();
            Queue <RegularState> pending = new Queue <RegularState>(states);

            states.Clear();
            while (pending.Count != 0)
            {
                RegularState current = pending.Dequeue();

                hasGreedyEnd = current.isGreedyEnd();
                if (hasGreedyEnd)
                {
                    return 0;
                }
                seen.Add(current);
                foreach (RegularTransition edge in current.getOutTransitions())
                {
                    if (!edge.isEmpty())
                    {
                        // labelled transition: consumes the word when it matches
                        if (edge.match(word))
                        {
                            addState(edge.getEndState());
                        }
                        continue;
                    }

                    // empty transition: examine its end state too, unless already queued or done
                    RegularState target = edge.getEndState();
                    if (pending.Contains(target) || seen.Contains(target))
                    {
                        continue;
                    }
                    pending.Enqueue(target);
                }
            }
            return states.Count;
        }
Exemplo n.º 3
0
            /// <summary>
            /// Applies a repetition operator (?, * or +) to a subexpression when the lexer's
            /// current token is one; otherwise does nothing and leaves the lexer untouched.
            /// </summary>
            /// <param name="expr">Start state of the subexpression.</param>
            /// <param name="lexer">Lexer positioned at the token following the subexpression.</param>
            /// <param name="endState">State the subexpression currently ends in.</param>
            private static void multTransform(RegularState expr, Lexer lexer, RegularState endState)
            {
                Token current = lexer.token();
                bool  isStar  = current.type == Token.STAR;
                bool  isOpt   = !isStar && current.type == Token.OPT;
                bool  isPlus  = !isStar && !isOpt && current.type == Token.PLUS;

                if (!(isStar || isOpt || isPlus))
                {
                    // not a repetition operator
                    return;
                }

                // consume the operator
                lexer.nextToken();

                if (isOpt)
                {
                    // a lambda transition lets the subexpression be skipped
                    new RegularTransition(expr, endState);
                    return;
                }

                if (isStar)
                {
                    // transitions that ended the subexpression loop back to its start ...
                    replaceStates(expr, endState, expr);
                    // ... and a lambda transition makes the whole thing optional
                    new RegularTransition(expr, endState);
                    return;
                }

                // plus: route the end of the subexpression through a fresh state
                RegularState loopState = new RegularState();
                replaceStates(expr, endState, loopState);
                // from there either start another iteration ...
                new RegularTransition(loopState, expr);
                // ... or leave towards the end state (one iteration done)
                new RegularTransition(loopState, endState);
            }
Exemplo n.º 4
0
 /// <summary>
 /// Moves this transition to a different start state.
 /// </summary>
 /// <remarks>
 /// The transition is removed from the outgoing transitions of its current start state,
 /// if it has one, and added to those of <paramref name="newStart"/>.
 /// </remarks>
 /// <param name="newStart">State this transition leaves from afterwards.</param>
 public void setStartState(RegularState newStart)
 {
     RegularState previous = this.startState;

     if (previous != null)
     {
         // detach from the old start state first
         previous.removeOutTransition(this);
     }

     this.startState = newStart;
     this.startState.addOutTransition(this);
 }
Exemplo n.º 5
0
            /// <summary>
            /// Consumes the current token and builds a state with a single transition,
            /// labelled with that token's text, to <paramref name="endState"/>.
            /// </summary>
            /// <param name="lexer">Lexer positioned at the word token.</param>
            /// <param name="endState">State the new transition ends in.</param>
            /// <returns>The newly created state.</returns>
            private static RegularState pWord(Lexer lexer, RegularState endState)
            {
                // remember the token before the lexer moves on
                Token word = lexer.token();
                lexer.nextToken();

                RegularState entry = new RegularState();
                new RegularTransition(entry, endState).addLabel(word.ToString());

                return entry;
            }
Exemplo n.º 6
0
            /// <summary>
            /// Process a subexpression: consumes the current token, parses alternatives up to
            /// the closing parenthesis and applies a trailing repetition operator, if any.
            /// </summary>
            /// <param name="lexer">Lexer positioned at the token that opens the subexpression.</param>
            /// <param name="endState">State the subexpression ends in.</param>
            /// <returns>Start state of the subexpression.</returns>
            /// <exception cref="PatternParseException">
            /// The token following the alternatives is not a right parenthesis.
            /// </exception>
            private static RegularState pSubexp(Lexer lexer, RegularState endState)
            {
                // consume the opening token (pSeq dispatches here on Token.PLEFT)
                lexer.nextToken();
                RegularState result = pAlt(lexer, endState);

                if (lexer.token().type != Token.PRIGHT)
                {
                    // String.Format needs {0}-style placeholders; "%d" would be printed literally
                    // and the position would never appear in the message
                    throw new PatternParseException(String.Format("Missing right paranthesis at #{0}", lexer.charPos()));
                }
                lexer.nextToken();

                multTransform(result, lexer, endState);
                return(result);
            }
Exemplo n.º 7
0
        /// <summary>
        /// Adds a state to <c>states</c>, followed by every state reachable from it through
        /// empty (lambda) transitions. A state that is already present is left alone.
        /// </summary>
        /// <param name="state">State to add.</param>
        private void addState(RegularState state)
        {
            if (!states.Contains(state))
            {
                states.Add(state);

                // pull in whatever can be reached without consuming input
                foreach (RegularTransition edge in state.getOutTransitions())
                {
                    if (!edge.isEmpty())
                    {
                        continue;
                    }
                    addState(edge.getEndState());
                }
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Resolve greedy ends: recursively walks the states reachable from this one and
        /// sets this state's <c>greedyEnd</c> flag.
        /// </summary>
        /// <remarks>
        /// The flag is set when either
        /// (a) an empty transition leads to a state that this call recursed into and that
        /// reports <c>isGreedyEnd()</c> afterwards, or
        /// (b) this state has both a wildcard transition to itself and an empty transition
        /// directly to <paramref name="endState"/>.
        /// Transitions whose end state was already in <paramref name="visited"/> are not
        /// recursed into and are not considered for (a).
        /// </remarks>
        /// <param name="visited">
        /// States already processed; this state is added on entry. Shared across the whole
        /// recursion, which keeps it from looping on cycles.
        /// </param>
        /// <param name="endState">End state used for the direct-lambda check in (b).</param>
        public void resolveGreedyEnd(Set <RegularState> visited, RegularState endState)
        {
            bool hasSelfRef = false;
            bool hasEndRef  = false;

            // mark before recursing so that cycles back to this state are cut off
            visited.Add(this);
            foreach (RegularTransition transition in outTransitions)
            {
                if (transition.isWildcard() && transition.getEndState() == this)
                {
                    // wildcard loop on this state
                    hasSelfRef = true;
                }
                else if (transition.isEmpty() && transition.getEndState() == endState)
                {
                    // TODO: doesn't check if end state is reachable through lambda
                    // transitions
                    hasEndRef = true;
                }

                // already handled (or currently being handled further up the recursion)
                if (visited.Contains(transition.getEndState()))
                {
                    continue;
                }
                // resolve the successor first so its flag is final before it is read below
                transition.getEndState().resolveGreedyEnd(visited, endState);

                // flag already set by an earlier transition; remaining successors are
                // still visited, but nothing more needs to be decided for this state
                if (greedyEnd)
                {
                    continue;
                }
                if (transition.isEmpty())
                {
                    if (transition.getEndState().isGreedyEnd())
                    {
                        // a lambda transition to a greedyEnd state makes this state
                        // also a greedyEnd state
                        greedyEnd = true;
                    }
                }
            }
            // fall back to the local rule: wildcard self loop plus direct lambda to the end
            if (!greedyEnd)
            {
                greedyEnd = hasSelfRef && hasEndRef;
            }
        }
Exemplo n.º 9
0
            /// <summary>
            /// Parse the input string and return a regular automaton.
            /// </summary>
            /// <remarks>
            /// A pattern that does not start with "^" is prefixed with ".*", and one that does
            /// not end with "$" is suffixed with ".*"; the anchors themselves are stripped
            /// before the pattern is handed to the lexer.
            /// </remarks>
            /// <param name="pattern">The pattern to parse; must not be null.</param>
            /// <returns>An automaton whose start and end state describe the pattern.</returns>
            /// <exception cref="PatternParseException">
            /// <paramref name="pattern"/> is null, or tokens are left over after parsing.
            /// </exception>
            public static RegularAutomaton parse(string pattern)
            {
                if (pattern == null)
                {
                    throw new PatternParseException("Pattern can not be null");
                }

                // the anchors are plain characters, so compare ordinally rather than with
                // the culture-sensitive default
                if (!pattern.StartsWith("^", StringComparison.Ordinal))
                {
                    pattern = ".*" + pattern;
                }
                else
                {
                    pattern = pattern.Substring(1);
                }
                if (!pattern.EndsWith("$", StringComparison.Ordinal))
                {
                    pattern = pattern + ".*";
                }
                else
                {
                    // ends with "$", so there is at least one character to drop
                    pattern = pattern.Substring(0, pattern.Length - 1);
                }

                Lexer        lexer = new Lexer(pattern);
                RegularState end   = new FinalRegularState();
                RegularState start = pAlt(lexer, end);

                // reject left-over input before doing any further work on the state graph
                if (lexer.token().type != Token.EOF)
                {
                    // String.Format needs {0}-style placeholders; "%s"/"%d" would be printed
                    // literally and the arguments ignored
                    throw new PatternParseException(String.Format("Garbage token '{0}' at #{1}", lexer.token().text, lexer
                                                                  .charPos()));
                }

                start.resolveGreedyEnd(new Set <RegularState>(), end);
                RegularAutomaton auto = new RegularAutomaton();

                auto.setStartState(start);
                auto.setEndState(end);
                return(auto);
            }
Exemplo n.º 10
0
            /// <summary>
            /// Return true when the provided state has an outgoing transition that ends in
            /// the state itself.
            /// </summary>
            /// <remarks>
            /// Only the state's own outgoing transitions are inspected; a cycle that leads
            /// back through other states is not detected. The former stack and visited set
            /// never received any state besides <paramref name="self"/>, so they were removed
            /// without changing the result.
            /// </remarks>
            /// <param name="self">State to inspect.</param>
            /// <returns>True when a transition of <paramref name="self"/> ends in <paramref name="self"/>.</returns>
            private static bool hasSelfReference(RegularState self)
            {
                foreach (RegularTransition rt in self.getOutTransitions())
                {
                    if (rt.getEndState().Equals(self))
                    {
                        return(true);
                    }
                }
                return(false);
            }
Exemplo n.º 11
0
 /// <summary>
 /// Stores the given state as the end state of this object.
 /// </summary>
 /// <param name="newEndState">State to use as end state from now on.</param>
 public void setEndState(RegularState newEndState)
 {
     this.endState = newEndState;
 }
Exemplo n.º 12
0
 /// <summary>
 /// Stores the given state as the start state of this object.
 /// </summary>
 /// <param name="newStartState">State to use as start state from now on.</param>
 public void setStartState(RegularState newStartState)
 {
     this.startState = newStartState;
 }
Exemplo n.º 13
0
 /// <summary>
 /// Builds an automaton from an existing pair of start and end states.
 /// </summary>
 /// <param name="inStart">State stored as the start state.</param>
 /// <param name="inEnd">State stored as the end state.</param>
 public RegularAutomaton(RegularState inStart, RegularState inEnd)
 {
     this.startState = inStart;
     this.endState   = inEnd;
 }
Exemplo n.º 14
0
 /// <summary>
 /// Builds an automaton from an existing start state; a fresh
 /// <c>RegularState</c> is created to serve as its end state.
 /// </summary>
 /// <param name="inStart">State stored as the start state.</param>
 public RegularAutomaton(RegularState inStart)
 {
     this.startState = inStart;
     this.endState   = new RegularState();
 }
Exemplo n.º 15
0
            /// <summary>
            /// Process alternatives. This will optimize the state machine where
            /// possible. Edges with identical destinations will be merged, but only
            /// when they are not a lambda transition or contain a self reference.
            /// </summary>
            /// <remarks>
            /// With a single alternative its start state is returned unchanged. Otherwise a
            /// new start state is created that carries, per destination, one lambda transition
            /// (if any alternative had one), one transition holding the union of all plain
            /// labels, and one negated transition per negated original. Negated transitions are
            /// not merged with each other: a negated transition accepts the words that are NOT
            /// among its labels, so uniting the label sets would accept only words rejected by
            /// none of the alternatives (e.g. "![a]|![b]" would no longer accept "a").
            /// </remarks>
            /// <param name="lexer">Lexer positioned at the first alternative.</param>
            /// <param name="endState">State all alternatives end in.</param>
            /// <returns>Start state for the combined alternatives.</returns>
            private static RegularState pAlt(Lexer lexer, RegularState endState)
            {
                List <RegularState> alts = new List <RegularState>();

                alts.Add(pSeq(lexer, endState));

                while (lexer.token().type == Token.OR)
                {
                    lexer.nextToken();
                    alts.Add(pSeq(lexer, endState));
                }

                if (alts.Count == 1)
                {
                    return(alts[0]);
                }
                // combine transitions with identical destinations

                RegularState result = new RegularState();
                Dictionary <RegularState, List <RegularTransition> > destMap = new Dictionary <RegularState, List <RegularTransition> >();

                foreach (RegularState state in alts)
                {
                    if (state.Equals(endState))
                    {
                        // pSeq hands back the end state itself for an empty alternative; it
                        // has no transitions of its own to copy, so keep the alternative alive
                        // with a lambda straight to the end state
                        new RegularTransition(result, state);
                        continue;
                    }
                    if (hasSelfReference(state))
                    {
                        // add lambda to self referencing states
                        new RegularTransition(result, state);
                        continue;
                    }
                    foreach (RegularTransition rt in state.getOutTransitions())
                    {
                        List <RegularTransition> dst;
                        if (!destMap.TryGetValue(rt.getEndState(), out dst))
                        {
                            dst = new List <RegularTransition>();
                            destMap.Add(rt.getEndState(), dst);
                        }
                        dst.Add(rt);
                    }
                }
                // iterate the pairs directly instead of looking every key up again
                foreach (KeyValuePair <RegularState, List <RegularTransition> > entry in destMap)
                {
                    RegularState      key       = entry.Key;
                    RegularTransition regrt     = null;
                    bool              hasLambda = false;
                    foreach (RegularTransition rt in entry.Value)
                    {
                        if (rt.isEmpty())
                        {
                            // don't combine lambda transitions with others; one per
                            // destination is enough
                            if (!hasLambda)
                            {
                                new RegularTransition(result, key);
                                hasLambda = true;
                            }
                        }
                        else if (rt.isNegation())
                        {
                            // keep every negated transition separate (see remarks)
                            RegularTransition neqrt = new RegularTransition(result, key);
                            neqrt.setNegation(true);
                            neqrt.addLabels(rt.getLabels());
                        }
                        else
                        {
                            // plain labels to the same destination share one transition
                            if (regrt == null)
                            {
                                regrt = new RegularTransition(result, key);
                            }
                            regrt.addLabels(rt.getLabels());
                        }
                    }
                }
                return(result);
            }
Exemplo n.º 16
0
            /// <summary>
            /// Process sequences of words and subexpressions.
            /// </summary>
            /// <remarks>
            /// Handles word, wildcard (dot), negated word list and parenthesised subexpression
            /// items for as long as the current token starts one. Every item is first built
            /// against <paramref name="endState"/>; the previous item is then re-linked so that
            /// it ends in the start state of the current one.
            /// </remarks>
            /// <param name="lexer">Lexer positioned at the first item of the sequence.</param>
            /// <param name="endState">State the sequence ends in.</param>
            /// <returns>
            /// Start state of the first item, or <paramref name="endState"/> itself when the
            /// input is at EOF and no item was read.
            /// </returns>
            /// <exception cref="PatternParseException">
            /// A negated word list is not closed by a right square bracket, or no item could
            /// be read and the current token is not EOF.
            /// </exception>
            private static RegularState pSeq(Lexer lexer, RegularState endState)
            {
                RegularState result = null;
                RegularState lhs    = null;

                while (lexer.token().type == Token.WORD || lexer.token().type == Token.NOT ||
                       lexer.token().type == Token.PLEFT || lexer.token().type == Token.DOT)
                {
                    RegularState rhs;
                    if (lexer.token().type == Token.WORD)
                    {
                        rhs = pWord(lexer, endState);
                    }
                    else if (lexer.token().type == Token.DOT)
                    {
                        // wildcard item, optionally followed by a repetition operator
                        lexer.nextToken();
                        rhs = new RegularState();
                        RegularTransition transition = new RegularTransition(rhs, endState);
                        transition.addLabel(RegularTransition.WILDCARD);
                        multTransform(rhs, lexer, endState);
                    }
                    else if (lexer.token().type == Token.NOT)
                    {
                        // ![word1,word2,word3,word4,...]
                        rhs = new RegularState();
                        RegularTransition transition = new RegularTransition(rhs, endState);
                        transition.setNegation(true);
                        lexer.nextToken();
                        while (lexer.token().type == Token.WORD)
                        {
                            transition.addLabel(lexer.token().ToString());
                            lexer.nextToken();
                            // a comma after a word is skipped when present
                            if (lexer.token().type == Token.COMMA)
                            {
                                lexer.nextToken();
                            }
                        }
                        if (lexer.token().type != Token.SRIGHT)
                        {
                            // String.Format needs {0}-style placeholders; "%d" would be
                            // printed literally
                            throw new PatternParseException(String.Format("Missing right square bracket at #{0}", lexer
                                                                          .charPos()));
                        }
                        lexer.nextToken();
                        multTransform(rhs, lexer, endState);
                    }
                    else
                    {
                        // Token.PLEFT
                        rhs = pSubexp(lexer, endState);
                    }
                    if (result == null)
                    {
                        // first item in the sequence is the return value;
                        result = rhs;
                    }
                    if (lhs != null)
                    {
                        // link end state of previous item in the list to current
                        // item
                        replaceStates(lhs, endState, rhs);
                    }
                    lhs = rhs;
                }
                if (lexer.token().type == Token.EOF && result == null)
                {
                    // empty regex
                    return(endState);
                }
                if (result == null)
                {
                    // String.Format needs {0}-style placeholders; "%s"/"%d" would be printed
                    // literally and the arguments ignored
                    throw new PatternParseException(String.Format("Unexpected token '{0}' at #{1}", lexer.token().text, lexer
                                                                  .charPos()));
                }
                return(result);
            }
Exemplo n.º 17
0
 /// <summary>
 /// Create a new transition with a given start and end. The label set starts out empty.
 /// </summary>
 /// <param name="mystart">State the transition leaves from; passed to <c>setStartState</c>.</param>
 /// <param name="myend">State the transition ends in; passed to <c>setEndState</c>.</param>
 public RegularTransition(RegularState mystart, RegularState myend)
 {
     // initialise the label set before the setters run, so that no other object is
     // handed a transition whose labels are still null
     labels = new Set <string>();
     setStartState(mystart);
     setEndState(myend);
 }