Exemple #1
0
        /* file format compatible with https://github.com/Miskaaa/symboliclib */
        /// <summary>
        /// Writes an automaton or transducer to <paramref name="file"/> in Timbuk-style text format.
        /// </summary>
        /// <remarks>
        /// Emits the <c>Ops</c> line, the <c>Automaton</c> header, both state lists and then one line per move.
        /// The initial state is written as the nullary transition <c>x() -&gt; state</c>.
        /// <paramref name="algebra"/> and the three symbol-file parameters are not used by this writer.
        /// </remarks>
        /// <param name="file">Destination writer.</param>
        /// <param name="type">Selects between predicate labels (<c>SSA</c>) and input/output labels (otherwise).</param>
        /// <param name="name">Automaton name; <c>"M"</c> is written when <c>null</c>.</param>
        /// <param name="stateNames">Maps state identifiers to the names that are written out.</param>
        /// <param name="states">All states.</param>
        /// <param name="alphabet">Symbols declared (with arity 1) on the <c>Ops</c> line.</param>
        /// <param name="moves">Transitions to write.</param>
        /// <param name="initialState">Initial state.</param>
        /// <param name="finalStates">Final states.</param>
        private static void PrintAutomatonTimbuk(
            StreamWriter file, AutomatonType type, string name, Dictionary <int, string> stateNames,
            PredicateAlgebra <SYMBOL> algebra, List <int> states, List <SYMBOL> alphabet,
            List <Move <ILabel <SYMBOL> > > moves, int initialState, List <int> finalStates,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            bool isAutomaton = (type == AutomatonType.SSA);

            // every alphabet symbol is declared with arity 1; only the "start" symbol x has arity 0
            List <string> symbolDecls = alphabet.Select(symbol => string.Format("{0}:1", symbol)).ToList();

            // predicate -> in{a,b,...} or not_in{a,b,...}; a null predicate becomes the empty string
            Func <ILabel <SYMBOL>, string> formatPredicate = ilabel => {
                var pred = (Predicate <SYMBOL>)ilabel;
                if (pred == null)
                {
                    return "";
                }
                string keyword = (pred.Type == PredicateType.In) ? "in" : "not_in";
                return keyword + "{" + string.Join(",", pred.Set) + "}";
            };

            // transducer label -> input/output, with the @pred/@pred form reserved for identity labels
            Func <ILabel <SYMBOL>, string> formatLabel = ilabel => {
                var lbl = (Label <SYMBOL>)ilabel;
                if (lbl.IsIdentity)
                {
                    return string.Format("@{0}/@{0}", formatPredicate(lbl.Input));
                }
                return string.Format("{0}/{1}", formatPredicate(lbl.Input), formatPredicate(lbl.Output));
            };

            Func <ILabel <SYMBOL>, string> formatMoveLabel;
            if (isAutomaton)
            {
                formatMoveLabel = formatPredicate;
            }
            else
            {
                formatMoveLabel = formatLabel;
            }

            // header section
            file.WriteLine("Ops x:0 {0}", string.Join(" ", symbolDecls));
            file.WriteLine();
            file.WriteLine("Automaton {0} @{1}", name ?? "M", isAutomaton ? "INFA" : "INT");
            file.WriteLine("States {0}", string.Join(" ", states.Select(state => stateNames[state])));
            file.WriteLine("Final States {0}", string.Join(" ", finalStates.Select(state => stateNames[state])));

            // transition section; the nullary x() transition marks the initial state
            file.WriteLine("Transitions");
            file.WriteLine("x() -> {0}", stateNames[initialState]);
            foreach (Move <ILabel <SYMBOL> > move in moves)
            {
                string labelText  = formatMoveLabel(move.Label);
                string sourceName = stateNames[move.SourceState];
                string targetName = stateNames[move.TargetState];
                file.WriteLine("\"{0}\"({1}) -> {2}", labelText, sourceName, targetName);
            }
            file.WriteLine();
        }
Exemple #2
0
 /// <summary>
 /// Creates the exception reported when no start symbol (a symbol of arity 0) is specified.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static TimbukParserException NoStartSymbol(AutomatonType type)
 {
     return new TimbukParserException(type, "no start symbol (arity 0) specified");
 }
Exemple #3
0
        /* FSA format spec at http://www.let.rug.nl/~vannoord/Fsa/Manual/node5.html#anc1 */
        /// <summary>
        /// Writes an automaton or transducer to <paramref name="file"/> as a <c>fa(...)</c> term in FSA format.
        /// </summary>
        /// <remarks>
        /// Epsilon moves are written as <c>jump(...)</c> entries, all other moves as <c>trans(...)</c> entries.
        /// States are written by their integer identifiers. <paramref name="stateNames"/>,
        /// <paramref name="alphabet"/> and the three symbol-file parameters are not used by this writer.
        /// </remarks>
        /// <param name="file">Destination writer.</param>
        /// <param name="type">Selects between a recognizer (<c>SSA</c>) and a transducer (otherwise).</param>
        /// <param name="name">Name written into the header comment; empty when <c>null</c>.</param>
        /// <param name="algebra">Used to obtain the inclusive set of each predicate.</param>
        /// <param name="states">All states; only their count is written.</param>
        /// <param name="moves">Transitions to write.</param>
        /// <param name="initialState">Initial state.</param>
        /// <param name="finalStates">Final states.</param>
        private static void PrintAutomatonFSA(
            StreamWriter file, AutomatonType type, string name, Dictionary <int, string> stateNames,
            PredicateAlgebra <SYMBOL> algebra, List <int> states, List <SYMBOL> alphabet,
            List <Move <ILabel <SYMBOL> > > moves, int initialState, List <int> finalStates,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            var transitions = new List <Move <ILabel <SYMBOL> > >(); // non-epsilon moves
            var jumps       = new List <Tuple <int, int> >();        // epsilon moves

            foreach (Move <ILabel <SYMBOL> > move in moves)
            {
                if (move.IsEpsilon)
                {
                    jumps.Add(new Tuple <int, int>(move.SourceState, move.TargetState));
                }
                else
                {
                    transitions.Add(move);
                }
            }

            // add quotes if necessary: symbols that are empty, start with an uppercase letter,
            // contain whitespace, or have a leading zero followed by further characters
            Func <SYMBOL, string> sanitizeSymbol = symbol => {
                string s = symbol.ToString();
                if (s.Length == 0)
                {
                    // the empty atom has to be written as ''; this also guards the s[0] accesses below
                    return("''");
                }
                if (char.IsUpper(s[0]) || s.Any(ch => char.IsWhiteSpace(ch)) || (s[0] == '0' && s.Length > 1))
                {
                    return("'" + s + "'");
                }
                return(s);
            };

            Func <ILabel <SYMBOL>, string> printPredicate = ilabel => {
                var predicate = (Predicate <SYMBOL>)ilabel;
                if (predicate == null)  // epsilon (only for transducers)
                {
                    return("[]");
                }
                // `in([a])` may just be written as `a`
                var inclusiveSet = algebra.InclusiveSet(predicate);  // computed once, used for both test and value
                if (inclusiveSet.Count == 1)
                {
                    return(sanitizeSymbol(inclusiveSet.First()));
                }
                string        typeName = (predicate.Type == PredicateType.In) ? "in" : "not_in";
                List <string> symbols  = predicate.Set.Select(sanitizeSymbol).ToList();
                symbols.Sort();
                return(string.Format("{0}([{1}])", typeName, string.Join(",", symbols)));
            };
            // transducer label: input/output, with the $@(pred)/$@(pred) form used for identity labels
            Func <ILabel <SYMBOL>, string> printLabel = ilabel => {
                var label = (Label <SYMBOL>)ilabel;
                return(label.IsIdentity ?
                       string.Format("$@({0})/$@({0})", printPredicate(label.Input)) :
                       string.Format("{0}/{1}", printPredicate(label.Input), printPredicate(label.Output)));
            };
            Func <ILabel <SYMBOL>, string> printILabel = (type == AutomatonType.SSA) ?
                                                         printPredicate : printLabel;

            file.WriteLine("%% {0} {1}", (type == AutomatonType.SSA) ? "Recognizer" : "Transducer", name ?? "");
            file.WriteLine("%% Automatically generated by ARMC.");
            file.WriteLine("%% For more info, cf. http://www.let.rug.nl/~vannoord/Fsa/");
            file.WriteLine();
            file.WriteLine("fa(");
            if (type == AutomatonType.SSA)
            {
                file.WriteLine("    r(fsa_preds),");
            }
            else
            {
                file.WriteLine("    t(fsa_preds,fsa_preds),");
            }
            file.WriteLine("    % number of states");
            file.WriteLine("    {0},", states.Count);
            file.WriteLine("    % start states");
            file.WriteLine("    [ {0} ],", initialState);
            file.WriteLine("    % final states");
            // put up to 10 final states on one line
            // (the counter is captured by the grouping lambda, so element k lands in group k / 10)
            int i = 0;

            FSAWriteList(
                file,
                finalStates.GroupBy(state => i++ / 10).Select(group => group.ToList()),
                stateList => string.Join(",", stateList)
                );
            file.WriteLine("    ],");
            file.WriteLine("    % moves");
            FSAWriteList(file, transitions, (move => string.Format("trans({0},{1},{2})", move.SourceState, printILabel(move.Label), move.TargetState)));
            file.WriteLine("    % jumps");
            FSAWriteList(file, jumps, (jump => string.Format("jump({0},{1})", jump.Item1, jump.Item2)), false);
            file.WriteLine(").");
            file.WriteLine();
        }
Exemple #4
0
        /* format spec at http://www.graphviz.org/doc/info/lang.html */
        /// <summary>
        /// Writes an automaton or transducer to <paramref name="file"/> as a Graphviz <c>digraph</c>.
        /// </summary>
        /// <remarks>
        /// Nodes are identified by their integer state identifiers and labelled with the HTML-like form of
        /// their names from <paramref name="stateNames"/>. Moves sharing source and target state are merged
        /// into a single edge. <paramref name="alphabet"/> and the three symbol-file parameters are not used
        /// by this writer.
        /// </remarks>
        /// <param name="file">Destination writer.</param>
        /// <param name="type">Selects between predicate labels (<c>SSA</c>) and input/output labels (otherwise).</param>
        /// <param name="name">Graph label; no label is written when <c>null</c>.</param>
        /// <param name="stateNames">Maps state identifiers to names, which may use <c>_</c> / <c>^</c> markup.</param>
        /// <param name="algebra">Used to obtain the inclusive set of each predicate.</param>
        /// <param name="states">All states.</param>
        /// <param name="moves">Transitions to draw.</param>
        /// <param name="initialState">Initial state (target of the arrow from the invisible dummy node).</param>
        /// <param name="finalStates">Final states (drawn with a double circle).</param>
        private static void PrintAutomatonDot(
            StreamWriter file, AutomatonType type, string name, Dictionary <int, string> stateNames,
            PredicateAlgebra <SYMBOL> algebra, List <int> states, List <SYMBOL> alphabet,
            List <Move <ILabel <SYMBOL> > > moves, int initialState, List <int> finalStates,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            // group labels for moves with same source and target state (reduce edges in graph)
            var transGroups = moves.GroupBy(
                move => new Tuple <int, int>(move.SourceState, move.TargetState),
                move => move.Label,
                (key, labels) => new Tuple <int, List <ILabel <SYMBOL> >, int>(key.Item1, labels.ToList(), key.Item2)
                );

            // create subscripts and superscripts based on LaTeX-like markup in state names
            // e.g. q_0 becomes <q<sub>0</sub>>, q_{42} becomes <q<sub>42</sub>>, q_M^2 becomes <q<sub>M</sub><sup>2</sup>>
            var regexSubSingle             = new Regex(@"_([^{])");
            var regexSubGroup              = new Regex(@"_{([^}]*)}");
            var regexSuperSingle           = new Regex(@"\^([^{])");
            var regexSuperGroup            = new Regex(@"\^{([^}]*)}");
            Func <int, string> stateToHTML = delegate(int state) {
                string stateName = stateNames[state];
                stateName = regexSubSingle.Replace(stateName, "<sub>$1</sub>");
                stateName = regexSubGroup.Replace(stateName, "<sub>$1</sub>");
                stateName = regexSuperSingle.Replace(stateName, "<sup>$1</sup>");
                stateName = regexSuperGroup.Replace(stateName, "<sup>$1</sup>");
                return(stateName);
            };

            // escape characters that would otherwise be read as markup inside an HTML-like label
            Func <SYMBOL, string> printSymbol = symbol => symbol.ToString()
                                                .Replace("<", "&lt;").Replace(">", "&gt;")
                                                .Replace("[", "&#91;").Replace("]", "&#93;");

            Func <ILabel <SYMBOL>, string> printPredicate = ilabel => {
                var predicate = (Predicate <SYMBOL>)ilabel;
                if (predicate == null)
                {
                    return("&epsilon;");
                }
                // a predicate matching exactly one symbol is shown as that symbol alone
                var inclusiveSet = algebra.InclusiveSet(predicate);  // computed once, used for both test and value
                if (inclusiveSet.Count == 1)
                {
                    return(string.Format("<i>{0}</i>", printSymbol(inclusiveSet.First())));
                }
                string typeSymbol = (predicate.Type == PredicateType.In) ? "&isin;" : "&notin;";  // unicode math set symbols
                // sort symbols and put in italics
                var symbols = new List <SYMBOL>(predicate.Set);
                symbols.Sort();
                Func <int, int, int> ceilDiv = (x, y) => (x - 1) / y + 1;
                // start with at most 5 symbols per line, then shrink the line length for as long as
                // that does not increase the number of lines (gives more evenly filled lines)
                int groupSize = 5;
                if (symbols.Count > 1)
                {
                    while (ceilDiv(symbols.Count, groupSize - 1) == ceilDiv(symbols.Count, groupSize))
                    {
                        groupSize--;
                    }
                }
                // the counter is captured by the grouping lambda, so element k lands in group k / groupSize
                int    i      = 0;
                string joined = string.Join(",<br/>", symbols
                                            .Select(printSymbol)
                                            .Select(symbol => string.Format("<i>{0}</i>", symbol))
                                            .GroupBy(symbol => i++ / groupSize)
                                            .Select(group => string.Join(", ", group.ToList()))
                                            );
                return(typeSymbol + "{" + joined + "}");
            };
            // transducer label: input/output, with a iota character as output for identity labels
            Func <ILabel <SYMBOL>, string> printLabel = ilabel => {
                var label = (Label <SYMBOL>)ilabel;
                return(label.IsIdentity ?
                       string.Format("{0}<b>/</b>&#x1d704;", printPredicate(label.Input)) :
                       string.Format("{0}<b>/</b>{1}", printPredicate(label.Input), printPredicate(label.Output)));
            };
            Func <ILabel <SYMBOL>, string> printILabel = (type == AutomatonType.SSA) ?
                                                         printPredicate : printLabel;

            file.WriteLine("digraph {");
            file.WriteLine("    rankdir=LR;"); // left-to-right direction more readable for automata
            if (name != null)                  // print automaton name
            {
                file.WriteLine("    label=<{0}:>;", name);
                file.WriteLine("    labelloc=top;");
                file.WriteLine("    labeljust=left;");
            }
            // final states have double circle
            file.Write("    node [shape=doublecircle];");
            foreach (int finalState in finalStates)
            {
                file.Write(" {0};", finalState);
            }
            file.WriteLine();
            file.WriteLine("    node [shape=circle];");
            // invisible zero-width dummy node as source of arrow to initial state
            file.WriteLine("    dummy_node [style=invis,width=0,fixedsize=true,label=\"\"];");
            file.WriteLine("    dummy_node -> {0} [len=0.2,penwidth=2.0];", initialState);
            foreach (Tuple <int, List <ILabel <SYMBOL> >, int> trans in transGroups)
            {
                int sourceState = trans.Item1;
                List <ILabel <SYMBOL> > labels = trans.Item2;
                int targetState = trans.Item3;
                file.WriteLine("    {0} -> {1} [label=<{2}>];",
                               sourceState, targetState, string.Join(",<br/>", labels.Select(printILabel))
                               );
            }
            foreach (int state in states)
            {
                file.WriteLine("    {0} [label=<{1}>];", state, stateToHTML(state));
            }
            file.WriteLine("}");
            file.WriteLine();
        }
Exemple #5
0
        /// <summary>
        /// Extracts automaton/transducer constructor parameters by parsing a text file.
        /// </summary>
        /// <remarks>
        /// Determines the file format from the file extension (<c>timbuk</c>/<c>tmb</c>, <c>fsa</c>/<c>pl</c>,
        /// <c>fsm</c>, compared case-insensitively) and, if the extension is not recognized, from the file contents.
        /// For the FSA and FSM formats the returned name is derived from the file name.
        /// </remarks>
        /// <param name="fileName">File name.</param>
        /// <param name="type">Expected type (automaton or transducer).</param>
        /// <param name="initialState">Initial state.</param>
        /// <param name="finalStates">Final states.</param>
        /// <param name="moves">Moves.</param>
        /// <param name="alphabet">Alphabet (may be <c>null</c>).</param>
        /// <param name="name">Name (may be <c>null</c>).</param>
        /// <param name="stateNames">State names (may be <c>null</c>).</param>
        /// <param name="stateSymbolsFileName">Path to state symbols file (optional and only used with FSM format).</param>
        /// <param name="inputSymbolsFileName">Path to input arc symbols file (optional and only used with FSM format).</param>
        /// <param name="outputSymbolsFileName">Path to output arc symbols file (optional and only used with a transducer in FSM format).</param>
        /// <exception cref="ParserException">The format could be determined neither from the extension nor from the contents.</exception>
        public static void ParseAutomaton(
            string fileName, AutomatonType type,
            out int initialState, out Set <int> finalStates, out Set <Move <ILabel <SYMBOL> > > moves,
            out Set <SYMBOL> alphabet, out string name, out Dictionary <int, string> stateNames,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            string      input      = File.ReadAllText(fileName);
            ParseAction parse      = null;
            bool        deriveName = false;       // derive automaton name from file name?

            // Path.GetExtension returns the extension *including* its leading period (e.g. ".fsa"),
            // so strip the period (and normalize case) before comparing against the bare names below
            string extension = Path.GetExtension(fileName).TrimStart('.').ToLowerInvariant();

            // try guessing format from file extension
            switch (extension)
            {
            case "timbuk":
            case "tmb":
                parse = ParseAutomatonTimbuk;
                break;

            case "fsa":
            case "pl":
                parse      = ParseAutomatonFSA;
                deriveName = true;
                break;

            case "fsm":
                parse      = ParseAutomatonFSM;
                deriveName = true;
                break;
            }

            if (parse == null)                // try guessing format from file contents
            {
                var regexTimbuk = new Regex(@"^Ops.*Automaton.*States.*Final States.*Transitions", RegexOptions.Singleline);
                var regexFSA    = new Regex(@"fa\(.*\)", RegexOptions.Singleline);
                var regexFSM    = new Regex(@"^[\d\s.]*$"); // only if no symbol files specified, otherwise assume FSM

                if (regexTimbuk.IsMatch(input))
                {
                    parse = ParseAutomatonTimbuk;
                }
                else if (regexFSA.IsMatch(input))
                {
                    parse      = ParseAutomatonFSA;
                    deriveName = true;
                }
                else if ((stateSymbolsFileName ?? inputSymbolsFileName ?? outputSymbolsFileName) != null ||
                         regexFSM.IsMatch(input))
                {
                    parse      = ParseAutomatonFSM;
                    deriveName = true;
                }
            }

            if (parse == null)
            {
                throw ParserException.UnknownFormat(type);
            }

            parse(
                input, type,
                out initialState, out finalStates, out moves, out alphabet, out name, out stateNames,
                stateSymbolsFileName, inputSymbolsFileName, outputSymbolsFileName
                );

            // overrides whatever name the format-specific parser produced
            if (deriveName)
            {
                name = Path.GetFileNameWithoutExtension(fileName);
            }
        }
Exemple #6
0
        /* file format compatible with https://github.com/Miskaaa/symboliclib */
        /// <summary>
        /// Parses an automaton or transducer description in Timbuk-style text format.
        /// </summary>
        /// <remarks>
        /// The whole input is matched against a single regular expression; its capture groups are then
        /// processed in order: label declarations (1), name (2), states (3), final states (4), transitions (5).
        /// States are numbered consecutively from 0 in the order in which they are declared.
        /// The three symbol-file parameters are not used by this parser.
        /// </remarks>
        /// <param name="input">File contents.</param>
        /// <param name="type">Expected type; selects the header tag (<c>INFA</c> or <c>INT</c>) and the label syntax.</param>
        /// <param name="initialState">Target state of the transition labelled with the start symbol.</param>
        /// <param name="finalStates">Final states.</param>
        /// <param name="moves">Parsed transitions.</param>
        /// <param name="alphabet">Symbols declared with arity 1.</param>
        /// <param name="name">Automaton name taken from the header.</param>
        /// <param name="stateNames">Maps the assigned state identifiers back to the names used in the file.</param>
        /// <exception cref="TimbukParserException">The input does not match the format or is inconsistent.</exception>
        public static void ParseAutomatonTimbuk(
            string input, AutomatonType type,
            out int initialState, out Set <int> finalStates, out Set <Move <ILabel <SYMBOL> > > moves,
            out Set <SYMBOL> alphabet, out string name, out Dictionary <int, string> stateNames,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            // NOTE(review): the patterns below are passed through the `_` helper before use; the single and
            // double spaces in them are presumably placeholders for optional/mandatory whitespace - confirm
            string namePattern           = string.Format(@"(.*?)(?:  @{0})?", type == AutomatonType.SSA ? "INFA" : "INT");
            string labelDeclPattern      = @"[^:]+:\d+";
            string labelListPattern      = string.Format(@"(?:({0})  )*", labelDeclPattern);
            string stateListPattern      = @"(?:(\w+)  )*";
            string transitionPattern     = @".*? \( (?:\w+(?: , \w+)*)? \) -> \w+";
            string transitionListPattern = string.Format(@"(?:({0})  )*", transitionPattern);
            string automatonPattern      = string.Format(
                @"Automaton  {0}  States  {1}Final States  {1}Transitions  {2}",
                namePattern, stateListPattern, transitionListPattern
                );
            string filePattern = string.Format(@"^Ops  {0}  {1} $", labelListPattern, automatonPattern);

            var   fileRegex = new Regex(_(filePattern));
            Match match     = fileRegex.Match(input);

            if (!match.Success)
            {
                throw TimbukParserException.InvalidFormat(type);
            }

            // group 1: label declarations of the form symbol:arity
            alphabet = new Set <SYMBOL>();
            SYMBOL startSymbol      = default(SYMBOL); // meaningless assignment
            bool   foundStartSymbol = false;

            foreach (Capture capture in match.Groups[1].Captures)
            {
                string[] parts  = capture.Value.Split(':');
                SYMBOL   symbol = StringToSymbol(parts[0]);
                int      arity  = int.Parse(parts[1]);

                switch (arity)
                {
                case 0:  // the (single) start symbol
                    if (foundStartSymbol)
                    {
                        throw TimbukParserException.DuplicateLabelDecl(type);
                    }
                    startSymbol      = symbol;
                    foundStartSymbol = true;
                    break;

                case 1:  // ordinary alphabet symbol
                    if (alphabet.Contains(symbol))
                    {
                        throw TimbukParserException.DuplicateLabelDecl(type);
                    }
                    alphabet.Add(symbol);
                    break;

                default:  // arity above 1 would describe a tree automaton
                    throw TimbukParserException.TreeAutomataNotSupported(type);
                }
            }
            if (!foundStartSymbol)
            {
                throw TimbukParserException.NoStartSymbol(type);
            }

            // group 2: automaton name
            name = match.Groups[2].Value;

            var stateDict = new Dictionary <string, int>(match.Groups[3].Captures.Count);
            int id        = 0;
            var states    = new Set <int>();

            finalStates = new Set <int>();
            moves       = new Set <Move <ILabel <SYMBOL> > >();

            // group 3: state names, numbered consecutively in declaration order
            foreach (Capture capture in match.Groups[3].Captures)
            {
                string stateName = capture.Value;
                if (stateDict.ContainsKey(stateName))
                {
                    throw TimbukParserException.DuplicateState(type);
                }
                int state = id++;
                stateDict[stateName] = state;
                states.Add(state);
            }

            // group 4: final states, each of which must have been declared above
            foreach (Capture capture in match.Groups[4].Captures)
            {
                string stateName = capture.Value;
                int    state;
                if (!stateDict.TryGetValue(stateName, out state))
                {
                    throw TimbukParserException.UnknownFinalState(type);
                }
                if (finalStates.Contains(state))
                {
                    throw TimbukParserException.DuplicateFinalState(type);
                }
                finalStates.Add(state);
            }

            // built once here rather than on every predicate that is parsed
            var predicateRegex = new Regex(@"^""?(in|not_in)\{(.*)\}""?$");

            // "" -> null, in{...}/not_in{...} -> set predicate, anything else -> predicate for that single symbol
            Func <string, ILabel <SYMBOL> > parsePredicate = predString => {
                if (predString == "")
                {
                    return(null);
                }

                Match predMatch = predicateRegex.Match(predString);

                if (predMatch.Success)
                {
                    PredicateType predType = predMatch.Groups[1].Value == "in" ?
                                             PredicateType.In : PredicateType.NotIn;
                    var symbols = predMatch.Groups[2].Value == "" ?
                                  new Set <SYMBOL>() :
                                  new Set <SYMBOL>(predMatch.Groups[2].Value.Split(',').Select(s => StringToSymbol(s)));
                    return(new Predicate <SYMBOL>(predType, symbols));
                }

                return(new Predicate <SYMBOL>(StringToSymbol(predString)));
            };
            // transducer label: either a single predicate or input/output separated by '/'
            Func <string, ILabel <SYMBOL> > parseLabel = labelString => {
                string[] parts = labelString.Split('/');
                switch (parts.Length)
                {
                case 1:
                    return(new Label <SYMBOL>((Predicate <SYMBOL>)parsePredicate(parts[0])));

                case 2:
                    // identity label: both halves carry the '@' prefix and must denote the same predicate
                    // NOTE(review): a label that is wrapped in double quotes as a whole (as written by
                    // PrintAutomatonTimbuk) starts with '"' rather than '@' - confirm such input is handled
                    if (parts.All(s => s.StartsWith("@")))
                    {
                        var inputPred  = (Predicate <SYMBOL>)parsePredicate(parts[0].Substring(1));
                        var outputPred = (Predicate <SYMBOL>)parsePredicate(parts[1].Substring(1));
                        if (inputPred == null ? outputPred != null : !inputPred.Equals(outputPred))
                        {
                            throw TimbukParserException.InvalidIdentityLabel(type);
                        }
                        return(new Label <SYMBOL>(inputPred));
                    }
                    return(new Label <SYMBOL>(
                               (Predicate <SYMBOL>)parsePredicate(parts[0]),
                               (Predicate <SYMBOL>)parsePredicate(parts[1])
                               ));

                default:
                    throw TimbukParserException.InvalidTransducerLabel(type);
                }
            };

            Func <string, ILabel <SYMBOL> > parseILabel = (type == AutomatonType.SSA) ?
                                                          parsePredicate : parseLabel;

            // group 5: transitions; re-matched one by one with capturing groups for label, source and target
            transitionPattern = @"(.*) \( (\w*) \) -> (\w+)";
            var  transitionRegex   = new Regex(_(transitionPattern));
            bool foundInitialState = false;

            initialState = 0;              // meaningless, but prevents compilation error

            foreach (Capture capture in match.Groups[5].Captures)
            {
                Match transMatch = transitionRegex.Match(capture.Value);

                if (!transMatch.Success)
                {
                    throw TimbukParserException.UnknownSymbol(type);
                }

                // a transition without source state must use the start symbol; its target is the initial state
                if (transMatch.Groups[2].Value == "")
                {
                    if (!StringToSymbol(transMatch.Groups[1].Value).Equals(startSymbol))
                    {
                        throw TimbukParserException.UnknownSymbol(type);
                    }

                    if (!stateDict.TryGetValue(transMatch.Groups[3].Value, out initialState))
                    {
                        throw TimbukParserException.UnknownState(type);
                    }

                    foundInitialState = true;
                    continue;
                }

                ILabel <SYMBOL> label = parseILabel(transMatch.Groups[1].Value);
                int             sourceState, targetState;
                if (!stateDict.TryGetValue(transMatch.Groups[2].Value, out sourceState) ||
                    !stateDict.TryGetValue(transMatch.Groups[3].Value, out targetState))
                {
                    throw TimbukParserException.UnknownState(type);
                }

                // NOTE(review): parsePredicate returns null for an empty label string, in which case
                // label.Symbols would throw - confirm whether an empty label can reach this point
                if (label.Symbols > alphabet)
                {
                    throw TimbukParserException.UnknownSymbol(type);
                }

                moves.Add(new Move <ILabel <SYMBOL> >(sourceState, targetState, label));
            }

            if (!foundInitialState)
            {
                throw TimbukParserException.NoInitialState(type);
            }

            // invert the name -> id dictionary for the caller
            stateNames = new Dictionary <int, string>(stateDict.Count);
            foreach (KeyValuePair <string, int> item in stateDict)
            {
                stateNames[item.Value] = item.Key;
            }
        }
Exemple #7
0
 /// <summary>
 /// Creates the exception reported when transitions contain symbols that are not in the alphabet.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static AutomatonException UnknownSymbolsInTransitions(AutomatonType type)
 {
     return new AutomatonException(type, "transitions contain symbols not in alphabet");
 }
Exemple #8
0
 /// <summary>
 /// Creates the exception reported when there are no transitions, so the initial state cannot be determined.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSMParserException NoTransitions(AutomatonType type)
 {
     return new FSMParserException(type, "no transitions - cannot determine initial state");
 }
Exemple #9
0
 /// <summary>
 /// Creates the exception reported when a start state is listed more than once.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSAParserException DuplicateStartState(AutomatonType type)
 {
     return new FSAParserException(type, "duplicate start state");
 }
Exemple #10
0
 /// <summary>
 /// Creates the exception reported when the predicate module is neither <c>fsa_preds</c> nor <c>fsa_frozen</c>.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSAParserException UnsupportedPredicateModule(AutomatonType type)
 {
     return new FSAParserException(type, "predicate module not supported (must be fsa_preds or fsa_frozen)");
 }
Exemple #11
0
 /// <summary>
 /// Creates the exception reported when a transducer label does not have a valid format.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static TimbukParserException InvalidTransducerLabel(AutomatonType type)
 {
     return new TimbukParserException(type, "invalid transducer label format");
 }
Exemple #12
0
 /// <summary>
 /// Creates the exception reported when the two predicates of an identity label do not match.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static TimbukParserException InvalidIdentityLabel(AutomatonType type)
 {
     return new TimbukParserException(type, "mismatched predicates in identity label");
 }
Exemple #13
0
 /// <summary>
 /// Creates the exception reported when a transition refers to an unknown state.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static TimbukParserException UnknownState(AutomatonType type)
 {
     return new TimbukParserException(type, "unknown state in transition");
 }
Exemple #14
0
 /// <summary>
 /// Creates the exception reported when a final state is not among the declared states.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static TimbukParserException UnknownFinalState(AutomatonType type)
 {
     return new TimbukParserException(type, "final state not in states");
 }
Exemple #15
0
 /// <summary>
 /// Creates the exception reported when no initial state is specified.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static TimbukParserException NoInitialState(AutomatonType type)
 {
     return new TimbukParserException(type, "no initial state specified");
 }
Exemple #16
0
 /// <summary>
 /// Creates the exception reported when an arc symbols file has an invalid format.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSMParserException InvalidArcSymbolsFile(AutomatonType type)
 {
     return new FSMParserException(type, "invalid arc symbols file format");
 }
Exemple #17
0
 /// <summary>
 /// Creates the exception reported when an arc symbol is not declared in the symbols file.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSMParserException UnknownArcSymbol(AutomatonType type)
 {
     return new FSMParserException(type, "arc symbol not declared in symbols file");
 }
Exemple #18
0
 /// <summary>
 /// Creates the exception reported when a final state is listed more than once.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSAParserException DuplicateFinalState(AutomatonType type)
 {
     return new FSAParserException(type, "duplicate final state");
 }
Exemple #19
0
 /// <summary>
 /// Creates the exception reported when the supplied state names are invalid.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static AutomatonException InvalidStateNames(AutomatonType type)
 {
     return new AutomatonException(type, "invalid state names");
 }
Exemple #20
0
 /// <summary>
 /// Creates the exception reported when there is no start state.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSAParserException NoStartState(AutomatonType type)
 {
     return new FSAParserException(type, "no start state");
 }
Exemple #21
0
 /// <summary>
 /// Initializes the exception with a message of the form "&lt;type&gt; parser: &lt;message&gt;".
 /// </summary>
 /// <param name="type">Automaton type whose string form prefixes the message.</param>
 /// <param name="message">Description of the parse error.</param>
 public ParserException(AutomatonType type, string message)
     : base(type.ToString() + " parser: " + message)
 {
 }
Exemple #22
0
 /// <summary>
 /// Creates the exception reported when the number of states is contradictory.
 /// </summary>
 /// <param name="type">Automaton type, forwarded to the exception constructor.</param>
 public static FSAParserException StateCountMismatch(AutomatonType type)
 {
     return new FSAParserException(type, "contradictory number of states");
 }
Exemple #23
0
        /* FSA format spec at http://www.let.rug.nl/~vannoord/Fsa/Manual/node5.html#anc1 */
        public static void ParseAutomatonFSA(
            string input, AutomatonType type,
            out int initialState, out Set <int> finalStates, out Set <Move <ILabel <SYMBOL> > > moves,
            out Set <SYMBOL> alphabet, out string name, out Dictionary <int, string> stateNames,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            var regexInlineComments = new Regex(@"%.*(?=\n)");
            var regexBlockComments  = new Regex(@"/\*.*?\*/", RegexOptions.Singleline);

            input = regexInlineComments.Replace(input, "");
            input = regexBlockComments.Replace(input, "");

            string prologListFormatString = @"\[(?: {0} ,? )*\]";

            string symbolsPattern = (type == AutomatonType.SSA) ?
                                    @"r \( (\w+) \)" : @"t \( (\w+ , \w+) \)";
            string statesPattern    = @"(\d+)";
            string startsPattern    = string.Format(prologListFormatString, @"(\d+)");
            string finalsPattern    = string.Format(prologListFormatString, @"(\d+)");
            string atomPattern      = @"(?:'[^']*'|[\w<>|[\]{}!?+*/#$%@=-]+)";
            string predicatePattern = string.Format(@"(?:{0}|(?:not_)?in \( {1} \))",
                                                    atomPattern, string.Format(prologListFormatString, atomPattern));
            string transsPattern = string.Format(prologListFormatString,
                                                 string.Format(@"trans \( (\d+) , ({0}) , (\d+) \)",
                                                               (type == AutomatonType.SSA) ? predicatePattern :
                                                               string.Format(@"{0} \/ {0}|(?:\$@|'\$@') \( {0} \) \/ (?:\$@|'\$@') \( {0} \)",
                                                                             string.Format(@"(?:{0}|\[ \])", predicatePattern))));
            string jumpsPattern = string.Format(prologListFormatString, @"jump \( (\d+) , (\d+) \)");
            string filePattern  = string.Format(@"fa \( {0} , {1} , {2} , {3} , {4} , {5} \) \.",
                                                symbolsPattern, statesPattern, startsPattern, finalsPattern, transsPattern, jumpsPattern);

            var   fileRegex = new Regex(_(filePattern));
            Match match     = fileRegex.Match(input);

            if (!match.Success)
            {
                throw FSAParserException.InvalidFormat(type);
            }

            Func <string, bool> isFsaFrozen = module => {
                switch (module)
                {
                case "fsa_preds":
                    return(false);

                case "fsa_frozen":
                    return(true);

                default:
                    throw FSAParserException.UnsupportedPredicateModule(type);
                }
            };
            bool inputSymbolAsIs;
            bool outputSymbolAsIs = true;  // meaningless assignment

            if (type == AutomatonType.SSA)
            {
                inputSymbolAsIs = isFsaFrozen(match.Groups[1].Value);
            }
            else
            {
                string[] parts = Regex.Split(match.Groups[1].Value, _(@" , "));
                inputSymbolAsIs  = isFsaFrozen(parts[0]);
                outputSymbolAsIs = isFsaFrozen(parts[1]);
            }

            int stateCount = int.Parse(match.Groups[2].Value);

            var stateDict = new Dictionary <int, int>(stateCount);
            int id        = 0;

            var startStates = new Set <int>();

            foreach (Capture capture in match.Groups[3].Captures)
            {
                int stateNum = int.Parse(capture.Value);
                int state;
                if (!stateDict.TryGetValue(stateNum, out state))
                {
                    stateDict[stateNum] = state = id++;
                }
                if (startStates.Contains(state))
                {
                    throw FSAParserException.DuplicateStartState(type);
                }
                startStates.Add(state);
            }

            finalStates = new Set <int>();
            foreach (Capture capture in match.Groups[4].Captures)
            {
                int stateNum = int.Parse(capture.Value);
                int state;
                if (!stateDict.TryGetValue(stateNum, out state))
                {
                    stateDict[stateNum] = state = id++;
                }
                if (finalStates.Contains(state))
                {
                    throw FSAParserException.DuplicateFinalState(type);
                }
                finalStates.Add(state);
            }

            var asIsPredicateRegex    = new Regex(atomPattern);
            var inNotInPredicateRegex = new Regex(_(string.Format(
                                                        @"(in|not_in) \( {0} \)", string.Format(prologListFormatString, string.Format(@"({0})", atomPattern))
                                                        )));
            Func <string, string> stripQuotes = atom =>
                                                atom[0] == '\'' && atom[atom.Length - 1] == '\'' ? atom.Substring(1, atom.Length - 2) : atom;
            Func <string, bool, ILabel <SYMBOL> > parsePredicate = (predString, asIs) => {
                if (asIs)
                {
                    if (!asIsPredicateRegex.IsMatch(predString))
                    {
                        throw FSAParserException.InvalidPredicate(type);
                    }
                    return(new Predicate <SYMBOL>(StringToSymbol(stripQuotes(predString))));
                }
                else
                {
                    Match predMatch = inNotInPredicateRegex.Match(predString);
                    if (predMatch.Success)
                    {
                        var symbols = new Set <SYMBOL>();
                        foreach (Capture capture in predMatch.Groups[2].Captures)
                        {
                            symbols.Add(StringToSymbol(stripQuotes(capture.Value)));
                        }
                        return(new Predicate <SYMBOL>(
                                   (predMatch.Groups[1].Value == "in") ? PredicateType.In : PredicateType.NotIn,
                                   symbols
                                   ));
                    }
                    else
                    {
                        if (!asIsPredicateRegex.IsMatch(predString))
                        {
                            throw FSAParserException.InvalidPredicate(type);
                        }
                        return(new Predicate <SYMBOL>(StringToSymbol(stripQuotes(predString))));
                    }
                }
            };
            Func <string, ILabel <SYMBOL> > parseInputPredicate = (predString => parsePredicate(predString, inputSymbolAsIs));
            Func <string, ILabel <SYMBOL> > parseLabel          = labelString => {
                string[] parts          = labelString.Split('/');
                var      predicates     = new Predicate <SYMBOL> [2];
                bool     isIdentity     = false;
                var      emptyListRegex = new Regex(_(@"^\[ \]$"));
                var      identityRegex  = new Regex(_(string.Format(@"^\$@ \( ({0}) \)$", predicatePattern)));
                for (int i = 0; i < 2; i++)
                {
                    if (emptyListRegex.IsMatch(parts[i]))
                    {
                        predicates[i] = null;
                    }
                    else
                    {
                        Match  identityMatch = identityRegex.Match(parts[i]);
                        string predString;
                        if (identityMatch.Success)
                        {
                            isIdentity = true;
                            predString = identityMatch.Groups[1].Value;
                        }
                        else
                        {
                            predString = parts[i];
                        }
                        predicates[i] = (Predicate <SYMBOL>)parsePredicate(predString, (i == 0) ? inputSymbolAsIs : outputSymbolAsIs);
                    }
                }
                if (isIdentity)
                {
                    if (predicates[0] == null)
                    {
                        if (predicates[1] != null)
                        {
                            throw FSAParserException.InvalidIdentityLabel(type);
                        }
                    }
                    else
                    {
                        if (predicates[1] == null)
                        {
                            throw FSAParserException.InvalidIdentityLabel(type);
                        }
                        if (predicates[0].Type == predicates[1].Type && predicates[0].Set != predicates[1].Set)
                        {
                            throw FSAParserException.InvalidIdentityLabel(type);
                        }
                    }
                    return(new Label <SYMBOL>(predicates[0]));
                }
                return(new Label <SYMBOL>(predicates[0], predicates[1]));
            };
            Func <string, ILabel <SYMBOL> > parseILabel = (type == AutomatonType.SSA) ?
                                                          parseInputPredicate : parseLabel;

            moves = new Set <Move <ILabel <SYMBOL> > >();

            int transitionCount = match.Groups[5].Captures.Count;

            for (int i = 0; i < transitionCount; i++)
            {
                int             sourceStateNum = int.Parse(match.Groups[5].Captures[i].Value);
                ILabel <SYMBOL> label = parseILabel(match.Groups[6].Captures[i].Value);
                int             targetStateNum = int.Parse(match.Groups[7].Captures[i].Value);
                int             sourceState, targetState;
                if (!stateDict.TryGetValue(sourceStateNum, out sourceState))
                {
                    stateDict[sourceStateNum] = sourceState = id++;
                }
                if (!stateDict.TryGetValue(targetStateNum, out targetState))
                {
                    stateDict[targetStateNum] = targetState = id++;
                }
                moves.Add(new Move <ILabel <SYMBOL> >(sourceState, targetState, label));
            }

            int jumpCount = match.Groups[8].Captures.Count;

            for (int i = 0; i < jumpCount; i++)
            {
                int sourceStateNum = int.Parse(match.Groups[8].Captures[i].Value);
                int targetStateNum = int.Parse(match.Groups[9].Captures[i].Value);
                int sourceState, targetState;
                if (!stateDict.TryGetValue(sourceStateNum, out sourceState))
                {
                    stateDict[sourceStateNum] = sourceState = id++;
                }
                if (!stateDict.TryGetValue(targetStateNum, out targetState))
                {
                    stateDict[sourceStateNum] = targetState = id++;
                }
                moves.Add(Move <ILabel <SYMBOL> > .Epsilon(sourceState, targetState));
            }

            if (stateDict.Count > stateCount)
            {
                throw FSAParserException.StateCountMismatch(type);
            }

            initialState = 0;              // meaningless, prevents compiler error
            if (startStates.Count < 1)
            {
                throw FSAParserException.NoStartState(type);
            }
            else if (startStates.Count == 1)
            {
                initialState = startStates.First();
            }
            else
            {
                initialState = id++;
                for (int i = 0; ; i++)
                {
                    if (!stateDict.ContainsKey(i))
                    {
                        stateDict[i] = initialState;
                        break;
                    }
                }
                foreach (int startState in startStates)
                {
                    moves.Add(Move <ILabel <SYMBOL> > .Epsilon(initialState, startState));
                }
            }

            alphabet   = null;
            name       = null;
            stateNames = null;
        }
Exemple #24
0
 /// <summary>
 /// Creates the parser exception carrying the "predicate invalid for given module" message.
 /// </summary>
 /// <param name="type">Automaton type forwarded to the exception constructor.</param>
 /// <returns>A new <see cref="FSAParserException"/> instance.</returns>
 public static FSAParserException InvalidPredicate(AutomatonType type)
 {
     const string message = "predicate invalid for given module";
     return new FSAParserException(type, message);
 }
Exemple #25
0
        /* format spec at http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html */
        /// <summary>
        /// Parses an automaton given in the textual FSM format, one transition or final state per line.
        /// </summary>
        /// <param name="input">Full text of the automaton description.</param>
        /// <param name="type">
        /// <see cref="AutomatonType.SSA"/> selects three-column transitions (source, target, input symbol);
        /// otherwise four-column transitions (with an output symbol) are expected.
        /// </param>
        /// <param name="initialState">Source state of the first transition encountered in <paramref name="input"/>.</param>
        /// <param name="finalStates">States listed on final-state lines.</param>
        /// <param name="moves">Parsed transitions; the symbol "0" is read as an absent (null) predicate.</param>
        /// <param name="alphabet">Symbols read from the arc symbols file(s), or null when none was supplied.</param>
        /// <param name="name">Always set to null.</param>
        /// <param name="stateNames">Number-to-name map built from the state symbols file, or null when none was supplied.</param>
        /// <param name="stateSymbolsFileName">Optional file mapping state names to numbers ("name number" per line).</param>
        /// <param name="inputSymbolsFileName">Optional file listing the input symbols ("symbol number" per line).</param>
        /// <param name="outputSymbolsFileName">Optional file listing the output symbols; only read when <paramref name="type"/> is not SSA.</param>
        /// <exception cref="FSMParserException">
        /// Thrown for a malformed symbols file, a malformed automaton line, an unknown state or arc symbol,
        /// or when the input contains no transitions.
        /// </exception>
        public static void ParseAutomatonFSM(
            string input, AutomatonType type,
            out int initialState, out Set <int> finalStates, out Set <Move <ILabel <SYMBOL> > > moves,
            out Set <SYMBOL> alphabet, out string name, out Dictionary <int, string> stateNames,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            // Without a symbols file the corresponding column must be numeric; with one, any non-blank token is accepted.
            string statePattern        = (stateSymbolsFileName == null) ? @"\d+" : @"[^\s]+";
            string inputSymbolPattern  = (inputSymbolsFileName == null) ? @"\d+" : @"[^\s]+";
            string outputSymbolPattern = (outputSymbolsFileName == null) ? @"\d+" : @"[^\s]+";
            // NOTE(review): _() is defined elsewhere in the file; presumably it expands the literal spaces
            // of these templates into whitespace matchers -- confirm against its definition.
            var    transitionRegex     = new Regex(_(string.Format(
                                                         (type == AutomatonType.SSA) ?
                                                         @" ({0})  ({0})  ({1})(?:  \d*\.\d*)? " :
                                                         @" ({0})  ({0})  ({1})  ({2})(?:  \d*\.\d*)? ",
                                                         statePattern, inputSymbolPattern, outputSymbolPattern
                                                         )));
            var   finalStateRegex = new Regex(_(string.Format(@" ({0})(?:  \d*\.\d*)? ", statePattern)));
            // Anchored on both ends: an unanchored \s* matches every string, which would make
            // all "is this line blank?" checks below succeed and hide malformed lines.
            var   blankLineRegex  = new Regex(@"^\s*$");
            var   symbolRegex     = new Regex(_(@" ([^\s]+)  (\d+) "));
            Match match;

            Dictionary <string, int> stateDict = null;

            if (stateSymbolsFileName != null)
            {
                stateDict = new Dictionary <string, int>();
                foreach (string stateLine in File.ReadLines(stateSymbolsFileName))
                {
                    match = symbolRegex.Match(stateLine);
                    if (match.Success)
                    {
                        stateDict[match.Groups[1].Value] = int.Parse(match.Groups[2].Value);
                    }
                    else if (!blankLineRegex.IsMatch(stateLine))
                    {
                        throw FSMParserException.InvalidStateSymbolsFile(type);
                    }
                }
            }

            // The alphabet stays null unless at least one arc symbols file is given;
            // input and output symbols are merged into the same set.
            alphabet = null;
            string[] symbolsFileNames = (type == AutomatonType.SSA) ?
                                        new string[] { inputSymbolsFileName } :
                                        new string[] { inputSymbolsFileName, outputSymbolsFileName };
            foreach (string symbolsFileName in symbolsFileNames)
            {
                if (symbolsFileName != null)
                {
                    alphabet = alphabet ?? new Set <SYMBOL>();
                    foreach (string symbolLine in File.ReadLines(symbolsFileName))
                    {
                        match = symbolRegex.Match(symbolLine);
                        if (match.Success)
                        {
                            alphabet.Add(StringToSymbol(match.Groups[1].Value));
                        }
                        else if (!blankLineRegex.IsMatch(symbolLine))
                        {
                            throw FSMParserException.InvalidArcSymbolsFile(type);
                        }
                    }
                }
            }

            moves        = new Set <Move <ILabel <SYMBOL> > >();
            finalStates  = new Set <int>();
            initialState = 0;  // meaningless, prevents compiler error

            // "0" denotes the absence of a symbol and is represented by a null predicate.
            Func <string, Predicate <SYMBOL> > parsePredicate = (symbol =>
                                                                 (symbol == "0") ? null : new Predicate <SYMBOL>(StringToSymbol(symbol))
                                                                 );
            // Turns a state token into its number: parsed directly when no state symbols file
            // is in use, looked up by name otherwise. Shared by transition and final-state lines
            // so that both accept symbolic state names.
            Func <string, int> resolveState = token => {
                if (stateDict == null)
                {
                    return(int.Parse(token));
                }
                int state;
                if (!stateDict.TryGetValue(token, out state))
                {
                    throw FSMParserException.UnknownStateSymbol(type);
                }
                return(state);
            };
            bool   isFirstTransition = true;
            var    reader            = new StringReader(input);
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                match = transitionRegex.Match(line);
                if (match.Success)
                {
                    int sourceState = resolveState(match.Groups[1].Value);
                    int targetState = resolveState(match.Groups[2].Value);

                    if (inputSymbolsFileName != null && !alphabet.Contains(StringToSymbol(match.Groups[3].Value)))
                    {
                        throw FSMParserException.UnknownArcSymbol(type);
                    }
                    ILabel <SYMBOL> label;
                    var             inputPredicate = parsePredicate(match.Groups[3].Value);
                    if (type == AutomatonType.SST)
                    {
                        if (outputSymbolsFileName != null && !alphabet.Contains(StringToSymbol(match.Groups[4].Value)))
                        {
                            throw FSMParserException.UnknownArcSymbol(type);
                        }
                        var outputPredicate = parsePredicate(match.Groups[4].Value);
                        label = new Label <SYMBOL>(inputPredicate, outputPredicate);
                    }
                    else
                    {
                        label = inputPredicate;
                    }

                    moves.Add(new Move <ILabel <SYMBOL> >(sourceState, targetState, label));

                    // The source state of the first transition is taken as the initial state.
                    if (isFirstTransition)
                    {
                        initialState      = sourceState;
                        isFirstTransition = false;
                    }
                }
                else
                {
                    match = finalStateRegex.Match(line);
                    if (!match.Success)
                    {
                        if (blankLineRegex.IsMatch(line))  // permit blank line
                        {
                            continue;
                        }
                        throw FSMParserException.InvalidFormat(type);
                    }

                    finalStates.Add(resolveState(match.Groups[1].Value));
                }
            }

            if (isFirstTransition)
            {
                throw FSMParserException.NoTransitions(type);
            }

            name = null;

            // Invert the name -> number map so callers can look state names up by number.
            stateNames = null;
            if (stateDict != null)
            {
                stateNames = new Dictionary <int, string>(stateDict.Count);
                foreach (KeyValuePair <string, int> item in stateDict)
                {
                    stateNames[item.Value] = item.Key;
                }
            }
        }
Exemple #26
0
 /// <summary>
 /// Creates the parser exception carrying the "invalid transducer identity label" message.
 /// </summary>
 /// <param name="type">Automaton type forwarded to the exception constructor.</param>
 /// <returns>A new <see cref="FSAParserException"/> instance.</returns>
 public static FSAParserException InvalidIdentityLabel(AutomatonType type)
 {
     const string message = "invalid transducer identity label";
     return new FSAParserException(type, message);
 }
Exemple #27
0
 /// <summary>
 /// Initializes an FSM-format parser exception; the given message is passed to the base
 /// constructor with an "FSM: " prefix.
 /// </summary>
 /// <param name="type">Automaton type forwarded to the base constructor.</param>
 /// <param name="message">Description of the problem, without the format prefix.</param>
 public FSMParserException(AutomatonType type, string message) :
     base(type, string.Format("FSM: {0}", message)) { }
Exemple #28
0
 /// <summary>
 /// Creates the parser exception carrying the "invalid automaton format" message.
 /// </summary>
 /// <param name="type">Automaton type forwarded to the exception constructor.</param>
 /// <returns>A new <see cref="FSMParserException"/> instance.</returns>
 public static FSMParserException InvalidFormat(AutomatonType type)
 {
     const string message = "invalid automaton format";
     return new FSMParserException(type, message);
 }
Exemple #29
0
        /* format spec at http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html */
        /// <summary>
        /// Writes an automaton in the textual FSM format: one line per concrete transition,
        /// followed by one line per final state, plus optional state/arc symbols files.
        /// </summary>
        /// <param name="file">Destination for the transition and final-state lines.</param>
        /// <param name="type">
        /// <see cref="AutomatonType.SSA"/> prints predicate-labelled moves (three columns);
        /// any other type prints input/output-labelled moves (four columns).
        /// </param>
        /// <param name="name">Not used by this format.</param>
        /// <param name="stateNames">State number to name map; read only when <paramref name="stateSymbolsFileName"/> is given.</param>
        /// <param name="algebra">Used to expand each predicate into the concrete symbols it covers.</param>
        /// <param name="states">Not used by this format.</param>
        /// <param name="alphabet">Symbols to number, starting from 1 in enumeration order.</param>
        /// <param name="moves">Transitions to print.</param>
        /// <param name="initialState">Not used by this method.</param>
        /// <param name="finalStates">States written as final-state lines.</param>
        /// <param name="stateSymbolsFileName">If given, states are printed by name and a "name number" file is written here.</param>
        /// <param name="inputSymbolsFileName">If given, input symbols are printed as text and a "symbol number" file is written here.</param>
        /// <param name="outputSymbolsFileName">If given, output symbols are printed as text and (for non-SSA types) a "symbol number" file is written here.</param>
        private static void PrintAutomatonFSM(
            StreamWriter file, AutomatonType type, string name, Dictionary <int, string> stateNames,
            PredicateAlgebra <SYMBOL> algebra, List <int> states, List <SYMBOL> alphabet,
            List <Move <ILabel <SYMBOL> > > moves, int initialState, List <int> finalStates,
            string stateSymbolsFileName = null, string inputSymbolsFileName = null, string outputSymbolsFileName = null)
        {
            // map symbols to numeric IDs
            var symbolDict = new Dictionary <SYMBOL, int>(alphabet.Count);
            int id         = 1; // 0 reserved for epsilon

            foreach (SYMBOL symbol in alphabet)
            {
                symbolDict[symbol] = id++;
            }

            // print state/symbol as number if corresponding symbols file unspecified,
            // otherwise print as string and map it to number in symbols file

            Func <int, string>                state2str          = state => (stateSymbolsFileName == null) ? state.ToString() : stateNames[state];
            Func <SYMBOL, string>             input2str          = symbol => (inputSymbolsFileName == null) ? symbolDict[symbol].ToString() : symbol.ToString();
            Func <SYMBOL, string>             output2str         = symbol => (outputSymbolsFileName == null) ? symbolDict[symbol].ToString() : symbol.ToString();
            // A predicate move expands to one line per symbol in the predicate's inclusive set;
            // a null predicate is printed as the epsilon symbol 0.
            Action <Move <ILabel <SYMBOL> > > printPredicateMove = move => {
                int sourceState = move.SourceState;
                var predicate   = (Predicate <SYMBOL>)move.Label;
                int targetState = move.TargetState;
                if (predicate == null)
                {
                    file.WriteLine("{0} {1} 0", state2str(sourceState), state2str(targetState));
                }
                else
                {
                    foreach (SYMBOL symbol in algebra.InclusiveSet(predicate))
                    {
                        file.WriteLine("{0} {1} {2}", state2str(sourceState), state2str(targetState), input2str(symbol));
                    }
                }
            };
            // A label move expands to the cross product of its input and output symbols,
            // except identity labels, which pair every input symbol with itself.
            Action <Move <ILabel <SYMBOL> > > printLabelMove = move => {
                int sourceState = move.SourceState;
                var label       = (Label <SYMBOL>)move.Label;
                int targetState = move.TargetState;
                if (label.IsIdentity)
                {
                    if (label.Input == null)
                    {
                        file.WriteLine("{0} {1} 0 0", state2str(sourceState), state2str(targetState));
                    }
                    else
                    {
                        foreach (SYMBOL symbol in algebra.InclusiveSet(label.Input))
                        {
                            file.WriteLine("{0} {1} {2} {3}",
                                           state2str(sourceState), state2str(targetState), input2str(symbol), output2str(symbol));
                        }
                    }
                }
                else
                {
                    if (label.Input == null)
                    {
                        if (label.Output == null)
                        {
                            file.WriteLine("{0} {1} 0 0", state2str(sourceState), state2str(targetState));
                        }
                        else
                        {
                            foreach (SYMBOL output in algebra.InclusiveSet(label.Output))
                            {
                                file.WriteLine("{0} {1} 0 {2}", state2str(sourceState), state2str(targetState), output2str(output));
                            }
                        }
                    }
                    else
                    {
                        foreach (SYMBOL input in algebra.InclusiveSet(label.Input))
                        {
                            if (label.Output == null)
                            {
                                file.WriteLine("{0} {1} {2} 0", state2str(sourceState), state2str(targetState), input2str(input));
                            }
                            else
                            {
                                foreach (SYMBOL output in algebra.InclusiveSet(label.Output))
                                {
                                    file.WriteLine("{0} {1} {2} {3}",
                                                   state2str(sourceState), state2str(targetState), input2str(input), output2str(output));
                                }
                            }
                        }
                    }
                }
            };
            Action <Move <ILabel <SYMBOL> > > printMove = (type == AutomatonType.SSA) ?
                                                          printPredicateMove : printLabelMove;

            // print moves
            foreach (Move <ILabel <SYMBOL> > move in moves)
            {
                printMove(move);
            }
            // print final states
            // (iterate finalStates, not states: a line holding only a state marks that state as final)
            foreach (int finalState in finalStates)
            {
                file.WriteLine(state2str(finalState));
            }

            if (stateSymbolsFileName != null)
            {
                using (var stateFile = new StreamWriter(stateSymbolsFileName))
                {
                    foreach (KeyValuePair <int, string> item in stateNames)
                    {
                        stateFile.WriteLine("{0} {1}", item.Value, item.Key);
                    }
                }
            }

            // The same symbol numbering is written to every requested arc symbols file.
            string[] symbolsFileNames = (type == AutomatonType.SSA) ?
                                        new string[] { inputSymbolsFileName } :
                                        new string[] { inputSymbolsFileName, outputSymbolsFileName };
            foreach (string symbolsFileName in symbolsFileNames)
            {
                if (symbolsFileName != null)
                {
                    using (var symbolsFile = new StreamWriter(symbolsFileName))
                    {
                        foreach (KeyValuePair <SYMBOL, int> item in symbolDict)
                        {
                            symbolsFile.WriteLine("{0} {1}", item.Key, item.Value);
                        }
                    }
                }
            }
        }
Exemple #30
0
 /// <summary>
 /// Creates the parser exception carrying the "duplicate state" message.
 /// </summary>
 /// <param name="type">Automaton type forwarded to the exception constructor.</param>
 /// <returns>A new <see cref="TimbukParserException"/> instance.</returns>
 public static TimbukParserException DuplicateState(AutomatonType type)
 {
     const string message = "duplicate state";
     return new TimbukParserException(type, message);
 }