/* Timbuk-style text output; file format compatible with https://github.com/Miskaaa/symboliclib */
/// <summary>
/// Writes the automaton/transducer to <paramref name="file"/> as an "Ops" declaration line
/// followed by an "Automaton" section (states, final states, transitions).
/// </summary>
/// <remarks>
/// The parameters <paramref name="algebra"/> and the three symbols-file names are not used by this
/// printer; NOTE(review): presumably kept so all printers share one signature - confirm.
/// </remarks>
private static void PrintAutomatonTimbuk(
    StreamWriter file,
    AutomatonType type,
    string name,
    Dictionary<int, string> stateNames,
    PredicateAlgebra<SYMBOL> algebra,
    List<int> states,
    List<SYMBOL> alphabet,
    List<Move<ILabel<SYMBOL>>> moves,
    int initialState,
    List<int> finalStates,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    bool isAcceptor = (type == AutomatonType.SSA);

    // every alphabet symbol is declared with arity 1; the "start" symbol x (arity 0) is added below
    List<string> declarations = alphabet
        .Select(symbol => string.Format("{0}:1", symbol))
        .ToList();

    // predicate -> "in{a,b}" / "not_in{a,b}", or the empty string for a null predicate
    Func<ILabel<SYMBOL>, string> predicateText = ilabel =>
    {
        var pred = (Predicate<SYMBOL>)ilabel;
        if (pred == null)
        {
            return "";
        }
        string format = (pred.Type == PredicateType.In) ? "in{0}{1}{2}" : "not_in{0}{1}{2}";
        return string.Format(format, '{', string.Join(",", pred.Set), '}');
    };

    // transducer label -> "@pred/@pred" for identity labels, "input/output" otherwise
    Func<ILabel<SYMBOL>, string> labelText = ilabel =>
    {
        var lbl = (Label<SYMBOL>)ilabel;
        if (lbl.IsIdentity)
        {
            return string.Format("@{0}/@{0}", predicateText(lbl.Input));
        }
        return string.Format("{0}/{1}", predicateText(lbl.Input), predicateText(lbl.Output));
    };

    Func<ILabel<SYMBOL>, string> moveLabelText;
    if (isAcceptor)
    {
        moveLabelText = predicateText;
    }
    else
    {
        moveLabelText = labelText;
    }

    string opsLine = string.Format("Ops x:0 {0}", string.Join(" ", declarations));
    file.WriteLine(opsLine);
    file.WriteLine();

    string automatonName = name ?? "M";
    string kindTag = isAcceptor ? "INFA" : "INT";
    file.WriteLine(string.Format("Automaton {0} @{1}", automatonName, kindTag));

    string allStates = string.Join(" ", states.Select(state => stateNames[state]));
    file.WriteLine(string.Format("States {0}", allStates));

    string acceptingStates = string.Join(" ", finalStates.Select(state => stateNames[state]));
    file.WriteLine(string.Format("Final States {0}", acceptingStates));

    file.WriteLine("Transitions");
    // the arity-0 start symbol marks the initial state
    file.WriteLine(string.Format("x() -> {0}", stateNames[initialState]));

    foreach (Move<ILabel<SYMBOL>> move in moves)
    {
        string text = moveLabelText(move.Label);
        string source = stateNames[move.SourceState];
        string target = stateNames[move.TargetState];
        file.WriteLine(string.Format("\"{0}\"({1}) -> {2}", text, source, target));
    }

    file.WriteLine();
}
/// <summary>Builds the exception reported when no arity-0 (start) symbol was declared.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static TimbukParserException NoStartSymbol(AutomatonType type)
{
    return new TimbukParserException(type, "no start symbol (arity 0) specified");
}
/* FSA format spec at http://www.let.rug.nl/~vannoord/Fsa/Manual/node5.html#anc1 */
/// <summary>
/// Writes the automaton/transducer to <paramref name="file"/> as a Prolog <c>fa(...)</c> term
/// using the <c>fsa_preds</c> predicate module.
/// </summary>
/// <remarks>
/// States are written as their integer ids; <paramref name="stateNames"/>, <paramref name="alphabet"/>
/// and the symbols-file names are not used here.
/// </remarks>
private static void PrintAutomatonFSA(
    StreamWriter file,
    AutomatonType type,
    string name,
    Dictionary<int, string> stateNames,
    PredicateAlgebra<SYMBOL> algebra,
    List<int> states,
    List<SYMBOL> alphabet,
    List<Move<ILabel<SYMBOL>>> moves,
    int initialState,
    List<int> finalStates,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    var transitions = new List<Move<ILabel<SYMBOL>>>(); // non-epsilon moves
    var jumps = new List<Tuple<int, int>>();            // epsilon moves

    foreach (Move<ILabel<SYMBOL>> move in moves)
    {
        if (move.IsEpsilon)
        {
            jumps.Add(new Tuple<int, int>(move.SourceState, move.TargetState));
        }
        else
        {
            transitions.Add(move);
        }
    }

    // add quotes if necessary
    Func<SYMBOL, string> sanitizeSymbol = symbol =>
    {
        string s = symbol.ToString();
        if (s.Length == 0)
        {
            // an empty name has no first character to inspect; write it as the empty quoted atom
            return "''";
        }
        if (char.IsUpper(s[0]) || s.Any(ch => char.IsWhiteSpace(ch)) || (s[0] == '0' && s.Length > 1))
        {
            return "'" + s + "'";
        }
        return s;
    };

    Func<ILabel<SYMBOL>, string> printPredicate = ilabel =>
    {
        var predicate = (Predicate<SYMBOL>)ilabel;
        if (predicate == null) // epsilon (only for transducers)
        {
            return "[]";
        }

        // `in([a])` may just be written as `a`
        var inclusive = algebra.InclusiveSet(predicate);
        if (inclusive.Count == 1)
        {
            return sanitizeSymbol(inclusive.First());
        }

        string typeName = (predicate.Type == PredicateType.In) ? "in" : "not_in";
        List<string> symbols = predicate.Set.Select(sanitizeSymbol).ToList();
        symbols.Sort();
        return string.Format("{0}([{1}])", typeName, string.Join(",", symbols));
    };

    Func<ILabel<SYMBOL>, string> printLabel = ilabel =>
    {
        var label = (Label<SYMBOL>)ilabel;
        return label.IsIdentity
            ? string.Format("$@({0})/$@({0})", printPredicate(label.Input))
            : string.Format("{0}/{1}", printPredicate(label.Input), printPredicate(label.Output));
    };

    Func<ILabel<SYMBOL>, string> printILabel = (type == AutomatonType.SSA) ? printPredicate : printLabel;

    file.WriteLine("%% {0} {1}", (type == AutomatonType.SSA) ? "Recognizer" : "Transducer", name ?? "");
    file.WriteLine("%% Automatically generated by ARMC.");
    file.WriteLine("%% For more info, cf. http://www.let.rug.nl/~vannoord/Fsa/");
    file.WriteLine();
    file.WriteLine("fa(");
    if (type == AutomatonType.SSA)
    {
        file.WriteLine(" r(fsa_preds),");
    }
    else
    {
        file.WriteLine(" t(fsa_preds,fsa_preds),");
    }
    file.WriteLine(" % number of states");
    file.WriteLine(" {0},", states.Count);
    file.WriteLine(" % start states");
    file.WriteLine(" [ {0} ],", initialState);
    file.WriteLine(" % final states");

    // put up to 10 final states on one line
    int i = 0;
    FSAWriteList(
        file,
        finalStates.GroupBy(state => i++ / 10).Select(group => group.ToList()),
        stateList => string.Join(",", stateList)
    );
    // NOTE(review): the other FSAWriteList calls are not followed by a closing bracket line;
    // confirm against FSAWriteList whether this extra " ]," is intended.
    file.WriteLine(" ],");

    file.WriteLine(" % moves");
    FSAWriteList(file, transitions, (move => string.Format("trans({0},{1},{2})",
        move.SourceState, printILabel(move.Label), move.TargetState)));

    file.WriteLine(" % jumps");
    FSAWriteList(file, jumps, (jump => string.Format("jump({0},{1})", jump.Item1, jump.Item2)), false);

    file.WriteLine(").");
    file.WriteLine();
}
/* format spec at http://www.graphviz.org/doc/info/lang.html */
/// <summary>
/// Writes the automaton/transducer to <paramref name="file"/> as a Graphviz <c>digraph</c>
/// with HTML-like labels on states and edges.
/// </summary>
/// <remarks>
/// Moves sharing the same source and target state are merged into one edge whose label lists all
/// their labels. Nodes are identified by integer state id; <paramref name="stateNames"/> only
/// supplies the displayed label. <paramref name="alphabet"/> and the symbols-file names are unused.
/// </remarks>
private static void PrintAutomatonDot(
    StreamWriter file,
    AutomatonType type,
    string name,
    Dictionary<int, string> stateNames,
    PredicateAlgebra<SYMBOL> algebra,
    List<int> states,
    List<SYMBOL> alphabet,
    List<Move<ILabel<SYMBOL>>> moves,
    int initialState,
    List<int> finalStates,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    // group labels for moves with same source and target state (reduce edges in graph)
    var transGroups = moves.GroupBy(
        move => new Tuple<int, int>(move.SourceState, move.TargetState),
        move => move.Label,
        (key, labels) => new Tuple<int, List<ILabel<SYMBOL>>, int>(key.Item1, labels.ToList(), key.Item2)
    );

    // create subscripts and superscripts based on LaTeX-like markup in state names
    // e.g. q_0 becomes <q<sub>0</sub>>, q_{42} becomes <q<sub>42</sub>>, q_M^2 becomes <q<sub>M</sub><sup>2</sup>>
    var regexSubSingle = new Regex(@"_([^{])");
    var regexSubGroup = new Regex(@"_{([^}]*)}");
    var regexSuperSingle = new Regex(@"\^([^{])");
    var regexSuperGroup = new Regex(@"\^{([^}]*)}");

    Func<int, string> stateToHTML = delegate(int state)
    {
        string stateName = stateNames[state];
        stateName = regexSubSingle.Replace(stateName, "<sub>$1</sub>");
        stateName = regexSubGroup.Replace(stateName, "<sub>$1</sub>");
        stateName = regexSuperSingle.Replace(stateName, "<sup>$1</sup>");
        stateName = regexSuperGroup.Replace(stateName, "<sup>$1</sup>");
        return stateName;
    };

    // Escape characters that are markup inside an HTML-like label. '&' must be replaced first so
    // that the entities produced by the later replacements are not escaped a second time.
    Func<SYMBOL, string> printSymbol = symbol => symbol.ToString()
        .Replace("&", "&amp;")
        .Replace("<", "&lt;").Replace(">", "&gt;")
        .Replace("[", "&#91;").Replace("]", "&#93;");

    Func<ILabel<SYMBOL>, string> printPredicate = ilabel =>
    {
        var predicate = (Predicate<SYMBOL>)ilabel;
        if (predicate == null)
        {
            return "ε";
        }

        // a predicate satisfied by exactly one symbol is shown as that symbol
        var inclusive = algebra.InclusiveSet(predicate);
        if (inclusive.Count == 1)
        {
            return string.Format("<i>{0}</i>", printSymbol(inclusive.First()));
        }

        string typeSymbol = (predicate.Type == PredicateType.In) ? "∈" : "∉"; // unicode math set symbols

        // sort symbols and put in italics
        var symbols = new List<SYMBOL>(predicate.Set);
        symbols.Sort();

        // Choose the smallest per-line group size (at most 5) that still needs the same number
        // of lines, so the rows of the label come out roughly even.
        Func<int, int, int> ceilDiv = (x, y) => (x - 1) / y + 1;
        int groupSize = 5;
        if (symbols.Count > 1)
        {
            while (ceilDiv(symbols.Count, groupSize - 1) == ceilDiv(symbols.Count, groupSize))
            {
                groupSize--;
            }
        }

        int i = 0;
        string joined = string.Join(",<br/>", symbols
            .Select(printSymbol)
            .Select(symbol => string.Format("<i>{0}</i>", symbol))
            .GroupBy(symbol => i++ / groupSize)
            .Select(group => string.Join(", ", group.ToList()))
        );
        return typeSymbol + "{" + joined + "}";
    };

    Func<ILabel<SYMBOL>, string> printLabel = ilabel =>
    {
        var label = (Label<SYMBOL>)ilabel;
        return label.IsIdentity
            ? string.Format("{0}<b>/</b>𝜄", printPredicate(label.Input))
            : string.Format("{0}<b>/</b>{1}", printPredicate(label.Input), printPredicate(label.Output));
    };

    Func<ILabel<SYMBOL>, string> printILabel = (type == AutomatonType.SSA) ? printPredicate : printLabel;

    file.WriteLine("digraph {");
    file.WriteLine(" rankdir=LR;"); // left-to-right direction more readable for automata
    if (name != null) // print automaton name
    {
        file.WriteLine(" label=<{0}:>;", name);
        file.WriteLine(" labelloc=top;");
        file.WriteLine(" labeljust=left;");
    }

    // final states have double circle
    file.Write(" node [shape=doublecircle];");
    foreach (int finalState in finalStates)
    {
        file.Write(" {0};", finalState);
    }
    file.WriteLine();
    file.WriteLine(" node [shape=circle];");

    // invisible zero-width dummy node as source of arrow to initial state
    file.WriteLine(" dummy_node [style=invis,width=0,fixedsize=true,label=\"\"];");
    file.WriteLine(" dummy_node -> {0} [len=0.2,penwidth=2.0];", initialState);

    foreach (Tuple<int, List<ILabel<SYMBOL>>, int> trans in transGroups)
    {
        int sourceState = trans.Item1;
        List<ILabel<SYMBOL>> labels = trans.Item2;
        int targetState = trans.Item3;
        file.WriteLine(" {0} -> {1} [label=<{2}>];",
            sourceState,
            targetState,
            string.Join(",<br/>", labels.Select(printILabel))
        );
    }

    foreach (int state in states)
    {
        file.WriteLine(" {0} [label=<{1}>];", state, stateToHTML(state));
    }

    file.WriteLine("}");
    file.WriteLine();
}
/// <summary>
/// Extracts automaton/transducer constructor parameters by parsing text file.
/// </summary>
/// <remarks>
/// Determines file format from file extension or, failing that, from file contents.
/// For the FSA and FSM formats the returned name is derived from the file name.
/// </remarks>
/// <param name="fileName">File name.</param>
/// <param name="type">Expected type (automaton or transducer).</param>
/// <param name="initialState">Initial state.</param>
/// <param name="finalStates">Final states.</param>
/// <param name="moves">Moves.</param>
/// <param name="alphabet">Alphabet (may be <c>null</c>).</param>
/// <param name="name">Name (may be <c>null</c>).</param>
/// <param name="stateNames">State names (may be <c>null</c>).</param>
/// <param name="stateSymbolsFileName">Path to state symbols file (optional and only used with FSM format).</param>
/// <param name="inputSymbolsFileName">Path to input arc symbols file (optional and only used with FSM format).</param>
/// <param name="outputSymbolsFileName">Path to output arc symbols file (optional and only used with a transducer in FSM format).</param>
public static void ParseAutomaton(
    string fileName,
    AutomatonType type,
    out int initialState,
    out Set<int> finalStates,
    out Set<Move<ILabel<SYMBOL>>> moves,
    out Set<SYMBOL> alphabet,
    out string name,
    out Dictionary<int, string> stateNames,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    string input = File.ReadAllText(fileName);
    ParseAction parse = null;
    bool deriveName = false; // derive automaton name from file name?

    // Path.GetExtension returns the extension including its leading period (".fsa"), or an
    // empty string when there is none, so the period is dropped before comparing.
    string extension = (Path.GetExtension(fileName) ?? "").TrimStart('.').ToLowerInvariant();

    // try guessing format from file extension
    switch (extension)
    {
        case "timbuk":
        case "tmb":
            parse = ParseAutomatonTimbuk;
            break;
        case "fsa":
        case "pl":
            parse = ParseAutomatonFSA;
            deriveName = true;
            break;
        case "fsm":
            parse = ParseAutomatonFSM;
            deriveName = true;
            break;
    }

    if (parse == null) // try guessing format from file contents
    {
        var regexTimbuk = new Regex(@"^Ops.*Automaton.*States.*Final States.*Transitions", RegexOptions.Singleline);
        var regexFSA = new Regex(@"fa\(.*\)", RegexOptions.Singleline);
        var regexFSM = new Regex(@"^[\d\s.]*$"); // only if no symbol files specified, otherwise assume FSM

        if (regexTimbuk.IsMatch(input))
        {
            parse = ParseAutomatonTimbuk;
        }
        else if (regexFSA.IsMatch(input))
        {
            parse = ParseAutomatonFSA;
            deriveName = true;
        }
        else if ((stateSymbolsFileName ?? inputSymbolsFileName ?? outputSymbolsFileName) != null
                 || regexFSM.IsMatch(input))
        {
            parse = ParseAutomatonFSM;
            deriveName = true;
        }
    }

    if (parse == null)
    {
        throw ParserException.UnknownFormat(type);
    }

    parse(
        input, type,
        out initialState, out finalStates, out moves, out alphabet, out name, out stateNames,
        stateSymbolsFileName, inputSymbolsFileName, outputSymbolsFileName
    );

    if (deriveName)
    {
        name = Path.GetFileNameWithoutExtension(fileName);
    }
}
/* file format compatible with https://github.com/Miskaaa/symboliclib */
/// <summary>
/// Parses Timbuk-style text ("Ops ... Automaton ... States ... Final States ... Transitions ...")
/// into automaton/transducer constructor parameters.
/// </summary>
/// <remarks>
/// States are numbered 0, 1, 2, ... in the order they are listed after "States"; the transition
/// whose left-hand side is the arity-0 start symbol with no source state selects the initial state.
/// The symbols-file parameters are not used by this format.
/// </remarks>
/// <param name="input">Complete file contents.</param>
/// <param name="type">Expected type (automaton or transducer).</param>
/// <param name="initialState">Initial state.</param>
/// <param name="finalStates">Final states.</param>
/// <param name="moves">Moves.</param>
/// <param name="alphabet">Symbols declared with arity 1.</param>
/// <param name="name">Name captured after "Automaton".</param>
/// <param name="stateNames">Map from assigned state id to the state name used in the file.</param>
public static void ParseAutomatonTimbuk(
    string input,
    AutomatonType type,
    out int initialState,
    out Set<int> finalStates,
    out Set<Move<ILabel<SYMBOL>>> moves,
    out Set<SYMBOL> alphabet,
    out string name,
    out Dictionary<int, string> stateNames,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    // capture groups of the file pattern: 1 = label declarations, 2 = name, 3 = states,
    // 4 = final states, 5 = transitions
    string namePattern = string.Format(@"(.*?)(?: @{0})?", type == AutomatonType.SSA ? "INFA" : "INT");
    string labelDeclPattern = @"[^:]+:\d+";
    string labelListPattern = string.Format(@"(?:({0}) )*", labelDeclPattern);
    string stateListPattern = @"(?:(\w+) )*";
    string transitionPattern = @".*? \( (?:\w+(?: , \w+)*)? \) -> \w+";
    string transitionListPattern = string.Format(@"(?:({0}) )*", transitionPattern);
    string automatonPattern = string.Format(
        @"Automaton {0} States {1}Final States {1}Transitions {2}",
        namePattern, stateListPattern, transitionListPattern
    );
    string filePattern = string.Format(@"^Ops {0} {1} $", labelListPattern, automatonPattern);

    var fileRegex = new Regex(_(filePattern));
    Match match = fileRegex.Match(input);
    if (!match.Success)
    {
        throw TimbukParserException.InvalidFormat(type);
    }

    // label declarations: arity 0 = start symbol (exactly one), arity 1 = alphabet symbol
    alphabet = new Set<SYMBOL>();
    SYMBOL startSymbol = default(SYMBOL); // meaningless assignment
    bool foundStartSymbol = false;
    foreach (Capture capture in match.Groups[1].Captures)
    {
        string[] parts = capture.Value.Split(':');
        SYMBOL symbol = StringToSymbol(parts[0]);
        int arity = int.Parse(parts[1]);
        switch (arity)
        {
            case 0:
                if (foundStartSymbol)
                {
                    throw TimbukParserException.DuplicateLabelDecl(type);
                }
                startSymbol = symbol;
                foundStartSymbol = true;
                break;
            case 1:
                if (alphabet.Contains(symbol))
                {
                    throw TimbukParserException.DuplicateLabelDecl(type);
                }
                alphabet.Add(symbol);
                break;
            default:
                throw TimbukParserException.TreeAutomataNotSupported(type);
        }
    }
    if (!foundStartSymbol)
    {
        throw TimbukParserException.NoStartSymbol(type);
    }

    name = match.Groups[2].Value;

    var stateDict = new Dictionary<string, int>(match.Groups[3].Captures.Count);
    int id = 0;
    var states = new Set<int>();
    finalStates = new Set<int>();
    moves = new Set<Move<ILabel<SYMBOL>>>();

    foreach (Capture capture in match.Groups[3].Captures)
    {
        string stateName = capture.Value;
        if (stateDict.ContainsKey(stateName))
        {
            throw TimbukParserException.DuplicateState(type);
        }
        int state = id++;
        stateDict[stateName] = state;
        states.Add(state);
    }

    foreach (Capture capture in match.Groups[4].Captures)
    {
        string stateName = capture.Value;
        int state;
        if (!stateDict.TryGetValue(stateName, out state))
        {
            throw TimbukParserException.UnknownFinalState(type);
        }
        if (finalStates.Contains(state))
        {
            throw TimbukParserException.DuplicateFinalState(type);
        }
        finalStates.Add(state);
    }

    // built once instead of once per parsed predicate
    var predicateRegex = new Regex(@"^""?(in|not_in)\{(.*)\}""?$");

    // removes one pair of double quotes enclosing the whole text, if present
    Func<string, string> stripQuotes = s =>
        (s.Length >= 2 && s[0] == '"' && s[s.Length - 1] == '"')
            ? s.Substring(1, s.Length - 2)
            : s;

    Func<string, ILabel<SYMBOL>> parsePredicate = predString =>
    {
        // empty text, or a bare pair of quotes, denotes the null predicate
        if (predString == "" || predString == "\"\"")
        {
            return null;
        }
        Match predMatch = predicateRegex.Match(predString);
        if (predMatch.Success)
        {
            PredicateType predType = predMatch.Groups[1].Value == "in" ? PredicateType.In : PredicateType.NotIn;
            var symbols = predMatch.Groups[2].Value == ""
                ? new Set<SYMBOL>()
                : new Set<SYMBOL>(predMatch.Groups[2].Value.Split(',').Select(s => StringToSymbol(s)));
            return new Predicate<SYMBOL>(predType, symbols);
        }
        return new Predicate<SYMBOL>(StringToSymbol(predString));
    };

    Func<string, ILabel<SYMBOL>> parseLabel = labelString =>
    {
        // Quotes around the whole label are dropped before splitting; otherwise the leading
        // quote hides the '@' identity marker (and an empty side) from the checks below.
        string[] parts = stripQuotes(labelString).Split('/');
        switch (parts.Length)
        {
            case 1:
                return new Label<SYMBOL>((Predicate<SYMBOL>)parsePredicate(parts[0]));
            case 2:
                if (parts.All(s => s.StartsWith("@")))
                {
                    var inputPred = (Predicate<SYMBOL>)parsePredicate(parts[0].Substring(1));
                    var outputPred = (Predicate<SYMBOL>)parsePredicate(parts[1].Substring(1));
                    // both sides of an identity label must carry the same predicate
                    if (inputPred == null ? outputPred != null : !inputPred.Equals(outputPred))
                    {
                        throw TimbukParserException.InvalidIdentityLabel(type);
                    }
                    return new Label<SYMBOL>(inputPred);
                }
                return new Label<SYMBOL>(
                    (Predicate<SYMBOL>)parsePredicate(parts[0]),
                    (Predicate<SYMBOL>)parsePredicate(parts[1])
                );
            default:
                throw TimbukParserException.InvalidTransducerLabel(type);
        }
    };

    Func<string, ILabel<SYMBOL>> parseILabel = (type == AutomatonType.SSA) ? parsePredicate : parseLabel;

    // groups: 1 = label text, 2 = source state (empty for the start transition), 3 = target state
    string singleTransitionPattern = @"(.*) \( (\w*) \) -> (\w+)";
    var transitionRegex = new Regex(_(singleTransitionPattern));
    bool foundInitialState = false;
    initialState = 0; // meaningless, but prevents compilation error

    foreach (Capture capture in match.Groups[5].Captures)
    {
        Match transMatch = transitionRegex.Match(capture.Value);
        if (!transMatch.Success)
        {
            throw TimbukParserException.UnknownSymbol(type);
        }

        if (transMatch.Groups[2].Value == "")
        {
            // "x() -> q": the start symbol selects the initial state
            if (!StringToSymbol(transMatch.Groups[1].Value).Equals(startSymbol))
            {
                throw TimbukParserException.UnknownSymbol(type);
            }
            if (!stateDict.TryGetValue(transMatch.Groups[3].Value, out initialState))
            {
                throw TimbukParserException.UnknownState(type);
            }
            foundInitialState = true;
            continue;
        }

        ILabel<SYMBOL> label = parseILabel(transMatch.Groups[1].Value);
        int sourceState, targetState;
        if (!stateDict.TryGetValue(transMatch.Groups[2].Value, out sourceState) ||
            !stateDict.TryGetValue(transMatch.Groups[3].Value, out targetState))
        {
            throw TimbukParserException.UnknownState(type);
        }
        // parseILabel yields null for an empty predicate, which has no symbols to check.
        // NOTE(review): the meaning of '>' between symbol sets is defined elsewhere - confirm
        // that it rejects every label using a symbol outside the alphabet.
        if (label != null && label.Symbols > alphabet)
        {
            throw TimbukParserException.UnknownSymbol(type);
        }
        moves.Add(new Move<ILabel<SYMBOL>>(sourceState, targetState, label));
    }

    if (!foundInitialState)
    {
        throw TimbukParserException.NoInitialState(type);
    }

    // invert name -> id into id -> name
    stateNames = new Dictionary<int, string>(stateDict.Count);
    foreach (KeyValuePair<string, int> item in stateDict)
    {
        stateNames[item.Value] = item.Key;
    }
}
/// <summary>Builds the exception reported when transitions use symbols outside the alphabet.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static AutomatonException UnknownSymbolsInTransitions(AutomatonType type)
{
    return new AutomatonException(type, "transitions contain symbols not in alphabet");
}
/// <summary>Builds the exception reported when an FSM input contains no transitions.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSMParserException NoTransitions(AutomatonType type)
{
    return new FSMParserException(type, "no transitions - cannot determine initial state");
}
/// <summary>Builds the exception reported when a start state is listed more than once.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSAParserException DuplicateStartState(AutomatonType type)
{
    return new FSAParserException(type, "duplicate start state");
}
/// <summary>Builds the exception reported for a predicate module other than fsa_preds or fsa_frozen.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSAParserException UnsupportedPredicateModule(AutomatonType type)
{
    return new FSAParserException(type, "predicate module not supported (must be fsa_preds or fsa_frozen)");
}
/// <summary>Builds the exception reported for a malformed transducer label.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static TimbukParserException InvalidTransducerLabel(AutomatonType type)
{
    return new TimbukParserException(type, "invalid transducer label format");
}
/// <summary>Builds the exception reported when the two predicates of an identity label differ.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static TimbukParserException InvalidIdentityLabel(AutomatonType type)
{
    return new TimbukParserException(type, "mismatched predicates in identity label");
}
/// <summary>Builds the exception reported when a transition refers to an undeclared state.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static TimbukParserException UnknownState(AutomatonType type)
{
    return new TimbukParserException(type, "unknown state in transition");
}
/// <summary>Builds the exception reported when a final state is not among the declared states.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static TimbukParserException UnknownFinalState(AutomatonType type)
{
    return new TimbukParserException(type, "final state not in states");
}
/// <summary>Builds the exception reported when no initial state was specified.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static TimbukParserException NoInitialState(AutomatonType type)
{
    return new TimbukParserException(type, "no initial state specified");
}
/// <summary>Builds the exception reported for a malformed arc symbols file.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSMParserException InvalidArcSymbolsFile(AutomatonType type)
{
    return new FSMParserException(type, "invalid arc symbols file format");
}
/// <summary>Builds the exception reported when an arc symbol is missing from the symbols file.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSMParserException UnknownArcSymbol(AutomatonType type)
{
    return new FSMParserException(type, "arc symbol not declared in symbols file");
}
/// <summary>Builds the exception reported when a final state is listed more than once.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSAParserException DuplicateFinalState(AutomatonType type)
{
    return new FSAParserException(type, "duplicate final state");
}
/// <summary>Builds the exception reported for invalid state names.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static AutomatonException InvalidStateNames(AutomatonType type)
{
    return new AutomatonException(type, "invalid state names");
}
/// <summary>Builds the exception reported when an FSA input lists no start state.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSAParserException NoStartState(AutomatonType type)
{
    return new FSAParserException(type, "no start state");
}
/// <summary>
/// Creates a parser exception whose message is the automaton type, the text " parser: ",
/// and the given detail message.
/// </summary>
/// <param name="type">Automaton type named at the start of the message.</param>
/// <param name="message">Detail message.</param>
public ParserException(AutomatonType type, string message)
    : base(string.Format("{0} parser: {1}", type, message))
{
}
/// <summary>Builds the exception reported when the declared state count contradicts the states used.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSAParserException StateCountMismatch(AutomatonType type)
{
    return new FSAParserException(type, "contradictory number of states");
}
/* FSA format spec at http://www.let.rug.nl/~vannoord/Fsa/Manual/node5.html#anc1 */
/// <summary>
/// Parses a Prolog <c>fa(...)</c> term (FSA format) into automaton/transducer constructor parameters.
/// </summary>
/// <remarks>
/// State numbers from the file are mapped to fresh consecutive ids in order of first appearance
/// (start states, final states, transitions, jumps). When several start states are listed, a new
/// initial state with epsilon moves to each of them is added. <paramref name="alphabet"/>,
/// <paramref name="name"/> and <paramref name="stateNames"/> are always returned as <c>null</c>;
/// the symbols-file parameters are not used by this format.
/// </remarks>
/// <param name="input">Complete file contents.</param>
/// <param name="type">Expected type (automaton or transducer).</param>
/// <param name="initialState">Initial state.</param>
/// <param name="finalStates">Final states.</param>
/// <param name="moves">Moves (transitions and epsilon jumps).</param>
public static void ParseAutomatonFSA(
    string input,
    AutomatonType type,
    out int initialState,
    out Set<int> finalStates,
    out Set<Move<ILabel<SYMBOL>>> moves,
    out Set<SYMBOL> alphabet,
    out string name,
    out Dictionary<int, string> stateNames,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    // remove Prolog comments before matching
    var regexInlineComments = new Regex(@"%.*(?=\n)");
    var regexBlockComments = new Regex(@"/\*.*?\*/", RegexOptions.Singleline);
    input = regexInlineComments.Replace(input, "");
    input = regexBlockComments.Replace(input, "");

    // capture groups of the file pattern: 1 = predicate module(s), 2 = state count,
    // 3 = start states, 4 = final states, 5/6/7 = transition source/label/target,
    // 8/9 = jump source/target
    string prologListFormatString = @"\[(?: {0} ,? )*\]";
    string symbolsPattern = (type == AutomatonType.SSA) ? @"r \( (\w+) \)" : @"t \( (\w+ , \w+) \)";
    string statesPattern = @"(\d+)";
    string startsPattern = string.Format(prologListFormatString, @"(\d+)");
    string finalsPattern = string.Format(prologListFormatString, @"(\d+)");
    string atomPattern = @"(?:'[^']*'|[\w<>|[\]{}!?+*/#$%@=-]+)";
    string predicatePattern = string.Format(@"(?:{0}|(?:not_)?in \( {1} \))",
        atomPattern, string.Format(prologListFormatString, atomPattern));
    string transsPattern = string.Format(prologListFormatString,
        string.Format(@"trans \( (\d+) , ({0}) , (\d+) \)",
            (type == AutomatonType.SSA)
                ? predicatePattern
                : string.Format(@"{0} \/ {0}|(?:\$@|'\$@') \( {0} \) \/ (?:\$@|'\$@') \( {0} \)",
                    string.Format(@"(?:{0}|\[ \])", predicatePattern))));
    string jumpsPattern = string.Format(prologListFormatString, @"jump \( (\d+) , (\d+) \)");
    string filePattern = string.Format(@"fa \( {0} , {1} , {2} , {3} , {4} , {5} \) \.",
        symbolsPattern, statesPattern, startsPattern, finalsPattern, transsPattern, jumpsPattern);

    var fileRegex = new Regex(_(filePattern));
    Match match = fileRegex.Match(input);
    if (!match.Success)
    {
        throw FSAParserException.InvalidFormat(type);
    }

    // fsa_frozen: symbols are taken as-is; fsa_preds: in(...)/not_in(...) predicates are allowed
    Func<string, bool> isFsaFrozen = module =>
    {
        switch (module)
        {
            case "fsa_preds":
                return false;
            case "fsa_frozen":
                return true;
            default:
                throw FSAParserException.UnsupportedPredicateModule(type);
        }
    };

    bool inputSymbolAsIs;
    bool outputSymbolAsIs = true; // meaningless assignment
    if (type == AutomatonType.SSA)
    {
        inputSymbolAsIs = isFsaFrozen(match.Groups[1].Value);
    }
    else
    {
        string[] parts = Regex.Split(match.Groups[1].Value, _(@" , "));
        inputSymbolAsIs = isFsaFrozen(parts[0]);
        outputSymbolAsIs = isFsaFrozen(parts[1]);
    }

    int stateCount = int.Parse(match.Groups[2].Value);
    var stateDict = new Dictionary<int, int>(stateCount);
    int id = 0;

    // Returns the id assigned to a state number from the file, assigning the next free id on
    // first sight. Using one helper for every lookup keeps the key that is stored identical to
    // the key that was looked up.
    Func<int, int> internState = stateNum =>
    {
        int state;
        if (!stateDict.TryGetValue(stateNum, out state))
        {
            stateDict[stateNum] = state = id++;
        }
        return state;
    };

    var startStates = new Set<int>();
    foreach (Capture capture in match.Groups[3].Captures)
    {
        int state = internState(int.Parse(capture.Value));
        if (startStates.Contains(state))
        {
            throw FSAParserException.DuplicateStartState(type);
        }
        startStates.Add(state);
    }

    finalStates = new Set<int>();
    foreach (Capture capture in match.Groups[4].Captures)
    {
        int state = internState(int.Parse(capture.Value));
        if (finalStates.Contains(state))
        {
            throw FSAParserException.DuplicateFinalState(type);
        }
        finalStates.Add(state);
    }

    var asIsPredicateRegex = new Regex(atomPattern);
    var inNotInPredicateRegex = new Regex(_(string.Format(
        @"(in|not_in) \( {0} \)",
        string.Format(prologListFormatString, string.Format(@"({0})", atomPattern))
    )));

    Func<string, string> stripQuotes = atom =>
        atom[0] == '\'' && atom[atom.Length - 1] == '\''
            ? atom.Substring(1, atom.Length - 2)
            : atom;

    Func<string, bool, ILabel<SYMBOL>> parsePredicate = (predString, asIs) =>
    {
        if (asIs)
        {
            if (!asIsPredicateRegex.IsMatch(predString))
            {
                throw FSAParserException.InvalidPredicate(type);
            }
            return new Predicate<SYMBOL>(StringToSymbol(stripQuotes(predString)));
        }
        else
        {
            Match predMatch = inNotInPredicateRegex.Match(predString);
            if (predMatch.Success)
            {
                var symbols = new Set<SYMBOL>();
                foreach (Capture capture in predMatch.Groups[2].Captures)
                {
                    symbols.Add(StringToSymbol(stripQuotes(capture.Value)));
                }
                return new Predicate<SYMBOL>(
                    (predMatch.Groups[1].Value == "in") ? PredicateType.In : PredicateType.NotIn,
                    symbols
                );
            }
            else
            {
                // a bare atom stands for the single-symbol predicate
                if (!asIsPredicateRegex.IsMatch(predString))
                {
                    throw FSAParserException.InvalidPredicate(type);
                }
                return new Predicate<SYMBOL>(StringToSymbol(stripQuotes(predString)));
            }
        }
    };

    Func<string, ILabel<SYMBOL>> parseInputPredicate =
        (predString => parsePredicate(predString, inputSymbolAsIs));

    Func<string, ILabel<SYMBOL>> parseLabel = labelString =>
    {
        string[] parts = labelString.Split('/');
        var predicates = new Predicate<SYMBOL>[2];
        bool isIdentity = false;
        var emptyListRegex = new Regex(_(@"^\[ \]$"));
        var identityRegex = new Regex(_(string.Format(@"^\$@ \( ({0}) \)$", predicatePattern)));

        for (int i = 0; i < 2; i++)
        {
            if (emptyListRegex.IsMatch(parts[i]))
            {
                predicates[i] = null; // [] is epsilon
            }
            else
            {
                Match identityMatch = identityRegex.Match(parts[i]);
                string predString;
                if (identityMatch.Success)
                {
                    isIdentity = true;
                    predString = identityMatch.Groups[1].Value;
                }
                else
                {
                    predString = parts[i];
                }
                predicates[i] = (Predicate<SYMBOL>)parsePredicate(
                    predString, (i == 0) ? inputSymbolAsIs : outputSymbolAsIs);
            }
        }

        if (isIdentity)
        {
            if (predicates[0] == null)
            {
                if (predicates[1] != null)
                {
                    throw FSAParserException.InvalidIdentityLabel(type);
                }
            }
            else
            {
                if (predicates[1] == null)
                {
                    throw FSAParserException.InvalidIdentityLabel(type);
                }
                // NOTE(review): only same-type predicates with differing sets are rejected here;
                // confirm that predicates of different types are meant to pass.
                if (predicates[0].Type == predicates[1].Type && predicates[0].Set != predicates[1].Set)
                {
                    throw FSAParserException.InvalidIdentityLabel(type);
                }
            }
            return new Label<SYMBOL>(predicates[0]);
        }
        return new Label<SYMBOL>(predicates[0], predicates[1]);
    };

    Func<string, ILabel<SYMBOL>> parseILabel = (type == AutomatonType.SSA) ? parseInputPredicate : parseLabel;

    moves = new Set<Move<ILabel<SYMBOL>>>();

    int transitionCount = match.Groups[5].Captures.Count;
    for (int i = 0; i < transitionCount; i++)
    {
        int sourceStateNum = int.Parse(match.Groups[5].Captures[i].Value);
        ILabel<SYMBOL> label = parseILabel(match.Groups[6].Captures[i].Value);
        int targetStateNum = int.Parse(match.Groups[7].Captures[i].Value);
        int sourceState = internState(sourceStateNum);
        int targetState = internState(targetStateNum);
        moves.Add(new Move<ILabel<SYMBOL>>(sourceState, targetState, label));
    }

    int jumpCount = match.Groups[8].Captures.Count;
    for (int i = 0; i < jumpCount; i++)
    {
        int sourceStateNum = int.Parse(match.Groups[8].Captures[i].Value);
        int targetStateNum = int.Parse(match.Groups[9].Captures[i].Value);
        int sourceState = internState(sourceStateNum);
        // the target is registered under its own number, not the source's
        int targetState = internState(targetStateNum);
        moves.Add(Move<ILabel<SYMBOL>>.Epsilon(sourceState, targetState));
    }

    if (stateDict.Count > stateCount)
    {
        throw FSAParserException.StateCountMismatch(type);
    }

    initialState = 0; // meaningless, prevents compiler error
    if (startStates.Count < 1)
    {
        throw FSAParserException.NoStartState(type);
    }
    else if (startStates.Count == 1)
    {
        initialState = startStates.First();
    }
    else
    {
        // several start states: add a fresh initial state, registered under the first unused
        // state number, with an epsilon move to every listed start state
        initialState = id++;
        for (int i = 0; ; i++)
        {
            if (!stateDict.ContainsKey(i))
            {
                stateDict[i] = initialState;
                break;
            }
        }
        foreach (int startState in startStates)
        {
            moves.Add(Move<ILabel<SYMBOL>>.Epsilon(initialState, startState));
        }
    }

    alphabet = null;
    name = null;
    stateNames = null;
}
/// <summary>Builds the exception reported when a predicate is not valid for the declared module.</summary>
/// <param name="type">Automaton type passed on to the exception.</param>
public static FSAParserException InvalidPredicate(AutomatonType type)
{
    return new FSAParserException(type, "predicate invalid for given module");
}
/* format spec at http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html */
/// <summary>
/// Parses line-oriented FSM text into automaton/transducer constructor parameters.
/// </summary>
/// <remarks>
/// Each line is either a transition (source, target, input symbol, and for transducers an output
/// symbol), a final state, or blank; any other line is rejected. The source of the first
/// transition becomes the initial state. The symbol "0" denotes epsilon. When a symbols file is
/// given, the corresponding column holds names instead of numbers. <paramref name="name"/> is
/// always returned as <c>null</c>.
/// </remarks>
/// <param name="input">Complete file contents.</param>
/// <param name="type">Expected type (automaton or transducer).</param>
/// <param name="initialState">Initial state.</param>
/// <param name="finalStates">Final states.</param>
/// <param name="moves">Moves.</param>
/// <param name="alphabet">Symbols read from the arc symbols files, or <c>null</c> if none was given.</param>
/// <param name="name">Always <c>null</c>.</param>
/// <param name="stateNames">Map from state id to name, or <c>null</c> if no state symbols file was given.</param>
/// <param name="stateSymbolsFileName">Path to state symbols file (optional).</param>
/// <param name="inputSymbolsFileName">Path to input arc symbols file (optional).</param>
/// <param name="outputSymbolsFileName">Path to output arc symbols file (optional).</param>
public static void ParseAutomatonFSM(
    string input,
    AutomatonType type,
    out int initialState,
    out Set<int> finalStates,
    out Set<Move<ILabel<SYMBOL>>> moves,
    out Set<SYMBOL> alphabet,
    out string name,
    out Dictionary<int, string> stateNames,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    // a column is numeric unless its symbols file was supplied
    string statePattern = (stateSymbolsFileName == null) ? @"\d+" : @"[^\s]+";
    string inputSymbolPattern = (inputSymbolsFileName == null) ? @"\d+" : @"[^\s]+";
    string outputSymbolPattern = (outputSymbolsFileName == null) ? @"\d+" : @"[^\s]+";

    var transitionRegex = new Regex(_(string.Format(
        (type == AutomatonType.SSA)
            ? @" ({0}) ({0}) ({1})(?: \d*\.\d*)? "
            : @" ({0}) ({0}) ({1}) ({2})(?: \d*\.\d*)? ",
        statePattern, inputSymbolPattern, outputSymbolPattern
    )));
    var finalStateRegex = new Regex(_(string.Format(@" ({0})(?: \d*\.\d*)? ", statePattern)));
    // Anchored at both ends: an unanchored \s* matches every string (it can match the empty
    // string anywhere), which would make the invalid-format checks below unreachable.
    var blankLineRegex = new Regex(@"^\s*$");
    var symbolRegex = new Regex(_(@" ([^\s]+) (\d+) "));
    Match match;

    // state name -> id, from the state symbols file
    Dictionary<string, int> stateDict = null;
    if (stateSymbolsFileName != null)
    {
        stateDict = new Dictionary<string, int>();
        foreach (string stateLine in File.ReadLines(stateSymbolsFileName))
        {
            match = symbolRegex.Match(stateLine);
            if (match.Success)
            {
                stateDict[match.Groups[1].Value] = int.Parse(match.Groups[2].Value);
            }
            else if (!blankLineRegex.IsMatch(stateLine))
            {
                throw FSMParserException.InvalidStateSymbolsFile(type);
            }
        }
    }

    // alphabet = union of the symbols named in the arc symbols files that were supplied
    alphabet = null;
    string[] symbolsFileNames = (type == AutomatonType.SSA)
        ? new string[] { inputSymbolsFileName }
        : new string[] { inputSymbolsFileName, outputSymbolsFileName };
    foreach (string symbolsFileName in symbolsFileNames)
    {
        if (symbolsFileName != null)
        {
            alphabet = alphabet ?? new Set<SYMBOL>();
            foreach (string symbolLine in File.ReadLines(symbolsFileName))
            {
                match = symbolRegex.Match(symbolLine);
                if (match.Success)
                {
                    alphabet.Add(StringToSymbol(match.Groups[1].Value));
                }
                else if (!blankLineRegex.IsMatch(symbolLine))
                {
                    throw FSMParserException.InvalidArcSymbolsFile(type);
                }
            }
        }
    }

    moves = new Set<Move<ILabel<SYMBOL>>>();
    finalStates = new Set<int>();
    initialState = 0; // meaningless, prevents compiler error

    // "0" is epsilon (null predicate); anything else is a single-symbol predicate
    Func<string, Predicate<SYMBOL>> parsePredicate = (symbol =>
        (symbol == "0") ? null : new Predicate<SYMBOL>(StringToSymbol(symbol))
    );

    bool isFirstTransition = true;
    var reader = new StringReader(input);
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        match = transitionRegex.Match(line);
        if (match.Success)
        {
            int sourceState, targetState;
            if (stateSymbolsFileName == null)
            {
                sourceState = int.Parse(match.Groups[1].Value);
                targetState = int.Parse(match.Groups[2].Value);
            }
            else
            {
                if (!stateDict.TryGetValue(match.Groups[1].Value, out sourceState) ||
                    !stateDict.TryGetValue(match.Groups[2].Value, out targetState))
                {
                    throw FSMParserException.UnknownStateSymbol(type);
                }
            }

            if (inputSymbolsFileName != null && !alphabet.Contains(StringToSymbol(match.Groups[3].Value)))
            {
                throw FSMParserException.UnknownArcSymbol(type);
            }

            ILabel<SYMBOL> label;
            var inputPredicate = parsePredicate(match.Groups[3].Value);
            if (type == AutomatonType.SST)
            {
                if (outputSymbolsFileName != null && !alphabet.Contains(StringToSymbol(match.Groups[4].Value)))
                {
                    throw FSMParserException.UnknownArcSymbol(type);
                }
                var outputPredicate = parsePredicate(match.Groups[4].Value);
                label = new Label<SYMBOL>(inputPredicate, outputPredicate);
            }
            else
            {
                label = inputPredicate;
            }

            moves.Add(new Move<ILabel<SYMBOL>>(sourceState, targetState, label));

            // the source of the first transition is the initial state
            if (isFirstTransition)
            {
                initialState = sourceState;
                isFirstTransition = false;
            }
        }
        else
        {
            match = finalStateRegex.Match(line);
            if (!match.Success)
            {
                if (blankLineRegex.IsMatch(line)) // permit blank line
                {
                    continue;
                }
                throw FSMParserException.InvalidFormat(type);
            }

            // Resolve the final state the same way as transition endpoints: by number when no
            // state symbols file is given, otherwise by name through the symbols table.
            int finalState;
            if (stateSymbolsFileName == null)
            {
                finalState = int.Parse(match.Groups[1].Value);
            }
            else if (!stateDict.TryGetValue(match.Groups[1].Value, out finalState))
            {
                throw FSMParserException.UnknownStateSymbol(type);
            }
            finalStates.Add(finalState);
        }
    }

    if (isFirstTransition)
    {
        throw FSMParserException.NoTransitions(type);
    }

    name = null;
    stateNames = null;
    if (stateDict != null)
    {
        // invert name -> id into id -> name
        stateNames = new Dictionary<int, string>(stateDict.Count);
        foreach (KeyValuePair<string, int> item in stateDict)
        {
            stateNames[item.Value] = item.Key;
        }
    }
}
/// <summary>
/// Builds the parser exception carrying the message
/// "invalid transducer identity label".
/// </summary>
/// <param name="type">Automaton type forwarded to the exception constructor.</param>
/// <returns>A newly constructed <see cref="FSAParserException"/>.</returns>
public static FSAParserException InvalidIdentityLabel(AutomatonType type)
{
    const string message = "invalid transducer identity label";
    return new FSAParserException(type, message);
}
/// <summary>
/// Initializes the exception, prefixing <paramref name="message"/> with
/// "FSM: " before passing it on to the base-class constructor.
/// </summary>
/// <param name="type">Automaton type forwarded to the base constructor.</param>
/// <param name="message">Description of the problem, without the format prefix.</param>
public FSMParserException(AutomatonType type, string message)
    : base(type, string.Format("FSM: {0}", message))
{
}
/// <summary>
/// Builds the parser exception carrying the message
/// "invalid automaton format".
/// </summary>
/// <param name="type">Automaton type forwarded to the exception constructor.</param>
/// <returns>A newly constructed <see cref="FSMParserException"/>.</returns>
public static FSMParserException InvalidFormat(AutomatonType type)
{
    const string message = "invalid automaton format";
    return new FSMParserException(type, message);
}
/* format spec at http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html */
/// <summary>
/// Writes an automaton to <paramref name="file"/> in FSM text format: one line per
/// concrete transition ("source target input [output]") followed by one line per
/// final state. Optionally also writes the state and arc symbol tables to the
/// given symbols files.
/// </summary>
/// <param name="file">Destination for the transition and final-state lines.</param>
/// <param name="type">
/// <c>AutomatonType.SSA</c> makes each move label be treated as a
/// <c>Predicate&lt;SYMBOL&gt;</c> (three-column lines); any other value treats it as a
/// <c>Label&lt;SYMBOL&gt;</c> (four-column lines).
/// </param>
/// <param name="name">Not used by this format.</param>
/// <param name="stateNames">
/// State ID to state name map; read only when <paramref name="stateSymbolsFileName"/>
/// is non-null.
/// </param>
/// <param name="algebra">
/// Its <c>InclusiveSet</c> supplies the concrete symbols a predicate is expanded to;
/// one line is written per symbol (or per input/output pair).
/// </param>
/// <param name="states">Not used by this format.</param>
/// <param name="alphabet">Symbols to number 1, 2, ... in list order (0 denotes epsilon).</param>
/// <param name="moves">Transitions to print.</param>
/// <param name="initialState">
/// Initial state. The format has no explicit marker for it, so transitions leaving
/// this state are written before all others.
/// </param>
/// <param name="finalStates">States written as final, one per line, after the transitions.</param>
/// <param name="stateSymbolsFileName">
/// If non-null, states are printed by name and a "name id" table is written to this file.
/// </param>
/// <param name="inputSymbolsFileName">
/// If non-null, input symbols are printed via <c>ToString()</c> and a "symbol id" table
/// is written to this file; otherwise numeric IDs are printed.
/// </param>
/// <param name="outputSymbolsFileName">
/// Same as <paramref name="inputSymbolsFileName"/> for output symbols; the table is
/// written only when <paramref name="type"/> is not <c>AutomatonType.SSA</c>.
/// </param>
private static void PrintAutomatonFSM(
    StreamWriter file,
    AutomatonType type,
    string name,
    Dictionary<int, string> stateNames,
    PredicateAlgebra<SYMBOL> algebra,
    List<int> states,
    List<SYMBOL> alphabet,
    List<Move<ILabel<SYMBOL>>> moves,
    int initialState,
    List<int> finalStates,
    string stateSymbolsFileName = null,
    string inputSymbolsFileName = null,
    string outputSymbolsFileName = null)
{
    // map symbols to numeric IDs
    var symbolDict = new Dictionary<SYMBOL, int>(alphabet.Count);
    int id = 1;  // 0 reserved for epsilon
    foreach (SYMBOL symbol in alphabet)
    {
        symbolDict[symbol] = id++;
    }

    // print state/symbol as number if corresponding symbols file unspecified,
    // otherwise print as string and map it to number in symbols file
    Func<int, string> state2str = state =>
        (stateSymbolsFileName == null) ? state.ToString() : stateNames[state];
    Func<SYMBOL, string> input2str = symbol =>
        (inputSymbolsFileName == null) ? symbolDict[symbol].ToString() : symbol.ToString();
    Func<SYMBOL, string> output2str = symbol =>
        (outputSymbolsFileName == null) ? symbolDict[symbol].ToString() : symbol.ToString();

    // acceptor move: a null predicate is an epsilon transition (symbol 0)
    Action<Move<ILabel<SYMBOL>>> printPredicateMove = move =>
    {
        string source = state2str(move.SourceState);
        string target = state2str(move.TargetState);
        var predicate = (Predicate<SYMBOL>)move.Label;

        if (predicate == null)
        {
            file.WriteLine("{0} {1} 0", source, target);
            return;
        }

        foreach (SYMBOL symbol in algebra.InclusiveSet(predicate))
        {
            file.WriteLine("{0} {1} {2}", source, target, input2str(symbol));
        }
    };

    // transducer move: a null input/output predicate is epsilon (symbol 0)
    Action<Move<ILabel<SYMBOL>>> printLabelMove = move =>
    {
        string source = state2str(move.SourceState);
        string target = state2str(move.TargetState);
        var label = (Label<SYMBOL>)move.Label;

        if (label.IsIdentity)
        {
            // identity: output copies input, so only the input predicate is expanded
            if (label.Input == null)
            {
                file.WriteLine("{0} {1} 0 0", source, target);
            }
            else
            {
                foreach (SYMBOL symbol in algebra.InclusiveSet(label.Input))
                {
                    file.WriteLine("{0} {1} {2} {3}", source, target, input2str(symbol), output2str(symbol));
                }
            }
            return;
        }

        if (label.Input == null)
        {
            if (label.Output == null)
            {
                file.WriteLine("{0} {1} 0 0", source, target);
            }
            else
            {
                foreach (SYMBOL output in algebra.InclusiveSet(label.Output))
                {
                    file.WriteLine("{0} {1} 0 {2}", source, target, output2str(output));
                }
            }
            return;
        }

        // non-identity with both sides present: every input/output combination
        foreach (SYMBOL input in algebra.InclusiveSet(label.Input))
        {
            if (label.Output == null)
            {
                file.WriteLine("{0} {1} {2} 0", source, target, input2str(input));
            }
            else
            {
                foreach (SYMBOL output in algebra.InclusiveSet(label.Output))
                {
                    file.WriteLine("{0} {1} {2} {3}", source, target, input2str(input), output2str(output));
                }
            }
        }
    };

    Action<Move<ILabel<SYMBOL>>> printMove =
        (type == AutomatonType.SSA) ? printPredicateMove : printLabelMove;

    // print moves; the source state of the first transition is read back as the
    // initial state, so moves leaving initialState go first (relative order of
    // the moves is otherwise preserved)
    foreach (Move<ILabel<SYMBOL>> move in moves)
    {
        if (move.SourceState == initialState)
        {
            printMove(move);
        }
    }
    foreach (Move<ILabel<SYMBOL>> move in moves)
    {
        if (move.SourceState != initialState)
        {
            printMove(move);
        }
    }

    // print final states (only the final ones, not every state)
    foreach (int finalState in finalStates)
    {
        file.WriteLine(state2str(finalState));
    }

    // state symbols table: "name id" per line
    if (stateSymbolsFileName != null)
    {
        using (var stateFile = new StreamWriter(stateSymbolsFileName))
        {
            foreach (KeyValuePair<int, string> item in stateNames)
            {
                stateFile.WriteLine("{0} {1}", item.Value, item.Key);
            }
        }
    }

    // arc symbols table(s): "symbol id" per line; acceptors have no output table
    string[] symbolsFileNames = (type == AutomatonType.SSA)
        ? new string[] { inputSymbolsFileName }
        : new string[] { inputSymbolsFileName, outputSymbolsFileName };

    foreach (string symbolsFileName in symbolsFileNames)
    {
        if (symbolsFileName == null)
        {
            continue;
        }

        using (var symbolsFile = new StreamWriter(symbolsFileName))
        {
            foreach (KeyValuePair<SYMBOL, int> item in symbolDict)
            {
                symbolsFile.WriteLine("{0} {1}", item.Key, item.Value);
            }
        }
    }
}
/// <summary>
/// Builds the parser exception carrying the message "duplicate state".
/// </summary>
/// <param name="type">Automaton type forwarded to the exception constructor.</param>
/// <returns>A newly constructed <see cref="TimbukParserException"/>.</returns>
public static TimbukParserException DuplicateState(AutomatonType type)
{
    const string message = "duplicate state";
    return new TimbukParserException(type, message);
}