/// <summary>
/// Creates an item set from a source rule and its items, with an empty transition row.
/// </summary>
/// <param name="sourceRule">Rule this item set was built from; stored in SourceRule as given.</param>
/// <param name="items">Items that make up the set; stored in Items as given (not copied).</param>
public ItemSet(GrammarRule sourceRule, RuleSymbolLink[] items)
{
    SourceRule = sourceRule;
    Items = items;

    // One column per symbol; -1 means "no transition recorded for this symbol"
    int[] row = new int[SYMCNT_TOTAL];
    for (int col = row.Length - 1; col >= 0; col--)
    {
        row[col] = -1;
    }
    TableRow = row;
}
/// <summary>
/// Appends the items for <paramref name="rule"/> to <paramref name="itemSet"/> and then closes the set.
/// </summary>
/// <param name="rule">Rule to create items for.</param>
/// <param name="itemSet">List that receives the new items; it is also passed to CloseItemSet.</param>
/// <param name="justIndex0">When true, only the item at position 0 is added.</param>
private void BuildItemSet(GrammarRule rule, List<RuleSymbolLink> itemSet, bool justIndex0 = false)
{
    int symbolCount = rule.MatchSymbols.Length;

    // Single-symbol rules (and callers asking for index 0 only) get just the initial item;
    // every other rule gets one item per position, including the one past the last symbol.
    int lastPosition = (symbolCount == 1 || justIndex0) ? 0 : symbolCount;

    for (int position = 0; position <= lastPosition; position++)
    {
        itemSet.Add(new RuleSymbolLink(rule, position));
    }

    CloseItemSet(itemSet);
}
/// <summary>
/// Initialises a new instance of the Parser class: declares the grammar, selects the goal
/// symbol and generates the parse tables.
/// </summary>
public Parser()
{
    // The position of each rule in this array is the index used to look it up in grammarRules,
    // so the order must not change.
    grammarRules = new GrammarRule[]
    {
        // TypeName := '<' Identifier '>'
        GrammarRule.Sequence(SymbolType.TypeName, SymbolType.OpenTypeDef, SymbolType.Identifier, SymbolType.CloseTypeDef),

        // PrimitiveValue := String | Number | Boolean
        GrammarRule.Sequence(SymbolType.PrimitiveValue, SymbolType.StringLiteral),
        GrammarRule.Sequence(SymbolType.PrimitiveValue, SymbolType.NumberLiteral),
        GrammarRule.Sequence(SymbolType.PrimitiveValue, SymbolType.BooleanLiteral),

        // EnumValue := Identifier '.' Identifier
        // (the recursive alternative, Identifier '.' EnumValue, is currently disabled)
        GrammarRule.Sequence(SymbolType.EnumValue, SymbolType.Identifier, SymbolType.Dot, SymbolType.Identifier),
        //GrammarRule.Sequence(SymbolType.EnumValue, SymbolType.Identifier, SymbolType.Dot, SymbolType.EnumValue),

        // MapValue := '{' KeyValueSeq '}' | '{' '}'
        GrammarRule.Sequence(SymbolType.MapValue, SymbolType.OpenMap, SymbolType.KeyValueSeq, SymbolType.CloseMap),
        GrammarRule.Sequence(SymbolType.MapValue, SymbolType.OpenMap, SymbolType.CloseMap),

        // ArrayValue := '[' ArraySeq ']' | '[' ']'
        GrammarRule.Sequence(SymbolType.ArrayValue, SymbolType.OpenArray, SymbolType.ArraySeq, SymbolType.CloseArray),
        GrammarRule.Sequence(SymbolType.ArrayValue, SymbolType.OpenArray, SymbolType.CloseArray),

        // TypedMapValue := TypeName MapValue
        GrammarRule.Sequence(SymbolType.TypedMapValue, SymbolType.TypeName, SymbolType.MapValue),

        // TypedArrayValue := TypeName ArrayValue
        GrammarRule.Sequence(SymbolType.TypedArrayValue, SymbolType.TypeName, SymbolType.ArrayValue),

        // ComplexValue := TypedMapValue | TypedArrayValue | MapValue | ArrayValue
        GrammarRule.Sequence(SymbolType.ComplexValue, SymbolType.TypedMapValue),
        GrammarRule.Sequence(SymbolType.ComplexValue, SymbolType.TypedArrayValue),
        GrammarRule.Sequence(SymbolType.ComplexValue, SymbolType.MapValue),
        GrammarRule.Sequence(SymbolType.ComplexValue, SymbolType.ArrayValue),

        // KeyValue := Identifier ComplexValue
        //           | Identifier '=' PrimitiveValue
        //           | Identifier '=' EnumValue
        //           | TypeName Identifier '=' PrimitiveValue
        GrammarRule.Sequence(SymbolType.KeyValue, SymbolType.Identifier, SymbolType.ComplexValue),
        GrammarRule.Sequence(SymbolType.KeyValue, SymbolType.Identifier, SymbolType.Assign, SymbolType.PrimitiveValue),
        GrammarRule.Sequence(SymbolType.KeyValue, SymbolType.Identifier, SymbolType.Assign, SymbolType.EnumValue),
        GrammarRule.Sequence(SymbolType.KeyValue, SymbolType.TypeName, SymbolType.Identifier, SymbolType.Assign, SymbolType.PrimitiveValue),

        // KeyValueSeq := KeyValueSeq KeyValue | KeyValue   (one or more key/values)
        GrammarRule.Sequence(SymbolType.KeyValueSeq, SymbolType.KeyValueSeq, SymbolType.KeyValue),
        GrammarRule.Sequence(SymbolType.KeyValueSeq, SymbolType.KeyValue),

        // ArraySeq := ArraySeq ',' PrimitiveValue | ArraySeq ',' ComplexValue | ArraySeq ',' EnumValue
        //           | PrimitiveValue | ComplexValue | EnumValue
        GrammarRule.Sequence(SymbolType.ArraySeq, SymbolType.ArraySeq, SymbolType.Seperator, SymbolType.PrimitiveValue),
        GrammarRule.Sequence(SymbolType.ArraySeq, SymbolType.ArraySeq, SymbolType.Seperator, SymbolType.ComplexValue),
        GrammarRule.Sequence(SymbolType.ArraySeq, SymbolType.ArraySeq, SymbolType.Seperator, SymbolType.EnumValue),
        GrammarRule.Sequence(SymbolType.ArraySeq, SymbolType.PrimitiveValue),
        GrammarRule.Sequence(SymbolType.ArraySeq, SymbolType.ComplexValue),
        GrammarRule.Sequence(SymbolType.ArraySeq, SymbolType.EnumValue)
    };

    // A document is parsed as a sequence of key/values
    goalSymbol = SymbolType.KeyValueSeq;

    // Generate parse tables
    GenerateParseTables();
}
/// <summary>
/// Parses a token stream with the generated parse tables and converts the resulting
/// parse tree into a model node.
/// </summary>
/// <param name="strm">Stream to read symbols from.</param>
/// <returns>
/// null if the stream is already at its end; otherwise a Model.MapValue when the root symbol is
/// KeyValueSeq, or a Model.ArrayValue when the root symbol is ArraySeq.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The tables report an error for the current lookahead, the stack is in an unexpected shape,
/// or the finished stack does not hold exactly one parse tree with a supported root symbol.
/// </exception>
public Model.Node Parse(TokenStream strm)
{
    // Nothing to parse
    if (strm.EoS)
    {
        return null;
    }

    // The stack starts with the marker for state 0; every shifted or reduced symbol
    // is followed by the marker of the state entered after it.
    Stack<Symbol> stack = new Stack<Symbol>();
    stack.Push(Symbol.ParseState(0));
    Symbol lookahead = strm.Read();

    bool accepted = false;
    while (!accepted)
    {
        // The top of the stack must always be a state marker
        Symbol top = stack.Peek();
        if (!top.IsParseState)
        {
            throw new InvalidOperationException();
        }
        StateDefinition state = parseStates[top.State];

        // With no lookahead left, the last lookahead column is consulted
        int column;
        if (lookahead != null)
        {
            column = lookahead.TerminalIndex;
        }
        else
        {
            column = SYMCNT_TERMINAL - 1;
        }
        var rule = state.LookaheadTable[column];

        switch (rule.Action)
        {
            case ParserAction.Shift:
            {
                // Consume the lookahead and enter the target state
                stack.Push(lookahead);
                lookahead = strm.Read();
                stack.Push(Symbol.ParseState(rule.Arg));
                break;
            }

            case ParserAction.Reduce:
            {
                GrammarRule reduction = grammarRules[rule.Arg];

                // Pop one symbol per matched rule symbol, discarding the state markers in between;
                // the children array is filled back to front so it ends up in source order.
                int remaining = reduction.MatchSymbols.Length;
                Symbol[] children = new Symbol[remaining];
                while (remaining > 0)
                {
                    Symbol popped = stack.Pop();
                    if (popped.IsParseState)
                    {
                        continue;
                    }
                    remaining--;
                    children[remaining] = popped;
                }
                Symbol reduced = new Symbol(reduction.OutputSymbol, children);

                // The state uncovered by the pops decides where to go next
                Symbol exposed = stack.Peek();
                if (!exposed.IsParseState)
                {
                    throw new InvalidOperationException();
                }
                StateDefinition exposedState = parseStates[exposed.State];
                int target = exposedState.GotoTable[SymbolTypeToCol(reduction.OutputSymbol, true)];

                stack.Push(reduced);
                stack.Push(Symbol.ParseState(target));
                break;
            }

            case ParserAction.Done:
            {
                accepted = true;
                break;
            }

            case ParserAction.Error:
            {
                if (lookahead != null)
                {
                    throw new InvalidOperationException($"Unexpected symbol '{lookahead.Value}' (line {lookahead.Position.Line}, col {lookahead.Position.Column})");
                }
                else
                {
                    throw new InvalidOperationException($"Unexpected end of stream");
                }
            }
        }
    }

    // Drain the stack: exactly one non-state symbol (the parse tree) is expected
    Symbol parseTree = null;
    while (stack.Count > 0)
    {
        Symbol sym = stack.Pop();
        if (sym.IsParseState)
        {
            continue;
        }
        if (parseTree != null)
        {
            throw new InvalidOperationException($"Multiple parse trees generated (got '{sym.Type}', already had '{parseTree.Type}')");
        }
        parseTree = sym;
    }
    if (parseTree == null)
    {
        throw new InvalidOperationException($"Parse tree not generated");
    }

    // Convert the tree according to its root symbol
    switch (parseTree.Type)
    {
        case SymbolType.KeyValueSeq:
        {
            // A key/value sequence becomes a map
            var map = new Model.MapValue();
            PopulateMap(map, parseTree);
            return map;
        }

        case SymbolType.ArraySeq:
        {
            // An array sequence becomes an array
            var arr = new Model.ArrayValue();
            PopulateArray(arr, parseTree);
            return arr;
        }

        default:
            throw new InvalidOperationException($"Unexpected root symbol '{parseTree.Type}'");
    }
}
/// <summary>
/// Builds the item sets for the grammar (one parser state per item set) and derives each
/// state's lookahead (action) table and goto table, storing the result in parseStates.
/// </summary>
/// <exception cref="Exception">
/// A state that must reduce already has a shift (or reduce) action for some lookahead,
/// i.e. the grammar has a conflict this table builder cannot resolve.
/// </exception>
private void GenerateParseTables()
{
    // Augment the grammar with Goal := goalSymbol; its item set becomes state 0
    GrammarRule goalRule = GrammarRule.Sequence(SymbolType.Goal, goalSymbol);
    List<ItemSet> itemSets = new List<ItemSet>();
    List<RuleSymbolLink> goalItems = new List<RuleSymbolLink>();
    BuildItemSet(goalRule, goalItems);
    itemSets.Add(new ItemSet(goalRule, goalItems.ToArray()));

    // Derive all item sets reachable from state 0 (worklist of item set indices)
    Stack<int> toProcess = new Stack<int>();
    toProcess.Push(0);
    while (toProcess.Count > 0)
    {
        int curItemSetIdx = toProcess.Pop();
        ItemSet curItemSet = itemSets[curItemSetIdx];

        // Collect every symbol that some item in this set expects next
        HashSet<SymbolType> possibleSymbols = new HashSet<SymbolType>();
        for (int i = 0; i < curItemSet.Items.Length; i++)
        {
            SymbolType symbol;
            if (curItemSet.Items[i].GetNextSymbol(out symbol))
            {
                possibleSymbols.Add(symbol);
            }
        }

        foreach (SymbolType pSymbol in possibleSymbols)
        {
            // Advance every item that expects pSymbol by one position, then close the result
            List<RuleSymbolLink> subset = new List<RuleSymbolLink>();
            for (int i = 0; i < curItemSet.Items.Length; i++)
            {
                SymbolType symbol;
                if (curItemSet.Items[i].GetNextSymbol(out symbol) && pSymbol == symbol)
                {
                    RuleSymbolLink item = curItemSet.Items[i];
                    subset.Add(new RuleSymbolLink(item.Rule, item.Index + 1, item.Lookahead));
                }
            }
            CloseItemSet(subset);

            // Reuse an existing item set when IndexOf finds an equal one; otherwise register
            // the new set and queue it for processing.
            ItemSet newItemSet = new ItemSet(null, subset.ToArray());
            int newIdx = itemSets.IndexOf(newItemSet);
            if (newIdx == -1)
            {
                newIdx = itemSets.Count;
                itemSets.Add(newItemSet);
                toProcess.Push(newIdx);
            }

            // Record the transition current set --pSymbol--> target set
            curItemSet.TableRow[SymbolTypeToCol(pSymbol)] = newIdx;
        }
    }

    // Create states: translate each item set's transition row into shift/error actions
    // (terminals) and goto targets (non-terminals).
    parseStates = new StateDefinition[itemSets.Count];
    for (int i = 0; i < itemSets.Count; i++)
    {
        ItemSet itemSet = itemSets[i];
        StateDefinition.Rule[] lookaheadTable = new StateDefinition.Rule[SYMCNT_TERMINAL];
        int[] gotoTable = new int[SYMCNT_NONTERMINAL];

        // All lookahead columns except the last map to terminal symbols
        for (int j = 0; j < SYMCNT_TERMINAL - 1; j++)
        {
            SymbolType symbol = SymbolType.Terminal | (SymbolType)(j + 1);
            int arg = itemSet.TableRow[SymbolTypeToCol(symbol)];
            if (arg != -1)
            {
                lookaheadTable[j] = new StateDefinition.Rule(ParserAction.Shift, arg);
            }
            else
            {
                lookaheadTable[j] = new StateDefinition.Rule(ParserAction.Error, 0);
            }
        }

        // The last column is the one Parse consults when no lookahead token remains.
        // Accept only when the goal rule has been matched completely. A goal item whose
        // position is still before the goal symbol (as in state 0) must not accept,
        // otherwise running out of input before anything was parsed counts as success.
        bool goalComplete = itemSet.Items.Any(
            item => item.Rule == goalRule && item.Index >= item.Rule.MatchSymbols.Length);
        if (goalComplete)
        {
            lookaheadTable[SYMCNT_TERMINAL - 1] = new StateDefinition.Rule(ParserAction.Done, 0);
        }
        else
        {
            lookaheadTable[SYMCNT_TERMINAL - 1] = new StateDefinition.Rule(ParserAction.Error, 0);
        }

        // Goto targets for non-terminals are copied as-is (-1 where no transition exists)
        for (int j = 0; j < SYMCNT_NONTERMINAL; j++)
        {
            SymbolType symbol = SymbolType.NonTerminal | (SymbolType)(j + 1);
            gotoTable[j] = itemSet.TableRow[SymbolTypeToCol(symbol)];
        }

        parseStates[i] = new StateDefinition(lookaheadTable, gotoTable);
    }

    // Add reduce actions: a state holding a completed (non-goal) item reduces by that item's
    // rule on every lookahead column.
    for (int i = 0; i < itemSets.Count; i++)
    {
        ItemSet itemSet = itemSets[i];

        // Only the first completed item in the set is considered
        bool reduce = false;
        RuleSymbolLink reduceItem = default(RuleSymbolLink);
        for (int j = 0; j < itemSet.Items.Length; j++)
        {
            RuleSymbolLink item = itemSet.Items[j];
            if (item.Rule != goalRule && item.Index >= item.Rule.MatchSymbols.Length)
            {
                reduce = true;
                reduceItem = item;
                break;
            }
        }

        if (reduce)
        {
            // Reduce actions refer to rules by their index in grammarRules
            int ruleIndex = -1;
            for (int j = 0; j < grammarRules.Length; j++)
            {
                if (grammarRules[j] == reduceItem.Rule)
                {
                    ruleIndex = j;
                    break;
                }
            }

            var pS = parseStates[i];
            for (int j = 0; j < SYMCNT_TERMINAL; j++)
            {
                if (pS.LookaheadTable[j].Action == ParserAction.Shift)
                {
                    // Shift-reduce conflict
                    throw new Exception($"Shift-reduce conflict ({grammarRules[ruleIndex]})");
                }
                else if (pS.LookaheadTable[j].Action == ParserAction.Reduce)
                {
                    // Reduce-reduce conflict
                    throw new Exception($"Reduce-reduce conflict ({grammarRules[ruleIndex]})");
                }
                else
                {
                    pS.LookaheadTable[j] = new StateDefinition.Rule(ParserAction.Reduce, ruleIndex);
                }
            }
        }
    }
}
/// <summary>
/// Creates a link to a position within a grammar rule.
/// </summary>
/// <param name="rule">Rule being referenced.</param>
/// <param name="index">Position within the rule's symbols.</param>
/// <param name="lookahead">Lookahead symbol stored with the link; defaults to SymbolType.None.</param>
public RuleSymbolLink(GrammarRule rule, int index, SymbolType lookahead = SymbolType.None)
{
    this.Rule = rule;
    this.Index = index;
    this.Lookahead = lookahead;
}