/// <summary>
/// Loads the grammar XML file and builds a <see cref="GrammarParserResult"/> from it.
/// </summary>
/// <remarks>
/// The <c>g</c> element of the document supplies the token class names and the
/// white delimiters; its first <c>d</c> child is parsed as the axiom.
/// </remarks>
/// <returns>
/// The class table, the sorted and the definition-ordered node lists, the axiom
/// and the unclassified terminals. <c>Errors</c> is always set to <c>null</c> here.
/// </returns>
public GrammarParserResult Parse()
{
    var grammarDocument = XDocument.Load(grammarXmlFilePath);
    rootElement = grammarDocument.Element("g");

    nodes = new List<INode>();
    classesWithRemovedSymbols = new Dictionary<string, List<char>>();

    unclassifiedTokenClassName = rootElement.Attribute("unclassified-token-class-name").Value;
    unsupportedTokenClassName = rootElement.Attribute("unsupported-token-class-name").Value;

    // The first definition in the file is parsed as the axiom.
    var axiomDefinition = rootElement.Element("d");
    axiom = (Medium)ParseNode(axiomDefinition, false);

    // Snapshot the nodes in their current order before sorting, so the
    // grammar can later be printed in that order.
    var nodesBeforeSort = nodes.ToList();

    nodes.Sort(new NodeComparer());

    classTable = new ClassTable
    {
        UnclassifiedTokenClassName = unclassifiedTokenClassName,
        UndefinedTokenClassName = unsupportedTokenClassName,
        // The attribute stores \t, \n and \r as two-character escapes;
        // turn them into the real control characters.
        WhiteDelimiters = rootElement
            .Attribute("white-delimiters")
            .Value
            .Replace("\\t", "\t")
            .Replace("\\n", "\n")
            .Replace("\\r", "\r"),
        SymbolClasses = ParseClasses(rootElement),
        TokenClasses = GetTokenClassesIndices(),
    };

    FactorizeAllNonTerminals();

    // Indices are assigned only after factorization.
    FillIndices();

    // Mediums present in the node list now but absent from the snapshot,
    // ordered by name, are appended after the snapshot.
    var mediumsAddedLater = nodes
        .Except(nodesBeforeSort)
        .OfType<IMedium>()
        .OrderBy(medium => medium.Name);
    var definitionOrderWithMediums = nodesBeforeSort.Concat(mediumsAddedLater);

    return new GrammarParserResult
    {
        ClassTable = classTable,
        HasLeftRecursion = !shouldConvertLeftRecursionToRight,
        SortedNodes = nodes,
        UnsortedNodes = definitionOrderWithMediums.ToList(),
        Axiom = axiom,
        UnclassifiedTerminals = unclassifiedTerminals,
        Errors = null,
    };
}
/// <summary>
/// Reads a node from the grammar file together with all of its children.
/// </summary>
/// <remarks>
/// Supported tags:
/// <list type="bullet">
/// <item><description><c>&lt;d&gt;a&lt;/d&gt;</c> - define a new non-terminal (class, token or medium).</description></item>
/// <item><description><c>&lt;n&gt;b&lt;/n&gt;</c> - look up an already defined non-terminal.</description></item>
/// <item><description><c>&lt;t&gt;c&lt;/t&gt;</c> - define a terminal, or reuse the one already registered under that name.</description></item>
/// </list>
/// When a <c>t</c> element names a terminal that is already registered, only its
/// <c>exec-class</c> attribute is taken into account; <c>streamers</c>,
/// <c>breakers</c>, <c>stream-max-count</c> and <c>operator-priority</c> of the
/// repeated occurrence are ignored.
/// </remarks>
/// <param name="element">Definition node in grammar file.</param>
/// <param name="isInsideToken">If current node is located inside a token.</param>
/// <returns>Parsed node.</returns>
/// <exception cref="Exception">
/// A required <c>name</c> attribute or <c>cta</c> element is missing, a
/// non-terminal is referenced before it is defined, two different execute stream
/// node types are assigned to one terminal, or the tag is not supported.
/// </exception>
Node ParseNode(XElement element, bool isInsideToken)
{
    switch (element.Name.LocalName)
    {
        case "d":
        {
            string name = element.Attribute("name")?.Value;
            if (name == null)
            {
                throw new Exception("Definition <d> is missing the required 'name' attribute.");
            }

            // Symbol class definition.
            var symbolClassAttribute = element.Attribute("symbol-class");
            if (symbolClassAttribute != null)
            {
                var symbolsElement = element.Element("cta");
                if (symbolsElement == null)
                {
                    throw new Exception($"Symbol class definition {name} is missing the required <cta> element.");
                }

                var symbolClassNode = new Class
                {
                    Name = name,
                    SymbolClass = symbolClassAttribute.Value,
                    Symbols = symbolsElement.Value,
                };
                nodes.Add(symbolClassNode);
                return symbolClassNode;
            }

            // Token definition.
            var tokenClassAttribute = element.Attribute("token-class");
            if (tokenClassAttribute != null)
            {
                var tokenNode = new DefinedToken
                {
                    Name = name,
                    TokenClass = tokenClassAttribute.Value,
                    ExecuteStreamNodeType = element.Attribute("exec-class")?.Value,
                };

                // The node is registered before its cases are parsed
                // (presumably so that nested <n> references to it resolve).
                nodes.Add(tokenNode);
                tokenNode.Cases = ParseCases(element, true);
                return tokenNode;
            }

            // Anything else is a medium.
            var mediumNode = new Medium
            {
                Name = name,
            };
            nodes.Add(mediumNode);
            mediumNode.Cases = ParseCases(element, false);
            return mediumNode;
        }

        case "n":
        {
            string name = element.Value;

            // One pass over the list: null when nothing matches, while a
            // duplicate name still throws InvalidOperationException.
            INonterminal referenced = nodes.OfType<INonterminal>().SingleOrDefault(n => n.Name == name);
            if (referenced == null)
            {
                throw new Exception($"Node {name} was referenced before it was defined");
            }

            return (Node)referenced;
        }

        case "t":
        {
            string name = element.Value;
            string execClass = element.Attribute("exec-class")?.Value ?? string.Empty;

            ITerminal registered = nodes.OfType<ITerminal>().SingleOrDefault(n => n.Name == name);
            if (registered != null)
            {
                var existing = (Node)registered;
                if (!string.IsNullOrEmpty(execClass))
                {
                    if (!string.IsNullOrEmpty(existing.ExecuteStreamNodeType) && existing.ExecuteStreamNodeType != execClass)
                    {
                        throw new Exception("Attempt to assign two different execute stream node types.");
                    }

                    existing.ExecuteStreamNodeType = execClass;
                }

                return existing;
            }

            string[] streamers = element.Attribute("streamers")?.Value.Split('|') ?? new string[0];
            string[] breakers = element.Attribute("breakers")?.Value.Split('|') ?? new string[0];

            // The grammar file is machine-readable, so numbers are parsed with
            // the invariant culture rather than the culture of the current thread.
            // A missing or malformed attribute leaves the value at 0.
            bool isStreamMaxCountSet = int.TryParse(
                element.Attribute("stream-max-count")?.Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out int streamMaxCount);
            int.TryParse(
                element.Attribute("operator-priority")?.Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out int operatorPriority);

            Terminal node;
            switch (execClass)
            {
                case "statement":
                    node = new DefinedStatement
                    {
                        Name = name,
                        TokenClass = unclassifiedTokenClassName,
                        ExecuteStreamNodeType = execClass,
                        Streamers = streamers,
                        Breakers = breakers,
                        IsStreamMaxCountSet = isStreamMaxCountSet,
                        StreamMaxCount = streamMaxCount,
                    };
                    break;

                case "operator":
                    node = new DefinedOperator
                    {
                        Name = name,
                        TokenClass = unclassifiedTokenClassName,
                        ExecuteStreamNodeType = execClass,
                        Priority = operatorPriority,
                    };
                    break;

                default:
                    node = new Terminal
                    {
                        Name = name,
                        TokenClass = unclassifiedTokenClassName,
                        ExecuteStreamNodeType = execClass,
                    };
                    break;
            }

            // Terminals found inside a defined token are registered only
            // when the parser is configured to include them.
            if (!isInsideToken || shouldIncludeTerminalsFromInsideOfDefinedTokens)
            {
                nodes.Add(node);
            }

            return node;
        }

        default:
            throw new Exception($"Unsupported tag {element.Name.LocalName} was found in grammar.");
    }
}
/// <summary>
/// Rewrites directly left-recursive cases of every medium into
/// right-recursive ones by introducing helper mediums.
/// </summary>
/// <remarks>
/// Only direct recursion is handled, i.e. cases whose first element is the
/// medium itself. One helper medium is created per non-recursive case.
/// </remarks>
/// <example>
/// A::=B|AcB|AeB|D
/// A::=B|D{cB|eB}
///
/// A->B
/// A->D
/// A->AcB
/// A->AeB
///
/// A->B
/// A->BX
/// X->cBX
/// X->cB
///
/// X->eBX
/// X->eB
///
/// A->D
/// A->DY
/// Y->cBY
/// Y->cB
///
/// Y->eBY
/// Y->eB
///
/// A::=B(X|^)|D(Y|^)
/// X::=cB(X|^)|eB(X|^)
/// Y::=cB(Y|^)|eB(Y|^)
///
/// where X and Y are the helper mediums.
/// </example>
private void GetRidOfLeftRecursion()
{
    // Work on a snapshot: helper mediums are appended to this.nodes below,
    // which would invalidate an enumeration of the live list.
    List<Medium> mediumSnapshot = this.nodes.OfType<Medium>().ToList();

    foreach (Medium current in mediumSnapshot)
    {
        // Cases that start with the medium itself (A->A...).
        var leftRecursiveCases = current.Cases
            .Where(sequence => sequence.Any() && sequence.First() == current)
            .ToList();

        if (leftRecursiveCases.Count == 0)
        {
            continue;
        }

        // Everything else (A->B, A->D); the new A->BX cases are appended here.
        var rewrittenCases = current.Cases.Except(leftRecursiveCases).ToList();
        if (rewrittenCases.Count == 0)
        {
            throw new Exception("Cannot transform left recursion to right if there is no non-left-recursive sequences.");
        }

        // Recursive cases without their leading self-reference (cB, eB).
        var tails = leftRecursiveCases
            .Select(sequence => sequence.Skip(1).ToList())
            .ToList();

        // Only the cases present before this loop get a helper; the ones
        // appended during the loop sit past this bound and are not visited.
        int baseCaseCount = rewrittenCases.Count;
        for (int helperIndex = 0; helperIndex < baseCaseCount; helperIndex++)
        {
            Medium helper = new Medium
            {
                Name = $"{current.Name}({helperIndex})",
            };
            this.nodes.Add(helper);

            // For every tail produce X->cBX followed by X->cB.
            var helperCases = new List<List<INode>>();
            foreach (var tail in tails)
            {
                var tailThenHelper = tail.ToList();
                tailThenHelper.Add(helper);
                helperCases.Add(tailThenHelper);

                helperCases.Add(tail.ToList());
            }

            helper.Cases = helperCases;

            // A->BX: a copy of the base case with the helper appended; the
            // base case itself stays in the list untouched.
            var baseThenHelper = rewrittenCases[helperIndex].ToList();
            baseThenHelper.Add(helper);
            rewrittenCases.Add(baseThenHelper);
        }

        current.Cases = rewrittenCases;
    }
}