Exemplo n.º 1
0
        /// <summary>
        /// Sanity-checks a transformed grammar against the grammar it was produced from.
        /// </summary>
        /// <param name="oldGrammar">grammar before the transformation</param>
        /// <param name="newGrammar">grammar after the transformation</param>
        /// <returns><paramref name="newGrammar"/> itself, when both checks pass</returns>
        /// <exception cref="Exception">
        /// a terminal of the new grammar is a variable of the old grammar, or a variable of the new
        /// grammar is not the left-hand side of any of its productions
        /// </exception>
        private static CFG verifyConsistency(CFG oldGrammar, CFG newGrammar)
        {
            OrderedHashSet <string> formerVariables = oldGrammar.Variables();

            // First check: no symbol may have been a variable before and be a terminal now.
            foreach (string terminal in newGrammar.Terminals())
            {
                if (!formerVariables.Contains(terminal))
                {
                    continue;
                }
                throw new Exception($"grammar is malformed: variable {terminal} cannot be derived");
            }

            // Second check: every variable needs at least one production with it on the left.
            foreach (string variable in newGrammar.Variables())
            {
                bool hasProduction = newGrammar.productions.Any(candidate => candidate.lhs == variable);
                if (!hasProduction)
                {
                    throw new Exception($"grammar is malformed: variable {variable} cannot be derived");
                }
            }
            return newGrammar;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads productions from text, one per non-blank line, written as <c>lhs ::= s1 s2 ...</c>
        /// with the right-hand-side symbols separated by spaces.
        /// </summary>
        /// <param name="reader">source of the textual productions</param>
        /// <param name="firstVariable">
        /// left-hand side of the first production that was parsed, or null when no line was parsed
        /// </param>
        /// <returns>the parsed productions</returns>
        /// <exception cref="Exception">a line could not be parsed; the cause is kept as inner exception</exception>
        private static OrderedHashSet <Production> ParseProductions(StringReader reader, out string firstVariable)
        {
            firstVariable = null;
            var parsed = new OrderedHashSet <Production>();
            string[] arrow = { "::=" };
            char[]   blank = { ' ' };

            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                // Blank lines carry no production.
                if (line.Trim().Length == 0)
                {
                    continue;
                }
                try
                {
                    // A line without "::=" yields a single part, so halves[1] throws and is wrapped below.
                    string[] halves = line.Split(arrow, 2, StringSplitOptions.None);
                    string   lhs    = halves[0].Trim();
                    string[] rhs    = halves[1].Trim().Split(blank, StringSplitOptions.RemoveEmptyEntries);
                    parsed.Add(new Production(lhs, rhs));
                    firstVariable = firstVariable ?? lhs;
                }
                catch (Exception e)
                {
                    throw new Exception($"Error when parsing \"{line}\"", e);
                }
            }
            return parsed;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Simplifies a context-free grammar: keeps only the productions whose left-hand side is
        /// reachable from the start variable and drops productions of the form V ::= V.
        /// </summary>
        /// <param name="grammar">target grammar</param>
        /// <returns>simplified grammar</returns>
        public static CFG Simplify(this CFG grammar)
        {
            var kept     = new OrderedHashSet <Production>();
            var frontier = new OrderedHashSet <string>();
            var visited  = new OrderedHashSet <string>();

            // Breadth-first walk over symbols, starting at the start variable.
            frontier.Add(grammar.startVariable);
            while (frontier.Any())
            {
                foreach (string symbol in frontier)
                {
                    visited.Add(symbol);
                }

                var discovered = new OrderedHashSet <string>();
                foreach (Production production in grammar.productions)
                {
                    if (!frontier.Contains(production.lhs))
                    {
                        continue;
                    }
                    bool isSelfLoop = production.rhs.Length == 1 && production.rhs[0] == production.lhs;
                    if (isSelfLoop)
                    {
                        continue;
                    }
                    kept.Add(production);
                    // Only symbols not visited so far go into the next round.
                    foreach (string symbol in production.rhs.Where(s => !visited.Contains(s)))
                    {
                        discovered.Add(symbol);
                    }
                }
                frontier = discovered;
            }
            return verifyConsistency(grammar, new CFG(grammar.startVariable, kept));
        }
Exemplo n.º 4
0
 /// <summary>
 /// Applies the given grammar options to a grammar.
 /// The only option handled here is <c>GrammarOption.Reversed</c>.
 /// </summary>
 /// <param name="grammar">target grammar</param>
 /// <param name="options">set of options</param>
 /// <returns>the reversed grammar when the option is set, otherwise the grammar itself</returns>
 public static CFG ApplyOptions(this CFG grammar, OrderedHashSet <GrammarOption> options)
 {
     bool reverse = options.Contains(GrammarOption.Reversed);

     return reverse ? Reversed(grammar) : grammar;
 }
Exemplo n.º 5
0
        /// <summary>
        /// Parses a textual grammar description. The text is trimmed and read line by line by
        /// <c>ParseProductions</c>; the start variable is the one that call reports.
        /// </summary>
        /// <param name="grammar">textual context-free grammar description</param>
        /// <returns>described grammar</returns>
        /// <exception cref="ArgumentNullException"><paramref name="grammar"/> is null</exception>
        public static CFG Parse(string grammar)
        {
            if (grammar == null)
            {
                throw new ArgumentNullException(nameof(grammar));
            }

            // StringReader is IDisposable: scope it so it is released even when parsing throws.
            using (var reader = new StringReader(grammar.Trim()))
            {
                OrderedHashSet <Production> productions = ParseProductions(reader, out string startVariable);

                return new CFG(startVariable, productions);
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Adds a list to the set unless the set already holds a list with the same elements in the
 /// same order (compared with <c>SequenceEqual</c>).
 /// </summary>
 /// <param name="set">set of lists to extend</param>
 /// <param name="item">list to add</param>
 private static void AddUnique(this OrderedHashSet <List <string> > set, List <string> item)
 {
     bool alreadyPresent = set.Any(candidate => item.SequenceEqual(candidate));

     if (!alreadyPresent)
     {
         set.Add(item);
     }
 }
Exemplo n.º 7
0
        /// <summary>
        /// Builds the option set for a file from its configuration.
        /// The "Reversed" boolean setting maps to <c>GrammarOption.Reversed</c>.
        /// </summary>
        /// <param name="context">generator execution context passed on to the configuration lookup</param>
        /// <param name="file">file whose configuration is read</param>
        /// <returns>set holding every option that is switched on</returns>
        private OrderedHashSet <GrammarOption> GetOptions(GeneratorExecutionContext context, AdditionalText file)
        {
            var enabled = new OrderedHashSet <GrammarOption>();

            if (GetBoolConfiguration("Reversed", context, file))
            {
                enabled.Add(GrammarOption.Reversed);
            }
            return enabled;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds the reversed grammar: every production keeps its left-hand side and gets its
        /// right-hand side in reverse order. The start variable is unchanged.
        /// </summary>
        /// <param name="grammar">target grammar</param>
        /// <returns>reversed grammar</returns>
        public static CFG Reversed(this CFG grammar)
        {
            var mirrored = new OrderedHashSet <Production>();

            foreach (Production source in grammar.productions)
            {
                int length = source.rhs.Length;
                // Fill a fresh array back to front; the source production is left untouched.
                string[] backwards = new string[length];
                for (int i = 0; i < length; ++i)
                {
                    backwards[i] = source.rhs[length - 1 - i];
                }
                mirrored.Add(new Production(source.lhs, backwards));
            }
            return new CFG(grammar.startVariable, mirrored);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Compares this set with another one as sets: both are copied into a
        /// <see cref="HashSet{T}"/> and those copies are compared.
        /// </summary>
        /// <param name="other">other set</param>
        /// <returns>whether <paramref name="other"/> contains the same items as this set</returns>
        public bool SetEquals(OrderedHashSet <T> other)
        {
            var mine = new HashSet <T>();
            foreach (T element in this)
            {
                mine.Add(element);
            }

            var theirs = new HashSet <T>();
            foreach (T element in other)
            {
                theirs.Add(element);
            }

            return mine.SetEquals(theirs);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Finds an unused symbol of the form <c>variableName + index</c>, trying indices upwards
        /// from the hint.
        /// </summary>
        /// <param name="usedSymbols">symbols that are already taken</param>
        /// <param name="variableName">prefix of the generated name</param>
        /// <param name="indexHint">first index to try; values below 1 are treated as 1</param>
        /// <returns>the first candidate name that is not in <paramref name="usedSymbols"/></returns>
        /// <exception cref="Exception">no positive index yields an unused name</exception>
        private static string GenerateAuxiliaryVariable(OrderedHashSet <string> usedSymbols, string variableName, int indexHint)
        {
            // A hint below 1 (e.g. parsed from a used name such as "S0") used to skip the loop
            // entirely and fail right away, so the search now starts at 1 in that case.
            int index = Math.Max(indexHint, 1);

            // `index > 0` ends the search once ++index wraps past int.MaxValue (unchecked arithmetic).
            while (index > 0)
            {
                string name = variableName + index;
                if (!usedSymbols.Contains(name))
                {
                    return(name);
                }
                ++index;
            }
            throw new Exception("grammar transformation error");
        }
Exemplo n.º 11
0
 /// <summary>
 /// Derives an unused symbol name from an existing variable name.
 /// Returns the name itself when it is free. Otherwise, when the name ends in a number, that
 /// number is used as the starting index after the remaining prefix; if no numeric suffix is
 /// found, the whole name is used as prefix and numbering starts at 2.
 /// </summary>
 /// <param name="usedSymbols">symbols that are already taken</param>
 /// <param name="sourceVariable">variable name to derive from</param>
 /// <returns>a name that is not in <paramref name="usedSymbols"/></returns>
 private static string GenerateAuxiliaryVariable(OrderedHashSet <string> usedSymbols, string sourceVariable)
 {
     if (!usedSymbols.Contains(sourceVariable))
     {
         return(sourceVariable);
     }
     for (int i = 0; i < sourceVariable.Length; ++i)
     {
         // int.TryParse also accepts signed text ("x-1" -> -1). The three-argument overload only
         // searches positive indices, so a negative parse is skipped (the digits after the sign
         // are picked up on a later iteration) and a parsed 0 is forwarded as 1.
         if (int.TryParse(sourceVariable.Substring(i), out int variableIndex) && variableIndex >= 0)
         {
             return(GenerateAuxiliaryVariable(usedSymbols, sourceVariable.Substring(0, i), Math.Max(variableIndex, 1)));
         }
     }
     return(GenerateAuxiliaryVariable(usedSymbols, sourceVariable, 2));
 }
Exemplo n.º 12
0
        /// <summary>
        /// Tells whether any of the given productions starts its right-hand side with its own
        /// left-hand side. All productions are expected to share one left-hand side (asserted in
        /// debug builds).
        /// </summary>
        /// <param name="productions">productions of a single variable</param>
        /// <returns>true when a production of the form <c>v ::= v ...</c> is present</returns>
        private static bool HasDirectLeftRecursion(OrderedHashSet <Production> productions)
        {
            if (productions.Count == 0)
            {
                return false;
            }

            string sharedLhs     = productions[0].lhs;
            bool   leftRecursive = false;

            foreach (Production candidate in productions)
            {
                Debug.Assert(sharedLhs == candidate.lhs);
                if (candidate.rhs.Any() && candidate.rhs[0] == sharedLhs)
                {
                    leftRecursive = true;
                    break;
                }
            }
            return leftRecursive;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Computes the power set of this set, i.e., the set of all sub-sets.
        /// Sub-sets are enumerated with an <c>int</c> bit mask, one bit per element.
        /// </summary>
        /// <returns>power set</returns>
        /// <exception cref="System.InvalidOperationException">
        /// the set has more than 30 elements, so the sub-set count does not fit the bit mask
        /// </exception>
        public OrderedHashSet <OrderedHashSet <T> > PowerSet()
        {
            int n = Count;

            // `1 << n` is a positive int only for n <= 30. For n == 31 it is negative (the loop
            // would not run at all) and for n >= 32 the shift count wraps, so the result would be
            // silently wrong. Fail loudly instead.
            if (n > 30)
            {
                throw new System.InvalidOperationException("cannot compute the power set of a set with " + n + " elements (at most 30 are supported)");
            }
            int pn = 1 << n;
            OrderedHashSet <OrderedHashSet <T> > powerSet = new OrderedHashSet <OrderedHashSet <T> >();

            for (int mask = 0; mask < pn; mask++)
            {
                // Bit i of the mask decides whether element i belongs to this sub-set.
                OrderedHashSet <T> set = new OrderedHashSet <T>();
                for (int i = 0; i < n; i++)
                {
                    if ((mask & (1 << i)) != 0)
                    {
                        set.Add(this[i]);
                    }
                }
                powerSet.Add(set);
            }
            return(powerSet);
        }
Exemplo n.º 14
0
 /// <summary>
 /// Transforms grammar into Greibach normal form, i.e.,
 /// where productions are of the form `v ::= t s1 s2...`, `v` a variable,
 /// `t` a terminal.
 /// The grammar's start variable may be nullable, `v0 ::= epsilon`.
 /// </summary>
 /// <remarks>
 /// Each pass removes epsilon productions and left recursion, then replaces a leading variable
 /// on a right-hand side by every right-hand side of that variable. Passes repeat until
 /// <c>InGreibachNormalForm</c> reports success.
 /// </remarks>
 /// <param name="grammar">target grammar</param>
 /// <returns>grammar in Greibach normal form</returns>
 public static CFG ToGreibachNormalForm(this CFG grammar)
 {
     if (grammar.InGreibachNormalForm())
     {
         return(grammar);
     }
     while (true)
     {
         grammar = grammar.RemoveEpsilonProductions().RemoveLeftRecursion();
         // The grammar is not reassigned while its productions are scanned, so the terminal
         // set is queried once per pass instead of once per production.
         var terminals = grammar.Terminals();
         OrderedHashSet <Production> productions = new OrderedHashSet <Production>();
         foreach (Production production in grammar.productions)
         {
             if (production.IsEpsilonProduction() || terminals.Contains(production.rhs[0]))
             {
                 // Already starts with a terminal (or is an epsilon production): keep as is.
                 productions.Add(production);
             }
             else
             {
                 // Starts with a variable: substitute each of that variable's right-hand sides
                 // in front of the remaining symbols.
                 string rhsStartVariable = production.rhs[0];
                 foreach (Production otherProduction in grammar.productions)
                 {
                     if (otherProduction.lhs == rhsStartVariable)
                     {
                         List <string> newRHS = new List <string>();
                         newRHS.AddRange(otherProduction.rhs);
                         newRHS.AddRange(production.rhs.Skip(1));
                         productions.Add(new Production(production.lhs, newRHS.ToArray()));
                     }
                 }
             }
         }
         grammar = verifyConsistency(grammar, new CFG(grammar.startVariable, productions));
         if (grammar.InGreibachNormalForm())
         {
             return(grammar);
         }
     }
 }
Exemplo n.º 15
0
 /// <summary>
 /// Checks whether the grammar has a left-recursive production, i.e.,
 /// of the form `v ::= v s1 s2...` (can also be indirect).
 /// Only the first symbol of each right-hand side is followed.
 /// </summary>
 /// <param name="grammar">context-free grammar</param>
 /// <returns>whether grammar has left-recursion</returns>
 public static bool HasLeftRecursion(this CFG grammar)
 {
     // Queried once: the variable set was previously re-requested for every inspected production.
     OrderedHashSet <string> variables = grammar.Variables();

     foreach (string variable in variables)
     {
         // Variables found in leftmost position starting from `variable`; grown until a pass
         // adds nothing new.
         OrderedHashSet <string> reachableVariables = new OrderedHashSet <string>();
         reachableVariables.Add(variable);
         int n = 0;
         while (reachableVariables.Count() > n)
         {
             n = reachableVariables.Count();
             // Collected separately so the set being enumerated is not modified.
             OrderedHashSet <string> newReachableVariables = new OrderedHashSet <string>();
             foreach (string reachableVariable in reachableVariables)
             {
                 foreach (Production production in grammar.productions)
                 {
                     if (production.lhs == reachableVariable && production.rhs.Length > 0)
                     {
                         string first = production.rhs[0];
                         if (first == variable)
                         {
                             // Came back to the variable we started from.
                             return(true);
                         }
                         else if (variables.Contains(first))
                         {
                             newReachableVariables.Add(first);
                         }
                     }
                 }
             }
             foreach (string newVariable in newReachableVariables)
             {
                 reachableVariables.Add(newVariable);
             }
         }
     }
     return(false);
 }
Exemplo n.º 16
0
        /// <summary>
        /// Removes left-recursive productions from the grammar.
        /// Implementation of Paull's algorithm: variables are processed in order; for variable
        /// <c>vi</c>, every production <c>vi ::= vj ...</c> with an earlier variable <c>vj</c> is
        /// replaced by the expansions of <c>vj</c>, then direct left recursion of <c>vi</c> is removed.
        /// </summary>
        /// <remarks>
        /// A grammar without left recursion is returned as is. Otherwise the grammar is first
        /// simplified and stripped of epsilon productions. An epsilon production of the start
        /// variable is set aside during the transformation and put back at the end.
        /// </remarks>
        /// <param name="grammar">target grammar</param>
        /// <returns>grammar without left recursion</returns>
        public static CFG RemoveLeftRecursion(this CFG grammar)
        {
            if (!grammar.HasLeftRecursion())
            {
                return(grammar);
            }
            grammar = grammar.Simplify().RemoveEpsilonProductions();
            if (!grammar.HasLeftRecursion())
            {
                return(grammar);
            }
            bool epsilonIsDerived = grammar.productions.Contains(new Production(grammar.startVariable));
            OrderedHashSet <Production> productions = new OrderedHashSet <Production>();

            // Work on a copy so the input grammar's production set is not modified.
            foreach (Production production in grammar.productions)
            {
                productions.Add(production);
            }
            if (epsilonIsDerived)
            {
                productions.Remove(new Production(grammar.startVariable));
            }
            List <string>           variables   = new List <string>(grammar.Variables());
            OrderedHashSet <string> usedSymbols = new OrderedHashSet <string>();

            // Names that auxiliary variables must not collide with.
            foreach (string symbol in grammar.Symbols())
            {
                usedSymbols.Add(symbol);
            }
            for (int i = 0; i < variables.Count; ++i)
            {
                string vi = variables[i];
                for (int j = 0; j < i; ++j)
                {
                    string vj = variables[j];
                    OrderedHashSet <Production> toRemove = new OrderedHashSet <Production>();
                    OrderedHashSet <Production> toAdd    = new OrderedHashSet <Production>();
                    foreach (Production production in productions)
                    {
                        if (production.lhs == vi && production.rhs.Length > 0 && production.rhs[0] == vj)
                        {
                            // Every `vi ::= vj ...` production has to be substituted. The scan
                            // used to stop after the first match, which left the remaining ones
                            // (and any left recursion through them) in place.
                            toRemove.Add(production);
                            foreach (Production otherProduction in productions)
                            {
                                if (otherProduction.lhs == vj)
                                {
                                    List <string> newRHS = new List <string>();
                                    newRHS.AddRange(otherProduction.rhs);
                                    newRHS.AddRange(production.rhs.Skip(1));
                                    toAdd.Add(new Production(vi, newRHS.ToArray()));
                                }
                            }
                        }
                    }
                    // Changes are applied after the scan so the set is not modified while enumerated.
                    foreach (Production addedProduction in toAdd)
                    {
                        productions.Add(addedProduction);
                    }
                    foreach (Production removedProduction in toRemove)
                    {
                        productions.Remove(removedProduction);
                    }
                }
                // Split off the productions of vi, remove their direct left recursion, and merge
                // the result back with the productions of all other variables.
                OrderedHashSet <Production> productionsWithoutViDirectLeftRecursion = new OrderedHashSet <Production>();
                OrderedHashSet <Production> newViProductions = new OrderedHashSet <Production>();
                foreach (Production production in productions)
                {
                    if (production.lhs != vi)
                    {
                        productionsWithoutViDirectLeftRecursion.Add(production);
                    }
                    else
                    {
                        newViProductions.Add(production);
                    }
                }
                foreach (Production newViProduction in RemoveDirectLeftRecursion(usedSymbols, newViProductions))
                {
                    productionsWithoutViDirectLeftRecursion.Add(newViProduction);
                }
                productions = productionsWithoutViDirectLeftRecursion;
            }
            if (epsilonIsDerived)
            {
                productions.Add(new Production(grammar.startVariable));
            }
            return(verifyConsistency(grammar, new CFG(grammar.startVariable, productions)));
        }
Exemplo n.º 17
0
        /// <summary>
        /// Remove epsilon productions.
        /// If the grammar derives epsilon, then only the start variable may derive epsilon, but then it
        /// would not be found on the right-hand side of any production.
        /// </summary>
        /// <remarks>
        /// Works in rounds: each round looks at the variables that currently have an epsilon
        /// production, rewrites every production with and without those variables, and drops the
        /// epsilon productions of all of them except the start variable. Rounds repeat until no
        /// variable other than the start variable has an epsilon production.
        /// NOTE(review): a round only considers variables with a direct epsilon production;
        /// termination for grammars with cyclic unit productions (e.g. `a ::= b`, `b ::= a`)
        /// should be confirmed.
        /// </remarks>
        /// <param name="grammar">target grammar</param>
        /// <returns>grammar without epsilon productions</returns>
        public static CFG RemoveEpsilonProductions(this CFG grammar)
        {
            bool hasNullableStartSymbolInRHS = grammar.HasNullableStartSymbolInRHS();

            // Nothing to do: return the very same grammar instance.
            if (!grammar.HasEpsilonProductionsExceptStartVariable() && !hasNullableStartSymbolInRHS)
            {
                return(grammar);
            }
            if (hasNullableStartSymbolInRHS)
            {
                // Introduce a fresh start variable with productions `new ::= old` and
                // `new ::= epsilon`, then run the removal again on that grammar.
                OrderedHashSet <Production> newProductions = new OrderedHashSet <Production>();
                foreach (Production production in grammar.productions)
                {
                    newProductions.Add(production);
                }
                string newStartVariable = GenerateAuxiliaryVariable(grammar.Symbols(), grammar.startVariable);
                newProductions.Add(new Production(newStartVariable, grammar.startVariable));
                newProductions.Add(new Production(newStartVariable));
                return(RemoveEpsilonProductions(new CFG(newStartVariable, newProductions)));
            }
            OrderedHashSet <Production> productions = grammar.productions;

            while (true)
            {
                // nullableVariables: variables with an epsilon production in the current set.
                // nullVariables: the subset of those that have no other production.
                OrderedHashSet <string> nullableVariables = new OrderedHashSet <string>(), nullVariables = new OrderedHashSet <string>();
                foreach (Production production in productions)
                {
                    if (production.IsEpsilonProduction())
                    {
                        nullableVariables.Add(production.lhs);
                        nullVariables.Add(production.lhs);
                    }
                }
                foreach (Production production in productions)
                {
                    if (!production.IsEpsilonProduction())
                    {
                        nullVariables.Remove(production.lhs);
                    }
                }
                // Done when no epsilon production is left, or only the start variable has one.
                if (nullableVariables.Count == 0 || nullableVariables.Count == 1 && nullableVariables.Contains(grammar.startVariable))
                {
                    return(verifyConsistency(grammar, new CFG(grammar.startVariable, productions).Simplify()));
                }
                OrderedHashSet <Production> newProductions = new OrderedHashSet <Production>();
                foreach (Production production in productions)
                {
                    // rhss holds every variant of the right-hand side built so far; it starts
                    // with the empty sequence and is extended one symbol at a time.
                    OrderedHashSet <List <string> > rhss = new OrderedHashSet <List <string> >();
                    rhss.Add(new List <string>());
                    foreach (string symbol in production.rhs)
                    {
                        OrderedHashSet <List <string> > newRHSs = new OrderedHashSet <List <string> >();
                        foreach (var rhs in rhss)
                        {
                            if (nullVariables.Contains(symbol))
                            {
                                // Symbol has only an epsilon production: leave it out.
                                newRHSs.AddUnique(rhs);
                            }
                            else if (!nullableVariables.Contains(symbol))
                            {
                                // Symbol has no epsilon production: always keep it.
                                List <string> newRHS = new List <string>();
                                newRHS.AddRange(rhs);
                                newRHS.Add(symbol);
                                newRHSs.Add(newRHS);
                            }
                            else
                            {
                                // Symbol has an epsilon production and others: emit both the
                                // variant without it and the variant with it.
                                newRHSs.AddUnique(rhs);
                                List <string> newRHS = new List <string>();
                                newRHS.AddRange(rhs);
                                newRHS.Add(symbol);
                                newRHSs.Add(newRHS);
                            }
                        }
                        rhss = newRHSs;
                    }
                    foreach (var newRHS in rhss)
                    {
                        newProductions.Add(new Production(production.lhs, newRHS.ToArray()));
                    }
                }
                // Drop the epsilon productions handled in this round; the start variable keeps its own.
                foreach (string nullableVariable in nullableVariables)
                {
                    if (nullableVariable != grammar.startVariable)
                    {
                        newProductions.Remove(new Production(nullableVariable));
                    }
                }
                productions = newProductions;
            }
        }
Exemplo n.º 18
0
        /// <summary>
        /// Removes direct left recursion from the productions of a single variable.
        /// Productions <c>v ::= v a</c> and <c>v ::= b</c> become <c>v ::= b | b v'</c> and
        /// <c>v' ::= a | a v'</c>, where <c>v'</c> is a freshly generated auxiliary variable.
        /// Productions of the form <c>v ::= v</c> are dropped.
        /// </summary>
        /// <param name="usedSymbols">symbols already taken; the auxiliary variable is added to it</param>
        /// <param name="productions">productions that all share one left-hand side</param>
        /// <returns>
        /// the given set itself when it has no direct left recursion, otherwise a new set with the
        /// rewritten productions
        /// </returns>
        private static OrderedHashSet <Production> RemoveDirectLeftRecursion(OrderedHashSet <string> usedSymbols, OrderedHashSet <Production> productions)
        {
            if (!HasDirectLeftRecursion(productions))
            {
                return productions;
            }

            // Step 1: discard self-loops `v ::= v`.
            var remaining = new OrderedHashSet <Production>();
            foreach (Production candidate in productions)
            {
                Debug.Assert(candidate.rhs.Length > 0);
                bool isSelfLoop = candidate.rhs.Length == 1 && candidate.lhs == candidate.rhs[0];
                if (!isSelfLoop)
                {
                    remaining.Add(candidate);
                }
            }
            if (remaining.Count == 0)
            {
                return remaining;
            }

            string head = remaining[0].lhs;
            foreach (Production candidate in remaining)
            {
                Debug.Assert(candidate.lhs == head);
            }

            // Step 2: separate the productions that start with the head from the rest.
            var recursive = new OrderedHashSet <Production>();
            var direct    = new OrderedHashSet <Production>();
            foreach (Production candidate in remaining)
            {
                OrderedHashSet <Production> bucket = candidate.rhs[0] == head ? recursive : direct;
                bucket.Add(candidate);
            }
            if (recursive.Count == 0)
            {
                return remaining;
            }

            // Step 3: rebuild the set around a new auxiliary variable.
            remaining.Clear();
            string tailVariable = GenerateAuxiliaryVariable(usedSymbols, head);
            usedSymbols.Add(tailVariable);
            string[] tailSuffix = { tailVariable };

            foreach (Production alternative in direct)
            {
                // v ::= b  and  v ::= b v'
                remaining.Add(new Production(head, alternative.rhs.ToArray()));
                remaining.Add(new Production(head, alternative.rhs.Concat(tailSuffix).ToArray()));
            }
            foreach (Production loop in recursive)
            {
                // v' ::= a  and  v' ::= a v'
                string[] rest = loop.rhs.Skip(1).ToArray();
                Debug.Assert(rest.Length > 0);
                remaining.Add(new Production(tailVariable, rest));
                remaining.Add(new Production(tailVariable, rest.Concat(tailSuffix).ToArray()));
            }
            return remaining;
        }