Пример #1
0
 /// <summary>
 /// Copy constructor. The name and every production term are wrapped in new
 /// <c>NonTerminalObject</c> instances built from the corresponding objects of
 /// <paramref name="otherRule"/>; the remaining fields are copied by value.
 /// </summary>
 /// <param name="otherRule">The rule to copy from.</param>
 public Rule(Rule otherRule)
 {
     Name = new NonTerminalObject(otherRule.Name);

     // Build the production array term by term so each slot gets its own object.
     var sourceTerms = otherRule.Production;
     var copiedTerms = new NonTerminalObject[sourceTerms.Length];
     for (var index = 0; index < sourceTerms.Length; index++)
     {
         copiedTerms[index] = new NonTerminalObject(sourceTerms[index]);
     }
     Production = copiedTerms;

     HeadPosition = otherRule.HeadPosition;
     ComplementPosition = otherRule.ComplementPosition;
     Number = otherRule.Number;
     Occurrences = otherRule.Occurrences;
 }
Пример #2
0
 /// <summary>
 /// Creates a state for rule <paramref name="r"/> with the dot at
 /// <paramref name="dotIndex"/>, starting in column <paramref name="c"/>.
 /// </summary>
 /// <param name="r">The rule this state tracks.</param>
 /// <param name="dotIndex">Position of the dot inside the rule's production.</param>
 /// <param name="c">The column in which the state starts.</param>
 /// <param name="n">The node attached to the state (may be null).</param>
 public State(Rule r, int dotIndex, Column c, Node n)
 {
     Rule = r;
     DotIndex = dotIndex;
     Node = n;

     // A freshly created state has a start column but no end column yet.
     StartColumn = c;
     EndColumn = null;

     // Take the current value of the shared counter as this state's number, then advance it.
     StateNumber = stateCounter++;

     // Negative placeholder; presumably overwritten by the caller with a real value.
     LogProbability = -1;
 }
Пример #3
0
        /// <summary>
        /// Generates a new rule from randomly chosen existing non-terminals and adds it to
        /// <paramref name="grammar"/>.
        /// </summary>
        /// <remarks>
        /// Up to <c>NumberOfRetries</c> candidates are built. Each candidate has one or two daughters
        /// (the first is never the start symbol), a random head and complement position, and a name
        /// that is either the head's type plus "P" or, with roughly 10% probability, the start symbol.
        /// The first candidate accepted by <c>Grammar.AreHeadRelationsConsistent</c> is passed to
        /// <c>Grammar.AddRule</c>.
        /// </remarks>
        /// <param name="grammar">Grammar that supplies the non-terminals and receives the new rule.</param>
        /// <returns>
        /// <c>true</c> if a candidate was handed to <c>AddRule</c>; <c>false</c> if every retry was rejected.
        /// </returns>
        /// <exception cref="Exception">
        /// The head term of a candidate could not be looked up in <c>NonTerminalsTypeDictionary</c>.
        /// </exception>
        public bool InsertRule(Grammar grammar)
        {
            for (var i = 0; i < NumberOfRetries; i++)
            {
                var productions = new List<string>();
                var randomDaughter = grammar.StartSymbol;
                while (randomDaughter == grammar.StartSymbol)
                    randomDaughter = grammar.GetRandomNonTerminal(); //the first daughter is never the start symbol.

                productions.Add(randomDaughter);

                //with probability one half the rule gets a second daughter.
                if (_rand.NextDouble() < 0.5f)
                    productions.Add(grammar.GetRandomNonTerminal());

                var newRule = new Rule();
                newRule.Occurrences = 1;
                newRule.Production = productions.Select(x => new NonTerminalObject(x)).ToArray();

                newRule.HeadPosition = _rand.Next(newRule.Production.Length);
                newRule.ComplementPosition = _rand.Next(newRule.Production.Length);

                if (newRule.HeadTerm == grammar.StartSymbol)
                    //never let the head be the start symbol. the start symbol can only be the second term (see above).
                    newRule.HeadPosition = 0;


                var ruleName = grammar.StartSymbol;
                if (_rand.NextDouble() < 0.9f)
                    //90% probability of projecting regular head structure. 10% allow to project to the START symbol.
                {
                    try
                    {
                        ruleName = grammar.NonTerminalsTypeDictionary[newRule.HeadTerm] + "P";
                    }
                    catch (Exception e)
                    {
                        //include the offending term in the message (the original format string had no
                        //placeholder, so the argument was silently dropped) and keep the root cause.
                        throw new Exception(string.Format("rule head term {0} not found", newRule.HeadTerm), e);
                    }
                }
                newRule.Name = new NonTerminalObject(ruleName);

                if (grammar.AreHeadRelationsConsistent(newRule))
                {
                    grammar.AddRule(newRule);
                    return true;
                }
            }
            return false;
        }
Пример #4
0
        /// <summary>
        /// Predict step: for each rule in <paramref name="ruleList"/> that is compatible with the stack
        /// carried by <paramref name="currObject"/>, builds a stack-instantiated copy of the rule and adds
        /// a new state (dot at position 0, starting in <paramref name="col"/>) to <paramref name="col"/>.
        /// </summary>
        /// <param name="col">Column that receives the predicted states.</param>
        /// <param name="ruleList">
        /// Candidate rules. When the <c>generator</c> flag is set this list is replaced by a single rule
        /// obtained from <c>Grammar.GetRandomRuleForAGivenLHS</c>.
        /// </param>
        /// <param name="state">The state that triggered the prediction; used here only for debug output.</param>
        /// <param name="currObject">The term after the dot in <paramref name="state"/>, including its stack.</param>
        /// <exception cref="Exception">
        /// The log probability looked up in <c>ruleLogProbabilities</c> for a predicted rule is negative.
        /// </exception>
        private void Predict(Column col, List<Rule> ruleList, State state, NonTerminalObject currObject)
        {
            if (generator)
            {
                //in generator mode, predict only one randomly chosen rule for this left-hand side.
                var rule = Grammar.GetRandomRuleForAGivenLHS(currObject.NonTerminal, true);
                ruleList = new List<Rule> {rule};
            }

            foreach (var rule in ruleList)
            {
                //TODO: change later to skip -all- rules whose derivation leads to the empty string.
                //I.e,  A-> B.C , C -> D E. D -> epsilon, E -> epsilon. C itself is not an epsilon rule.
                if (rule.IsEpsilonRule() && Grammar.nullableProductions.ContainsKey(currObject))
                {
                    //states that are the result of a spontaneous dot shift (due to nullable production)
                    //have already been added to the agendas in Column.Add()
                    continue;
                }


                //if current stack is empty but predicted stack is not, mismatch - do not predict this rule.
                if (currObject.IsStackEmpty() && !rule.IsInitialOrDotStack()) continue;


                //prepare new rule based on the stack information contained in the current state
                //and based on the predicted rule.
                var createdRule = new Rule(rule);


                //if the rule is not a stack manipulating rule,
                if (rule.IsInitialRule())
                {
                    var complementPositionObject = createdRule.Production[createdRule.ComplementPosition];

                    //if current stack is not empty, but the complement position does not allow for stacks (POS),
                    //mismatch - do not predict this rule.
                    if (!currObject.IsStackEmpty() && Grammar.IsPOS(complementPositionObject.NonTerminal)) continue;

                    //the stack of the LHS of the created rule is the stack of the current object:
                    createdRule.Name = currObject;

                    //copy the stack to the complement position.
                    complementPositionObject.Stack = currObject.Stack;
                }
                else
                {
                    //stack manipulating rule: the rule's own stack top must be ".", or the current
                    //object's top must be Epsilon or equal to the rule's top; otherwise skip.
                    if (createdRule.Name.Stack.Peek() != "." && currObject.Stack.Top != Grammar.Epsilon &&
                        currObject.Stack.Top != createdRule.Name.Stack.Peek())
                        continue;
                    //if tops of the stacks do not match, continue. e.g created rule PP[PP] -> epsilon, current object: PP[NP].

                    //create left hand side of new rule.
                    createdRule.Name.Stack = currObject.Stack;
                    createdRule.Name.NonTerminal = currObject.NonTerminal;

                    //create right hand side of new rule.
                    NonTerminalStack contentOfDot;
                    if (rule.Name.Stack.Peek() == ".")
                        contentOfDot = currObject.Stack; //e.g. A[..]
                    else
                        contentOfDot = currObject.Stack.GetPrefixListStackObjectOfGivenTop(rule.Name.Stack.Peek());
                            //e.g A[..X]

                    //instantiate the stack of every right-hand side term that carries one in the schema rule.
                    for (var i = 0; i < rule.Production.Length; i++)
                    {
                        var s = rule.Production[i].Stack;

                        if (s != null)
                        {
                            if (s.Peek() == ".")
                                createdRule.Production[i].Stack = contentOfDot; // e.g, A[..] pop rule.
                            else if (s.PrefixList == null)
                                createdRule.Production[i].Stack = s; //e.g. A[X] //secondary constituent.
                            else
                                createdRule.Production[i].Stack = new NonTerminalStack(s.Peek(), contentOfDot);
                            //e.g. A[..X] - push rule.

                            //calculate the new weight of the top of the stack from the weights of its sons.
                            //if (createdRule.Production[i].Stack != null)
                            //    createdRule.Production[i].Stack.Weight = createdRule.Production[i].Stack.PrefixList != null ? createdRule.Production[i].Stack.PrefixList.Sum(x => x.Weight) : 1;
                        }
                    }
                }


                //the probability is looked up by the number of the original (schema) rule, not the copy.
                var newState = new State(createdRule, 0, col, null) {LogProbability = ruleLogProbabilities[rule.Number]};

                if (newState.LogProbability < 0)
                    throw new Exception("wrong probability");

                var added = col.AddState(newState, ParsingOperation.Predict);

                if (Debug)
                    Console.WriteLine("{0} & {1} & {2} & Predicted from State {3}, added: {4}\\\\", newState.StateNumber,
                        newState,
                        col.Index, state.StateNumber, added);
            }
        }
Пример #5
0
        /// <summary>
        /// Runs the chart parser over <paramref name="text"/> (complete, predict, scan per column) and
        /// returns the first child of the node of the first gamma state found in the final column.
        /// </summary>
        /// <remarks>
        /// Passing <c>null</c> switches the parser into generator mode: the <c>generator</c> flag is set and
        /// the table is built over 100 empty tokens that are filled in by the scan step.
        /// Exceptions raised inside the column loop are caught and written to the console; the method then
        /// fails with the generic exception described below.
        /// NOTE(review): the <c>generator</c> flag is set here but not reset in this method - confirm that
        /// a later call with non-null text is not affected.
        /// NOTE(review): the generator branch of the scan step indexes <c>table[col.Index + 1]</c> without
        /// the bounds check used in the non-generator branch - confirm the last column cannot reach it.
        /// </remarks>
        /// <param name="text">Whitespace separated sentence to parse, or <c>null</c> to generate.</param>
        /// <returns>The first child of the node of the first gamma state in the final column.</returns>
        /// <exception cref="Exception">
        /// A word of <paramref name="text"/> is not in the vocabulary, or no gamma state was found /
        /// an error occurred while processing ("Parsing Failed!" or "Generating Failed!").
        /// </exception>
        public Node ParseSentence(string text)
        {
            string[] arr;
            if (text == null)
            {
                generator = true;
                arr = Enumerable.Repeat("", 100).ToArray();
            }
            else
                arr = text.Split();


            //check below that the text appears in the vocabulary
            if (!generator && arr.Any(str => !Grammar.Vocabulary.ContainsWord(str)))
                throw new Exception("word in text does not appear in the vocabulary.");

            //one column per token plus column 0, which holds the start state and has an empty token.
            var table = new Column[arr.Length + 1];
            for (var i = 1; i < table.Length; i++)
                table[i] = new Column(i, arr[i - 1], Grammar);
            table[0] = new Column(0, "", Grammar);
            State.stateCounter = 0;
            var startRule = new Rule(0, GammaRule, new[] {Grammar.StartSymbol}, 0, 0);
            //startRule.Production[0].Stack = new NonTerminalStack(Grammar.EPSILON);
            var startState = new State(startRule, 0, table[0], null);
            startState.LogProbability = 0.0f;
            Node.grammar = Grammar;
            table[0].AddState(startState, ParsingOperation.Scan);
            var finalColumn = table[table.Length - 1];
            try
            {
                foreach (var col in table)
                {
                    //number of states processed in this column; checked by TestForTooManyStatesInColumn.
                    var count = 0;
                    if (generator && !col.States.Any())
                    {
                        //generation stopped: the previous column is the last one that holds states.
                        finalColumn = table[col.Index - 1];
                        break;
                    }
                    //1. complete
                    while (col.ActionableCompleteStates.Any())
                    {
                        count++;
                        TestForTooManyStatesInColumn(count, Debug);

                        var states = col.ActionableCompleteStates.First().Value;

                        var state = states.Dequeue();
                        if (!states.Any())
                            col.ActionableCompleteStates.Remove(state);

                        if (generator)
                            state.LogProbability = 0;

                        Complete(col, state);
                    }

                    //2. predict after complete:
                    while (col.ActionableNonCompleteStates.Any())
                    {
                        if (col.ActionableCompleteStates.Any())
                            throw new Exception(
                                "completed states queue should always be empty while processing predicted states.");
                        count++;
                        TestForTooManyStatesInColumn(count, Debug);

                        var state = col.ActionableNonCompleteStates.Dequeue();
                        if (generator)
                            state.LogProbability = 0;

                        var currObject = state.NextProductionTerm();
                        var term = currObject.NonTerminal;
                        var ruleList = Grammar[term];

                        if (ruleList != null)
                            Predict(col, ruleList, state, currObject);
                    }
                    //3. scan after predict.
                    foreach (var state in col)
                    {
                        if (!state.IsCompleted())
                        {
                            var currObject = state.NextProductionTerm();
                            var term = currObject.NonTerminal;

                            if (!generator)
                            {
                                //parsing: scan only if a next column exists and its token can be the term.
                                if (col.Index + 1 < table.Length &&
                                    Grammar.Vocabulary[table[col.Index + 1].Token].Contains(term))
                                    Scan(table[col.Index + 1], state, term, table[col.Index + 1].Token);
                            }
                            else
                            {
                                if (Grammar.Vocabulary.POSWithPossibleWords.ContainsKey(term))
                                {
                                    var ruleList = Grammar[term];
                                    //if the term is a constituent, generate it given some probability. otherwise continue.
                                    if (ruleList != null)
                                    {
                                        if (rand.NextDouble() > ChanceToGenerateConstituent) continue;
                                        if (ruleList[0].IsEpsilonRule())
                                            continue;
                                        //if we generated a predicted epsilon rule for that constituent, don't scan.
                                    }

                                    //selecting random word from vocabulary: (uncomment the next line)
                                    //var index = rand.Next(Vocabulary.POSWithPossibleWords[currentNode.Name].Count);

                                    //always selecting the same word from vocabulary is considerably faster because I do not re-parse the same sentence
                                    //but keep the counts of appearances of the sentence.
                                    //the parse of two sentences with identical sequence of POS is the same - regardless of the actual word selected.

                                    if (table[col.Index + 1].Token == "")
                                        //only write a token if the next column does not have one yet; it may already
                                        //have been written by a previous scan
                                        //(for instance NP -> John, NP -> D N, D -> the, "John" was already written before "the")
                                    {
                                        var index = 0;
                                        table[col.Index + 1].Token =
                                            Grammar.Vocabulary.POSWithPossibleWords[term].ElementAt(index);
                                        Scan(table[col.Index + 1], state, term, table[col.Index + 1].Token);
                                    }
                                }
                            }
                        }
                    }
                }

                //return on the first gamma state of the final column, if there is one.
                foreach (var state in finalColumn.GammaStates)
                    return state.Node.Children[0];
            }
            catch (LogException e)
            {
                var s = e.ToString();
                Console.WriteLine(s);
                Console.WriteLine(string.Format("sentence: {0}, grammar: {1}", text, Grammar));
            }
            catch (Exception e)
            {
                var s = e.ToString();
                Console.WriteLine(s);
            }

            //reached when no gamma state was found or an exception was caught above.
            if (!generator)
                throw new Exception("Parsing Failed!");
            throw new Exception("Generating Failed!");
        }
Пример #6
0
        /// <summary>
        /// Breadth-first search over complement terms, starting from the complement of
        /// <paramref name="landingSiteRule"/>, looking for a rule whose complement term is a landing site.
        /// </summary>
        /// <remarks>
        /// A rule is followed only if its complement is at position 0, or at position 1 with a
        /// non-complement term contained in <paramref name="tracesTerms"/>. Each rule number is
        /// examined at most once.
        /// </remarks>
        /// <param name="landingSiteRule">Rule whose complement term seeds the search.</param>
        /// <param name="tracesTerms">Terms accepted as the non-complement when the complement is second.</param>
        /// <param name="landinbgsites">Terms that count as landing sites.</param>
        /// <returns><c>true</c> if a followed rule has a complement term in <paramref name="landinbgsites"/>.</returns>
        public bool AllowsTracesPathBetweenLandingSites(Rule landingSiteRule, HashSet<string> tracesTerms,
            HashSet<string> landinbgsites)
        {
            var seenRuleNumbers = new HashSet<int>();
            var pending = new Queue<string>();
            pending.Enqueue(landingSiteRule.ComplementTerm);

            while (pending.Count > 0)
            {
                var lhs = pending.Dequeue();
                if (!Rules.ContainsKey(lhs))
                    continue;

                foreach (var candidate in Rules[lhs])
                {
                    // HashSet.Add returns false when the number was already present.
                    if (!seenRuleNumbers.Add(candidate.Number))
                        continue;

                    var followable = candidate.ComplementPosition == 0 ||
                                     (candidate.ComplementPosition == 1 &&
                                      tracesTerms.Contains(candidate.NonComplementTerm));
                    if (!followable)
                        continue;

                    if (landinbgsites.Contains(candidate.ComplementTerm))
                        return true;

                    pending.Enqueue(candidate.ComplementTerm);
                }
            }

            return false;
        }
Пример #7
0
        /// <summary>
        /// Checks the head relation of <paramref name="rule"/>.
        /// </summary>
        /// <param name="rule">The rule to check.</param>
        /// <returns>
        /// <c>true</c> if the head term is a POS, or if the rule has more than one production term and
        /// its non-head term is not a POS; otherwise <c>false</c>.
        /// </returns>
        public bool AreHeadRelationsConsistent(Rule rule)
        {
            var headIsPos = IsPOS(rule.HeadTerm);

            // The non-head term is only looked at when the production has a second term.
            var nonHeadIsNotPos = rule.Production.Length > 1 && !IsPOS(rule.NonHeadTerm);

            return headIsPos || nonHeadIsNotPos;
        }
Пример #8
0
        /// <summary>
        /// Adds <paramref name="rule"/> to the grammar unless a non-epsilon rule with the same production,
        /// head position and start-symbol flag is already registered.
        /// </summary>
        /// <remarks>
        /// The rule name is first normalised by <c>EnforceHeadRelations</c>. If the production already
        /// exists under another name (A -> B C exists and D -> B C arrives, so A = D), nothing is added.
        /// Epsilon rules have no inverse key and are always added.
        /// </remarks>
        /// <param name="rule">The rule to add; its name, number and occurrences may be modified.</param>
        /// <returns>
        /// For an epsilon rule, the rule's own name; otherwise the name registered for its production,
        /// which is useful when replacing symbols that are no longer used.
        /// </returns>
        public string AddRule(Rule rule)
        {
            EnforceHeadRelations(rule);

            var inverseKey = new InverseKeyType(rule.Production, rule.HeadPosition, rule.Name.NonTerminal == StartSymbol);
            var producesEpsilon = rule.IsEpsilonRule();

            var productionAlreadyKnown = !producesEpsilon && inverseRules.ContainsKey(inverseKey);
            if (!productionAlreadyKnown)
            {
                // The counter must be bumped first: the next available number depends on it.
                numberOfRules++;
                rule.Number = GetNextAvailableRuleNumber();

                if (producesEpsilon)
                    nullableProductions[rule.Name] = 1.0f; //TODO - temporary probability of 1.0.
                else
                    inverseRules[inverseKey] = rule.Name.NonTerminal;

                AddNonTerminalCounts(rule);

                var lhs = rule.Name.NonTerminal;
                if (Rules.ContainsKey(lhs))
                {
                    // A rule that arrives without occurrences gets the average of its siblings.
                    if (rule.Occurrences == 0)
                    {
                        var siblings = Rules[lhs];
                        rule.Occurrences = siblings.Sum(x => x.Occurrences)/siblings.Count;
                    }
                }
                else
                {
                    // First rule for this left-hand side.
                    Rules[lhs] = new List<Rule>();
                    rule.Occurrences = 1;
                }

                Rules[lhs].Add(rule);
                ruleNumberDictionary[rule.Number] = rule;
            }

            return producesEpsilon ? rule.Name.NonTerminal : inverseRules[inverseKey];
        }
Пример #9
0
        /// <summary>
        /// Builds the stack-manipulating (push / pop) rules for every moveable and adds them to the grammar.
        /// Does nothing when there are no landing sites or no moveables.
        /// </summary>
        /// <remarks>
        /// For each moveable M, in this order: a pop rule M[M] -> epsilon; one push rule per landing site L,
        /// L[.] -> M L[. M]; and a second pop rule IP[. M] -> M[M] VP[.]. All rules are collected first and
        /// added afterwards.
        /// </remarks>
        public void GenerateDerivedRulesFromSchema()
        {
            if (!(LandingSites.Any() && Moveables.Any())) return;

            var derivedRules = new List<Rule>();

            foreach (var moveable in Moveables)
            {
                // pop rule: M[M] -> epsilon
                var popToEpsilon = new Rule(1, moveable, new[] {Epsilon}, 0, 0);
                popToEpsilon.Name.Stack = new NonTerminalStack(moveable);
                derivedRules.Add(popToEpsilon);

                // push rules: L[.] -> M L[. M]
                foreach (var landingSiteNonTerminal in LandingSites)
                {
                    var pushRule = new Rule(1, landingSiteNonTerminal, new[] {moveable, landingSiteNonTerminal}, 1, 1);
                    pushRule.Name.Stack = new NonTerminalStack(".");
                    pushRule.Production[1].Stack = new NonTerminalStack(".").Push(moveable);
                    derivedRules.Add(pushRule);
                }

                // pop rule: IP[. M] -> M[M] VP[.]
                var popAtIp = new Rule(1, "IP", new[] {moveable, "VP"}, 1, 1);
                popAtIp.Name.Stack = new NonTerminalStack(".").Push(moveable);
                popAtIp.Production[0].Stack = new NonTerminalStack(moveable);
                popAtIp.Production[1].Stack = new NonTerminalStack(".");
                derivedRules.Add(popAtIp);
            }

            for (var index = 0; index < derivedRules.Count; index++)
                AddRule(derivedRules[index]);
        }
Пример #10
0
        /// <summary>
        /// Renames the left-hand side of <paramref name="rule"/> so that it is a projection of its head.
        /// </summary>
        /// <remarks>
        /// If the head is some non-POS projection, the LHS of the rule is of the same projection.
        /// For instance, if X -> NP ADJUNCT and NP is the head, then X = NP, i.e. NP -> NP ADJUNCT.
        /// Another example: X -> NP VP; if VP is the head, then X = VP, i.e. VP -> NP VP.
        /// Rules named after the start symbol and rules whose head term is epsilon are left untouched.
        /// A rule name that has no registered type yet is registered with the head's type.
        /// </remarks>
        /// <param name="rule">The rule whose name may be rewritten in place.</param>
        public void EnforceHeadRelations(Rule rule)
        {
            var currentName = rule.Name.NonTerminal;
            if (currentName == StartSymbol || rule.HeadTerm == Epsilon)
                return;

            var headType = NonTerminalsTypeDictionary[rule.HeadTerm];

            // A nameless rule simply becomes the projection of its head.
            if (currentName == null)
            {
                rule.Name.NonTerminal = headType + "P";
                return;
            }

            if (!NonTerminalsTypeDictionary.ContainsKey(currentName))
                NonTerminalsTypeDictionary[currentName] = headType;

            string projectionType = NonTerminalsTypeDictionary[currentName];
            if (projectionType != headType)
                rule.Name.NonTerminal = headType + "P";
        }
Пример #11
0
        /// <summary>
        /// Registers <paramref name="rule"/> in the non-terminal usage counts: one left-hand side
        /// appearance for its name and, unless it is an epsilon rule, one right-hand side appearance
        /// for each production term. Missing entries are created on demand.
        /// </summary>
        /// <param name="rule">The rule whose symbols are counted.</param>
        public void AddNonTerminalCounts(Rule rule)
        {
            var leftSymbol = rule.Name.NonTerminal;
            if (!nonTerminalCounts.ContainsKey(leftSymbol))
                nonTerminalCounts[leftSymbol] = new NonTerminalCounts();
            nonTerminalCounts[leftSymbol].lhsCounts++;

            // Epsilon rules contribute no right-hand side appearances.
            if (rule.IsEpsilonRule())
                return;

            foreach (var rightSymbol in rule.Production.Select(term => term.NonTerminal))
            {
                if (!nonTerminalCounts.ContainsKey(rightSymbol))
                    nonTerminalCounts[rightSymbol] = new NonTerminalCounts();
                nonTerminalCounts[rightSymbol].rhsCounts++;
            }
        }
Пример #12
0
        /// <summary>
        /// Removes <paramref name="rule"/> from the non-terminal usage counts and cleans up what became
        /// unreachable as a result.
        /// </summary>
        /// <remarks>
        /// After decrementing the counts, every right-hand side symbol X of the rule that no longer appears
        /// on any right-hand side but still has rules of its own (and is neither a POS, a POS plus one
        /// trailing character, nor the start symbol) has all of its rules deleted through <c>DeleteRule</c>,
        /// since they can no longer be triggered. Finally, if the rule's own left-hand side is no longer used
        /// on either side, its type and count entries are removed.
        /// </remarks>
        /// <param name="rule">The rule being removed from the grammar.</param>
        /// <exception cref="Exception">
        /// The left-hand side or one of the right-hand side symbols has no entry in the counts dictionary.
        /// </exception>
        public void RemoveNonTerminalCounts(Rule rule)
        {
            var lhs = rule.Name.NonTerminal;

            if (!nonTerminalCounts.ContainsKey(lhs))
                throw new Exception(
                    string.Format("nonterminal {0} in rule {1} is missing from NonTerminalCounts dictionary", lhs, rule));

            nonTerminalCounts[lhs].lhsCounts--;

            var productionNonterminals = new List<string>();
            if (!rule.IsEpsilonRule())
            {
                foreach (var item in rule.Production)
                {
                    var rhs = item.NonTerminal;
                    productionNonterminals.Add(rhs);
                    if (!nonTerminalCounts.ContainsKey(rhs))
                        //report the symbol that is actually missing (rhs, not lhs).
                        throw new Exception(
                            string.Format("nonterminal {0} in rule {1} is missing from NonTerminalCounts dictionary",
                                rhs, rule));
                    nonTerminalCounts[rhs].rhsCounts--;
                }
            }

            var lhsCountsOfLHS = nonTerminalCounts[lhs]; //the counts of the left-hand side of the rule.

            //if the removed rule has a LHS that no longer has any other LHS appearances but still appears on
            //right-hand sides, those appearances could be replaced with another nonterminal because the symbol
            //can no longer be expanded. This is deliberately not done: the resulting grammar will simply not
            //use these rules.

            //productionNonterminals is empty for an epsilon rule, so no extra epsilon check is needed here.
            var lhStoDelete = new List<string>();
            foreach (var item in productionNonterminals)
            {
                var rhsCountsofRhs = nonTerminalCounts[item]; //the counts of the right-hand side term.

                //if the removed rule has a specific RHS, X, that no longer has any other RHS appearances, we can
                //delete the rules that have X as their LHS because they will not be triggered (orphaned rules).
                if (rhsCountsofRhs.rhsCounts == 0 && rhsCountsofRhs.lhsCounts > 0 && !POSTypes.Contains(item) &&
                    item != StartSymbol)
                {
                    var possiblePOS = item.Substring(0, item.Length - 1);

                    //a production may repeat a symbol (X -> A A); queue each orphaned symbol only once.
                    if (!POSTypes.Contains(possiblePOS) && !lhStoDelete.Contains(item))
                    {
                        lhStoDelete.Add(item);
                    }
                }
            }

            foreach (var item in lhStoDelete)
            {
                //DeleteRule drops the Rules entry once its last rule is gone, and deletions cascade through
                //this method; skip symbols whose rules have already been removed.
                if (!Rules.ContainsKey(item))
                    continue;

                //snapshot the numbers first: DeleteRule modifies the list being read.
                var removedRulesNumbers = Rules[item].Select(x => x.Number).ToArray();

                foreach (var removedRuleNumber in removedRulesNumbers)
                    DeleteRule(ruleNumberDictionary[removedRuleNumber]);
            }

            //after updating all counts of nonterminals in the rule, check if the type is still used.
            //if not, remove it.
            if (lhsCountsOfLHS.lhsCounts == 0 && lhsCountsOfLHS.rhsCounts == 0 && !POSTypes.Contains(lhs) &&
                lhs != StartSymbol)
            {
                var possiblePOS = lhs.Substring(0, lhs.Length - 1);
                if (!POSTypes.Contains(possiblePOS))
                {
                    Console.WriteLine("removed type {0}", lhs);
                    NonTerminalsTypeDictionary.Remove(lhs);
                    nonTerminalCounts.Remove(lhs);
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Removes <paramref name="rule"/> from every grammar index: the rules dictionary, the inverse
        /// rules dictionary (non-epsilon rules only), the non-terminal counts and the rule-number
        /// dictionary. The rule's number is then handed back for reuse.
        /// </summary>
        /// <param name="rule">The rule to delete.</param>
        public void DeleteRule(Rule rule)
        {
            var owner = rule.Name.NonTerminal;

            // Rules dictionary: drop the rule, and the whole entry once it becomes empty.
            var siblings = Rules[owner];
            siblings.Remove(rule);
            if (siblings.Count == 0)
                Rules.Remove(owner);

            // Inverse rules dictionary: epsilon rules were never registered there.
            if (!rule.IsEpsilonRule())
                inverseRules.Remove(new InverseKeyType(rule.Production, rule.HeadPosition, owner == StartSymbol));

            RemoveNonTerminalCounts(rule);

            var freedNumber = rule.Number;
            ruleNumberDictionary.Remove(freedNumber);
            numberOfRules--;
            ReturnUnusedRuleNumber(freedNumber);
        }
Пример #14
0
 /// <summary>
 /// Formats <paramref name="rule"/> as "Name -> terms", with a "$" marker inserted among the
 /// space separated production terms at position <paramref name="dotIndex"/>.
 /// </summary>
 /// <param name="rule">The rule to format.</param>
 /// <param name="dotIndex">Index in the production at which the "$" marker is inserted.</param>
 /// <returns>The formatted rule string.</returns>
 private static string RuleWithDotNotation(Rule rule, int dotIndex)
 {
     var pieces = new List<string>();
     foreach (var term in rule.Production)
     {
         pieces.Add(term.ToString());
     }

     pieces.Insert(dotIndex, "$");

     var rightHandSide = string.Join(" ", pieces);
     return string.Format("{0} -> {1}", rule.Name, rightHandSide);
 }
Пример #15
0
        /// <summary>
        /// Picks a random rule and moves its head to the next production position (wrapping around),
        /// replacing the old rule with the modified copy.
        /// </summary>
        /// <remarks>
        /// Up to <c>NumberOfRetries</c> random rules are tried. Non-initial rules (the schematic push/pop
        /// rules) and rules with a single production term are skipped. The replacement only happens when
        /// the modified copy passes <c>Grammar.AreHeadRelationsConsistent</c>.
        /// </remarks>
        /// <param name="grammar">The grammar to mutate.</param>
        /// <returns><c>true</c> if a rule was replaced; <c>false</c> if every retry was skipped or rejected.</returns>
        public bool ChangeHeadOfRule(Grammar grammar)
        {
            var attempt = 0;
            while (attempt < NumberOfRetries)
            {
                attempt++;

                var original = grammar.GetRandomRule();

                // do not change schematic rules (push/pop), and a single term leaves nothing to rotate.
                if (!original.IsInitialRule() || original.Production.Length <= 1)
                    continue;

                var candidate = new Rule(original);
                candidate.HeadPosition = (original.HeadPosition + 1)%original.Production.Length;

                if (!grammar.AreHeadRelationsConsistent(candidate))
                    continue;

                grammar.DeleteRule(original);
                grammar.AddRule(candidate);
                return true;
            }

            return false;
        }