Beispiel #1
0
        /// <summary>
        /// Prediction step: for every candidate rule expanding <paramref name="currObject"/>, builds a copy of
        /// the rule whose stacks are instantiated from the stack of <paramref name="currObject"/> and adds a
        /// dot-at-zero state for that copy to <paramref name="col"/>.
        /// </summary>
        /// <param name="col">Column that receives the predicted states.</param>
        /// <param name="ruleList">Candidate rules; ignored in generator mode, where a single random rule is drawn instead.</param>
        /// <param name="state">State that triggered the prediction; only read for the debug trace.</param>
        /// <param name="currObject">The non-terminal (with its stack) that is being expanded.</param>
        /// <exception cref="Exception">Thrown when the log probability looked up for a rule is negative.</exception>
        private void Predict(Column col, List<Rule> ruleList, State state, NonTerminalObject currObject)
        {
            var candidates = ruleList;
            if (generator)
            {
                // Generator mode expands exactly one randomly drawn rule for this left-hand side.
                candidates = new List<Rule> {Grammar.GetRandomRuleForAGivenLHS(currObject.NonTerminal, true)};
            }

            foreach (var candidate in candidates)
            {
                // TODO (from the original author): later skip -all- rules whose derivation leads to the
                // empty string, e.g. A -> B C, C -> D E, D -> epsilon, E -> epsilon (C itself is not an epsilon rule).
                // Per the original note, states produced by the spontaneous dot shift over a nullable
                // production were already put on the agendas in Column.Add().
                var epsilonAlreadyHandled =
                    candidate.IsEpsilonRule() && Grammar.nullableProductions.ContainsKey(currObject);
                if (epsilonAlreadyHandled)
                {
                    continue;
                }

                // Empty current stack versus a rule that expects a concrete stack: mismatch, skip.
                if (currObject.IsStackEmpty() && !candidate.IsInitialOrDotStack())
                {
                    continue;
                }

                // Work on a copy so that the stack information of this prediction can be written into it.
                var instantiated = new Rule(candidate);

                if (candidate.IsInitialRule())
                {
                    var complement = instantiated.Production[instantiated.ComplementPosition];

                    // A non-empty stack cannot be handed to a part-of-speech complement: mismatch, skip.
                    if (!currObject.IsStackEmpty() && Grammar.IsPOS(complement.NonTerminal))
                    {
                        continue;
                    }

                    // The left-hand side becomes the current object, and its stack goes to the complement.
                    instantiated.Name = currObject;
                    complement.Stack = currObject.Stack;
                }
                else
                {
                    // Skip when the tops of the stacks disagree,
                    // e.g. created rule PP[PP] -> epsilon against current object PP[NP].
                    // The evaluation order matters: currObject.Stack is only touched when the rule top is not ".".
                    var lhsTopIsDot = instantiated.Name.Stack.Peek() == ".";
                    if (!lhsTopIsDot
                        && currObject.Stack.Top != Grammar.Epsilon
                        && currObject.Stack.Top != instantiated.Name.Stack.Peek())
                    {
                        continue;
                    }

                    // Left-hand side of the new rule.
                    instantiated.Name.Stack = currObject.Stack;
                    instantiated.Name.NonTerminal = currObject.NonTerminal;

                    // Stack content that the ".." part of the rule stands for.
                    NonTerminalStack dotContent;
                    if (candidate.Name.Stack.Peek() != ".")
                    {
                        // e.g. A[..X]
                        dotContent =
                            currObject.Stack.GetPrefixListStackObjectOfGivenTop(candidate.Name.Stack.Peek());
                    }
                    else
                    {
                        // e.g. A[..]
                        dotContent = currObject.Stack;
                    }

                    // Right-hand side of the new rule.
                    for (var position = 0; position < candidate.Production.Length; position++)
                    {
                        var template = candidate.Production[position].Stack;
                        if (template == null)
                        {
                            continue;
                        }

                        if (template.Peek() == ".")
                        {
                            // e.g. A[..] - pop rule.
                            instantiated.Production[position].Stack = dotContent;
                        }
                        else if (template.PrefixList == null)
                        {
                            // e.g. A[X] - secondary constituent.
                            instantiated.Production[position].Stack = template;
                        }
                        else
                        {
                            // e.g. A[..X] - push rule.
                            instantiated.Production[position].Stack =
                                new NonTerminalStack(template.Peek(), dotContent);
                        }
                    }
                }

                var predicted = new State(instantiated, 0, col, null)
                {
                    LogProbability = ruleLogProbabilities[candidate.Number]
                };

                if (predicted.LogProbability < 0)
                {
                    throw new Exception("wrong probability");
                }

                var added = col.AddState(predicted, ParsingOperation.Predict);

                if (Debug)
                {
                    Console.WriteLine("{0} & {1} & {2} & Predicted from State {3}, added: {4}\\\\",
                        predicted.StateNumber, predicted, col.Index, state.StateNumber, added);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Runs the chart loop (complete, then predict, then scan, column by column) over
        /// <paramref name="text"/>. When <paramref name="text"/> is null the method switches to generator
        /// mode and works on 100 initially empty token slots that are filled with words from the vocabulary.
        /// </summary>
        /// <param name="text">Whitespace-separated sentence to parse, or null to generate a sentence.</param>
        /// <returns>The first child of the node of the first gamma state found in the final column.</returns>
        /// <exception cref="Exception">
        /// Thrown before the chart is built when a word of <paramref name="text"/> is not in the vocabulary.
        /// Thrown with the message "Parsing Failed!" or "Generating Failed!" when the final column holds no
        /// gamma state or when an error occurred inside the chart loop; in the latter case that error is
        /// attached as the inner exception.
        /// </exception>
        public Node ParseSentence(string text)
        {
            string[] arr;
            if (text == null)
            {
                // NOTE(review): this method never resets the flag; whether a later call with a non-null
                // text on the same instance is expected to parse again should be confirmed.
                generator = true;
                arr = Enumerable.Repeat("", 100).ToArray();
            }
            else
            {
                arr = text.Split();
            }

            //check below that the text appears in the vocabulary
            if (!generator && arr.Any(str => !Grammar.Vocabulary.ContainsWord(str)))
                throw new Exception("word in text does not appear in the vocabulary.");

            // Column 0 precedes the first token; column i holds token i - 1.
            var table = new Column[arr.Length + 1];
            for (var i = 1; i < table.Length; i++)
                table[i] = new Column(i, arr[i - 1], Grammar);
            table[0] = new Column(0, "", Grammar);
            State.stateCounter = 0;
            var startRule = new Rule(0, GammaRule, new[] {Grammar.StartSymbol}, 0, 0);
            var startState = new State(startRule, 0, table[0], null);
            startState.LogProbability = 0.0f;
            Node.grammar = Grammar;
            table[0].AddState(startState, ParsingOperation.Scan);
            var finalColumn = table[table.Length - 1];

            // Remembers the error that aborted the chart loop so that it can be attached to the final exception.
            Exception failure = null;
            try
            {
                foreach (var col in table)
                {
                    var count = 0;
                    if (generator && !col.States.Any())
                    {
                        // Nothing reached this column: the generated sentence ended in the previous one.
                        finalColumn = table[col.Index - 1];
                        break;
                    }

                    //1. complete
                    while (col.ActionableCompleteStates.Any())
                    {
                        count++;
                        TestForTooManyStatesInColumn(count, Debug);

                        var states = col.ActionableCompleteStates.First().Value;

                        var state = states.Dequeue();
                        if (!states.Any())
                            col.ActionableCompleteStates.Remove(state);

                        if (generator)
                            state.LogProbability = 0;

                        Complete(col, state);
                    }

                    //2. predict after complete:
                    while (col.ActionableNonCompleteStates.Any())
                    {
                        if (col.ActionableCompleteStates.Any())
                            throw new Exception(
                                "completed states queue should always be empty while processing predicted states.");
                        count++;
                        TestForTooManyStatesInColumn(count, Debug);

                        var state = col.ActionableNonCompleteStates.Dequeue();
                        if (generator)
                            state.LogProbability = 0;

                        var currObject = state.NextProductionTerm();
                        var term = currObject.NonTerminal;
                        var ruleList = Grammar[term];

                        if (ruleList != null)
                            Predict(col, ruleList, state, currObject);
                    }

                    //3. scan after predict.
                    foreach (var state in col)
                    {
                        if (state.IsCompleted())
                            continue;

                        var currObject = state.NextProductionTerm();
                        var term = currObject.NonTerminal;

                        if (!generator)
                        {
                            if (col.Index + 1 < table.Length &&
                                Grammar.Vocabulary[table[col.Index + 1].Token].Contains(term))
                                Scan(table[col.Index + 1], state, term, table[col.Index + 1].Token);
                            continue;
                        }

                        if (!Grammar.Vocabulary.POSWithPossibleWords.ContainsKey(term))
                            continue;

                        var posRules = Grammar[term];
                        //if the term is a constituent, generate it given some probability. otherwise continue.
                        if (posRules != null)
                        {
                            if (rand.NextDouble() > ChanceToGenerateConstituent) continue;
                            //if we generated a predicted epsilon rule for that constituent, don't scan.
                            if (posRules[0].IsEpsilonRule())
                                continue;
                        }

                        // Fix: the last column has no successor to scan into. Without this guard the
                        // access below went out of range and aborted the whole generation, even though
                        // the parsing branch above already performs the same bounds check.
                        if (col.Index + 1 >= table.Length)
                            continue;

                        var nextColumn = table[col.Index + 1];

                        //always selecting the same (first) word from the vocabulary is considerably faster
                        //because the same sentence is not re-parsed; only counts of its appearances are kept.
                        //the parse of two sentences with identical sequence of POS is the same - regardless
                        //of the actual word selected.
                        //(for a random word, draw an index below POSWithPossibleWords[term].Count instead.)

                        //only write a token and scan when the next column has no token yet; if a previous
                        //scan already wrote one (for instance NP -> John, NP -> D N, D -> the: "John" was
                        //written before "the"), the column is left alone.
                        if (nextColumn.Token == "")
                        {
                            var index = 0;
                            nextColumn.Token = Grammar.Vocabulary.POSWithPossibleWords[term].ElementAt(index);
                            Scan(nextColumn, state, term, nextColumn.Token);
                        }
                    }
                }

                // Return the tree below the first gamma state, if there is one.
                foreach (var state in finalColumn.GammaStates)
                    return state.Node.Children[0];
            }
            catch (LogException e)
            {
                failure = e;
                Console.WriteLine(e.ToString());
                Console.WriteLine("sentence: {0}, grammar: {1}", text, Grammar);
            }
            catch (Exception e)
            {
                failure = e;
                Console.WriteLine(e.ToString());
            }

            // Same exception type and messages as before; the cause (if any) now travels along as InnerException.
            if (!generator)
                throw new Exception("Parsing Failed!", failure);
            throw new Exception("Generating Failed!", failure);
        }
Beispiel #3
0
        /// <summary>
        /// Scan step: moves the dot of <paramref name="state"/> over <paramref name="term"/>, builds a leaf
        /// node for <paramref name="token"/> and adds the advanced state to <paramref name="col"/>.
        /// </summary>
        /// <param name="col">Column that receives the advanced state; its index is the end position of the leaf.</param>
        /// <param name="state">State whose next production term is being scanned.</param>
        /// <param name="term">Name given to the leaf node.</param>
        /// <param name="token">Word stored as the associated terminal of the leaf node.</param>
        /// <exception cref="LogException">Thrown when the node of the advanced state has a negative log probability.</exception>
        private void Scan(Column col, State state, string term, string token)
        {
            // A non-empty stack arriving at a scanned term means the derivation is wrong, so stop here.
            // SEFI's note in the original: this is a restriction on the form of the grammar; it was once
            // limited to part-of-speech terms (Grammar.isPOS(term)) and could be removed to see the consequences.
            var scannedTerm = state.NextProductionTerm();
            if (!scannedTerm.IsStackEmpty())
            {
                return;
            }

            // Leaf covering exactly the one token that ends at this column.
            var leaf = new Node(term, col.Index - 1, col.Index);
            leaf.AssociatedTerminal = token;
            leaf.LogProbability = 0.0f;
            leaf.Bits = 1;

            var advancedNode = State.MakeNode(state, col.Index, leaf);
            var advanced = new State(state.Rule, state.DotIndex + 1, state.StartColumn, advancedNode);
            col.AddState(advanced, ParsingOperation.Scan);

            if (Debug)
            {
                Console.WriteLine("{0} & {1} & {2} & Scanned from State {3}, word: {4}\\\\",
                    advanced.StateNumber, advanced, col.Index, state.StateNumber, token);
            }

            // Sanity check, performed after the state has been added.
            if (advanced.Node.LogProbability < 0)
            {
                var message = string.Format("scanarrrr! NODE log probability lower than 0: {0}, state: {1}",
                    advanced.Node.LogProbability, advanced);
                throw new LogException(message);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Completion step: a completed gamma state is recorded in the gamma states of
        /// <paramref name="col"/>; any other completed state advances every state of its start column
        /// whose next term is consistent with the completed left-hand side.
        /// </summary>
        /// <param name="col">Column that receives the advanced states (or the gamma state).</param>
        /// <param name="state">The completed state.</param>
        /// <exception cref="LogException">
        /// Thrown when a consistent predecessor is found while the node of <paramref name="state"/> has a
        /// negative log probability.
        /// </exception>
        private void Complete(Column col, State state)
        {
            var completedLhs = state.Rule.Name;
            if (completedLhs.NonTerminal == GammaRule)
            {
                col.GammaStates.Add(state);
                return;
            }

            foreach (var waiting in state.StartColumn)
            {
                NonTerminalStack intersection;
                var expected = waiting.NextProductionTerm();
                if (!IsCompletedTermConsistentWithNextTerm(state.Rule.Name, expected, out intersection))
                {
                    continue;
                }

                // The check only fires once a consistent predecessor exists.
                if (state.Node.LogProbability < 0)
                {
                    var message = string.Format(
                        "trrrr! NODE log probability lower than 0: {0}, reductor state: {1}, predecessor state {2}",
                        state.Node.LogProbability, state, waiting);
                    throw new LogException(message);
                }

                var combined = State.MakeNode(waiting, state.EndColumn.Index, state.Node);
                var advanced = new State(waiting.Rule, waiting.DotIndex + 1, waiting.StartColumn, combined);

                // The term that was just completed gets the stack computed by the consistency check.
                advanced.Rule.Production[waiting.DotIndex].Stack = intersection;

                col.AddState(advanced, ParsingOperation.Complete);

                if (Debug)
                {
                    Console.WriteLine("{0} & {1} & {2} & Completed from States {3} and {4}\\\\",
                        advanced.StateNumber, advanced, col.Index, waiting.StateNumber, state.StateNumber);
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Produces the node for the state obtained by moving the dot of
        /// <paramref name="predecessorState"/> over <paramref name="reductor"/>.
        /// </summary>
        /// <remarks>
        /// When the dot moves over the first symbol of a rule with more than one right-hand-side symbol,
        /// no new node is created: <paramref name="reductor"/> itself is updated in place and returned.
        /// Otherwise a new node named after the dotted rule is created with the reductor and the
        /// predecessor's node as children.
        /// </remarks>
        /// <param name="predecessorState">State whose dot is being advanced.</param>
        /// <param name="endIndex">End position of a newly created node.</param>
        /// <param name="reductor">Node of the symbol the dot moves over.</param>
        /// <returns>The updated <paramref name="reductor"/> or the newly created node.</returns>
        /// <exception cref="Exception">
        /// Thrown when one of the combined log probabilities is negative, or when the predecessor already
        /// has a node although the dot is only moving over the first symbol.
        /// </exception>
        public static Node MakeNode(State predecessorState, int endIndex, Node reductor)
        {
            var advancedDot = predecessorState.DotIndex + 1;
            var label = RuleWithDotNotation(predecessorState.Rule, advancedDot);

            var movesOverFirstOfSeveral = advancedDot == 1 && predecessorState.Rule.Production.Length > 1;
            if (movesOverFirstOfSeveral)
            {
                if (predecessorState.Node != null)
                {
                    throw new Exception("arrived in a clause that should not be possible. make_node");
                }

                // The result is the reductor itself, so the two assignments below change the reductor,
                // and the check that follows reads its already updated log probability.
                var reused = reductor;
                reused.LogProbability = predecessorState.LogProbability + reductor.LogProbability;
                reused.Bits = RequiredBitsGivenLogProbability(predecessorState.LogProbability) + reductor.Bits;

                if (predecessorState.LogProbability < 0 || reductor.LogProbability < 0)
                {
                    throw new Exception(
                        string.Format("first case y NODE log probability lower than 0: {0} < , {1} , {2}",
                            reused.LogProbability, predecessorState.LogProbability, reductor.LogProbability));
                }

                return reused;
            }

            var parent = new Node(label, predecessorState.StartColumn.Index, endIndex);
            if (!parent.HasChildren())
            {
                parent.AddChildren(reductor, predecessorState.Node);
            }

            if (predecessorState.Node != null)
            {
                // The predecessor already carries a node: accumulate on top of that node.
                parent.LogProbability = predecessorState.Node.LogProbability + reductor.LogProbability;
                parent.Bits = predecessorState.Node.Bits + reductor.Bits;

                if (predecessorState.Node.LogProbability < 0 || reductor.LogProbability < 0)
                {
                    throw new Exception(
                        string.Format("third case y NODE log probability lower than 0: {0} = , {1} + {2}",
                            parent.LogProbability, predecessorState.Node.LogProbability,
                            reductor.LogProbability));
                }
            }
            else
            {
                // No node yet: start from the log probability stored on the predecessor state itself.
                parent.LogProbability = predecessorState.LogProbability + reductor.LogProbability;
                parent.Bits = RequiredBitsGivenLogProbability(predecessorState.LogProbability) + reductor.Bits;

                if (predecessorState.LogProbability < 0 || reductor.LogProbability < 0)
                {
                    throw new Exception(
                        string.Format("second case y NODE log probability lower than 0: {0} = , {1} + {2}",
                            parent.LogProbability, predecessorState.LogProbability, reductor.LogProbability));
                }
            }

            parent.RuleNumber = predecessorState.Rule.Number;
            return parent;
        }