/// <summary>
/// Removes unit productions by repeatedly replacing each one with the productions
/// returned by <c>UnitReplacementProductions</c>, until the summed probability of the
/// unit productions found in a pass is no longer positive.
/// </summary>
/// <param name="productions">
/// Productions to process. They are used as keys of the working table, and every unit
/// production that gets replaced has its <c>Weight</c> set to zero in place.
/// </param>
/// <returns>
/// A lazily evaluated sequence of the productions in the working table whose weight is
/// greater than zero.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The summed unit probability of a pass is larger than that of the previous pass.
/// </exception>
private IEnumerable<Production> RemoveUnits(IEnumerable<Production> productions)
{
    // Index the productions under ProductionComparer; a later production that compares
    // equal to an earlier one overwrites that entry's value.
    var productionTable = new Dictionary<Production, Production>(new ProductionComparer());
    foreach (var production in productions)
    {
        productionTable[production] = production;
    }

    var oldSumOfProbability = double.MaxValue;

    // We keep looping until the probability of any unit production has been driven to zero.
    // As an invariant, we make sure that the sum of the unit probabilities does not go up
    // from one iteration to the next.
    while (oldSumOfProbability > 0)
    {
        // TODO: don't need to build this table every round
        var productionsByNonterminal = GrammarHelpers.BuildLookupTable(productionTable.Keys);
        var newSumOfProbability = 0.0;
        var toAdd = new List<Production>();
        var toRemove = new List<Production>();

        // Find all the unit productions and replace them with equivalent rules:
        // X -> Y gets replaced with rules X -> Z for all Y -> Z.
        // Changes are only collected here and applied after the loop, so the table is
        // never modified while its keys are being enumerated.
        foreach (var production in productionTable.Keys)
        {
            if (!production.IsUnit())
            {
                continue;
            }

            var thisProb = GetProbability(production, productionsByNonterminal);
            if (double.IsNaN(thisProb))
            {
                // No usable probability: the production is neither counted nor replaced.
                // NOTE(review): presumably NaN arises from a zero weight total — confirm.
                continue;
            }

            newSumOfProbability += thisProb;
            var replacements = UnitReplacementProductions(production, productionsByNonterminal);
            toAdd.AddRange(replacements);
            toRemove.Add(production);
        }

        if (oldSumOfProbability < newSumOfProbability)
        {
            throw new InvalidOperationException("Invariant didn't hold, we want probability sums to decrease every iteration");
        }

        oldSumOfProbability = newSumOfProbability;

        // Replaced unit productions stay in the table with zero weight; they are
        // filtered out of the result below.
        foreach (var production in toRemove)
        {
            production.Weight = 0.0;
        }

        MergeProductions(productionTable, toAdd);
    }

    return productionTable.Keys.Where((p) => p.Weight > 0.0);
}
/// <summary>
/// Actually performs the conversion and returns a new CNF grammar based on the old grammar.
/// </summary>
/// <returns>
/// A <c>CNFGrammar</c> built from the converted productions that have one or two symbols
/// on the right-hand side, plus one empty production for the start symbol that carries
/// the summed weight of all remaining empty productions.
/// </returns>
/// <exception cref="InvalidOperationException">
/// This method has already been called on this object, or the conversion steps left
/// behind a production with more than two right-hand-side symbols or a unit production.
/// </exception>
internal CNFGrammar Convert()
{
    // The converter is single-use: a second call is rejected.
    if (_used)
    {
        throw new InvalidOperationException("You can only use this object once");
    }
    _used = true;

    // Run the conversion steps, in this order, on the working copy obtained from CloneGrammar.
    var productions = GrammarHelpers.CloneGrammar(_grammar);
    StepStart(productions);
    StepTerm(productions);
    StepBin(productions);
    StepDel(productions);
    StepUnit(productions);

    var resultProductions = new List<Production>();
    var producesEmptyWeight = 0.0;

    foreach (var production in productions)
    {
        var rhsCount = production.Rhs.Count;

        if (rhsCount > 2)
        {
            throw new InvalidOperationException("Didn't expect more than 2");
        }

        if (rhsCount == 0)
        {
            // Empty productions are not copied over; only their weight is accumulated.
            // NOTE(review): the weight is summed regardless of the production's
            // left-hand side — confirm that only start-symbol empty rules remain here.
            producesEmptyWeight += production.Weight;
            continue;
        }

        if (rhsCount == 1 && production.Rhs[0].IsNonterminal)
        {
            throw new InvalidOperationException("Didn't expect unit production");
        }

        resultProductions.Add(production);
    }

    // The start symbol always gets an explicit empty production, even when the
    // accumulated weight is zero.
    resultProductions.Add(new Production(_startSymbol, new Sentence(), producesEmptyWeight));

    return new CNFGrammar(resultProductions, _startSymbol);
}
/// <summary>
/// Eliminate ε-rules: every production is expanded through <c>Nullate</c> using the
/// nullable information from <c>GrammarHelpers.GetNullable</c>, cleaned up with
/// <c>RemoveExtraneousNulls</c>, and the set is then replaced by the expanded rules.
/// </summary>
/// <param name="productions">The production set to rewrite; it is modified in place.</param>
// TODO: Does not preserve weights
private void StepDel(ISet<Production> productions)
{
    var nullableLookup = GrammarHelpers.GetNullable(productions);
    var rewritten = new List<Production>();

    foreach (var original in productions)
    {
        var expansions = Nullate(original, nullableLookup);
        RemoveExtraneousNulls(expansions);
        rewritten.AddRange(expansions);
    }

    // Swap the old contents for the rewritten rules only after enumeration has finished.
    productions.Clear();
    productions.UnionWith(rewritten);
}
/// <summary>
/// Creates the derived-data caches of this grammar (weight totals per left-hand side,
/// the set of nonterminals, the set of terminals and the nullable lookup) and adds each
/// of them to <c>Caches</c>.
/// </summary>
protected void BuildHelpers()
{
    // Sum of production weights, grouped by left-hand side.
    _weightTotalsByNonterminal = Cache.Create(() => Helpers.BuildLookup(
        () => this.Productions,
        (production) => production.Lhs,
        (production) => production.Weight,
        () => new Boxed<double>(0.0),
        (total, weight) => total.Value += weight
    ));
    this.Caches.Add(_weightTotalsByNonterminal);

    // Start symbol plus every nonterminal that occurs on either side of a production.
    _nonterminals = Cache.Create(() =>
    {
        ISet<Nonterminal> found = new HashSet<Nonterminal> { Start };
        foreach (var production in this.Productions)
        {
            found.Add(production.Lhs);
            foreach (var symbol in production.Rhs)
            {
                if (symbol is Nonterminal)
                {
                    found.Add((Nonterminal)symbol);
                }
            }
        }
        return found;
    });
    this.Caches.Add(_nonterminals);

    // Every terminal that occurs on the right-hand side of a production.
    _terminals = Cache.Create(() =>
    {
        ISet<Terminal> found = new HashSet<Terminal>();
        foreach (var production in this.Productions)
        {
            foreach (var symbol in production.Rhs)
            {
                if (symbol is Terminal)
                {
                    found.Add((Terminal)symbol);
                }
            }
        }
        return found;
    });
    this.Caches.Add(_terminals);

    // Nullable information, computed by GrammarHelpers.GetNullable over a fresh set of the productions.
    _nullableDict = Cache.Create(() => GrammarHelpers.GetNullable(new HashSet<Production>(Productions)));
    this.Caches.Add(_nullableDict);
}