// TODO: should make sure the empty production is the actual empty production
// TODO: should error if the production doesn't exist
/// <summary>
/// Removes a production from whichever internal collection it belongs to and invalidates the caches.
/// An empty production on the start symbol clears the whole empty-production collection
/// (throwing if that collection is already empty); any other production is removed from the
/// nonterminal or terminal production collection.
/// </summary>
/// <param name="production">The production to remove.</param>
protected override void RemoveProductionWithoutSimplifying(Production production) {
    bool isStartEmptyProduction = production.Lhs == this.Start && production.Rhs.Count == 0;

    if (isStartEmptyProduction) {
        if (_emptyProductions.Count == 0) {
            throw new Exception("No production to remove");
        }
        _emptyProductions.Clear();
    } else if (production.IsCnfNonterminal) {
        _nonterminalProductions.Remove(production);
    } else {
        // TODO: might not actually be a terminal production
        _terminalProductions.Remove(production);
    }

    InvalidateCaches();
}
/// <summary>
/// Parses a weighted production string and checks that it value-equals a hand-built production
/// with the same parts, and does not value-equal one whose right-hand side is in a different order.
/// </summary>
public void TestProduction() {
    var parsed = CFGParser.Production(@"<S> -> <X> 'a' <S> [3.0]");

    // Same left-hand side, same word order, same weight as the parsed string
    var matching = new Production(
        Nonterminal.Of("S"),
        new Sentence { Nonterminal.Of("X"), Terminal.Of("a"), Nonterminal.Of("S") },
        3.0
    );

    // Same words and weight, but the first two right-hand-side words are swapped
    var reordered = new Production(
        Nonterminal.Of("S"),
        new Sentence { Terminal.Of("a"), Nonterminal.Of("X"), Nonterminal.Of("S") },
        3.0
    );

    Assert.IsTrue(parsed.ValueEquals(matching));
    Assert.IsFalse(parsed.ValueEquals(reordered));
}
/// <summary>
/// Turns a string like
/// <c>&lt;X&gt; -&gt; &lt;X&gt; 'a' &lt;X&gt;</c>
/// into a production.
/// Nonterminals must be surrounded by angled brackets, terminals must be surrounded by single quotes, and everything must be separated by spaces.
/// The weight is read from the "weight" group of the production regex using the invariant culture
/// (so <c>3.0</c> always means three, regardless of the machine's locale); if that group is missing
/// or cannot be parsed, the weight defaults to 1.0.
/// </summary>
/// <param name="s">The textual form of the production.</param>
/// <returns>A production built from the matched left-hand side, right-hand-side words (in source order), and weight.</returns>
/// <exception cref="Exception">Thrown when the string does not match the production regex or has no left-hand side.</exception>
public static Production Production(string s) {
    var match = ProductionRegex.Match(s);
    if (!match.Success) {
        throw new Exception("Didn't find valid string");
    }

    var lhsMatch = match.Groups["lhs"];
    var ntMatch = match.Groups["nt"];
    var tMatch = match.Groups["t"];
    var weightMatch = match.Groups["weight"];

    if (!lhsMatch.Success) {
        throw new Exception("Didn't find LHS");
    }

    // Parse with the invariant culture: the grammar text format is machine-readable, so a
    // comma-decimal locale must not change (or break) how "3.0" is read. The number styles
    // are the same ones the culture-sensitive double.TryParse(string, out double) overload uses.
    double weight;
    var parsedWeight = double.TryParse(
        weightMatch.Value,
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
        System.Globalization.CultureInfo.InvariantCulture,
        out weight
    );
    if (!parsedWeight) {
        weight = 1.0;
    }

    // Nonterminals and terminals are captured by separate groups; keying them by their position
    // in the input string restores the original left-to-right order of the right-hand side.
    var rhsList = new SortedList<int, Word>();
    foreach (Capture capture in ntMatch.Captures) {
        rhsList.Add(capture.Index, Nonterminal.Of(capture.Value));
    }
    foreach (Capture capture in tMatch.Captures) {
        rhsList.Add(capture.Index, Terminal.Of(capture.Value));
    }

    var rhs = new Sentence(rhsList.Values);
    var lhs = Nonterminal.Of(lhsMatch.Value);

    return new Production(lhs, rhs, weight);
}
// TODO: slow
//private static ISet<Nonterminal> GetReachesTerminal(ISet<Production> originalProductions) {
//    var reachesTerminal = new HashSet<Nonterminal>();
//    var oldCount = -1;
//    while (oldCount != reachesTerminal.Count) {
//        oldCount = reachesTerminal.Count;
//        foreach (var production in originalProductions) {
//            if (!production.Rhs.OnlyNonterminals()) {
//                reachesTerminal.Add(production.Lhs);
//            }
//            foreach (var nt in production.Rhs) {
//                if (reachesTerminal.Contains(nt)) {
//                    reachesTerminal.Add(production.Lhs);
//                }
//            }
//        }
//    }
//    return reachesTerminal;
//}

/// <summary>
/// Computes the chance that a single production yields the empty string, given the previous
/// per-nonterminal estimates: 1.0 for an empty right-hand side, 0.0 if the right-hand side
/// contains anything other than nonterminals, and otherwise the product of the previous
/// estimates of every nonterminal on the right-hand side.
/// </summary>
/// <param name="production">The production to evaluate.</param>
/// <param name="nonterminalToIndex">Maps each nonterminal to its slot in <paramref name="previousEstimates"/>.</param>
/// <param name="previousEstimates">Estimates from the previous iteration, indexed via <paramref name="nonterminalToIndex"/>.</param>
/// <returns>The estimated probability for this production.</returns>
private static double GetProductionProbability(Production production, Dictionary<Nonterminal, int> nonterminalToIndex, double[] previousEstimates) {
    var rhs = production.Rhs;

    if (rhs.Count == 0) {
        return 1.0;
    }
    // A right-hand side containing a terminal can never be empty
    if (!rhs.OnlyNonterminals()) {
        return 0.0;
    }

    var product = 1.0;
    foreach (var word in rhs) {
        var estimate = previousEstimates[nonterminalToIndex[(Nonterminal)word]];
        // On the first iteration the slot still holds the start marker; treat that as a
        // 100% chance of yielding null
        product *= (estimate == _magicStartProbability) ? 1.0 : estimate;
    }

    if (double.IsNaN(product)) {
        throw new Exception("Didn't expect to get NaN probability");
    }
    return product;
}
/// <summary>
/// From a production, derive a set of productions for each combination of skipping nullable nonterminals.
/// E.g., for production S -> AbB and nullable {A, B}, we get productions
/// S -> AbB | Ab | bB | b
/// Each time a nullable nonterminal is considered, the variant that drops it is weighted by the
/// chance of that nonterminal being null and the variant that keeps it by the complement;
/// variants whose weight is not positive are left out.
/// Note that <paramref name="originalProduction"/> itself is placed in the working list, so its
/// weight may be modified in place.
/// </summary>
/// <param name="originalProduction">The production to expand.</param>
/// <param name="nullableProbabilities">For each nullable nonterminal, the probability that it yields the empty string.</param>
/// <returns>The list of derived productions.</returns>
private static List<Production> Nullate(Production originalProduction, Dictionary<Nonterminal, double> nullableProbabilities) {
    var results = new List<Production> { originalProduction };
    if (originalProduction.IsEmpty) {
        return results;
    }

    // Walk the right-hand side from the last position to the first, so that removing the word
    // at the current position never shifts the positions still to be visited
    for (int position = originalProduction.Rhs.Count - 1; position >= 0; position--) {
        var shortened = new List<Production>();
        var exhausted = new List<Production>();

        foreach (var candidate in results) {
            var nt = candidate.Rhs[position] as Nonterminal;
            double chanceNull;
            if (nt == null || !nullableProbabilities.TryGetValue(nt, out chanceNull)) {
                continue;
            }

            var without = candidate.DeepClone();
            without.Rhs.RemoveAt(position);

            var weightWithout = without.Weight * chanceNull;
            var weightWith = candidate.Weight * (1.0 - chanceNull);

            if (weightWithout > 0.0) {
                without.Weight = weightWithout;
                shortened.Add(without);
            }
            if (weightWith <= 0.0) {
                exhausted.Add(candidate);
            } else {
                candidate.Weight = weightWith;
            }
        }

        results.AddRange(shortened);
        // TODO: we should just make it so that if a weight is set to 0, the production gets removed from the grammar automatically, and that operation should be fast
        results.RemoveMany(exhausted);
    }

    return results;
}
/// <summary>
/// Returns the production's weight divided by the summed weight of all productions that
/// <paramref name="productionsByNonterminal"/> lists for the same left-hand side.
/// </summary>
/// <param name="production">The production whose share is wanted.</param>
/// <param name="productionsByNonterminal">Productions grouped by their left-hand-side nonterminal.</param>
/// <returns>The weight of <paramref name="production"/> over the total weight for its left-hand side.</returns>
private double GetProbability(Production production, Dictionary<Nonterminal, ICollection<Production>> productionsByNonterminal) {
    var sameLhs = productionsByNonterminal.LookupEnumerable(production.Lhs);
    var totalWeight = sameLhs.Sum(candidate => candidate.Weight);
    return production.Weight / totalWeight;
}
/// <summary>
/// Builds the productions that replace a unit production A -> B: for every production B -> X
/// found in <paramref name="productionsByNonterminal"/>, a production A -> X is created whose
/// weight is the unit production's weight times B -> X's share of B's total weight.
/// Results that are self loops are skipped.
/// </summary>
/// <param name="unitProduction">A production whose right-hand side starts with a nonterminal (the first word is cast to <c>Nonterminal</c>).</param>
/// <param name="productionsByNonterminal">Productions grouped by their left-hand-side nonterminal.</param>
/// <returns>The replacement productions.</returns>
private IEnumerable<Production> UnitReplacementProductions(Production unitProduction, Dictionary<Nonterminal, ICollection<Production>> productionsByNonterminal) {
    var target = (Nonterminal)unitProduction.Rhs[0];
    var targetProductions = productionsByNonterminal.LookupEnumerable(target);
    var totalWeight = targetProductions.Sum(candidate => candidate.Weight);

    return targetProductions
        .Select(candidate => new Production(
            unitProduction.Lhs,
            candidate.Rhs,
            unitProduction.Weight * (candidate.Weight / totalWeight)))
        .Where(replacement => !replacement.IsSelfLoop)
        .ToList();
}
/// <summary>
/// Adds a production to this grammar and invalidates the caches.
/// An empty production on the start symbol is merged into the existing empty production
/// (by adding its weight) or stored if there is none yet; CNF nonterminal and CNF terminal
/// productions are added to their respective collections without duplicating.
/// </summary>
/// <param name="production">The production to add.</param>
/// <exception cref="Exception">Thrown when the production is none of the accepted kinds.</exception>
public override void AddProduction(Production production) {
    bool isStartEmptyProduction = production.Lhs == this.Start && production.Rhs.Count == 0;

    if (isStartEmptyProduction) {
        if (_emptyProductions.Count > 0) {
            // Fold the new weight into the empty production that is already stored
            _emptyProductions.First().Weight += production.Weight;
        } else {
            _emptyProductions.Add(production);
        }
    } else if (production.IsCnfNonterminal) {
        AddToListWithoutDuplicating(_nonterminalProductions, production);
    } else if (production.IsCnfTerminal) {
        AddToListWithoutDuplicating(_terminalProductions, production);
    } else {
        // TODO: should look into the production and see if we can convert
        throw new Exception("You can't add that kind of production to this grammar");
    }

    InvalidateCaches();
}
/// <summary>
/// Checks whether the productions have the same parts: the same left-hand side, the same
/// sequence of right-hand-side words, and exactly the same weight.
/// </summary>
/// <param name="other">The production to compare against; may be null.</param>
/// <returns>
/// <c>true</c> if all three parts match; <c>false</c> otherwise, including when
/// <paramref name="other"/> is null.
/// </returns>
public bool ValueEquals(Production other) {
    // A null production can never have the same parts as this one. ReferenceEquals is used
    // so the check does not depend on any equality operator the type may define.
    if (object.ReferenceEquals(other, null)) {
        return false;
    }
    if (this.Lhs != other.Lhs) {
        return false;
    }
    if (!this.Rhs.SequenceEqual(other.Rhs)) {
        return false;
    }
    // Exact comparison is intentional here: this is a value-identity check, not a tolerance check
    if (this.Weight != other.Weight) {
        return false;
    }
    return true;
}
/// <summary>
/// Adds a production to this grammar's production collection via
/// <c>AddToListWithoutDuplicating</c>, then invalidates the caches.
/// </summary>
/// <param name="production">The production to add.</param>
public override void AddProduction(Production production) {
    AddToListWithoutDuplicating(_productions, production);

    // Anything derived from the production set may now be stale
    InvalidateCaches();
}
/// <summary>
/// Removes a production from this grammar's production collection and invalidates the caches.
/// No simplification is performed here.
/// </summary>
/// <param name="production">The production to remove.</param>
protected override void RemoveProductionWithoutSimplifying(Production production) {
    _productions.Remove(production);

    // Anything derived from the production set may now be stale
    InvalidateCaches();
}
/// <summary>
/// Adds a single production to the grammar.
/// The grammar is kept simplified.
/// </summary>
/// <param name="production">The production to add.</param>
public abstract void AddProduction(Production production);
/// <summary>
/// Removes a single production from the grammar.
/// As the name indicates, implementations are expected to skip simplification; the
/// RemoveProduction method visible in this file calls this method and then calls Simplify().
/// </summary>
/// <param name="production">The production to remove.</param>
protected abstract void RemoveProductionWithoutSimplifying(Production production);
/// <summary>
/// Removes a single production from the grammar.
/// The grammar is kept simplified: the production is first removed through
/// <c>RemoveProductionWithoutSimplifying</c>, and then <c>Simplify</c> is run.
/// </summary>
/// <param name="production">The production to remove.</param>
// public abstract void RemoveProduction(Production production);
public void RemoveProduction(Production production) {
    // Step 1: the subclass-specific removal
    RemoveProductionWithoutSimplifying(production);

    // Step 2: restore the simplified form
    Simplify();
}