// Seeds a chart cell with the lexical (terminal) rules for word `wid`:
// builds the terminal vertex, one l1v vertex per admissible tag, and a
// promotion of each tag into l2v so that unary/binary rules can attach.
//
// table       - grammar; trules is indexed by word id.
// cell        - the hypergraph cell being filled (l1v/l2v indexed by tag id).
// wid         - word id of the terminal this cell covers.
// epool/vpool - pools that own the allocated edges / vertices.
// tagCapacity - optional per-tag beam capacity; -1 (unbounded) when null.
// allowedTags - optional tag filter; tags flagged false are skipped.
// tagProbs    - optional per-tag log-probabilities folded into rule scores.
// isRoot      - true when this is the root cell (whole-sentence span).
private static void MatchLexicon( LAPCFGrammar table, HyperCell cell, int wid, HyperEdgePool epool, HyperVertexPool vpool, int[] tagCapacity, bool[] allowedTags, double[] tagProbs, bool isRoot)
{
    // Terminal vertex for the word itself.
    var tv = new HyperVertex (true, wid, 1);
    var trules = table.trules [wid];
    foreach (var rule in trules) {
        // Rule arrays are null-terminated: the first null ends the list.
        if (rule == null) {
            break;
        }
        // The ROOT tag is only admissible in the root cell.
        if (rule.tag == table.ROOTID && !isRoot) {
            continue;
        }
        if (allowedTags != null && !allowedTags[rule.tag]) {
            continue;
        }
        var xrule = rule;
        if (tagProbs != null) {
            var xprob = tagProbs[rule.tag];
            // A -inf external tag probability prunes the tag entirely.
            if (double.IsNegativeInfinity(xprob)) {
                continue;
            }
            // Clone before mutating: the grammar's rule object is shared,
            // so the external log-probability is folded into a copy.
            xrule = rule.Clone();
            for (int i = 0; i < xrule.scores.Length; ++i) {
                if (!double.IsNegativeInfinity(xrule.scores[i])) {
                    xrule.scores[i] += xprob;
                }
            }
        }
        var cap = tagCapacity == null ? -1 : tagCapacity [rule.tag];
        // Level-1 vertex: the tag as produced directly by the lexicon.
        cell.l1v [rule.tag] = vpool.Allocate (false, rule.tag, table.GetSubTagCount (rule.tag), cap);
        epool.Allocate (cell.l1v [rule.tag], tv, xrule.scores, null);
        // In the root cell only ROOT gets promoted to level 2; everything
        // else stops at l1v (it can still feed unary rules to ROOT).
        if (isRoot && rule.tag != table.ROOTID) {
            continue;
        }
        // Level-2 vertex linked to l1v via a score-less promotion edge
        // (presumably identity/zero-cost — confirm against HyperEdgePool).
        cell.l2v [rule.tag] = vpool.Allocate (false, rule.tag, table.GetSubTagCount (rule.tag), cap);
        epool.Allocate (cell.l2v [rule.tag], cell.l1v [rule.tag]);
    }
}
// Applies unary rules on top of the cell's level-1 vertices: every
// surviving l1v child that can be rewritten to a parent tag contributes an
// edge into the (lazily allocated) l2v vertex of that parent.
//
// grammar     - grammar; urules is indexed by child tag.
// cell        - the cell whose l1v feeds and whose l2v receives the edges.
// epool/vpool - pools that own the allocated edges / vertices.
// tagCapacity - optional per-tag beam capacity; -1 (unbounded) when null.
// isRoot      - true when this is the root cell (whole-sentence span).
private static void MatchUnaryRules(
    LAPCFGrammar grammar,
    HyperCell cell,
    HyperEdgePool epool,
    HyperVertexPool vpool,
    int[] tagCapacity,
    bool isRoot)
{
    foreach (var child in cell.l1v)
    {
        // l1v is sparse; empty slots carry no tag.
        if (child == null)
        {
            continue;
        }

        var candidates = grammar.urules [child.tag];
        if (candidates == null)
        {
            continue;
        }

        foreach (var rule in candidates)
        {
            // Rule arrays are null-terminated.
            if (rule == null)
            {
                break;
            }

            // ROOT is admissible only in the root cell, and the root cell
            // admits nothing but ROOT as a unary parent.
            bool parentIsRoot = rule.ptag == grammar.ROOTID;
            if (parentIsRoot != isRoot)
            {
                continue;
            }

            // Allocate the parent's l2v vertex on first use.
            var parent = cell.l2v [rule.ptag];
            if (parent == null)
            {
                int cap = (tagCapacity != null) ? tagCapacity [rule.ptag] : -1;
                parent = vpool.Allocate (false, rule.ptag, grammar.GetSubTagCount (rule.ptag), cap);
                cell.l2v [rule.ptag] = parent;
            }

            epool.Allocate (parent, child, rule.scores, null);
        }
    }
}
// Applies binary rules combining a left cell and a right cell into a parent
// cell: for every (left l2v tag, right l2v tag) pair with matching binary
// rules, edges are added into the parent's (lazily allocated) l1v vertex.
//
// grammar      - grammar; brules is indexed [left tag][right tag].
// pcell        - parent cell receiving the new l1v vertices/edges.
// lcell/rcell  - child cells supplying their l2v vertices.
// epool/vpool  - pools that own the allocated edges / vertices.
// tagCapacity  - optional per-tag beam capacity; -1 (unbounded) when null.
// isRoot       - true when pcell is the root cell (whole-sentence span).
private static void MatchBinaryRules(
    LAPCFGrammar grammar,
    HyperCell pcell,
    HyperCell lcell,
    HyperCell rcell,
    HyperEdgePool epool,
    HyperVertexPool vpool,
    int[] tagCapacity,
    bool isRoot)
{
    foreach (var lv in lcell.l2v) {
        // FIX: l2v is sparse — slots are only filled for tags actually
        // realized in the cell (MatchLexicon fills them conditionally and
        // MatchUnaryRules allocates them lazily), so empty slots must be
        // skipped here instead of dereferencing null.
        if (lv == null) {
            continue;
        }
        var rprules = grammar.brules [lv.tag];
        if (rprules == null) {
            continue;
        }
        foreach (var rv in rcell.l2v) {
            // FIX: same sparsity guard for the right child.
            if (rv == null) {
                continue;
            }
            var prules = rprules [rv.tag];
            if (prules == null) {
                continue;
            }
            for (int p = 0; p < prules.Length; ++p) {
                var rule = prules [p];
                // Rule arrays are null-terminated.
                if (rule == null) {
                    break;
                }
                // ROOT is only admissible as a parent in the root cell.
                if (rule.ptag == grammar.ROOTID && !isRoot) {
                    continue;
                }
                // Allocate the parent's l1v vertex on first use.
                if (pcell.l1v [rule.ptag] == null) {
                    var cap = tagCapacity == null ? -1 : tagCapacity [rule.ptag];
                    pcell.l1v [rule.ptag] = vpool.Allocate (false, rule.ptag, grammar.GetSubTagCount (rule.ptag), cap);
                }
                epool.Allocate (pcell.l1v [rule.ptag], lv, rv, rule.scores, null);
            }
        }
    }
}
// Builds the subtag remapping tables for a merge step of split-merge
// training. The bottom half of mergeCands (first Count/2 entries) is merged:
// for each candidate, sibling subtags (2*subtag, 2*subtag + 1) of its tag are
// flagged as merged, the tag's subtag count drops by one, and every later
// subtag index of that tag shifts down by one.
//
// rules           - grammar providing TotalTagCount / GetSubTagCount.
// mergeCands      - merge candidates; only the first half is applied.
// subtagMap       - out: old subtag index -> new subtag index, per tag.
// isMerged        - out: per (tag, subtag), whether it was merged away.
// newSubTagCounts - out: subtag count per tag after merging.
private static void CreateMergeMapping(
    LAPCFGrammar rules,
    List<MergeHelper> mergeCands,
    out int[][] subtagMap,
    out bool[][] isMerged,
    out int[] newSubTagCounts)
{
    int tagCount = rules.TotalTagCount;
    subtagMap = new int[tagCount][];
    isMerged = new bool[tagCount][];
    newSubTagCounts = new int[tagCount];

    // Start from the identity mapping with nothing merged.
    for (int tag = 0; tag < tagCount; ++tag)
    {
        int subCount = rules.GetSubTagCount (tag);
        newSubTagCounts [tag] = subCount;
        isMerged [tag] = new bool[subCount];

        var map = new int[subCount];
        for (int sub = 0; sub < subCount; ++sub)
        {
            map [sub] = sub;
        }
        subtagMap [tag] = map;
    }

    // Apply the cheapest half of the merge candidates.
    int mergeCount = mergeCands.Count / 2;
    for (int c = 0; c < mergeCount; ++c)
    {
        var cand = mergeCands [c];
        int tag = cand.tag;
        int left = cand.subtag * 2;

        // The split pair (left, left + 1) collapses into one subtag.
        isMerged [tag] [left] = true;
        isMerged [tag] [left + 1] = true;
        newSubTagCounts [tag] -= 1;

        // Everything after the collapsed pair's first member slides down.
        var map = subtagMap [tag];
        for (int sub = left + 1; sub < map.Length; ++sub)
        {
            map [sub] -= 1;
        }
    }
}
// Computes, for every (tag, subtag) pair, the log of the expected count of
// that subtag over the treebank, using inside-outside (SumForward /
// SumBackward) on the hypergraph built from each gold tree. Trees are
// striped across `nthread` workers, each with a private accumulator, and the
// accumulators are log-added together at the end.
//
// Returns te[tag][subtag] = log expected count (NegativeInfinity for
// subtags never observed with usable scores).
public static double[][] SubtagExpectedCounts(
    int nthread,
    Vocabulary vocab,
    TagSet tagset,
    //LALexiconBuilder lexicon,
    LAPCFGrammar rules,
    List<PhrasalTree> treebank)
{
    var parser = new HyperGraphParser (vocab, tagset, rules);
    // One accumulator per worker to avoid cross-thread contention;
    // merged sequentially after the parallel loop.
    double[][][] tagExpectsArray = new double[nthread][][];
    for (int tid = 0; tid < nthread; ++tid) {
        tagExpectsArray [tid] = new double[rules.TotalTagCount][];
        var tagExpects = tagExpectsArray [tid];
        for (int i = 0; i < tagExpects.Length; ++i) {
            tagExpects [i] = new double[rules.GetSubTagCount (i)];
        }
        // NegativeInfinity is the additive identity in log space.
        ArrayHelper.Fill (tagExpects, double.NegativeInfinity);
    }
    Parallel.For (0, nthread, threadid => {
        var tagExpects = tagExpectsArray [threadid];
        // Static striping: worker t handles trees t, t+nthread, t+2*nthread, ...
        for (int treeId = threadid; treeId < treebank.Count; treeId += nthread) {
            var tree = treebank [treeId];
            var g = parser.BuildHyperGraph (tree);
            g.SumForward ();
            g.SumBackward ();
            double sentS = g.RootScore;
            // Skip trees the grammar cannot score (no valid derivation).
            if (double.IsNaN (sentS) || double.IsInfinity (sentS)) {
                continue;
            }
            foreach (var v in g.Vs) {
                if (v.TYPE == VTYPE.TERMINAL) {
                    continue;
                }
                int t = v.tag;
                for (int st = 0; st < v.subtagCount; ++st) {
                    // Ignore subtags whose inside (alpha) or outside (beta)
                    // score is unusable or was pruned.
                    if (double.IsNaN (v.alpha.v [st]) || double.IsInfinity (v.alpha.v [st]) || double.IsNaN (v.beta.v [st]) || double.IsInfinity (v.beta.v [st]) || v.alpha.pruned [st] || v.beta.pruned [st]) {
                        continue;
                    }
                    // Posterior in log space: alpha + beta - sentence score.
                    tagExpects [t] [st] = MathHelper.LogAdd (v.alpha.v [st] + v.beta.v [st] - sentS, tagExpects [t] [st]);
                }
            }
        }
    }
    );
    // Fold every worker's accumulator into worker 0's and return it.
    var te = tagExpectsArray [0];
    for (int i = 1; i < nthread; ++i) {
        for (int j = 0; j < te.Length; ++j) {
            for (int k = 0; k < te[j].Length; ++k) {
                te [j] [k] = MathHelper.LogAdd (te [j] [k], tagExpectsArray [i] [j] [k]);
            }
        }
    }
    return te;
}
// Sanity-checks that the grammar is normalized: the total probability mass
// under each parent subtag, summed over binary and unary rules, must be 1
// (log mass ~ 0, tolerance 0.01). Subtags with no mass at all are tolerated.
// Throws if any subtag's mass deviates from 1 beyond the tolerance.
public static void CheckProbs(
    //LALexiconBuilder lexicon,
    LAPCFGrammar rules)
{
    // Accumulate log mass per (parent tag, subtag); -inf is log-space zero.
    var mass = new double[rules.TotalTagCount][];
    for (int tag = 0; tag < mass.Length; ++tag)
    {
        mass [tag] = new double[rules.GetSubTagCount (tag)];
    }
    ArrayHelper.Fill (mass, double.NegativeInfinity);

    LAPCFGrammar.CollectTagMass (mass, rules.brules);
    LAPCFGrammar.CollectTagMass (mass, rules.urules);

    foreach (var tagMass in mass)
    {
        foreach (double logMass in tagMass)
        {
            // A subtag with no rules (NaN / +-inf mass) is skipped rather
            // than treated as an error.
            if (double.IsNaN (logMass) || double.IsInfinity (logMass))
            {
                continue;
            }
            if (Math.Abs (logMass) > 0.01)
            {
                throw new Exception ("table is not normalized!");
            }
        }
    }
}
// M-step of EM training: renormalizes the grammar from the posterior counts
// accumulated during the E-step, then clears all posterior-count tables.
//
// When lexiconOnly is true, only the terminal (lexicon) rules are
// re-estimated; unary and binary rules are left untouched (their posterior
// counts are still cleared, and their mass is excluded from normalization).
public static void CalculateNewScores(
    //LALexiconBuilder lexicon,
    LAPCFGrammar rules,
    bool lexiconOnly = false)
{
    //lexicon.CalculateNewScores();
    //lexicon.ClearExpectedCounts();

    // Per-(tag, subtag) log mass used as the normalizer; -inf is log zero.
    var mass = new double[rules.TotalTagCount][];
    for (int tag = 0; tag < mass.Length; ++tag)
    {
        mass [tag] = new double[rules.GetSubTagCount (tag)];
    }
    ArrayHelper.Fill (mass, double.NegativeInfinity);

    // Terminal counts always contribute to the normalizer.
    LAPCFGrammar.CollectTagMass (mass, rules.tposteriorCounts);

    if (!lexiconOnly)
    {
        // Full update: unary/binary mass joins the normalizer and those
        // rule tables are re-estimated too.
        LAPCFGrammar.CollectTagMass (mass, rules.uposteriorCounts);
        LAPCFGrammar.CollectTagMass (mass, rules.bposteriorCounts);

        LAPCFGrammar.CopyRules (rules.bposteriorCounts, rules.brules);
        LAPCFGrammar.Normalize (mass, rules.brules);

        LAPCFGrammar.CopyRules (rules.uposteriorCounts, rules.urules);
        LAPCFGrammar.Normalize (mass, rules.urules);
    }

    // Terminal rules are re-estimated in either mode.
    LAPCFGrammar.CopyRules (rules.tposteriorCounts, rules.trules);
    LAPCFGrammar.Normalize (mass, rules.trules);

    // Reset all posterior counts for the next E-step.
    LAPCFGrammar.ClearRules (rules.bposteriorCounts);
    LAPCFGrammar.ClearRules (rules.uposteriorCounts);
    LAPCFGrammar.ClearRules (rules.tposteriorCounts);

    //rules.PropMaxUnaryPath();
}