/// <summary>
/// Builds a hypergraph for every tree in <paramref name="treebank"/>, runs the forward and
/// backward passes, and returns the sum of the root scores of the trees that scored finitely.
/// </summary>
/// <param name="nthread">Number of parallel workers; worker k handles trees k, k + nthread, ...</param>
/// <param name="treebank">Trees to score.</param>
/// <param name="rules">Grammar handed to every worker's parser.</param>
/// <param name="vocab">Vocabulary handed to every worker's parser.</param>
/// <param name="tagSet">Tag set handed to every worker's parser.</param>
/// <param name="failed">
/// Receives the number of trees that either threw while being processed or produced an
/// infinite/NaN root score.
/// </param>
/// <returns>The accumulated root score over all successfully scored trees.</returns>
private static double ParseGraphs(int nthread, List<PhrasalTree> treebank, LAPCFGrammar rules, Vocabulary vocab, TagSet tagSet, out int failed)
{
    failed = 0;

    double totalScore = 0;
    int totalFailures = 0;
    var gate = new object();

    Parallel.For(0, nthread, worker =>
    {
        // Each worker owns its parser and its partial sums; only the final merge is locked.
        var localParser = new HyperGraphParser(vocab, tagSet, rules);
        double localScore = 0;
        int localFailures = 0;

        for (int idx = worker; idx < treebank.Count; idx += nthread)
        {
            try
            {
                var hg = localParser.BuildHyperGraph(treebank[idx]);
                hg.SumForward();
                hg.SumBackward();

                bool degenerate = double.IsInfinity(hg.RootScore) || double.IsNaN(hg.RootScore);
                if (degenerate)
                {
                    localFailures++;
                }
                else
                {
                    localScore += hg.RootScore;
                }
            }
            catch
            {
                // Any failure while processing a tree is counted, not propagated.
                localFailures++;
            }
        }

        lock (gate)
        {
            totalFailures += localFailures;
            totalScore += localScore;
        }
    });

    // An out parameter cannot be captured by the lambda, hence the intermediate counter.
    failed = totalFailures;
    return totalScore;
}
/// <summary>
/// Scores every tree in <paramref name="treebank"/> like a plain forward/backward pass, and
/// additionally calls <c>CollectExpectedCount</c> on each finitely scored hypergraph. Every
/// worker collects into its own grammar instance; afterwards the posterior counts of the
/// worker grammars are added into <paramref name="rules"/>.
/// </summary>
/// <param name="nthread">Number of parallel workers; worker k handles trees k, k + nthread, ...</param>
/// <param name="treebank">Trees to score.</param>
/// <param name="rules">
/// Grammar used by worker 0 and the destination of the merged posterior counts. Other workers
/// use instances obtained from <c>CloneWithSharedParameters</c>.
/// </param>
/// <param name="vocab">Vocabulary handed to every worker's parser.</param>
/// <param name="tagSet">Tag set handed to every worker's parser.</param>
/// <param name="failed">
/// Receives the number of trees that either threw while being processed or produced an
/// infinite/NaN root score.
/// </param>
/// <returns>The accumulated root score over all successfully scored trees.</returns>
private static double ParseGraphAndCollect(int nthread, List<PhrasalTree> treebank, LAPCFGrammar rules, Vocabulary vocab, TagSet tagSet, out int failed)
{
    failed = 0;

    double totalScore = 0;
    int totalFailures = 0;
    var gate = new object();

    // Slot 0 is the caller's grammar; the remaining slots are clones, one per extra worker.
    var grammars = new List<LAPCFGrammar> { rules };
    while (grammars.Count < nthread)
    {
        grammars.Add(rules.CloneWithSharedParameters());
    }

    Parallel.For(0, nthread, worker =>
    {
        var localParser = new HyperGraphParser(vocab, tagSet, grammars[worker]);
        double localScore = 0;
        int localFailures = 0;

        for (int idx = worker; idx < treebank.Count; idx += nthread)
        {
            try
            {
                var hg = localParser.BuildHyperGraph(treebank[idx]);
                hg.SumForward();
                hg.SumBackward();

                bool degenerate = double.IsInfinity(hg.RootScore) || double.IsNaN(hg.RootScore);
                if (degenerate)
                {
                    localFailures++;
                }
                else
                {
                    hg.CollectExpectedCount();
                    localScore += hg.RootScore;
                }
            }
            catch
            {
                // Any failure while processing a tree is counted, not propagated.
                localFailures++;
            }
        }

        lock (gate)
        {
            totalFailures += localFailures;
            totalScore += localScore;
        }
    });

    // Fold the counts gathered by the clones (slots 1..n-1) back into the caller's grammar.
    for (int g = 1; g < grammars.Count; ++g)
    {
        var workerGrammar = grammars[g];
        LAPCFGrammar.ApplyToRules((target, source) => target.Add(source), rules.tposteriorCounts, workerGrammar.tposteriorCounts);
        LAPCFGrammar.ApplyToRules((target, source) => target.Add(source), rules.uposteriorCounts, workerGrammar.uposteriorCounts);
        LAPCFGrammar.ApplyToRules((target, source) => target.Add(source), rules.bposteriorCounts, workerGrammar.bposteriorCounts);
    }

    failed = totalFailures;
    return totalScore;
}
/// <summary>
/// Accumulates, for every tag and every adjacent subtag pair (2k, 2k + 1), the difference
/// between a tree's root score and the score obtained when that pair is merged at a single
/// node, summed over all non-terminal nodes of all finitely scored trees.
/// </summary>
/// <param name="nthread">Number of parallel workers; worker k handles trees k, k + nthread, ...</param>
/// <param name="vocab">Vocabulary handed to the parser.</param>
/// <param name="tagset">Tag set handed to the parser.</param>
/// <param name="rules">Grammar handed to the parser; its <c>TotalTagCount</c> sizes the result.</param>
/// <param name="treebank">Trees to process.</param>
/// <param name="tagProb">
/// Per-tag, per-subtag weights added to the alpha scores when merging a pair.
/// NOTE(review): presumably log-domain subtag probabilities - confirm against the caller.
/// </param>
/// <returns>
/// A jagged array indexed by [tag][pair]. The row of a tag whose <paramref name="tagProb"/>
/// row has length 1 is left as allocated (not assigned here).
/// </returns>
private static double[][] CollectMergeLoss(int nthread, Vocabulary vocab, TagSet tagset, LAPCFGrammar rules, List<PhrasalTree> treebank, double[][] tagProb)
{
    // One accumulator table per worker so the parallel loop needs no locking.
    double[][][] mlossList = new double[nthread][][];

    for (int tid = 0; tid < nthread; ++tid)
    {
        double[][] mergeLoss = new double[rules.TotalTagCount][];

        for (int i = 0; i < mergeLoss.Length; ++i)
        {
            if (tagProb[i].Length == 1)
            {
                continue;
            }

            mergeLoss[i] = new double[tagProb[i].Length / 2];
        }

        ArrayHelper.Fill(mergeLoss, 0);
        mlossList[tid] = mergeLoss;
    }

    var parser = new HyperGraphParser(vocab, tagset, rules);

    Parallel.For(0, nthread, threadid =>
    {
        var mergeLoss = mlossList[threadid];

        for (int treeId = threadid; treeId < treebank.Count; treeId += nthread)
        {
            var tree = treebank[treeId];
            var g = parser.BuildHyperGraph(tree);

            g.SumForward();
            g.SumBackward();

            double sentS = g.RootScore;

            if (double.IsNaN(sentS) || double.IsInfinity(sentS))
            {
                continue;
            }

            foreach (var v in g.Vs)
            {
                if (v.TYPE == VTYPE.TERMINAL)
                {
                    continue;
                }

                int t = v.tag;

                if (v.subtagCount == 1)
                {
                    continue;
                }

                // Per-subtag alpha + beta at this node. A pruned subtag must contribute
                // nothing to the log-sum taken below, so it is set to negative infinity -
                // the same sentinel used further down for the removed sibling. Leaving it
                // at the array default of 0.0 would count it as a log-score of zero.
                double[] marginals = new double[v.subtagCount];

                for (int st = 0; st < v.subtagCount; ++st)
                {
                    marginals[st] = v.alpha.pruned[st]
                        ? double.NegativeInfinity
                        : v.alpha.v[st] + v.beta.v[st];
                }

                for (int st = 0; st < v.subtagCount / 2; ++st)
                {
                    int l = st * 2;
                    int r = st * 2 + 1;

                    // Skip pairs where either member has an unusable score or is pruned.
                    if (double.IsNaN(v.alpha.v[l]) || double.IsInfinity(v.alpha.v[l]) ||
                        double.IsNaN(v.beta.v[l]) || double.IsInfinity(v.beta.v[l]) ||
                        double.IsNaN(v.alpha.v[r]) || double.IsInfinity(v.alpha.v[r]) ||
                        double.IsNaN(v.beta.v[r]) || double.IsInfinity(v.beta.v[r]) ||
                        v.alpha.pruned[l] || v.alpha.pruned[r])
                    {
                        continue;
                    }

                    double lllhd = marginals[l];
                    double rllhd = marginals[r];

                    // Score of the merged pair: weighted combination of the alphas plus
                    // the combination of the betas.
                    double mllhd = MathHelper.LogAdd(tagProb[t][l] + v.alpha.v[l], tagProb[t][r] + v.alpha.v[r])
                                   + MathHelper.LogAdd(v.beta.v[l], v.beta.v[r]);

                    // Temporarily put the merged score in the left slot and remove the right one.
                    marginals[l] = mllhd;
                    marginals[r] = double.NegativeInfinity;

                    double xSentScore = MathHelper.LogAdd(marginals);

                    mergeLoss[t][st] += sentS - xSentScore;

                    // Restore the slots for the next pair.
                    marginals[l] = lllhd;
                    marginals[r] = rllhd;
                }
            }
        }
    });

    // Sum the per-worker tables into the first one and return it.
    var ml = mlossList[0];

    for (int threadid = 1; threadid < mlossList.Length; ++threadid)
    {
        var xl = mlossList[threadid];

        for (int i = 0; i < ml.Length; ++i)
        {
            if (ml[i] == null)
            {
                continue;
            }

            for (int j = 0; j < ml[i].Length; ++j)
            {
                ml[i][j] += xl[i][j];
            }
        }
    }

    return ml;
}
/// <summary>
/// Builds a hypergraph for every tree in <paramref name="treebank"/> using the supplied
/// parser, spreading the trees over <paramref name="nthread"/> workers, and reports the
/// number of failures on standard error.
/// </summary>
/// <param name="nthread">Number of parallel workers; worker k handles trees k, k + nthread, ...</param>
/// <param name="treebank">Trees to convert.</param>
/// <param name="parser">Parser used by all workers to build the hypergraphs.</param>
/// <param name="failedNum">Receives the number of trees whose construction threw.</param>
/// <returns>
/// The successfully built hypergraphs. Each worker appends its batch when it finishes, so
/// the overall order follows worker completion rather than treebank order.
/// </returns>
private static List<HyperGraph> BuildGraphs(int nthread, List<PhrasalTree> treebank, HyperGraphParser parser, out int failedNum)
{
    failedNum = 0;

    var allGraphs = new List<HyperGraph>();
    var gate = new object();
    int totalFailures = 0;

    Parallel.For(0, nthread, worker =>
    {
        var built = new List<HyperGraph>();
        int localFailures = 0;

        int idx = worker;
        while (idx < treebank.Count)
        {
            try
            {
                built.Add(parser.BuildHyperGraph(treebank[idx]));
            }
            catch
            {
                // A tree that cannot be converted is counted and skipped.
                localFailures++;
            }

            idx += nthread;
        }

        lock (gate)
        {
            totalFailures += localFailures;
            allGraphs.AddRange(built);
        }
    });

    // An out parameter cannot be captured by the lambda, hence the intermediate counter.
    failedNum = totalFailures;
    Console.Error.WriteLine("failed to build: {0}", failedNum);

    return allGraphs;
}
/// <summary>
/// For every tag and subtag, log-adds the quantity alpha + beta - rootScore over all
/// non-terminal nodes of all finitely scored trees in <paramref name="treebank"/>.
/// </summary>
/// <param name="nthread">Number of parallel workers; worker k handles trees k, k + nthread, ...</param>
/// <param name="vocab">Vocabulary handed to the parser.</param>
/// <param name="tagset">Tag set handed to the parser.</param>
/// <param name="rules">
/// Grammar handed to the parser; its <c>TotalTagCount</c> and <c>GetSubTagCount</c> size the result.
/// </param>
/// <param name="treebank">Trees to process.</param>
/// <returns>
/// A jagged array indexed by [tag][subtag]. Entries start at negative infinity and hold the
/// log-added contributions of every node that was not skipped.
/// </returns>
public static double[][] SubtagExpectedCounts(int nthread, Vocabulary vocab, TagSet tagset, LAPCFGrammar rules, List<PhrasalTree> treebank)
{
    var parser = new HyperGraphParser(vocab, tagset, rules);

    // One table per worker so the parallel loop needs no locking.
    var perWorker = new double[nthread][][];
    for (int w = 0; w < nthread; ++w)
    {
        var table = new double[rules.TotalTagCount][];
        for (int tag = 0; tag < table.Length; ++tag)
        {
            table[tag] = new double[rules.GetSubTagCount(tag)];
        }

        ArrayHelper.Fill(table, double.NegativeInfinity);
        perWorker[w] = table;
    }

    Parallel.For(0, nthread, worker =>
    {
        var counts = perWorker[worker];

        for (int idx = worker; idx < treebank.Count; idx += nthread)
        {
            var graph = parser.BuildHyperGraph(treebank[idx]);
            graph.SumForward();
            graph.SumBackward();

            double rootScore = graph.RootScore;
            if (double.IsNaN(rootScore) || double.IsInfinity(rootScore))
            {
                continue;
            }

            foreach (var node in graph.Vs)
            {
                if (node.TYPE == VTYPE.TERMINAL)
                {
                    continue;
                }

                int tag = node.tag;

                for (int st = 0; st < node.subtagCount; ++st)
                {
                    // Guards are applied in sequence: alpha score, beta score, pruning flags.
                    double a = node.alpha.v[st];
                    if (double.IsNaN(a) || double.IsInfinity(a))
                    {
                        continue;
                    }

                    double b = node.beta.v[st];
                    if (double.IsNaN(b) || double.IsInfinity(b))
                    {
                        continue;
                    }

                    if (node.alpha.pruned[st] || node.beta.pruned[st])
                    {
                        continue;
                    }

                    counts[tag][st] = MathHelper.LogAdd(a + b - rootScore, counts[tag][st]);
                }
            }
        }
    });

    // Log-add every other worker's table into the first one and return it.
    var merged = perWorker[0];
    for (int w = 1; w < nthread; ++w)
    {
        var other = perWorker[w];

        for (int j = 0; j < merged.Length; ++j)
        {
            for (int k = 0; k < merged[j].Length; ++k)
            {
                merged[j][k] = MathHelper.LogAdd(merged[j][k], other[j][k]);
            }
        }
    }

    return merged;
}