/// <summary>
/// Populates <c>probs</c> with one array of smoothed log scores per word in
/// <c>wordTagCount</c>. Each array has <c>tags.PTCount</c> entries, indexed by tag id.
/// </summary>
/// <param name="tags">Supplies the array length (<c>PTCount</c>) and the tag-id to tag-string mapping.</param>
/// <param name="smoothCount">Value added to the word/tag count in the numerator and, scaled by
/// (tag type count + 1), to the tag count in the denominator.</param>
public void Build(TagSet tags, double smoothCount)
{
    foreach (var entry in wordTagCount)
    {
        double[] logProbs = new double[tags.PTCount];

        for (int tagId = 0; tagId < logProbs.Length; ++tagId)
        {
            var tagName = tags.GetTagString(tagId);

            // All three counts are looked up before the zero-count check,
            // in this order, regardless of which branch is taken below.
            int wordTagHits = entry.Value[tagName];
            int tagHits = tagCount[tagName];
            int tagTypes = tagTypeCount[tagName];

            // A tag with a non-positive count gets log-score -infinity.
            logProbs[tagId] = tagHits <= 0
                ? double.NegativeInfinity
                : Math.Log((wordTagHits + smoothCount) / (tagHits + (tagTypes + 1.0) * smoothCount));
        }

        probs.Add(entry.Key, logProbs);
    }
}
/// <summary>
/// Recursively rebuilds a parse tree from the hypergraph by following, for each
/// vertex, the edge and child subtags recorded in <c>v.traces[subtag]</c>.
/// </summary>
/// <param name="v">Vertex to expand; may be null.</param>
/// <param name="subtag">Index into <c>v.traces</c> selecting which trace to follow.</param>
/// <param name="tagSet">Used to turn <c>v.tag</c> into the node's tag string.</param>
/// <returns>
/// The node built for <paramref name="v"/>; <c>null</c> when <paramref name="v"/> is null or a
/// terminal vertex. For a DUMMY edge the result is whatever the edge's source vertex yields
/// (which may itself be <c>null</c>).
/// </returns>
/// <exception cref="InvalidOperationException">The selected edge has an unhandled edge type.</exception>
private PhrasalNode ExtractViterbiParse(HyperVertex v, int subtag, TagSet tagSet)
{
    if (v == null || v.TYPE == VTYPE.TERMINAL)
    {
        return null;
    }

    PhrasalNode node = new PhrasalNode();
    node.Tag = tagSet.GetTagString(v.tag);

    var trace = v.traces[subtag];
    var bestEdge = trace.edge;
    if (bestEdge == null)
    {
        // No recorded edge: return the bare tagged node without children.
        return node;
    }

    switch (bestEdge.TYPE)
    {
        case ETYPE.BINARY:
        {
            var left = ExtractViterbiParse(bestEdge.from0, trace.subtag0, tagSet);
            var right = ExtractViterbiParse(bestEdge.from1, trace.subtag1, tagSet);

            // The recursive call returns null for null/terminal vertices; skip those
            // instead of inserting null into Children and dereferencing it.
            if (left != null)
            {
                node.Children.Add(left);
                left.Parent = node;
            }

            if (right != null)
            {
                node.Children.Add(right);
                right.Parent = node;
            }

            break;
        }

        case ETYPE.UNARY:
        {
            var child = ExtractViterbiParse(bestEdge.from0, trace.subtag0, tagSet);
            if (child != null)
            {
                node.Children.Add(child);
                child.Parent = node;
            }

            break;
        }

        case ETYPE.TERMINAL:
            // Nothing below this node is materialised.
            break;

        case ETYPE.DUMMY:
            // A dummy edge contributes no node of its own: the subtree of its
            // source vertex replaces the node created above.
            node = ExtractViterbiParse(bestEdge.from0, trace.subtag0, tagSet);
            break;

        default:
            throw new InvalidOperationException("unknown edge type!");
    }

    return node;
}
/// <summary>
/// Writes this grammar to <paramref name="sw"/> as text: the full type name of
/// <c>LAPCFGrammar</c>, the VER / NTCount / PTCount / ROOTID options, the
/// "TerminalRule", "UnaryRule" and "BinaryRule" sections, the subtag traces, and
/// finally the type name again.
/// </summary>
/// <param name="sw">Destination writer; its <c>NestLevel</c> is raised by one while each
/// section body is written and restored afterwards.</param>
/// <param name="tagSet">Maps tag ids stored in the rules to tag strings.</param>
/// <param name="vocab">Maps word ids stored in terminal rules to word strings.</param>
/// <remarks>
/// Scores that are infinite or NaN are not written.
/// NOTE(review): scores are rendered through <c>string.Format</c> without an explicit
/// format provider, i.e. with the current culture - confirm the reader parses them the same way.
/// </remarks>
public void DumpToStream(TextModelWriter sw, TagSet tagSet, Vocabulary vocab)
{
    var name = typeof(LAPCFGrammar).FullName;

    // Header.
    sw.Write(name);
    sw.WriteOption("VER", VER);
    sw.WriteOption("NTCount", NTCount);
    sw.WriteOption("PTCount", PTCount);
    sw.WriteOption("ROOTID", ROOTID);

    // Terminal rules: one line per (tag subtag, word) with a finite score.
    sw.Write("TerminalRule");
    sw.NestLevel += 1;
    foreach (var ruleGroup in trules)
    {
        if (ruleGroup == null)
        {
            continue;
        }

        foreach (var rule in ruleGroup)
        {
            if (rule == null)
            {
                continue;
            }

            var word = vocab.GetWordString(rule.word);
            var tag = tagSet.GetTagString(rule.tag);

            for (int p = 0; p < rule.scores.Length; ++p)
            {
                var score = rule.scores[p];
                if (double.IsInfinity(score) || double.IsNaN(score))
                {
                    continue;
                }

                sw.Write(string.Format("{0}_{1}\t{2}\t{3}", tag, p, word, score));
            }
        }
    }
    sw.NestLevel -= 1;

    // Unary rules: scores are indexed [child subtag][parent subtag].
    sw.Write("UnaryRule");
    sw.NestLevel += 1;
    foreach (var ruleGroup in urules)
    {
        if (ruleGroup == null)
        {
            continue;
        }

        foreach (var rule in ruleGroup)
        {
            if (rule == null)
            {
                continue;
            }

            var ptag = tagSet.GetTagString(rule.ptag);
            var ctag = tagSet.GetTagString(rule.ctag);

            for (int c = 0; c < rule.scores.Length; ++c)
            {
                for (int p = 0; p < rule.scores[c].Length; ++p)
                {
                    var score = rule.scores[c][p];
                    if (double.IsInfinity(score) || double.IsNaN(score))
                    {
                        continue;
                    }

                    sw.Write(string.Format("{0}_{1}\t{2}_{3}\t{4}", ptag, p, ctag, c, score));
                }
            }
        }
    }
    sw.NestLevel -= 1;

    // Binary rules: scores are indexed [left subtag][right subtag][parent subtag].
    sw.Write("BinaryRule");
    sw.NestLevel += 1;
    foreach (var outerGroup in brules)
    {
        if (outerGroup == null)
        {
            continue;
        }

        foreach (var innerGroup in outerGroup)
        {
            if (innerGroup == null)
            {
                continue;
            }

            foreach (var rule in innerGroup)
            {
                if (rule == null)
                {
                    continue;
                }

                var ptag = tagSet.GetTagString(rule.ptag);
                var ltag = tagSet.GetTagString(rule.ltag);
                var rtag = tagSet.GetTagString(rule.rtag);

                for (int l = 0; l < rule.scores.Length; ++l)
                {
                    for (int r = 0; r < rule.scores[l].Length; ++r)
                    {
                        for (int p = 0; p < rule.scores[l][r].Length; ++p)
                        {
                            var score = rule.scores[l][r][p];
                            if (double.IsInfinity(score) || double.IsNaN(score))
                            {
                                continue;
                            }

                            sw.Write(
                                string.Format("{0}_{1}\t{2}_{3}\t{4}_{5}\t{6}", ptag, p, ltag, l, rtag, r, score));
                        }
                    }
                }
            }
        }
    }
    sw.NestLevel -= 1;

    // Subtag traces: a count, then per trace its length followed by one
    // space-joined line per element.
    sw.WriteOption("TraceCount", subtagTraces.Count);
    foreach (var trace in subtagTraces)
    {
        sw.WriteOption("TRACE", trace.Length);
        sw.NestLevel += 1;
        foreach (var row in trace)
        {
            sw.Write(string.Join(" ", row));
        }
        sw.NestLevel -= 1;
    }

    // Closing marker mirrors the opening one.
    sw.Write(name);
}