Esempio n. 1
0
        /// <summary>
        /// Fills <c>probs</c> with one array of smoothed log-probabilities per word found in
        /// <c>wordTagCount</c>.
        /// </summary>
        /// <remarks>
        /// Each array has <c>tags.PTCount</c> entries, indexed by tag id. For a tag whose count
        /// in <c>tagCount</c> is not positive the entry is <see cref="double.NegativeInfinity"/>;
        /// otherwise it is
        /// <c>log((wordTag + smoothCount) / (tag + (tagTypes + 1) * smoothCount))</c>,
        /// where the three counts are looked up by the tag's string name.
        /// </remarks>
        /// <param name="tags">Tag set supplying <c>PTCount</c> and the id-to-string mapping.</param>
        /// <param name="smoothCount">Additive smoothing constant used in the formula above.</param>
        public void Build(TagSet tags, double smoothCount)
        {
            foreach (var entry in wordTagCount)
            {
                var logProbs = new double[tags.PTCount];

                for (int tagId = 0; tagId < logProbs.Length; ++tagId)
                {
                    var tagName = tags.GetTagString(tagId);

                    // Counts are keyed by tag name: per-word count first, then the two global tables.
                    int wordTagOccurrences = entry.Value[tagName];
                    int tagOccurrences = tagCount[tagName];
                    int tagTypes = tagTypeCount[tagName];

                    // A tag with no recorded count gets log(0) rather than a smoothed estimate.
                    logProbs[tagId] = tagOccurrences <= 0
                        ? double.NegativeInfinity
                        : Math.Log((wordTagOccurrences + smoothCount) / (tagOccurrences + (tagTypes + 1.0) * smoothCount));
                }

                probs.Add(entry.Key, logProbs);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Recursively rebuilds a <see cref="PhrasalNode"/> tree from the hypergraph by following,
        /// for vertex <paramref name="v"/> and the given <paramref name="subtag"/>, the edge stored
        /// in <c>v.traces[subtag]</c>.
        /// </summary>
        /// <param name="v">Vertex to expand; may be <c>null</c>.</param>
        /// <param name="subtag">Index into <c>v.traces</c> selecting which trace to follow.</param>
        /// <param name="tagSet">Tag set used to turn <c>v.tag</c> into the node's tag string.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="v"/> is <c>null</c> or a TERMINAL vertex; a childless
        /// node when the trace has no edge or the edge is TERMINAL; otherwise a node with the
        /// non-null sub-parses attached as children. For a DUMMY edge the result of expanding
        /// <c>from0</c> is returned in place of this vertex's own node (and may be <c>null</c>).
        /// </returns>
        /// <exception cref="Exception">The edge type is not one of the handled values.</exception>
        private PhrasalNode ExtractViterbiParse(HyperVertex v, int subtag, TagSet tagSet)
        {
            if (v == null || v.TYPE == VTYPE.TERMINAL)
            {
                return null;
            }

            PhrasalNode node = new PhrasalNode();
            node.Tag = tagSet.GetTagString(v.tag);

            // Read the trace once; it carries both the edge and the child subtags.
            var trace = v.traces[subtag];
            var bestEdge = trace.edge;

            if (bestEdge == null)
            {
                return node;
            }

            switch (bestEdge.TYPE)
            {
                case ETYPE.BINARY:
                {
                    var left = ExtractViterbiParse(bestEdge.from0, trace.subtag0, tagSet);
                    var right = ExtractViterbiParse(bestEdge.from1, trace.subtag1, tagSet);

                    // The recursive call returns null for null/TERMINAL vertices, so a child
                    // must be checked before it is attached or dereferenced.
                    if (left != null)
                    {
                        node.Children.Add(left);
                        left.Parent = node;
                    }
                    if (right != null)
                    {
                        node.Children.Add(right);
                        right.Parent = node;
                    }
                    break;
                }
                case ETYPE.UNARY:
                {
                    var child = ExtractViterbiParse(bestEdge.from0, trace.subtag0, tagSet);
                    if (child != null)
                    {
                        node.Children.Add(child);
                        child.Parent = node;
                    }
                    break;
                }
                case ETYPE.TERMINAL:
                    // Nothing to attach: the node stays without children.
                    break;
                case ETYPE.DUMMY:
                    // A dummy edge contributes no node of its own; pass the sub-parse through.
                    node = ExtractViterbiParse(bestEdge.from0, trace.subtag0, tagSet);
                    break;
                default:
                    throw new Exception("unknown edge type!");
            }
            return node;
        }
Esempio n. 3
0
        /// <summary>
        /// Writes this grammar to <paramref name="sw"/> as text.
        /// </summary>
        /// <remarks>
        /// Output order: the full type name of <c>LAPCFGrammar</c>; the options <c>VER</c>,
        /// <c>NTCount</c>, <c>PTCount</c> and <c>ROOTID</c>; the sections <c>TerminalRule</c>,
        /// <c>UnaryRule</c> and <c>BinaryRule</c> (each written one nest level deeper);
        /// <c>TraceCount</c> followed by one <c>TRACE</c> entry per element of
        /// <c>subtagTraces</c>; and finally the type name again.
        /// Null rule containers and null rules are skipped, and a score is written only when it
        /// is neither infinite nor NaN.
        /// </remarks>
        /// <param name="sw">Destination writer.</param>
        /// <param name="tagSet">Resolves tag ids to the tag strings written in each rule line.</param>
        /// <param name="vocab">Resolves word ids to the word strings written for terminal rules.</param>
        public void DumpToStream(TextModelWriter sw, TagSet tagSet, Vocabulary vocab)
        {
            // NOTE(review): string.Format without an IFormatProvider formats the scores with the
            // current culture - confirm the reader parses them under the same culture.
            var name = typeof(LAPCFGrammar).FullName;

            sw.Write(name);
            sw.WriteOption("VER", VER);
            sw.WriteOption("NTCount", NTCount);
            sw.WriteOption("PTCount", PTCount);
            sw.WriteOption("ROOTID", ROOTID);

            // Terminal rules: "<tag>_<subtag> TAB <word> TAB <score>".
            sw.Write("TerminalRule");
            sw.NestLevel += 1;
            foreach (var ruleGroup in trules)
            {
                if (ruleGroup == null)
                {
                    continue;
                }

                foreach (var rule in ruleGroup)
                {
                    if (rule == null)
                    {
                        continue;
                    }

                    var word = vocab.GetWordString(rule.word);
                    var tag = tagSet.GetTagString(rule.tag);
                    for (int parent = 0; parent < rule.scores.Length; ++parent)
                    {
                        var score = rule.scores[parent];
                        if (double.IsInfinity(score) || double.IsNaN(score))
                        {
                            continue;
                        }

                        sw.Write(string.Format("{0}_{1}\t{2}\t{3}", tag, parent, word, score));
                    }
                }
            }
            sw.NestLevel -= 1;

            // Unary rules: "<ptag>_<p> TAB <ctag>_<c> TAB <score>"; scores are indexed [child][parent].
            sw.Write("UnaryRule");
            sw.NestLevel += 1;
            foreach (var ruleGroup in urules)
            {
                if (ruleGroup == null)
                {
                    continue;
                }

                foreach (var rule in ruleGroup)
                {
                    if (rule == null)
                    {
                        continue;
                    }

                    var ptag = tagSet.GetTagString(rule.ptag);
                    var ctag = tagSet.GetTagString(rule.ctag);
                    for (int child = 0; child < rule.scores.Length; ++child)
                    {
                        for (int parent = 0; parent < rule.scores[child].Length; ++parent)
                        {
                            var score = rule.scores[child][parent];
                            if (double.IsInfinity(score) || double.IsNaN(score))
                            {
                                continue;
                            }

                            sw.Write(string.Format("{0}_{1}\t{2}_{3}\t{4}", ptag, parent, ctag, child, score));
                        }
                    }
                }
            }
            sw.NestLevel -= 1;

            // Binary rules: "<ptag>_<p> TAB <ltag>_<l> TAB <rtag>_<r> TAB <score>";
            // scores are indexed [left][right][parent].
            sw.Write("BinaryRule");
            sw.NestLevel += 1;
            foreach (var outerGroup in brules)
            {
                if (outerGroup == null)
                {
                    continue;
                }

                foreach (var innerGroup in outerGroup)
                {
                    if (innerGroup == null)
                    {
                        continue;
                    }

                    foreach (var rule in innerGroup)
                    {
                        if (rule == null)
                        {
                            continue;
                        }

                        var ptag = tagSet.GetTagString(rule.ptag);
                        var ltag = tagSet.GetTagString(rule.ltag);
                        var rtag = tagSet.GetTagString(rule.rtag);
                        for (int left = 0; left < rule.scores.Length; ++left)
                        {
                            for (int right = 0; right < rule.scores[left].Length; ++right)
                            {
                                for (int parent = 0; parent < rule.scores[left][right].Length; ++parent)
                                {
                                    var score = rule.scores[left][right][parent];
                                    if (double.IsInfinity(score) || double.IsNaN(score))
                                    {
                                        continue;
                                    }

                                    sw.Write(
                                        string.Format("{0}_{1}\t{2}_{3}\t{4}_{5}\t{6}",
                                        ptag, parent, ltag, left, rtag, right, score)
                                    );
                                }
                            }
                        }
                    }
                }
            }
            sw.NestLevel -= 1;

            // Subtag traces: a count, then per trace its length and one space-joined line per element.
            sw.WriteOption("TraceCount", subtagTraces.Count);
            foreach (var trace in subtagTraces)
            {
                sw.WriteOption("TRACE", trace.Length);
                sw.NestLevel += 1;
                foreach (var row in trace)
                {
                    sw.Write(string.Join(" ", row));
                }
                sw.NestLevel -= 1;
            }

            // The type name is repeated as the last thing written.
            sw.Write(name);
        }