Пример #1
0
        /// <summary>
        /// Rebuilds the chart for the current word sequence, filling cells in CYK order
        /// (single-word cells first, then spans of increasing width).
        /// </summary>
        /// <remarks>
        /// Both pools are recycled on entry. Single-word cells are populated by
        /// <c>MatchLexicon</c>; wider cells by <c>MatchBinaryRules</c> over every split point,
        /// after which each non-null <c>l1v</c> vertex is mirrored into <c>l2v</c>. Every cell is
        /// then passed to <c>MatchUnaryRules</c> and closed with <c>Finish</c>.
        /// </remarks>
        /// <param name="grammar">Grammar providing the rule tables, tag counts and <c>ROOTID</c>.</param>
        /// <param name="epool">Edge pool; recycled first, then used for every edge allocation.</param>
        /// <param name="vpool">Vertex pool; recycled first, then used for every vertex allocation.</param>
        /// <param name="tagCapacity">Optional per-tag capacity forwarded to the matchers; may be null.</param>
        public void BuildHyperGraph(LAPCFGrammar grammar, HyperEdgePool epool, HyperVertexPool vpool, int[] tagCapacity = null)
        {
            epool.Recycle ();
            vpool.Recycle ();
            this.ROOTID = grammar.ROOTID;

            // Scratch buffer with one slot more than the largest sub-tag count.
            lbuf = new double[grammar.subTagCounts.Max () + 1];

            var sentenceLength = wids.Length;
            var lastIndex = sentenceLength - 1;

            // A width-one cell spans the whole input only when there is exactly one word.
            bool singleWord = sentenceLength == 1;

            // CYK, width 1: lexical cells on the diagonal.
            for (int pos = 0; pos < sentenceLength; ++pos) {
                var cell = new HyperCell (pos, pos + 1, grammar.TotalTagCount);
                chart [pos, pos] = cell;

                var posTags = allowedPoSTags == null ? null : allowedPoSTags [pos];
                var tagProbs = rawTagProbs == null ? null : rawTagProbs [pos];

                MatchLexicon (grammar, cell, wids [pos], epool, vpool, tagCapacity, posTags, tagProbs, singleWord);
                MatchUnaryRules (grammar, cell, epool, vpool, tagCapacity, singleWord);
                cell.Finish ();
            }

            // CYK, width >= 2: combine two adjacent, already finished sub-spans.
            for (int width = 2; width <= sentenceLength; ++width) {
                for (int left = 0; left + width <= sentenceLength; ++left) {
                    int right = left + width - 1;
                    bool isRoot = left == 0 && right == lastIndex;

                    var cell = new HyperCell (left, left + width, grammar.TotalTagCount);
                    chart [left, right] = cell;

                    for (int split = left; split < right; ++split) {
                        MatchBinaryRules (grammar, cell, chart [left, split], chart [split + 1, right], epool, vpool, tagCapacity, isRoot);
                    }

                    // Mirror every l1v vertex into the same l2v slot; in the root cell only
                    // the ROOTID vertex is mirrored.
                    for (int slot = 0; slot < cell.l1v.Length; ++slot) {
                        var lower = cell.l1v [slot];

                        if (lower == null || (isRoot && lower.tag != ROOTID)) {
                            continue;
                        }

                        cell.l2v [slot] = vpool.Allocate (false, lower.tag, lower.beta.Length, lower.beta.v.Length);
                        epool.Allocate (cell.l2v [slot], lower);
                    }

                    MatchUnaryRules (grammar, cell, epool, vpool, tagCapacity, isRoot);
                    cell.Finish ();
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Replaces every chart cell with a new one, restricted by the tags that were present
        /// in the cell it replaces, visiting cells in CYK order.
        /// </summary>
        /// <remarks>
        /// For each cell the tags of the existing <c>l1v</c>/<c>l2v</c> vertices are recorded in
        /// two flag arrays, which are handed to the <c>Expand*</c> routines together with
        /// <paramref name="tagMap"/>. A multi-word span whose existing cell reports
        /// <c>IsEmptyCell ()</c> receives a new, immediately finished cell and is counted as pruned.
        /// Unlike <c>BuildHyperGraph</c>, this method does not recycle the pools.
        /// </remarks>
        /// <param name="grammar">Grammar providing the rule tables, tag counts and <c>ROOTID</c>.</param>
        /// <param name="tagMap">Tag mapping forwarded unchanged to the <c>Expand*</c> routines.</param>
        /// <param name="epool">Edge pool used for every edge allocation.</param>
        /// <param name="vpool">Vertex pool used for every vertex allocation.</param>
        /// <param name="tagCapacity">Optional per-tag capacity forwarded to the <c>Expand*</c> routines; may be null.</param>
        /// <returns>The number of multi-word spans that were skipped because their existing cell was empty.</returns>
        public int ExpandHyperGraph(LAPCFGrammar grammar, int[] tagMap, HyperEdgePool epool, HyperVertexPool vpool, int[] tagCapacity = null)
        {
            this.ROOTID = grammar.ROOTID;

            // Scratch buffer with one slot more than the largest sub-tag count.
            lbuf = new double[grammar.subTagCounts.Max () + 1];

            int skippedSpans = 0;

            var lv1flags = new TimedArray<bool> (grammar.TotalTagCount);
            var lv2flags = new TimedArray<bool> (grammar.TotalTagCount);

            var sentenceLength = wids.Length;
            var lastIndex = sentenceLength - 1;

            // CYK, width 1: lexical cells on the diagonal.
            for (int pos = 0; pos < sentenceLength; ++pos) {
                RecordCellTagFlags (chart [pos, pos], lv1flags, lv2flags);

                bool isRoot = pos == 0 && pos == lastIndex;
                var cell = new HyperCell (pos, pos + 1, grammar.TotalTagCount);
                chart [pos, pos] = cell;

                var posTags = allowedPoSTags == null ? null : allowedPoSTags [pos];
                var tagProbs = rawTagProbs == null ? null : rawTagProbs [pos];

                ExpandLexicon (
                    grammar, cell, lv1flags, lv2flags, tagMap,
                    wids [pos], epool, vpool, tagCapacity,
                    posTags, tagProbs, isRoot);
                ExpandUnaryRules (grammar, cell, lv1flags, lv2flags, tagMap, epool, vpool, tagCapacity, isRoot);
                cell.Finish ();
            }

            // CYK, width >= 2.
            for (int width = 2; width <= sentenceLength; ++width) {
                for (int left = 0; left + width <= sentenceLength; ++left) {
                    int right = left + width - 1;

                    var previous = chart [left, right];
                    RecordCellTagFlags (previous, lv1flags, lv2flags);

                    bool isRoot = left == 0 && right == lastIndex;
                    var cell = new HyperCell (left, left + width, grammar.TotalTagCount);
                    chart [left, right] = cell;

                    if (previous.IsEmptyCell ()) {
                        // The existing cell held nothing: leave the new cell empty and count it.
                        skippedSpans += 1;
                        cell.Finish ();
                        continue;
                    }

                    for (int split = left; split < right; ++split) {
                        ExpandBinaryRules (grammar, cell, chart [left, split], chart [split + 1, right],
                                           lv1flags, lv2flags, tagMap,
                                           epool, vpool, tagCapacity, isRoot);
                    }

                    // Mirror every l1v vertex into the same l2v slot; in the root cell only
                    // the ROOTID vertex is mirrored.
                    for (int slot = 0; slot < cell.l1v.Length; ++slot) {
                        var lower = cell.l1v [slot];

                        if (lower == null || (isRoot && lower.tag != ROOTID)) {
                            continue;
                        }

                        cell.l2v [slot] = vpool.Allocate (false, lower.tag, lower.beta.Length, lower.beta.v.Length);
                        epool.Allocate (cell.l2v [slot], lower);
                    }

                    ExpandUnaryRules (grammar, cell,
                                      lv1flags, lv2flags, tagMap,
                                      epool, vpool, tagCapacity, isRoot);
                    cell.Finish ();
                }
            }

            return skippedSpans;
        }

        /// <summary>
        /// Clears both flag arrays, then sets the flag of every tag carried by a non-null
        /// vertex in <c>cell.l1v</c> (into <paramref name="lv1flags"/>) and in
        /// <c>cell.l2v</c> (into <paramref name="lv2flags"/>).
        /// </summary>
        private static void RecordCellTagFlags(HyperCell cell, TimedArray<bool> lv1flags, TimedArray<bool> lv2flags)
        {
            lv1flags.Clear ();
            foreach (var vertex in cell.l1v) {
                if (vertex != null) {
                    lv1flags [vertex.tag] = true;
                }
            }

            lv2flags.Clear ();
            foreach (var vertex in cell.l2v) {
                if (vertex != null) {
                    lv2flags [vertex.tag] = true;
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Applies the grammar's unary rules to every non-null vertex in <c>cell.l1v</c>,
        /// adding the resulting parent vertices and edges to <c>cell.l2v</c>.
        /// </summary>
        /// <param name="grammar">Grammar whose <c>urules</c> table is indexed by child tag.</param>
        /// <param name="cell">Cell whose <c>l1v</c> is read and whose <c>l2v</c> is extended.</param>
        /// <param name="epool">Pool from which one edge is allocated per applied rule.</param>
        /// <param name="vpool">Pool from which a parent vertex is allocated when its <c>l2v</c> slot is still null.</param>
        /// <param name="tagCapacity">Optional per-tag capacity; -1 is passed to the vertex pool when null.</param>
        /// <param name="isRoot">
        /// When true only rules producing <c>grammar.ROOTID</c> are applied; when false
        /// exactly those rules are skipped.
        /// </param>
        private static void MatchUnaryRules(
            LAPCFGrammar grammar,
            HyperCell cell,
            HyperEdgePool epool,
            HyperVertexPool vpool,
            int[] tagCapacity,
            bool isRoot)
        {
            var children = cell.l1v;

            for (int slot = 0; slot < children.Length; ++slot) {
                var child = children [slot];
                if (child == null) {
                    continue;
                }

                var candidates = grammar.urules [child.tag];
                if (candidates == null) {
                    continue;
                }

                foreach (var rule in candidates) {
                    if (rule == null) {
                        // The first null entry ends the rule list.
                        break;
                    }

                    var parentTag = rule.ptag;

                    // A rule is usable only when "produces ROOT" agrees with "is the root cell".
                    bool producesRoot = parentTag == grammar.ROOTID;
                    if (producesRoot != isRoot) {
                        continue;
                    }

                    if (cell.l2v [parentTag] == null) {
                        int capacity = -1;
                        if (tagCapacity != null) {
                            capacity = tagCapacity [parentTag];
                        }
                        cell.l2v [parentTag] = vpool.Allocate (false, parentTag, grammar.GetSubTagCount (parentTag), capacity);
                    }

                    epool.Allocate (cell.l2v [parentTag], child, rule.scores, null);
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Seeds a single-word cell from the lexicon rules stored in <c>table.trules[wid]</c>.
        /// </summary>
        /// <remarks>
        /// For every accepted rule an <c>l1v</c> vertex is allocated and linked to a terminal
        /// vertex for the word; an <c>l2v</c> vertex linked to that <c>l1v</c> vertex is then
        /// allocated as well, except in a root cell for tags other than <c>table.ROOTID</c>.
        /// </remarks>
        /// <param name="table">Grammar providing <c>trules</c>, <c>ROOTID</c> and sub-tag counts.</param>
        /// <param name="cell">Cell whose <c>l1v</c> and <c>l2v</c> slots are filled.</param>
        /// <param name="wid">Word id; indexes <c>table.trules</c> and labels the terminal vertex.</param>
        /// <param name="epool">Pool used for edge allocation.</param>
        /// <param name="vpool">Pool used for vertex allocation.</param>
        /// <param name="tagCapacity">Optional per-tag capacity; -1 is passed to the vertex pool when null.</param>
        /// <param name="allowedTags">Optional filter indexed by tag; rules whose tag maps to false are skipped.</param>
        /// <param name="tagProbs">
        /// Optional per-tag score added to every finite rule score; a tag whose entry is
        /// negative infinity is skipped entirely.
        /// </param>
        /// <param name="isRoot">Whether the cell is the root cell; ROOTID rules are skipped when false.</param>
        private static void MatchLexicon(
            LAPCFGrammar table,
            HyperCell cell,
            int wid,
            HyperEdgePool epool,
            HyperVertexPool vpool,
            int[] tagCapacity,
            bool[] allowedTags,
            double[] tagProbs,
            bool isRoot)
        {
            // Terminal vertex shared by every lexical edge created for this word.
            var wordVertex = new HyperVertex (true, wid, 1);

            foreach (var rule in table.trules [wid]) {
                if (rule == null) {
                    // The first null entry ends the rule list.
                    break;
                }

                var tag = rule.tag;
                bool isRootTag = tag == table.ROOTID;

                if (isRootTag && !isRoot) {
                    continue;
                }

                if (allowedTags != null && !allowedTags [tag]) {
                    continue;
                }

                var scores = rule.scores;

                if (tagProbs != null) {
                    var tagScore = tagProbs [tag];
                    if (double.IsNegativeInfinity (tagScore)) {
                        continue;
                    }

                    // Adjust a clone so the grammar's own rule scores stay untouched.
                    scores = rule.Clone ().scores;
                    for (int s = 0; s < scores.Length; ++s) {
                        if (!double.IsNegativeInfinity (scores [s])) {
                            scores [s] += tagScore;
                        }
                    }
                }

                int capacity = -1;
                if (tagCapacity != null) {
                    capacity = tagCapacity [tag];
                }

                cell.l1v [tag] = vpool.Allocate (false, tag, table.GetSubTagCount (tag), capacity);
                epool.Allocate (cell.l1v [tag], wordVertex, scores, null);

                // In the root cell only the ROOTID tag gets an l2v vertex.
                if (isRoot && !isRootTag) {
                    continue;
                }

                cell.l2v [tag] = vpool.Allocate (false, tag, table.GetSubTagCount (tag), capacity);
                epool.Allocate (cell.l2v [tag], cell.l1v [tag]);
            }
        }
Пример #5
0
        /// <summary>
        /// Applies the grammar's binary rules to every pair of vertices taken from
        /// <c>lcell.l2v</c> and <c>rcell.l2v</c>, adding the resulting parent vertices and
        /// edges to <c>pcell.l1v</c>.
        /// </summary>
        /// <param name="grammar">Grammar whose <c>brules</c> table is indexed by left tag, then right tag.</param>
        /// <param name="pcell">Parent cell whose <c>l1v</c> slots receive the new vertices.</param>
        /// <param name="lcell">Cell supplying the left children (its <c>l2v</c> array).</param>
        /// <param name="rcell">Cell supplying the right children (its <c>l2v</c> array).</param>
        /// <param name="epool">Pool from which one edge is allocated per applied rule.</param>
        /// <param name="vpool">Pool from which a parent vertex is allocated when its <c>l1v</c> slot is still null.</param>
        /// <param name="tagCapacity">Optional per-tag capacity; -1 is passed to the vertex pool when null.</param>
        /// <param name="isRoot">Whether <paramref name="pcell"/> is the root cell; rules producing <c>grammar.ROOTID</c> are skipped when false.</param>
        private static void MatchBinaryRules(
            LAPCFGrammar grammar,
            HyperCell pcell,
            HyperCell lcell,
            HyperCell rcell,
            HyperEdgePool epool,
            HyperVertexPool vpool,
            int[] tagCapacity,
            bool isRoot)
        {
            foreach (var lv in lcell.l2v) {
                // l2v is filled by tag index elsewhere, so a slot may be null; skip it
                // instead of dereferencing it.
                if (lv == null) {
                    continue;
                }

                var rprules = grammar.brules [lv.tag];
                if (rprules == null) {
                    continue;
                }

                foreach (var rv in rcell.l2v) {
                    if (rv == null) {
                        continue;
                    }

                    var prules = rprules [rv.tag];
                    if (prules == null) {
                        continue;
                    }

                    for (int p = 0; p < prules.Length; ++p) {
                        var rule = prules [p];

                        if (rule == null) {
                            // The first null entry ends the rule list.
                            break;
                        }

                        if (rule.ptag == grammar.ROOTID && !isRoot) {
                            continue;
                        }

                        if (pcell.l1v [rule.ptag] == null) {
                            var cap = tagCapacity == null ? -1 : tagCapacity [rule.ptag];
                            pcell.l1v [rule.ptag] = vpool.Allocate (false, rule.ptag, grammar.GetSubTagCount (rule.ptag), cap);
                        }

                        epool.Allocate (pcell.l1v [rule.ptag], lv, rv, rule.scores, null);
                    }
                }
            }
        }