예제 #1
0
        /// <summary>
        /// Adds the lexical (word-to-tag) vertices and edges for word <paramref name="wid"/> to <paramref name="cell"/>.
        /// </summary>
        /// <param name="table">Grammar supplying the lexical rules (<c>trules</c>), the root tag id and the sub-tag counts.</param>
        /// <param name="cell">Cell whose <c>l1v</c> / <c>l2v</c> slots are written, indexed by tag.</param>
        /// <param name="wid">Word id; selects the rule list and is passed to the terminal vertex.</param>
        /// <param name="epool">Pool from which edges are allocated.</param>
        /// <param name="vpool">Pool from which tag vertices are allocated.</param>
        /// <param name="tagCapacity">Optional per-tag capacity handed to the vertex pool; when <c>null</c>, -1 is used.</param>
        /// <param name="allowedTags">Optional tag filter; when non-null, tags whose entry is <c>false</c> are skipped.</param>
        /// <param name="tagProbs">Optional per-tag score added to every rule score that is not negative infinity; a negative-infinity entry skips the tag.</param>
        /// <param name="isRoot">When <c>false</c>, root-tag rules are skipped; when <c>true</c>, only the root tag also gets an <c>l2v</c> vertex.</param>
        private static void MatchLexicon(
            LAPCFGrammar table,
            HyperCell cell,
            int wid,
            HyperEdgePool epool,
            HyperVertexPool vpool,
            int[] tagCapacity,
            bool[] allowedTags,
            double[] tagProbs,
            bool isRoot)
        {
            // Single terminal vertex shared by every lexical edge created below.
            var terminal = new HyperVertex (true, wid, 1);

            foreach (var rule in table.trules [wid])
            {
                // A null entry terminates the rule list.
                if (rule == null)
                {
                    break;
                }

                var tag = rule.tag;
                bool isRootTag = tag == table.ROOTID;

                if (isRootTag && !isRoot)
                {
                    continue;
                }

                if (allowedTags != null && !allowedTags [tag])
                {
                    continue;
                }

                var scored = rule;

                if (tagProbs != null)
                {
                    var bonus = tagProbs [tag];

                    if (double.IsNegativeInfinity (bonus))
                    {
                        continue;
                    }

                    // Work on a copy so the grammar's own rule scores stay untouched.
                    scored = rule.Clone ();

                    var scores = scored.scores;

                    for (int k = 0; k < scores.Length; ++k)
                    {
                        // Negative-infinity scores are left as they are.
                        if (double.IsNegativeInfinity (scores [k]))
                        {
                            continue;
                        }

                        scores [k] += bonus;
                    }
                }

                var capacity = tagCapacity != null ? tagCapacity [tag] : -1;
                var subTagCount = table.GetSubTagCount (tag);

                // First-level vertex: tag -> terminal, weighted by the (possibly adjusted) rule scores.
                var lower = vpool.Allocate (false, tag, subTagCount, capacity);
                cell.l1v [tag] = lower;
                epool.Allocate (lower, terminal, scored.scores, null);

                // In a root cell only the root tag is promoted to the second level.
                if (isRoot && !isRootTag)
                {
                    continue;
                }

                // Second-level vertex linked directly to the first-level one.
                var upper = vpool.Allocate (false, tag, subTagCount, capacity);
                cell.l2v [tag] = upper;
                epool.Allocate (upper, lower);
            }
        }
예제 #2
0
        /// <summary>
        /// Applies the grammar's unary rules to every first-level vertex of <paramref name="cell"/>,
        /// creating second-level (<c>l2v</c>) parent vertices on demand and linking them with edges.
        /// </summary>
        /// <param name="grammar">Grammar supplying <c>urules</c> (indexed by child tag), the root tag id and sub-tag counts.</param>
        /// <param name="cell">Cell whose <c>l1v</c> entries are read and whose <c>l2v</c> entries are filled.</param>
        /// <param name="epool">Pool from which edges are allocated.</param>
        /// <param name="vpool">Pool from which parent vertices are allocated.</param>
        /// <param name="tagCapacity">Optional per-tag capacity handed to the vertex pool; when <c>null</c>, -1 is used.</param>
        /// <param name="isRoot">When <c>true</c>, only rules producing the root tag are applied; when <c>false</c>, only rules that do not.</param>
        private static void MatchUnaryRules(
            LAPCFGrammar grammar,
            HyperCell cell,
            HyperEdgePool epool,
            HyperVertexPool vpool,
            int[] tagCapacity,
            bool isRoot)
        {
            foreach (var child in cell.l1v)
            {
                // Unfilled tag slots are null.
                if (child == null)
                {
                    continue;
                }

                var unaryRules = grammar.urules [child.tag];

                if (unaryRules == null)
                {
                    continue;
                }

                foreach (var rule in unaryRules)
                {
                    // A null entry terminates the rule list.
                    if (rule == null)
                    {
                        break;
                    }

                    var parentTag = rule.ptag;

                    // Root-producing rules are used exactly when this is the root cell.
                    bool producesRoot = parentTag == grammar.ROOTID;

                    if (producesRoot != isRoot)
                    {
                        continue;
                    }

                    var parent = cell.l2v [parentTag];

                    if (parent == null)
                    {
                        var capacity = tagCapacity != null ? tagCapacity [parentTag] : -1;
                        parent = vpool.Allocate (false, parentTag, grammar.GetSubTagCount (parentTag), capacity);
                        cell.l2v [parentTag] = parent;
                    }

                    epool.Allocate (parent, child, rule.scores, null);
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Applies the grammar's binary rules to every pair of second-level vertices taken from
        /// <paramref name="lcell"/> (left child) and <paramref name="rcell"/> (right child), creating
        /// first-level (<c>l1v</c>) parent vertices in <paramref name="pcell"/> on demand.
        /// </summary>
        /// <param name="grammar">Grammar supplying <c>brules</c> (indexed by left tag, then right tag), the root tag id and sub-tag counts.</param>
        /// <param name="pcell">Parent cell whose <c>l1v</c> entries are filled.</param>
        /// <param name="lcell">Cell providing the left-child vertices (<c>l2v</c>).</param>
        /// <param name="rcell">Cell providing the right-child vertices (<c>l2v</c>).</param>
        /// <param name="epool">Pool from which edges are allocated.</param>
        /// <param name="vpool">Pool from which parent vertices are allocated.</param>
        /// <param name="tagCapacity">Optional per-tag capacity handed to the vertex pool; when <c>null</c>, -1 is used.</param>
        /// <param name="isRoot">When <c>false</c>, rules producing the root tag are skipped.</param>
        private static void MatchBinaryRules(
            LAPCFGrammar grammar,
            HyperCell pcell,
            HyperCell lcell,
            HyperCell rcell,
            HyperEdgePool epool,
            HyperVertexPool vpool,
            int[] tagCapacity,
            bool isRoot)
        {
            foreach (var lv in lcell.l2v) {
                // Cell vertex slots are created lazily per tag (see the null test on pcell.l1v
                // below), so a slot may still be null; skip it instead of dereferencing it.
                if (lv == null) {
                    continue;
                }

                var rprules = grammar.brules [lv.tag];
                if (rprules == null) {
                    continue;
                }

                foreach (var rv in rcell.l2v) {
                    // Same guard for the right child.
                    if (rv == null) {
                        continue;
                    }

                    var prules = rprules [rv.tag];

                    if (prules == null) {
                        continue;
                    }

                    for (int p = 0; p < prules.Length; ++p) {
                        var rule = prules [p];

                        // A null entry terminates the rule list.
                        if (rule == null) {
                            break;
                        }

                        if (rule.ptag == grammar.ROOTID && !isRoot) {
                            continue;
                        }

                        if (pcell.l1v [rule.ptag] == null) {
                            var cap = tagCapacity == null ? -1 : tagCapacity [rule.ptag];
                            pcell.l1v [rule.ptag] = vpool.Allocate (false, rule.ptag, grammar.GetSubTagCount (rule.ptag), cap);
                        }

                        epool.Allocate (pcell.l1v [rule.ptag], lv, rv, rule.scores, null);
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Builds the sub-tag renumbering that results from merging the first half of <paramref name="mergeCands"/>.
        /// </summary>
        /// <param name="rules">Grammar supplying the tag count and the current sub-tag count of each tag.</param>
        /// <param name="mergeCands">Merge candidates; only the first <c>Count / 2</c> entries are applied
        /// (presumably the list is already ordered by preference - confirm against the caller).</param>
        /// <param name="subtagMap">For each tag, maps every current sub-tag index to its index after merging.</param>
        /// <param name="isMerged">For each tag, flags the sub-tags that take part in a merge.</param>
        /// <param name="newSubTagCounts">For each tag, the sub-tag count after merging.</param>
        private static void CreateMergeMapping(LAPCFGrammar rules, List<MergeHelper> mergeCands, out int[][] subtagMap, out bool[][] isMerged, out int[] newSubTagCounts)
        {
            int tagCount = rules.TotalTagCount;

            subtagMap = new int[tagCount][];
            isMerged = new bool[tagCount][];
            newSubTagCounts = new int[tagCount];

            // Start from the identity mapping with nothing merged.
            for (int tag = 0; tag < tagCount; ++tag)
            {
                int subCount = rules.GetSubTagCount (tag);

                var identity = new int[subCount];

                for (int s = 0; s < subCount; ++s)
                {
                    identity [s] = s;
                }

                subtagMap [tag] = identity;
                isMerged [tag] = new bool[subCount];
                newSubTagCounts [tag] = subCount;
            }

            int mergeCount = mergeCands.Count / 2;

            for (int c = 0; c < mergeCount; ++c)
            {
                var cand = mergeCands [c];
                var tag = cand.tag;

                // The candidate's sub-tag index addresses the pair (2 * subtag, 2 * subtag + 1).
                int first = cand.subtag * 2;

                var flags = isMerged [tag];
                flags [first] = true;
                flags [first + 1] = true;

                newSubTagCounts [tag]--;

                // The second member of the pair and everything after it shift down by one.
                var map = subtagMap [tag];

                for (int s = first + 1; s < map.Length; ++s)
                {
                    map [s]--;
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Accumulates, for every tag and sub-tag, the value <c>alpha + beta - rootScore</c> over all
        /// non-terminal vertices of the hypergraphs built from <paramref name="treebank"/>.
        /// Values are combined with <c>MathHelper.LogAdd</c>, starting from negative infinity.
        /// </summary>
        /// <param name="nthread">Number of parallel workers; worker <c>w</c> handles trees <c>w, w + nthread, ...</c>.</param>
        /// <param name="vocab">Vocabulary passed to the parser.</param>
        /// <param name="tagset">Tag set passed to the parser.</param>
        /// <param name="rules">Grammar passed to the parser; also defines the shape of the result.</param>
        /// <param name="treebank">Trees to process.</param>
        /// <returns>A jagged array indexed by tag, then sub-tag, holding the combined accumulations of all workers.</returns>
        public static double[][] SubtagExpectedCounts(
            int nthread,
            Vocabulary vocab,
            TagSet tagset,
            //LALexiconBuilder lexicon,
            LAPCFGrammar rules,
            List<PhrasalTree> treebank)
        {
            var parser = new HyperGraphParser (vocab, tagset, rules);

            // One private accumulator table per worker, so the parallel loop needs no locking on them.
            var perWorker = new double[nthread][][];

            for (int worker = 0; worker < nthread; ++worker)
            {
                var table = new double[rules.TotalTagCount][];
                perWorker [worker] = table;

                for (int tag = 0; tag < table.Length; ++tag)
                {
                    table [tag] = new double[rules.GetSubTagCount (tag)];
                }

                ArrayHelper.Fill (table, double.NegativeInfinity);
            }

            Parallel.For (0, nthread, worker =>
            {
                var counts = perWorker [worker];

                // Strided partition of the treebank across workers.
                for (int index = worker; index < treebank.Count; index += nthread)
                {
                    var graph = parser.BuildHyperGraph (treebank [index]);

                    graph.SumForward ();
                    graph.SumBackward ();

                    double rootScore = graph.RootScore;

                    // Trees whose root score is not a finite number contribute nothing.
                    if (double.IsNaN (rootScore) || double.IsInfinity (rootScore))
                    {
                        continue;
                    }

                    foreach (var vertex in graph.Vs)
                    {
                        if (vertex.TYPE == VTYPE.TERMINAL)
                        {
                            continue;
                        }

                        int tag = vertex.tag;
                        var alpha = vertex.alpha;
                        var beta = vertex.beta;

                        for (int sub = 0; sub < vertex.subtagCount; ++sub)
                        {
                            var inside = alpha.v [sub];

                            if (double.IsNaN (inside) || double.IsInfinity (inside))
                            {
                                continue;
                            }

                            var outside = beta.v [sub];

                            if (double.IsNaN (outside) || double.IsInfinity (outside))
                            {
                                continue;
                            }

                            if (alpha.pruned [sub] || beta.pruned [sub])
                            {
                                continue;
                            }

                            counts [tag] [sub] = MathHelper.LogAdd (inside + outside - rootScore, counts [tag] [sub]);
                        }
                    }
                }
            }
            );

            // Fold the tables of workers 1..n-1 into worker 0's table and return it.
            var merged = perWorker [0];

            for (int worker = 1; worker < nthread; ++worker)
            {
                var other = perWorker [worker];

                for (int tag = 0; tag < merged.Length; ++tag)
                {
                    for (int sub = 0; sub < merged [tag].Length; ++sub)
                    {
                        merged [tag] [sub] = MathHelper.LogAdd (merged [tag] [sub], other [tag] [sub]);
                    }
                }
            }

            return merged;
        }
예제 #6
0
        /// <summary>
        /// Verifies that the mass collected from the binary and unary rules of <paramref name="rules"/>
        /// stays within a small tolerance of zero for every tag / sub-tag
        /// (presumably a log-domain sum, so zero would mean total probability one - confirm against CollectTagMass).
        /// </summary>
        /// <param name="rules">Grammar whose <c>brules</c> and <c>urules</c> are checked.</param>
        /// <exception cref="InvalidOperationException">
        /// A collected mass is a finite number whose absolute value exceeds 0.01.
        /// </exception>
        public static void CheckProbs(
            //LALexiconBuilder lexicon,
            LAPCFGrammar rules)
        {
            // Largest absolute deviation from zero that is still accepted.
            const double tolerance = 0.01;

            double[][] expects = new double[rules.TotalTagCount][];

            for (int i = 0; i < expects.Length; ++i) {
                expects [i] = new double[rules.GetSubTagCount (i)];
            }

            ArrayHelper.Fill (expects, double.NegativeInfinity);

            LAPCFGrammar.CollectTagMass (expects, rules.brules);
            LAPCFGrammar.CollectTagMass (expects, rules.urules);

            for (int p = 0; p < expects.Length; ++p) {
                for (int sp = 0; sp < expects[p].Length; ++sp) {
                    double s = expects [p] [sp];

                    // Entries that received no finite mass are tolerated rather than reported
                    // (a stricter "no mass" check was deliberately left disabled here).
                    if (double.IsNaN (s) || double.IsInfinity (s)) {
                        continue;
                    }

                    if (Math.Abs (s) > tolerance) {
                        // A specific exception type instead of the bare System.Exception;
                        // callers catching Exception still catch this.
                        throw new InvalidOperationException ("table is not normalized!");
                    }
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Re-estimates the grammar's rule scores from its posterior counts and then clears those counts.
        /// </summary>
        /// <param name="rules">Grammar whose posterior counts are read and whose rule tables are overwritten.</param>
        /// <param name="lexiconOnly">
        /// When <c>true</c>, only the lexical rules (<c>trules</c>) are re-estimated, using mass collected from the
        /// lexical posterior counts alone. When <c>false</c>, binary, unary and lexical rules are all re-estimated
        /// against mass collected from all three count tables. All three count tables are cleared in both cases.
        /// </param>
        public static void CalculateNewScores(
            //LALexiconBuilder lexicon,
            LAPCFGrammar rules,
            bool lexiconOnly = false)
        {
            //lexicon.CalculateNewScores();
            //lexicon.ClearExpectedCounts();

            var tagMass = new double[rules.TotalTagCount][];

            for (int tag = 0; tag < tagMass.Length; ++tag)
            {
                tagMass [tag] = new double[rules.GetSubTagCount (tag)];
            }

            ArrayHelper.Fill (tagMass, double.NegativeInfinity);

            // Lexical counts always contribute to the per-tag mass.
            LAPCFGrammar.CollectTagMass (tagMass, rules.tposteriorCounts);

            if (!lexiconOnly)
            {
                // Full update: add the unary and binary mass, then re-estimate those rule tables first.
                LAPCFGrammar.CollectTagMass (tagMass, rules.uposteriorCounts);
                LAPCFGrammar.CollectTagMass (tagMass, rules.bposteriorCounts);

                LAPCFGrammar.CopyRules (rules.bposteriorCounts, rules.brules);
                LAPCFGrammar.Normalize (tagMass, rules.brules);
                LAPCFGrammar.CopyRules (rules.uposteriorCounts, rules.urules);
                LAPCFGrammar.Normalize (tagMass, rules.urules);
            }

            // Lexical rules are re-estimated in both modes.
            LAPCFGrammar.CopyRules (rules.tposteriorCounts, rules.trules);
            LAPCFGrammar.Normalize (tagMass, rules.trules);

            // Reset every count table for the next round.
            LAPCFGrammar.ClearRules (rules.bposteriorCounts);
            LAPCFGrammar.ClearRules (rules.uposteriorCounts);
            LAPCFGrammar.ClearRules (rules.tposteriorCounts);

            //rules.PropMaxUnaryPath();
        }