Пример #1
0
        /// <summary>
        /// Builds a hypergraph for every tree in <paramref name="treebank"/>, runs the forward and
        /// backward sums on it, and adds up the root scores of the graphs that evaluate cleanly.
        /// </summary>
        /// <param name="nthread">Number of parallel workers; worker <c>k</c> handles trees <c>k, k + nthread, ...</c>.</param>
        /// <param name="treebank">Trees to process.</param>
        /// <param name="rules">Grammar handed to each worker's <c>HyperGraphParser</c>.</param>
        /// <param name="vocab">Vocabulary handed to each worker's parser.</param>
        /// <param name="tagSet">Tag set handed to each worker's parser.</param>
        /// <param name="failed">Receives the number of trees that threw or whose root score was NaN or infinite.</param>
        /// <returns>Sum of the root scores of all trees that were not counted as failed.</returns>
        private static double ParseGraphs(int nthread,
            List<PhrasalTree> treebank,
            LAPCFGrammar rules,
            Vocabulary vocab,
            TagSet tagSet,
            out int failed)
        {
            failed = 0;

            // An out parameter cannot be captured by the lambda below, so the totals
            // live in locals and are copied out once the parallel loop has finished.
            var gate = new object();
            double totalScore = 0;
            int totalFailures = 0;

            Parallel.For(0, nthread, worker =>
            {
                // Each worker gets its own parser instance.
                var parser = new HyperGraphParser(vocab, tagSet, rules);
                double workerScore = 0;
                int workerFailures = 0;

                for (int idx = worker; idx < treebank.Count; idx += nthread)
                {
                    try
                    {
                        var graph = parser.BuildHyperGraph(treebank[idx]);
                        graph.SumForward();
                        graph.SumBackward();

                        var score = graph.RootScore;
                        if (!double.IsInfinity(score) && !double.IsNaN(score))
                        {
                            workerScore += score;
                        }
                        else
                        {
                            workerFailures++;
                        }
                    }
                    catch
                    {
                        // Any exception while building or evaluating a graph counts as one failure.
                        workerFailures++;
                    }
                }

                // Fold this worker's partial results into the shared totals.
                lock (gate)
                {
                    totalFailures += workerFailures;
                    totalScore += workerScore;
                }
            });

            failed = totalFailures;
            return totalScore;
        }
Пример #2
0
        /// <summary>
        /// Builds and evaluates a hypergraph for every tree in <paramref name="treebank"/>, calls
        /// <c>CollectExpectedCount</c> on each graph with a usable root score, and adds up those scores.
        /// </summary>
        /// <remarks>
        /// The first worker uses <paramref name="rules"/> itself; every other worker uses a grammar
        /// obtained from <c>CloneWithSharedParameters</c>. After the parallel loop the clones'
        /// <c>tposteriorCounts</c>, <c>uposteriorCounts</c> and <c>bposteriorCounts</c> are added
        /// into the corresponding members of <paramref name="rules"/>.
        /// </remarks>
        /// <param name="nthread">Number of parallel workers; worker <c>k</c> handles trees <c>k, k + nthread, ...</c>.</param>
        /// <param name="treebank">Trees to process.</param>
        /// <param name="rules">Grammar used by the first worker and the target of the merged posterior counts.</param>
        /// <param name="vocab">Vocabulary handed to each worker's parser.</param>
        /// <param name="tagSet">Tag set handed to each worker's parser.</param>
        /// <param name="failed">Receives the number of trees that threw or whose root score was NaN or infinite.</param>
        /// <returns>Sum of the root scores of all trees that were not counted as failed.</returns>
        private static double ParseGraphAndCollect(int nthread,
            List<PhrasalTree> treebank,
            LAPCFGrammar rules,
            Vocabulary vocab,
            TagSet tagSet,
            out int failed)
        {
            failed = 0;

            // An out parameter cannot be captured by the lambda below, so the totals
            // live in locals and are copied out at the end.
            var gate = new object();
            double totalScore = 0;
            int totalFailures = 0;

            // Slot 0 is the caller's grammar; the remaining slots are clones, one per extra worker.
            var grammars = new List<LAPCFGrammar> { rules };
            for (int k = 1; k < nthread; ++k)
            {
                grammars.Add(rules.CloneWithSharedParameters());
            }

            Parallel.For(0, nthread, worker =>
            {
                var parser = new HyperGraphParser(vocab, tagSet, grammars[worker]);
                double workerScore = 0;
                int workerFailures = 0;

                for (int idx = worker; idx < treebank.Count; idx += nthread)
                {
                    try
                    {
                        var graph = parser.BuildHyperGraph(treebank[idx]);
                        graph.SumForward();
                        graph.SumBackward();

                        var score = graph.RootScore;
                        if (double.IsInfinity(score) || double.IsNaN(score))
                        {
                            workerFailures++;
                        }
                        else
                        {
                            graph.CollectExpectedCount();
                            // Read the score again after collection, exactly where the value is consumed.
                            workerScore += graph.RootScore;
                        }
                    }
                    catch
                    {
                        // Any exception while building, evaluating or collecting counts as one failure.
                        workerFailures++;
                    }
                }

                // Fold this worker's partial results into the shared totals.
                lock (gate)
                {
                    totalFailures += workerFailures;
                    totalScore += workerScore;
                }
            });

            // Add the posterior counts gathered by each cloned grammar into the caller's grammar.
            for (int g = 1; g < grammars.Count; ++g)
            {
                var clone = grammars[g];
                LAPCFGrammar.ApplyToRules((x, y) => x.Add(y), rules.tposteriorCounts, clone.tposteriorCounts);
                LAPCFGrammar.ApplyToRules((x, y) => x.Add(y), rules.uposteriorCounts, clone.uposteriorCounts);
                LAPCFGrammar.ApplyToRules((x, y) => x.Add(y), rules.bposteriorCounts, clone.bposteriorCounts);
            }

            failed = totalFailures;
            return totalScore;
        }
Пример #3
0
        /// <summary>
        /// Accumulates, for every tag and every adjacent subtag pair <c>(2k, 2k + 1)</c>, the
        /// difference between the sentence root score and the score obtained when that pair is
        /// merged, summed over all non-terminal vertices of all trees in <paramref name="treebank"/>.
        /// </summary>
        /// <remarks>
        /// Each worker writes into its own buffer; the buffers are summed into the first one after
        /// the parallel loop, so the accumulation itself needs no locking. One parser instance is
        /// shared by all workers.
        /// NOTE(review): other routines in this file create one parser per worker - confirm that
        /// <c>BuildHyperGraph</c> is safe to call concurrently on a shared parser.
        /// </remarks>
        /// <param name="nthread">Number of parallel workers; must be at least 1 because the first worker's buffer is returned.</param>
        /// <param name="vocab">Vocabulary handed to the parser.</param>
        /// <param name="tagset">Tag set handed to the parser.</param>
        /// <param name="rules">Grammar handed to the parser; its <c>TotalTagCount</c> sizes the result.</param>
        /// <param name="treebank">Trees to process; worker <c>k</c> handles trees <c>k, k + nthread, ...</c>.</param>
        /// <param name="tagProb">Per-tag, per-subtag weights added to the alpha scores when a pair is merged (assumed to be log-domain - TODO confirm).</param>
        /// <returns>
        /// Jagged array indexed <c>[tag][pair]</c>. Rows are left unallocated for tags whose
        /// <paramref name="tagProb"/> row has length 1.
        /// </returns>
        private static double[][] CollectMergeLoss(int nthread, Vocabulary vocab, TagSet tagset, LAPCFGrammar rules, List<PhrasalTree> treebank, double[][] tagProb)
        {
            // One accumulator per worker so the parallel loop never writes to shared cells.
            double[][][] mlossList = new double[nthread][][];
            for (int tid = 0; tid < nthread; ++tid) {
                double[][] mergeLoss = new double[rules.TotalTagCount][];

                for (int i = 0; i < mergeLoss.Length; ++i) {
                    // A tag with a single subtag has no pair to merge; its row is not allocated.
                    if (tagProb [i].Length == 1) {
                        continue;
                    }

                    mergeLoss [i] = new double[tagProb [i].Length / 2];
                }

                ArrayHelper.Fill (mergeLoss, 0);

                mlossList [tid] = mergeLoss;
            }

            var parser = new HyperGraphParser (vocab, tagset, rules);

            Parallel.For (0, nthread, threadid =>
            {
                var mergeLoss = mlossList [threadid];
                for (int treeId = threadid; treeId < treebank.Count; treeId += nthread) {
                    var tree = treebank [treeId];

                    var g = parser.BuildHyperGraph (tree);

                    g.SumForward ();
                    g.SumBackward ();

                    double sentS = g.RootScore;

                    // Skip sentences whose root score is unusable.
                    if (double.IsNaN (sentS) || double.IsInfinity (sentS)) {
                        continue;
                    }

                    foreach (var v in g.Vs) {
                        if (v.TYPE == VTYPE.TERMINAL) {
                            continue;
                        }

                        int t = v.tag;

                        if (v.subtagCount == 1) {
                            continue;
                        }

                        // Per-subtag alpha + beta at this vertex. Pruned subtags are set to
                        // negative infinity - the same "absent" value this array uses below for
                        // the right half of a merged pair - so they contribute nothing to
                        // LogAdd (marginals). Leaving them at the array default of 0.0 would add
                        // a spurious constant term for every pruned subtag.
                        double[] marginals = new double[v.subtagCount];

                        for (int st = 0; st < v.subtagCount; ++st) {
                            marginals [st] = v.alpha.pruned [st]
                                ? double.NegativeInfinity
                                : v.alpha.v [st] + v.beta.v [st];
                        }

                        for (int st = 0; st < v.subtagCount / 2; ++st) {
                            int l = st * 2;
                            int r = st * 2 + 1;

                            // Only evaluate pairs where both halves carry finite, unpruned scores.
                            if (double.IsNaN (v.alpha.v [l]) || double.IsInfinity (v.alpha.v [l])
                                || double.IsNaN (v.beta.v [l]) || double.IsInfinity (v.beta.v [l])
                                || double.IsNaN (v.alpha.v [r]) || double.IsInfinity (v.alpha.v [r])
                                || double.IsNaN (v.beta.v [r]) || double.IsInfinity (v.beta.v [r])
                                || v.alpha.pruned [l] || v.alpha.pruned [r]) {
                                continue;
                            }

                            // Remember the unmerged values so the array can be restored afterwards.
                            double lllhd = marginals [l];
                            double rllhd = marginals [r];

                            // Score of the merged pair: the tagProb-weighted alpha scores and the
                            // beta scores are each combined with MathHelper.LogAdd, then summed.
                            double mllhd = MathHelper.LogAdd (tagProb [t] [l] + v.alpha.v [l], tagProb [t] [r] + v.alpha.v [r])
                                + MathHelper.LogAdd (v.beta.v [l], v.beta.v [r]);

                            // Temporarily replace the pair by the merged score in the left slot.
                            marginals [l] = mllhd;
                            marginals [r] = double.NegativeInfinity;

                            double xSentScore = MathHelper.LogAdd (marginals);

                            mergeLoss [t] [st] += sentS - xSentScore;

                            marginals [l] = lllhd;
                            marginals [r] = rllhd;
                        }
                    }
                }
            }
            );

            // Sum every other worker's buffer into the first worker's buffer and return it.
            var ml = mlossList [0];

            for (int threadid = 1; threadid < mlossList.Length; ++threadid) {
                var xl = mlossList [threadid];
                for (int i = 0; i < ml.Length; ++i) {
                    // Rows for single-subtag tags were never allocated.
                    if (ml [i] == null) {
                        continue;
                    }

                    for (int j = 0; j < ml[i].Length; ++j) {
                        ml [i] [j] += xl [i] [j];
                    }
                }
            }

            return ml;
        }
Пример #4
0
        /// <summary>
        /// Builds a hypergraph for every tree in <paramref name="treebank"/> using the supplied
        /// <paramref name="parser"/> and returns the graphs that were built without an exception.
        /// </summary>
        /// <remarks>
        /// Each worker appends its graphs to the result as a batch once it has finished, so the
        /// order of the returned list depends on the order in which workers complete.
        /// </remarks>
        /// <param name="nthread">Number of parallel workers; worker <c>k</c> handles trees <c>k, k + nthread, ...</c>.</param>
        /// <param name="treebank">Trees to convert.</param>
        /// <param name="parser">Parser shared by all workers.</param>
        /// <param name="failedNum">Receives the number of trees for which <c>BuildHyperGraph</c> threw.</param>
        /// <returns>All successfully built hypergraphs.</returns>
        private static List<HyperGraph> BuildGraphs(int nthread, List<PhrasalTree> treebank, HyperGraphParser parser, out int failedNum)
        {
            failedNum = 0;

            // An out parameter cannot be captured by the lambda below, so the failure
            // total lives in a local and is copied out after the parallel loop.
            var gate = new object();
            var allGraphs = new List<HyperGraph>();
            int totalFailures = 0;

            Parallel.For(0, nthread, worker =>
            {
                var built = new List<HyperGraph>();
                int failures = 0;

                for (int idx = worker; idx < treebank.Count; idx += nthread)
                {
                    try
                    {
                        built.Add(parser.BuildHyperGraph(treebank[idx]));
                    }
                    catch
                    {
                        // Any exception while building a graph counts as one failure.
                        failures++;
                    }
                }

                // Publish this worker's batch and failure count in one critical section.
                lock (gate)
                {
                    totalFailures += failures;
                    allGraphs.AddRange(built);
                }
            });

            failedNum = totalFailures;
            Console.Error.WriteLine("failed to build: {0}", failedNum);
            return allGraphs;
        }
Пример #5
0
        /// <summary>
        /// Accumulates, for every tag and subtag, the value <c>alpha + beta - rootScore</c> over all
        /// non-terminal vertices of all trees in <paramref name="treebank"/>, combining the
        /// contributions with <c>MathHelper.LogAdd</c>.
        /// </summary>
        /// <remarks>
        /// Every cell starts at negative infinity. Each worker fills its own buffer; the buffers
        /// are combined into the first one after the parallel loop. One parser instance is shared
        /// by all workers.
        /// </remarks>
        /// <param name="nthread">Number of parallel workers; must be at least 1 because the first worker's buffer is returned.</param>
        /// <param name="vocab">Vocabulary handed to the parser.</param>
        /// <param name="tagset">Tag set handed to the parser.</param>
        /// <param name="rules">Grammar handed to the parser; it also supplies the tag and subtag counts that size the result.</param>
        /// <param name="treebank">Trees to process; worker <c>k</c> handles trees <c>k, k + nthread, ...</c>.</param>
        /// <returns>Jagged array indexed <c>[tag][subtag]</c> holding the accumulated values.</returns>
        public static double[][] SubtagExpectedCounts(
            int nthread,
            Vocabulary vocab,
            TagSet tagset,
            LAPCFGrammar rules,
            List<PhrasalTree> treebank)
        {
            var parser = new HyperGraphParser(vocab, tagset, rules);

            // One accumulator per worker so the parallel loop never writes to shared cells.
            var perWorker = new double[nthread][][];
            for (int w = 0; w < nthread; ++w)
            {
                var counts = new double[rules.TotalTagCount][];
                for (int tag = 0; tag < counts.Length; ++tag)
                {
                    counts[tag] = new double[rules.GetSubTagCount(tag)];
                }

                ArrayHelper.Fill(counts, double.NegativeInfinity);
                perWorker[w] = counts;
            }

            Parallel.For(0, nthread, worker =>
            {
                var counts = perWorker[worker];

                for (int idx = worker; idx < treebank.Count; idx += nthread)
                {
                    var graph = parser.BuildHyperGraph(treebank[idx]);
                    graph.SumForward();
                    graph.SumBackward();

                    // Skip sentences whose root score is unusable.
                    double rootScore = graph.RootScore;
                    if (double.IsNaN(rootScore) || double.IsInfinity(rootScore))
                    {
                        continue;
                    }

                    foreach (var vertex in graph.Vs)
                    {
                        if (vertex.TYPE == VTYPE.TERMINAL)
                        {
                            continue;
                        }

                        int tag = vertex.tag;

                        for (int sub = 0; sub < vertex.subtagCount; ++sub)
                        {
                            // Only subtags with finite, unpruned alpha and beta scores contribute.
                            var alphaScore = vertex.alpha.v[sub];
                            if (double.IsNaN(alphaScore) || double.IsInfinity(alphaScore))
                            {
                                continue;
                            }

                            var betaScore = vertex.beta.v[sub];
                            if (double.IsNaN(betaScore) || double.IsInfinity(betaScore))
                            {
                                continue;
                            }

                            if (vertex.alpha.pruned[sub] || vertex.beta.pruned[sub])
                            {
                                continue;
                            }

                            counts[tag][sub] = MathHelper.LogAdd(alphaScore + betaScore - rootScore, counts[tag][sub]);
                        }
                    }
                }
            });

            // Combine every other worker's buffer into the first worker's buffer and return it.
            var merged = perWorker[0];
            for (int w = 1; w < nthread; ++w)
            {
                var other = perWorker[w];
                for (int tag = 0; tag < merged.Length; ++tag)
                {
                    var target = merged[tag];
                    var source = other[tag];
                    for (int sub = 0; sub < target.Length; ++sub)
                    {
                        target[sub] = MathHelper.LogAdd(target[sub], source[sub]);
                    }
                }
            }

            return merged;
        }