예제 #1
0
        /// <summary>
        /// Produces a new grammar from <paramref name="rules"/> by merging subtags, driven by
        /// per-subtag merge losses collected over <paramref name="treebank"/>.
        /// </summary>
        /// <param name="percentage">
        /// NOTE(review): this value is not read anywhere in this method and is not forwarded to
        /// any helper; presumably it was meant to bound the fraction of candidates merged.
        /// Confirm against CreateMergeMapping before relying on it.
        /// </param>
        /// <param name="vocab">Forwarded to SubtagExpectedCounts and CollectMergeLoss.</param>
        /// <param name="tagset">Forwarded to SubtagExpectedCounts and CollectMergeLoss.</param>
        /// <param name="rules">
        /// Source grammar. Its expected counts are re-initialized as the first step of this call.
        /// </param>
        /// <param name="treebank">Trees over which counts and merge losses are collected.</param>
        /// <param name="nthread">Forwarded to the collection helpers (presumably a thread count).</param>
        /// <returns>
        /// The grammar built by MergeRuleTable, with its expected counts freshly initialized.
        /// </returns>
        public static LAPCFGrammar MergeSymbols(double percentage,
            Vocabulary vocab,
            TagSet tagset,
            LAPCFGrammar rules,
            List<PhrasalTree> treebank,
            int nthread)
        {
            rules.InitializeExpectedCounts ();
            double[][] tagProb = SubtagExpectedCounts (nthread, vocab, tagset, rules, treebank);

            foreach (double[] subtagProb in tagProb) {
                if (subtagProb.Length == 1) {
                    // A tag with a single subtag has no sibling to normalize against.
                    subtagProb [0] = 0;
                    continue;
                }

                // Normalize each adjacent (2j, 2j+1) pair by subtracting their LogAdd
                // (values are presumably log-scale). With an odd length, the final
                // unpaired entry is left as-is.
                for (int j = 0; j < subtagProb.Length / 2; ++j) {
                    double z = MathHelper.LogAdd (subtagProb [2 * j], subtagProb [2 * j + 1]);
                    subtagProb [2 * j] -= z;
                    subtagProb [2 * j + 1] -= z;
                }
            }

            double[][] mergeLoss = CollectMergeLoss (nthread, vocab, tagset, rules, treebank, tagProb);

            // Flatten the per-tag loss table into one candidate list; tags with no row are skipped.
            var mergeCands = new List<MergeHelper> ();
            for (int t = 0; t < mergeLoss.Length; ++t) {
                if (mergeLoss [t] == null) {
                    continue;
                }

                for (int st = 0; st < mergeLoss [t].Length; ++st) {
                    mergeCands.Add (new MergeHelper (t, st, mergeLoss [t] [st]));
                }
            }

            // Ascending by loss, so the lowest-loss candidates come first.
            mergeCands.Sort ((a, b) => a.loss.CompareTo (b.loss));

            int[][] subtagMap;
            bool[][] isMerged;
            int[] newSubTagCounts;

            CreateMergeMapping (rules, mergeCands, out subtagMap, out isMerged, out newSubTagCounts);

            var newRules = MergeRuleTable (rules, tagProb, subtagMap, isMerged, newSubTagCounts);

            newRules.InitializeExpectedCounts ();

            return newRules;
        }
예제 #2
0
 /// <summary>
 /// Creates a copy of this grammar with its own rule tables, subtag counts and trace list.
 /// </summary>
 /// <remarks>
 /// Rule tables are copied through <c>LAPCFGrammar.CloneRules</c> and each trace through
 /// <c>ArrayHelper.Clone</c>; how deep those helpers copy is not visible from here.
 /// </remarks>
 /// <returns>The copy, with its expected counts initialized.</returns>
 public LAPCFGrammar Clone()
 {
     var copy = new LAPCFGrammar
     {
         brules = LAPCFGrammar.CloneRules(brules),
         urules = LAPCFGrammar.CloneRules(urules),
         trules = LAPCFGrammar.CloneRules(trules),
         NTCount = NTCount,
         PTCount = PTCount,
         ROOTID = ROOTID,
         subTagCounts = (int[])subTagCounts.Clone(),
         subtagTraces = new List<int[][]>()
     };

     // Copy the trace history entry by entry, keeping the original order.
     foreach (var history in subtagTraces)
     {
         var historyCopy = ArrayHelper.Clone(history);
         copy.subtagTraces.Add(historyCopy);
     }

     copy.InitializeExpectedCounts();
     return copy;
 }
예제 #3
0
 /// <summary>
 /// Creates a new grammar object that refers to the same rule tables, subtag counts
 /// and subtag traces as this one (references are assigned, nothing is copied).
 /// </summary>
 /// <returns>The new grammar, with its expected counts initialized.</returns>
 public LAPCFGrammar CloneWithSharedParameters()
 {
     // Every member below is assigned by reference/value only; no copies are made.
     var shared = new LAPCFGrammar
     {
         brules = brules,
         urules = urules,
         trules = trules,
         NTCount = NTCount,
         PTCount = PTCount,
         ROOTID = ROOTID,
         subTagCounts = subTagCounts,
         subtagTraces = subtagTraces
     };

     shared.InitializeExpectedCounts();
     return shared;
 }
예제 #4
0
        /// <summary>
        /// Builds a new grammar in which every tag except <c>ROOTID</c> has twice as many
        /// subtags as in this grammar, and records the mapping from new subtags to old ones.
        /// </summary>
        /// <param name="RNG">Random source forwarded to each rule's <c>SplitSymbols</c>.</param>
        /// <param name="randomness">Forwarded unchanged to each rule's <c>SplitSymbols</c>.</param>
        /// <returns>
        /// The new grammar. Its trace list holds this grammar's traces followed by one new
        /// trace describing this split.
        /// </returns>
        public LAPCFGrammar SplitSymbols(Random RNG, double randomness)
        {
            // ROOT keeps its subtag count; every other tag doubles.
            int[] splitCounts = subTagCounts
                .Select((count, tagId) => tagId == ROOTID ? count : count * 2)
                .ToArray();

            // Tables are rebuilt binary, then unary, then terminal, each materialized
            // eagerly, so RNG is consumed in a fixed order. Null slots stay null.
            var splitBinary = brules.Select(
                plane => plane == null ? null : plane.Select(
                    row => row == null ? null : row.Select(
                        rule => rule == null ? null : rule.SplitSymbols(subTagCounts, splitCounts, RNG, randomness)
                    ).ToArray()
                ).ToArray()
            ).ToArray();

            var splitUnary = urules.Select(
                row => row == null ? null : row.Select(
                    rule => rule == null ? null : rule.SplitSymbols(subTagCounts, splitCounts, RNG, randomness)
                ).ToArray()
            ).ToArray();

            var splitTerminal = trules.Select(
                row => row == null ? null : row.Select(
                    rule => rule == null ? null : rule.SplitSymbols(subTagCounts, splitCounts, RNG, randomness)
                ).ToArray()
            ).ToArray();

            var result = new LAPCFGrammar
            {
                NTCount = NTCount,
                PTCount = PTCount,
                ROOTID = ROOTID,
                brules = splitBinary,
                urules = splitUnary,
                trules = splitTerminal,
                subTagCounts = splitCounts
            };

            result.InitializeExpectedCounts();

            // Carry over the existing trace history (same trace objects, not copies).
            foreach (var earlier in subtagTraces)
            {
                result.subtagTraces.Add(earlier);
            }

            // New trace: for each tag, entry j holds the old subtag that new subtag j came from.
            int[][] splitTrace = new int[TotalTagCount][];
            for (int tag = 0; tag < splitTrace.Length; ++tag)
            {
                int width = splitCounts[tag];
                var parents = new int[width];

                // Unchanged count -> identity mapping; doubled count -> j / 2.
                int divisor = width == subTagCounts[tag] ? 1 : 2;
                for (int sub = 0; sub < width; ++sub)
                {
                    parents[sub] = sub / divisor;
                }

                splitTrace[tag] = parents;
            }

            result.subtagTraces.Add(splitTrace);

            return result;
        }