/// <summary>
/// Produces a new grammar in which selected subtags of <paramref name="rules"/> are merged.
/// Steps: compute per-subtag expected counts, normalize them pairwise, collect a merge
/// loss for every candidate, sort the candidates by ascending loss, build the merge
/// mapping and finally rebuild the rule table from it.
/// </summary>
/// <param name="percentage">
/// NOTE(review): this value is not read anywhere in this method; the number of
/// candidates that actually get merged is decided inside CreateMergeMapping — confirm
/// whether it was meant to be forwarded.
/// </param>
/// <param name="vocab">Vocabulary forwarded to the expected-count and merge-loss helpers.</param>
/// <param name="tagset">Tag set forwarded to the expected-count and merge-loss helpers.</param>
/// <param name="rules">Grammar to merge; its expected counts are re-initialized as a side effect.</param>
/// <param name="treebank">Trees forwarded to the expected-count and merge-loss helpers.</param>
/// <param name="nthread">Thread count forwarded to the expected-count and merge-loss helpers.</param>
/// <returns>The merged grammar, with its expected counts initialized.</returns>
public static LAPCFGrammar MergeSymbols(double percentage, Vocabulary vocab, TagSet tagset, LAPCFGrammar rules, List<PhrasalTree> treebank, int nthread)
{
    rules.InitializeExpectedCounts();

    double[][] tagProb = SubtagExpectedCounts(nthread, vocab, tagset, rules, treebank);

    // Normalize each adjacent pair (2j, 2j+1) of subtag scores against the pair's
    // combined mass. Values are presumably log-domain, given MathHelper.LogAdd and
    // the subtraction used for normalization — TODO confirm.
    for (int i = 0; i < tagProb.Length; ++i)
    {
        if (tagProb[i].Length == 1)
        {
            // A tag with a single subtag has nothing to merge; its score is reset to 0.
            tagProb[i][0] = 0;
            continue;
        }

        for (int j = 0; j < tagProb[i].Length / 2; ++j)
        {
            double z = MathHelper.LogAdd(tagProb[i][2 * j], tagProb[i][2 * j + 1]);
            tagProb[i][2 * j] -= z;
            tagProb[i][2 * j + 1] -= z;
        }
    }

    double[][] mergeLoss = CollectMergeLoss(nthread, vocab, tagset, rules, treebank, tagProb);

    // Flatten the per-tag loss table into one candidate list; tags without a loss
    // row (null) contribute no candidates.
    var mergeCands = new List<MergeHelper>();
    for (int t = 0; t < mergeLoss.Length; ++t)
    {
        if (mergeLoss[t] == null)
        {
            continue;
        }

        for (int st = 0; st < mergeLoss[t].Length; ++st)
        {
            mergeCands.Add(new MergeHelper(t, st, mergeLoss[t][st]));
        }
    }

    // Ascending by loss: the smallest-loss candidates come first.
    mergeCands.Sort((a, b) => a.loss.CompareTo(b.loss));

    int[][] subtagMap;
    bool[][] isMerged;
    int[] newSubTagCounts;
    CreateMergeMapping(rules, mergeCands, out subtagMap, out isMerged, out newSubTagCounts);

    var newRules = MergeRuleTable(rules, tagProb, subtagMap, isMerged, newSubTagCounts);
    newRules.InitializeExpectedCounts();
    return newRules;
}
/// <summary>
/// Creates an independent copy of this grammar: the three rule tables are copied
/// through <c>LAPCFGrammar.CloneRules</c>, the subtag-count array is cloned, and every
/// subtag trace is copied through <c>ArrayHelper.Clone</c>. The copy's expected counts
/// are initialized before it is returned.
/// </summary>
/// <returns>The newly created grammar.</returns>
public LAPCFGrammar Clone()
{
    var copy = new LAPCFGrammar
    {
        brules = LAPCFGrammar.CloneRules(brules),
        urules = LAPCFGrammar.CloneRules(urules),
        trules = LAPCFGrammar.CloneRules(trules),
        NTCount = NTCount,
        PTCount = PTCount,
        ROOTID = ROOTID,
        subTagCounts = (int[])subTagCounts.Clone(),
        subtagTraces = new List<int[][]>()
    };

    // Copy the trace history entry by entry so the copy gets its own list.
    foreach (var history in subtagTraces)
    {
        copy.subtagTraces.Add(ArrayHelper.Clone(history));
    }

    copy.InitializeExpectedCounts();
    return copy;
}
/// <summary>
/// Creates a new grammar object that references the very same rule tables,
/// subtag-count array and subtag-trace list as this instance (nothing is copied),
/// then initializes the new object's expected counts.
/// </summary>
/// <returns>The new grammar sharing this instance's parameters.</returns>
public LAPCFGrammar CloneWithSharedParameters()
{
    var shared = new LAPCFGrammar
    {
        brules = brules,
        urules = urules,
        trules = trules,
        NTCount = NTCount,
        PTCount = PTCount,
        ROOTID = ROOTID,
        subTagCounts = subTagCounts,
        subtagTraces = subtagTraces
    };

    shared.InitializeExpectedCounts();
    return shared;
}
/// <summary>
/// Builds a new grammar in which every tag except <c>ROOTID</c> has twice as many
/// subtags as in this grammar. Each non-null binary, unary and terminal rule is
/// replaced by the result of its own <c>SplitSymbols</c> call; null slots stay null.
/// The new grammar receives this grammar's subtag traces plus one new trace
/// describing this split.
/// </summary>
/// <param name="RNG">Random source forwarded to each rule's <c>SplitSymbols</c>.</param>
/// <param name="randomness">Value forwarded unchanged to each rule's <c>SplitSymbols</c>.</param>
/// <returns>The split grammar, with its expected counts initialized.</returns>
public LAPCFGrammar SplitSymbols(Random RNG, double randomness)
{
    // The root tag keeps its subtag count; every other tag is doubled.
    int tagTotal = subTagCounts.Length;
    int[] newSubTagCounts = new int[tagTotal];
    for (int tag = 0; tag < tagTotal; ++tag)
    {
        newSubTagCounts[tag] = (tag == ROOTID) ? subTagCounts[tag] : subTagCounts[tag] * 2;
    }

    // Rule tables are processed in the order binary, unary, terminal, and each table
    // is materialized eagerly, so RNG is consumed in a fixed sequence.
    var splitBinary = brules
        .Select(plane => plane != null
            ? plane.Select(row => row != null
                ? row.Select(rule => rule != null
                    ? rule.SplitSymbols(subTagCounts, newSubTagCounts, RNG, randomness)
                    : null).ToArray()
                : null).ToArray()
            : null)
        .ToArray();

    var splitUnary = urules
        .Select(row => row != null
            ? row.Select(rule => rule != null
                ? rule.SplitSymbols(subTagCounts, newSubTagCounts, RNG, randomness)
                : null).ToArray()
            : null)
        .ToArray();

    var splitTerminal = trules
        .Select(row => row != null
            ? row.Select(rule => rule != null
                ? rule.SplitSymbols(subTagCounts, newSubTagCounts, RNG, randomness)
                : null).ToArray()
            : null)
        .ToArray();

    var result = new LAPCFGrammar
    {
        NTCount = NTCount,
        PTCount = PTCount,
        ROOTID = ROOTID,
        brules = splitBinary,
        urules = splitUnary,
        trules = splitTerminal,
        subTagCounts = newSubTagCounts
    };
    result.InitializeExpectedCounts();

    // Carry over the existing trace history (the trace arrays themselves are shared).
    foreach (var earlier in subtagTraces)
    {
        result.subtagTraces.Add(earlier);
    }

    // New trace entry: for each tag, new subtag index j is recorded as j / 2 when the
    // tag's subtag count changed, and as j itself when it did not.
    int[][] latestTrace = new int[TotalTagCount][];
    for (int tag = 0; tag < latestTrace.Length; ++tag)
    {
        int width = newSubTagCounts[tag];
        int divisor = (width == subTagCounts[tag]) ? 1 : 2;

        int[] origins = new int[width];
        latestTrace[tag] = origins;
        for (int sub = 0; sub < width; ++sub)
        {
            origins[sub] = sub / divisor;
        }
    }
    result.subtagTraces.Add(latestTrace);

    return result;
}