/// <summary>
/// Walks the unary and binary grammars of <paramref name="parser"/> and records
/// the basic category of each rule's child state(s): one entry per unary rule in
/// <c>unaryRules</c>, one (left, right) pair per binary rule in <c>binaryRules</c>.
/// </summary>
/// <param name="parser">
/// Parser supplying the grammars (<c>bg</c>, <c>ug</c>), the state index and the
/// options whose language pack maps a state name to its basic category.
/// </param>
public FilterConfusingRules(LexicalizedParser parser)
{
    BinaryGrammar binaries = parser.bg;
    UnaryGrammar unaries = parser.ug;
    Options options = parser.GetOp();
    IIndex<string> states = parser.stateIndex;

    // Unary rules: keep only the basic category of the child state.
    foreach (UnaryRule rule in unaries)
    {
        string child = states.Get(rule.child);
        unaryRules.Add(options.Langpack().BasicCategory(child));
    }

    // Binary rules: keep the (left, right) pair of child basic categories.
    foreach (BinaryRule rule in binaries)
    {
        string left = states.Get(rule.leftChild);
        string leftCategory = options.Langpack().BasicCategory(left);
        string right = states.Get(rule.rightChild);
        binaryRules.Add(leftCategory, options.Langpack().BasicCategory(right));
    }
}
/// <summary>
/// Builds a <see cref="LexicalizedParser"/> from a training treebank by
/// binarizing the trees, extracting the unary/binary grammars, training the
/// lexicon and, when <c>op.doDep</c> is set, extracting a dependency grammar.
/// </summary>
/// <param name="trainTreebank">Treebank to train from.</param>
/// <returns>
/// A parser assembled from the extracted lexicon, grammars and indices. Its
/// dependency grammar is <c>null</c> when <c>op.doDep</c> is false.
/// </returns>
public virtual LexicalizedParser GetParserDataFromTreebank(Treebank trainTreebank)
{
    // Stage 1: annotate and binarize the training trees.
    log.Info("Binarizing training trees...");
    IList<Tree> trainingTrees = GetAnnotatedBinaryTreebankFromTreebank(trainTreebank);
    Timing.Tick("done.");

    // Stage 2: extract the grammars; the state index is filled by the extractor.
    IIndex<string> states = new HashIndex<string>();
    log.Info("Extracting PCFG...");
    IExtractor<Pair<UnaryGrammar, BinaryGrammar>> grammarExtractor = new BinaryGrammarExtractor(op, states);
    Pair<UnaryGrammar, BinaryGrammar> grammars = grammarExtractor.Extract(trainingTrees);
    BinaryGrammar binaryGrammar = grammars.second;
    binaryGrammar.SplitRules();
    UnaryGrammar unaryGrammar = grammars.first;
    unaryGrammar.PurgeRules();
    Timing.Tick("done.");

    // Stage 3: train the lexicon over the same binarized trees.
    log.Info("Extracting Lexicon...");
    IIndex<string> words = new HashIndex<string>();
    IIndex<string> tags = new HashIndex<string>();
    ILexicon lexicon = op.tlpParams.Lex(op, words, tags);
    lexicon.InitializeTraining(trainingTrees.Count);
    lexicon.Train(trainingTrees);
    lexicon.FinishTraining();
    Timing.Tick("done.");

    // Stage 4 (optional): dependency grammar. The extractor is obtained
    // unconditionally; it is only run when dependencies are requested.
    IExtractor<IDependencyGrammar> dependencyExtractor = op.tlpParams.DependencyGrammarExtractor(op, words, tags);
    IDependencyGrammar dependencyGrammar = null;
    if (op.doDep)
    {
        log.Info("Extracting Dependencies...");
        dependencyGrammar = dependencyExtractor.Extract(trainingTrees);
        dependencyGrammar.SetLexicon(lexicon);
        Timing.Tick("done.");
    }

    log.Info("Done extracting grammars and lexicon.");
    return new LexicalizedParser(lexicon, binaryGrammar, unaryGrammar, dependencyGrammar, states, words, tags, op);
}
/// <summary>
/// Creates a model with one randomly initialised matrix per distinct child
/// basic category (unary rules) or pair of child basic categories (binary
/// rules) found in the supplied grammars.
/// </summary>
/// <param name="op">the parameters of the parser</param>
/// <param name="stateIndex">Index used to turn a rule's child state id into its state name.</param>
/// <param name="unaryGrammar">Unary rules to create unary matrices for.</param>
/// <param name="binaryGrammar">Binary rules to create binary matrices for.</param>
public DVModel(Options op, IIndex<string> stateIndex, UnaryGrammar unaryGrammar, BinaryGrammar binaryGrammar)
{
    this.op = op;
    rand = new Random(op.trainOptions.randomSeed);

    // NOTE(review): the word vectors are read before numHid is consulted below;
    // presumably the ordering matters - confirm before moving this call.
    ReadWordVectors();

    // Both dimensions come from the hidden-layer size.
    numRows = op.lexOptions.numHid;
    numCols = op.lexOptions.numHid;

    // One matrix is built per basic category: every state sharing a basic
    // category is assumed to share the same transformation matrix.
    // Tree maps are used because the values need to be sorted by key later
    // on when building theta vectors.
    binaryTransform = TwoDimensionalMap.TreeMap();
    unaryTransform = Generics.NewTreeMap();
    binaryScore = TwoDimensionalMap.TreeMap();
    unaryScore = Generics.NewTreeMap();

    numBinaryMatrices = 0;
    numUnaryMatrices = 0;

    // Entry counts: a binary transform is numRows x (2 * numCols + 1), a
    // unary transform is numRows x (numCols + 1); score vectors have
    // numCols entries each.
    binaryTransformSize = numRows * (numCols * 2 + 1);
    unaryTransformSize = numRows * (numCols + 1);
    binaryScoreSize = numCols;
    unaryScoreSize = numCols;

    if (op.trainOptions.useContextWords)
    {
        // Context words add the same number of extra entries to both kinds
        // of transform.
        int contextEntries = numRows * numCols * 2;
        binaryTransformSize += contextEntries;
        unaryTransformSize += contextEntries;
    }

    identity = SimpleMatrix.Identity(numRows);

    // Unary matrices are keyed by the basic category of the rule's child.
    foreach (UnaryRule rule in unaryGrammar)
    {
        AddRandomUnaryMatrix(BasicCategory(stateIndex.Get(rule.child)));
    }

    // Binary matrices are keyed by the basic categories of both children.
    foreach (BinaryRule rule in binaryGrammar)
    {
        AddRandomBinaryMatrix(
            BasicCategory(stateIndex.Get(rule.leftChild)),
            BasicCategory(stateIndex.Get(rule.rightChild)));
    }
}
/// <summary>
/// Stores the given grammars in the <c>bg</c> and <c>ug</c> fields.
/// </summary>
/// <param name="bg">Binary grammar to keep.</param>
/// <param name="ug">Unary grammar to keep.</param>
internal NullGrammarProjection(BinaryGrammar bg, UnaryGrammar ug)
{
    this.bg = bg;
    this.ug = ug;
}