/// <summary>Creates new form ParserPanel.</summary>
/// <remarks>
/// Wires up the parser GUI: initializes the generated components, the
/// file/page chooser dialogs, the (English Penn Treebank) language pack
/// and its encoding, a repeating UI timer, and the attribute sets used to
/// highlight and un-highlight spans of the text pane.
/// </remarks>
public ParserPanel()
{
    // The comments below document fields declared elsewhere in this class
    // (they were attached to the field declarations in the original source):
    // constants for language specification
    // one second in milliseconds
    // parser takes approximately a minute to load
    // parser takes 5-60 seconds to parse a sentence
    // constants for finding nearest sentence boundary
    // for highlighting
    // worker threads to handle long operations
    // to monitor progress of long operations
    //private ProgressMonitor progressMonitor;
    // progress count
    // use glass pane to block input to components other than progressMonitor
    InitComponents();
    // create dialogs for file selection, starting in the current working directory
    jfc = new JFileChooser(Runtime.GetProperty("user.dir"));
    pageDialog = new OpenPageDialog(new Frame(), true);
    pageDialog.SetFileChooser(jfc);
    jfcLocation = new ParserPanel.JFileChooserLocation(jfc);
    // language pack determines tokenization conventions and character encoding
    tlp = new PennTreebankLanguagePack();
    encoding = tlp.GetEncoding();
    SetFont();
    // create a timer firing every OneSecond, handled by TimerListener
    timer = new Timer(OneSecond, new ParserPanel.TimerListener(this));
    // for (un)highlighting text: yellow background marks a span,
    // normalStyle restores the pane's own background color
    highlightStyle = new SimpleAttributeSet();
    normalStyle = new SimpleAttributeSet();
    StyleConstants.SetBackground(highlightStyle, Color.yellow);
    StyleConstants.SetBackground(normalStyle, textPane.GetBackground());
    // parser models are serialized gzip files (*.ser.gz), selectable from jars
    this.chooseJarParser = new JarFileChooser(".*\\.ser\\.gz", this);
}
/// <summary>
/// demoDP demonstrates turning a file into tokens and then parse trees.
/// </summary>
/// <remarks>
/// Reads the file with DocumentPreprocessor (which loads,
/// sentence-segments and tokenizes it), parses each sentence, and prints
/// the tree via pennPrint. When the language pack supports grammatical
/// structures, the CC-processed typed dependencies are printed as well.
/// A PrintWriter can be passed to pennPrint to capture output instead.
/// This code will work with any supported language.
/// </remarks>
/// <param name="lp">Loaded parser model applied to each sentence.</param>
/// <param name="filename">Path of the text file to segment, tokenize and parse.</param>
public static void DemoDP(LexicalizedParser lp, string filename)
{
    // a PennTreebankLanguagePack for English
    ITreebankLanguagePack langPack = lp.TreebankLanguagePack();
    // Only build a grammatical-structure factory when the language supports it.
    IGrammaticalStructureFactory structureFactory =
        langPack.SupportsGrammaticalStructures() ? langPack.GrammaticalStructureFactory() : null;
    // You could also create a tokenizer explicitly (as in demoAPI) and pass
    // it to DocumentPreprocessor.
    foreach (IList<IHasWord> sentence in new DocumentPreprocessor(filename))
    {
        Tree tree = lp.Apply(sentence);
        tree.PennPrint();
        System.Console.Out.WriteLine();
        if (structureFactory != null)
        {
            GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
            ICollection dependencies = structure.TypedDependenciesCCprocessed();
            System.Console.Out.WriteLine(dependencies);
            System.Console.Out.WriteLine();
        }
    }
}
/// <summary>Constructs a TreeCollinizer with explicit normalization options.</summary>
/// <param name="tlp">Language pack used to interpret categories and punctuation.</param>
/// <param name="deletePunct">Whether punctuation nodes are removed.</param>
/// <param name="fixCollinsBaseNP">Whether to apply the Collins base-NP correction.</param>
/// <param name="whOption">Option code controlling WH-element handling (see class docs).</param>
public TreeCollinizer(ITreebankLanguagePack tlp, bool deletePunct, bool fixCollinsBaseNP, int whOption)
{
    this.whOption = whOption;
    this.fixCollinsBaseNP = fixCollinsBaseNP;
    this.deletePunct = deletePunct;
    this.tlp = tlp;
}
/// <summary>
/// Removes from op.trainOptions.splitters every state matched by an entry in
/// op.trainOptions.deleteSplitters.
/// </summary>
/// <remarks>
/// A delete entry matches a splitter either exactly, or — when the delete
/// entry is itself a basic category — by basic category (computed with
/// tlp.BasicCategory). Matching splitters are removed in place through the
/// iterator; the removed items are logged when testOptions.verbose is set.
/// </remarks>
/// <param name="tlp">Language pack providing BasicCategory normalization.</param>
/// <param name="op">Options whose splitter set is edited in place.</param>
private static void RemoveDeleteSplittersFromSplitters(ITreebankLanguagePack tlp, Options op)
{
    if (op.trainOptions.deleteSplitters != null)
    {
        IList <string> deleted = new List <string>();
        foreach (string del in op.trainOptions.deleteSplitters)
        {
            string baseDel = tlp.BasicCategory(del);
            // Only match by basic category when the delete entry is already basic.
            bool checkBasic = del.Equals(baseDel);
            // Explicit iterator so elements can be removed during traversal
            // (Sharpen translation of Java's Iterator.remove()).
            for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();)
            {
                string elem = it.Current;
                string baseElem = tlp.BasicCategory(elem);
                bool delStr = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del);
                if (delStr)
                {
                    it.Remove();
                    deleted.Add(elem);
                }
            }
        }
        if (op.testOptions.verbose)
        {
            log.Info("Removed from vertical splitters: " + deleted);
        }
    }
}
/// <summary>
/// demoAPI demonstrates other ways of calling the parser with
/// already tokenized text, or in some cases, raw text that needs to
/// be tokenized as a single sentence.
/// </summary>
/// <remarks>
/// First parses a pre-tokenized word list; then tokenizes a raw string
/// with an explicit PTB tokenizer and parses that, printing the
/// CC-processed typed dependencies. Finally shows a TreePrint, whose
/// format string determines what is printed; a PrintWriter may be passed
/// to TreePrint.printTree to capture output. This code is for English.
/// </remarks>
/// <param name="lp">The loaded parser model to apply.</param>
public static void DemoAPI(LexicalizedParser lp)
{
    // Case 1: the caller already has correctly tokenized words.
    IList<CoreLabel> tokenized = SentenceUtils.ToCoreLabelList(
        new string[] { "This", "is", "an", "easy", "sentence", "." });
    Tree tree = lp.Apply(tokenized);
    tree.PennPrint();
    System.Console.Out.WriteLine();
    // Case 2: raw text run through an explicitly constructed tokenizer.
    string rawSentence = "This is another sentence.";
    ITokenizerFactory<CoreLabel> factory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
    ITokenizer<CoreLabel> tokenizer = factory.GetTokenizer(new StringReader(rawSentence));
    tree = lp.Apply(tokenizer.Tokenize());
    // Extract typed dependencies (PennTreebankLanguagePack for English).
    ITreebankLanguagePack langPack = lp.TreebankLanguagePack();
    IGrammaticalStructureFactory structureFactory = langPack.GrammaticalStructureFactory();
    GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
    IList<TypedDependency> dependencies = structure.TypedDependenciesCCprocessed();
    System.Console.Out.WriteLine(dependencies);
    System.Console.Out.WriteLine();
    // A TreePrint renders trees and dependencies in configurable formats.
    TreePrint printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.PrintTree(tree);
}
/// <summary>Constructs a normalizer with additional child normalizers.</summary>
/// <param name="tlp">Language pack; its start symbol becomes the root label.</param>
/// <param name="nodeCleanup">Node-cleanup level to apply.</param>
/// <param name="tns">Extra tree normalizers, all appended to this instance's list.</param>
public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup, IList<TreeNormalizer> tns)
{
    this.nodeCleanup = nodeCleanup;
    this.tlp = tlp;
    root = tlp.StartSymbol();
    // Copy the supplied normalizers into this instance's own list.
    Sharpen.Collections.AddAll(this.tns, tns);
}
/// <summary>Constructs a Negra/Penn tree-reader factory.</summary>
/// <param name="nodeCleanup">Node-cleanup level (field default 0).</param>
/// <param name="treeNormalizerInsertNPinPP">Whether the normalizer inserts NP under PP.</param>
/// <param name="treeNormalizerLeaveGF">
/// NOTE(review): this parameter is never stored — only nodeCleanup,
/// treeNormalizerInsertNPinPP and tlp are assigned below. Confirm against
/// the upstream source whether an assignment was lost in translation.
/// </param>
/// <param name="tlp">Language pack for the treebank being read.</param>
public NegraPennTreeReaderFactory(int nodeCleanup, bool treeNormalizerInsertNPinPP, bool treeNormalizerLeaveGF, ITreebankLanguagePack tlp)
{
    // Field defaults from the original declarations:
    // = 0;
    // = false;
    this.nodeCleanup = nodeCleanup;
    this.treeNormalizerInsertNPinPP = treeNormalizerInsertNPinPP;
    this.tlp = tlp;
}
/// <summary>Constructs a Negra/Penn tree normalizer.</summary>
/// <param name="tlp">Language pack for the treebank (non-unary root).</param>
/// <param name="nodeCleanup">Node-cleanup level to apply.</param>
public NegraPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
{
    this.nodeCleanup = nodeCleanup;
    this.tlp = tlp;
    // Predicates for filtering empty nodes and A-over-A configurations
    // (anonymous-class translations from the Java original).
    emptyFilter = new _IPredicate_46();
    aOverAFilter = new _IPredicate_56();
}
/// <summary>Stores the passed-in TreebankLanguagePack and sets up charset encodings.</summary>
/// <param name="tlp">The treebank language pack to use</param>
protected internal AbstractTreebankParserParams(ITreebankLanguagePack tlp)
{
    this.tlp = tlp;
    generateOriginalDependencies = false;
    // Both input and output default to the language pack's own encoding.
    inputEncoding = tlp.GetEncoding();
    outputEncoding = tlp.GetEncoding();
}
/// <summary>Maps a tag index to the index of its basic-category ("base") tag.</summary>
/// <param name="tag">Index of the (possibly annotated) tag.</param>
/// <param name="tlp">Language pack used to compute basic categories on first use.</param>
/// <returns>The index of the corresponding base tag.</returns>
public virtual int GetBaseTag(int tag, ITreebankLanguagePack tlp)
{
    // Lazily build the lookup table on the first request.
    if (tagsToBaseTags == null)
    {
        PopulateTagsToBaseTags(tlp);
    }
    return tagsToBaseTags[tag];
}
/// <summary>
/// Builds a ParserAnnotator from properties keyed by
/// <paramref name="annotatorName"/> (e.g. "parse.model", "parse.maxlen").
/// </summary>
/// <param name="annotatorName">Property-key prefix for this annotator's settings.</param>
/// <param name="props">Pipeline properties to read configuration from.</param>
/// <exception cref="ArgumentException">If no parser model is configured.</exception>
public ParserAnnotator(string annotatorName, Properties props)
{
    // Model path falls back to the default bundled parser location.
    string model = props.GetProperty(annotatorName + ".model", LexicalizedParser.DefaultParserLoc);
    if (model == null)
    {
        throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
    }
    this.Verbose = PropertiesUtils.GetBool(props, annotatorName + ".debug", false);
    string[] flags = ConvertFlagsToArray(props.GetProperty(annotatorName + ".flags"));
    this.parser = LoadModel(model, Verbose, flags);
    // -1 means "no limit" for sentence length and for parse time.
    this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", -1);
    // Optional post-parse tree transformation, loaded reflectively by class name.
    string treeMapClass = props.GetProperty(annotatorName + ".treemap");
    if (treeMapClass == null)
    {
        this.treeMap = null;
    }
    else
    {
        this.treeMap = ReflectionLoading.LoadByReflection(treeMapClass, props);
    }
    this.maxParseTime = PropertiesUtils.GetLong(props, annotatorName + ".maxtime", -1);
    this.kBest = PropertiesUtils.GetInt(props, annotatorName + ".kbest", 1);
    this.keepPunct = PropertiesUtils.GetBool(props, annotatorName + ".keepPunct", true);
    // Dependency graphs are only built when the language's TLP params support them.
    string buildGraphsProperty = annotatorName + ".buildgraphs";
    if (!this.parser.GetTLPParams().SupportsBasicDependencies())
    {
        // Warn if the user explicitly asked for graphs the language can't provide.
        if (PropertiesUtils.GetBool(props, buildGraphsProperty))
        {
            log.Info("WARNING: " + buildGraphsProperty + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
        }
        this.BuildGraphs = false;
    }
    else
    {
        this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsProperty, true);
    }
    if (this.BuildGraphs)
    {
        bool generateOriginalDependencies = PropertiesUtils.GetBool(props, annotatorName + ".originalDependencies", false);
        parser.GetTLPParams().SetGenerateOriginalDependencies(generateOriginalDependencies);
        ITreebankLanguagePack tlp = parser.GetTLPParams().TreebankLanguagePack();
        // Either keep punctuation tokens or reject them from the dependencies.
        IPredicate <string> punctFilter = this.keepPunct ? Filters.AcceptFilter() : tlp.PunctuationWordRejectFilter();
        this.gsf = tlp.GrammaticalStructureFactory(punctFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
    }
    else
    {
        this.gsf = null;
    }
    // Per-annotator thread count falls back to the pipeline-wide "nthreads".
    this.nThreads = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", PropertiesUtils.GetInt(props, "nthreads", 1));
    bool usesBinary = StanfordCoreNLP.UsesBinaryTrees(props);
    this.saveBinaryTrees = PropertiesUtils.GetBool(props, annotatorName + ".binaryTrees", usesBinary);
    this.noSquash = PropertiesUtils.GetBool(props, annotatorName + ".nosquash", false);
    this.extraDependencies = MetaClass.Cast(props.GetProperty(annotatorName + ".extradependencies", "NONE"), typeof(GrammaticalStructure.Extras));
}
/// <summary>Constructs a Chinese maximum-entropy lexicon.</summary>
/// <param name="op">Parser options; op.tlpParams supplies the language pack.</param>
/// <param name="wordIndex">Index mapping words to integer ids.</param>
/// <param name="tagIndex">Index mapping tags to integer ids.</param>
/// <param name="featureLevel">Feature-level setting for this lexicon.</param>
public ChineseMaxentLexicon(Options op, IIndex<string> wordIndex, IIndex<string> tagIndex, int featureLevel)
{
    this.featureLevel = featureLevel;
    this.tagIndex = tagIndex;
    this.wordIndex = wordIndex;
    this.op = op;
    this.tlpParams = op.tlpParams;
    this.ctlp = op.tlpParams.TreebankLanguagePack();
}
/// <summary>Constructs a normalizer for the given language pack and cleanup level.</summary>
/// <param name="tlp">Language pack; its start symbol becomes the root label.</param>
/// <param name="nodeCleanup">Node-cleanup level to apply.</param>
public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
{
    this.nodeCleanup = nodeCleanup;
    this.tlp = tlp;
    root = tlp.StartSymbol();
}
/// <summary>
/// Populates head-finding rules for the French Treebank (Dybro-Johansen variant).
/// </summary>
/// <remarks>
/// Fills nonTerminalInfo with one rule list per phrasal category. Each inner
/// array starts with a search directive ("left", "right", "leftdis",
/// "rightdis"; interpreted by the base head finder) followed by candidate head
/// categories. The tables list both the original FTB tags and the expanded CC
/// tagset variants.
/// </remarks>
/// <param name="tlp">Language pack; its start symbol is used as the root key.</param>
public DybroFrenchHeadFinder(ITreebankLanguagePack tlp) : base(tlp)
{
    //French POS:
    // A (adjective), ADV (adverb), C (conjunction and subordinating conjunction), CL (clitics),
    // CS (subordinating conjunction) but occurs only once!,
    // D (determiner), ET (foreign word), I (interjection), N (noun),
    // P (preposition), PREF (prefix), PRO (strong pronoun -- very confusing), V (verb), PUNC (punctuation)
    // There is also the expanded French CC tagset.
    // V, A, ADV, PRO, C, CL, N, D are all split into multiple tags.
    // http://www.linguist.univ-paris-diderot.fr/~mcandito/Publications/crabbecandi-taln2008-final.pdf
    // (perhaps you can find an English translation somewhere)
    nonTerminalInfo = Generics.NewHashMap();
    // "sentence"
    nonTerminalInfo[tlp.StartSymbol()] = new string[][] { new string[] { "right", "VN", "AP", "NP", "Srel", "VPpart", "AdP", "I", "Ssub", "VPinf", "PP" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
    nonTerminalInfo["SENT"] = new string[][] { new string[] { "right", "VN", "AP", "NP", "Srel", "VPpart", "AdP", "I", "Ssub", "VPinf", "PP" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
    // adjectival phrases
    nonTerminalInfo["AP"] = new string[][] { new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "right", "ET" }, new string[] { "rightdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "rightdis", "ADV", "ADVWH" } };
    // adverbial phrases
    nonTerminalInfo["AdP"] = new string[][] { new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
    // coordinated phrases
    nonTerminalInfo["COORD"] = new string[][] { new string[] { "leftdis", "C", "CC", "CS" }, new string[] { "left" } };
    // noun phrases
    nonTerminalInfo["NP"] = new string[][] { new string[] { "leftdis", "N", "NPP", "NC", "PRO", "PROWH", "PROREL" }, new string[] { "left", "NP" }, new string[] { "leftdis", "A", "ADJ", "ADJWH" }, new string[] { "left", "AP", "I", "VPpart" }, new string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "left", "AdP", "ET" }, new string[] { "leftdis", "D", "DET", "DETWH" } };
    // prepositional phrases
    nonTerminalInfo["PP"] = new string[][] { new string[] { "left", "P" }, new string[] { "left" } };
    // verbal nucleus
    nonTerminalInfo["VN"] = new string[][] { new string[] { "right", "V", "VPinf" }, new string[] { "right" } };
    // infinitive clauses
    nonTerminalInfo["VPinf"] = new string[][] { new string[] { "left", "VN" }, new string[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "left" } };
    // nonfinite clauses
    nonTerminalInfo["VPpart"] = new string[][] { new string[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "left", "VN" }, new string[] { "left" } };
    // relative clauses
    nonTerminalInfo["Srel"] = new string[][] { new string[] { "right", "VN", "AP", "NP" }, new string[] { "right" } };
    // subordinate clauses
    nonTerminalInfo["Ssub"] = new string[][] { new string[] { "right", "VN", "AP", "NP", "PP", "VPinf", "Ssub", "VPpart" }, new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
    // parenthetical clauses
    nonTerminalInfo["Sint"] = new string[][] { new string[] { "right", "VN", "AP", "NP", "PP", "VPinf", "Ssub", "VPpart" }, new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
    // adverbes
    //nonTerminalInfo.put("ADV", new String[][] {{"left", "ADV", "PP", "P"}});
    // compound categories: start with MW: D, A, C, N, ADV, V, P, PRO, CL
    nonTerminalInfo["MWD"] = new string[][] { new string[] { "leftdis", "D", "DET", "DETWH" }, new string[] { "left" } };
    nonTerminalInfo["MWA"] = new string[][] { new string[] { "left", "P" }, new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "right" } };
    nonTerminalInfo["MWC"] = new string[][] { new string[] { "leftdis", "C", "CC", "CS" }, new string[] { "left" } };
    nonTerminalInfo["MWN"] = new string[][] { new string[] { "rightdis", "N", "NPP", "NC" }, new string[] { "rightdis", "ET" }, new string[] { "right" } };
    nonTerminalInfo["MWV"] = new string[][] { new string[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "left" } };
    nonTerminalInfo["MWP"] = new string[][] { new string[] { "left", "P" }, new string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "leftdis", "PRO", "PROWH", "PROREL" }, new string[] { "left" } };
    nonTerminalInfo["MWPRO"] = new string[][] { new string[] { "leftdis", "PRO", "PROWH", "PROREL" }, new string[] { "leftdis", "CL", "CLS", "CLR", "CLO" }, new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "leftdis", "A", "ADJ", "ADJWH" }, new string[] { "left" } };
    nonTerminalInfo["MWCL"] = new string[][] { new string[] { "leftdis", "CL", "CLS", "CLR", "CLO" }, new string[] { "right" } };
    nonTerminalInfo["MWADV"] = new string[][] { new string[] { "left", "P" }, new string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "left" } };
    nonTerminalInfo["MWI"] = new string[][] { new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "left", "P" }, new string[] { "left" } };
    nonTerminalInfo["MWET"] = new string[][] { new string[] { "left", "ET" }, new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "left" } };
    //TODO: wsg2011: For phrasal nodes that lacked a label.
    nonTerminalInfo[FrenchXMLTreeReader.MissingPhrasal] = new string[][] { new string[] { "left" } };
}
/// <summary>Constructs an exhaustive dependency parser over a grammar and lexicon.</summary>
/// <param name="dg">Dependency grammar used for scoring.</param>
/// <param name="lex">Lexicon used for word/tag scoring.</param>
/// <param name="op">Parser options; supplies the language pack.</param>
/// <param name="wordIndex">Index mapping words to integer ids.</param>
/// <param name="tagIndex">Index mapping tags to integer ids.</param>
public ExhaustiveDependencyParser(IDependencyGrammar dg, ILexicon lex, Options op, IIndex<string> wordIndex, IIndex<string> tagIndex)
{
    this.tagIndex = tagIndex;
    this.wordIndex = wordIndex;
    this.op = op;
    this.lex = lex;
    this.dg = dg;
    this.tlp = op.Langpack();
    // Output trees carry labeled, scored nodes.
    tf = new LabeledScoredTreeFactory();
}
/// <summary>Constructs a normalizer writing normalized and flattened tree output.</summary>
/// <param name="_enclosing">Owning dataset; its encoding selects the Buckwalter mapping.</param>
/// <param name="outFile">Writer for normalized tree output.</param>
/// <param name="flatFile">Writer for flattened output.</param>
public ArabicRawTreeNormalizer(ATBArabicDataset _enclosing, PrintWriter outFile, PrintWriter flatFile)
{
    // Must be set first: the encoding check below reads it.
    this._enclosing = _enclosing;
    // NOTE(review): Buckwalter(true) presumably selects the opposite mapping
    // direction from the default constructor — confirm against Buckwalter's docs.
    bool isUtf8 = this._enclosing.encoding == Dataset.Encoding.Utf8;
    this.encodingMap = isUtf8 ? new Buckwalter() : new Buckwalter(true);
    this.outfile = outFile;
    this.flatFile = flatFile;
    this.nullFilter = new ArabicTreeNormalizer.ArabicEmptyFilter();
    this.aOverAFilter = new BobChrisTreeNormalizer.AOverAFilter();
    this.tf = new LabeledScoredTreeFactory();
    this.tlp = new ArabicTreebankLanguagePack();
}
/// <summary>
/// Trains and evaluates a ChineseMaxentLexicon tagger from treebank files.
/// </summary>
/// <remarks>
/// Usage: treebankPath trainRanges testRanges [featureLevel].
/// args[0] is the treebank path; args[1] and args[2] are number-range file
/// filters selecting training and test sections; optional args[3] overrides
/// the default feature level. Prints tagging accuracy on the test words.
/// </remarks>
/// <param name="args">Command-line arguments as described above.</param>
public static void Main(string[] args)
{
    // Fail fast with a usage message instead of an opaque IndexOutOfRangeException.
    if (args.Length < 3)
    {
        log.Info("Usage: ChineseMaxentLexicon treebankPath trainRanges testRanges [featureLevel]");
        return;
    }
    ITreebankLangParserParams tlpParams = new ChineseTreebankParserParams();
    Options op = new Options(tlpParams);
    TreeAnnotator ta = new TreeAnnotator(tlpParams.HeadFinder(), tlpParams, op);
    log.Info("Reading Trees...");
    IFileFilter trainFilter = new NumberRangesFileFilter(args[1], true);
    Treebank trainTreebank = tlpParams.MemoryTreebank();
    trainTreebank.LoadPath(args[0], trainFilter);
    log.Info("Annotating trees...");
    ICollection<Tree> trainTrees = new List<Tree>();
    foreach (Tree tree in trainTreebank)
    {
        trainTrees.Add(ta.TransformTree(tree));
    }
    trainTreebank = null;
    // saves memory
    log.Info("Training lexicon...");
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    // Optional fourth argument overrides the default feature level.
    int featureLevel = DefaultFeatureLevel;
    if (args.Length > 3)
    {
        featureLevel = System.Convert.ToInt32(args[3]);
    }
    Edu.Stanford.Nlp.Parser.Lexparser.ChineseMaxentLexicon lex = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseMaxentLexicon(op, wordIndex, tagIndex, featureLevel);
    lex.InitializeTraining(trainTrees.Count);
    lex.Train(trainTrees);
    lex.FinishTraining();
    log.Info("Testing");
    IFileFilter testFilter = new NumberRangesFileFilter(args[2], true);
    Treebank testTreebank = tlpParams.MemoryTreebank();
    testTreebank.LoadPath(args[0], testFilter);
    // Collect every tagged word from the test trees for tagging evaluation.
    IList<TaggedWord> testWords = new List<TaggedWord>();
    foreach (Tree t in testTreebank)
    {
        foreach (TaggedWord tw in t.TaggedYield())
        {
            testWords.Add(tw);
        }
    }
    int[] totalAndCorrect = lex.TestOnTreebank(testWords);
    log.Info("done.");
    System.Console.Out.WriteLine(totalAndCorrect[1] + " correct out of " + totalAndCorrect[0] + " -- ACC: " + ((double)totalAndCorrect[1]) / totalAndCorrect[0]);
}
/// <summary>Returns a copy of the tagged words with each tag reduced to its basic category.</summary>
/// <param name="twList">Tagged words to clean; not modified.</param>
/// <param name="tlp">Language pack supplying BasicCategory.</param>
/// <returns>A new list of TaggedWord with stripped tags, in the original order.</returns>
private static IList<TaggedWord> CleanTags(IList<TaggedWord> twList, ITreebankLanguagePack tlp)
{
    int size = twList.Count;
    IList<TaggedWord> cleaned = new List<TaggedWord>(size);
    for (int i = 0; i < size; i++)
    {
        TaggedWord original = twList[i];
        cleaned.Add(new TaggedWord(original.Word(), tlp.BasicCategory(original.Tag())));
    }
    return cleaned;
}
/// <summary>Builds the tag-index to base-tag-index lookup table.</summary>
/// <remarks>
/// For every tag currently in tagIndex, strips annotations via BasicCategory,
/// obtains the base tag's id from the same index (adding it if absent via
/// AddToIndex), and records the mapping in tagsToBaseTags.
/// </remarks>
/// <param name="tlp">Language pack supplying BasicCategory.</param>
private void PopulateTagsToBaseTags(ITreebankLanguagePack tlp)
{
    int count = tagIndex.Size();
    tagsToBaseTags = new int[count];
    for (int idx = 0; idx < count; idx++)
    {
        string baseTag = tlp.BasicCategory(tagIndex.Get(idx));
        tagsToBaseTags[idx] = tagIndex.AddToIndex(baseTag);
    }
}
/// <summary>
/// Populates head-finding rules for the Spanish AnCora treebank.
/// </summary>
/// <remarks>
/// Fills nonTerminalInfo with one rule list per phrasal category. Each inner
/// array starts with a search directive ("left", "right", "leftdis",
/// "rightdis"; interpreted by the base head finder) followed by candidate head
/// categories. InsertVerbs prepends/merges the verb tag set into a rule.
/// </remarks>
/// <param name="tlp">Language pack; its start symbol is used as a root key.</param>
public SpanishHeadFinder(ITreebankLanguagePack tlp) : base(tlp)
{
    nonTerminalInfo = Generics.NewHashMap();
    // "sentence" — shared rules for all sentence-level categories below
    string[][] rootRules = new string[][] { new string[] { "right", "grup.verb", "s.a", "sn" }, new string[] { "left", "S" }, new string[] { "right", "sadv", "grup.adv", "neg", "interjeccio", "i", "sp", "grup.prep" }, InsertVerbs(new string[] { "rightdis" }, new string[] { "nc0s000", "nc0p000", "nc00000", "np00000", "rg", "rn" }) };
    nonTerminalInfo[tlp.StartSymbol()] = rootRules;
    nonTerminalInfo["S"] = rootRules;
    nonTerminalInfo["sentence"] = rootRules;
    nonTerminalInfo["inc"] = rootRules;
    // adjectival phrases
    string[][] adjectivePhraseRules = new string[][] { new string[] { "leftdis", "grup.a", "s.a", "spec" } };
    nonTerminalInfo["s.a"] = adjectivePhraseRules;
    nonTerminalInfo["sa"] = adjectivePhraseRules;
    nonTerminalInfo["grup.a"] = new string[][] { new string[] { "rightdis", "aq0000", "ao0000" }, InsertVerbs(new string[] { "right" }, new string[] { }), new string[] { "right", "rg", "rn" } };
    // adverbial phrases
    nonTerminalInfo["sadv"] = new string[][] { new string[] { "left", "grup.adv", "sadv" } };
    nonTerminalInfo["grup.adv"] = new string[][] { new string[] { "left", "conj" }, new string[] { "rightdis", "rg", "rn", "neg", "grup.adv" }, new string[] { "rightdis", "pr000000", "pi000000", "nc0s000", "nc0p000", "nc00000", "np00000" } };
    nonTerminalInfo["neg"] = new string[][] { new string[] { "leftdis", "rg", "rn" } };
    // noun phrases
    nonTerminalInfo["sn"] = new string[][] { new string[] { "leftdis", "nc0s000", "nc0p000", "nc00000" }, new string[] { "left", "grup.nom", "grup.w", "grup.z", "sn" }, new string[] { "leftdis", "spec" } };
    nonTerminalInfo["grup.nom"] = new string[][] { new string[] { "leftdis", "nc0s000", "nc0p000", "nc00000", "np00000", "w", "grup.w" }, new string[] { "leftdis", "pi000000", "pd000000" }, new string[] { "left", "grup.nom", "sp" }, new string[] { "leftdis", "pn000000", "aq0000", "ao0000" }, new string[] { "left", "grup.a", "i", "grup.verb" }, new string[] { "leftdis", "grup.adv" } };
    // verb phrases
    nonTerminalInfo["grup.verb"] = new string[][] { InsertVerbs(new string[] { "left" }, new string[] { }) };
    nonTerminalInfo["infinitiu"] = new string[][] { InsertVerbs(new string[] { "left" }, new string[] { "infinitiu" }) };
    nonTerminalInfo["gerundi"] = new string[][] { new string[] { "left", "vmg0000", "vag0000", "vsg0000", "gerundi" } };
    nonTerminalInfo["participi"] = new string[][] { new string[] { "left", "aq", "vmp0000", "vap0000", "vsp0000", "grup.a" } };
    // specifiers
    nonTerminalInfo["spec"] = new string[][] { new string[] { "left", "conj", "spec" }, new string[] { "leftdis", "da0000", "de0000", "di0000", "dd0000", "dp0000", "dn0000", "dt0000" }, new string[] { "leftdis", "z0", "grup.z" }, new string[] { "left", "rg", "rn" }, new string[] { "leftdis", "pt000000", "pe000000", "pd000000", "pp000000", "pi000000", "pn000000", "pr000000" }, new string[] { "left", "grup.adv", "w" } };
    // entre A y B
    // etc.
    nonTerminalInfo["conj"] = new string[][] { new string[] { "leftdis", "cs", "cc" }, new string[] { "leftdis", "grup.cc", "grup.cs" }, new string[] { "left", "sp" } };
    nonTerminalInfo["interjeccio"] = new string[][] { new string[] { "leftdis", "i", "nc0s000", "nc0p000", "nc00000", "np00000", "pi000000" }, new string[] { "left", "interjeccio" } };
    nonTerminalInfo["relatiu"] = new string[][] { new string[] { "left", "pr000000" } };
    // prepositional phrases
    nonTerminalInfo["sp"] = new string[][] { new string[] { "left", "prep", "sp" } };
    nonTerminalInfo["prep"] = new string[][] { new string[] { "leftdis", "sp000", "prep", "grup.prep" } };
    // custom categories
    nonTerminalInfo["grup.cc"] = new string[][] { new string[] { "left", "cs" } };
    nonTerminalInfo["grup.cs"] = new string[][] { new string[] { "left", "cs" } };
    nonTerminalInfo["grup.prep"] = new string[][] { new string[] { "left", "prep", "grup.prep", "s" } };
    nonTerminalInfo["grup.pron"] = new string[][] { new string[] { "rightdis", "px000000" } };
    nonTerminalInfo["grup.w"] = new string[][] { new string[] { "right", "w" }, new string[] { "leftdis", "z0" }, new string[] { "left" } };
    nonTerminalInfo["grup.z"] = new string[][] { new string[] { "leftdis", "z0", "zu", "zp", "zd", "zm" }, new string[] { "right", "nc0s000", "nc0p000", "nc00000", "np00000" } };
}
/// <summary>Base initialization shared by dependency grammar implementations.</summary>
/// <param name="tlp">Language pack for the treebank.</param>
/// <param name="tagProjection">Projection used when building tag bins.</param>
/// <param name="directional">Directionality flag stored for subclasses.</param>
/// <param name="useDistance">Fine-distance flag stored for subclasses.</param>
/// <param name="useCoarseDistance">Coarse-distance flag stored for subclasses.</param>
/// <param name="op">Parser options.</param>
/// <param name="wordIndex">Index mapping words to integer ids.</param>
/// <param name="tagIndex">Index mapping tags to integer ids.</param>
public AbstractDependencyGrammar(ITreebankLanguagePack tlp, ITagProjection tagProjection, bool directional, bool useDistance, bool useCoarseDistance, Options op, IIndex<string> wordIndex, IIndex<string> tagIndex)
{
    this.op = op;
    this.tlp = tlp;
    this.tagProjection = tagProjection;
    this.wordIndex = wordIndex;
    this.tagIndex = tagIndex;
    this.directional = directional;
    this.useDistance = useDistance;
    this.useCoarseDistance = useCoarseDistance;
    // Sentinel tagged words: the distinguished STOP event and the wildcard.
    stopTW = new IntTaggedWord(IntTaggedWord.StopWordInt, IntTaggedWord.StopTagInt);
    wildTW = new IntTaggedWord(IntTaggedWord.AnyWordInt, IntTaggedWord.AnyTagInt);
    // Tag bins depend on the projection, so initialize after fields are set.
    InitTagBins();
}
/// <summary>Adds a sentence final punctuation mark to sentences that lack one.</summary>
/// <remarks>
/// Adds a sentence final punctuation mark to sentences that lack one.
/// This method adds a period (the first sentence final punctuation word
/// in a parser language pack) to sentences that don't have one within
/// the last 3 words (to allow for close parentheses, etc.). It checks
/// tags for punctuation, if available, otherwise words.
/// </remarks>
/// <param name="sentence">The sentence to check</param>
/// <param name="length">The length of the sentence (just to avoid recomputation)</param>
/// <returns>True if a punctuation word was appended; false if punctuation was already present.</returns>
private bool AddSentenceFinalPunctIfNeeded(IList<IHasWord> sentence, int length)
{
    // Only inspect the last 3 tokens (allows for close parentheses, etc.).
    int start = length - 3;
    if (start < 0)
    {
        start = 0;
    }
    ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack();
    for (int i = length - 1; i >= start; i--)
    {
        IHasWord item = sentence[i];
        // An object (e.g., CoreLabel) can implement HasTag but not actually store
        // a tag so we need to check that there is something there for this case.
        // If there is, use only it, since word tokens can be ambiguous.
        string tag = null;
        if (item is IHasTag hasTag)
        {
            tag = hasTag.Tag();
        }
        // Idiomatic null-or-empty check replaces tag != null && !tag.IsEmpty().
        if (!string.IsNullOrEmpty(tag))
        {
            if (tlp.IsSentenceFinalPunctuationTag(tag))
            {
                return false;
            }
        }
        else
        {
            string str = item.Word();
            if (tlp.IsPunctuationWord(str))
            {
                return false;
            }
        }
    }
    // None found, so append the language's first sentence-final punctuation word.
    if (op.testOptions.verbose)
    {
        log.Info("Adding missing final punctuation to sentence.");
    }
    string[] sfpWords = tlp.SentenceFinalPunctuationWords();
    if (sfpWords.Length > 0)
    {
        sentence.Add(new Word(sfpWords[0]));
    }
    return true;
}
/// <summary>
/// Populates Bikel-style head-finding rules for the Chinese Treebank.
/// </summary>
/// <remarks>
/// Fills nonTerminalInfo with one rule list per category; each inner array
/// starts with a search directive ("left"/"right") followed by candidate head
/// categories, with defaultRule ("right") as the fallback.
/// </remarks>
/// <param name="tlp">Language pack passed to the base head finder.</param>
public BikelChineseHeadFinder(ITreebankLanguagePack tlp) : base(tlp)
{
    nonTerminalInfo = Generics.NewHashMap();
    // these are first-cut rules
    defaultRule = new string[] { "right" };
    // ROOT is not always unary for chinese -- PAIR is a special notation
    // that the Irish people use for non-unary ones....
    nonTerminalInfo["ROOT"] = new string[][] { new string[] { "left", "IP" } };
    nonTerminalInfo["PAIR"] = new string[][] { new string[] { "left", "IP" } };
    // Major syntactic categories
    nonTerminalInfo["ADJP"] = new string[][] { new string[] { "right", "ADJP", "JJ" }, new string[] { "right", "AD", "NN", "CS" } };
    nonTerminalInfo["ADVP"] = new string[][] { new string[] { "right", "ADVP", "AD" } };
    nonTerminalInfo["CLP"] = new string[][] { new string[] { "right", "CLP", "M" } };
    nonTerminalInfo["CP"] = new string[][] { new string[] { "right", "DEC", "SP" }, new string[] { "left", "ADVP", "CS" }, new string[] { "right", "CP", "IP" } };
    nonTerminalInfo["DNP"] = new string[][] { new string[] { "right", "DNP", "DEG" }, new string[] { "right", "DEC" } };
    nonTerminalInfo["DP"] = new string[][] { new string[] { "left", "DP", "DT" } };
    nonTerminalInfo["DVP"] = new string[][] { new string[] { "right", "DVP", "DEV" } };
    nonTerminalInfo["FRAG"] = new string[][] { new string[] { "right", "VV", "NR", "NN" } };
    nonTerminalInfo["INTJ"] = new string[][] { new string[] { "right", "INTJ", "IJ" } };
    nonTerminalInfo["IP"] = new string[][] { new string[] { "right", "IP", "VP" }, new string[] { "right", "VV" } };
    nonTerminalInfo["LCP"] = new string[][] { new string[] { "right", "LCP", "LC" } };
    nonTerminalInfo["LST"] = new string[][] { new string[] { "left", "LST", "CD", "OD" } };
    nonTerminalInfo["NP"] = new string[][] { new string[] { "right", "NP", "NN", "NT", "NR", "QP" } };
    nonTerminalInfo["PP"] = new string[][] { new string[] { "left", "PP", "P" } };
    nonTerminalInfo["PRN"] = new string[][] { new string[] { "right", "NP", "IP", "VP", "NT", "NR", "NN" } };
    nonTerminalInfo["QP"] = new string[][] { new string[] { "right", "QP", "CLP", "CD", "OD" } };
    nonTerminalInfo["UCP"] = new string[][] { new string[] { "right" } };
    nonTerminalInfo["VP"] = new string[][] { new string[] { "left", "VP", "VA", "VC", "VE", "VV", "BA", "LB", "VCD", "VSB", "VRD", "VNV", "VCP" } };
    nonTerminalInfo["VCD"] = new string[][] { new string[] { "right", "VCD", "VV", "VA", "VC", "VE" } };
    nonTerminalInfo["VCP"] = new string[][] { new string[] { "right", "VCP", "VV", "VA", "VC", "VE" } };
    nonTerminalInfo["VRD"] = new string[][] { new string[] { "right", "VRD", "VV", "VA", "VC", "VE" } };
    nonTerminalInfo["VSB"] = new string[][] { new string[] { "right", "VSB", "VV", "VA", "VC", "VE" } };
    nonTerminalInfo["VNV"] = new string[][] { new string[] { "right", "VNV", "VV", "VA", "VC", "VE" } };
    nonTerminalInfo["VPT"] = new string[][] { new string[] { "right", "VNV", "VV", "VA", "VC", "VE" } };
    // VNV typo for VPT? None of either in ctb4.
    nonTerminalInfo["WHNP"] = new string[][] { new string[] { "right", "WHNP", "NP", "NN", "NT", "NR", "QP" } };
    nonTerminalInfo["WHPP"] = new string[][] { new string[] { "left", "WHPP", "PP", "P" } };
    // some POS tags apparently sit where phrases are supposed to be
    nonTerminalInfo["CD"] = new string[][] { new string[] { "right", "CD" } };
    nonTerminalInfo["NN"] = new string[][] { new string[] { "right", "NN" } };
    nonTerminalInfo["NR"] = new string[][] { new string[] { "right", "NR" } };
    // NOTE(review): the next comment appears truncated in the original source.
    // parsing. It shouldn't affect anything else because heads of preterminals are not
    // generally queried - GMA
    nonTerminalInfo["VV"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VA"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VC"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VE"] = new string[][] { new string[] { "left" } };
}
/// <summary>Maps each constituent's label to its basic category.</summary>
/// <param name="tlp">Language pack supplying BasicCategory.</param>
/// <param name="constituents">Constituents to simplify; each must be a LabeledConstituent.</param>
/// <returns>A new set of LabeledConstituent with the same spans and stripped labels.</returns>
/// <exception cref="AssertionError">If a constituent is not a LabeledConstituent.</exception>
public static ICollection<Constituent> SimplifyConstituents(ITreebankLanguagePack tlp, ICollection<Constituent> constituents)
{
    ICollection<Constituent> simplified = new HashSet<Constituent>();
    foreach (Constituent c in constituents)
    {
        if (c is LabeledConstituent labeled)
        {
            simplified.Add(new LabeledConstituent(labeled.Start(), labeled.End(), tlp.BasicCategory(labeled.Value())));
        }
        else
        {
            throw new AssertionError("Unexpected constituent type " + c.GetType());
        }
    }
    return simplified;
}
/// <summary>Build a custom binarizer for Trees.</summary>
/// <param name="hf">the HeadFinder to use in binarization</param>
/// <param name="tlp">the TreebankLanguagePack to use</param>
/// <param name="insideFactor">whether to do inside markovization</param>
/// <param name="markovFactor">whether to markovize the binary rules</param>
/// <param name="markovOrder">the markov order to use; only relevant with markovFactor=true</param>
/// <param name="useWrappingLabels">whether to use state names (labels) that allow wrapping from right to left</param>
/// <param name="unaryAtTop">
/// Whether to actually materialize the unary that rewrites
/// a passive state to the active rule at the top of an original local
/// tree. This is used only when compaction is happening
/// </param>
/// <param name="selectiveSplitThreshold">if selective split is used, this will be the threshold used to decide which state splits to keep</param>
/// <param name="markFinalStates">whether or not to make the state names (labels) of the final active states distinctive</param>
/// <param name="simpleLabels">whether to use simpler state labels (NOTE(review): doc added; confirm exact effect against base class docs)</param>
/// <param name="noRebinarization">if true, a node which already has exactly two children is not altered</param>
public TreeBinarizer(IHeadFinder hf, ITreebankLanguagePack tlp, bool insideFactor, bool markovFactor, int markovOrder, bool useWrappingLabels, bool unaryAtTop, double selectiveSplitThreshold, bool markFinalStates, bool simpleLabels, bool noRebinarization
)
{
    this.hf = hf;
    this.tlp = tlp;
    // Output trees carry category/word/tag labels with scores.
    this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
    this.insideFactor = insideFactor;
    this.markovFactor = markovFactor;
    this.markovOrder = markovOrder;
    this.useWrappingLabels = useWrappingLabels;
    this.unaryAtTop = unaryAtTop;
    this.selectiveSplitThreshold = selectiveSplitThreshold;
    this.markFinalStates = markFinalStates;
    this.simpleLabels = simpleLabels;
    this.noRebinarization = noRebinarization;
}
/// <param name="tlp">
/// TreebankLanguagePack describing the language being
/// parsed
/// </param>
/// <param name="labels">
/// A list of possible dependency relation labels, with
/// the ROOT relation label as the first element
/// </param>
/// <param name="transitions">Transition set for the parsing system.</param>
/// <param name="verbose">Whether to log a configuration summary.</param>
public ParsingSystem(ITreebankLanguagePack tlp, IList<string> labels, IList<string> transitions, bool verbose)
{
    // TODO pass labels as Map<String, GrammaticalRelation>; use GrammaticalRelation throughout
    this.transitions = transitions;
    this.tlp = tlp;
    // Defensive copy; by convention the first element is the ROOT relation label.
    this.labels = new List<string>(labels);
    rootLabel = labels[0];
    if (verbose)
    {
        log.Info(Config.Separator);
        log.Info("#Transitions: " + NumTransitions());
        log.Info("#Labels: " + labels.Count);
        log.Info("ROOTLABEL: " + rootLabel);
    }
}
/// <summary>
/// Reads configuration from <paramref name="props"/>, overwriting the
/// corresponding fields; each lookup falls back to the field's current
/// value when the property is absent.
/// </summary>
/// <param name="props">property set to read (training, runtime, and language options)</param>
private void SetProperties(Properties props)
{
    // Training hyperparameters.
    trainingThreads = PropertiesUtils.GetInt(props, "trainingThreads", trainingThreads);
    wordCutOff = PropertiesUtils.GetInt(props, "wordCutOff", wordCutOff);
    initRange = PropertiesUtils.GetDouble(props, "initRange", initRange);
    maxIter = PropertiesUtils.GetInt(props, "maxIter", maxIter);
    batchSize = PropertiesUtils.GetInt(props, "batchSize", batchSize);
    adaEps = PropertiesUtils.GetDouble(props, "adaEps", adaEps);
    adaAlpha = PropertiesUtils.GetDouble(props, "adaAlpha", adaAlpha);
    regParameter = PropertiesUtils.GetDouble(props, "regParameter", regParameter);
    dropProb = PropertiesUtils.GetDouble(props, "dropProb", dropProb);
    // Network architecture.
    hiddenSize = PropertiesUtils.GetInt(props, "hiddenSize", hiddenSize);
    embeddingSize = PropertiesUtils.GetInt(props, "embeddingSize", embeddingSize);
    numPreComputed = PropertiesUtils.GetInt(props, "numPreComputed", numPreComputed);
    evalPerIter = PropertiesUtils.GetInt(props, "evalPerIter", evalPerIter);
    clearGradientsPerIter = PropertiesUtils.GetInt(props, "clearGradientsPerIter", clearGradientsPerIter);
    saveIntermediate = PropertiesUtils.GetBool(props, "saveIntermediate", saveIntermediate);
    unlabeled = PropertiesUtils.GetBool(props, "unlabeled", unlabeled);
    cPOS = PropertiesUtils.GetBool(props, "cPOS", cPOS);
    noPunc = PropertiesUtils.GetBool(props, "noPunc", noPunc);
    doWordEmbeddingGradUpdate = PropertiesUtils.GetBool(props, "doWordEmbeddingGradUpdate", doWordEmbeddingGradUpdate);
    // Runtime parsing options
    sentenceDelimiter = PropertiesUtils.GetString(props, "sentenceDelimiter", sentenceDelimiter);
    tagger = PropertiesUtils.GetString(props, "tagger.model", tagger);
    string escaperClass = props.GetProperty("escaper");
    // escaper remains null when no escaper class was configured.
    escaper = escaperClass != null ? ReflectionLoading.LoadByReflection(escaperClass) : null;
    // Language options
    // NOTE(review): if Properties has Hashtable semantics, Contains tests
    // *values*, not keys — confirm ContainsKey was not intended here.
    language = props.Contains("language") ? GetLanguage(props.GetProperty("language")) : language;
    // NOTE(review): "[email protected]" looks like an email-redaction artifact
    // (plausibly something like language.@params.TreebankLanguagePack() in the
    // original) — verify against the upstream source before relying on this line.
    tlp = [email protected]();
    // if a tlp was specified go with that
    string tlpCanonicalName = props.GetProperty("tlp");
    if (tlpCanonicalName != null)
    {
        try
        {
            tlp = ReflectionLoading.LoadByReflection(tlpCanonicalName);
            System.Console.Error.WriteLine("Loaded TreebankLanguagePack: " + tlpCanonicalName);
        }
        catch (Exception)
        {
            // Best-effort: report the failure and keep the previously selected tlp.
            System.Console.Error.WriteLine("Error: Failed to load TreebankLanguagePack: " + tlpCanonicalName);
        }
    }
}
// static only
/// <summary>
/// Counts how many spans are present in goldTree, including
/// preterminals, but not present in guessTree, along with how many
/// spans are present in guessTree and not goldTree.
/// </summary>
/// <remarks>
/// Each missing or spurious span counts as one error, so a mislabeled
/// span or preterminal counts as two errors (one per direction).
/// Span labels are compared after reduction via the basicCategory()
/// function of the supplied TreebankLanguagePack.
/// </remarks>
/// <param name="tlp">language pack whose BasicCategory() is used to simplify labels</param>
/// <param name="goldTree">the reference tree</param>
/// <param name="guessTree">the tree being evaluated</param>
/// <returns>the total number of span errors in both directions</returns>
public static int CountSpanErrors(ITreebankLanguagePack tlp, Tree goldTree, Tree guessTree)
{
    // Labeled spans of each tree, with labels reduced to basic categories.
    ICollection<Constituent> goldSpans = SimplifyConstituents(tlp, goldTree.Constituents(LabeledConstituent.Factory()));
    ICollection<Constituent> guessSpans = SimplifyConstituents(tlp, guessTree.Constituents(LabeledConstituent.Factory()));
    int mismatches = 0;
    // Each direction of the set difference contributes one error per span.
    foreach (Constituent span in goldSpans)
    {
        if (!guessSpans.Contains(span))
        {
            ++mismatches;
        }
    }
    foreach (Constituent span in guessSpans)
    {
        if (!goldSpans.Contains(span))
        {
            ++mismatches;
        }
    }
    // The spans returned by constituents() don't include the preterminals,
    // so compare the tag sequences directly.
    IList<TaggedWord> goldWords = goldTree.TaggedYield();
    IList<TaggedWord> guessWords = guessTree.TaggedYield();
    int limit = Math.Min(goldWords.Count, guessWords.Count);
    for (int i = 0; i < limit; ++i)
    {
        string goldTag = tlp.BasicCategory(goldWords[i].Tag());
        string guessTag = tlp.BasicCategory(guessWords[i].Tag());
        if (!goldTag.Equals(guessTag))
        {
            // A mislabeled preterminal is a span missing from the guess plus
            // a spurious span in the guess: two errors.
            mismatches += 2;
        }
    }
    return mismatches;
}
/// <summary>
/// Installs the head-percolation table for Chinese treebank categories.
/// Each entry maps a category to one or more rules of the form
/// { direction, cat1, cat2, ... } — presumably a search direction
/// ("left"/"right") followed by a priority list of head categories;
/// TODO confirm against the base HeadFinder's rule conventions.
/// </summary>
/// <param name="tlp">the TreebankLanguagePack passed through to the base class</param>
public SunJurafskyChineseHeadFinder(ITreebankLanguagePack tlp)
    : base(tlp)
{
    // Fallback when no rule matches: take the rightmost child.
    defaultRule = new string[] { "right" };
    nonTerminalInfo = Generics.NewHashMap();
    nonTerminalInfo["ROOT"] = new string[][] { new string[] { "left", "IP" } };
    nonTerminalInfo["PAIR"] = new string[][] { new string[] { "left", "IP" } };
    nonTerminalInfo["ADJP"] = new string[][] { new string[] { "right", "ADJP", "JJ", "AD" } };
    nonTerminalInfo["ADVP"] = new string[][] { new string[] { "right", "ADVP", "AD", "CS", "JJ", "NP", "PP", "P", "VA", "VV" } };
    nonTerminalInfo["CLP"] = new string[][] { new string[] { "right", "CLP", "M", "NN", "NP" } };
    nonTerminalInfo["CP"] = new string[][] { new string[] { "right", "CP", "IP", "VP" } };
    nonTerminalInfo["DNP"] = new string[][] { new string[] { "right", "DEG", "DNP", "DEC", "QP" } };
    nonTerminalInfo["DP"] = new string[][] { new string[] { "left", "M", "DP", "DT", "OD" } };
    nonTerminalInfo["DVP"] = new string[][] { new string[] { "right", "DEV", "AD", "VP" } };
    nonTerminalInfo["IP"] = new string[][] { new string[] { "right", "VP", "IP", "NP" } };
    nonTerminalInfo["LCP"] = new string[][] { new string[] { "right", "LCP", "LC" } };
    nonTerminalInfo["LST"] = new string[][] { new string[] { "right", "CD", "NP", "QP" } };
    nonTerminalInfo["NP"] = new string[][] { new string[] { "right", "NP", "NN", "IP", "NR", "NT" } };
    nonTerminalInfo["PP"] = new string[][] { new string[] { "left", "P", "PP" } };
    nonTerminalInfo["PRN"] = new string[][] { new string[] { "left", "PU" } };
    nonTerminalInfo["QP"] = new string[][] { new string[] { "right", "QP", "CLP", "CD" } };
    nonTerminalInfo["UCP"] = new string[][] { new string[] { "left", "IP", "NP", "VP" } };
    nonTerminalInfo["VCD"] = new string[][] { new string[] { "left", "VV", "VA", "VE" } };
    nonTerminalInfo["VP"] = new string[][] { new string[] { "left", "VE", "VC", "VV", "VNV", "VPT", "VRD", "VSB", "VCD", "VP" } };
    nonTerminalInfo["VPT"] = new string[][] { new string[] { "left", "VA", "VV" } };
    nonTerminalInfo["VCP"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VNV"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VRD"] = new string[][] { new string[] { "left", "VV", "VA" } };
    nonTerminalInfo["VSB"] = new string[][] { new string[] { "right", "VV", "VE" } };
    nonTerminalInfo["FRAG"] = new string[][] { new string[] { "right", "VV", "NN" } };
    //FRAG seems only to be used for bits at the beginnings of articles: "Xinwenshe<DATE>" and "(wan)"
    // some POS tags apparently sit where phrases are supposed to be
    nonTerminalInfo["CD"] = new string[][] { new string[] { "right", "CD" } };
    nonTerminalInfo["NN"] = new string[][] { new string[] { "right", "NN" } };
    nonTerminalInfo["NR"] = new string[][] { new string[] { "right", "NR" } };
    // I'm adding these POS tags to do primitive morphology for character-level
    // parsing. It shouldn't affect anything else because heads of preterminals are not
    // generally queried - GMA
    nonTerminalInfo["VV"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VA"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VC"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["VE"] = new string[][] { new string[] { "left" } };
}
/// <summary>
/// Installs the Penn Treebank head-percolation table following Collins.
/// Each entry maps a category to one or more rules of the form
/// { direction, cat1, cat2, ... } — presumably a search direction
/// ("left"/"right"/"rightdis") followed by a priority list of head
/// categories; TODO confirm against the base HeadFinder's rule conventions.
/// </summary>
/// <param name="tlp">the TreebankLanguagePack passed through to the base class</param>
/// <param name="categoriesToAvoid">categories to disprefer as heads, passed to the base class</param>
public CollinsHeadFinder(ITreebankLanguagePack tlp, params string[] categoriesToAvoid)
    : base(tlp, categoriesToAvoid)
{
    nonTerminalInfo = Generics.NewHashMap();
    // This version from Collins' diss (1999: 236-238)
    nonTerminalInfo["ADJP"] = new string[][] { new string[] { "left", "NNS", "QP", "NN", "$", "ADVP", "JJ", "VBN", "VBG", "ADJP", "JJR", "NP", "JJS", "DT", "FW", "RBR", "RBS", "SBAR", "RB" } };
    nonTerminalInfo["ADVP"] = new string[][] { new string[] { "right", "RB", "RBR", "RBS", "FW", "ADVP", "TO", "CD", "JJR", "JJ", "IN", "NP", "JJS", "NN" } };
    nonTerminalInfo["CONJP"] = new string[][] { new string[] { "right", "CC", "RB", "IN" } };
    nonTerminalInfo["FRAG"] = new string[][] { new string[] { "right" } };
    // crap
    nonTerminalInfo["INTJ"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["LST"] = new string[][] { new string[] { "right", "LS", ":" } };
    nonTerminalInfo["NAC"] = new string[][] { new string[] { "left", "NN", "NNS", "NNP", "NNPS", "NP", "NAC", "EX", "$", "CD", "QP", "PRP", "VBG", "JJ", "JJS", "JJR", "ADJP", "FW" } };
    nonTerminalInfo["NX"] = new string[][] { new string[] { "left" } };
    // crap
    nonTerminalInfo["PP"] = new string[][] { new string[] { "right", "IN", "TO", "VBG", "VBN", "RP", "FW" } };
    // should prefer JJ? (PP (JJ such) (IN as) (NP (NN crocidolite)))
    nonTerminalInfo["PRN"] = new string[][] { new string[] { "left" } };
    nonTerminalInfo["PRT"] = new string[][] { new string[] { "right", "RP" } };
    nonTerminalInfo["QP"] = new string[][] { new string[] { "left", "$", "IN", "NNS", "NN", "JJ", "RB", "DT", "CD", "NCD", "QP", "JJR", "JJS" } };
    nonTerminalInfo["RRC"] = new string[][] { new string[] { "right", "VP", "NP", "ADVP", "ADJP", "PP" } };
    nonTerminalInfo["S"] = new string[][] { new string[] { "left", "TO", "IN", "VP", "S", "SBAR", "ADJP", "UCP", "NP" } };
    nonTerminalInfo["SBAR"] = new string[][] { new string[] { "left", "WHNP", "WHPP", "WHADVP", "WHADJP", "IN", "DT", "S", "SQ", "SINV", "SBAR", "FRAG" } };
    nonTerminalInfo["SBARQ"] = new string[][] { new string[] { "left", "SQ", "S", "SINV", "SBARQ", "FRAG" } };
    nonTerminalInfo["SINV"] = new string[][] { new string[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "VP", "S", "SINV", "ADJP", "NP" } };
    nonTerminalInfo["SQ"] = new string[][] { new string[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "VP", "SQ" } };
    nonTerminalInfo["UCP"] = new string[][] { new string[] { "right" } };
    nonTerminalInfo["VP"] = new string[][] { new string[] { "left", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "AUX", "AUXG", "VP", "ADJP", "NN", "NNS", "NP" } };
    nonTerminalInfo["WHADJP"] = new string[][] { new string[] { "left", "CC", "WRB", "JJ", "ADJP" } };
    nonTerminalInfo["WHADVP"] = new string[][] { new string[] { "right", "CC", "WRB" } };
    nonTerminalInfo["WHNP"] = new string[][] { new string[] { "left", "WDT", "WP", "WP$", "WHADJP", "WHPP", "WHNP" } };
    nonTerminalInfo["WHPP"] = new string[][] { new string[] { "right", "IN", "TO", "FW" } };
    nonTerminalInfo["X"] = new string[][] { new string[] { "right" } };
    // crap rule
    // NP uses multiple rules, tried in order until one matches.
    nonTerminalInfo["NP"] = new string[][] { new string[] { "rightdis", "NN", "NNP", "NNPS", "NNS", "NX", "POS", "JJR" }, new string[] { "left", "NP" }, new string[] { "rightdis", "$", "ADJP", "PRN" }, new string[] { "right", "CD" }, new string[] { "rightdis", "JJ", "JJS", "RB", "QP" } };
    nonTerminalInfo["TYPO"] = new string[][] { new string[] { "left" } };
    // another crap rule, for Brown (Roger)
    nonTerminalInfo["EDITED"] = new string[][] { new string[] { "left" } };
    // crap rule for Switchboard (if don't delete EDITED nodes)
    nonTerminalInfo["XS"] = new string[][] { new string[] { "right", "IN" } };
}
/// <summary>Builds a normalizer backed by the given language pack.</summary>
/// <param name="tlp">the TreebankLanguagePack stored for later use by the normalizer</param>
public BobChrisTreeNormalizer(ITreebankLanguagePack tlp) => this.tlp = tlp;
/// <summary>Builds the string function around the given language pack.</summary>
/// <param name="tlp">the TreebankLanguagePack stored for later use by this function</param>
public CategoryAndFunctionStringFunction(ITreebankLanguagePack tlp) => this.tlp = tlp;