Example #1
0
 /// <summary>
 /// Creates a new ParserPanel form: initializes the Swing components, the
 /// file-selection dialogs, the treebank language pack and encoding, the
 /// progress timer, and the text-highlighting styles used by the panel.
 /// </summary>
 public ParserPanel()
 {
     // NOTE(review): the comments below appear to describe fields declared
     // elsewhere in the class (outside this snippet) — confirm against the
     // full class before relying on them.
     // constants for language specification
     // one second in milliseconds
     // parser takes approximately a minute to load
     // parser takes 5-60 seconds to parse a sentence
     // constants for finding nearest sentence boundary
     // for highlighting
     // worker threads to handle long operations
     // to monitor progress of long operations
     //private ProgressMonitor progressMonitor;
     // progress count
     // use glass pane to block input to components other than progressMonitor
     InitComponents();
     // create dialogs for file selection; the chooser starts in the
     // current working directory
     jfc        = new JFileChooser(Runtime.GetProperty("user.dir"));
     pageDialog = new OpenPageDialog(new Frame(), true);
     pageDialog.SetFileChooser(jfc);
     jfcLocation = new ParserPanel.JFileChooserLocation(jfc);
     tlp         = new PennTreebankLanguagePack();
     encoding    = tlp.GetEncoding();
     SetFont();
     // create a timer that fires every OneSecond with TimerListener as its callback
     timer = new Timer(OneSecond, new ParserPanel.TimerListener(this));
     // for (un)highlighting text: highlighted spans get a yellow background,
     // normal spans get the text pane's own background color
     highlightStyle = new SimpleAttributeSet();
     normalStyle    = new SimpleAttributeSet();
     StyleConstants.SetBackground(highlightStyle, Color.yellow);
     StyleConstants.SetBackground(normalStyle, textPane.GetBackground());
     // chooser for serialized parser models inside jar files (*.ser.gz)
     this.chooseJarParser = new JarFileChooser(".*\\.ser\\.gz", this);
 }
Example #2
0
        /// <summary>
        /// demoDP demonstrates turning a file into tokens and then parse
        /// trees.
        /// </summary>
        /// <remarks>
        /// The trees are printed by calling pennPrint on the Tree object;
        /// a PrintWriter may be passed to pennPrint to capture the output.
        /// This code will work with any supported language.
        /// </remarks>
        /// <param name="lp">The parser to apply to each sentence</param>
        /// <param name="filename">Path of the file to segment and parse</param>
        public static void DemoDP(LexicalizedParser lp, string filename)
        {
            // DocumentPreprocessor handles loading, sentence-segmenting and
            // tokenizing the file.  A tokenizer could also be created
            // explicitly and passed to it.
            ITreebankLanguagePack langPack = lp.TreebankLanguagePack();
            IGrammaticalStructureFactory structureFactory = null;
            if (langPack.SupportsGrammaticalStructures())
            {
                structureFactory = langPack.GrammaticalStructureFactory();
            }
            foreach (IList <IHasWord> sentence in new DocumentPreprocessor(filename))
            {
                Tree tree = lp.Apply(sentence);
                tree.PennPrint();
                System.Console.Out.WriteLine();
                if (structureFactory == null)
                {
                    continue;
                }
                // Also print the CC-processed typed dependencies when the
                // language pack supports grammatical structures.
                GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
                ICollection dependencies = structure.TypedDependenciesCCprocessed();
                System.Console.Out.WriteLine(dependencies);
                System.Console.Out.WriteLine();
            }
        }
 /// <summary>Builds a TreeCollinizer with the given normalization settings.</summary>
 /// <param name="tlp">The treebank language pack to store</param>
 /// <param name="deletePunct">Whether punctuation should be deleted</param>
 /// <param name="fixCollinsBaseNP">Whether to apply the Collins base-NP fix</param>
 /// <param name="whOption">Option value controlling wh-element handling</param>
 public TreeCollinizer(ITreebankLanguagePack tlp, bool deletePunct, bool fixCollinsBaseNP, int whOption)
 {
     this.whOption         = whOption;
     this.fixCollinsBaseNP = fixCollinsBaseNP;
     this.deletePunct      = deletePunct;
     this.tlp              = tlp;
 }
Example #4
0
 /// <summary>
 /// Removes from <c>op.trainOptions.splitters</c> every entry matching one of
 /// <c>op.trainOptions.deleteSplitters</c> — either exactly, or by basic
 /// category when the delete entry is itself a basic category.
 /// </summary>
 /// <param name="tlp">Language pack used to compute basic categories</param>
 /// <param name="op">Options holding the splitter sets to prune</param>
 private static void RemoveDeleteSplittersFromSplitters(ITreebankLanguagePack tlp, Options op)
 {
     if (op.trainOptions.deleteSplitters != null)
     {
         IList <string> deleted = new List <string>();
         foreach (string del in op.trainOptions.deleteSplitters)
         {
             string baseDel    = tlp.BasicCategory(del);
             // If the delete entry equals its own basic category, splitters are
             // matched by basic category too; otherwise an exact match is required.
             bool   checkBasic = del.Equals(baseDel);
             for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();)
             {
                 string elem     = it.Current;
                 string baseElem = tlp.BasicCategory(elem);
                 bool   delStr   = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del);
                 if (delStr)
                 {
                     // NOTE(review): Remove() on IEnumerator is a Sharpen (Java-port)
                     // extension mirroring Java's Iterator.remove(); plain .NET
                     // enumerators do not support removal during iteration.
                     it.Remove();
                     deleted.Add(elem);
                 }
             }
         }
         if (op.testOptions.verbose)
         {
             log.Info("Removed from vertical splitters: " + deleted);
         }
     }
 }
Example #5
0
        /// <summary>
        /// demoAPI demonstrates other ways of calling the parser with
        /// already tokenized text, or in some cases, raw text that needs to
        /// be tokenized as a single sentence.
        /// </summary>
        /// <remarks>
        /// Output is handled with a TreePrint object; the options used when
        /// creating the TreePrint determine what results are printed, and the
        /// output can be captured by passing a PrintWriter to
        /// TreePrint.printTree.  This code is for English.
        /// </remarks>
        /// <param name="lp">The parser to demonstrate</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Parse a list of correctly tokenized words.
            string[] tokens = new string[] { "This", "is", "an", "easy", "sentence", "." };
            IList <CoreLabel> tokenizedWords = SentenceUtils.ToCoreLabelList(tokens);
            Tree tree = lp.Apply(tokenizedWords);
            tree.PennPrint();
            System.Console.Out.WriteLine();

            // Load and use an explicit tokenizer on raw text.
            string rawSentence = "This is another sentence.";
            ITokenizerFactory <CoreLabel> factory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
            ITokenizer <CoreLabel> tokenizer = factory.GetTokenizer(new StringReader(rawSentence));
            IList <CoreLabel> rawTokens = tokenizer.Tokenize();
            tree = lp.Apply(rawTokens);

            // Print the CC-processed typed dependencies.
            // (A PennTreebankLanguagePack for English.)
            ITreebankLanguagePack langPack = lp.TreebankLanguagePack();
            IGrammaticalStructureFactory structureFactory = langPack.GrammaticalStructureFactory();
            GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
            IList <TypedDependency> dependencies = structure.TypedDependenciesCCprocessed();
            System.Console.Out.WriteLine(dependencies);
            System.Console.Out.WriteLine();

            // A TreePrint object can also print trees and dependencies.
            TreePrint printer = new TreePrint("penn,typedDependenciesCollapsed");
            printer.PrintTree(tree);
        }
 /// <summary>Builds a normalizer with the given language pack, cleanup level, and extra tree normalizers.</summary>
 /// <param name="tlp">The treebank language pack to use</param>
 /// <param name="nodeCleanup">Node cleanup level</param>
 /// <param name="tns">Additional tree normalizers, appended to this instance's list</param>
 public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup, IList <TreeNormalizer> tns)
 {
     this.nodeCleanup = nodeCleanup;
     this.tlp         = tlp;
     // The root label is the language pack's start symbol.
     root = tlp.StartSymbol();
     Sharpen.Collections.AddAll(this.tns, tns);
 }
Example #7
0
 /// <summary>Builds a NegraPennTreeReaderFactory with the given normalization settings.</summary>
 /// <param name="nodeCleanup">Node cleanup level</param>
 /// <param name="treeNormalizerInsertNPinPP">Whether the tree normalizer should insert NP inside PP</param>
 /// <param name="treeNormalizerLeaveGF">Whether the tree normalizer should leave grammatical functions</param>
 /// <param name="tlp">The treebank language pack to use</param>
 public NegraPennTreeReaderFactory(int nodeCleanup, bool treeNormalizerInsertNPinPP, bool treeNormalizerLeaveGF, ITreebankLanguagePack tlp)
 {
     // = 0;
     // = false;
     this.nodeCleanup = nodeCleanup;
     this.treeNormalizerInsertNPinPP = treeNormalizerInsertNPinPP;
     // BUG FIX: this parameter was previously accepted but never stored, so the
     // caller's leave-GF choice was silently ignored.
     this.treeNormalizerLeaveGF = treeNormalizerLeaveGF;
     this.tlp = tlp;
 }
Example #8
0
 /// <summary>Builds a NegraPennTreeNormalizer with the given language pack and cleanup level.</summary>
 /// <param name="tlp">The treebank language pack to use</param>
 /// <param name="nodeCleanup">Node cleanup level</param>
 public NegraPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
 {
     // non-unary root
     this.nodeCleanup = nodeCleanup;
     this.tlp         = tlp;
     // Filters generated by the Sharpen (Java-port) conversion:
     // one for empty elements, one for A-over-A configurations.
     emptyFilter  = new _IPredicate_46();
     aOverAFilter = new _IPredicate_56();
 }
Example #9
0
 /// <summary>Stores the passed-in TreebankLanguagePack and sets up charset encodings.</summary>
 /// <param name="tlp">The treebank language pack to use</param>
 protected internal AbstractTreebankParserParams(ITreebankLanguagePack tlp)
 {
     // end class RemoveGFSubcategoryStripper
     this.tlp = tlp;
     // Both directions default to the language pack's own encoding.
     inputEncoding  = tlp.GetEncoding();
     outputEncoding = tlp.GetEncoding();
     generateOriginalDependencies = false;
 }
Example #10
0
 /// <summary>Maps a tag index to the index of its basic-category tag.</summary>
 /// <param name="tag">Index of the tag to look up</param>
 /// <param name="tlp">Language pack used to build the lookup table on first use</param>
 /// <returns>The index of the corresponding base tag</returns>
 public virtual int GetBaseTag(int tag, ITreebankLanguagePack tlp)
 {
     // Lazily build the tag -> base-tag table the first time it is needed.
     if (tagsToBaseTags != null)
     {
         return(tagsToBaseTags[tag]);
     }
     PopulateTagsToBaseTags(tlp);
     return(tagsToBaseTags[tag]);
 }
Example #11
0
        /// <summary>
        /// Builds a ParserAnnotator configured from the given properties.
        /// Reads per-annotator keys of the form <c>annotatorName + ".xxx"</c>
        /// (model, debug, flags, maxlen, treemap, maxtime, kbest, keepPunct,
        /// buildgraphs, originalDependencies, nthreads, binaryTrees, nosquash,
        /// extradependencies) and initializes the parser, tree map, and
        /// grammatical-structure factory accordingly.
        /// </summary>
        /// <param name="annotatorName">Prefix for the property keys of this annotator</param>
        /// <param name="props">Configuration properties</param>
        /// <exception cref="ArgumentException">If no parser model is specified</exception>
        public ParserAnnotator(string annotatorName, Properties props)
        {
            // Model location defaults to the standard parser model path.
            string model = props.GetProperty(annotatorName + ".model", LexicalizedParser.DefaultParserLoc);

            if (model == null)
            {
                throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
            }
            this.Verbose = PropertiesUtils.GetBool(props, annotatorName + ".debug", false);
            string[] flags = ConvertFlagsToArray(props.GetProperty(annotatorName + ".flags"));
            this.parser            = LoadModel(model, Verbose, flags);
            // -1 means no limit on sentence length.
            this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", -1);
            // Optional tree-transforming class, instantiated by reflection.
            string treeMapClass = props.GetProperty(annotatorName + ".treemap");

            if (treeMapClass == null)
            {
                this.treeMap = null;
            }
            else
            {
                this.treeMap = ReflectionLoading.LoadByReflection(treeMapClass, props);
            }
            // -1 means no per-sentence parse time limit.
            this.maxParseTime = PropertiesUtils.GetLong(props, annotatorName + ".maxtime", -1);
            this.kBest        = PropertiesUtils.GetInt(props, annotatorName + ".kbest", 1);
            this.keepPunct    = PropertiesUtils.GetBool(props, annotatorName + ".keepPunct", true);
            string buildGraphsProperty = annotatorName + ".buildgraphs";

            // Dependency graphs can only be built if the parser's language
            // params support basic dependencies; warn if the user asked for
            // them anyway.
            if (!this.parser.GetTLPParams().SupportsBasicDependencies())
            {
                if (PropertiesUtils.GetBool(props, buildGraphsProperty))
                {
                    log.Info("WARNING: " + buildGraphsProperty + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
                }
                this.BuildGraphs = false;
            }
            else
            {
                this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsProperty, true);
            }
            if (this.BuildGraphs)
            {
                bool generateOriginalDependencies = PropertiesUtils.GetBool(props, annotatorName + ".originalDependencies", false);
                parser.GetTLPParams().SetGenerateOriginalDependencies(generateOriginalDependencies);
                ITreebankLanguagePack tlp         = parser.GetTLPParams().TreebankLanguagePack();
                // keepPunct selects an accept-all filter; otherwise punctuation words are rejected.
                IPredicate <string>   punctFilter = this.keepPunct ? Filters.AcceptFilter() : tlp.PunctuationWordRejectFilter();
                this.gsf = tlp.GrammaticalStructureFactory(punctFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
            }
            else
            {
                this.gsf = null;
            }
            // Annotator-specific thread count falls back to the global "nthreads" setting.
            this.nThreads = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", PropertiesUtils.GetInt(props, "nthreads", 1));
            bool usesBinary = StanfordCoreNLP.UsesBinaryTrees(props);

            this.saveBinaryTrees   = PropertiesUtils.GetBool(props, annotatorName + ".binaryTrees", usesBinary);
            this.noSquash          = PropertiesUtils.GetBool(props, annotatorName + ".nosquash", false);
            this.extraDependencies = MetaClass.Cast(props.GetProperty(annotatorName + ".extradependencies", "NONE"), typeof(GrammaticalStructure.Extras));
        }
Example #12
0
 /// <summary>Builds a ChineseMaxentLexicon over the given indices at the given feature level.</summary>
 /// <param name="op">Parser options; the language-pack params are cached from here</param>
 /// <param name="wordIndex">Index of words</param>
 /// <param name="tagIndex">Index of tags</param>
 /// <param name="featureLevel">Feature level to use</param>
 public ChineseMaxentLexicon(Options op, IIndex <string> wordIndex, IIndex <string> tagIndex, int featureLevel)
 {
     this.featureLevel = featureLevel;
     this.tagIndex     = tagIndex;
     this.wordIndex    = wordIndex;
     this.op           = op;
     // Cache the params and their language pack off the options for convenience.
     this.tlpParams = op.tlpParams;
     this.ctlp      = op.tlpParams.TreebankLanguagePack();
 }
 /// <summary>Builds a normalizer with the given language pack and node cleanup level.</summary>
 /// <param name="tlp">The treebank language pack to use</param>
 /// <param name="nodeCleanup">Node cleanup level</param>
 public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
 {
     //  public TueBaDZPennTreeNormalizer() {
     //    this(new TueBaDZLanguagePack(), 0);
     //  }
     this.nodeCleanup = nodeCleanup;
     this.tlp         = tlp;
     // The root label is the language pack's start symbol.
     root = tlp.StartSymbol();
 }
Example #14
0
 /// <summary>
 /// Builds a French head finder: populates <c>nonTerminalInfo</c> with
 /// per-category head-search rules ("left"/"right" = positional search over the
 /// listed labels in order; "leftdis"/"rightdis" = disjunctive search treating
 /// the listed labels as equals).
 /// </summary>
 /// <param name="tlp">The treebank language pack to use (its start symbol keys the root rule)</param>
 public DybroFrenchHeadFinder(ITreebankLanguagePack tlp)
     : base(tlp)
 {
     //French POS:
     // A (adjective), ADV (adverb), C (conjunction and subordinating conjunction), CL (clitics),
     // CS (subordinating conjunction) but occurs only once!,
     // D (determiner), ET (foreign word), I (interjection), N (noun),
     // P (preposition), PREF (prefix), PRO (strong pronoun -- very confusing), V (verb), PUNC (punctuation)
     // There is also the expanded French CC tagset.
     // V, A, ADV, PRO, C, CL, N, D are all split into multiple tags.
     // http://www.linguist.univ-paris-diderot.fr/~mcandito/Publications/crabbecandi-taln2008-final.pdf
     // (perhaps you can find an English translation somewhere)
     nonTerminalInfo = Generics.NewHashMap();
     // "sentence"
     nonTerminalInfo[tlp.StartSymbol()] = new string[][] { new string[] { "right", "VN", "AP", "NP", "Srel", "VPpart", "AdP", "I", "Ssub", "VPinf", "PP" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
     nonTerminalInfo["SENT"]            = new string[][] { new string[] { "right", "VN", "AP", "NP", "Srel", "VPpart", "AdP", "I", "Ssub", "VPinf", "PP" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
     // adjectival phrases
     nonTerminalInfo["AP"] = new string[][] { new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "right", "ET" }, new string[] { "rightdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "rightdis", "ADV", "ADVWH" } };
     // adverbial phrases
     nonTerminalInfo["AdP"] = new string[][] { new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
     // coordinated phrases
     nonTerminalInfo["COORD"] = new string[][] { new string[] { "leftdis", "C", "CC", "CS" }, new string[] { "left" } };
     // noun phrases
     nonTerminalInfo["NP"] = new string[][] { new string[] { "leftdis", "N", "NPP", "NC", "PRO", "PROWH", "PROREL" }, new string[] { "left", "NP" }, new string[] { "leftdis", "A", "ADJ", "ADJWH" }, new string[] { "left", "AP", "I", "VPpart" }, new
                                              string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "left", "AdP", "ET" }, new string[] { "leftdis", "D", "DET", "DETWH" } };
     // prepositional phrases
     nonTerminalInfo["PP"] = new string[][] { new string[] { "left", "P" }, new string[] { "left" } };
     // verbal nucleus
     nonTerminalInfo["VN"] = new string[][] { new string[] { "right", "V", "VPinf" }, new string[] { "right" } };
     // infinitive clauses
     nonTerminalInfo["VPinf"] = new string[][] { new string[] { "left", "VN" }, new string[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "left" } };
     // nonfinite clauses
     nonTerminalInfo["VPpart"] = new string[][] { new string[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "left", "VN" }, new string[] { "left" } };
     // relative clauses
     nonTerminalInfo["Srel"] = new string[][] { new string[] { "right", "VN", "AP", "NP" }, new string[] { "right" } };
     // subordinate clauses
     nonTerminalInfo["Ssub"] = new string[][] { new string[] { "right", "VN", "AP", "NP", "PP", "VPinf", "Ssub", "VPpart" }, new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
     // parenthetical clauses
     nonTerminalInfo["Sint"] = new string[][] { new string[] { "right", "VN", "AP", "NP", "PP", "VPinf", "Ssub", "VPpart" }, new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "rightdis", "ADV", "ADVWH" }, new string[] { "right" } };
     // adverbs
     //nonTerminalInfo.put("ADV", new String[][] {{"left", "ADV", "PP", "P"}});
     // compound categories: start with MW: D, A, C, N, ADV, V, P, PRO, CL
     nonTerminalInfo["MWD"]   = new string[][] { new string[] { "leftdis", "D", "DET", "DETWH" }, new string[] { "left" } };
     nonTerminalInfo["MWA"]   = new string[][] { new string[] { "left", "P" }, new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "rightdis", "A", "ADJ", "ADJWH" }, new string[] { "right" } };
     nonTerminalInfo["MWC"]   = new string[][] { new string[] { "leftdis", "C", "CC", "CS" }, new string[] { "left" } };
     nonTerminalInfo["MWN"]   = new string[][] { new string[] { "rightdis", "N", "NPP", "NC" }, new string[] { "rightdis", "ET" }, new string[] { "right" } };
     nonTerminalInfo["MWV"]   = new string[][] { new string[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" }, new string[] { "left" } };
     nonTerminalInfo["MWP"]   = new string[][] { new string[] { "left", "P" }, new string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "leftdis", "PRO", "PROWH", "PROREL" }, new string[] { "left" } };
     nonTerminalInfo["MWPRO"] = new string[][] { new string[] { "leftdis", "PRO", "PROWH", "PROREL" }, new string[] { "leftdis", "CL", "CLS", "CLR", "CLO" }, new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "leftdis", "A", "ADJ", "ADJWH" }, new string[] { "left" } };
     nonTerminalInfo["MWCL"]  = new string[][] { new string[] { "leftdis", "CL", "CLS", "CLR", "CLO" }, new string[] { "right" } };
     nonTerminalInfo["MWADV"] = new string[][] { new string[] { "left", "P" }, new string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "left" } };
     nonTerminalInfo["MWI"]   = new string[][] { new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "leftdis", "ADV", "ADVWH" }, new string[] { "left", "P" }, new string[] { "left" } };
     nonTerminalInfo["MWET"]  = new string[][] { new string[] { "left", "ET" }, new string[] { "leftdis", "N", "NPP", "NC" }, new string[] { "left" } };
     //TODO: wsg2011: For phrasal nodes that lacked a label.
     nonTerminalInfo[FrenchXMLTreeReader.MissingPhrasal] = new string[][] { new string[] { "left" } };
 }
 /// <summary>Builds an exhaustive dependency parser from the given grammar, lexicon, options and indices.</summary>
 /// <param name="dg">The dependency grammar</param>
 /// <param name="lex">The lexicon</param>
 /// <param name="op">Parser options; the language pack is taken from here</param>
 /// <param name="wordIndex">Index of words</param>
 /// <param name="tagIndex">Index of tags</param>
 public ExhaustiveDependencyParser(IDependencyGrammar dg, ILexicon lex, Options op, IIndex <string> wordIndex, IIndex <string> tagIndex)
 {
     this.tagIndex  = tagIndex;
     this.wordIndex = wordIndex;
     this.op        = op;
     this.lex       = lex;
     this.dg        = dg;
     this.tlp       = op.Langpack();
     // Factory used to build result trees.
     tf = new LabeledScoredTreeFactory();
 }
 /// <summary>Builds a normalizer writing to the given output and flat-file writers.</summary>
 /// <param name="_enclosing">The enclosing dataset instance</param>
 /// <param name="outFile">Writer for normalized output</param>
 /// <param name="flatFile">Writer for flattened output</param>
 public ArabicRawTreeNormalizer(ATBArabicDataset _enclosing, PrintWriter outFile, PrintWriter flatFile)
 {
     this._enclosing = _enclosing;
     // Choose the Buckwalter transliteration mode based on the dataset encoding.
     if (this._enclosing.encoding == Dataset.Encoding.Utf8)
     {
         this.encodingMap = new Buckwalter();
     }
     else
     {
         this.encodingMap = new Buckwalter(true);
     }
     this.outfile  = outFile;
     this.flatFile = flatFile;
     // Filters for empty elements and A-over-A configurations.
     this.nullFilter   = new ArabicTreeNormalizer.ArabicEmptyFilter();
     this.aOverAFilter = new BobChrisTreeNormalizer.AOverAFilter();
     this.tf  = new LabeledScoredTreeFactory();
     this.tlp = new ArabicTreebankLanguagePack();
 }
Example #17
0
        /// <summary>
        /// Trains a ChineseMaxentLexicon on one part of a treebank and tests
        /// its tagging accuracy on another, printing results to stdout.
        /// </summary>
        /// <remarks>
        /// Arguments (as read below — confirm against project docs):
        /// args[0] = treebank path, args[1] = training file number ranges,
        /// args[2] = test file number ranges, args[3] = optional feature level.
        /// </remarks>
        /// <param name="args">Command-line arguments as described above</param>
        public static void Main(string[] args)
        {
            ITreebankLangParserParams tlpParams = new ChineseTreebankParserParams();
            ITreebankLanguagePack     ctlp      = tlpParams.TreebankLanguagePack();
            Options       op = new Options(tlpParams);
            TreeAnnotator ta = new TreeAnnotator(tlpParams.HeadFinder(), tlpParams, op);

            log.Info("Reading Trees...");
            IFileFilter trainFilter   = new NumberRangesFileFilter(args[1], true);
            Treebank    trainTreebank = tlpParams.MemoryTreebank();

            trainTreebank.LoadPath(args[0], trainFilter);
            log.Info("Annotating trees...");
            ICollection <Tree> trainTrees = new List <Tree>();

            foreach (Tree tree in trainTreebank)
            {
                trainTrees.Add(ta.TransformTree(tree));
            }
            // Drop the treebank reference so it can be garbage-collected.
            trainTreebank = null;
            // saves memory
            log.Info("Training lexicon...");
            IIndex <string> wordIndex    = new HashIndex <string>();
            IIndex <string> tagIndex     = new HashIndex <string>();
            int             featureLevel = DefaultFeatureLevel;

            if (args.Length > 3)
            {
                featureLevel = System.Convert.ToInt32(args[3]);
            }
            Edu.Stanford.Nlp.Parser.Lexparser.ChineseMaxentLexicon lex = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseMaxentLexicon(op, wordIndex, tagIndex, featureLevel);
            lex.InitializeTraining(trainTrees.Count);
            lex.Train(trainTrees);
            lex.FinishTraining();
            log.Info("Testing");
            IFileFilter testFilter   = new NumberRangesFileFilter(args[2], true);
            Treebank    testTreebank = tlpParams.MemoryTreebank();

            testTreebank.LoadPath(args[0], testFilter);
            // Flatten all test trees into a single list of tagged words.
            IList <TaggedWord> testWords = new List <TaggedWord>();

            foreach (Tree t in testTreebank)
            {
                foreach (TaggedWord tw in t.TaggedYield())
                {
                    testWords.Add(tw);
                }
            }
            //testWords.addAll(t.taggedYield());
            // totalAndCorrect[0] = total words tested, totalAndCorrect[1] = correct tags.
            int[] totalAndCorrect = lex.TestOnTreebank(testWords);
            log.Info("done.");
            System.Console.Out.WriteLine(totalAndCorrect[1] + " correct out of " + totalAndCorrect[0] + " -- ACC: " + ((double)totalAndCorrect[1]) / totalAndCorrect[0]);
        }
Example #18
0
        // pcfgPE.printGoodBad();
        /// <summary>Replaces each word's tag with its basic category.</summary>
        /// <param name="twList">Tagged words to clean</param>
        /// <param name="tlp">Language pack used to compute basic categories</param>
        /// <returns>A new list of tagged words with basic-category tags</returns>
        private static IList <TaggedWord> CleanTags(IList <TaggedWord> twList, ITreebankLanguagePack tlp)
        {
            IList <TaggedWord> cleaned = new List <TaggedWord>(twList.Count);
            foreach (TaggedWord tagged in twList)
            {
                cleaned.Add(new TaggedWord(tagged.Word(), tlp.BasicCategory(tagged.Tag())));
            }
            return(cleaned);
        }
Example #19
0
        /// <summary>
        /// Builds the lookup table mapping each tag index to the index of its
        /// basic-category tag, adding base tags to the index as needed.
        /// </summary>
        /// <param name="tlp">Language pack used to compute basic categories</param>
        private void PopulateTagsToBaseTags(ITreebankLanguagePack tlp)
        {
            int size = tagIndex.Size();

            tagsToBaseTags = new int[size];
            for (int idx = 0; idx < size; idx++)
            {
                string tagLabel  = tagIndex.Get(idx);
                string baseLabel = tlp.BasicCategory(tagLabel);
                tagsToBaseTags[idx] = tagIndex.AddToIndex(baseLabel);
            }
        }
 /// <summary>
 /// Builds a Spanish head finder: populates <c>nonTerminalInfo</c> with
 /// per-category head-search rules ("left"/"right" = positional search over the
 /// listed labels in order; "leftdis"/"rightdis" = disjunctive search treating
 /// the listed labels as equals; InsertVerbs presumably splices verb tags into
 /// a rule — confirm against its definition).
 /// </summary>
 /// <param name="tlp">The treebank language pack to use (its start symbol keys the root rule)</param>
 public SpanishHeadFinder(ITreebankLanguagePack tlp)
     : base(tlp)
 {
     nonTerminalInfo = Generics.NewHashMap();
     // "sentence"
     string[][] rootRules = new string[][] { new string[] { "right", "grup.verb", "s.a", "sn" }, new string[] { "left", "S" }, new string[] { "right", "sadv", "grup.adv", "neg", "interjeccio", "i", "sp", "grup.prep" }, InsertVerbs(new string[] {
             "rightdis"
         }, new string[] { "nc0s000", "nc0p000", "nc00000", "np00000", "rg", "rn" }) };
     nonTerminalInfo[tlp.StartSymbol()] = rootRules;
     nonTerminalInfo["S"]        = rootRules;
     nonTerminalInfo["sentence"] = rootRules;
     nonTerminalInfo["inc"]      = rootRules;
     // adjectival phrases
     string[][] adjectivePhraseRules = new string[][] { new string[] { "leftdis", "grup.a", "s.a", "spec" } };
     nonTerminalInfo["s.a"]    = adjectivePhraseRules;
     nonTerminalInfo["sa"]     = adjectivePhraseRules;
     nonTerminalInfo["grup.a"] = new string[][] { new string[] { "rightdis", "aq0000", "ao0000" }, InsertVerbs(new string[] { "right" }, new string[] {  }), new string[] { "right", "rg", "rn" } };
     // adverbial phrases
     nonTerminalInfo["sadv"]     = new string[][] { new string[] { "left", "grup.adv", "sadv" } };
     nonTerminalInfo["grup.adv"] = new string[][] { new string[] { "left", "conj" }, new string[] { "rightdis", "rg", "rn", "neg", "grup.adv" }, new string[] { "rightdis", "pr000000", "pi000000", "nc0s000", "nc0p000", "nc00000", "np00000" } };
     nonTerminalInfo["neg"]      = new string[][] { new string[] { "leftdis", "rg", "rn" } };
     // noun phrases
     nonTerminalInfo["sn"]       = new string[][] { new string[] { "leftdis", "nc0s000", "nc0p000", "nc00000" }, new string[] { "left", "grup.nom", "grup.w", "grup.z", "sn" }, new string[] { "leftdis", "spec" } };
     nonTerminalInfo["grup.nom"] = new string[][] { new string[] { "leftdis", "nc0s000", "nc0p000", "nc00000", "np00000", "w", "grup.w" }, new string[] { "leftdis", "pi000000", "pd000000" }, new string[] { "left", "grup.nom", "sp" }, new string[]
                                                    { "leftdis", "pn000000", "aq0000", "ao0000" }, new string[] { "left", "grup.a", "i", "grup.verb" }, new string[] { "leftdis", "grup.adv" } };
     // verb phrases
     nonTerminalInfo["grup.verb"] = new string[][] { InsertVerbs(new string[] { "left" }, new string[] {  }) };
     nonTerminalInfo["infinitiu"] = new string[][] { InsertVerbs(new string[] { "left" }, new string[] { "infinitiu" }) };
     nonTerminalInfo["gerundi"]   = new string[][] { new string[] { "left", "vmg0000", "vag0000", "vsg0000", "gerundi" } };
     nonTerminalInfo["participi"] = new string[][] { new string[] { "left", "aq", "vmp0000", "vap0000", "vsp0000", "grup.a" } };
     // specifiers
     nonTerminalInfo["spec"] = new string[][] { new string[] { "left", "conj", "spec" }, new string[] { "leftdis", "da0000", "de0000", "di0000", "dd0000", "dp0000", "dn0000", "dt0000" }, new string[] { "leftdis", "z0", "grup.z" }, new string[] {
                                                    "left", "rg", "rn"
                                                }, new string[] { "leftdis", "pt000000", "pe000000", "pd000000", "pp000000", "pi000000", "pn000000", "pr000000" }, new string[] { "left", "grup.adv", "w" } };
     // entre A y B
     // etc.
     nonTerminalInfo["conj"]        = new string[][] { new string[] { "leftdis", "cs", "cc" }, new string[] { "leftdis", "grup.cc", "grup.cs" }, new string[] { "left", "sp" } };
     nonTerminalInfo["interjeccio"] = new string[][] { new string[] { "leftdis", "i", "nc0s000", "nc0p000", "nc00000", "np00000", "pi000000" }, new string[] { "left", "interjeccio" } };
     nonTerminalInfo["relatiu"]     = new string[][] { new string[] { "left", "pr000000" } };
     // prepositional phrases
     nonTerminalInfo["sp"]   = new string[][] { new string[] { "left", "prep", "sp" } };
     nonTerminalInfo["prep"] = new string[][] { new string[] { "leftdis", "sp000", "prep", "grup.prep" } };
     // custom categories
     nonTerminalInfo["grup.cc"]   = new string[][] { new string[] { "left", "cs" } };
     nonTerminalInfo["grup.cs"]   = new string[][] { new string[] { "left", "cs" } };
     nonTerminalInfo["grup.prep"] = new string[][] { new string[] { "left", "prep", "grup.prep", "s" } };
     nonTerminalInfo["grup.pron"] = new string[][] { new string[] { "rightdis", "px000000" } };
     nonTerminalInfo["grup.w"]    = new string[][] { new string[] { "right", "w" }, new string[] { "leftdis", "z0" }, new string[] { "left" } };
     nonTerminalInfo["grup.z"]    = new string[][] { new string[] { "leftdis", "z0", "zu", "zp", "zd", "zm" }, new string[] { "right", "nc0s000", "nc0p000", "nc00000", "np00000" } };
 }
Example #21
0
 /// <summary>Builds an AbstractDependencyGrammar with the given configuration.</summary>
 /// <param name="tlp">The treebank language pack to use</param>
 /// <param name="tagProjection">Projection applied to tags</param>
 /// <param name="directional">Whether dependencies are directional</param>
 /// <param name="useDistance">Whether to use distance</param>
 /// <param name="useCoarseDistance">Whether to use coarse distance</param>
 /// <param name="op">Parser options</param>
 /// <param name="wordIndex">Index of words</param>
 /// <param name="tagIndex">Index of tags</param>
 public AbstractDependencyGrammar(ITreebankLanguagePack tlp, ITagProjection tagProjection, bool directional, bool useDistance, bool useCoarseDistance, Options op, IIndex <string> wordIndex, IIndex <string> tagIndex)
 {
     this.tagIndex          = tagIndex;
     this.wordIndex         = wordIndex;
     this.op                = op;
     this.useCoarseDistance = useCoarseDistance;
     this.useDistance       = useDistance;
     this.directional       = directional;
     this.tagProjection     = tagProjection;
     this.tlp               = tlp;
     // Sentinel tagged words: the stop marker and the any-word/any-tag wildcard.
     stopTW = new IntTaggedWord(IntTaggedWord.StopWordInt, IntTaggedWord.StopTagInt);
     wildTW = new IntTaggedWord(IntTaggedWord.AnyWordInt, IntTaggedWord.AnyTagInt);
     // Runs last; presumably reads the fields assigned above.
     InitTagBins();
 }
        /// <summary>Adds a sentence final punctuation mark to sentences that lack one.</summary>
        /// <remarks>
        /// Appends a period (the first sentence-final punctuation word in the
        /// parser language pack) when none appears within the last three words
        /// (allowing for close parentheses, etc.).  Tags are consulted for
        /// punctuation when available; otherwise the word text is used.
        /// </remarks>
        /// <param name="sentence">The sentence to check</param>
        /// <param name="length">The length of the sentence (just to avoid recomputation)</param>
        private bool AddSentenceFinalPunctIfNeeded(IList <IHasWord> sentence, int length)
        {
            // Only inspect the last three tokens.
            int windowStart = length - 3;
            if (windowStart < 0)
            {
                windowStart = 0;
            }
            ITreebankLanguagePack langPack = op.tlpParams.TreebankLanguagePack();
            for (int i = length - 1; i >= windowStart; i--)
            {
                IHasWord item = sentence[i];
                // An object (e.g., CoreLabel) can implement IHasTag without actually
                // storing a tag, so check for a non-empty value.  When a tag exists,
                // trust it alone, since word tokens can be ambiguous.
                string tag = null;
                if (item is IHasTag)
                {
                    tag = ((IHasTag)item).Tag();
                }
                if (tag != null && !tag.IsEmpty())
                {
                    if (langPack.IsSentenceFinalPunctuationTag(tag))
                    {
                        return(false);
                    }
                }
                else if (langPack.IsPunctuationWord(item.Word()))
                {
                    return(false);
                }
            }
            // No final punctuation found, so append one.
            if (op.testOptions.verbose)
            {
                log.Info("Adding missing final punctuation to sentence.");
            }
            string[] sfpWords = langPack.SentenceFinalPunctuationWords();
            if (sfpWords.Length > 0)
            {
                sentence.Add(new Word(sfpWords[0]));
            }
            return(true);
        }
Example #23
0
 /// <summary>
 /// Fills in the head-percolation table for Chinese treebank categories.
 /// Each entry apparently maps a category to rule arrays whose first element
 /// is a search direction ("left"/"right") followed by candidate head
 /// categories in priority order — TODO confirm against the base head finder.
 /// </summary>
 /// <param name="tlp">language pack passed through to the base head finder</param>
 public BikelChineseHeadFinder(ITreebankLanguagePack tlp)
     : base(tlp)
 {
     nonTerminalInfo = Generics.NewHashMap();
     // these are first-cut rules
     defaultRule = new string[] { "right" };
     // ROOT is not always unary for chinese -- PAIR is a special notation
     // that the Irish people use for non-unary ones....
     nonTerminalInfo["ROOT"] = new string[][] { new string[] { "left", "IP" } };
     nonTerminalInfo["PAIR"] = new string[][] { new string[] { "left", "IP" } };
     // Major syntactic categories
     nonTerminalInfo["ADJP"] = new string[][] { new string[] { "right", "ADJP", "JJ" }, new string[] { "right", "AD", "NN", "CS" } };
     nonTerminalInfo["ADVP"] = new string[][] { new string[] { "right", "ADVP", "AD" } };
     nonTerminalInfo["CLP"]  = new string[][] { new string[] { "right", "CLP", "M" } };
     nonTerminalInfo["CP"]   = new string[][] { new string[] { "right", "DEC", "SP" }, new string[] { "left", "ADVP", "CS" }, new string[] { "right", "CP", "IP" } };
     nonTerminalInfo["DNP"]  = new string[][] { new string[] { "right", "DNP", "DEG" }, new string[] { "right", "DEC" } };
     nonTerminalInfo["DP"]   = new string[][] { new string[] { "left", "DP", "DT" } };
     nonTerminalInfo["DVP"]  = new string[][] { new string[] { "right", "DVP", "DEV" } };
     nonTerminalInfo["FRAG"] = new string[][] { new string[] { "right", "VV", "NR", "NN" } };
     nonTerminalInfo["INTJ"] = new string[][] { new string[] { "right", "INTJ", "IJ" } };
     nonTerminalInfo["IP"]   = new string[][] { new string[] { "right", "IP", "VP" }, new string[] { "right", "VV" } };
     nonTerminalInfo["LCP"]  = new string[][] { new string[] { "right", "LCP", "LC" } };
     nonTerminalInfo["LST"]  = new string[][] { new string[] { "left", "LST", "CD", "OD" } };
     nonTerminalInfo["NP"]   = new string[][] { new string[] { "right", "NP", "NN", "NT", "NR", "QP" } };
     nonTerminalInfo["PP"]   = new string[][] { new string[] { "left", "PP", "P" } };
     nonTerminalInfo["PRN"]  = new string[][] { new string[] { "right", "NP", "IP", "VP", "NT", "NR", "NN" } };
     nonTerminalInfo["QP"]   = new string[][] { new string[] { "right", "QP", "CLP", "CD", "OD" } };
     nonTerminalInfo["UCP"]  = new string[][] { new string[] { "right" } };
     nonTerminalInfo["VP"]   = new string[][] { new string[] { "left", "VP", "VA", "VC", "VE", "VV", "BA", "LB", "VCD", "VSB", "VRD", "VNV", "VCP" } };
     nonTerminalInfo["VCD"]  = new string[][] { new string[] { "right", "VCD", "VV", "VA", "VC", "VE" } };
     nonTerminalInfo["VCP"]  = new string[][] { new string[] { "right", "VCP", "VV", "VA", "VC", "VE" } };
     nonTerminalInfo["VRD"]  = new string[][] { new string[] { "right", "VRD", "VV", "VA", "VC", "VE" } };
     nonTerminalInfo["VSB"]  = new string[][] { new string[] { "right", "VSB", "VV", "VA", "VC", "VE" } };
     nonTerminalInfo["VNV"]  = new string[][] { new string[] { "right", "VNV", "VV", "VA", "VC", "VE" } };
     nonTerminalInfo["VPT"]  = new string[][] { new string[] { "right", "VNV", "VV", "VA", "VC", "VE" } };
     // VNV typo for VPT? None of either in ctb4.
     nonTerminalInfo["WHNP"] = new string[][] { new string[] { "right", "WHNP", "NP", "NN", "NT", "NR", "QP" } };
     nonTerminalInfo["WHPP"] = new string[][] { new string[] { "left", "WHPP", "PP", "P" } };
     // some POS tags apparently sit where phrases are supposed to be
     nonTerminalInfo["CD"] = new string[][] { new string[] { "right", "CD" } };
     nonTerminalInfo["NN"] = new string[][] { new string[] { "right", "NN" } };
     nonTerminalInfo["NR"] = new string[][] { new string[] { "right", "NR" } };
     // These POS tags apparently do primitive morphology for character-level
     // parsing.  It shouldn't affect anything else because heads of preterminals are not
     // generally queried - GMA
     nonTerminalInfo["VV"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VA"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VC"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VE"] = new string[][] { new string[] { "left" } };
 }
        /// <summary>
        /// Maps each labeled constituent to a copy whose label is reduced to its
        /// basic category via the given language pack.
        /// </summary>
        /// <param name="tlp">language pack supplying BasicCategory</param>
        /// <param name="constituents">constituents to simplify; every element must be a LabeledConstituent</param>
        /// <returns>a new set of simplified constituents</returns>
        /// <exception cref="AssertionError">if any element is not a LabeledConstituent</exception>
        public static ICollection <Constituent> SimplifyConstituents(ITreebankLanguagePack tlp, ICollection <Constituent> constituents)
        {
            ICollection <Constituent> simplified = new HashSet <Constituent>();

            foreach (Constituent candidate in constituents)
            {
                // Only labeled constituents carry the span + label we need.
                if (candidate is LabeledConstituent labeled)
                {
                    simplified.Add(new LabeledConstituent(labeled.Start(), labeled.End(), tlp.BasicCategory(labeled.Value())));
                }
                else
                {
                    throw new AssertionError("Unexpected constituent type " + candidate.GetType());
                }
            }
            return simplified;
        }
Пример #25
0
 /// <summary>Build a custom binarizer for Trees.</summary>
 /// <param name="hf">the HeadFinder to use in binarization</param>
 /// <param name="tlp">the TreebankLanguagePack to use</param>
 /// <param name="insideFactor">whether to do inside markovization</param>
 /// <param name="markovFactor">whether to markovize the binary rules</param>
 /// <param name="markovOrder">the markov order to use; only relevant with markovFactor=true</param>
 /// <param name="useWrappingLabels">whether to use state names (labels) that allow wrapping from right to left</param>
 /// <param name="unaryAtTop">
 /// Whether to actually materialize the unary that rewrites
 /// a passive state to the active rule at the top of an original local
 /// tree.  This is used only when compaction is happening
 /// </param>
 /// <param name="selectiveSplitThreshold">if selective split is used, this will be the threshold used to decide which state splits to keep</param>
 /// <param name="markFinalStates">whether or not to make the state names (labels) of the final active states distinctive</param>
 /// <param name="simpleLabels">whether to use simplified state labels (stored verbatim here; exact semantics defined where the field is read — TODO confirm)</param>
 /// <param name="noRebinarization">if true, a node which already has exactly two children is not altered</param>
 public TreeBinarizer(IHeadFinder hf, ITreebankLanguagePack tlp, bool insideFactor, bool markovFactor, int markovOrder, bool useWrappingLabels, bool unaryAtTop, double selectiveSplitThreshold, bool markFinalStates, bool simpleLabels, bool noRebinarization
                      )
 {
     this.hf                      = hf;
     this.tlp                     = tlp;
     this.tf                      = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
     this.insideFactor            = insideFactor;
     this.markovFactor            = markovFactor;
     this.markovOrder             = markovOrder;
     this.useWrappingLabels       = useWrappingLabels;
     this.unaryAtTop              = unaryAtTop;
     this.selectiveSplitThreshold = selectiveSplitThreshold;
     this.markFinalStates         = markFinalStates;
     this.simpleLabels            = simpleLabels;
     this.noRebinarization        = noRebinarization;
 }
Пример #26
0
 /// <summary>Defines a transition-based parsing system.</summary>
 /// <param name="tlp">
 /// TreebankLanguagePack describing the language being
 /// parsed
 /// </param>
 /// <param name="labels">
 /// A list of possible dependency relation labels, with
 /// the ROOT relation label as the first element
 /// </param>
 /// <param name="transitions">the list of transition names available to this system</param>
 /// <param name="verbose">if true, log a short summary of the configuration</param>
 public ParsingSystem(ITreebankLanguagePack tlp, IList <string> labels, IList <string> transitions, bool verbose)
 {
     // TODO pass labels as Map<String, GrammaticalRelation>; use GrammaticalRelation throughout
     this.tlp         = tlp;
     this.labels      = new List <string>(labels);
     // By convention the first element of labels is the root relation label.
     rootLabel        = labels[0];
     this.transitions = transitions;
     if (!verbose)
     {
         return;
     }
     log.Info(Config.Separator);
     log.Info("#Transitions: " + NumTransitions());
     log.Info("#Labels: " + labels.Count);
     log.Info("ROOTLABEL: " + rootLabel);
 }
Пример #27
0
        /// <summary>
        /// Reads configuration values from <paramref name="props"/>, keeping each
        /// field's current value as the default whenever its key is absent.
        /// </summary>
        /// <param name="props">property table holding training and runtime options</param>
        private void SetProperties(Properties props)
        {
            // Training hyper-parameters
            trainingThreads       = PropertiesUtils.GetInt(props, "trainingThreads", trainingThreads);
            wordCutOff            = PropertiesUtils.GetInt(props, "wordCutOff", wordCutOff);
            initRange             = PropertiesUtils.GetDouble(props, "initRange", initRange);
            maxIter               = PropertiesUtils.GetInt(props, "maxIter", maxIter);
            batchSize             = PropertiesUtils.GetInt(props, "batchSize", batchSize);
            adaEps                = PropertiesUtils.GetDouble(props, "adaEps", adaEps);
            adaAlpha              = PropertiesUtils.GetDouble(props, "adaAlpha", adaAlpha);
            regParameter          = PropertiesUtils.GetDouble(props, "regParameter", regParameter);
            dropProb              = PropertiesUtils.GetDouble(props, "dropProb", dropProb);
            hiddenSize            = PropertiesUtils.GetInt(props, "hiddenSize", hiddenSize);
            embeddingSize         = PropertiesUtils.GetInt(props, "embeddingSize", embeddingSize);
            numPreComputed        = PropertiesUtils.GetInt(props, "numPreComputed", numPreComputed);
            evalPerIter           = PropertiesUtils.GetInt(props, "evalPerIter", evalPerIter);
            clearGradientsPerIter = PropertiesUtils.GetInt(props, "clearGradientsPerIter", clearGradientsPerIter);
            saveIntermediate      = PropertiesUtils.GetBool(props, "saveIntermediate", saveIntermediate);
            unlabeled             = PropertiesUtils.GetBool(props, "unlabeled", unlabeled);
            cPOS   = PropertiesUtils.GetBool(props, "cPOS", cPOS);
            noPunc = PropertiesUtils.GetBool(props, "noPunc", noPunc);
            doWordEmbeddingGradUpdate = PropertiesUtils.GetBool(props, "doWordEmbeddingGradUpdate", doWordEmbeddingGradUpdate);
            // Runtime parsing options
            sentenceDelimiter = PropertiesUtils.GetString(props, "sentenceDelimiter", sentenceDelimiter);
            tagger            = PropertiesUtils.GetString(props, "tagger.model", tagger);
            string escaperClass = props.GetProperty("escaper");

            escaper = escaperClass != null ? ReflectionLoading.LoadByReflection(escaperClass) : null;

            // Language options
            language = props.Contains("language") ? GetLanguage(props.GetProperty("language")) : language;
            // NOTE(review): this line was garbled in the scraped source
            // ("tlp = [email protected]();"); reconstructed as the language's
            // default language pack, matching upstream CoreNLP Config.setProperties
            // ("tlp = language.params.treebankLanguagePack();") — confirm against the port.
            tlp = language.@params.TreebankLanguagePack();
            // if a tlp was specified go with that
            string tlpCanonicalName = props.GetProperty("tlp");

            if (tlpCanonicalName != null)
            {
                try
                {
                    tlp = ReflectionLoading.LoadByReflection(tlpCanonicalName);
                    System.Console.Error.WriteLine("Loaded TreebankLanguagePack: " + tlpCanonicalName);
                }
                catch (Exception)
                {
                    // Best-effort: keep the language's default pack if loading fails.
                    System.Console.Error.WriteLine("Error: Failed to load TreebankLanguagePack: " + tlpCanonicalName);
                }
            }
        }
        // static only
        /// <summary>
        /// Counts how many spans are present in goldTree, including
        /// preterminals, but not present in guessTree, along with how many
        /// spans are present in guessTree and not goldTree.
        /// </summary>
        /// <remarks>
        /// Counts how many spans are present in goldTree, including
        /// preterminals, but not present in guessTree, along with how many
        /// spans are present in guessTree and not goldTree.  Each one counts
        /// as an error, meaning that something like a mislabeled span or
        /// preterminal counts as two errors.
        /// <br />
        /// Span labels are compared using the basicCategory() function
        /// from the passed in TreebankLanguagePack.
        /// </remarks>
        public static int CountSpanErrors(ITreebankLanguagePack tlp, Tree goldTree, Tree guessTree)
        {
            // Reduce both trees' span labels to basic categories before comparing.
            ICollection <Constituent> simpleGold  = SimplifyConstituents(tlp, goldTree.Constituents(LabeledConstituent.Factory()));
            ICollection <Constituent> simpleGuess = SimplifyConstituents(tlp, guessTree.Constituents(LabeledConstituent.Factory()));

            // Each span missing from one side counts as one error, so a
            // mislabeled span is penalized twice (once per direction).
            int errors = 0;
            foreach (Constituent goldSpan in simpleGold)
            {
                if (!simpleGuess.Contains(goldSpan))
                {
                    ++errors;
                }
            }
            foreach (Constituent guessSpan in simpleGuess)
            {
                if (!simpleGold.Contains(guessSpan))
                {
                    ++errors;
                }
            }

            // constituents() omits preterminal spans, so compare the tag
            // sequences directly (also via basic category).
            IList <TaggedWord> goldWords  = goldTree.TaggedYield();
            IList <TaggedWord> guessWords = guessTree.TaggedYield();
            int shared = Math.Min(goldWords.Count, guessWords.Count);

            for (int i = 0; i < shared; ++i)
            {
                string goldTag  = tlp.BasicCategory(goldWords[i].Tag());
                string guessTag = tlp.BasicCategory(guessWords[i].Tag());
                if (!goldTag.Equals(guessTag))
                {
                    // A mismatched preterminal is missing in one direction and
                    // spurious in the other, hence two errors.
                    errors += 2;
                }
            }
            return errors;
        }
 /// <summary>
 /// Fills in the head-percolation table for Chinese treebank categories.
 /// Each entry apparently maps a category to a rule array whose first element
 /// is a search direction ("left"/"right") followed by candidate head
 /// categories in priority order — TODO confirm against the base head finder.
 /// </summary>
 /// <param name="tlp">language pack passed through to the base head finder</param>
 public SunJurafskyChineseHeadFinder(ITreebankLanguagePack tlp)
     : base(tlp)
 {
     defaultRule             = new string[] { "right" };
     nonTerminalInfo         = Generics.NewHashMap();
     nonTerminalInfo["ROOT"] = new string[][] { new string[] { "left", "IP" } };
     nonTerminalInfo["PAIR"] = new string[][] { new string[] { "left", "IP" } };
     nonTerminalInfo["ADJP"] = new string[][] { new string[] { "right", "ADJP", "JJ", "AD" } };
     nonTerminalInfo["ADVP"] = new string[][] { new string[] { "right", "ADVP", "AD", "CS", "JJ", "NP", "PP", "P", "VA", "VV" } };
     nonTerminalInfo["CLP"]  = new string[][] { new string[] { "right", "CLP", "M", "NN", "NP" } };
     nonTerminalInfo["CP"]   = new string[][] { new string[] { "right", "CP", "IP", "VP" } };
     nonTerminalInfo["DNP"]  = new string[][] { new string[] { "right", "DEG", "DNP", "DEC", "QP" } };
     nonTerminalInfo["DP"]   = new string[][] { new string[] { "left", "M", "DP", "DT", "OD" } };
     nonTerminalInfo["DVP"]  = new string[][] { new string[] { "right", "DEV", "AD", "VP" } };
     nonTerminalInfo["IP"]   = new string[][] { new string[] { "right", "VP", "IP", "NP" } };
     nonTerminalInfo["LCP"]  = new string[][] { new string[] { "right", "LCP", "LC" } };
     nonTerminalInfo["LST"]  = new string[][] { new string[] { "right", "CD", "NP", "QP" } };
     nonTerminalInfo["NP"]   = new string[][] { new string[] { "right", "NP", "NN", "IP", "NR", "NT" } };
     nonTerminalInfo["PP"]   = new string[][] { new string[] { "left", "P", "PP" } };
     nonTerminalInfo["PRN"]  = new string[][] { new string[] { "left", "PU" } };
     nonTerminalInfo["QP"]   = new string[][] { new string[] { "right", "QP", "CLP", "CD" } };
     nonTerminalInfo["UCP"]  = new string[][] { new string[] { "left", "IP", "NP", "VP" } };
     nonTerminalInfo["VCD"]  = new string[][] { new string[] { "left", "VV", "VA", "VE" } };
     nonTerminalInfo["VP"]   = new string[][] { new string[] { "left", "VE", "VC", "VV", "VNV", "VPT", "VRD", "VSB", "VCD", "VP" } };
     nonTerminalInfo["VPT"]  = new string[][] { new string[] { "left", "VA", "VV" } };
     nonTerminalInfo["VCP"]  = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VNV"]  = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VRD"]  = new string[][] { new string[] { "left", "VV", "VA" } };
     nonTerminalInfo["VSB"]  = new string[][] { new string[] { "right", "VV", "VE" } };
     nonTerminalInfo["FRAG"] = new string[][] { new string[] { "right", "VV", "NN" } };
     //FRAG seems only to be used for bits at the beginnings of articles: "Xinwenshe<DATE>" and "(wan)"
     // some POS tags apparently sit where phrases are supposed to be
     nonTerminalInfo["CD"] = new string[][] { new string[] { "right", "CD" } };
     nonTerminalInfo["NN"] = new string[][] { new string[] { "right", "NN" } };
     nonTerminalInfo["NR"] = new string[][] { new string[] { "right", "NR" } };
     // I'm adding these POS tags to do primitive morphology for character-level
     // parsing.  It shouldn't affect anything else because heads of preterminals are not
     // generally queried - GMA
     nonTerminalInfo["VV"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VA"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VC"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VE"] = new string[][] { new string[] { "left" } };
 }
 /// <summary>
 /// Fills in the head-percolation table for Penn Treebank (English) categories,
 /// following the version in Collins' dissertation (1999: 236-238) per the
 /// inline comment.  Each entry apparently maps a category to rule arrays whose
 /// first element is a search direction ("left"/"right"/"rightdis") followed by
 /// candidate head categories in priority order — TODO confirm against the base
 /// head finder.
 /// </summary>
 /// <param name="tlp">language pack passed through to the base head finder</param>
 /// <param name="categoriesToAvoid">categories forwarded to the base class — see base constructor for semantics</param>
 public CollinsHeadFinder(ITreebankLanguagePack tlp, params string[] categoriesToAvoid)
     : base(tlp, categoriesToAvoid)
 {
     nonTerminalInfo = Generics.NewHashMap();
     // This version from Collins' diss (1999: 236-238)
     nonTerminalInfo["ADJP"]  = new string[][] { new string[] { "left", "NNS", "QP", "NN", "$", "ADVP", "JJ", "VBN", "VBG", "ADJP", "JJR", "NP", "JJS", "DT", "FW", "RBR", "RBS", "SBAR", "RB" } };
     nonTerminalInfo["ADVP"]  = new string[][] { new string[] { "right", "RB", "RBR", "RBS", "FW", "ADVP", "TO", "CD", "JJR", "JJ", "IN", "NP", "JJS", "NN" } };
     nonTerminalInfo["CONJP"] = new string[][] { new string[] { "right", "CC", "RB", "IN" } };
     nonTerminalInfo["FRAG"]  = new string[][] { new string[] { "right" } };
     // crap
     nonTerminalInfo["INTJ"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["LST"]  = new string[][] { new string[] { "right", "LS", ":" } };
     nonTerminalInfo["NAC"]  = new string[][] { new string[] { "left", "NN", "NNS", "NNP", "NNPS", "NP", "NAC", "EX", "$", "CD", "QP", "PRP", "VBG", "JJ", "JJS", "JJR", "ADJP", "FW" } };
     nonTerminalInfo["NX"]   = new string[][] { new string[] { "left" } };
     // crap
     nonTerminalInfo["PP"] = new string[][] { new string[] { "right", "IN", "TO", "VBG", "VBN", "RP", "FW" } };
     // should prefer JJ? (PP (JJ such) (IN as) (NP (NN crocidolite)))
     nonTerminalInfo["PRN"]    = new string[][] { new string[] { "left" } };
     nonTerminalInfo["PRT"]    = new string[][] { new string[] { "right", "RP" } };
     nonTerminalInfo["QP"]     = new string[][] { new string[] { "left", "$", "IN", "NNS", "NN", "JJ", "RB", "DT", "CD", "NCD", "QP", "JJR", "JJS" } };
     nonTerminalInfo["RRC"]    = new string[][] { new string[] { "right", "VP", "NP", "ADVP", "ADJP", "PP" } };
     nonTerminalInfo["S"]      = new string[][] { new string[] { "left", "TO", "IN", "VP", "S", "SBAR", "ADJP", "UCP", "NP" } };
     nonTerminalInfo["SBAR"]   = new string[][] { new string[] { "left", "WHNP", "WHPP", "WHADVP", "WHADJP", "IN", "DT", "S", "SQ", "SINV", "SBAR", "FRAG" } };
     nonTerminalInfo["SBARQ"]  = new string[][] { new string[] { "left", "SQ", "S", "SINV", "SBARQ", "FRAG" } };
     nonTerminalInfo["SINV"]   = new string[][] { new string[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "VP", "S", "SINV", "ADJP", "NP" } };
     nonTerminalInfo["SQ"]     = new string[][] { new string[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "VP", "SQ" } };
     nonTerminalInfo["UCP"]    = new string[][] { new string[] { "right" } };
     nonTerminalInfo["VP"]     = new string[][] { new string[] { "left", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "AUX", "AUXG", "VP", "ADJP", "NN", "NNS", "NP" } };
     nonTerminalInfo["WHADJP"] = new string[][] { new string[] { "left", "CC", "WRB", "JJ", "ADJP" } };
     nonTerminalInfo["WHADVP"] = new string[][] { new string[] { "right", "CC", "WRB" } };
     nonTerminalInfo["WHNP"]   = new string[][] { new string[] { "left", "WDT", "WP", "WP$", "WHADJP", "WHPP", "WHNP" } };
     nonTerminalInfo["WHPP"]   = new string[][] { new string[] { "right", "IN", "TO", "FW" } };
     nonTerminalInfo["X"]      = new string[][] { new string[] { "right" } };
     // crap rule
     nonTerminalInfo["NP"] = new string[][] { new string[] { "rightdis", "NN", "NNP", "NNPS", "NNS", "NX", "POS", "JJR" }, new string[] { "left", "NP" }, new string[] { "rightdis", "$", "ADJP", "PRN" }, new string[] { "right", "CD" }, new string[]
                                              { "rightdis", "JJ", "JJS", "RB", "QP" } };
     nonTerminalInfo["TYPO"] = new string[][] { new string[] { "left" } };
     // another crap rule, for Brown (Roger)
     nonTerminalInfo["EDITED"] = new string[][] { new string[] { "left" } };
     // crap rule for Switchboard (if don't delete EDITED nodes)
     nonTerminalInfo["XS"] = new string[][] { new string[] { "right", "IN" } };
 }
Пример #31
0
 /// <summary>Constructs a normalizer that stores the given language pack for later use.</summary>
 /// <param name="tlp">TreebankLanguagePack kept in the tlp field; how it is consumed is defined elsewhere in the class</param>
 public BobChrisTreeNormalizer(ITreebankLanguagePack tlp)
 {
     this.tlp = tlp;
 }
 /// <summary>Constructs the function, storing the given language pack for later use.</summary>
 /// <param name="tlp">TreebankLanguagePack kept in the tlp field; how it is consumed is defined elsewhere in the class</param>
 public CategoryAndFunctionStringFunction(ITreebankLanguagePack tlp)
 {
     this.tlp = tlp;
 }