コード例 #1
0
 /// <summary>
 /// Creates the head finder and populates <c>nonTerminalInfo</c> with one entry per
 /// French treebank category handled here.
 /// </summary>
 /// <remarks>
 /// Each value is an array of rows; every row starts with "left" or "right" and is followed by
 /// zero or more category labels.
 /// NOTE(review): presumably the first element is the scan direction and the remaining labels are
 /// head candidates in priority order -- confirm against the base class.
 /// </remarks>
 /// <param name="tlp">
 /// Language pack handed to the base constructor; its start symbol is registered with the same
 /// row as "SENT".
 /// </param>
 public AbishekFrenchHeadFinder(FrenchTreebankLanguagePack tlp)
     : base(tlp)
 {
     // Part-of-speech tags used by the French treebank:
     //   A     adjective
     //   ADV   adverb
     //   C     conjunction and subordinating conjunction
     //   CL    clitics
     //   CS    subordinating conjunction (occurs only once!)
     //   D     determiner
     //   ET    foreign word
     //   I     interjection
     //   N     noun
     //   P     preposition
     //   PREF  prefix
     //   PRO   strong pronoun (very confusing)
     //   V     verb
     //   PUNC  punctuation
     nonTerminalInfo = Generics.NewHashMap();

     // Sentence level: the language pack's start symbol and "SENT" get identical rows.
     nonTerminalInfo[tlp.StartSymbol()] = new[]
     {
         new[] { "left", "VN", "V", "NP", "Srel", "Ssub", "Sint" }
     };
     nonTerminalInfo["SENT"] = new[]
     {
         new[] { "left", "VN", "V", "NP", "Srel", "Ssub", "Sint" }
     };

     // Adjectival phrases.
     nonTerminalInfo["AP"] = new[]
     {
         new[] { "right", "A", "N", "V" }
     };

     // Adverbial phrases.
     nonTerminalInfo["AdP"] = new[]
     {
         new[] { "right", "ADV" },
         new[] { "left", "P", "D", "C" }
     };

     // Coordinated phrases.
     nonTerminalInfo["COORD"] = new[]
     {
         new[] { "left", "C" },
         new[] { "right" }
     };

     // Noun phrases.
     nonTerminalInfo["NP"] = new[]
     {
         new[] { "right", "N", "PRO", "A", "ADV" },
         new[] { "left", "NP" },
         new[] { "right" }
     };

     // Prepositional phrases.
     nonTerminalInfo["PP"] = new[]
     {
         new[] { "right", "P", "CL", "A", "ADV", "V", "N" }
     };

     // Verbal nucleus.
     nonTerminalInfo["VN"] = new[]
     {
         new[] { "right", "V" }
     };

     // Infinitive clauses.
     nonTerminalInfo["VPinf"] = new[]
     {
         new[] { "left", "VN", "V" },
         new[] { "right" }
     };

     // Nonfinite clauses.
     nonTerminalInfo["VPpart"] = new[]
     {
         new[] { "left", "VN", "V" },
         new[] { "right" }
     };

     // Relative clauses.
     nonTerminalInfo["Srel"] = new[]
     {
         new[] { "left", "VN", "V" }
     };

     // Subordinate clauses.
     nonTerminalInfo["Ssub"] = new[]
     {
         new[] { "left", "VN", "V" },
         new[] { "right" }
     };

     // Parenthetical clauses.
     nonTerminalInfo["Sint"] = new[]
     {
         new[] { "left", "VN", "V" },
         new[] { "right" }
     };

     // Adverbs: no entry is registered for "ADV"; a former ("left", "ADV", "PP", "P") row is disabled.

     // Compound (multi-word) categories, all prefixed with MW: D, A, C, N, ADV, V, P, PRO, CL.
     nonTerminalInfo["MWD"] = new[]
     {
         new[] { "left", "D" },
         new[] { "left" }
     };
     nonTerminalInfo["MWA"] = new[]
     {
         new[] { "left", "P" },
         new[] { "left", "N" },
         new[] { "right", "A" },
         new[] { "right" }
     };
     nonTerminalInfo["MWC"] = new[]
     {
         new[] { "left", "C", "CS" },
         new[] { "left" }
     };
     nonTerminalInfo["MWN"] = new[]
     {
         new[] { "right", "N", "ET" },
         new[] { "right" }
     };
     nonTerminalInfo["MWV"] = new[]
     {
         new[] { "left", "V" },
         new[] { "left" }
     };
     nonTerminalInfo["MWP"] = new[]
     {
         new[] { "left", "P", "ADV", "PRO" },
         new[] { "left" }
     };
     nonTerminalInfo["MWPRO"] = new[]
     {
         new[] { "left", "PRO", "CL", "N", "A" },
         new[] { "left" }
     };
     nonTerminalInfo["MWCL"] = new[]
     {
         new[] { "left", "CL" },
         new[] { "right" }
     };
     nonTerminalInfo["MWADV"] = new[]
     {
         new[] { "left", "P", "ADV" },
         new[] { "left" }
     };
     nonTerminalInfo["MWI"] = new[]
     {
         new[] { "left", "N", "ADV", "P" },
         new[] { "left" }
     };
     nonTerminalInfo["MWET"] = new[]
     {
         new[] { "left", "ET", "N" },
         new[] { "left" }
     };

     // TODO: wsg2011: For phrasal nodes that lacked a label.
     nonTerminalInfo[FrenchXMLTreeReader.MissingPhrasal] = new[]
     {
         new[] { "left" }
     };
 }
コード例 #2
0
        /// <summary>
        /// Creates a reader over French treebank XML: the input is parsed once, here in the
        /// constructor, and the elements named <c>NodeSent</c> are kept in <c>sentences</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised while parsing is printed via <c>Sharpen.Runtime.PrintStackTrace</c>
        /// and not rethrown; in that case this constructor does not assign <c>sentences</c>.
        /// </remarks>
        /// <param name="in">
        /// Reader supplying the XML; it is wrapped in a <c>ReaderInputStream</c> using the encoding
        /// reported by a new <c>FrenchTreebankLanguagePack</c>.
        /// </param>
        /// <param name="tf">TreeFactory -- factory to create some kind of Tree; stored in <c>treeFactory</c></param>
        /// <param name="tn">The method of normalizing trees; stored in <c>treeNormalizer</c></param>
        public FrenchXMLTreeReader(Reader @in, ITreeFactory tf, TreeNormalizer tn)
        {
            treeFactory    = tf;
            treeNormalizer = tn;

            // The language pack is only consulted for the character encoding of the input.
            ITreebankLanguagePack languagePack = new FrenchTreebankLanguagePack();
            stream = new ReaderInputStream(@in, languagePack.GetEncoding());

            DocumentBuilder builder = XMLUtils.GetXmlParser();
            try
            {
                // Parse the whole document, then collect the sentence elements under its root.
                IElement documentRoot = builder.Parse(stream).GetDocumentElement();
                sentences = documentRoot.GetElementsByTagName(NodeSent);
                sentIdx   = 0;
            }
            catch (Exception failure)
            {
                // Best effort: report the problem and leave the reader without sentences.
                Sharpen.Runtime.PrintStackTrace(failure);
            }
        }