示例#1
0
        private static string XML_FILENAME;        // path of the XML lexicon file to write (third command line argument)

        /**
         * Reads a CSV list of words and POS tags, looks each pair up in the NIHDB Lexicon and,
         * when an entry is found, writes that entry's XML into an XML lexicon file, which can be
         * used in SimpleNLG or elsewhere.
         *
         * @param args : exactly three command line arguments, in this order:
         * <ol>
         *      <li>The full path to the NIHDB Lexicon database file e.g. C:\\NIHDB\\lexAccess2009</li>
         *      <li>The full path to the CSV list of baseforms and POS tags to include in the XML Lexicon file</li>
         *      <li>The full path to the XML file that the XML Lexicon will be written out to.</li>
         * </ol>
         *
         * <p>Each word list line has the form <code>baseform,category</code>, where category is one of
         * noun, verb, adv, adj, det, prep, pron, conj, modal or interjection (case-insensitive).
         * Blank lines and lines without a category column are skipped.</p>
         *
         * <p>Example arguments:
         *   C:\\NIHDB\\lexAccess2009 C:\\NIHDB\\wordlist.csv C:\\NIHDB\\default-lexicon.xml
         * </p>
         *
         * <p>NOTE: the HSQLDB driver lookup is currently stubbed out (the driver type is hard-coded to
         * null), so the driver is always reported as unavailable and this method ends by calling
         * printErrorArgumentMessage() without exporting anything. The export path is kept ready for
         * when the lookup is restored.</p>
         */
        public static void Main(string[] args)
        {
            if (args.Length != 3)
            {
                printErrorArgumentMessage();
                return;
            }

            DB_FILENAME       = args[0];
            WORDLIST_FILENAME = args[1];
            XML_FILENAME      = args[2];

            // Check to see if the HSQLDB driver is available.
            bool dbDriverAvaliable = false;
            try
            {
//					Type driverClass = Type.GetType("org.hsqldb.jdbc.JDBCDriver", false, typeof(NIHLexiconXMLDumpUtil).ClassLoader); //OUTCOMMENTED UNTIL SOLUTION FOUND FOR HSQLDB CONNECTION
                // Placeholder while the lookup above is disabled: always null, so the driver
                // is always treated as unavailable.
                Type driverClass = null;

                if (null != driverClass)
                {
                    dbDriverAvaliable = true;
                }
            }
            catch (Exception)
            {
                Console.Error.WriteLine("*** Please add the HSQLDB JDBCDriver to your Java classpath and try again.");
            }

            bool argumentsPresent = !string.IsNullOrEmpty(DB_FILENAME)
                                    && !string.IsNullOrEmpty(WORDLIST_FILENAME)
                                    && !string.IsNullOrEmpty(XML_FILENAME);

            if (!argumentsPresent || !dbDriverAvaliable)
            {
                printErrorArgumentMessage();
                return;
            }

            Lexicon lex = new NIHDBLexicon(DB_FILENAME, XMLRealiser.LexiconType.NIHDB_HSQL);

            try
            {
                try
                {
                    exportWordList(lex);
                }
                finally
                {
                    // Close the lexicon even when the export fails; a failure to close is
                    // still reported by the catch block below.
                    lex.close();
                }

                Console.WriteLine("*** XML Lexicon Export Completed.");
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("*** An Error occured during the export. The Exception message is below: ");
                Console.Error.WriteLine(e.Message);
                Console.Error.WriteLine("************************");
                Console.Error.WriteLine("Please make sure you have the correct application arguments: ");
                printArgumentsMessage();
            }
        }

        /**
         * Writes the XML of every word list entry found in the given lexicon to XML_FILENAME,
         * wrapped in a <lexicon> root element. Entries that are not found are reported on
         * standard output. Both files are closed even if the export fails part-way.
         *
         * @param lex : the lexicon to look the baseforms up in.
         */
        private static void exportWordList(Lexicon lex)
        {
            LineNumberReader wordListFile = new LineNumberReader(new System.IO.StreamReader(WORDLIST_FILENAME));
            try
            {
                using (System.IO.StreamWriter xmlFile = new System.IO.StreamWriter(XML_FILENAME))
                {
                    // Write text through the writer itself: Convert.ToByte(string) parses a
                    // number and throws a FormatException for XML text.
                    xmlFile.WriteLine("<lexicon>");

                    string line;
                    while ((line = wordListFile.ReadLine()) != null)
                    {
                        if (line.Trim().Length == 0)
                        {
                            continue;
                        }

                        string[] cols = line.Split(',');
                        if (cols.Length < 2)
                        {
                            // Without this guard cols[1] would throw and abort the whole export.
                            Console.WriteLine("*** Skipping word list line without a POS tag column: " + line);
                            continue;
                        }

                        string baseForm = cols[0];
                        string cat      = cols[1];

                        WordElement word = lookUpWord(lex, baseForm, cat);
                        if (word == null)
                        {
                            Console.WriteLine("*** The following baseform and POS tag is not found: " + baseForm + ":" + cat);
                        }
                        else
                        {
                            xmlFile.Write(word.toXML());
                        }
                    }

                    xmlFile.WriteLine("</lexicon>");
                }
            }
            finally
            {
                wordListFile.Close();
            }
        }

        /**
         * Looks a baseform up in the lexicon under the lexical category named by a word list POS tag.
         *
         * @param lex      : the lexicon to query.
         * @param baseForm : the baseform to look up.
         * @param cat      : the POS tag from the word list, compared case-insensitively.
         * @return whatever lex.getWord returns for the mapped category, or null when the POS tag
         *         is not one of the recognised tags.
         */
        private static WordElement lookUpWord(Lexicon lex, string baseForm, string cat)
        {
            LexicalCategory.LexicalCategoryEnum category;

            if (cat.Equals("noun", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.NOUN;
            }
            else if (cat.Equals("verb", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.VERB;
            }
            else if (cat.Equals("adv", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.ADVERB;
            }
            else if (cat.Equals("adj", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.ADJECTIVE;
            }
            else if (cat.Equals("det", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.DETERMINER;
            }
            else if (cat.Equals("prep", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.PREPOSITION;
            }
            else if (cat.Equals("pron", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.PRONOUN;
            }
            else if (cat.Equals("conj", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.CONJUNCTION;
            }
            else if (cat.Equals("modal", StringComparison.OrdinalIgnoreCase))
            {
                category = LexicalCategory.LexicalCategoryEnum.MODAL;
            }
            else if (cat.Equals("interjection", StringComparison.OrdinalIgnoreCase))
            {
                // Kilgarriff's interjections are mostly nouns in the lexicon
                category = LexicalCategory.LexicalCategoryEnum.NOUN;
            }
            else
            {
                return null;
            }

            return lex.getWord(baseForm, new LexicalCategory(category));
        }