private static string XML_FILENAME; // path of the XML lexicon file to write

/// <summary>
/// Reads a CSV list of baseforms and POS tags, looks each entry up in the
/// NIHDB lexicon and, when an entry is found, writes its XML representation
/// into an XML lexicon file that can be used in SimpleNLG or elsewhere.
/// </summary>
/// <remarks>
/// <para>
/// Example arguments:
/// <c>C:\NIHDB\lexAccess2009 C:\NIHDB\wordlist.csv C:\NIHDB\default-lexicon.xml</c>
/// </para>
/// <para>
/// NOTE(review): detection of the HSQLDB driver is currently disabled (it was
/// commented out pending an HSQLDB connection solution), so the driver is
/// always treated as unavailable and this method reports an argument error
/// without performing the export.
/// </para>
/// </remarks>
/// <param name="args">
/// Exactly three arguments, in this order:
/// <list type="number">
/// <item><description>The full path to the NIHDB lexicon database file, e.g. <c>C:\NIHDB\lexAccess2009</c>.</description></item>
/// <item><description>The full path to the CSV list of baseforms and POS tags to include in the XML lexicon.</description></item>
/// <item><description>The full path of the XML file the lexicon is written to.</description></item>
/// </list>
/// </param>
public static void Main(string[] args)
{
    if (args.Length != 3)
    {
        printErrorArgumentMessage();
        return;
    }

    DB_FILENAME = args[0];
    WORDLIST_FILENAME = args[1];
    XML_FILENAME = args[2];

    // The Java original probed the classpath for org.hsqldb.jdbc.JDBCDriver here.
    // That lookup is disabled until an HSQLDB connection solution is found, so
    // the driver is always reported as unavailable.
    // TODO: restore driver detection once HSQLDB access works from .NET.
    bool dbDriverAvailable = false;

    if (string.IsNullOrEmpty(DB_FILENAME)
        || string.IsNullOrEmpty(WORDLIST_FILENAME)
        || string.IsNullOrEmpty(XML_FILENAME)
        || !dbDriverAvailable)
    {
        printErrorArgumentMessage();
        return;
    }

    // Maps the POS tags used in the word list (case-insensitive) to lexical categories.
    var categoriesByTag =
        new System.Collections.Generic.Dictionary<string, LexicalCategory.LexicalCategoryEnum>(
            StringComparer.OrdinalIgnoreCase)
        {
            { "noun", LexicalCategory.LexicalCategoryEnum.NOUN },
            { "verb", LexicalCategory.LexicalCategoryEnum.VERB },
            { "adv", LexicalCategory.LexicalCategoryEnum.ADVERB },
            { "adj", LexicalCategory.LexicalCategoryEnum.ADJECTIVE },
            { "det", LexicalCategory.LexicalCategoryEnum.DETERMINER },
            { "prep", LexicalCategory.LexicalCategoryEnum.PREPOSITION },
            { "pron", LexicalCategory.LexicalCategoryEnum.PRONOUN },
            { "conj", LexicalCategory.LexicalCategoryEnum.CONJUNCTION },
            { "modal", LexicalCategory.LexicalCategoryEnum.MODAL },
            // Kilgarriff's interjections are mostly nouns in the lexicon.
            { "interjection", LexicalCategory.LexicalCategoryEnum.NOUN },
        };

    Lexicon lex = new NIHDBLexicon(DB_FILENAME, XMLRealiser.LexiconType.NIHDB_HSQL);

    try
    {
        try
        {
            // The using blocks guarantee both files are closed even when the export fails.
            using (var wordListReader = new System.IO.StreamReader(WORDLIST_FILENAME))
            using (var xmlFile = new System.IO.StreamWriter(XML_FILENAME))
            {
                LineNumberReader wordListFile = new LineNumberReader(wordListReader);

                xmlFile.WriteLine("<lexicon>");

                string line;
                while ((line = wordListFile.ReadLine()) != null)
                {
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue; // tolerate blank lines in the word list
                    }

                    string[] cols = line.Split(',');
                    if (cols.Length < 2)
                    {
                        // Report and skip instead of aborting the whole export.
                        Console.WriteLine("*** Skipping malformed word list line (expected baseform,POS): " + line);
                        continue;
                    }

                    string @base = cols[0].Trim();
                    string cat = cols[1].Trim();

                    // Unknown POS tags leave word as null and are reported as not found below.
                    WordElement word = null;
                    LexicalCategory.LexicalCategoryEnum category;
                    if (categoriesByTag.TryGetValue(cat, out category))
                    {
                        word = lex.getWord(@base, new LexicalCategory(category));
                    }

                    if (word == null)
                    {
                        Console.WriteLine("*** The following baseform and POS tag is not found: " + @base + ":" + cat);
                    }
                    else
                    {
                        xmlFile.Write(word.toXML());
                    }
                }

                xmlFile.WriteLine("</lexicon>");
            }
        }
        finally
        {
            // Release the lexicon database whether or not the export succeeded.
            lex.close();
        }

        Console.WriteLine("*** XML Lexicon Export Completed.");
    }
    catch (Exception e)
    {
        Console.Error.WriteLine("*** An Error occured during the export. The Exception message is below: ");
        Console.Error.WriteLine(e.Message);
        Console.Error.WriteLine("************************");
        Console.Error.WriteLine("Please make sure you have the correct application arguments: ");
        printArgumentsMessage();
    }
}