// Walks through the whole wordnet import pipeline, one call per step, in execution order.
public static void runDoc() {
  // Step 1: parse wn-???-lmf.xml and produce d:\rewise\data\wordnet\ids.txt.
  wordNet.Parser.xmlToDBFirstPhase();

  // Step 2: parse wn-???-lmf.xml a second time, then create and fill the "wordnetDB" SQL database.
  // WnWikt.run is called at the end of this step.
  wordNet.Parser.xmlToDBSecondPhase();

  // Step 3: write the "dbStat.txt" statistics dump.
  wordNet.Parser.dbStat();

  // Step 4: write dump/???.txt.
  wordNet.Dumps.dumps();

  // Step 5: write dump/eng_lemmas*.txt.
  wordNet.Dumps.lemmas();

  // Step 6: write dump-words/???.txt.
  wordNet.Dumps.langLemmas();

  // ================== HELPERS, called in xmlToDBSecondPhase

  // Calls createNewSource(), dumps it to wn-wikt.json and inserts the new entries, langs and
  // translations into "wordnetDB".
  // NOTE(review): step 2 is documented as already calling WnWikt.run, so this direct call
  // presumably runs it a second time - confirm that this is intended.
  WnWikt.run();

  // Imports wn-wikt\.* into memory and merges it with "wordnetDB".
  WnWikt.createNewSource();
}
// Second import phase: converts the objects produced by xml2Objects into database rows,
// stores them in the wordnet database and finally runs WnWikt.run().
// Bulk inserts are done with https://www.nuget.org/packages/EntityFramework.BulkInsert-ef6-ext/
public static void xmlToDBSecondPhase() {
  count = 0;
  var ctx = new Context(false);

  // Flatten everything xml2Objects yields into a single array so it can be traversed twice.
  var allNodes = xml2Objects(ctx).SelectMany(f => f).ToArray();

  // Lexical entries are finished before any node is converted to DB rows.
  foreach (var node in allNodes.OfType<LexicalEntry>()) {
    node.finish(ctx);
  }

  var allDB = allNodes.SelectMany(n => n.createDB(ctx)).ToArray();

  using (var dbCtx = wordNetDB.Context.getContext(true)) {
    // The ids text is read from the ids.txt file located under Context.root.
    dbCtx.Ids.Add(new wordNetDB.Ids { Text = File.ReadAllText(Context.root + "ids.txt") });
    dbCtx.SaveChanges();
    Console.WriteLine("Ids inserted");

    // These options were previously constructed but never handed to BulkInsert, so the
    // table lock and batch size had no effect. They are now passed to every bulk insert.
    var opt = new BulkInsertOptions() {
      BulkCopyOptions = BulkCopyOptions.TableLock,
      BatchSize = 50000,
    };

    // The insertion order below is kept exactly as in the original code.
    dbCtx.BulkInsert(allDB.OfType<wordNetDB.Lang>(), opt);
    Console.WriteLine("Lang inserted");
    dbCtx.BulkInsert(allDB.OfType<wordNetDB.Entry>(), opt);
    Console.WriteLine("Entry inserted");
    dbCtx.BulkInsert(allDB.OfType<wordNetDB.Synset>(), opt);
    Console.WriteLine("Synset inserted");
    dbCtx.BulkInsert(allDB.OfType<wordNetDB.Translation>(), opt);
    Console.WriteLine("Translation inserted");
    dbCtx.BulkInsert(allDB.OfType<wordNetDB.Relation>(), opt);
    Console.WriteLine("Relation inserted");
    dbCtx.BulkInsert(allDB.OfType<wordNetDB.Sense>(), opt);
    Console.WriteLine("Sense inserted");
    dbCtx.BulkInsert(allDB.OfType<wordNetDB.Example>(), opt);
    Console.WriteLine("Example inserted");
  }

  // Runs after the database context has been disposed.
  WnWikt.run();
}