示例#1
0
    // Documents the WordNet import pipeline by listing its steps in the order they run.
    // Each call below is annotated with the artifact it is expected to produce
    // (file names and paths are taken from the original author's notes).
    public static void runDoc()
    {
        // Phase 1: parse wn-???-lmf.xml and produce d:\rewise\data\wordnet\ids.txt
        wordNet.Parser.xmlToDBFirstPhase();

        // Phase 2: parse wn-???-lmf.xml again, then create and fill the "wordnetDB" SQL database.
        // At its end this phase calls WnWikt.run().
        wordNet.Parser.xmlToDBSecondPhase();

        // Dump database statistics to "dbStat.txt"
        wordNet.Parser.dbStat();
        // Write the dump/???.txt files
        wordNet.Dumps.dumps();
        // Write the dump/eng_lemmas*.txt files
        wordNet.Dumps.lemmas();
        // Write the dump-words/???.txt files
        wordNet.Dumps.langLemmas();

        // ================== HELPERS, called in xmlToDBSecondPhase
        // Calls createNewSource(), dumps it to wn-wikt.json and inserts new entries, langs and translations into "wordnetDB".
        // NOTE(review): xmlToDBSecondPhase() already invokes WnWikt.run(), so this looks like a second
        // invocation when runDoc() is actually executed - confirm whether that is intended.
        WnWikt.run();

        // Imports wn-wikt\.* into memory and merges it with "wordnetDB"
        WnWikt.createNewSource();
    }
示例#2
0
文件: parser.cs 项目: reactxx/rewise
        // using https://www.nuget.org/packages/EntityFramework.BulkInsert-ef6-ext/
        //
        // Second import phase: converts the parsed XML nodes to database rows and
        // bulk-inserts them into the wordNetDB context, then runs WnWikt.run().
        //
        // Steps:
        //   1. Reset the shared counter and materialize all nodes returned by xml2Objects.
        //   2. Call finish() on every LexicalEntry node.
        //   3. Flatten the rows produced by createDB() of every node.
        //   4. Store the content of ids.txt as a single Ids row.
        //   5. Bulk-insert the rows table by table, logging progress to the console.
        public static void xmlToDBSecondPhase()
        {
            count = 0;
            var ctx      = new Context(false);
            var allNodes = xml2Objects(ctx).SelectMany(f => f).ToArray();

            // Every LexicalEntry is finished before any node is asked for its DB rows.
            foreach (var node in allNodes.OfType <LexicalEntry>())
            {
                node.finish(ctx);
            }

            // Materialized once: the array is filtered by type several times below.
            var allDB = allNodes.SelectMany(n => n.createDB(ctx)).ToArray();

            using (var dbCtx = wordNetDB.Context.getContext(true)) {
                // The whole ids.txt file is stored as the text of one Ids row.
                dbCtx.Ids.Add(new wordNetDB.Ids {
                    Text = File.ReadAllText(Context.root + "ids.txt")
                });
                dbCtx.SaveChanges();
                Console.WriteLine("Ids inserted");

                // Shared bulk-insert settings. These were previously constructed but never
                // handed to BulkInsert, so the table lock and batch size had no effect.
                var opt = new BulkInsertOptions()
                {
                    BulkCopyOptions = BulkCopyOptions.TableLock,
                    BatchSize       = 50000,
                };

                // The insert order is kept exactly as in the original code
                // (presumably it follows foreign-key dependencies - verify before reordering).
                dbCtx.BulkInsert(allDB.OfType <wordNetDB.Lang>(), opt);
                Console.WriteLine("Lang inserted");
                dbCtx.BulkInsert(allDB.OfType <wordNetDB.Entry>(), opt);
                Console.WriteLine("Entry inserted");
                dbCtx.BulkInsert(allDB.OfType <wordNetDB.Synset>(), opt);
                Console.WriteLine("Synset inserted");
                dbCtx.BulkInsert(allDB.OfType <wordNetDB.Translation>(), opt);
                Console.WriteLine("Translation inserted");
                dbCtx.BulkInsert(allDB.OfType <wordNetDB.Relation>(), opt);
                Console.WriteLine("Relation inserted");
                dbCtx.BulkInsert(allDB.OfType <wordNetDB.Sense>(), opt);
                Console.WriteLine("Sense inserted");
                dbCtx.BulkInsert(allDB.OfType <wordNetDB.Example>(), opt);
                Console.WriteLine("Example inserted");
            }

            WnWikt.run();
        }