/// <summary>
/// Looks up every distinct line of <paramref name="wordsFile"/> through <c>Search.GetSearch</c>
/// and prints a summary: how many words returned synsets, how many returned only morph strings,
/// how many returned neither, and the measured lookup speed.
/// </summary>
/// <param name="wordNetDb">Value passed to <c>WordNetContext.GetContext</c> to open the database.</param>
/// <param name="wordsFile">Text file with one word per line; duplicate lines are looked up once.</param>
/// <param name="notFoundFile">Currently unused: writing out the list of not-found words is disabled.</param>
public static void TestSqlite(string wordNetDb, string wordsFile, string notFoundFile)
{
    Console.WriteLine("Start");
    var wordSet = new HashSet<string>(File.ReadAllLines(wordsFile));

    int found = 0;
    int morphs = 0;
    int notFound = 0;
    int cnt = 0;

    // Stopwatch is monotonic and high-resolution; DateTime.Now is wall-clock time and can jump
    // (clock sync, DST), which would corrupt the measurement.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    using (var context = WordNetContext.GetContext(wordNetDb))
    {
        foreach (var word in wordSet)
        {
            if (string.IsNullOrWhiteSpace(word))
            {
                continue;
            }

            var search = Search.GetSearch(word, context);
            if (search.SynSets.Any())
            {
                found++;
            }
            else if (search.MorphStrings?.Any() ?? false)
            {
                morphs++;
            }
            else
            {
                notFound++;
            }

            if (cnt == 0)
            {
                // Restart after the first lookup so that one-time initialization cost
                // is excluded from the reported speed.
                stopwatch.Restart();
            }

            cnt++;
        }
    }

    // Guard the division: an empty word file would otherwise print NaN or infinity.
    double msPerThousandWords = wordSet.Count == 0
        ? 0
        : stopwatch.Elapsed.TotalMilliseconds / wordSet.Count * 1000;

    Console.WriteLine($"Total words {wordSet.Count} Found {found} Morphs {morphs} Not found {notFound}; Speed: {msPerThousandWords:F2} ms/(1000 words)");
}
/// <summary>
/// Creates the database content from WNDB files. For each of the part-of-speech symbols
/// "n", "v", "a" and "r" it adds the synsets, lemmas, synset-lemma relations and original
/// writings read from the data records, saves, then adds the exception entries and saves again.
/// </summary>
/// <param name="dictPack">Path to the WNDB data files; passed to the <c>WNDB</c> constructor.</param>
/// <param name="context">Destination database context that receives the converted entities.</param>
/// <exception cref="InvalidOperationException">
/// A data record's part-of-speech symbol does not match the part of speech being processed
/// (the symbol "s" is accepted while processing "a").
/// </exception>
public static void Convert(string dictPack, WordNetContext context)
{
    WNDB wndb = new WNDB(dictPack);

    // Lower-cased words already attached to the synset currently being built.
    var synWords = new HashSet<string>();
    // Lower-cased word -> its Lemma entity, shared across all parts of speech.
    var wordToLemma = new Dictionary<string, Lemma>();
    // Original (case-preserving) spelling -> its Writing entity.
    var words = new Dictionary<string, Writing>();

    var poses = (new[] { "n", "v", "a", "r" }).Select(s => PartOfSpeech.of(s));
    foreach (var pos in poses)
    {
        Console.WriteLine("Process Data of {0}", pos.name);
        foreach (var data in wndb.GetData(pos))
        {
            // The adjective data includes both 'a' and 's' pos symbols.
            bool samePos = data.pos == pos.symbol;
            bool satelliteOfAdjective = data.pos == "s" && pos.symbol == "a";
            if (!samePos && !satelliteOfAdjective)
            {
                throw new InvalidOperationException("pos!=data.pos");
            }

            var synset = new SynSet { Pos = data.pos };
            context.SynSets.Add(synset);
            synWords.Clear();

            foreach (var oword in data.origWords)
            {
                // Invariant lower-casing keeps the keys independent of the current culture
                // (e.g. Turkish 'I' -> 'ı' under ToLower()).
                string lcWord = oword.word.ToLowerInvariant();

                // Add the lemma, or extend the pos list of an existing one.
                Lemma lemma;
                if (!wordToLemma.TryGetValue(lcWord, out lemma))
                {
                    lemma = new Lemma { Value = lcWord, Poses = data.pos };
                    wordToLemma.Add(lcWord, lemma);
                    context.Lemmas.Add(lemma);
                }
                else if (!lemma.Poses.Contains(data.pos))
                {
                    lemma.Poses += data.pos;
                }

                // Add the SynSet <-> Lemma relation once per synset, even if several
                // original spellings lower-case to the same lemma.
                if (synWords.Add(lcWord))
                {
                    context.SynsetLemmas.Add(new SynsetLemma { SynSet = synset, Lemma = lemma });
                }

                // Add the original word only if it differs from the lemma.
                if (lcWord != oword.word)
                {
                    Writing word;
                    if (!words.TryGetValue(oword.word, out word))
                    {
                        word = new Writing { Value = oword.word, Lemma = lemma };
                        words.Add(oword.word, word);
                        context.Writings.Add(word);
                    }
                    else if (word.Lemma != lemma)
                    {
                        Console.WriteLine("Word mix: {0} {1} {2}", oword.word, lemma.Value, word.Lemma.Value);
                    }
                }
            }

            synset.Definition = string.Join(";", data.definitions);
            synset.Example = string.Join(";", data.examples);
        }

        Console.WriteLine("Save changes");
        context.SaveChanges();

        // Exceptions.
        // TODO: remove morphes, ...
        Console.WriteLine("Process Exceptions of {0}", pos.name);
        foreach (var exwords in GetExceptions(wndb, pos))
        {
            // exwords[0] is stored as the Except value; each following entry is a candidate main form.
            for (int i = 1; i < exwords.Length; i++)
            {
                if (exwords[i] == exwords[0])
                {
                    continue;
                }

                // Look the main form up as-is, then retry with hyphens replaced by spaces.
                // Main forms with no known lemma are skipped.
                Lemma lemma;
                if (wordToLemma.TryGetValue(exwords[i], out lemma)
                    || (exwords[i].Contains('-') && wordToLemma.TryGetValue(exwords[i].Replace('-', ' '), out lemma)))
                {
                    context.Excepts.Add(new Except { Value = exwords[0], MainForm = exwords[i], Lemma = lemma });
                }
            }
        }

        Console.WriteLine("Save changes");
        context.SaveChanges();
    }
}