/// <summary>
/// Manual smoke test: repeatedly inserts an English phrase and a Czech phrase linked to it,
/// runs two phrase searches, then re-submits the Czech phrase with new text twice and
/// finally calls <c>Insert</c> with null text for the English phrase.
/// </summary>
/// <remarks>
/// NOTE(review): this is <c>async void</c>, so callers cannot await it and cannot observe
/// exceptions it throws. The signature is left unchanged to stay compatible with existing
/// callers; consider returning <c>Task</c> if all call sites can be updated.
/// </remarks>
public static async void test()
{
    // The context is disposed when the run finishes (or fails) instead of being leaked.
    using (var ctx = new FulltextContext())
    {
        //ctx.recreate();
        // Remove all existing rows from the Dicts table before the run.
        ctx.Database.ExecuteSqlCommand("delete Dicts");

        var dict = new Dict { Name = "Dict 1" };
        ctx.Dicts.Add(dict);

        // The two dictionary sides used throughout the run.
        var englishSide = new PhraseSide { src = Langs.en_gb, dest = Langs.en_gb };
        var czechSide = new PhraseSide { src = Langs.en_gb, dest = Langs.cs_cz };

        for (var idx = 0; idx < 100; idx++)
        {
            // New source-side phrase, then a destination-side phrase that references it.
            var engPhrase = await Insert(ctx, "Now are you?", null, dict, englishSide);
            var phrase = await Insert(ctx, "Ahoj, jak se máš?", null, dict, czechSide, engPhrase.Id);
            ctx.SaveChanges();

            // Same query with the boolean flag set both ways; the results are not asserted,
            // they are only kept in a local so they can be inspected in a debugger.
            var search = await SearchPhrase(czechSide, "měj", true);
            search = await SearchPhrase(czechSide, "měj", false);

            // Existing phrase id + new text (presumably the update path of the insert logic).
            await Insert(ctx, "Ahoj, jak se máš? Asi dobře Kadle.", phrase.Id, null, null);
            await Insert(ctx, "Asi dobře, Karle.", phrase.Id, null, null);

            // Existing phrase id + null text (presumably the delete path of the insert logic).
            await Insert(ctx, null, engPhrase.Id, null, null);
            ctx.SaveChanges();
        }
    }
}
/// <summary>
/// Creates a request object by copying every argument into the member of the same name.
/// </summary>
/// <param name="ctx">Database context stored for later use.</param>
/// <param name="newWords">Phrase text stored for later use.</param>
/// <param name="phraseId">Optional phrase id stored for later use.</param>
/// <param name="dict">Dictionary stored for later use.</param>
/// <param name="phraseSide">Optional dictionary side stored for later use.</param>
/// <param name="srcSideId">Optional source-side phrase id stored for later use.</param>
public RunInsertPhrase(FulltextContext ctx, string newWords, int? phraseId, Dict dict, PhraseSide? phraseSide, int? srcSideId)
{
    // Assignments follow the parameter order; none of them depends on another.
    this.ctx = ctx;
    this.newWords = newWords;
    this.phraseId = phraseId;
    this.dict = dict;
    this.phraseSide = phraseSide;
    this.srcSideId = srcSideId;
}
/// <summary>
/// Wraps the arguments in a <c>RunInsertPhrase</c> request and hands it to <c>Lib.Run</c>.
/// </summary>
/// <param name="ctx">Database context passed to the request.</param>
/// <param name="newWords">Phrase text passed to the request.</param>
/// <param name="phraseId">Optional phrase id passed to the request.</param>
/// <param name="dict">Dictionary passed to the request.</param>
/// <param name="phraseSide">Optional dictionary side passed to the request.</param>
/// <param name="srcSideId">Optional source-side phrase id passed to the request; defaults to null.</param>
/// <returns>The task returned by <c>Lib.Run</c> for the request.</returns>
public static Task<Phrase> Insert(FulltextContext ctx, string newWords, int? phraseId, Dict dict, PhraseSide? phraseSide, int? srcSideId = null)
{
    var request = new RunInsertPhrase(ctx, newWords, phraseId, dict, phraseSide, srcSideId);
    return Lib.Run(request);
}
/// <summary>
/// Inserts, updates or deletes a phrase and stages the matching full-text word rows on
/// <paramref name="ctx"/>. This method itself does not call <c>SaveChanges</c>.
/// </summary>
/// <remarks>
/// The operation is selected by the arguments:
/// <list type="bullet">
/// <item><description>DELETE: <paramref name="newPhraseText"/> is null or empty. <paramref name="phraseId"/> is required; <paramref name="phraseSide"/> and <paramref name="dict"/> must be null. The phrase and the phrases in its <c>Dests</c> collection are removed.</description></item>
/// <item><description>UPDATE: <paramref name="phraseId"/> is set. <paramref name="srcSideId"/> and <paramref name="dict"/> must be null. Words no longer present are removed, newly added words are spell checked and inserted.</description></item>
/// <item><description>INSERT: <paramref name="phraseId"/> is null. <paramref name="phraseSide"/> and <paramref name="dict"/> are required, and <paramref name="srcSideId"/> is required when the side's source and destination languages differ.</description></item>
/// </list>
/// </remarks>
/// <param name="ctx">Database context the changes are staged on.</param>
/// <param name="newPhraseText">Null or empty => delete, otherwise update or insert.</param>
/// <param name="phraseId">Null => insert, otherwise update or delete.</param>
/// <param name="dict">Owning dictionary, for insert only.</param>
/// <param name="phraseSide">For insert: the dictionary side, e.g. the Czech part of an English-Czech dictionary.</param>
/// <param name="srcSideId">For inserting a destination-side phrase: id of the source-side phrase.</param>
/// <returns>Null after a delete; otherwise the inserted or updated phrase entity.</returns>
/// <exception cref="ArgumentException">The argument combination does not match any of the three operations.</exception>
/// <exception cref="InvalidOperationException">No phrase with <paramref name="phraseId"/> exists (raised by <c>First</c>).</exception>
public static Phrase STAInsert(FulltextContext ctx, string newPhraseText, int? phraseId, Dict dict, PhraseSide? phraseSide, int? srcSideId)
{
    // DELETE
    if (string.IsNullOrEmpty(newPhraseText))
    {
        if (phraseId == null || phraseSide != null || dict != null)
        {
            throw new ArgumentException("phraseId == null || phraseSide!=null || dict!=null");
        }

        var delPh = ctx.Phrases.Include(p => p.Dests).First(p => p.Id == phraseId);
        ctx.Phrases.RemoveRange(delPh.Dests);
        ctx.Phrases.Remove(delPh);
        return null;
    }

    Phrase dbPhrase;
    PhraseWords oldPhrase = null; // stays null for INSERT, which is how the two paths are told apart below
    PhraseSide ps;
    if (phraseId != null)
    {
        // UPDATE
        if (srcSideId != null || dict != null)
        {
            throw new ArgumentException("srcSideId != null || dict!=null");
        }

        dbPhrase = ctx.Phrases.Include(p => p.Words).First(p => p.Id == phraseId);
        oldPhrase = new PhraseWords(dbPhrase.Text, TPosLen.fromBytes(dbPhrase.TextIdxs));

        // Use the languages stored on the phrase itself at insert time. Reading the source
        // language through dbPhrase.Dict relied on a navigation property that is not
        // eager-loaded here and could disagree with the value the phrase was inserted with.
        ps = new PhraseSide { src = (Langs)dbPhrase.SrcLang, dest = (Langs)dbPhrase.DestLang };
    }
    else
    {
        // INSERT
        if (phraseSide == null || dict == null)
        {
            throw new ArgumentException("phraseSide == null || dict == null");
        }

        ps = (PhraseSide)phraseSide;
        if (ps.src != ps.dest && srcSideId == null)
        {
            throw new ArgumentException("ps.src!=ps.dest && srcSideId == null");
        }

        dbPhrase = new Phrase { SrcLang = (byte)ps.src, DestLang = (byte)ps.dest, SrcRef = srcSideId, Dict = dict };
        ctx.Phrases.Add(dbPhrase);
    }

    var lang = ps.langOfText();
    var newPhrase = new PhraseWords(newPhraseText);

    // Spell checks the given words and stages a PhraseWord row for each one whose index
    // entry still has a positive length afterwards.
    // NOTE(review): presumably STASpellCheck marks rejected words by making Len non-positive - confirm.
    Action<SelectedWord[]> spellCheckAndDBInsert = wordIdxs =>
    {
        STASpellCheck(lang, wordIdxs, newPhrase); // low level spell check
        foreach (var selected in wordIdxs)
        {
            if (newPhrase.Idxs[selected.idx].Len > 0) // new correct words to fulltext DB
            {
                ctx.PhraseWords.Add(new PhraseWord
                {
                    SrcLang = (byte)ps.src,
                    DestLang = (byte)ps.dest,
                    Word = selected.ftxWord,
                    Phrase = dbPhrase
                });
            }
        }
    };

    // Word breaking
    STAWordBreak(lang, newPhrase);
    var newWordIdx = getCorrectPhraseWords(newPhrase);

    if (oldPhrase == null)
    {
        // INSERT: spell check and add every word to the DB
        spellCheckAndDBInsert(newWordIdx);
    }
    else
    {
        // UPDATE: delete the words that disappeared from the phrase...
        var oldWordIdx = getCorrectPhraseWords(oldPhrase);
        var dbOldWords = dbPhrase.Words;
        foreach (var removed in oldWordIdx.Except(newWordIdx))
        {
            ctx.PhraseWords.Remove(dbOldWords.First(row => row.Word == removed.ftxWord));
        }

        // ...then spell check and add only the words that were not there before.
        var newWithoutOldWordIdx = newWordIdx.Except(oldWordIdx).ToArray();
        spellCheckAndDBInsert(newWithoutOldWordIdx);
    }

    dbPhrase.Text = newPhrase.Text;
    dbPhrase.TextIdxs = TPosLen.toBytes(newPhrase.Idxs);

    // Base = lower-cased words joined by '|'. Math.Abs is applied because Len may be negative.
    // string.Join yields "" for an empty word list, which is the same value the previous
    // DefaultIfEmpty/Aggregate + null fix-up produced, without the repeated concatenation.
    // NOTE(review): ToLower() uses the current thread culture; left as is so stored values
    // keep matching whatever the search side does - confirm before switching to invariant.
    var baseWords = newPhrase.Idxs.Select(idx => newPhrase.Text.Substring(idx.Pos, Math.Abs(idx.Len)).ToLower());
    dbPhrase.Base = string.Join("|", baseWords);
    if (dbPhrase.Base.Length > Phrase.maxPhraseBaseLen)
    {
        dbPhrase.Base = dbPhrase.Base.Substring(0, Phrase.maxPhraseBaseLen);
    }

    return dbPhrase;
}