Exemplo n.º 1
0
        /// <summary>
        /// Runs <c>sys.dm_fts_parser</c> with a <c>FormsOf(INFLECTIONAL, "...")</c> query for
        /// <paramref name="phrase"/> and returns the <c>display_term</c> column of the result.
        /// </summary>
        /// <param name="lang">Language of the phrase; converted to an LCID via <c>Metas.lang2LCID</c>.</param>
        /// <param name="phrase">Text to be expanded by the SQL Server full-text parser.</param>
        /// <returns>The <c>display_term</c> values returned by the parser, in result-set order.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="phrase"/> is null.</exception>
        public static string[] StemmingWithSQLServer(Langs lang, string phrase)
        {
            if (phrase == null)
            {
                throw new System.ArgumentNullException(nameof(phrase));
            }

            // The full-text query string is passed as a SQL parameter, so no quote doubling is needed
            // and the phrase can never terminate the surrounding SQL literal.
            // NOTE(review): a double quote inside phrase still ends the FormsOf term early - confirm
            // whether the word breaker can ever produce one.
            var ftsQuery = "FormsOf(INFLECTIONAL, \"" + phrase + "\")";

            using (var ctx = new FulltextContext())
            {
                // {0}/{1} are FromSql placeholders (turned into DbParameters), not string.Format holes.
                return ctx.Set<dm_fts_parser>()
                          .FromSql("SELECT display_term FROM sys.dm_fts_parser({0}, {1}, 0, 1)", ftsQuery, Metas.lang2LCID(lang))
                          .Select(p => p.display_term)
                          .ToArray();
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Word-breaks <paramref name="text"/>, expands every correct word to its stems (when a stemmer
        /// exists for the language) and returns the distinct <c>PhraseRef</c> values of all stored
        /// phrase words that match one of the resulting terms for the given dictionary side.
        /// </summary>
        /// <param name="phraseSide">Source/destination language pair; also determines the language of <paramref name="text"/>.</param>
        /// <param name="text">Search text.</param>
        /// <param name="isDBStemming">
        /// true: stems come from <c>StemmingWithSQLServer</c>; false: from <c>StemmerBreaker.RunStemmer.STAStemm</c>.
        /// </param>
        /// <returns>Distinct phrase references; an empty array when the text yields no search terms.</returns>
        public static int[] STASearchPhrase(PhraseSide phraseSide, string text, bool isDBStemming)
        {
            var lang = phraseSide.langOfText();
            var txt  = new PhraseWords(text);

            txt.Idxs = StemmerBreaker.RunBreaker.STAWordBreak(lang, text);
            var words = getCorrectPhraseWords(txt);

            // The language does not change inside the loop, so ask for the stemmer only once.
            var hasStemmer = StemmerBreaker.Lib.hasStemmer(lang);
            var terms      = new List<string>();

            foreach (var word in words)
            {
                if (!hasStemmer)
                {
                    // No stemmer for this language => search for the plain word only.
                    terms.Add(word.ftxWord);
                    continue;
                }

                IEnumerable<string> stems;
                if (isDBStemming)
                {
                    stems = StemmingWithSQLServer(lang, word.ftxWord);
                }
                else
                {
                    stems = StemmerBreaker.RunStemmer.STAStemm(lang, word.ftxWord);
                }
                terms.AddRange(stems);
            }

            terms = terms.Distinct().ToList();
            if (terms.Count == 0)
            {
                // Nothing to look up: skip the database round-trip.
                return new int[0];
            }

            // The context is only needed for the final lookup; ToArray() materializes the
            // result before the context is disposed.
            using (var ctx = new FulltextContext())
            {
                return ctx.PhraseWords
                          .Where(pw => pw.SrcLang == (byte)phraseSide.src && pw.DestLang == (byte)phraseSide.dest && terms.Contains(pw.Word))
                          .Select(pw => pw.PhraseRef)
                          .Distinct()
                          .ToArray();
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Ad-hoc smoke test: clears the Dicts table, creates one dictionary and then, 100 times,
        /// inserts an en_gb phrase plus a cs_cz phrase referencing it, runs two searches and issues
        /// three further <c>Insert</c> calls against the created phrases.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so callers cannot await it and exceptions thrown after the
        /// first await cannot be observed by the caller. The signature is kept for compatibility.
        /// </remarks>
        public static async void test()
        {
            using (var ctx = new FulltextContext())
            {
                //ctx.recreate();
                ctx.Database.ExecuteSqlCommand("delete Dicts");

                var dict = new Dict {
                    Name = "Dict 1"
                };
                ctx.Dicts.Add(dict);

                for (var idx = 0; idx < 100; idx++)
                {
                    // Source-side phrase (src == dest), then the Czech side linked to it through engPhrase.Id.
                    var engPhrase = await Insert(ctx, "Now are you?", null, dict, new PhraseSide { src = Langs.en_gb, dest = Langs.en_gb });
                    var phrase    = await Insert(ctx, "Ahoj, jak se máš?", null, dict, new PhraseSide { src = Langs.en_gb, dest = Langs.cs_cz }, engPhrase.Id);

                    ctx.SaveChanges();

                    // Same search twice, differing only in the last flag.
                    // NOTE(review): presumably the isDBStemming switch of STASearchPhrase - confirm.
                    // Results are kept in a local only so they can be inspected in a debugger.
                    var search = await SearchPhrase(new PhraseSide { src = Langs.en_gb, dest = Langs.cs_cz }, "měj", true);
                    search = await SearchPhrase(new PhraseSide { src = Langs.en_gb, dest = Langs.cs_cz }, "měj", false);

                    // Calls with a phraseId and no dict/side: two text changes on the Czech phrase,
                    // then a null text for the English one.
                    // NOTE(review): presumably update/update/delete as in STAInsert - confirm.
                    await Insert(ctx, "Ahoj, jak se máš? Asi dobře Kadle.", phrase.Id, null, null);
                    await Insert(ctx, "Asi dobře, Karle.", phrase.Id, null, null);
                    await Insert(ctx, null, engPhrase.Id, null, null);

                    ctx.SaveChanges();
                }
            }
        }
Exemplo n.º 4
0
 /// <summary>
 /// Wraps the arguments in a <c>RunInsertPhrase</c> work item and hands it to <c>Lib.Run</c>.
 /// </summary>
 /// <returns>The task produced by <c>Lib.Run</c> for the created work item.</returns>
 public static Task<Phrase> Insert(FulltextContext ctx, string newWords, int? phraseId, Dict dict, PhraseSide? phraseSide, int? srcSideId = null)
 {
     var job = new RunInsertPhrase(ctx, newWords, phraseId, dict, phraseSide, srcSideId);

     return Lib.Run(job);
 }
Exemplo n.º 5
0
 /// <summary>
 /// Captures all arguments of one insert request in the corresponding members of this instance.
 /// </summary>
 public RunInsertPhrase(FulltextContext ctx, string newWords, int? phraseId, Dict dict, PhraseSide? phraseSide, int? srcSideId)
 {
     this.phraseId   = phraseId;
     this.phraseSide = phraseSide;
     this.newWords   = newWords;
     this.srcSideId  = srcSideId;
     this.ctx        = ctx;
     this.dict       = dict;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Deletes, updates or inserts a phrase together with its full-text word rows.
        /// Changes are only registered on <paramref name="ctx"/>; this method does not call SaveChanges.
        /// </summary>
        /// <param name="ctx">Context on which all changes are registered.</param>
        /// <param name="newPhraseText">Null or empty => delete; otherwise update or insert.</param>
        /// <param name="phraseId">Null => insert; otherwise id of the phrase to update or delete.</param>
        /// <param name="dict">Dictionary owning the phrase; required for insert, must be null otherwise.</param>
        /// <param name="phraseSide">
        /// For insert: the dictionary side, e.g. the Czech part of an English-Czech dictionary.
        /// Must be null for delete.
        /// </param>
        /// <param name="srcSideId">
        /// For insert of a destination side (src != dest): id of the source-side phrase.
        /// Must be null for update.
        /// </param>
        /// <returns>The inserted or updated phrase; null after a delete.</returns>
        /// <exception cref="ArgumentException">The argument combination does not match the requested operation.</exception>
        public static Phrase STAInsert(FulltextContext ctx, string newPhraseText, int? phraseId, Dict dict, PhraseSide? phraseSide, int? srcSideId)
        {
            if (string.IsNullOrEmpty(newPhraseText))               //DELETE
            {
                if (phraseId == null || phraseSide != null || dict != null)
                {
                    throw new ArgumentException("phraseId == null || phraseSide!=null || dict!=null");
                }
                var delPh = ctx.Phrases.Include(p => p.Dests).First(p => p.Id == phraseId);

                // Remove the phrases referenced through Dests first, then the phrase itself.
                ctx.Phrases.RemoveRange(delPh.Dests);
                ctx.Phrases.Remove(delPh);
                return(null);
            }

            Phrase dbPhrase; PhraseWords oldPhrase = null; PhraseSide ps;

            if (phraseId != null)               //UPDATE
            {
                if (srcSideId != null || dict != null)
                {
                    throw new ArgumentException("srcSideId != null || dict!=null");
                }
                // Dict is read right below, so it has to be loaded together with Words;
                // otherwise dbPhrase.Dict may be null for a phrase that is not tracked yet.
                dbPhrase  = ctx.Phrases.Include(p => p.Words).Include(p => p.Dict).First(p => p.Id == phraseId);
                oldPhrase = new PhraseWords(dbPhrase.Text, TPosLen.fromBytes(dbPhrase.TextIdxs));
                ps        = new PhraseSide {
                    src = (Langs)dbPhrase.Dict.SrcLang, dest = (Langs)dbPhrase.DestLang
                };
            }
            else                 //INSERT
            {
                if (phraseSide == null || dict == null)
                {
                    throw new ArgumentException("phraseSide == null || dict == null");
                }
                ps = (PhraseSide)phraseSide;
                if (ps.src != ps.dest && srcSideId == null)
                {
                    throw new ArgumentException("ps.src!=ps.dest && srcSideId == null");
                }
                ctx.Phrases.Add(dbPhrase = new Phrase {
                    SrcLang = (byte)ps.src, DestLang = (byte)ps.dest, SrcRef = srcSideId, Dict = dict
                });
            }

            var lang = ps.langOfText(); var newPhrase = new PhraseWords(newPhraseText);

            // Spell-checks the given words and registers a PhraseWord row for each one whose
            // index entry still has a positive length afterwards.
            Action <SelectedWord[]> spellCheckAndDBInsert = wordIdxs => {
                STASpellCheck(lang, wordIdxs, newPhrase);                 //low level spell check
                for (var i = 0; i < wordIdxs.Length; i++)
                {
                    if (newPhrase.Idxs[wordIdxs[i].idx].Len > 0)          //new correct words to fulltext DB
                    {
                        ctx.PhraseWords.Add(new PhraseWord()
                        {
                            SrcLang = (byte)ps.src, DestLang = (byte)ps.dest, Word = wordIdxs[i].ftxWord, Phrase = dbPhrase
                        });
                    }
                }
            };

            //Word breaking
            STAWordBreak(lang, newPhrase);

            var newWordIdx = getCorrectPhraseWords(newPhrase);

            if (oldPhrase == null)                 //INSERT
            {
                spellCheckAndDBInsert(newWordIdx); //Spell check and add to DB
            }
            else                                   //UPDATE
            {
                //Delete olds from DB
                var oldWordIdx = getCorrectPhraseWords(oldPhrase);
                var dbOldWords = dbPhrase.Words;
                foreach (var w in oldWordIdx.Except(newWordIdx))
                {
                    ctx.PhraseWords.Remove(dbOldWords.First(db => db.Word == w.ftxWord));
                }

                //Spell check and add to DB
                var newWithoutOldWordIdx = newWordIdx.Except(oldWordIdx).ToArray();
                spellCheckAndDBInsert(newWithoutOldWordIdx);
            }

            dbPhrase.Text     = newPhrase.Text;
            dbPhrase.TextIdxs = TPosLen.toBytes(newPhrase.Idxs);

            // Lower-cased words joined by '|'. Math.Abs is applied because Len may be non-positive
            // (see the Len > 0 check above). string.Join yields "" when there are no words.
            dbPhrase.Base = string.Join("|", newPhrase.Idxs.Select(idx => newPhrase.Text.Substring(idx.Pos, Math.Abs(idx.Len)).ToLower()));
            if (dbPhrase.Base.Length > Phrase.maxPhraseBaseLen)
            {
                dbPhrase.Base = dbPhrase.Base.Substring(0, Phrase.maxPhraseBaseLen);
            }

            return(dbPhrase);
        }