示例#1
0
        /// <summary>
        /// Finds the phrases that contain at least one of the (stemmed) words of <paramref name="text"/>.
        /// </summary>
        /// <param name="phraseSide">Supplies the language of the text (langOfText) and the src/dest pair used to filter the PhraseWords rows.</param>
        /// <param name="text">Text that is word-broken and searched for.</param>
        /// <param name="isDBStemming">true: stem with StemmingWithSQLServer; false: stem with StemmerBreaker.RunStemmer.STAStemm.</param>
        /// <returns>Distinct PhraseRef values of the PhraseWords rows whose Word is among the search words.</returns>
        public static int[] STASearchPhrase(PhraseSide phraseSide, string text, bool isDBStemming)
        {
            var lang = phraseSide.langOfText();
            var txt = new PhraseWords(text);

            txt.Idxs = StemmerBreaker.RunBreaker.STAWordBreak(lang, text);
            var words = getCorrectPhraseWords(txt);
            List<string> res = new List<string>();

            foreach (var w in words)
            {
                if (!StemmerBreaker.Lib.hasStemmer(lang))
                {
                    //no stemmer for this language => search for the word itself
                    res.Add(w.ftxWord);
                }
                else
                {
                    var st = isDBStemming ? (IEnumerable<string>)StemmingWithSQLServer(lang, w.ftxWord) : StemmerBreaker.RunStemmer.STAStemm(lang, w.ftxWord);
                    res.AddRange(st);
                }
            }
            res = res.Distinct().ToList();

            //The context is only needed for the final query. Dispose it deterministically instead of
            //leaking it; ToArray() materializes the result before the context goes away.
            using (var ctx = new FulltextContext())
            {
                return ctx.PhraseWords
                    .Where(pw => pw.SrcLang == (byte)phraseSide.src && pw.DestLang == (byte)phraseSide.dest && res.Contains(pw.Word))
                    .Select(pw => pw.PhraseRef)
                    .Distinct()
                    .ToArray();
            }
        }
示例#2
0
        /// <summary>
        /// Builds a PhraseWords for <paramref name="phrase"/>, word-breaks it and runs the low level spell check on it.
        /// </summary>
        /// <param name="lang">Language passed to the word breaker and the spell checker.</param>
        /// <param name="phrase">Phrase text to process.</param>
        /// <returns>The PhraseWords instance after word breaking and spell checking.</returns>
        public static PhraseWords STABreakAndCheck(Langs lang, string phrase)
        {
            PhraseWords result = new PhraseWords(phrase);

            //Step 1: word breaking (text in brackets excluded)
            STAWordBreak(lang, result);

            //Step 2: low level spell check of the words selected by getCorrectPhraseWords
            STASpellCheck(lang, getCorrectPhraseWords(result), result);

            return result;
        }
示例#3
0
        /// <summary>
        /// Word-breaks and spell-checks <paramref name="phrase"/>.
        /// </summary>
        /// <param name="lang">Language passed to the word breaker and the spell checker.</param>
        /// <param name="phrase">Phrase text to process; may be null or empty.</param>
        /// <returns>null when <paramref name="phrase"/> is null or empty, otherwise the processed PhraseWords.</returns>
        public static PhraseWords STABreakAndCheck(Langs lang, string phrase)
        {
            PhraseWords result = null;

            if (!string.IsNullOrEmpty(phrase))
            {
                result = new PhraseWords(phrase);

                //Word breaking first (text in brackets excluded), then the low level spell check
                STAWordBreak(lang, result);
                STASpellCheck(lang, new SelectedWords(result));
            }

            return result;
        }
示例#4
0
        //For every spell-check error the Len of the corresponding newText.Idxs entry is set to its negative value.
        //lang:    language passed to the spell checker
        //nws:     words handed to RunSpellCheckWords.STACheck
        //newText: phrase whose Idxs entries are updated in place
        static void STASpellCheck(Langs lang, SelectedWord[] nws, PhraseWords newText)
        {
            //Spell check; a null result means there is nothing to mark
            var errorIdxs = RunSpellCheckWords.STACheck(lang, nws);

            if (errorIdxs == null)
            {
                return;
            }

            //update Len for wrong words
            foreach (var errIdx in errorIdxs)
            {
                var current = newText.Idxs[errIdx];

                newText.Idxs[errIdx] = new TPosLen()
                {
                    Pos = current.Pos, Len = (sbyte)-current.Len
                };
            }
        }
示例#5
0
        //Fills in text.Idxs
        static void STAWordBreak(Langs lang, PhraseWords text)
        {
            //Overwrite every otherBrackets match, then every roundBrackets match, with as many spaces
            //as the match is long, so the length of the string handed to the breaker is unchanged
            var withoutOtherBrackets = otherBrackets.Replace(text.Text, m => new string(' ', m.Length));
            var withoutAnyBrackets = roundBrackets.Replace(withoutOtherBrackets, m => new string(' ', m.Length));

            text.Idxs = StemmerBreaker.RunBreaker.STAWordBreak(lang, withoutAnyBrackets);
        }
示例#6
0
        /// <summary>
        /// Deletes, updates or inserts a phrase in <paramref name="ctx"/> and keeps its PhraseWords rows in step with the new text.
        /// The operation is chosen from the arguments: empty text => DELETE, otherwise phraseId != null => UPDATE, else INSERT.
        /// </summary>
        /// <returns>null for DELETE, otherwise the updated or newly added Phrase entity.</returns>
        /// <exception cref="Exception">Thrown when the argument combination does not fit the selected operation.</exception>
        /// <remarks>NOTE(review): no SaveChanges call is visible in this method - presumably the caller persists the changes; confirm.</remarks>
        public static Phrase STAInsert(FulltextContext ctx, string newPhraseText /*NullOrEmpty => delete, else update or insert*/, int?phraseId /*==null => insert else update or delete*/, Dict dict /*my dict for insert*/, PhraseSide?phraseSide /*for Insert: dict and its side, e.g. czech part of English-Czech dict*/, int?srcSideId /*for inserting Destination side*/)
        {
            if (string.IsNullOrEmpty(newPhraseText))               //DELETE
            {
                //DELETE needs a phraseId and must not get phraseSide or dict
                if (phraseId == null || phraseSide != null || dict != null)
                {
                    throw new Exception("phraseId == null || phraseSide!=null || dict!=null");
                }
                //First throws when no phrase with this id exists
                var delPh = ctx.Phrases.Include(p => p.Dests).First(p => p.Id == phraseId);

                //remove the phrases referenced by Dests first, then the phrase itself
                ctx.Phrases.RemoveRange(delPh.Dests);
                ctx.Phrases.Remove(delPh);
                return(null);
            }

            //oldPhrase stays null for INSERT; it doubles as the INSERT/UPDATE switch further down
            Phrase dbPhrase; PhraseWords oldPhrase = null; PhraseSide ps;

            if (phraseId != null)               //UPDATE
            {
                //UPDATE must not get srcSideId or dict
                if (srcSideId != null || dict != null)
                {
                    throw new Exception("srcSideId != null || dict!=null");
                }
                dbPhrase  = ctx.Phrases.Include(p => p.Words).First(p => p.Id == phraseId);
                //rebuild the previous word positions from the stored text and its serialized indexes
                oldPhrase = new PhraseWords(dbPhrase.Text, TPosLen.fromBytes(dbPhrase.TextIdxs));
                //NOTE(review): dbPhrase.Dict is dereferenced here although only Words is Included above -
                //this presumably relies on lazy loading or an already tracked Dict; confirm.
                ps        = new PhraseSide {
                    src = (Langs)dbPhrase.Dict.SrcLang, dest = (Langs)dbPhrase.DestLang
                };
            }
            else                 //INSERT
            {
                //INSERT needs both phraseSide and dict
                if (phraseSide == null || dict == null)
                {
                    throw new Exception("phraseSide == null || dict == null");
                }
                ps = (PhraseSide)phraseSide;
                //when src and dest differ, the id of the source side phrase is required
                if (ps.src != ps.dest && srcSideId == null)
                {
                    throw new Exception("ps.src!=ps.dest && srcSideId == null");
                }
                ctx.Phrases.Add(dbPhrase = new Phrase {
                    SrcLang = (byte)ps.src, DestLang = (byte)ps.dest, SrcRef = srcSideId, Dict = dict
                });
            }

            var lang = ps.langOfText(); var newPhrase = new PhraseWords(newPhraseText);

            //Spell checks the given words and adds a PhraseWord row for each one whose Idxs entry
            //still has Len > 0 after the check
            Action <SelectedWord[]> spellCheckAndDBInsert = wordIdxs => {
                STASpellCheck(lang, wordIdxs, newPhrase);                 //low level spell check
                for (var i = 0; i < wordIdxs.Length; i++)
                {
                    if (newPhrase.Idxs[wordIdxs[i].idx].Len > 0)                                                       //new correct words to fulltext DB
                    {
                        ctx.PhraseWords.Add(new PhraseWord()
                        {
                            SrcLang = (byte)ps.src, DestLang = (byte)ps.dest, Word = wordIdxs[i].ftxWord, Phrase = dbPhrase
                        });
                    }
                }
            };

            //Word breaking
            STAWordBreak(lang, newPhrase);

            var newWordIdx = getCorrectPhraseWords(newPhrase);

            if (oldPhrase == null)                 //INSERT
            {
                spellCheckAndDBInsert(newWordIdx); //Spell check and add to DB
            }
            else                                   //UPDATE

            //Delete olds from DB
            {
                var oldWordIdx = getCorrectPhraseWords(oldPhrase);
                var dbOldWords = dbPhrase.Words;
                //words that were in the old text but are not in the new one
                //NOTE(review): Except depends on how SelectedWord defines equality (not visible here) - confirm it compares by word
                foreach (var w in oldWordIdx.Except(newWordIdx))
                {
                    //First throws when the phrase has no stored row for this word
                    ctx.PhraseWords.Remove(dbOldWords.First(db => db.Word == w.ftxWord));
                }

                //Spell check and add to DB
                //only words that the old text did not contain are checked and added
                var newWithoutOldWordIdx = newWordIdx.Except(oldWordIdx).ToArray();
                spellCheckAndDBInsert(newWithoutOldWordIdx);
            }

            dbPhrase.Text     = newPhrase.Text;
            dbPhrase.TextIdxs = TPosLen.toBytes(newPhrase.Idxs);
            //Base = lower-cased words joined with "|". Math.Abs is applied because Len is not always positive
            //(see the Len > 0 test above). DefaultIfEmpty makes Aggregate yield null instead of throwing when Idxs is empty.
            dbPhrase.Base     = newPhrase.Idxs.Select(idx => newPhrase.Text.Substring(idx.Pos, Math.Abs(idx.Len)).ToLower()).DefaultIfEmpty().Aggregate((r, i) => r + "|" + i);
            if (string.IsNullOrEmpty(dbPhrase.Base))
            {
                dbPhrase.Base = "";                                                  //error: no words, store an empty string instead of null
            }
            //cut Base to at most Phrase.maxPhraseBaseLen characters
            if (dbPhrase.Base.Length > Phrase.maxPhraseBaseLen)
            {
                dbPhrase.Base = dbPhrase.Base.Substring(0, Phrase.maxPhraseBaseLen);
            }

            return(dbPhrase);
        }
示例#7
0
 //Fills in text.Idxs for the text with its brackets removed
 public static void STAWordBreak(Langs lang, PhraseWords text)
 {
     var bracketFreeText = noBrackets(text.Text);
     var wordPositions = StemmerBreaker.RunBreaker.STAWordBreak(lang, bracketFreeText);

     text.Idxs = wordPositions;
 }