/// <summary>
/// Finds the phrases whose indexed fulltext words match any word (or any stem of a word)
/// contained in <paramref name="text"/>.
/// </summary>
/// <param name="phraseSide">Dictionary side to search in; supplies the language of the text
/// (langOfText) and the SrcLang/DestLang filter for the PhraseWords rows.</param>
/// <param name="text">Text to search for. Null or empty text yields an empty result.</param>
/// <param name="isDBStemming">true: stems are obtained from StemmingWithSQLServer;
/// false: from StemmerBreaker.RunStemmer.STAStemm.</param>
/// <returns>Distinct PhraseRef values of the matching PhraseWords rows (empty array when nothing matches).</returns>
public static int[] STASearchPhrase(PhraseSide phraseSide, string text, bool isDBStemming)
{
    // Nothing to break into words => nothing can match.
    if (string.IsNullOrEmpty(text))
    {
        return new int[0];
    }

    var lang = phraseSide.langOfText();
    var txt = new PhraseWords(text);
    txt.Idxs = StemmerBreaker.RunBreaker.STAWordBreak(lang, text);
    var words = getCorrectPhraseWords(txt);

    // The answer depends only on the language, so ask once instead of once per word.
    var hasStemmer = StemmerBreaker.Lib.hasStemmer(lang);

    var res = new List<string>();
    foreach (var w in words)
    {
        if (!hasStemmer)
        {
            // No stemmer exists for the language => search for the single word itself
            // (same as StemmerBreaker.Runner.stemm does when its stemmer is null).
            res.Add(w.ftxWord);
        }
        else
        {
            var st = isDBStemming
                ? (IEnumerable<string>)StemmingWithSQLServer(lang, w.ftxWord)
                : StemmerBreaker.RunStemmer.STAStemm(lang, w.ftxWord);
            res.AddRange(st);
        }
    }
    res = res.Distinct().ToList();

    // No search terms => no row can satisfy res.Contains(...); skip the database round trip.
    if (res.Count == 0)
    {
        return new int[0];
    }

    // Evaluate the language filter once, outside of the query expression.
    var srcLang = (byte)phraseSide.src;
    var destLang = (byte)phraseSide.dest;

    // The context is only needed for this one query; dispose it when done.
    // NOTE(review): assumes FulltextContext is IDisposable (it is used like an EF DbContext) - confirm.
    using (var ctx = new FulltextContext())
    {
        return ctx.PhraseWords
                  .Where(w => w.SrcLang == srcLang && w.DestLang == destLang && res.Contains(w.Word))
                  .Select(w => w.PhraseRef)
                  .Distinct()
                  .ToArray();
    }
}
/// <summary>
/// Builds a PhraseWords for <paramref name="phrase"/>, word-breaks it and runs the low level spell check on it.
/// </summary>
/// <param name="lang">Language passed to the word breaker and to the spell checker.</param>
/// <param name="phrase">Text of the phrase.</param>
/// <returns>The PhraseWords created from <paramref name="phrase"/> after both steps were applied to it.</returns>
public static PhraseWords STABreakAndCheck(Langs lang, string phrase)
{
    var checkedPhrase = new PhraseWords(phrase);

    // Word breaking of the text (text in brackets is excluded).
    STAWordBreak(lang, checkedPhrase);

    // Low level spell check of the words selected by getCorrectPhraseWords.
    STASpellCheck(lang, getCorrectPhraseWords(checkedPhrase), checkedPhrase);

    return checkedPhrase;
}
/// <summary>
/// Builds a PhraseWords for <paramref name="phrase"/>, word-breaks it and runs the low level spell check on it.
/// </summary>
/// <param name="lang">Language passed to the word breaker and to the spell checker.</param>
/// <param name="phrase">Text of the phrase.</param>
/// <returns>null when <paramref name="phrase"/> is null or empty; otherwise the processed PhraseWords.</returns>
public static PhraseWords STABreakAndCheck(Langs lang, string phrase)
{
    if (phrase == null || phrase.Length == 0)
    {
        return null;
    }

    var checkedPhrase = new PhraseWords(phrase);

    // Word breaking of the text (text in brackets is excluded).
    STAWordBreak(lang, checkedPhrase);

    // Low level spell check over the words selected from the broken phrase.
    var selected = new SelectedWords(checkedPhrase);
    STASpellCheck(lang, selected);

    return checkedPhrase;
}
/// <summary>
/// Spell checks <paramref name="nws"/> and marks every word reported as wrong by negating its Len
/// in <paramref name="newText"/>.Idxs (Pos is kept).
/// </summary>
/// <param name="lang">Language passed to RunSpellCheckWords.STACheck.</param>
/// <param name="nws">Words to spell check.</param>
/// <param name="newText">Phrase whose Idxs entries are replaced in place for the wrong words.</param>
static void STASpellCheck(Langs lang, SelectedWord[] nws, PhraseWords newText)
{
    // Spell check
    var errorIdxs = RunSpellCheckWords.STACheck(lang, nws);
    if (errorIdxs == null)
    {
        return; // nothing reported
    }

    // Update Len for the wrong words.
    // NOTE(review): errIdx is used directly as an index into newText.Idxs, i.e. this assumes STACheck
    // reports positions in newText.Idxs rather than positions within nws - confirm.
    foreach (var errIdx in errorIdxs)
    {
        var wrong = newText.Idxs[errIdx];
        newText.Idxs[errIdx] = new TPosLen() { Pos = wrong.Pos, Len = (sbyte)-wrong.Len };
    }
}
/// <summary>
/// Fills in <paramref name="text"/>.Idxs by word-breaking text.Text.
/// Every match of the otherBrackets and then the roundBrackets pattern is first overwritten with
/// spaces of the same length, so the remaining characters keep their positions.
/// </summary>
/// <param name="lang">Language passed to the word breaker.</param>
/// <param name="text">Phrase to break; only its Idxs member is assigned.</param>
static void STAWordBreak(Langs lang, PhraseWords text)
{
    // Blank out the "other" brackets first ...
    var withoutOtherBrackets = otherBrackets.Replace(text.Text, m => new string(' ', m.Length));

    // ... then the round brackets in what is left.
    var blanked = roundBrackets.Replace(withoutOtherBrackets, m => new string(' ', m.Length));

    text.Idxs = StemmerBreaker.RunBreaker.STAWordBreak(lang, blanked);
}
/// <summary>
/// Deletes, updates or inserts a phrase and keeps its fulltext words (ctx.PhraseWords) in step with the text.
/// The mode is selected by the arguments:
/// DELETE when <paramref name="newPhraseText"/> is null or empty,
/// UPDATE when it is not and <paramref name="phraseId"/> is given,
/// INSERT otherwise.
/// All changes are only registered on <paramref name="ctx"/>; this method does not save them.
/// </summary>
/// <param name="ctx">Context the phrase and its words are read from and registered on.</param>
/// <param name="newPhraseText">New text of the phrase; null or empty means delete, anything else update or insert.</param>
/// <param name="phraseId">Id of the phrase to update or delete; null means insert.</param>
/// <param name="dict">Owning dictionary, for insert only (must be null for update and delete).</param>
/// <param name="phraseSide">For insert: the dictionary and its side, e.g. the Czech part of an English-Czech dictionary.
/// Must be null for delete.</param>
/// <param name="srcSideId">Id of the source side phrase; required when inserting a destination side
/// (src differs from dest), must be null for update.</param>
/// <returns>The inserted or updated phrase; null after a delete.</returns>
/// <exception cref="ArgumentException">The combination of arguments does not match the selected mode.</exception>
/// <exception cref="InvalidOperationException">Thrown by First when no phrase with <paramref name="phraseId"/> exists.</exception>
public static Phrase STAInsert(FulltextContext ctx, string newPhraseText, int? phraseId, Dict dict, PhraseSide? phraseSide, int? srcSideId)
{
    if (string.IsNullOrEmpty(newPhraseText)) // DELETE
    {
        if (phraseId == null || phraseSide != null || dict != null)
        {
            throw new ArgumentException("phraseId == null || phraseSide!=null || dict!=null");
        }
        // Remove the phrase together with the phrases in its Dests collection.
        var delPh = ctx.Phrases.Include(p => p.Dests).First(p => p.Id == phraseId);
        ctx.Phrases.RemoveRange(delPh.Dests);
        ctx.Phrases.Remove(delPh);
        return null;
    }

    Phrase dbPhrase;
    PhraseWords oldPhrase = null; // stays null for INSERT
    PhraseSide ps;
    if (phraseId != null) // UPDATE
    {
        if (srcSideId != null || dict != null)
        {
            throw new ArgumentException("srcSideId != null || dict!=null");
        }
        dbPhrase = ctx.Phrases.Include(p => p.Words).First(p => p.Id == phraseId);
        oldPhrase = new PhraseWords(dbPhrase.Text, TPosLen.fromBytes(dbPhrase.TextIdxs));
        // NOTE(review): Dict is not part of the Include above, so dbPhrase.Dict relies on the
        // navigation property being loaded some other way - confirm.
        ps = new PhraseSide { src = (Langs)dbPhrase.Dict.SrcLang, dest = (Langs)dbPhrase.DestLang };
    }
    else // INSERT
    {
        if (phraseSide == null || dict == null)
        {
            throw new ArgumentException("phraseSide == null || dict == null");
        }
        ps = phraseSide.Value;
        if (ps.src != ps.dest && srcSideId == null)
        {
            throw new ArgumentException("ps.src!=ps.dest && srcSideId == null");
        }
        dbPhrase = new Phrase { SrcLang = (byte)ps.src, DestLang = (byte)ps.dest, SrcRef = srcSideId, Dict = dict };
        ctx.Phrases.Add(dbPhrase);
    }

    var lang = ps.langOfText();
    var newPhrase = new PhraseWords(newPhraseText);

    // Spell checks the given words of newPhrase and registers those that are still
    // correct (positive Len) as fulltext words of dbPhrase.
    Action<SelectedWord[]> spellCheckAndDBInsert = wordIdxs =>
    {
        STASpellCheck(lang, wordIdxs, newPhrase); // low level spell check
        foreach (var word in wordIdxs)
        {
            if (newPhrase.Idxs[word.idx].Len > 0) // new correct word => fulltext DB
            {
                ctx.PhraseWords.Add(new PhraseWord()
                {
                    SrcLang = (byte)ps.src,
                    DestLang = (byte)ps.dest,
                    Word = word.ftxWord,
                    Phrase = dbPhrase
                });
            }
        }
    };

    // Word breaking
    STAWordBreak(lang, newPhrase);
    var newWordIdx = getCorrectPhraseWords(newPhrase);

    if (oldPhrase == null) // INSERT
    {
        spellCheckAndDBInsert(newWordIdx); // spell check and add to DB
    }
    else // UPDATE
    {
        // Delete from the DB the old words that are no longer part of the phrase.
        var oldWordIdx = getCorrectPhraseWords(oldPhrase);
        var dbOldWords = dbPhrase.Words;
        foreach (var w in oldWordIdx.Except(newWordIdx))
        {
            ctx.PhraseWords.Remove(dbOldWords.First(db => db.Word == w.ftxWord));
        }

        // Spell check and add to the DB only the words the old phrase did not have.
        var newWithoutOldWordIdx = newWordIdx.Except(oldWordIdx).ToArray();
        spellCheckAndDBInsert(newWithoutOldWordIdx);
    }

    dbPhrase.Text = newPhrase.Text;
    dbPhrase.TextIdxs = TPosLen.toBytes(newPhrase.Idxs);

    // Base = lower-cased words joined by '|'. Math.Abs because wrong words carry a negative Len.
    // string.Join gives "" for a phrase without words, so no null fix-up is needed afterwards.
    var phraseBase = string.Join("|", newPhrase.Idxs.Select(idx => newPhrase.Text.Substring(idx.Pos, Math.Abs(idx.Len)).ToLower()));
    if (phraseBase.Length > Phrase.maxPhraseBaseLen)
    {
        phraseBase = phraseBase.Substring(0, Phrase.maxPhraseBaseLen);
    }
    dbPhrase.Base = phraseBase;

    return dbPhrase;
}
/// <summary>
/// Fills in <paramref name="text"/>.Idxs for the text stripped of brackets:
/// text.Text is passed through noBrackets and the result is word-broken.
/// </summary>
/// <param name="lang">Language passed to the word breaker.</param>
/// <param name="text">Phrase to break; only its Idxs member is assigned.</param>
public static void STAWordBreak(Langs lang, PhraseWords text)
{
    var stripped = noBrackets(text.Text);
    var idxs = StemmerBreaker.RunBreaker.STAWordBreak(lang, stripped);
    text.Idxs = idxs;
}