/// <summary>
/// Concatenates the items of <paramref name="request"/> one by one and looks up each
/// growing prefix in <paramref name="jmDict"/>, remembering the last prefix that produced a result.
/// </summary>
/// <param name="jmDict">Dictionary queried through its <c>Lookup</c> method.</param>
/// <param name="request">Word fragments that are appended, in order, to build the lookup key.</param>
/// <param name="backOffCountStart">
/// Number of consecutive misses (a <c>null</c> result from <c>Lookup</c>) after which the scan stops.
/// The counter is reset to this value after every hit. Because the counter is only compared
/// for equality with zero, a value below 1 does not stop the scan early.
/// </param>
/// <returns>
/// The result of the last successful lookup together with the concatenated text that produced it;
/// both elements are <c>null</c> when no prefix was found.
/// </returns>
public static (IEnumerable<JMDictEntry> entry, string word) GreedyLookup(
    this JMDict jmDict,
    IEnumerable<string> request,
    int backOffCountStart = 5)
{
    IEnumerable<JMDictEntry> bestEntries = null;
    string bestKey = null;
    string candidate = "";
    int missesLeft = backOffCountStart;

    foreach (var fragment in request)
    {
        candidate = candidate + fragment;
        var hit = jmDict.Lookup(candidate);

        if (hit == null)
        {
            // A miss: give up once the allowed number of consecutive misses is used up.
            if (--missesLeft == 0)
            {
                break;
            }

            continue;
        }

        // A hit: remember it and allow a fresh run of misses after it.
        bestEntries = hit;
        bestKey = candidate;
        missesLeft = backOffCountStart;
    }

    return (bestEntries, bestKey);
}
// Builds the LanguageService, loads the JMdict dictionary and creates the AutoGlosser,
// storing them in the lang, jmdict and glosser members.
//
// Every data file is resolved relative to Tagger.baseDir. Path segments are passed
// separately to Path.Combine instead of embedding a '\' in the literal, so the resulting
// paths use the platform's directory separator (the result is unchanged on Windows).
public void SetUp()
{
    var baseDir = Tagger.baseDir;

    // Character-level resources.
    var kanjidict = JDict.KanjiDict.Create(Path.Combine(baseDir, "character", "kanjidic2.xml.gz"));
    var kradfile = new JDict.Kradfile(Path.Combine(baseDir, "character", "kradfile1_plus_2_utf8"), Encoding.UTF8);
    var radkfile = new Radkfile(Path.Combine(baseDir, "character", "radkfile1_plus_2_utf8"), Encoding.UTF8);
    var kanaProperties = new KanaProperties(
        Path.Combine(baseDir, "character", "hiragana_romaji.txt"),
        Path.Combine(baseDir, "character", "katakana_romaji.txt"),
        Path.Combine(baseDir, "character", "hiragana_katakana.txt"),
        Path.Combine(baseDir, "character", "kana_related.txt"),
        Encoding.UTF8);

    this.lang = new LanguageService(
        new MeCabIpadic(new MeCabParam
        {
            DicDir = Path.Combine(baseDir, "mecab", "ipadic"),
        }),
        EasilyConfusedKana.FromFile(Path.Combine(baseDir, "character", "confused.txt")),
        kradfile,
        radkfile,
        kanjidict,
        kanaProperties);

    // The second path is passed alongside the dictionary file; judging by its name it is a cache file.
    this.jmdict = JDict.JMDict.Create(
        Path.Combine(baseDir, "dictionaries", "JMdict_e.gz"),
        Path.Combine(baseDir, "dictionaries", "JMdict_e.cache"));

    glosser = new AutoGlosser(lang, jmdict);
}
/// <summary>
/// Creates a glosser that keeps the supplied language service and dictionary for later use.
/// </summary>
/// <param name="lang">Language service stored in the <c>lang</c> member.</param>
/// <param name="dict">JMdict dictionary stored in the <c>dict</c> member.</param>
public AutoGlosser(ILanguageService lang, JMDict dict)
{
    this.lang = lang;
    this.dict = dict;
}
/// <summary>
/// Creates the data source and keeps the supplied dictionary for later use.
/// </summary>
/// <param name="jdict">JMdict dictionary stored in the <c>jdict</c> member.</param>
public VerbConjugationDataSource(JMDict jdict) => this.jdict = jdict;
/// <summary>
/// Creates the data source and keeps the supplied dictionary for later use.
/// </summary>
/// <param name="jdict">JMdict dictionary stored in the <c>jdict</c> member.</param>
public JMDictDataSource(JMDict jdict) => this.jdict = jdict;
/// <summary>
/// Creates the data source around a new <c>AutoGlosser</c> built from the given arguments.
/// </summary>
/// <param name="lang">Language service passed to the <c>AutoGlosser</c> constructor.</param>
/// <param name="jdict">JMdict dictionary passed to the <c>AutoGlosser</c> constructor.</param>
public AutoGlosserDataSource(ILanguageService lang, JMDict jdict)
    => autoglosser = new AutoGlosser(lang, jdict);
/// <summary>
/// Creates the data source and keeps the supplied dictionary and frequency list for later use.
/// </summary>
/// <param name="jmdict">JMdict dictionary stored in the <c>jmdict</c> member.</param>
/// <param name="list">Frequency list stored in the <c>list</c> member.</param>
public PartialWordLookupJMDictDataSource(JMDict jmdict, FrequencyList list)
{
    this.jmdict = jmdict;
    this.list = list;
}