Ejemplo n.º 1
0
        /// <summary>
        /// Concatenates the items of <paramref name="request"/> one after another and looks up
        /// each growing prefix in <paramref name="jmDict"/>, keeping the last prefix for which
        /// the lookup returned a non-null result.
        /// </summary>
        /// <param name="jmDict">Dictionary whose <c>Lookup</c> method is queried with each prefix.</param>
        /// <param name="request">Pieces of text that are appended, in order, to form the prefixes.</param>
        /// <param name="backOffCountStart">
        /// Number of consecutive failed lookups after which the search stops. The counter is
        /// restored to this value after every successful lookup. Because the stop condition is an
        /// exact comparison with zero, a value of zero or less effectively disables the early stop.
        /// </param>
        /// <returns>
        /// The result of the last successful lookup together with the prefix that produced it,
        /// or <c>(null, null)</c> when no prefix was found.
        /// </returns>
        public static (IEnumerable <JMDictEntry> entry, string word) GreedyLookup(this JMDict jmDict, IEnumerable <string> request, int backOffCountStart = 5)
        {
            IEnumerable <JMDictEntry> bestEntries = null;
            string bestWord = null;
            string prefix = "";
            int missesLeft = backOffCountStart;

            foreach (var piece in request)
            {
                prefix = prefix + piece;
                var candidate = jmDict.Lookup(prefix);

                if (candidate == null)
                {
                    // A miss: use up one back-off slot and give up once none are left.
                    missesLeft -= 1;
                    if (missesLeft == 0)
                    {
                        break;
                    }

                    continue;
                }

                // A hit: remember it and start counting misses afresh.
                bestEntries = candidate;
                bestWord = prefix;
                missesLeft = backOffCountStart;
            }

            return (bestEntries, bestWord);
        }
        /// <summary>
        /// Loads the character data files, the MeCab dictionary and the JMdict dictionary from
        /// below <c>Tagger.baseDir</c> and assigns the resulting language service, dictionary and
        /// auto-glosser to <c>lang</c>, <c>jmdict</c> and <c>glosser</c>.
        /// </summary>
        public void SetUp()
        {
            var baseDir = Tagger.baseDir;

            // Every path segment is handed to Path.Combine separately instead of embedding a
            // literal '\' in the file name, so the platform's own directory separator is used.
            var characterDir = Path.Combine(baseDir, "character");

            var kanjidict = JDict.KanjiDict.Create(Path.Combine(characterDir, "kanjidic2.xml.gz"));
            var kradfile = new JDict.Kradfile(Path.Combine(characterDir, "kradfile1_plus_2_utf8"), Encoding.UTF8);
            var radkfile = new Radkfile(Path.Combine(characterDir, "radkfile1_plus_2_utf8"), Encoding.UTF8);
            var kanaProperties = new KanaProperties(
                Path.Combine(characterDir, "hiragana_romaji.txt"),
                Path.Combine(characterDir, "katakana_romaji.txt"),
                Path.Combine(characterDir, "hiragana_katakana.txt"),
                Path.Combine(characterDir, "kana_related.txt"),
                Encoding.UTF8);

            this.lang = new LanguageService(
                new MeCabIpadic(new MeCabParam
                {
                    DicDir = Path.Combine(baseDir, "mecab", "ipadic"),
                }),
                EasilyConfusedKana.FromFile(Path.Combine(characterDir, "confused.txt")),
                kradfile,
                radkfile,
                kanjidict,
                kanaProperties);

            this.jmdict = JDict.JMDict.Create(
                Path.Combine(baseDir, "dictionaries", "JMdict_e.gz"),
                Path.Combine(baseDir, "dictionaries", "JMdict_e.cache"));
            glosser = new AutoGlosser(lang, jmdict);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates an auto-glosser that keeps the given language service and dictionary.
 /// </summary>
 /// <param name="lang">Language service stored in the <c>lang</c> field.</param>
 /// <param name="dict">JMDict instance stored in the <c>dict</c> field.</param>
 public AutoGlosser(ILanguageService lang, JMDict dict)
 {
     (this.lang, this.dict) = (lang, dict);
 }
 /// <summary>
 /// Creates the data source and keeps the given dictionary for later use.
 /// </summary>
 /// <param name="jdict">JMDict instance stored in the <c>jdict</c> field.</param>
 public VerbConjugationDataSource(JMDict jdict) => this.jdict = jdict;
Ejemplo n.º 5
0
 /// <summary>
 /// Creates the data source and keeps the given dictionary for later use.
 /// </summary>
 /// <param name="jdict">JMDict instance stored in the <c>jdict</c> field.</param>
 public JMDictDataSource(JMDict jdict) => this.jdict = jdict;
Ejemplo n.º 6
0
 /// <summary>
 /// Creates the data source together with the <c>AutoGlosser</c> it keeps in the
 /// <c>autoglosser</c> field.
 /// </summary>
 /// <param name="lang">Language service passed on to the new <c>AutoGlosser</c>.</param>
 /// <param name="jdict">JMDict instance passed on to the new <c>AutoGlosser</c>.</param>
 public AutoGlosserDataSource(ILanguageService lang, JMDict jdict) =>
     this.autoglosser = new AutoGlosser(lang, jdict);
Ejemplo n.º 7
0
 /// <summary>
 /// Creates the data source and keeps the given dictionary and frequency list.
 /// </summary>
 /// <param name="jmdict">JMDict instance stored in the <c>jmdict</c> field.</param>
 /// <param name="list">Frequency list stored in the <c>list</c> field.</param>
 public PartialWordLookupJMDictDataSource(JMDict jmdict, FrequencyList list)
 {
     (this.jmdict, this.list) = (jmdict, list);
 }