Ejemplo n.º 1
0
        /// <summary>
        /// Builds an <see cref="UnknownDictionaryWriter"/> from a CSV dictionary file.
        /// The n-gram entry is put first; every line of the file is then parsed,
        /// the parsed entries are sorted with <c>ComparerAnonymousHelper</c>, and
        /// they are put into the writer in that order.
        /// </summary>
        /// <param name="filename">Path of the dictionary file to read.</param>
        /// <param name="encoding">Encoding name, resolved via <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <returns>The populated writer.</returns>
        public virtual UnknownDictionaryWriter ReadDictionaryFile(string filename, string encoding)
        {
            UnknownDictionaryWriter writer = new UnknownDictionaryWriter(5 * 1024 * 1024);

            List<string[]> entries = new List<string[]>();
            Encoding textEncoding = Encoding.GetEncoding(encoding);

            using (Stream input = new FileStream(filename, FileMode.Open, FileAccess.Read))
            using (TextReader reader = new StreamReader(input, textEncoding))
            {
                // Registered only after the file has been opened successfully,
                // and ahead of any entry read from it.
                writer.Put(CSVUtil.Parse(NGRAM_DICTIONARY_ENTRY));

                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
                {
                    // unk.def rows only carry 10 fields; appending an empty reading and
                    // pronunciation keeps the writer simple, even though the unknown
                    // dictionary returns hardcoded null for them. Entries are not validated.
                    entries.Add(CSVUtil.Parse(row + ",*,*"));
                }
            }

            // Entries are buffered so they can be inserted in comparer order
            // rather than file order.
            entries.Sort(new ComparerAnonymousHelper());

            for (int i = 0; i < entries.Count; i++)
            {
                writer.Put(entries[i]);
            }

            return writer;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Verifies that <c>Put</c> throws for an entry submitted before any character
        /// category is registered (that entry also has one fewer numeric column than the
        /// valid ones below), and that entries are accepted once their categories exist.
        /// </summary>
        public void TestPut()
        {
            UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);

            // Record the outcome and call fail() outside the try block: a fail() inside
            // it would be swallowed by the catch-all handler whenever the failure is
            // signalled with an Exception-derived type, so the check could never fail.
            bool putThrew = false;
            try
            {
                unkDic.Put(CSVUtil.Parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*"));
            }
            catch (Exception)
            {
                // Expected. NOTE(review): the concrete exception type is not visible
                // from this test; narrow the catch once it is confirmed.
                putThrew = true;
            }

            if (!putThrew)
            {
                fail();
            }

            string entry1 = "ALPHA,1285,1285,13398,名詞,一般,*,*,*,*,*,*,*";
            string entry2 = "HIRAGANA,1285,1285,13069,名詞,一般,*,*,*,*,*,*,*";
            string entry3 = "KANJI,1285,1285,11426,名詞,一般,*,*,*,*,*,*,*";

            unkDic.PutCharacterCategory(0, "ALPHA");
            unkDic.PutCharacterCategory(1, "HIRAGANA");
            unkDic.PutCharacterCategory(2, "KANJI");

            // These puts must complete without throwing.
            unkDic.Put(CSVUtil.Parse(entry1));
            unkDic.Put(CSVUtil.Parse(entry2));
            unkDic.Put(CSVUtil.Parse(entry3));
        }