예제 #1
0
            /// <summary>
            /// Orders two entries by the character class id looked up from each entry's first element.
            /// </summary>
            /// <param name="left">First entry; element 0 is passed to <c>CharacterDefinition.LookupCharacterClass</c>.</param>
            /// <param name="right">Second entry; element 0 is passed to <c>CharacterDefinition.LookupCharacterClass</c>.</param>
            /// <returns>The left class id minus the right class id.</returns>
            public int Compare(string[] left, string[] right)
            {
                int leftClass = CharacterDefinition.LookupCharacterClass(left[0]);
                int rightClass = CharacterDefinition.LookupCharacterClass(right[0]);
                int difference = leftClass - rightClass;
                return difference;
            }
        /// <summary>
        /// Stores the invoke and group flags for the given character class.
        /// </summary>
        /// <param name="characterClassName">Character class name, resolved to an id via <c>CharacterDefinition.LookupCharacterClass</c>.</param>
        /// <param name="invoke">The invoke flag is stored as <c>true</c> only when this equals 1.</param>
        /// <param name="group">The group flag is stored as <c>true</c> only when this equals 1.</param>
        /// <param name="length">Currently unused.</param>
        public void PutInvokeDefinition(string characterClassName, int invoke, int group, int length)
        {
            byte classId = CharacterDefinition.LookupCharacterClass(characterClassName);
            bool invokeFlag = invoke == 1;
            bool groupFlag = group == 1;

            invokeMap[classId] = invokeFlag;
            groupMap[classId] = groupFlag;
            // TODO: length def ignored
        }
예제 #3
0
#pragma warning disable IDE0060 // Remove unused parameter
        /// <summary>
        /// Records, per character class, whether the invoke and group values equal 1.
        /// </summary>
        /// <param name="characterClassName">Name of the character class whose flags are being set.</param>
        /// <param name="invoke">Stored as <c>true</c> in the invoke map when equal to 1, otherwise <c>false</c>.</param>
        /// <param name="group">Stored as <c>true</c> in the group map when equal to 1, otherwise <c>false</c>.</param>
        /// <param name="length">Not used by this method.</param>
        public void PutInvokeDefinition(string characterClassName, int invoke, int group, int length)
#pragma warning restore IDE0060 // Remove unused parameter
        {
            byte classIndex = CharacterDefinition.LookupCharacterClass(characterClassName);

            bool shouldInvoke = (invoke == 1);
            invokeMap[classIndex] = shouldInvoke;

            bool shouldGroup = (group == 1);
            groupMap[classIndex] = shouldGroup;
            // TODO: length def ignored
        }
        /// <summary>
        /// Put mapping from unicode code point to character class.
        /// </summary>
        /// <remarks>
        /// Only the first non-empty, space-separated token of <paramref name="characterClassName"/> is used.
        /// Code point U+30FB (Nakaguro) is always mapped to <c>SYMBOL</c>.
        /// </remarks>
        /// <param name="codePoint">Code point.</param>
        /// <param name="characterClassName">Character class name.</param>
        public void PutCharacterCategory(int codePoint, string characterClassName)
        {
            char[] separators = new char[] { ' ' };
            string[] tokens = characterClassName.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            // The first token is always extracted, even when the Nakaguro override replaces it below.
            string firstToken = tokens[0];

            // Override Nakaguro
            string effectiveClassName = codePoint == 0x30FB ? "SYMBOL" : firstToken;

            characterCategoryMap[codePoint] = CharacterDefinition.LookupCharacterClass(effectiveClassName);
        }
        /// <summary>
        /// Put mapping from unicode code point to character class.
        /// </summary>
        /// <remarks>
        /// The class name is cut at the first space before lookup, and code point U+30FB (Nakaguro)
        /// is stored as <c>SYMBOL</c> regardless of the name supplied.
        /// </remarks>
        /// <param name="codePoint">Code point.</param>
        /// <param name="characterClassName">Character class name.</param>
        public void PutCharacterCategory(int codePoint, string characterClassName)
        {
            // Use the first category class only.
            string[] parts = characterClassName.Split(' ');
            string className = parts[0];

            // Override Nakaguro
            bool isNakaguro = codePoint == 0x30FB;
            if (isNakaguro)
            {
                className = "SYMBOL";
            }

            characterCategoryMap[codePoint] = CharacterDefinition.LookupCharacterClass(className);
        }
예제 #6
0
        /// <summary>
        /// Writes the entry through the base implementation and maps the entry's character class to its word id.
        /// </summary>
        /// <param name="entry">Entry fields; element 0 is looked up as a character class name.</param>
        /// <returns>The value returned by the base <c>Put</c>.</returns>
        public override int Put(string[] entry)
        {
            // The buffer position before the write is used as this entry's word id.
            int entryWordId = m_buffer.Position;

            int written = base.Put(entry);

            // Register the word id under the character class taken from the first field.
            int classId = CharacterDefinition.LookupCharacterClass(entry[0]);
            AddMapping(classId, entryWordId);

            return written;
        }
예제 #7
0
        /// <summary>
        /// Reads a dictionary definition file and builds an <see cref="UnknownDictionaryWriter"/> from it.
        /// </summary>
        /// <remarks>
        /// The <c>NGRAM_DICTIONARY_ENTRY</c> entry is written first. Each line of the file is then parsed with
        /// <c>,*,*</c> appended, and the parsed entries are written in ascending order of the character class id
        /// looked up from their first field. Entries that share a character class keep their file order.
        /// </remarks>
        /// <param name="filename">Path of the file to read.</param>
        /// <param name="encoding">Encoding name, resolved with <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <returns>The populated dictionary writer.</returns>
        public virtual UnknownDictionaryWriter ReadDictionaryFile(string filename, string encoding)
        {
            UnknownDictionaryWriter dictionary = new UnknownDictionaryWriter(5 * 1024 * 1024);

            List<string[]> lines = new List<string[]>();
            Encoding decoder = Encoding.GetEncoding(encoding);

            using (Stream inputStream = new FileStream(filename, FileMode.Open, FileAccess.Read))
            using (TextReader reader = new StreamReader(inputStream, decoder))
            {
                dictionary.Put(CSVUtil.Parse(NGRAM_DICTIONARY_ENTRY));

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // note: unk.def only has 10 fields, it simplifies the writer to just append empty reading and pronunciation,
                    // even though the unknown dictionary returns hardcoded null here.
                    string[] parsed = CSVUtil.Parse(line + ",*,*"); // Probably we don't need to validate entry
                    lines.Add(parsed);
                }
            }

            // List<T>.Sort is an unstable sort, so entries with the same character class could be
            // reordered relative to the file and receive different word ids. Enumerable.OrderBy is
            // stable and evaluates each key once; it is fully qualified so that no additional
            // using directive is required.
            IEnumerable<string[]> ordered = System.Linq.Enumerable.OrderBy(
                lines,
                parsedEntry => CharacterDefinition.LookupCharacterClass(parsedEntry[0]));

            foreach (string[] entry in ordered)
            {
                dictionary.Put(entry);
            }

            return dictionary;
        }