Beispiel #1
0
        /// <summary>
        /// Classifies a single character by the code-unit range it falls in.
        /// </summary>
        /// <param name="c">The character to classify.</param>
        /// <returns>
        /// A new <see cref="UnicodeBlock"/> whose <c>_block</c> is set to the block type of the
        /// first matching range, or to <c>BlockType.Unknown</c> when no range matches.
        /// </returns>
        internal static UnicodeBlock Of(char c)
        {
            UnicodeBlock result = new UnicodeBlock();

            // The ranges are disjoint, so the order of the tests does not affect the outcome.
            result._block =
                ('\u4E00' <= c && c <= '\u9FFF') ? BlockType.CJKUnifiedIdeographs :
                ('\u3040' <= c && c <= '\u309F') ? BlockType.Hiragana :
                ('\u30A0' <= c && c <= '\u30FF') ? BlockType.Katakana :
                ('\uFF00' <= c && c <= '\uFFEF') ? BlockType.HalfKana :
                BlockType.Unknown;

            return result;
        }
Beispiel #2
0
 /// <summary>
 /// Looks up a list of block names through <c>UnicodeBlock.ForName</c>, including
 /// spellings that vary in case, spacing, hyphens and underscores.
 /// Each hit is logged; the first lookup that throws is reported as an error and
 /// ends the test.
 /// </summary>
 public void TestUnicodeBlockForName()
 {
     String[] names =
     {
         "Latin-1 Supplement",
         "Optical Character Recognition",
         "CJK Unified Ideographs Extension A",
         "Supplemental Arrows-B",
         "Supplemental arrows b",
         "supp-lement-al arrowsb",
         "Supplementary Private Use Area-B",
         "supplementary_Private_Use_Area-b",
         "supplementary_PRIVATE_Use_Area_b"
     };

     foreach (String name in names)
     {
         try
         {
             UnicodeBlock b = UnicodeBlock.ForName(name);
             Logln("found: " + b + " for name: " + name);
         }
         catch (Exception)
         {
             Errln("could not find block for name: " + name);
             // Remaining names are not tried once one lookup has failed.
             break;
         }
     }
 }
Beispiel #3
0
 /// <summary>
 /// Verifies that <c>UnicodeBlock.ForName</c> resolves two valid names and that a
 /// malformed name raises an <see cref="ArgumentException"/> with the message
 /// "invalid block name".
 /// </summary>
 public void TestForName()
 {
     var basicLatin = UnicodeBlock.ForName("Basic Latin");
     Assert.AreEqual(UnicodeBlock.BASIC_LATIN, basicLatin);

     var hangulSyllables = UnicodeBlock.ForName("HANGUL_SYLLABLES");
     Assert.AreEqual(UnicodeBlock.HANGUL_SYLLABLES, hangulSyllables);

     // Capture the expected exception so the assertions can run outside the try block.
     ArgumentException thrown = null;
     try
     {
         UnicodeBlock.ForName("Basic___ Latin");
     }
     catch (ArgumentException ex)
     {
         thrown = ex;
     }

     if (thrown == null)
     {
         Assert.Fail("exception should be thrown");
     }
     Assert.AreEqual("invalid block name", thrown.Message);
 }
Beispiel #4
0
 /// <summary>
 /// Verifies that <c>UnicodeBlock.Of</c> returns the expected block for a set of
 /// sample characters. <c>samples[i]</c> is expected to map to <c>expected[i]</c>.
 /// </summary>
 public void TestOfMethod()
 {
     char[] samples =
     {
         '中', '語',
         '한',
         'E', 'a', '$', '-',
         'ç', 'ñ',
         'ω',
         'Ѧ'
     };
     UnicodeBlock[] expected =
     {
         UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
         UnicodeBlock.HANGUL_SYLLABLES,
         UnicodeBlock.BASIC_LATIN, UnicodeBlock.BASIC_LATIN, UnicodeBlock.BASIC_LATIN, UnicodeBlock.BASIC_LATIN,
         UnicodeBlock.LATIN_1_SUPPLEMENT, UnicodeBlock.LATIN_1_SUPPLEMENT,
         UnicodeBlock.GREEK,
         UnicodeBlock.CYRILLIC
     };

     // Checked in the listed order; the first mismatch fails the test.
     for (int i = 0; i < samples.Length; i++)
     {
         Assert.AreEqual(expected[i], UnicodeBlock.Of(samples[i]));
     }
 }
        /// <summary>
        /// Splits a sentence into word-like tokens.
        /// </summary>
        /// <param name="text">The text to split. <c>null</c> is treated like an empty string.</param>
        /// <returns>
        /// The tokens in order of appearance; an empty array when <paramref name="text"/> is null or empty.
        /// </returns>
        /// <remarks>
        /// When <c>config.UseMecab</c> is set, the work is delegated to <c>_tagger.Parse</c>: line breaks
        /// are removed from its output, the remainder is split on spaces and blank entries are dropped.
        /// Otherwise the text is scanned character by character. A character that is listed in
        /// <c>Divider</c>, or that is a separator or a symbol, becomes a token of its own. Other
        /// characters are accumulated into one token until <c>GetBlock()</c> reports a block that
        /// differs from the previous character's block. Accumulated tokens that consist only of
        /// white space are discarded.
        /// </remarks>
        string[] Split(string text)
        {
            // Null is handled like empty input instead of failing on text.Length.
            if (string.IsNullOrEmpty(text))
            {
                return new string[0];
            }

            if (config.UseMecab)
            {
                // IsNullOrWhiteSpace already covers null and empty entries.
                return _tagger.Parse(text)
                       .Replace("\r", "")
                       .Replace("\n", "")
                       .Split(' ')
                       .Where(s => !string.IsNullOrWhiteSpace(s))
                       .ToArray();
            }

            var list = new List<string>();
            var buf  = "";

            for (var i = 0; i < text.Length; i++)
            {
                char ch = text[i];

                if (Divider.Contains(ch.ToString()) ||
                    char.IsSeparator(ch) ||
                    char.IsSymbol(ch))
                {
                    // A dividing character ends the pending token and is emitted on its own.
                    if (!string.IsNullOrWhiteSpace(buf))
                    {
                        list.Add(buf);
                    }
                    list.Add(ch.ToString());
                    buf = "";
                    continue;
                }

                // The block is only needed for characters that get accumulated.
                UnicodeBlock block = ch.GetBlock();

                // NOTE(review): this relies on UnicodeBlock's != comparing blocks by value - confirm.
                // prevBlock is declared outside this method; it only matters while buf is non-empty,
                // and buf is only non-empty after prevBlock has been assigned below.
                if (block != prevBlock)
                {
                    if (!string.IsNullOrWhiteSpace(buf))
                    {
                        list.Add(buf);
                    }
                    buf = "";
                }

                buf      += ch;
                prevBlock = block;
            }

            // Same filter as inside the loop, so a trailing white-space-only run is dropped
            // just like one in the middle of the text.
            if (!string.IsNullOrWhiteSpace(buf))
            {
                list.Add(buf);
            }
            return list.ToArray();
        }
Beispiel #6
0
        /// <summary>
        /// Creates the NFA that matches the unicode block referenced by an AST node
        /// </summary>
        /// <param name="node">The AST node whose value contains the name of the unicode block</param>
        /// <returns>
        /// A NFA covering the span of the block, or an epsilon NFA (after reporting an error)
        /// when no block with that name is found
        /// </returns>
        private NFA BuildNFAFromUnicodeBlock(ASTNode node)
        {
            // the block name is the node's value without its first 3 characters and its last character
            string       blockName = node.Value.Substring(3, node.Value.Length - 4);
            UnicodeBlock found     = UnicodeBlocks.GetBlock(blockName);

            if (found != null)
            {
                // known block: start from a minimal NFA and add the block's span to it
                NFA result = NFA.NewMinimal();
                AddUnicodeSpanToNFA(result, found.Span);
                return result;
            }

            // unknown block: report it and fall back to an epsilon NFA
            OnError(node.Position, "Unknown unicode block {0}", blockName);
            return BuildEpsilonNFA();
        }
        /// <summary>
        /// Refills <c>charView</c> with one item per code point of the block that is currently
        /// selected in <c>blockSelection</c>. Each item's tooltip shows the character name
        /// (or "RESERVED CODEPOINT" when it has none) and its code point in hexadecimal.
        /// </summary>
        /// <param name="sender">The source of the event (not used).</param>
        /// <param name="e">The event data (not used).</param>
        private void blockSelection_SelectedIndexChanged(object sender, EventArgs e)
        {
            charView.Items.Clear();

            // A negative index cannot address a block (list controls conventionally report -1
            // when nothing is selected), so leave the view empty instead of indexing out of range.
            int selected = blockSelection.SelectedIndex;
            if (selected < 0)
            {
                return;
            }

            UnicodeBlock block = UnicodeInfo.GetBlocks()[selected];
            int          first = block.CodePointRange.FirstCodePoint;
            int          last  = block.CodePointRange.LastCodePoint;

            for (int codePoint = first; codePoint <= last; codePoint++)
            {
                UnicodeCharInfo info = UnicodeInfo.GetCharInfo(codePoint);
                charView.Items.Add(UnicodeInfo.GetDisplayText(info));

                // Chosen per code point: an unnamed code point must not inherit the name of the
                // character that happened to come before it.
                string name = info.Name ?? "RESERVED CODEPOINT";

                // "X8" produces the same eight upper-case hex digits as reversing the bytes of a
                // 32-bit value on a little-endian machine, without depending on byte order.
                string hex = info.CodePoint.ToString("X8");

                // The list was cleared above, so the item just added sits at (codePoint - first).
                charView.Items[codePoint - first].ToolTipText = name + "\r\nCode point: U+" + hex;
            }
        }
Beispiel #8
0
 /// <summary>
 /// Initializes the instance by storing both arguments in the fields of the same name.
 /// </summary>
 /// <param name="flag">The unicode block kept in the <c>flag</c> field.</param>
 /// <param name="base">The value kept in the <c>base</c> field.</param>
 internal ScriptData(UnicodeBlock flag, int @base)
 {
     this.@base = @base;
     this.flag  = flag;
 }
Beispiel #9
0
        //public UnicodeInfo ToUnicodeData()
        //{
        //	var finalUnicodeData = new UnicodeCharacterData[ucdEntryCount];

        //	for (int i = 0; i < finalUnicodeData.Length; ++i)
        //		finalUnicodeData[i] = ucdEntries[i].ToCharacterData();

        //	var finalUnihanData = new UnihanCharacterData[unihanEntryCount];

        //	for (int i = 0; i < finalUnihanData.Length; ++i)
        //		finalUnihanData[i] = unihanEntries[i].ToCharacterData();

        //	return new UnicodeInfo(unicodeVersion, finalUnicodeData, finalUnihanData, blockEntries.ToArray());
        //}

        /// <summary>
        /// Writes one block to <paramref name="writer"/>: the first code point of its range,
        /// then the last code point, then the block name.
        /// </summary>
        /// <param name="writer">The writer that receives the data.</param>
        /// <param name="block">The block to write.</param>
        private void WriteUnicodeBlockToFile(BinaryWriter writer, UnicodeBlock block)
        {
            var range = block.CodePointRange;

            writer.WriteCodePoint(range.FirstCodePoint);
            writer.WriteCodePoint(range.LastCodePoint);
            writer.Write(block.Name);
        }
Beispiel #10
0
 /// <summary>
 /// Appends a block to <c>blockEntries</c>.
 /// </summary>
 /// <param name="block">The block to add.</param>
 public void AddBlockEntry(UnicodeBlock block) => blockEntries.Add(block);
 /// <summary>
 /// Initializes the instance by storing both arguments in the fields of the same name.
 /// </summary>
 /// <param name="flag">The unicode block kept in the <c>flag</c> field.</param>
 /// <param name="base">The value kept in the <c>base</c> field.</param>
 internal ScriptData(UnicodeBlock flag, int @base)
 {
     this.@base = @base;
     this.flag = flag;
 }
Beispiel #12
0
 /// <summary>
 /// Determines whether this instance and <paramref name="block"/> hold the same <c>_block</c> value.
 /// </summary>
 /// <param name="block">The block to compare with; may be <c>null</c>.</param>
 /// <returns>
 /// <c>true</c> when both <c>_block</c> values are equal; <c>false</c> when they differ or
 /// when <paramref name="block"/> is <c>null</c>.
 /// </returns>
 internal bool Equals(UnicodeBlock block)
 {
     // Compare as object so this is a plain reference test that bypasses any == overload.
     if ((object)block == null)
     {
         return false;
     }
     return _block == block._block;
 }