예제 #1
0
    /// <summary>
    /// Rewrites the text of <c>field</c> so that only text elements consisting of a
    /// single <c>char</c> remain.
    /// </summary>
    /// <remarks>
    /// Any text element longer than one <c>char</c> is dropped. The intent is to strip
    /// surrogate pairs; other multi-char text elements are removed by the same rule.
    /// Nothing happens when <c>field</c> is null or its text is empty.
    /// </remarks>
    public void OnValueChange()
    {
        if (field == null)
        {
            return;
        }

        string current = field.text;
        if (current.Length == 0)
        {
            return;
        }

        var kept = new System.Text.StringBuilder();
        var elements = System.Globalization.StringInfo.GetTextElementEnumerator(current);
        elements.Reset();

        while (elements.MoveNext())
        {
            // One text element per iteration; keep it only if it is a single char.
            string element = elements.GetTextElement();
            if (element.Length <= 1)
            {
                kept.Append(element);
            }
        }

        // Hand the filtered text back to the input field.
        field.text = kept.ToString();
    }
예제 #2
0
        /// <summary>
        /// Creates a new Tokenizer object that will read from the given input.
        /// </summary>
        /// <param name="input">Where to read input from. Must not be null.</param>
        /// <param name="name">The name of the input, used for debugging.</param>
        /// <exception cref="System.ArgumentNullException">If <paramref name="input"/> is null.</exception>
        public Tokenizer(TextElementEnumerator input, string name)
        {
            if (input == null)
            {
                // Pass the real parameter name so ArgumentNullException.ParamName is accurate.
                throw new ArgumentNullException("input");
            }

            this.peek = new Stack<Token>();
            this.input = input;
            this.Name = name;

            // Position and line counters both start at 1.
            this.Position = 1;
            this.Line = 1;

            // Advance the enumerator once so it is started before the first read.
            input.MoveNext();
        }
예제 #3
0
파일: Strings.cs 프로젝트: wzchua/docs
        /// <summary>
        /// Demonstrates converting text from code page 737 ((DOS) Greek) to Unicode:
        /// writes a small code-page file, decodes it, saves it again as Unicode text,
        /// and prints the byte values and the code points of both representations.
        /// </summary>
        static void Main()
        {
            const string codePagePath = @"greek.txt";
            const string unicodePath = @"greek_unicode.txt";

            // Create a file holding the Greek word ψυχή (psyche) as it is encoded in
            // code page 737 ((DOS) Greek). The same file can be produced by pasting the
            // characters from Character Map into Microsoft Word and using "Save As" with
            // the DOS (Greek) encoding (Word appends "\r\n", giving a six-byte file).
            System.IO.File.WriteAllBytes(codePagePath, new byte[] { 0xAF, 0xAC, 0xAE, 0x9E });

            // The code page must be named explicitly for the bytes to be interpreted correctly.
            Encoding greekDos = Encoding.GetEncoding(737);

            byte[] rawBytes = System.IO.File.ReadAllBytes(codePagePath);

            // Decode into a .NET string (UTF-16).
            string decoded = greekDos.GetString(rawBytes);

            // Show that the text survived the conversion
            // (requires a reference to System.Windows.Forms.dll).
            System.Windows.Forms.MessageBox.Show(decoded);

            // Persist the same content "ψυχή"; WriteAllText stores it as UTF-8.
            System.IO.File.WriteAllText(unicodePath, decoded);

            // Dump the original bytes to prove what was converted.
            Console.WriteLine("8-bit encoding byte values:");
            for (int index = 0; index < rawBytes.Length; index++)
            {
                Console.Write("{0:X}-", rawBytes[index]);
            }

            Console.WriteLine();
            Console.WriteLine("Unicode values:");
            string reloaded = System.IO.File.ReadAllText(unicodePath);

            // Walk the text one text element at a time and print each code point.
            System.Globalization.TextElementEnumerator elements =
                System.Globalization.StringInfo.GetTextElementEnumerator(reloaded);
            while (elements.MoveNext())
            {
                string element = elements.GetTextElement();
                Console.Write("{0:X}-", Char.ConvertToUtf32(element, 0));
            }
            Console.WriteLine();

            // Keep the console window open in debug mode.
            Console.Write("Press any key to exit.");
            Console.ReadKey();
        }
예제 #4
0
        /// <summary>Returns the indexes of each base character, high surrogate, or control character within the specified string.</summary>
        /// <returns>An array of integers that contains the zero-based indexes of each base character, high surrogate, or control character within the specified string.</returns>
        /// <param name="str">The string to search. </param>
        /// <exception cref="T:System.ArgumentNullException">
        ///   <paramref name="str" /> is null. </exception>
        public static int[] ParseCombiningCharacters(string str)
        {
            if (str == null)
            {
                // The single-string constructor takes the parameter NAME, not a message.
                throw new ArgumentNullException("str");
            }

            // A typed list avoids boxing every index; str.Length is an upper bound on the element count.
            System.Collections.Generic.List<int> startIndexes =
                new System.Collections.Generic.List<int>(str.Length);

            // A freshly created enumerator is already positioned before the first element.
            TextElementEnumerator elements = StringInfo.GetTextElementEnumerator(str);
            while (elements.MoveNext())
            {
                // ElementIndex is the index in str at which the current text element starts.
                startIndexes.Add(elements.ElementIndex);
            }

            return startIndexes.ToArray();
        }
예제 #5
0
 //     INITIALIZATION
 //_________________________________________________________________________________________
 /// <summary>
 /// Initializes a string iterator over the text elements of the given string.
 /// </summary>
 /// <param name="prototype"> The next object in the prototype chain. </param>
 /// <param name="iteratedString"> The string whose text elements will be enumerated. </param>
 internal StringIterator(ObjectInstance prototype, string iteratedString)
     : base(prototype)
 {
     // Enumerate by text element rather than by individual char.
     var textElements = StringInfo.GetTextElementEnumerator(iteratedString);
     enumerator = textElements;
 }
예제 #6
0
    /// <summary>
    /// Measures the length of every word in a sample sentence twice, once counting
    /// <c>char</c> values and once counting text elements, and prints both
    /// columns side by side.
    /// </summary>
    public static void Main()
    {
        // <Snippet6>
        // First sentence of The Mystery of the Yellow Room, by Leroux.
        string opening = "Ce n'est pas sans une certaine émotion que " +
                         "je commence à raconter ici les aventures " +
                         "extraordinaires de Joseph Rouletabille.";

        const char apostropheChar = '\u0027';
        const string apostropheText = "\u0027";

        // Length of the word currently being scanned (shared by both passes).
        int nChars = 0;
        // Per-word lengths: by char, and by text element.
        List <int> charLengths    = new List <int>();
        List <int> elementLengths = new List <int>();

        // Pass 1: count individual char values.
        foreach (char current in opening)
        {
            // The apostrophe is ignored entirely.
            if (current == apostropheChar)
            {
                continue;
            }

            bool endsWord = Char.IsWhiteSpace(current) || Char.IsPunctuation(current);
            if (!endsWord)
            {
                nChars++;
                continue;
            }

            charLengths.Add(nChars);
            nChars = 0;
        }

        // Pass 2: count text elements.
        System.Globalization.TextElementEnumerator elements =
            System.Globalization.StringInfo.GetTextElementEnumerator(opening);
        while (elements.MoveNext())
        {
            string element = elements.GetTextElement();
            // The apostrophe is ignored entirely.
            if (element == apostropheText)
            {
                continue;
            }

            bool isBlank = String.IsNullOrEmpty(element.Trim());
            bool isPunctuation = element.Length == 1 && Char.IsPunctuation(Convert.ToChar(element));
            if (isBlank || isPunctuation)
            {
                elementLengths.Add(nChars);
                nChars = 0;
            }
            else
            {
                nChars++;
            }
        }

        // Display character counts.
        Console.WriteLine("{0,6} {1,20} {2,20}",
                          "Word #", "Char Objects", "Characters");
        int wordIndex = 0;
        while (wordIndex < charLengths.Count)
        {
            Console.WriteLine("{0,6} {1,20} {2,20}",
                              wordIndex, charLengths[wordIndex], elementLengths[wordIndex]);
            wordIndex++;
        }
        // The example displays the following output:
        //       Word #         Char Objects           Characters
        //            0                    2                    2
        //            1                    4                    4
        //            2                    3                    3
        //            3                    4                    4
        //            4                    3                    3
        //            5                    8                    8
        //            6                    8                    7
        //            7                    3                    3
        //            8                    2                    2
        //            9                    8                    8
        //           10                    2                    1
        //           11                    8                    8
        //           12                    3                    3
        //           13                    3                    3
        //           14                    9                    9
        //           15                   15                   15
        //           16                    2                    2
        //           17                    6                    6
        //           18                   12                   12
        // </Snippet6>
    }
    /// <summary>
    /// Takes the editor text (the selection if one is active, otherwise the whole text),
    /// optionally Unicode-normalizes it, then encodes it to the requested code page and
    /// decodes it back to a string.
    /// </summary>
    /// <param name="code">Code page identifier handed to <c>Encoding.GetEncoding</c>.</param>
    /// <param name="nNormalizeForm">
    /// 2: normalize the whole text; 1: normalize only text elements longer than one
    /// <c>char</c>; any other value: no normalization.
    /// </param>
    /// <returns>
    /// The round-tripped text, or, if any stage throws, a string made of the exception
    /// type, its message and a hint for the user.
    /// </returns>
    public static String ToEncode(IntPtr code, IntPtr nNormalizeForm)
    {
        // Stage 1: fetch the source text from the editor.
        String source = "";

        try
        {
            if ((Int32)(dynamic)Hm.Macro.Var["selecting"] != 1)
            {
                source = Hm.Edit.TotalText;
            }
            else
            {
                source = Hm.Edit.SelectedText;
            }

            // Texts with fewer than two line feeds get two more appended.
            // NOTE(review): the reason for this padding is not visible here — confirm with the caller.
            int lineFeeds = source.Count(c => c == '\n');
            if (lineFeeds < 2)
            {
                source += "\n\n";
            }
        }
        catch (Exception ex0)
        {
            return(ex0.GetType() + ":" + ex0.Message + "\n元のファイルにバイナリが混在していないかを確認してください。");
        }

        // Stage 2: apply the requested normalization mode.
        String normalized = "";

        try
        {
            switch (nNormalizeForm.ToInt32())
            {
                case 2:
                    normalized = source.Normalize();
                    break;

                case 1:
                {
                    // Reserve roughly 500k chars up front as the default capacity.
                    StringBuilder buffer = new StringBuilder(500 * 1024);

                    // Walk the text one text element at a time, starting from the beginning.
                    System.Globalization.TextElementEnumerator elements =
                        System.Globalization.StringInfo.GetTextElementEnumerator(source);
                    elements.Reset();

                    while (elements.MoveNext())
                    {
                        string element = elements.GetTextElement();

                        // An element made of two or more chars is treated as a surrogate pair
                        // or a combining sequence and is normalized; single chars pass through.
                        buffer.Append(element.Length > 1 ? element.Normalize() : element);
                    }

                    normalized = buffer.ToString();
                    break;
                }

                default:
                    normalized = source;
                    break;
            }
        }
        catch (Exception ex1)
        {
            return(ex1.GetType() + ":" + ex1.Message + "\n元のファイルにバイナリが混在していないかを確認してください。");
        }

        // Stage 3: round-trip through the target encoding.
        try
        {
            // Obtain the encoding with the custom encoder fallback installed.
            var targetEncoding = Encoding.GetEncoding(code.ToInt32(), new HmEncoderScalarValueFallback(), DecoderFallback.ReplacementFallback);

            byte[] encoded = targetEncoding.GetBytes(normalized);

            // Decode the bytes back into a string for the caller.
            return(targetEncoding.GetString(encoded));
        }
        catch (Exception ex2)
        {
            return(ex2.GetType() + ":" + ex2.Message + "\n指定の「#ToEncodeCodePage」の値等が正しいか、よく確認してください。");
        }
    }