Ejemplo n.º 1
0
 /// <summary>
 /// Splits a string into tokens using the Russian ("ru-RU") word-segmentation rules.
 /// </summary>
 /// <param name="src">The source string; <c>null</c> is treated as an empty string.</param>
 /// <returns>A lazily evaluated sequence containing the text of each token reported by the segmenter.</returns>
 public IEnumerable<string> Split(string src)
 {
     // Iterator method: none of the code below runs until the caller starts enumerating.
     var text = src ?? "";
     var segmenter = new Windows.Data.Text.WordsSegmenter("ru-RU");

     foreach (var segment in segmenter.GetTokens(text))
     {
         yield return segment.Text;
     }
 }
        /// <summary>
        /// Click handler that finds the word segment containing a given character index.
        ///
        /// Reads the input string, the language tag and the character index from the
        /// page's text boxes, validates them, and then asks the Text Segmentation API
        /// (<c>WordsSegmenter.GetTokenAt</c>) for the segment at that index.
        /// Segment breaking behavior is based off of the language-tag input, which defines
        /// which language rules to use. The result, or the first validation error
        /// encountered, is reported through <c>rootPage.NotifyUser</c>.
        /// </summary>
        /// <param name="sender">The object that raised the event; nothing happens unless it is a <c>Button</c>.</param>
        /// <param name="e">Event data that describes the click action on the button.</param>
        private void WordSegmentButton_Click(object sender, RoutedEventArgs e)
        {
            // Only react to clicks that actually originate from a Button.
            if (!(sender is Button))
            {
                return;
            }

            // Collects informational lines that are shown together with the result.
            // On a validation error only the error message is shown; these lines are dropped.
            StringBuilder notifyText = new StringBuilder();

            // Obtain the input string value, check for non-emptiness
            String inputStringText = inputStringBox.Text;
            if (String.IsNullOrEmpty(inputStringText))
            {
                rootPage.NotifyUser("Cannot compute word segments: input string is empty.", NotifyType.ErrorMessage);
                return;
            }

            // Obtain the language tag value, check for non-emptiness
            // Ex. Valid Values:
            //     "en-US" (English (United States))
            //     "fr-FR" (French (France))
            //     "de-DE" (German (Germany))
            //     "ja-JP" (Japanese (Japan))
            //     "ar-SA" (Arabic (Saudi Arabia))
            //     "zh-CN" (China (PRC))
            String languageTagText = languageTagBox.Text;
            if (String.IsNullOrEmpty(languageTagText))
            {
                notifyText.AppendLine("Language tag input is empty ... using generic-language segmentation rules.");
                languageTagText = "und";    // This is used for non language-specific locales. 'und' is short for 'undetermined'.
            }
            else if (!Windows.Globalization.Language.IsWellFormed(languageTagText))
            {
                rootPage.NotifyUser("Language tag is not well formed: \"" + languageTagText + "\"", NotifyType.ErrorMessage);
                return;
            }

            // Obtain the input index; an empty box means "use index 0".
            String inputIndexString = indexBox.Text;
            uint inputIndex = 0;
            if (String.IsNullOrEmpty(inputIndexString))
            {
                notifyText.AppendLine("No input index provided ... using first segment reference (index = 0) as default.");
            }
            else
            {
                try
                {
                    // Parse the value captured above rather than reading the text box a second time.
                    inputIndex = Convert.ToUInt32(inputIndexString);
                }
                catch (FormatException)
                {
                    rootPage.NotifyUser("Invalid index supplied.\n\nPlease check that this value is valid, and non-negative.", NotifyType.ErrorMessage);
                    return;
                }
                catch (OverflowException)
                {
                    // Convert.ToUInt32 throws OverflowException both for negative values and for
                    // values above UInt32.MaxValue, so the message must cover both cases.
                    rootPage.NotifyUser("Invalid index supplied: index is negative or too large.\n\nPlease check that this value is valid, and non-negative.", NotifyType.ErrorMessage);
                    return;
                }

                // The index is unsigned, so only the upper bound needs checking here.
                if (inputIndex >= inputStringText.Length)
                {
                    rootPage.NotifyUser("Invalid index supplied ... cannot use a negative index, or an index that is out of bounds of the input string.\n\nPlease re-check the index value, and try again.", NotifyType.ErrorMessage);
                    return;
                }
            }

            // Notify that we are going to calculate word segment
            notifyText.AppendLine("\nFinding the word segment for the given index ...\n");
            notifyText.AppendLine("Input: \"" + inputStringText + "\"");
            notifyText.AppendLine("Language Tag: \"" + languageTagText + "\"");
            notifyText.AppendLine("Index: " + inputIndex + "\n");

            // Construct the WordsSegmenter instance
            var segmenter = new Windows.Data.Text.WordsSegmenter(languageTagText);

            // Obtain the token segment
            var tokenSegment = segmenter.GetTokenAt(inputStringText, inputIndex);
            notifyText.AppendLine("Indexed segment: \"" + tokenSegment.Text + "\"");

            // Set output box text to the contents of the StringBuilder instance
            rootPage.NotifyUser(notifyText.ToString(), NotifyType.StatusMessage);
        }
        /// <summary>
        /// Click handler that finds the word segment containing a given character index.
        ///
        /// When activated, it reads the input string, language tag and character index
        /// from the page's text boxes, validates each of them, and uses the Text
        /// Segmentation API (<c>WordsSegmenter.GetTokenAt</c>) to obtain the segment whose
        /// text bounds contain that index. Segment breaking behavior is based off of the
        /// language-tag input, which defines which language rules to use. The outcome,
        /// or the first validation error, is reported through <c>rootPage.NotifyUser</c>.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Event data that describes the click action on the button.</param>
        private void WordSegmentButton_Click(object sender, RoutedEventArgs e)
        {
            // Informational lines shown together with the result. When validation fails,
            // only the error message is displayed and these lines are discarded.
            StringBuilder notifyText = new StringBuilder();

            // Obtain the input string value, check for non-emptiness
            String inputStringText = inputStringBox.Text;

            if (String.IsNullOrEmpty(inputStringText))
            {
                rootPage.NotifyUser("Cannot compute word segments: input string is empty.", NotifyType.ErrorMessage);
                return;
            }

            // Obtain the language tag value, check for non-emptiness
            // Ex. Valid Values:
            //     "en-US" (English (United States))
            //     "fr-FR" (French (France))
            //     "de-DE" (German (Germany))
            //     "ja-JP" (Japanese (Japan))
            //     "ar-SA" (Arabic (Saudi Arabia))
            //     "zh-CN" (China (PRC))
            String languageTagText = languageTagBox.Text;

            if (String.IsNullOrEmpty(languageTagText))
            {
                notifyText.AppendLine("Language tag input is empty ... using generic-language segmentation rules.");
                languageTagText = "und";    // This is used for non language-specific locales. 'und' is short for 'undetermined'.
            }
            else if (!Windows.Globalization.Language.IsWellFormed(languageTagText))
            {
                rootPage.NotifyUser("Language tag is not well formed: \"" + languageTagText + "\"", NotifyType.ErrorMessage);
                return;
            }

            // Obtain the input index; when the box is empty, index 0 is used.
            String inputIndexString = indexBox.Text;
            uint   inputIndex       = 0;

            if (String.IsNullOrEmpty(inputIndexString))
            {
                notifyText.AppendLine("No input index provided ... using first segment reference (index = 0) as default.");
            }
            else
            {
                try
                {
                    // Use the text captured above instead of querying the text box again.
                    inputIndex = Convert.ToUInt32(inputIndexString);
                }
                catch (FormatException)
                {
                    rootPage.NotifyUser("Invalid index supplied.\n\nPlease check that this value is valid, and non-negative.", NotifyType.ErrorMessage);
                    return;
                }
                catch (OverflowException)
                {
                    // OverflowException is raised for negative input as well as for input
                    // greater than UInt32.MaxValue, so the message names both possibilities.
                    rootPage.NotifyUser("Invalid index supplied: index is negative or too large.\n\nPlease check that this value is valid, and non-negative.", NotifyType.ErrorMessage);
                    return;
                }

                // An unsigned index can never be below zero; only the upper bound is checked.
                if (inputIndex >= inputStringText.Length)
                {
                    rootPage.NotifyUser("Invalid index supplied ... cannot use a negative index, or an index that is out of bounds of the input string.\n\nPlease re-check the index value, and try again.", NotifyType.ErrorMessage);
                    return;
                }
            }

            // Notify that we are going to calculate word segment
            notifyText.AppendLine("\nFinding the word segment for the given index ...\n");
            notifyText.AppendLine("Input: \"" + inputStringText + "\"");
            notifyText.AppendLine("Language Tag: \"" + languageTagText + "\"");
            notifyText.AppendLine("Index: " + inputIndex + "\n");

            // Construct the WordsSegmenter instance
            var segmenter = new Windows.Data.Text.WordsSegmenter(languageTagText);

            // Obtain the token segment
            var tokenSegment = segmenter.GetTokenAt(inputStringText, inputIndex);

            notifyText.AppendLine("Indexed segment: \"" + tokenSegment.Text + "\"");

            // Set output box text to the contents of the StringBuilder instance
            rootPage.NotifyUser(notifyText.ToString(), NotifyType.StatusMessage);
        }
        /// <summary>
        /// Click handler that lists all word segments of the input string.
        ///
        /// Reads the input string and language tag from the page's text boxes, runs the
        /// Text Segmentation API (<c>WordsSegmenter.GetTokens</c>) over the input, and
        /// reports every word segment together with its alternate forms through
        /// <c>rootPage.NotifyUser</c>. Validation failures are reported as error messages.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Event data that describes the click action on the button.</param>
        private void WordSegmentsButton_Click(object sender, RoutedEventArgs e)
        {
            // The text to segment is mandatory.
            string text = inputStringBox.Text;
            if (string.IsNullOrEmpty(text))
            {
                rootPage.NotifyUser("Cannot compute word segments: input string is empty.", NotifyType.ErrorMessage);
                return;
            }

            // Accumulates everything that is shown on success.
            StringBuilder output = new StringBuilder();

            // The language tag is optional. Examples of valid values:
            //     "en-US" (English (United States))
            //     "fr-FR" (French (France))
            //     "de-DE" (German (Germany))
            //     "ja-JP" (Japanese (Japan))
            //     "ar-SA" (Arabic (Saudi Arabia))
            //     "zh-CN" (China (PRC))
            string tag = languageTagBox.Text;
            if (string.IsNullOrEmpty(tag))
            {
                output.AppendLine("Language tag input is empty ... using generic-language segmentation rules.");

                // 'und' (undetermined) is the tag for non language-specific locales.
                tag = "und";
            }
            else if (!Windows.Globalization.Language.IsWellFormed(tag))
            {
                rootPage.NotifyUser("Language tag is not well formed: \"" + tag + "\"", NotifyType.ErrorMessage);
                return;
            }

            // Echo the effective inputs before the results.
            output.AppendLine("\nCalculating word segments ...\n");
            output.Append("Input: \"").Append(text).AppendLine("\"");
            output.Append("Language Tag: \"").Append(tag).AppendLine("\"");

            // Tokenize the input and print each word followed by its alternate forms.
            var wordsSegmenter = new Windows.Data.Text.WordsSegmenter(tag);
            foreach (var segment in wordsSegmenter.GetTokens(text))
            {
                output.Append("\tWord---->\"").Append(segment.Text).AppendLine("\"");

                foreach (var alternate in segment.AlternateForms)
                {
                    output.Append("\t\tAlternate---->\"").Append(alternate.AlternateText).AppendLine("\"");
                }
            }

            // Send the collected text to the output pane.
            rootPage.NotifyUser(output.ToString(), NotifyType.StatusMessage);
        }
        /// <summary>
        /// Click handler that computes and prints the word segments of the input string.
        ///
        /// The input string and the language tag are taken from the page's text boxes.
        /// After validation, the Text Segmentation API splits the input into word
        /// segments, and each segment is printed along with its alternate forms via
        /// <c>rootPage.NotifyUser</c>. If validation fails, only an error message is shown.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Event data that describes the click action on the button.</param>
        private void WordSegmentsButton_Click(object sender, RoutedEventArgs e)
        {
            StringBuilder report = new StringBuilder();

            // Step 1: the input string must not be empty.
            String source = inputStringBox.Text;
            bool hasSource = !String.IsNullOrEmpty(source);

            if (!hasSource)
            {
                String emptyInputMessage = "Cannot compute word segments: input string is empty.";
                rootPage.NotifyUser(emptyInputMessage, NotifyType.ErrorMessage);
                return;
            }

            // Step 2: resolve the language tag. Examples of valid values:
            //     "en-US" (English (United States))
            //     "fr-FR" (French (France))
            //     "de-DE" (German (Germany))
            //     "ja-JP" (Japanese (Japan))
            //     "ar-SA" (Arabic (Saudi Arabia))
            //     "zh-CN" (China (PRC))
            String language = languageTagBox.Text;

            if (!String.IsNullOrEmpty(language))
            {
                // A tag was supplied, so it has to be well formed.
                if (!Windows.Globalization.Language.IsWellFormed(language))
                {
                    String badTagMessage = "Language tag is not well formed: \"" + language + "\"";
                    rootPage.NotifyUser(badTagMessage, NotifyType.ErrorMessage);
                    return;
                }
            }
            else
            {
                // No tag supplied: fall back to 'und' (undetermined), the non language-specific tag.
                report.AppendLine("Language tag input is empty ... using generic-language segmentation rules.");
                language = "und";
            }

            // Step 3: describe what is about to be calculated.
            String inputLine    = "Input: \"" + source + "\"";
            String languageLine = "Language Tag: \"" + language + "\"";

            report.AppendLine("\nCalculating word segments ...\n");
            report.AppendLine(inputLine);
            report.AppendLine(languageLine);

            // Step 4: segment the input and list each word with its alternate forms.
            var tokens = new Windows.Data.Text.WordsSegmenter(language).GetTokens(source);

            foreach (var token in tokens)
            {
                String wordLine = "\tWord---->\"" + token.Text + "\"";
                report.AppendLine(wordLine);

                foreach (var form in token.AlternateForms)
                {
                    String alternateLine = "\t\tAlternate---->\"" + form.AlternateText + "\"";
                    report.AppendLine(alternateLine);
                }
            }

            // Step 5: send the report to the output pane.
            rootPage.NotifyUser(report.ToString(), NotifyType.StatusMessage);
        }