コード例 #1
0
        /// <summary>
        /// Normalizes <paramref name="buddyText"/> by swapping every word that the synonym
        /// registry knows for the root word the registry returns for it.
        /// </summary>
        /// <param name="buddyText">Buddy text to process</param>
        /// <returns>Unambiguous <paramref name="buddyText"/>, i.e. the text with all recognized synonyms replaced</returns>
        public virtual string ResolveAmbiguity(string buddyText)
        {
            var registry = new SynMapRegistry();

            // TODO Remove hack: this two-word phrase is rewritten before the word-by-word pass
            buddyText = buddyText.Replace("nicht sichtbar", "verschwunden");

            // Stays equal to the pre-processed input unless at least one word gets replaced
            string result = buddyText;

            WordIterator words = buddyText.GetWordIterator();
            while (words.MoveNext())
            {
                WordIterator.Word current = words.Current;
                if (current == null)
                {
                    break;
                }

                string candidate = current.Text;

                // Only words with a registered root word are touched
                if (registry.TryGetRootWord(candidate, out string root))
                {
                    current.Replace(root);

                    // Take over the iterator's string data after the replacement
                    result = words.GetStringData();
                }
            }

            return result;
        }
コード例 #2
0
        /// <summary>
        /// Removes every word of <paramref name="buddyText"/> that is contained in a fixed
        /// list of standard prepositions.
        /// </summary>
        /// <param name="buddyText">Buddy text to process</param>
        /// <returns><paramref name="buddyText"/> without the listed prepositions</returns>
        public virtual string StripPrepositions(string buddyText)
        {
            // Words to drop; compared case-insensitively using the invariant culture
            var stopWords = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase)
            {
                "in", "im", "aus", "ein", "ob", "bis", "auf", "zu", "unter",
                "da", "wo"
            };

            // Stays equal to the input unless at least one word gets removed
            string result = buddyText;

            WordIterator words = buddyText.GetWordIterator();
            while (words.MoveNext())
            {
                WordIterator.Word current = words.Current;
                if (current == null)
                {
                    break;
                }

                // Only listed prepositions are removed
                if (stopWords.Contains(current.Text))
                {
                    current.Remove();

                    // Take over the iterator's string data after the removal
                    result = words.GetStringData();
                }
            }

            return result;
        }
コード例 #3
0
        /// <summary>
        /// Removes every word of <paramref name="buddyText"/> that is contained in a fixed
        /// list of standard auxiliary verbs.
        /// </summary>
        /// <param name="buddyText">Buddy text to process</param>
        /// <returns><paramref name="buddyText"/> without the listed auxiliary verbs</returns>
        public virtual string StripAuxiliaryVerbs(string buddyText)
        {
            // Words to drop; compared case-insensitively using the invariant culture
            var stopWords = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase)
            {
                "ist"
            };

            // Stays equal to the input unless at least one word gets removed
            string result = buddyText;

            WordIterator words = buddyText.GetWordIterator();
            while (words.MoveNext())
            {
                WordIterator.Word current = words.Current;
                if (current == null)
                {
                    break;
                }

                // Only listed auxiliary verbs are removed
                if (stopWords.Contains(current.Text))
                {
                    current.Remove();

                    // Take over the iterator's string data after the removal
                    result = words.GetStringData();
                }
            }

            return result;
        }
コード例 #4
0
        /// <summary>
        /// Removes every word of <paramref name="buddyText"/> that is contained in a fixed
        /// list of standard articles.
        /// </summary>
        /// <param name="buddyText">Buddy text to process</param>
        /// <returns><paramref name="buddyText"/> without the listed articles</returns>
        public virtual string StripArticles(string buddyText)
        {
            // Words to drop; compared case-insensitively using the invariant culture
            var stopWords = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase)
            {
                "der", "die", "das", "den", "dem", "ein", "eine"
            };

            // Stays equal to the input unless at least one word gets removed
            string result = buddyText;

            WordIterator words = buddyText.GetWordIterator();
            while (words.MoveNext())
            {
                WordIterator.Word current = words.Current;
                if (current == null)
                {
                    break;
                }

                // Only listed articles are removed
                if (stopWords.Contains(current.Text))
                {
                    current.Remove();

                    // Take over the iterator's string data after the removal
                    result = words.GetStringData();
                }
            }

            return result;
        }
コード例 #5
0
        /// <summary>
        /// Strips typically unneeded substantives from the given <paramref name="buddyText"/>.
        /// Words are matched case-insensitively against a fixed list using the invariant culture,
        /// so the result does not depend on the culture of the executing thread.
        /// </summary>
        /// <param name="buddyText">Buddy text to process</param>
        /// <returns><paramref name="buddyText"/> without the listed substantives</returns>
        public virtual string StripSubstantives(string buddyText)   // TODO Add unit test
        {
            // Substantives to remove.
            // The invariant comparer is used (as in the sibling Strip* methods) because the list is
            // fixed; a current-culture comparer would make matching vary with the thread culture.
            HashSet <string> substantives = new HashSet <string>(
                new[] {
                "Wert", "Button", "Schaltfläche", "Navigation", "Spalte", "Auswahlbox",
                "Dokument", "Auftrag", "Bestellung",

                // GTUE-related -> Remove all upper-case words?
                "Kennzeichen",
                "X", "XX", "XXX"
            },
                StringComparer.InvariantCultureIgnoreCase
                );

            // Stays equal to the input unless at least one word gets removed
            string strippedBuddyText = buddyText;

            for (WordIterator itr = buddyText.GetWordIterator(); itr.MoveNext();)
            {
                WordIterator.Word word = itr.Current;
                if (word == null)
                {
                    break;
                }

                // Check if the word is one of the listed substantives
                if (!substantives.Contains(word.Text))
                {
                    continue;
                }
                word.Remove();

                // Update return value
                strippedBuddyText = itr.GetStringData();
            }

            return strippedBuddyText;
        }