/// <summary>
/// Replaces every known synonym of an action pattern key word with its root word.
/// </summary>
/// <param name="buddyText">Buddy text to process</param>
/// <returns>Unambiguous <paramref name="buddyText"/></returns>
public virtual string ResolveAmbiguity(string buddyText)
{
    SynMapRegistry registry = new SynMapRegistry();

    // TODO Remove this hack: the phrase is rewritten up front, before the
    // word-by-word synonym lookup below runs.
    buddyText = buddyText.Replace("nicht sichtbar", "verschwunden");

    // Falls back to the (already pre-processed) input when no synonym is found.
    string resolvedText = buddyText;

    WordIterator words = buddyText.GetWordIterator();
    while (words.MoveNext())
    {
        WordIterator.Word current = words.Current;
        if (current == null)
        {
            // A null current word ends the scan early.
            break;
        }

        // Only words the registry knows a root word for are touched.
        if (registry.TryGetRootWord(current.Text, out string rootWord))
        {
            current.Replace(rootWord);

            // Re-read the text from the iterator after each replacement.
            resolvedText = words.GetStringData();
        }
    }

    return resolvedText;
}
/// <summary>
/// Removes a fixed list of words (mostly prepositions) from the given <paramref name="buddyText"/>.
/// </summary>
/// <param name="buddyText">Buddy text to process</param>
/// <returns><paramref name="buddyText"/> without the listed prepositions</returns>
public virtual string StripPrepositions(string buddyText)
{
    // Words to remove; matching ignores case (invariant culture).
    // NOTE(review): the list also holds words that are not prepositions in the
    // strict sense (e.g. "ein", "ob", "da", "wo") - confirm this is intended.
    HashSet<string> wordsToDrop = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase)
    {
        "in", "im", "aus", "ein", "ob", "bis", "auf", "zu", "unter", "da", "wo"
    };

    // Falls back to the unchanged input when nothing is removed.
    string result = buddyText;

    WordIterator words = buddyText.GetWordIterator();
    while (words.MoveNext())
    {
        WordIterator.Word current = words.Current;
        if (current == null)
        {
            // A null current word ends the scan early.
            break;
        }

        if (wordsToDrop.Contains(current.Text))
        {
            current.Remove();

            // Re-read the text from the iterator after each removal.
            result = words.GetStringData();
        }
    }

    return result;
}
/// <summary>
/// Removes standard auxiliary verbs from the given <paramref name="buddyText"/>.
/// </summary>
/// <param name="buddyText">Buddy text to process</param>
/// <returns><paramref name="buddyText"/> without auxiliary verbs</returns>
public virtual string StripAuxiliaryVerbs(string buddyText)
{
    // Auxiliary verbs to remove; matching ignores case (invariant culture).
    HashSet<string> verbsToDrop = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase)
    {
        "ist"
    };

    // Falls back to the unchanged input when nothing is removed.
    string result = buddyText;

    WordIterator words = buddyText.GetWordIterator();
    while (words.MoveNext())
    {
        WordIterator.Word current = words.Current;
        if (current == null)
        {
            // A null current word ends the scan early.
            break;
        }

        // Only auxiliary verbs from the list are removed.
        if (verbsToDrop.Contains(current.Text))
        {
            current.Remove();

            // Re-read the text from the iterator after each removal.
            result = words.GetStringData();
        }
    }

    return result;
}
/// <summary>
/// Removes standard articles from the given <paramref name="buddyText"/>.
/// </summary>
/// <param name="buddyText">Buddy text to process</param>
/// <returns><paramref name="buddyText"/> without articles</returns>
public virtual string StripArticles(string buddyText)
{
    // Articles to remove; matching ignores case (invariant culture).
    HashSet<string> articlesToDrop = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase)
    {
        "der", "die", "das", "den", "dem", "ein", "eine"
    };

    // Falls back to the unchanged input when nothing is removed.
    string result = buddyText;

    WordIterator words = buddyText.GetWordIterator();
    while (words.MoveNext())
    {
        WordIterator.Word current = words.Current;
        if (current == null)
        {
            // A null current word ends the scan early.
            break;
        }

        // Only articles from the list are removed.
        if (articlesToDrop.Contains(current.Text))
        {
            current.Remove();

            // Re-read the text from the iterator after each removal.
            result = words.GetStringData();
        }
    }

    return result;
}
/// <summary>
/// Strips typically unneeded substantives from the given <paramref name="buddyText"/>.
/// </summary>
/// <param name="buddyText">Buddy text to process</param>
/// <returns><paramref name="buddyText"/> without the listed substantives</returns>
public virtual string StripSubstantives(string buddyText)
{
    // TODO Add unit test

    // Substantives to remove. Matching ignores case and uses the invariant
    // culture (like the other Strip* methods) so that the result does not
    // depend on the culture of the machine the code runs on.
    HashSet<string> substantives = new HashSet<string>(
        new[]
        {
            "Wert", "Button", "Schaltfläche", "Navigation", "Spalte", "Auswahlbox", "Dokument", "Auftrag",
            "Bestellung",
            // GTUE-related -> Remove all upper-case words?
            "Kennzeichen", "X", "XX", "XXX"
        },
        StringComparer.InvariantCultureIgnoreCase
    );

    // Falls back to the unchanged input when nothing is removed.
    string strippedBuddyText = buddyText;

    for (WordIterator itr = buddyText.GetWordIterator(); itr.MoveNext();)
    {
        WordIterator.Word word = itr.Current;
        if (word == null)
        {
            // A null current word ends the scan early.
            break;
        }

        // Check if the word is one of the substantives to remove
        if (!substantives.Contains(word.Text))
        {
            continue;
        }

        word.Remove();

        // Update return value: re-read the text from the iterator after each removal
        strippedBuddyText = itr.GetStringData();
    }

    return strippedBuddyText;
}