/// <summary>
/// Looks up the tags of a word in <c>tagDictionary</c> and passes the result through
/// <c>GenderUtil.RemoveGender</c>.
/// </summary>
/// <remarks>
/// The word is looked up exactly as given; if that lookup yields <c>null</c>, its
/// invariant lower-cased form is tried. When <paramref name="recurse"/> is <c>true</c> and the
/// word is a hyphen followed by at least one more character, the lookup is performed on the
/// word without its leading hyphen instead (one level only).
/// The caller is expected to have checked that <c>tagDictionary</c> is not <c>null</c>.
/// </remarks>
/// <param name="word">The token to look up.</param>
/// <param name="recurse">
/// <c>true</c> to look up a hyphen-prefixed word without its leading hyphen; <c>false</c> to
/// always look the word up as given.
/// </param>
/// <returns>
/// The value produced by <c>GenderUtil.RemoveGender</c> for the tags found. The dictionary
/// lookup itself may yield <c>null</c> when the word is unknown; how that is reported depends
/// on <c>GenderUtil.RemoveGender</c> (not visible here).
/// </returns>
private string[] QueryDictionary(string word, bool recurse) {
    // Ordinal check for a leading hyphen that is followed by at least one character.
    var stripLeadingHyphen = recurse && word.Length > 1 && word[0] == '-';

    // Decide which lookup to run up front: the hyphen-stripped result always replaces the
    // direct lookup, so running the direct lookup first would be wasted work.
    var tags = stripLeadingHyphen
        ? QueryDictionary(word.Substring(1), false)
        : tagDictionary.GetTags(word) ?? tagDictionary.GetTags(word.ToLowerInvariant());

    // The recursive result has already been through RemoveGender once; it is passed
    // through again here so both paths share the same final step.
    return GenderUtil.RemoveGender(tags);
}
/// <summary>
/// Determines whether a particular continuation of a sequence is valid.
/// This is used to restrict invalid sequences, such as those arising in start/continue
/// tag-based chunking, and to enforce tag dictionary restrictions when a tag dictionary is set.
/// </summary>
/// <remarks>
/// The checks are applied in this order; the first one that matches decides the result:
/// <list type="number">
/// <item><description>Outcome <c>mm</c> is rejected when the previous token is "a" (ignoring case) and was tagged <c>artf</c>.</description></item>
/// <item><description>The outcome is passed through <c>GenderUtil.RemoveGender</c>; all later checks use that value.</description></item>
/// <item><description>When <c>bosque</c> is set and the token matches <c>PunctuationRegex</c>, the outcome must equal the token itself.</description></item>
/// <item><description>A <c>B-</c> outcome is rejected when the next token matches <c>PunctuationRegex</c>.</description></item>
/// <item><description>The outcome must pass <c>ValidOutcome</c> against the outcomes so far.</description></item>
/// <item><description>Without a tag dictionary, the outcome is accepted.</description></item>
/// <item><description><c>B-</c> and <c>I-</c> outcomes are accepted when the input has more than one token.</description></item>
/// <item><description>An outcome equal to the token is accepted.</description></item>
/// <item><description>If the dictionary knows the token, <c>prop</c> is accepted for a token starting with an upper-case character; otherwise the outcome must be among the token's tags.</description></item>
/// <item><description>An unknown token is recorded in <c>unknownList</c> (when set) and the outcome is accepted.</description></item>
/// </list>
/// </remarks>
/// <param name="index">The index in the input sequence for which the new outcome is being proposed.</param>
/// <param name="inputSequence">The input sequence.</param>
/// <param name="outcomesSequence">The outcomes so far in this sequence.</param>
/// <param name="outcome">The next proposed outcome for the outcomes sequence.</param>
/// <returns><c>true</c> if the sequence would still be valid with the new outcome, <c>false</c> otherwise.</returns>
public bool ValidSequence(int index, string[] inputSequence, string[] outcomesSequence, string outcome) {
    var word = inputSequence[index];

    // This check deliberately runs on the raw outcome, before gender removal.
    if (index > 0 &&
        outcome == "mm" &&
        inputSequence[index - 1].Equals("a", StringComparison.OrdinalIgnoreCase) &&
        outcomesSequence[index - 1] == "artf") {
        return false;
    }

    outcome = GenderUtil.RemoveGender(outcome);

    // In bosque mode a punctuation token may only be tagged as itself.
    if (bosque && PunctuationRegex.IsMatch(word)) {
        return outcome.Equals(word);
    }

    // Tag prefixes are fixed symbols, so they are compared ordinally.
    if (index < inputSequence.Length - 1 &&
        PunctuationRegex.IsMatch(inputSequence[index + 1]) &&
        outcome.StartsWith("B-", StringComparison.Ordinal)) {
        // we can't start a MWE here :(
        return false;
    }

    // validate B- and I-
    if (!ValidOutcome(outcome, outcomesSequence)) {
        return false;
    }

    // No dictionary means there is nothing further to restrict.
    if (tagDictionary == null) {
        return true;
    }

    if ((outcome.StartsWith("B-", StringComparison.Ordinal) ||
         outcome.StartsWith("I-", StringComparison.Ordinal)) &&
        inputSequence.Length > 1) {
        return true;
    }

    if (word == outcome) {
        return true;
    }

    var tagList = FilterMWE(QueryDictionary(word, true));
    if (tagList != null && tagList.Count > 0) {
        // token exists
        // The length guard keeps an empty token from throwing on word[0].
        if (outcome == "prop" && word.Length > 0 && char.IsUpper(word[0])) {
            return true;
        }
        return Contains(tagList, outcome);
    }

    // Unknown token: remember it when a collector is configured, and do not restrict it.
    if (unknownList != null) {
        unknownList.Add(word);
    }
    return true;
}