Esempio n. 1
0
        /// <summary>
        /// Looks up the tags of a word in the tag dictionary and passes them through
        /// <c>GenderUtil.RemoveGender</c>.
        /// </summary>
        /// <param name="word">The word to look up.</param>
        /// <param name="recurse">
        /// If <c>true</c> and the word consists of a leading hyphen followed by at least one more
        /// character, the lookup is performed on the word without that hyphen instead.
        /// </param>
        /// <returns>The result of <c>GenderUtil.RemoveGender</c> for the tags that were looked up.</returns>
        private string[] QueryDictionary(string word, bool recurse)
        {
            // The length is tested first and the hyphen is compared as a plain char, so the
            // check is ordinal and does not depend on the current culture.
            if (recurse && word.Length > 1 && word[0] == '-')
            {
                // The hyphen-less lookup always replaces the result of the direct lookup,
                // so the direct lookup is not performed at all in this case.
                return GenderUtil.RemoveGender(QueryDictionary(word.Substring(1), false));
            }

            // Exact form first; the lower-cased form is only tried when the first lookup yields null.
            // NOTE(review): if both lookups yield null, null is handed to RemoveGender - confirm it accepts that.
            var tags = tagDictionary.GetTags(word) ?? tagDictionary.GetTags(word.ToLowerInvariant());

            return GenderUtil.RemoveGender(tags);
        }
Esempio n. 2
0
        /// <summary>
        /// Determines whether a particular continuation of a sequence is valid.
        /// This is used to restrict invalid sequences such as those used in start/continue tag-based chunking or could be used to implement tag dictionary restrictions.
        /// </summary>
        /// <param name="index">The index in the input sequence for which the new outcome is being proposed.</param>
        /// <param name="inputSequence">The input sequence.</param>
        /// <param name="outcomesSequence">The outcomes so far in this sequence.</param>
        /// <param name="outcome">The next proposed outcome for the outcomes sequence.</param>
        /// <returns><c>true</c> if the sequence would still be valid with the new outcome, <c>false</c> otherwise.</returns>
        /// <remarks>
        /// Tokens for which the dictionary lookup produces no tags are accepted with any outcome and,
        /// when an unknown-word list is present, are added to it.
        /// </remarks>
        public bool ValidSequence(int index, string[] inputSequence, string[] outcomesSequence, string outcome)
        {
            var word = inputSequence[index];

            // The outcome "mm" is rejected right after the token "a" (any casing) that was tagged "artf".
            // This check uses the raw outcome, before gender information is removed.
            if (index > 0 &&
                outcome == "mm" &&
                inputSequence[index - 1].Equals("a", StringComparison.OrdinalIgnoreCase) &&
                outcomesSequence[index - 1] == "artf")
            {
                return false;
            }

            outcome = GenderUtil.RemoveGender(outcome);

            // When the bosque flag is set, a token matching the punctuation pattern must be tagged as itself.
            if (bosque && PunctuationRegex.IsMatch(word))
            {
                return outcome.Equals(word);
            }

            // A "B-" outcome is rejected when the next token matches the punctuation pattern.
            if (index < inputSequence.Length - 1 &&
                PunctuationRegex.IsMatch(inputSequence[index + 1]) &&
                outcome.StartsWith("B-", StringComparison.Ordinal))
            {
                // we can't start a MWE here :(
                return false;
            }

            // validate B- and I-
            if (!ValidOutcome(outcome, outcomesSequence))
            {
                return false;
            }

            // Without a tag dictionary there is nothing further to check against.
            if (tagDictionary == null)
            {
                return true;
            }

            // "B-"/"I-" outcomes are not checked against the dictionary when the input has more than one token.
            if ((outcome.StartsWith("B-", StringComparison.Ordinal) ||
                 outcome.StartsWith("I-", StringComparison.Ordinal)) &&
                inputSequence.Length > 1)
            {
                return true;
            }

            // A token tagged as itself is always accepted.
            if (word == outcome)
            {
                return true;
            }

            var tagList = FilterMWE(QueryDictionary(word, true));

            if (tagList != null && tagList.Count > 0)
            {
                // token exists

                // "prop" is accepted for any known token whose first character is upper case.
                // The length guard keeps an empty token from throwing on word[0].
                if (outcome == "prop" && word.Length > 0 && char.IsUpper(word[0]))
                {
                    return true;
                }

                return Contains(tagList, outcome);
            }

            // No tags were found for the token: record it (if a list is available) and accept the outcome.
            if (unknownList != null)
            {
                unknownList.Add(word);
            }

            return true;
        }