Exemple #1
0
        /// <summary>
        /// Assigns a part-of-speech tag to every non-whitespace token.
        /// </summary>
        /// <remarks>
        /// The work is done in stages: an initial tag is fetched for each token, manual
        /// conversions are applied left to right, the rule set is applied, and finally a
        /// post-pass retags "-ed" adjectives that sit between a VBZ/VBP tag and a TO tag
        /// as VBN. Whitespace tokens are filtered out up front and are not assigned a
        /// part of speech by this method.
        /// </remarks>
        /// <param name="tokens">The tokens to tag.</param>
        /// <returns>
        /// The same <paramref name="tokens"/> array that was passed in, with
        /// <c>PartOfSpeech</c> set on each non-whitespace token.
        /// </returns>
        public Token[] Tag(Token[] tokens)
        {
            var  TempTokens = tokens.Where(x => x.TokenType != TokenType.WhiteSpace).ToArray();
            var  Tags       = new TagProbability[TempTokens.Length];
            bool InNNP      = false;

            // Stage 1: initial tag for every token.
            for (int x = 0; x < TempTokens.Length; ++x)
            {
                Tags[x] = GetTag(TempTokens[x]);
            }

            TagProbability?Previous      = null;
            Token?         PreviousToken = null;

            // Stage 2: manual conversions, carrying the previous tag/token and the
            // InNNP state from one position to the next.
            for (int x = 0; x < TempTokens.Length; ++x)
            {
                var Tag   = Tags[x];
                var Token = TempTokens[x];
                InNNP         = ManualTagConversions(Tag, Previous, Token, PreviousToken, x, InNNP);
                Previous      = Tag;
                PreviousToken = Token;
            }

            // Stage 3: rule application over the whole sequence.
            Apply(TempTokens, Tags);

            // Stage 4: "<VBZ|VBP> <JJ ending in -ed> <TO>"  =>  retag the middle word as VBN.
            Previous = null;
            for (int x = 0; x < TempTokens.Length; ++x)
            {
                var Tag   = Tags[x];
                var Token = TempTokens[x];
                if (Token.NormalizedValue.EndsWith("ed", StringComparison.Ordinal))
                {
                    // The look-ahead must be bounds-checked: when the "-ed" word is the
                    // last token there is no following tag, so the pattern cannot match
                    // (previously this indexed past the end of the array and threw).
                    bool HasNext = x + 1 < Tags.Length;
                    if (Tag.Tag == "JJ"
                        && (Previous?.Tag == "VBZ" || Previous?.Tag == "VBP")
                        && HasNext
                        && Tags[x + 1].Tag == "TO")
                    {
                        Tag.Tag = "VBN";
                    }
                }

                Previous = Tag;
            }

            // Publish the final tags onto the tokens.
            for (int x = 0; x < TempTokens.Length; ++x)
            {
                TempTokens[x].PartOfSpeech = Tags[x].Tag;
            }

            return tokens;
        }
Exemple #2
0
        /// <summary>
        /// Evaluates a single contextual rule against the token at <paramref name="index"/>
        /// and, when the rule's condition holds, rewrites that position's tag to the rule's
        /// target tag.
        /// </summary>
        /// <remarks>
        /// Nothing happens when the rule's source tag differs from <paramref name="tag"/>'s
        /// current tag, or when the rule is flagged as second-run and <paramref name="run"/>
        /// is 0. Every neighbour lookup is bounds-checked, so a condition that needs a
        /// position outside the sequence simply does not match.
        /// </remarks>
        /// <param name="rule">The rule to evaluate.</param>
        /// <param name="token">The token at <paramref name="index"/>.</param>
        /// <param name="tag">The tag whose current value is compared with the rule's source tag.</param>
        /// <param name="index">The position being examined.</param>
        /// <param name="tokens">The token sequence, used by word-based conditions.</param>
        /// <param name="tags">The tag sequence, used by tag-based conditions and updated on a match.</param>
        /// <param name="run">The pass number; second-run rules are skipped while this is 0.</param>
        public static void ApplyRule(BrillRule rule, Token token, TagProbability tag, int index, Token[] tokens, TagProbability[] tags, int run)
        {
            // The rule only rewrites tags that currently equal its source tag.
            if (rule.From != tag.Tag)
            {
                return;
            }

            // Second-run rules sit out the first pass.
            if (rule.SecondRun && run == 0)
            {
                return;
            }

            var condition = rule.Type;

            // Start word rule is case sensitive: it compares the raw Value rather than
            // the NormalizedValue used by every other word-based condition.
            if (condition == BrillConditions.STARTWORD)
            {
                if (index == 0 && token.Value == rule.C1)
                {
                    tags[index].Tag = rule.To;
                }

                return;
            }

            bool matched;

            switch (condition)
            {
                // ----- conditions on preceding tags -----
                case BrillConditions.PREVTAG:
                    matched = index >= 1 && tags[index - 1].Tag == rule.C1;
                    break;

                case BrillConditions.PREVTAG2:
                    matched = index >= 2 && tags[index - 2].Tag == rule.C1;
                    break;

                case BrillConditions.PREV2TAG:
                    matched = index >= 2
                              && tags[index - 2].Tag == rule.C1
                              && tags[index - 1].Tag == rule.C2;
                    break;

                case BrillConditions.PREV1OR2TAG:
                    matched = (index >= 1 && tags[index - 1].Tag == rule.C1)
                              || (index >= 2 && tags[index - 2].Tag == rule.C1);
                    break;

                case BrillConditions.PREV1OR2OR3TAG:
                    matched = (index >= 1 && tags[index - 1].Tag == rule.C1)
                              || (index >= 2 && tags[index - 2].Tag == rule.C1)
                              || (index >= 3 && tags[index - 3].Tag == rule.C1);
                    break;

                // ----- conditions on following tags -----
                case BrillConditions.NEXTTAG:
                    matched = index < tags.Length - 1 && tags[index + 1].Tag == rule.C1;
                    break;

                case BrillConditions.NEXTTAG2:
                    matched = index < tags.Length - 2 && tags[index + 2].Tag == rule.C1;
                    break;

                case BrillConditions.NEXT2TAG:
                    matched = index < tags.Length - 2
                              && tags[index + 1].Tag == rule.C1
                              && tags[index + 2].Tag == rule.C2;
                    break;

                case BrillConditions.NEXT1OR2TAG:
                    matched = (index < tags.Length - 1 && tags[index + 1].Tag == rule.C1)
                              || (index < tags.Length - 2 && tags[index + 2].Tag == rule.C1);
                    break;

                case BrillConditions.NEXT1OR2OR3TAG:
                    matched = (index < tags.Length - 1 && tags[index + 1].Tag == rule.C1)
                              || (index < tags.Length - 2 && tags[index + 2].Tag == rule.C1)
                              || (index < tags.Length - 3 && tags[index + 3].Tag == rule.C1);
                    break;

                // ----- conditions on tags either side -----
                case BrillConditions.SURROUNDTAG:
                    matched = index >= 1
                              && tags[index - 1].Tag == rule.C1
                              && index < tags.Length - 1
                              && tags[index + 1].Tag == rule.C2;
                    break;

                case BrillConditions.PREV2TAGNEXTTAG:
                    matched = index >= 2
                              && tags[index - 2].Tag == rule.C1
                              && tags[index - 1].Tag == rule.C2
                              && index < tags.Length - 1
                              && tags[index + 1].Tag == rule.C3;
                    break;

                // ----- conditions on the current word -----
                case BrillConditions.CURRENTWD:
                    matched = token.NormalizedValue == rule.C1;
                    break;

                case BrillConditions.WDPREVTAG:
                    // Note the operand roles: C2 is the word, C1 is the preceding tag.
                    matched = index >= 1
                              && token.NormalizedValue == rule.C2
                              && tags[index - 1].Tag == rule.C1;
                    break;

                case BrillConditions.WDPREVWD:
                    // C2 is the current word, C1 is the preceding word.
                    matched = index >= 1
                              && token.NormalizedValue == rule.C2
                              && tokens[index - 1].NormalizedValue == rule.C1;
                    break;

                case BrillConditions.WDNEXTWD:
                    matched = index < tokens.Length - 1
                              && token.NormalizedValue == rule.C1
                              && tokens[index + 1].NormalizedValue == rule.C2;
                    break;

                case BrillConditions.WDNEXTTAG:
                    matched = index < tags.Length - 1
                              && token.NormalizedValue == rule.C1
                              && tags[index + 1].Tag == rule.C2;
                    break;

                case BrillConditions.SURROUNDTAGWD:
                    matched = token.NormalizedValue == rule.C1
                              && index >= 1
                              && tags[index - 1].Tag == rule.C2
                              && index < tags.Length - 1
                              && tags[index + 1].Tag == rule.C3;
                    break;

                // ----- conditions on preceding words -----
                case BrillConditions.PREVWORD:
                    matched = index >= 1 && tokens[index - 1].NormalizedValue == rule.C1;
                    break;

                case BrillConditions.PREV2WD:
                    matched = index >= 2 && tokens[index - 2].NormalizedValue == rule.C1;
                    break;

                case BrillConditions.PREV1OR2WD:
                    matched = (index >= 1 && tokens[index - 1].NormalizedValue == rule.C1)
                              || (index >= 2 && tokens[index - 2].NormalizedValue == rule.C1);
                    break;

                case BrillConditions.PREVWORDPREVTAG:
                    // C2 is the preceding tag, C1 is the preceding word.
                    matched = index >= 1
                              && tags[index - 1].Tag == rule.C2
                              && tokens[index - 1].NormalizedValue == rule.C1;
                    break;

                // ----- conditions on following words -----
                case BrillConditions.NEXTWD:
                    matched = index < tokens.Length - 1 && tokens[index + 1].NormalizedValue == rule.C1;
                    break;

                case BrillConditions.NEXT2WD:
                    matched = index < tokens.Length - 2 && tokens[index + 2].NormalizedValue == rule.C1;
                    break;

                case BrillConditions.NEXT1OR2WD:
                    matched = (index < tokens.Length - 1 && tokens[index + 1].NormalizedValue == rule.C1)
                              || (index < tokens.Length - 2 && tokens[index + 2].NormalizedValue == rule.C1);
                    break;

                // Any condition not listed above leaves the tag untouched.
                default:
                    matched = false;
                    break;
            }

            if (matched)
            {
                tags[index].Tag = rule.To;
            }
        }