/// <summary>
/// Tags the specified tokens with their part of speech.
/// </summary>
/// <param name="tokens">
/// The tokens to tag. Tokens whose type is <c>TokenType.WhiteSpace</c> are skipped
/// and are not assigned a part of speech by this method.
/// </param>
/// <returns>
/// The same <paramref name="tokens"/> array that was passed in; every non-whitespace
/// token has its <c>PartOfSpeech</c> set to the final tag.
/// </returns>
public Token[] Tag(Token[] tokens)
{
    // Work only on the non-whitespace tokens; the Token instances are shared with
    // the input array, so setting PartOfSpeech below is visible to the caller.
    var tempTokens = tokens.Where(x => x.TokenType != TokenType.WhiteSpace).ToArray();
    var tags = new TagProbability[tempTokens.Length];

    // Pass 1: initial tag for every token.
    for (int x = 0; x < tempTokens.Length; ++x)
    {
        tags[x] = GetTag(tempTokens[x]);
    }

    // Pass 2: manual conversions. Each call sees the previous tag/token and the
    // InNNP flag returned by the previous call.
    bool inNNP = false;
    TagProbability? previous = null;
    Token? previousToken = null;
    for (int x = 0; x < tempTokens.Length; ++x)
    {
        var tag = tags[x];
        var token = tempTokens[x];
        inNNP = ManualTagConversions(tag, previous, token, previousToken, x, inNNP);
        previous = tag;
        previousToken = token;
    }

    // Pass 3: NOTE(review): presumably runs the contextual rule set over the tags
    // (Apply is defined elsewhere) - confirm.
    Apply(tempTokens, tags);

    // Pass 4: a word ending in "ed" tagged JJ, preceded by VBZ/VBP and followed by TO,
    // is retagged as VBN.
    previous = null;
    for (int x = 0; x < tempTokens.Length; ++x)
    {
        var tag = tags[x];
        var token = tempTokens[x];

        // The look-ahead must be bounds-checked: the last token has no successor,
        // so it can never be "followed by TO" (previously this indexed past the array).
        if (token.NormalizedValue.EndsWith("ed", StringComparison.Ordinal)
            && tag.Tag == "JJ"
            && (previous?.Tag == "VBZ" || previous?.Tag == "VBP")
            && x + 1 < tags.Length
            && tags[x + 1].Tag == "TO")
        {
            tag.Tag = "VBN";
        }

        previous = tag;
    }

    // Publish the final tags onto the tokens.
    for (int x = 0; x < tempTokens.Length; ++x)
    {
        tempTokens[x].PartOfSpeech = tags[x].Tag;
    }

    return tokens;
}
/// <summary>
/// Evaluates one <see cref="BrillRule"/> against the token at <paramref name="index"/>
/// and, when the rule's context condition holds, overwrites that position's tag with
/// the rule's <c>To</c> value.
/// </summary>
/// <param name="rule">The rule to evaluate; supplies From, To, Type, SecondRun and the C1/C2/C3 operands.</param>
/// <param name="token">The token at <paramref name="index"/>.</param>
/// <param name="tag">The current tag of the token; only compared against the rule's From value.</param>
/// <param name="index">The position of the token within <paramref name="tokens"/> and <paramref name="tags"/>.</param>
/// <param name="tokens">All tokens being tagged, used for neighbouring-word conditions.</param>
/// <param name="tags">All tags, used for neighbouring-tag conditions; the entry at <paramref name="index"/> is updated on a match.</param>
/// <param name="run">The pass number; rules flagged SecondRun are ignored while this is 0.</param>
public static void ApplyRule(BrillRule rule, Token token, TagProbability tag, int index, Token[] tokens, TagProbability[] tags, int run)
{
    // The rule only applies to tokens currently carrying its From tag.
    if (rule.From != tag.Tag)
    {
        return;
    }

    // Second-run rules sit out run 0.
    if (rule.SecondRun && run == 0)
    {
        return;
    }

    var condition = rule.Type;

    // Each arm answers "does the context around index satisfy this condition?".
    // Every look-behind / look-ahead is guarded so that no arm indexes out of range.
    bool matched = condition switch
    {
        // Start-word compares the raw Value rather than NormalizedValue (case sensitive).
        BrillConditions.STARTWORD =>
            index == 0 && token.Value == rule.C1,

        BrillConditions.PREVTAG =>
            index > 0 && tags[index - 1].Tag == rule.C1,

        BrillConditions.PREVWORDPREVTAG =>
            index > 0
            && tags[index - 1].Tag == rule.C2
            && tokens[index - 1].NormalizedValue == rule.C1,

        BrillConditions.NEXTTAG =>
            index < tags.Length - 1 && tags[index + 1].Tag == rule.C1,

        BrillConditions.NEXTTAG2 =>
            index < tags.Length - 2 && tags[index + 2].Tag == rule.C1,

        BrillConditions.PREVTAG2 =>
            index > 1 && tags[index - 2].Tag == rule.C1,

        BrillConditions.PREV1OR2TAG =>
            (index > 0 && tags[index - 1].Tag == rule.C1)
            || (index > 1 && tags[index - 2].Tag == rule.C1),

        BrillConditions.PREVWORD =>
            index > 0 && tokens[index - 1].NormalizedValue == rule.C1,

        BrillConditions.CURRENTWD =>
            token.NormalizedValue == rule.C1,

        BrillConditions.WDPREVTAG =>
            index > 0
            && token.NormalizedValue == rule.C2
            && tags[index - 1].Tag == rule.C1,

        BrillConditions.WDPREVWD =>
            index > 0
            && token.NormalizedValue == rule.C2
            && tokens[index - 1].NormalizedValue == rule.C1,

        BrillConditions.NEXT1OR2OR3TAG =>
            (index < tags.Length - 1 && tags[index + 1].Tag == rule.C1)
            || (index < tags.Length - 2 && tags[index + 2].Tag == rule.C1)
            || (index < tags.Length - 3 && tags[index + 3].Tag == rule.C1),

        BrillConditions.NEXT2WD =>
            index < tokens.Length - 2 && tokens[index + 2].NormalizedValue == rule.C1,

        BrillConditions.WDNEXTWD =>
            index < tokens.Length - 1
            && token.NormalizedValue == rule.C1
            && tokens[index + 1].NormalizedValue == rule.C2,

        BrillConditions.WDNEXTTAG =>
            index < tags.Length - 1
            && token.NormalizedValue == rule.C1
            && tags[index + 1].Tag == rule.C2,

        BrillConditions.PREV1OR2OR3TAG =>
            (index > 0 && tags[index - 1].Tag == rule.C1)
            || (index > 1 && tags[index - 2].Tag == rule.C1)
            || (index > 2 && tags[index - 3].Tag == rule.C1),

        BrillConditions.SURROUNDTAG =>
            index > 0
            && tags[index - 1].Tag == rule.C1
            && index < tags.Length - 1
            && tags[index + 1].Tag == rule.C2,

        BrillConditions.SURROUNDTAGWD =>
            token.NormalizedValue == rule.C1
            && index > 0
            && tags[index - 1].Tag == rule.C2
            && index < tags.Length - 1
            && tags[index + 1].Tag == rule.C3,

        BrillConditions.NEXTWD =>
            index < tokens.Length - 1 && tokens[index + 1].NormalizedValue == rule.C1,

        BrillConditions.NEXT1OR2TAG =>
            (index < tags.Length - 1 && tags[index + 1].Tag == rule.C1)
            || (index < tags.Length - 2 && tags[index + 2].Tag == rule.C1),

        BrillConditions.PREV2TAG =>
            index > 1
            && tags[index - 2].Tag == rule.C1
            && tags[index - 1].Tag == rule.C2,

        BrillConditions.PREV2TAGNEXTTAG =>
            index > 1
            && tags[index - 2].Tag == rule.C1
            && tags[index - 1].Tag == rule.C2
            && index < tags.Length - 1
            && tags[index + 1].Tag == rule.C3,

        BrillConditions.NEXT2TAG =>
            index < tags.Length - 2
            && tags[index + 1].Tag == rule.C1
            && tags[index + 2].Tag == rule.C2,

        BrillConditions.NEXT1OR2WD =>
            (index < tokens.Length - 1 && tokens[index + 1].NormalizedValue == rule.C1)
            || (index < tokens.Length - 2 && tokens[index + 2].NormalizedValue == rule.C1),

        BrillConditions.PREV2WD =>
            index > 1 && tokens[index - 2].NormalizedValue == rule.C1,

        BrillConditions.PREV1OR2WD =>
            (index > 0 && tokens[index - 1].NormalizedValue == rule.C1)
            || (index > 1 && tokens[index - 2].NormalizedValue == rule.C1),

        // Any other condition type leaves the tag untouched.
        _ => false,
    };

    if (matched)
    {
        tags[index].Tag = rule.To;
    }
}