/// <summary>
/// Tags the specified tokens with a part-of-speech label, in place.
/// </summary>
/// <remarks>
/// Word tokens are first looked up in <c>Lexicon</c> (exact text, then the
/// invariant lower-cased text) and the first tag listed for the entry is used.
/// Words with no usable entry get the word itself followed by "^" when they
/// are a single character, and "NN" otherwise. A fixed sequence of suffix and
/// context rules then adjusts that initial tag. Number tokens are tagged "CD",
/// e-mail / hash tag / username tokens "NN", and emoji tokens "EM". Tokens of
/// any other type are left untouched.
/// </remarks>
/// <param name="tokens">The tokens.</param>
/// <returns>
/// The tokens tagged (the same array instance that was passed in), or an empty
/// array when <paramref name="tokens"/> is <c>null</c>.
/// </returns>
public Token[] Tag(Token[] tokens)
{
    if (tokens is null)
    {
        return Array.Empty<Token>();
    }

    // Only word tokens update this, so the context rules below look at the
    // most recent *word*, skipping any non-word tokens that sit in between.
    Token previousWord = null;

    foreach (var current in tokens)
    {
        // A null slot carries nothing to tag; skip it rather than crash.
        if (current is null)
        {
            continue;
        }

        if (current.TokenType == TokenType.Word)
        {
            // Treat a missing value as empty text so the lookups and suffix
            // checks below cannot throw.
            var text = current.Value ?? string.Empty;

            // Exact match first, then the lower-cased form.
            string initialTag = null;
            if (Lexicon.TryGetValue(text, out IEnumerable<string> candidates)
                || Lexicon.TryGetValue(text.ToLowerInvariant(), out candidates))
            {
                // FirstOrDefault (not First) so an empty or null lexicon
                // entry falls through to the unknown-word default.
                initialTag = candidates?.FirstOrDefault();
            }

            if (initialTag is null)
            {
                initialTag = text.Length == 1 ? text + "^" : "NN";
            }

            current.PartOfSpeech = initialTag;

            // A verb tag directly after a word tagged "DT" becomes "NN".
            if (previousWord?.PartOfSpeech == "DT"
                && (initialTag == "VBD" || initialTag == "VBP" || initialTag == "VB"))
            {
                current.PartOfSpeech = "NN";
            }

            // This rule deliberately tests the *initial* tag, not the
            // possibly rewritten one from the rule above.
            if (initialTag.StartsWith("N", StringComparison.OrdinalIgnoreCase)
                && text.EndsWith("ED", StringComparison.OrdinalIgnoreCase))
            {
                current.PartOfSpeech = "VBN";
            }

            // Any word ending in "ly" becomes "RB", whatever its tag so far.
            if (text.EndsWith("LY", StringComparison.OrdinalIgnoreCase))
            {
                current.PartOfSpeech = "RB";
            }

            // The remaining rules test the current (possibly rewritten) tag,
            // so their order matters.
            if (current.PartOfSpeech.StartsWith("NN", StringComparison.OrdinalIgnoreCase)
                && text.EndsWith("AL", StringComparison.OrdinalIgnoreCase))
            {
                current.PartOfSpeech = "JJ";
            }

            if (current.PartOfSpeech.StartsWith("NN", StringComparison.OrdinalIgnoreCase)
                && string.Equals(previousWord?.Value, "WOULD", StringComparison.OrdinalIgnoreCase))
            {
                current.PartOfSpeech = "VB";
            }

            if (current.PartOfSpeech == "NN"
                && text.EndsWith("S", StringComparison.OrdinalIgnoreCase))
            {
                current.PartOfSpeech = "NNS";
            }

            if (current.PartOfSpeech.StartsWith("NN", StringComparison.OrdinalIgnoreCase)
                && text.EndsWith("ING", StringComparison.OrdinalIgnoreCase))
            {
                current.PartOfSpeech = "VBG";
            }

            previousWord = current;
        }
        else if (current.TokenType == TokenType.Number)
        {
            current.PartOfSpeech = "CD";
        }
        else if (current.TokenType == TokenType.Email
            || current.TokenType == TokenType.HashTag
            || current.TokenType == TokenType.Username)
        {
            current.PartOfSpeech = "NN";
        }
        else if (current.TokenType == TokenType.Emoji)
        {
            current.PartOfSpeech = "EM";
        }
    }

    return tokens;
}