예제 #1
0
        /// <summary>
        /// Tags the specified tokens with part-of-speech labels.
        /// </summary>
        /// <remarks>
        /// Word tokens are looked up in <c>Lexicon</c> (exact value first, then the
        /// lower-invariant value) and the first tag of the entry is used. Words that are
        /// not found, or whose entry yields no tag, fall back to <c>value + "^"</c> for
        /// single-character words and <c>"NN"</c> otherwise. A fixed sequence of rewrite
        /// rules is then applied; the rules run in order and later rules see the result of
        /// earlier ones. Number tokens are tagged <c>"CD"</c>, email/hashtag/username tokens
        /// <c>"NN"</c>, and emoji tokens <c>"EM"</c>. Other token types are left untouched.
        /// </remarks>
        /// <param name="tokens">The tokens. Their <c>PartOfSpeech</c> is updated in place.</param>
        /// <returns>
        /// The same array instance that was passed in, or an empty array when
        /// <paramref name="tokens"/> is <c>null</c>.
        /// </returns>
        public Token[] Tag(Token[] tokens)
        {
            if (tokens is null)
            {
                return Array.Empty<Token>();
            }

            // Only word tokens are remembered here, so the context rules below look at the
            // previous *word*, skipping over any non-word tokens in between.
            Token PreviousWord = null;

            foreach (var Current in tokens)
            {
                if (Current is null)
                {
                    continue;
                }

                if (Current.TokenType == TokenType.Word)
                {
                    var Text = Current.Value;

                    // Exact match first, then a lower-cased retry.
                    string InitialTag = null;
                    if (Lexicon.TryGetValue(Text, out IEnumerable<string> Candidates)
                        || Lexicon.TryGetValue(Text.ToLowerInvariant(), out Candidates))
                    {
                        // An empty (or null) entry is treated like an unknown word instead of throwing.
                        InitialTag = Candidates?.FirstOrDefault();
                    }

                    if (InitialTag is null)
                    {
                        InitialTag = Text.Length == 1 ? Text + "^" : "NN";
                    }

                    Current.PartOfSpeech = InitialTag;

                    // Rule: "DT" followed by VBD / VBP / VB becomes "NN".
                    if (PreviousWord?.PartOfSpeech == "DT"
                        && (InitialTag == "VBD" || InitialTag == "VBP" || InitialTag == "VB"))
                    {
                        Current.PartOfSpeech = "NN";
                    }

                    // Rule: tag starting with "N" and word ending in "ed" becomes "VBN".
                    // NOTE(review): this rule tests the initial lexicon tag, not the tag possibly
                    // rewritten by the rule above, unlike the rules that follow. Preserved as-is;
                    // confirm whether that is intended.
                    if (InitialTag.StartsWith("N", StringComparison.OrdinalIgnoreCase)
                        && Text.EndsWith("ED", StringComparison.OrdinalIgnoreCase))
                    {
                        Current.PartOfSpeech = "VBN";
                    }

                    // Rule: any word ending in "ly" becomes "RB".
                    if (Text.EndsWith("LY", StringComparison.OrdinalIgnoreCase))
                    {
                        Current.PartOfSpeech = "RB";
                    }

                    // Rule: "NN*" ending in "al" becomes "JJ".
                    if (Current.PartOfSpeech.StartsWith("NN", StringComparison.OrdinalIgnoreCase)
                        && Text.EndsWith("AL", StringComparison.OrdinalIgnoreCase))
                    {
                        Current.PartOfSpeech = "JJ";
                    }

                    // Rule: "NN*" directly after the word "would" becomes "VB".
                    if (Current.PartOfSpeech.StartsWith("NN", StringComparison.OrdinalIgnoreCase)
                        && string.Equals(PreviousWord?.Value, "WOULD", StringComparison.OrdinalIgnoreCase))
                    {
                        Current.PartOfSpeech = "VB";
                    }

                    // Rule: exactly "NN" ending in "s" becomes "NNS".
                    if (Current.PartOfSpeech == "NN"
                        && Text.EndsWith("S", StringComparison.OrdinalIgnoreCase))
                    {
                        Current.PartOfSpeech = "NNS";
                    }

                    // Rule: "NN*" ending in "ing" becomes "VBG".
                    if (Current.PartOfSpeech.StartsWith("NN", StringComparison.OrdinalIgnoreCase)
                        && Text.EndsWith("ING", StringComparison.OrdinalIgnoreCase))
                    {
                        Current.PartOfSpeech = "VBG";
                    }

                    PreviousWord = Current;
                }
                else if (Current.TokenType == TokenType.Number)
                {
                    Current.PartOfSpeech = "CD";
                }
                else if (Current.TokenType == TokenType.Email
                    || Current.TokenType == TokenType.HashTag
                    || Current.TokenType == TokenType.Username)
                {
                    Current.PartOfSpeech = "NN";
                }
                else if (Current.TokenType == TokenType.Emoji)
                {
                    Current.PartOfSpeech = "EM";
                }
            }

            return tokens;
        }