Пример #1
0
 /// <summary>
 /// Creates a synset graph over the supplied tokens.
 /// </summary>
 /// <param name="tokens">Tokens stored on the instance for later use.</param>
 public SynSetGraph(Token[] tokens)
 {
     // Point the WordNet library at its dictionary directory. This is a
     // static setting, so it is (re)assigned on every construction.
     Wnlib.WNCommon.path = "C:\\Program Files\\WordNet\\2.1\\dict\\";

     // Lookup table created with an initial capacity of 1024 entries.
     this.m_htMap = new Hashtable(1024);
     this.m_tokens = tokens;
 }
Пример #2
0
        /// <summary>
        /// Converts tagged text into an array of <see cref="Token"/> objects,
        /// one per "word/TAG" piece.
        /// </summary>
        /// <remarks>
        /// Every match of <c>m_trunc</c> is first removed from the input, and the
        /// result is split with <c>m_splitter</c>. Each raw piece is then divided
        /// at its LAST '/', so a word that itself contains slashes (for example
        /// "1/2/CD") keeps them and only the trailing segment is used as the tag.
        /// Pieces without any '/' are skipped, as are words that
        /// <c>StopWordsHandler</c> reports as stop words.
        /// </remarks>
        /// <param name="input">Text whose pieces carry a "/TAG" suffix.</param>
        /// <returns>The accepted tokens, in input order; empty if none qualify.</returns>
        public Token[] Tokenize(string input)
        {
            ArrayList tokens = new ArrayList();

            input = m_trunc.Replace(input, string.Empty);
            string[] rawTokens = m_splitter.Split(input);

            foreach (string rawToken in rawTokens)
            {
                // Break up Brill tags: the tag follows the last '/', which
                // lets the word part contain '/' characters of its own.
                int slashIndex = rawToken.LastIndexOf('/');
                if (slashIndex < 0)
                {
                    // No tag separator at all: not a word/TAG pair.
                    continue;
                }

                string word = rawToken.Substring(0, slashIndex);
                string tag = rawToken.Substring(slashIndex + 1);

                if (!StopWordsHandler.Instance.IsStopWord(word))
                {
                    tokens.Add(new Token(word, tag));
                }
            }

            return (Token[]) tokens.ToArray(typeof(Token));
        }