예제 #1
0
        /// <summary>
        /// Tokenizes <paramref name="keywords"/> with a <c>DefaultTokenizer</c> and
        /// collects the distinct ids that <c>WordIndex.GetWord</c> returns for the tokens.
        /// </summary>
        /// <param name="keywords">Text handed to the tokenizer via <c>SetDoc</c>.</param>
        /// <returns>
        /// The set of ids returned by <c>WordIndex.GetWord</c>, excluding the value -1.
        /// The set is empty when no token yields another id.
        /// </returns>
        public HashSet<int> ReadWords(string keywords)
        {
            var scanner = new DefaultTokenizer();
            scanner.SetDoc(keywords);

            var knownIds = new HashSet<int>();

            // ConsumeNext() returning null ends the scan.
            for (var current = scanner.ConsumeNext(); current != null; current = scanner.ConsumeNext())
            {
                var id = WordIndex.GetWord(current.Value);

                // NOTE(review): -1 presumably means "word not in the index" - confirm against WordIndex.
                if (id == -1)
                {
                    continue;
                }

                knownIds.Add(id);
            }

            return knownIds;
        }
예제 #2
0
        /// <summary>
        /// Tokenizes <paramref name="content"/> with a <c>DefaultTokenizer</c> and
        /// collects the distinct ids that <c>WordIndex.GetOrAddWord</c> returns for the tokens.
        /// </summary>
        /// <remarks>
        /// NOTE(review): judging by its name, <c>GetOrAddWord</c> may register unseen words in
        /// the index as a side effect - confirm against WordIndex.
        /// </remarks>
        /// <param name="content">Text handed to the tokenizer via <c>SetDoc</c>.</param>
        /// <returns>
        /// The set of non-negative ids returned by <c>WordIndex.GetOrAddWord</c>.
        /// Negative ids are dropped.
        /// </returns>
        public HashSet<int> ListWords(string content)
        {
            var scanner = new DefaultTokenizer();
            scanner.SetDoc(content);

            var collectedIds = new HashSet<int>();

            while (true)
            {
                var current = scanner.ConsumeNext();

                // ConsumeNext() returning null ends the scan.
                if (current == null)
                {
                    break;
                }

                var id = WordIndex.GetOrAddWord(current.Value);

                // Only non-negative ids are kept.
                if (id < 0)
                {
                    continue;
                }

                collectedIds.Add(id);
            }

            return collectedIds;
        }