/// <summary>
/// Tokenizes <paramref name="keywords"/> and collects the ids that
/// <c>WordIndex.GetWord</c> returns for each token.
/// </summary>
/// <param name="keywords">Text handed to a fresh <c>DefaultTokenizer</c> via <c>SetDoc</c>.</param>
/// <returns>
/// The distinct word ids found. Tokens whose lookup yields <c>-1</c> are left out
/// (presumably the "unknown word" sentinel of <c>WordIndex.GetWord</c> — confirm).
/// </returns>
public HashSet<int> ReadWords(string keywords) {
    var scanner = new DefaultTokenizer();
    scanner.SetDoc(keywords);

    var ids = new HashSet<int>();

    // ConsumeNext signals the end of the input by returning null.
    for (var current = scanner.ConsumeNext(); current != null; current = scanner.ConsumeNext()) {
        var id = WordIndex.GetWord(current.Value);
        if (id == -1) {
            continue;
        }

        ids.Add(id);
    }

    return ids;
}
/// <summary>
/// Tokenizes <paramref name="content"/> and collects the ids that
/// <c>WordIndex.GetOrAddWord</c> returns for each token.
/// </summary>
/// <param name="content">Text handed to a fresh <c>DefaultTokenizer</c> via <c>SetDoc</c>.</param>
/// <returns>
/// The distinct non-negative word ids obtained; negative ids are discarded.
/// NOTE(review): judging by its name, <c>GetOrAddWord</c> likely registers
/// previously unseen words in the index — confirm against <c>WordIndex</c>.
/// </returns>
public HashSet<int> ListWords(string content) {
    var scanner = new DefaultTokenizer();
    scanner.SetDoc(content);

    var ids = new HashSet<int>();

    // ConsumeNext signals the end of the input by returning null.
    for (var current = scanner.ConsumeNext(); current != null; current = scanner.ConsumeNext()) {
        var id = WordIndex.GetOrAddWord(current.Value);
        if (id < 0) {
            continue;
        }

        ids.Add(id);
    }

    return ids;
}