/// <summary>
/// Tokenizes <paramref name="text"/> with a fresh <see cref="DescriptionAnalyzer"/>
/// and checks that the produced tokens equal <paramref name="expected"/>.
/// </summary>
/// <param name="text">Raw input handed to the analyzer.</param>
/// <param name="expected">Tokens the analyzer is expected to produce for <paramref name="text"/>.</param>
public void TokenizerLowercasesCamelCasesAndRemovesStopWordsInput(string text, TokenAttributes[] expected)
{
    // Arrange
    var analyzer = new DescriptionAnalyzer();

    // Act
    var tokens = analyzer.Tokenize(text);

    // Assert
    Assert.Equal(expected, tokens);
}
/// <summary>
/// Places <paramref name="stopWord"/> between the words "stop" and "word",
/// tokenizes the phrase with a fresh <see cref="DescriptionAnalyzer"/>, and checks
/// that only the two surrounding words come back.
/// </summary>
/// <param name="stopWord">The word that is expected to be absent from the tokenized output.</param>
public void TokenizerRemovesCorrectStopWords(string stopWord)
{
    // Arrange
    var analyzer = new DescriptionAnalyzer();
    var phrase = "stop " + stopWord + " word";

    // Act
    var tokens = analyzer.Tokenize(phrase);

    // Assert
    // "word" sits after "stop", a space, the stop word and another space, so its
    // start/end positions (6 and 10 for an empty stop word) shift by the stop word's length.
    // NOTE(review): the last constructor argument (1, then 2) looks like a position
    // increment that accounts for the dropped stop word - confirm against TokenAttributes.
    int shift = stopWord.Length;
    var expectedTokens = new[]
    {
        new TokenAttributes("stop", 0, 4, 1),
        new TokenAttributes("word", 6 + shift, 10 + shift, 2)
    };

    Assert.Equal(expectedTokens, tokens);
}
/// <summary>
/// Manual smoke test: runs a sample query through <see cref="DescriptionAnalyzer"/>
/// and writes every term the resulting token stream emits to the console, one per line.
/// </summary>
static void Test2()
{
    // Other sample inputs previously tried here: "signalrClient", "microsoft",
    // and a multi-sentence package description.
    string query = "HttpAgilityPack";

    DescriptionAnalyzer analyzer = new DescriptionAnalyzer();

    // Dispose the reader and the token stream deterministically instead of
    // leaving them for the garbage collector.
    using (StringReader reader = new StringReader(query))
    using (TokenStream tokenStream = analyzer.TokenStream("", reader))
    {
        // The term attribute instance is obtained once, before the loop, and read
        // after each successful IncrementToken call.
        ITermAttribute termAttribute = tokenStream.AddAttribute<ITermAttribute>();

        while (tokenStream.IncrementToken())
        {
            Console.WriteLine(termAttribute.Term);
        }
    }
}