public void TokenizerLowercasesCamelCasesAndRemovesStopWordsInput(string text, TokenAttributes[] expected)
        {
            // Feeds the supplied text through a fresh DescriptionAnalyzer and checks
            // that the produced tokens match the expected sequence supplied by the caller.

            // arrange
            var analyzer = new DescriptionAnalyzer();

            // act
            var tokens = analyzer.Tokenize(text);

            // assert
            Assert.Equal(expected, tokens);
        }
        /// <summary>
        /// Embeds <paramref name="stopWord"/> between the words "stop" and "word",
        /// tokenizes the result, and asserts that only "stop" and "word" come back.
        /// </summary>
        /// <param name="stopWord">The word expected to be dropped by the analyzer.</param>
        public void TokenizerRemovesCorrectStopWords(string stopWord)
        {
            // arrange
            var input = string.Format("stop {0} word", stopWord);

            // act
            var tokens = new DescriptionAnalyzer().Tokenize(input);

            // assert
            // "word" follows "stop", a space, the stop word, and another space, so its
            // offsets are shifted by the stop word's length.
            int shift = stopWord.Length;
            TokenAttributes[] expected =
            {
                new TokenAttributes("stop", 0, 4, 1),
                new TokenAttributes("word", 6 + shift, 10 + shift, 2)
            };

            Assert.Equal(expected, tokens);
        }
Example #3
0
        /// <summary>
        /// Manual smoke test: runs a hard-coded query through a
        /// <see cref="DescriptionAnalyzer"/> token stream and writes each
        /// resulting term to the console, one per line.
        /// </summary>
        static void Test2()
        {
            // Alternative inputs kept for manual experimentation.
            //string query = "signalrClient";
            //string query = "microsoft";
            string query = "HttpAgilityPack";

            //string description = "Misc MPL Libraries from CodePlex: HttpAgilityPack, InputSimulator, Irony Parser, WCF Rest Start Kit, XObjects\n\nUses the FluentSharp APIs";

            DescriptionAnalyzer analyzer = new DescriptionAnalyzer();

            // Dispose the reader and the token stream deterministically instead of
            // leaving them for the garbage collector.
            using (StringReader reader = new StringReader(query))
            using (TokenStream tokenStream = analyzer.TokenStream("", reader))
            {
                // The attribute is fetched once and read after every successful
                // IncrementToken() call to get the current term.
                ITermAttribute termAttribute = tokenStream.AddAttribute<ITermAttribute>();

                while (tokenStream.IncrementToken())
                {
                    Console.WriteLine(termAttribute.Term);
                }
            }
        }