Exemplo n.º 1
0
        protected internal virtual void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle, Token[] tokensToCompare, int[] positionIncrements, string[] types, bool outputUnigrams)
        {
            ShingleFilter filter = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);

            filter.SetOutputUnigrams(outputUnigrams);
            shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
        }
Exemplo n.º 2
0
        public override TokenStream Create(TokenStream input)
        {
            ShingleFilter r = new ShingleFilter(input, minShingleSize, maxShingleSize);

            r.SetOutputUnigrams(outputUnigrams);
            r.SetOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
            r.SetTokenSeparator(tokenSeparator);
            r.SetFillerToken(fillerToken);
            return(r);
        }
Exemplo n.º 3
0
        protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
        {
            ShingleFilter filter = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize);

            filter.SetMinShingleSize(minShingleSize);
            filter.SetMaxShingleSize(maxShingleSize);
            filter.SetTokenSeparator(tokenSeparator);
            filter.SetOutputUnigrams(outputUnigrams);
            filter.SetOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
            filter.SetFillerToken(fillerToken);
            return(new TokenStreamComponents(components.Tokenizer, filter));
        }
Exemplo n.º 4
0
        public override TokenStream TokenStream(String fieldName, TextReader reader)
        {
            TokenStream wrapped;

            try
            {
                wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
            }
            catch (IOException)
            {
                wrapped = defaultAnalyzer.TokenStream(fieldName, reader);
            }
            ShingleFilter filter = new ShingleFilter(wrapped);

            filter.SetMaxShingleSize(maxShingleSize);
            filter.SetOutputUnigrams(outputUnigrams);
            return(filter);
        }
Exemplo n.º 5
0
        protected void ShingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare,
                                         int[] positionIncrements, String[] types, bool outputUnigrams)
        {
            var filter = new ShingleFilter(new TestTokenStream(tokensToShingle), maxSize);
            filter.SetOutputUnigrams(outputUnigrams);

            var termAtt = filter.AddAttribute<ITermAttribute>();
            var offsetAtt = filter.AddAttribute<IOffsetAttribute>();
            var posIncrAtt = filter.AddAttribute<IPositionIncrementAttribute>();
            var typeAtt = filter.AddAttribute<ITypeAttribute>();

            int i = 0;
            while (filter.IncrementToken())
            {
                Assert.IsTrue(i < tokensToCompare.Length, "ShingleFilter outputted more tokens than expected");

                String termText = termAtt.Term;
                String goldText = tokensToCompare[i].Term;

                Assert.AreEqual(goldText, termText, "Wrong termText");
                Assert.AreEqual(tokensToCompare[i].StartOffset, offsetAtt.StartOffset,
                                "Wrong startOffset for token \"" + termText + "\"");
                Assert.AreEqual(tokensToCompare[i].EndOffset, offsetAtt.EndOffset,
                                "Wrong endOffset for token \"" + termText + "\"");
                Assert.AreEqual(positionIncrements[i], posIncrAtt.PositionIncrement,
                                "Wrong positionIncrement for token \"" + termText + "\"");
                Assert.AreEqual(types[i], typeAtt.Type, "Wrong type for token \"" + termText + "\"");

                i++;
            }

            Assert.AreEqual(tokensToCompare.Length, i,
                            "ShingleFilter outputted wrong # of tokens. (# output = " + i + "; # expected =" +
                            tokensToCompare.Length + ")");
        }
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     TokenStream wrapped;
     try
     {
         wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
     }
     catch (IOException)
     {
         wrapped = defaultAnalyzer.TokenStream(fieldName, reader);
     }
     ShingleFilter filter = new ShingleFilter(wrapped);
     filter.SetMaxShingleSize(maxShingleSize);
     filter.SetOutputUnigrams(outputUnigrams);
     return filter;
 }