/// <summary>
/// Builds a <see cref="ShingleFilter"/> over a canned stream of <paramref name="tokensToShingle"/>
/// and hands it to <c>shingleFilterTestCommon</c> together with the expected values.
/// </summary>
/// <param name="minSize">Minimum shingle size passed to the filter constructor.</param>
/// <param name="maxSize">Maximum shingle size passed to the filter constructor.</param>
/// <param name="tokensToShingle">Tokens fed into the filter.</param>
/// <param name="tokensToCompare">Expected tokens, forwarded to the common check.</param>
/// <param name="positionIncrements">Expected position increments, forwarded to the common check.</param>
/// <param name="types">Expected token types, forwarded to the common check.</param>
/// <param name="outputUnigrams">Value passed to <c>SetOutputUnigrams</c> on the filter.</param>
protected internal virtual void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle, Token[] tokensToCompare, int[] positionIncrements, string[] types, bool outputUnigrams)
{
    var cannedInput = new CannedTokenStream(tokensToShingle);
    var shingles = new ShingleFilter(cannedInput, minSize, maxSize);
    shingles.SetOutputUnigrams(outputUnigrams);

    shingleFilterTestCommon(shingles, tokensToCompare, positionIncrements, types);
}
/// <summary>
/// Wraps <paramref name="input"/> in a <see cref="ShingleFilter"/> configured from this
/// instance's shingle-size, unigram, separator and filler-token settings.
/// </summary>
/// <param name="input">The token stream to wrap.</param>
/// <returns>The configured shingle filter.</returns>
public override TokenStream Create(TokenStream input)
{
    var shingles = new ShingleFilter(input, minShingleSize, maxShingleSize);

    // Settings are applied in the same order as before.
    shingles.SetOutputUnigrams(outputUnigrams);
    shingles.SetOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    shingles.SetTokenSeparator(tokenSeparator);
    shingles.SetFillerToken(fillerToken);

    return shingles;
}
/// <summary>
/// Replaces the token stream of <paramref name="components"/> with a <see cref="ShingleFilter"/>
/// that wraps it, keeping the original tokenizer.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed (not used by this method).</param>
/// <param name="components">The components whose token stream is wrapped.</param>
/// <returns>New components made of the original tokenizer and the shingle filter.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    var shingles = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize);

    // The sizes are set again after construction, exactly as the original did.
    shingles.SetMinShingleSize(minShingleSize);
    shingles.SetMaxShingleSize(maxShingleSize);
    shingles.SetTokenSeparator(tokenSeparator);
    shingles.SetOutputUnigrams(outputUnigrams);
    shingles.SetOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    shingles.SetFillerToken(fillerToken);

    var wrappedComponents = new TokenStreamComponents(components.Tokenizer, shingles);
    return wrappedComponents;
}
/// <summary>
/// Obtains a token stream from the default analyzer and wraps it in a <see cref="ShingleFilter"/>.
/// </summary>
/// <param name="fieldName">Field name forwarded to the default analyzer.</param>
/// <param name="reader">Text source forwarded to the default analyzer.</param>
/// <returns>The shingle filter wrapping the default analyzer's stream.</returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream source;
    try
    {
        source = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
    }
    catch (IOException)
    {
        // The reusable stream could not be obtained; ask for a regular one instead.
        source = defaultAnalyzer.TokenStream(fieldName, reader);
    }

    var shingles = new ShingleFilter(source);
    shingles.SetMaxShingleSize(maxShingleSize);
    shingles.SetOutputUnigrams(outputUnigrams);
    return shingles;
}
/// <summary>
/// Runs <paramref name="tokensToShingle"/> through a <see cref="ShingleFilter"/> and asserts that
/// every emitted token matches the expected term text, offsets, position increment and type,
/// and that the number of emitted tokens equals the number of expected tokens.
/// </summary>
/// <param name="maxSize">Size argument passed to the filter constructor.</param>
/// <param name="tokensToShingle">Tokens fed into the filter.</param>
/// <param name="tokensToCompare">Expected output tokens, compared by term and offsets.</param>
/// <param name="positionIncrements">Expected position increment for each output token.</param>
/// <param name="types">Expected type for each output token.</param>
/// <param name="outputUnigrams">Value passed to <c>SetOutputUnigrams</c> on the filter.</param>
protected void ShingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare, int[] positionIncrements, String[] types, bool outputUnigrams)
{
    var shingles = new ShingleFilter(new TestTokenStream(tokensToShingle), maxSize);
    shingles.SetOutputUnigrams(outputUnigrams);

    // Attributes are registered in the same order as before.
    ITermAttribute term = shingles.AddAttribute<ITermAttribute>();
    IOffsetAttribute offsets = shingles.AddAttribute<IOffsetAttribute>();
    IPositionIncrementAttribute posIncr = shingles.AddAttribute<IPositionIncrementAttribute>();
    ITypeAttribute type = shingles.AddAttribute<ITypeAttribute>();

    int seen = 0;
    for (; shingles.IncrementToken(); seen++)
    {
        // Guard before indexing into the expectation arrays.
        Assert.IsTrue(seen < tokensToCompare.Length, "ShingleFilter outputted more tokens than expected");

        String termText = term.Term;
        Token expected = tokensToCompare[seen];

        Assert.AreEqual(expected.Term, termText, "Wrong termText");
        Assert.AreEqual(expected.StartOffset, offsets.StartOffset,
                        "Wrong startOffset for token \"" + termText + "\"");
        Assert.AreEqual(expected.EndOffset, offsets.EndOffset,
                        "Wrong endOffset for token \"" + termText + "\"");
        Assert.AreEqual(positionIncrements[seen], posIncr.PositionIncrement,
                        "Wrong positionIncrement for token \"" + termText + "\"");
        Assert.AreEqual(types[seen], type.Type,
                        "Wrong type for token \"" + termText + "\"");
    }

    Assert.AreEqual(tokensToCompare.Length, seen,
                    "ShingleFilter outputted wrong # of tokens. (# output = " + seen + "; # expected =" + tokensToCompare.Length + ")");
}
/// <summary>
/// Creates a <see cref="ShingleFilter"/> over a token stream supplied by the default analyzer.
/// The reusable stream is tried first; if that raises an <see cref="IOException"/>, a regular
/// stream is requested instead.
/// </summary>
/// <param name="fieldName">Field name passed through to the default analyzer.</param>
/// <param name="reader">Reader passed through to the default analyzer.</param>
/// <returns>A shingle filter with the maximum shingle size and unigram output applied.</returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream inner;

    try
    {
        inner = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
    }
    catch (IOException)
    {
        inner = defaultAnalyzer.TokenStream(fieldName, reader);
    }

    ShingleFilter result = new ShingleFilter(inner);
    result.SetMaxShingleSize(maxShingleSize);
    result.SetOutputUnigrams(outputUnigrams);

    return result;
}