/// <summary>
/// Initializes the token from an attribute source, storing the source itself
/// together with the term and offset attributes obtained from it.
/// </summary>
/// <param name="outerInstance">The <c>ShingleFilter</c> instance this token is associated with.</param>
/// <param name="attSource">Attribute source the term and offset attributes are read from.</param>
public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
{
    this.outerInstance = outerInstance;
    this.attSource = attSource;

    // Resolve both attributes from the source and keep them in fields.
    termAtt = attSource.GetAttribute(typeof(CharTermAttribute));
    offsetAtt = attSource.GetAttribute(typeof(OffsetAttribute));
}
/// <summary>
/// Wraps <paramref name="input"/> in a <c>ShingleFilter</c> built with this
/// instance's minimum/maximum shingle sizes and configured with its unigram,
/// separator and filler settings.
/// </summary>
/// <param name="input">The token stream to wrap.</param>
/// <returns>The newly created and configured filter.</returns>
public override ShingleFilter create(TokenStream input)
{
    // Properties are assigned in the same order as the settings are listed.
    return new ShingleFilter(input, minShingleSize, maxShingleSize)
    {
        OutputUnigrams = outputUnigrams,
        OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles,
        TokenSeparator = tokenSeparator,
        FillerToken = fillerToken
    };
}
/// <summary>
/// Creates a <c>ShingleFilter</c> over <paramref name="input"/> and copies this
/// instance's shingle settings onto it.
/// </summary>
/// <param name="input">The token stream to wrap.</param>
/// <returns>The configured filter.</returns>
public override ShingleFilter create(TokenStream input)
{
    var shingleFilter = new ShingleFilter(input, minShingleSize, maxShingleSize);

    // Unigram emission options.
    shingleFilter.OutputUnigrams = outputUnigrams;
    shingleFilter.OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles;

    // Strings used when building shingle text.
    shingleFilter.TokenSeparator = tokenSeparator;
    shingleFilter.FillerToken = fillerToken;

    return shingleFilter;
}
/// <summary>
/// Wraps the token stream of <paramref name="components"/> in a
/// <c>ShingleFilter</c> configured from this instance's settings, and returns
/// new components that pair the original tokenizer with that filter.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="components">The components whose token stream is wrapped.</param>
/// <returns>Components made of the original tokenizer and the shingle filter.</returns>
protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
{
    // The sizes are passed to the constructor and then assigned again through
    // the properties, exactly as the original sequence of statements did.
    var shingles = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize)
    {
        MinShingleSize = minShingleSize,
        MaxShingleSize = maxShingleSize,
        TokenSeparator = tokenSeparator,
        OutputUnigrams = outputUnigrams,
        OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles,
        FillerToken = fillerToken
    };

    return new TokenStreamComponents(components.Tokenizer, shingles);
}
/// <summary>
/// Initializes the sequence for the given filter: the minimum value is 1 when
/// the filter outputs unigrams, otherwise the filter's minimum shingle size.
/// The sequence is then reset.
/// </summary>
/// <param name="outerInstance">The <c>ShingleFilter</c> whose settings determine the minimum value.</param>
public CircularSequence(ShingleFilter outerInstance)
{
    this.outerInstance = outerInstance;

    if (outerInstance.outputUnigrams)
    {
        minValue = 1;
    }
    else
    {
        minValue = outerInstance.minShingleSize;
    }

    reset();
}
/// <summary>
/// Extracts the term text and start/end offsets from each expected token and
/// asserts that <paramref name="filter"/> produces exactly those values, along
/// with the given types and position increments.
/// </summary>
/// <remarks>The Java original declared <c>throws java.io.IOException</c>.</remarks>
/// <param name="filter">The filter under test.</param>
/// <param name="tokensToCompare">Expected tokens, in order.</param>
/// <param name="positionIncrements">Expected position increments.</param>
/// <param name="types">Expected token types.</param>
protected internal virtual void shingleFilterTestCommon(ShingleFilter filter, Token[] tokensToCompare, int[] positionIncrements, string[] types)
{
    int expectedCount = tokensToCompare.Length;
    string[] expectedTerms = new string[expectedCount];
    int[] expectedStarts = new int[expectedCount];
    int[] expectedEnds = new int[expectedCount];

    for (int idx = 0; idx < expectedCount; idx++)
    {
        Token expected = tokensToCompare[idx];

        // Only the first length() characters of the buffer form the term text.
        expectedTerms[idx] = new string(expected.buffer(), 0, expected.length());
        expectedStarts[idx] = expected.startOffset();
        expectedEnds[idx] = expected.endOffset();
    }

    assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, types, positionIncrements);
}
/// <summary>
/// Builds a <c>ShingleFilter</c> over a canned stream of
/// <paramref name="tokensToShingle"/> with the given sizes, separator and
/// unigram setting, then checks its output via
/// <c>shingleFilterTestCommon</c>.
/// </summary>
/// <remarks>The Java original declared <c>throws java.io.IOException</c>.</remarks>
/// <param name="tokenSeparator">Value assigned to the filter's <c>TokenSeparator</c>.</param>
/// <param name="minSize">Minimum shingle size passed to the filter constructor.</param>
/// <param name="maxSize">Maximum shingle size passed to the filter constructor.</param>
/// <param name="tokensToShingle">Input tokens fed to the filter.</param>
/// <param name="tokensToCompare">Expected output tokens.</param>
/// <param name="positionIncrements">Expected position increments.</param>
/// <param name="types">Expected token types.</param>
/// <param name="outputUnigrams">Value assigned to the filter's <c>OutputUnigrams</c>.</param>
protected internal virtual void shingleFilterTest(string tokenSeparator, int minSize, int maxSize, Token[] tokensToShingle, Token[] tokensToCompare, int[] positionIncrements, string[] types, bool outputUnigrams)
{
    var source = new CannedTokenStream(tokensToShingle);
    var shingles = new ShingleFilter(source, minSize, maxSize)
    {
        TokenSeparator = tokenSeparator,
        OutputUnigrams = outputUnigrams
    };

    shingleFilterTestCommon(shingles, tokensToCompare, positionIncrements, types);
}
/// <summary>
/// Checks bi-/tri-shingle output when the input ends with two holes, for
/// several combinations of filler token and token separator.
/// </summary>
/// <remarks>The Java original declared <c>throws java.io.IOException</c>.</remarks>
public virtual void testTwoTrailingHolesTriShingleWithTokenFiller()
{
    // Analyzing "purple wizard of the", where "of" and "the" are removed as
    // stopwords, leaving two trailing holes.
    Token[] inputTokens = new Token[]
    {
        createToken("purple", 0, 6),
        createToken("wizard", 7, 13)
    };

    // Offsets, position increments and final offset are the same in every
    // scenario below; only the shingle text changes.
    int[] startOffsets = new int[] {0, 0, 0, 7, 7, 7};
    int[] endOffsets = new int[] {6, 13, 20, 13, 20, 20};
    int[] posIncrements = new int[] {1, 0, 0, 1, 0, 0};
    const int finalOffset = 20;

    // Scenario 1: explicit "--" filler token.
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.FillerToken = "--";
    assertTokenStreamContents(
        filter,
        new string[] {"purple", "purple wizard", "purple wizard --", "wizard", "wizard --", "wizard -- --"},
        startOffsets, endOffsets, posIncrements, finalOffset);

    // Scenario 2: empty filler token.
    filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.FillerToken = "";
    assertTokenStreamContents(
        filter,
        new string[] {"purple", "purple wizard", "purple wizard ", "wizard", "wizard ", "wizard "},
        startOffsets, endOffsets, posIncrements, finalOffset);

    // Scenario 3: null filler token; same expected text as the empty filler.
    filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.FillerToken = null;
    assertTokenStreamContents(
        filter,
        new string[] {"purple", "purple wizard", "purple wizard ", "wizard", "wizard ", "wizard "},
        startOffsets, endOffsets, posIncrements, finalOffset);

    // Scenario 4: null filler token and null separator.
    filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.FillerToken = null;
    filter.TokenSeparator = null;
    assertTokenStreamContents(
        filter,
        new string[] {"purple", "purplewizard", "purplewizard", "wizard", "wizard", "wizard"},
        startOffsets, endOffsets, posIncrements, finalOffset);
}
/// <summary>
/// Checks bigram shingle output, with default filter settings, when the input
/// ends with two holes.
/// </summary>
/// <remarks>The Java original declared <c>throws java.io.IOException</c>.</remarks>
public virtual void testTwoTrailingHoles()
{
    // Analyzing "purple wizard of the", where "of" and "the" are removed as
    // stopwords, leaving two trailing holes.
    Token[] inputTokens = new Token[]
    {
        createToken("purple", 0, 6),
        createToken("wizard", 7, 13)
    };
    var source = new CannedTokenStream(2, 20, inputTokens);
    ShingleFilter filter = new ShingleFilter(source, 2, 2);

    string[] expectedTerms = new string[] {"purple", "purple wizard", "wizard", "wizard _"};
    int[] expectedStarts = new int[] {0, 0, 7, 7};
    int[] expectedEnds = new int[] {6, 13, 13, 20};
    int[] expectedPosIncrements = new int[] {1, 0, 1, 0};

    assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, expectedPosIncrements, 20);
}
/// <summary>
/// Checks bigram shingle output, with default filter settings, when the input
/// ends with a single hole.
/// </summary>
/// <remarks>The Java original declared <c>throws java.io.IOException</c>.</remarks>
public virtual void testTrailingHole2()
{
    // Analyzing "purple wizard of", where "of" is removed as a stopword,
    // leaving a trailing hole.
    Token[] inputTokens = new Token[]
    {
        createToken("purple", 0, 6),
        createToken("wizard", 7, 13)
    };
    var source = new CannedTokenStream(1, 16, inputTokens);
    ShingleFilter filter = new ShingleFilter(source, 2, 2);

    string[] expectedTerms = new string[] {"purple", "purple wizard", "wizard", "wizard _"};
    int[] expectedStarts = new int[] {0, 0, 7, 7};
    int[] expectedEnds = new int[] {6, 13, 13, 16};
    int[] expectedPosIncrements = new int[] {1, 0, 1, 0};

    assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, expectedPosIncrements, 16);
}
/// <summary>
/// Verifies that a <c>ShingleFilter</c> yields the same output a second time
/// after its tokenizer is handed a fresh reader over the same text.
/// </summary>
/// <remarks>The Java original declared <c>throws Exception</c>.</remarks>
public virtual void testReset()
{
    const string sentence = "please divide this sentence";

    // The same expectations apply to both passes over the sentence.
    string[] expectedTerms = new string[]
    {
        "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence"
    };
    int[] expectedStarts = new int[] {0, 0, 7, 7, 14, 14, 19};
    int[] expectedEnds = new int[] {6, 13, 13, 18, 18, 27, 27};
    string[] expectedTypes = new string[]
    {
        TypeAttribute.DEFAULT_TYPE, "shingle",
        TypeAttribute.DEFAULT_TYPE, "shingle",
        TypeAttribute.DEFAULT_TYPE, "shingle",
        TypeAttribute.DEFAULT_TYPE
    };
    int[] expectedPosIncrements = new int[] {1, 0, 1, 0, 1, 0, 1};

    Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(sentence));
    TokenStream filter = new ShingleFilter(wsTokenizer, 2);

    // First pass.
    assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, expectedTypes, expectedPosIncrements);

    // Second pass: give the tokenizer a new reader and expect identical output.
    wsTokenizer.Reader = new StringReader(sentence);
    assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, expectedTypes, expectedPosIncrements);
}
/// <summary>
/// Initializes the token from an attribute source, keeping the source and the
/// term and offset attributes fetched from it.
/// </summary>
/// <param name="outerInstance">The <c>ShingleFilter</c> instance this token is associated with.</param>
/// <param name="attSource">Attribute source the term and offset attributes are read from.</param>
public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
{
    this.outerInstance = outerInstance;
    this.attSource = attSource;

    // Fetch both attributes once and store them in fields.
    termAtt = attSource.getAttribute(typeof(CharTermAttribute));
    offsetAtt = attSource.getAttribute(typeof(OffsetAttribute));
}
/// <summary>
/// Wraps <paramref name="input"/> in a <c>ShingleFilter</c>, using the
/// configured maximum shingle size or
/// <c>ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE</c> when none is set.
/// </summary>
/// <param name="input">The token stream to wrap.</param>
/// <returns>The newly created shingle filter.</returns>
public override TokenStream create(TokenStream input)
{
    // Fall back to the filter's default when no maximum size was configured.
    var effectiveMaxSize = _maxShingleSize ?? ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE;
    return new ShingleFilter(input, effectiveMaxSize);
}