public override Token Next()
{
    if (i == TOKENS.Length)
        return null;
    Token t = new Token(TOKENS[i], i, i);
    t.SetPositionIncrement(INCREMENTS[i]);
    i++;
    return t;
}
public override Token Next()
{
    if (i == TOKENS.Length)
    {
        return null;
    }
    Token t = new Token(TOKENS[i], i, i);
    t.SetPositionIncrement(INCREMENTS[i]);
    i++;
    return t;
}
/*
 * (non-Javadoc)
 *
 * @see Lucene.Net.Analysis.TokenStream#next()
 */
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    int posIncr = 1;

    while (true)
    {
        int tokenType = scanner.GetNextToken();

        if (tokenType == StandardTokenizerImpl.YYEOF)
        {
            return null;
        }

        if (scanner.Yylength() <= maxTokenLength)
        {
            reusableToken.Clear();
            reusableToken.SetPositionIncrement(posIncr);
            scanner.GetText(reusableToken);
            int start = scanner.Yychar();
            reusableToken.SetStartOffset(start);
            reusableToken.SetEndOffset(start + reusableToken.TermLength());

            // This 'if' should be removed in the next release. For now, it converts
            // invalid acronyms to HOST. When removed, only the 'else' part should
            // remain.
            if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
            {
                if (replaceInvalidAcronym)
                {
                    reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
                    reusableToken.SetTermLength(reusableToken.TermLength() - 1); // remove extra '.'
                }
                else
                {
                    reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
                }
            }
            else
            {
                reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
            }
            return reusableToken;
        }
        // When we skip a too-long term, we still increment the
        // position increment
        else
        {
            posIncr++;
        }
    }
}
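// A minimal consumer sketch, not part of the tokenizer above: it shows how the
// reusable-token Next(Token) overload is typically driven in the Lucene.Net 2.x
// API, with a single caller-owned Token reused across calls. The method name,
// the reader argument, and the console output are assumptions for illustration.
private static void DumpTokens(System.IO.TextReader reader)
{
    TokenStream stream = new StandardTokenizer(reader);
    Token reusable = new Token();
    for (Token tok = stream.Next(reusable); tok != null; tok = stream.Next(reusable))
    {
        // Print term text, offsets, and token type for each emitted token.
        System.Console.WriteLine(tok.TermText() + " [" + tok.StartOffset() + "-" + tok.EndOffset() + "] " + tok.Type());
    }
    stream.Close();
}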
/// <summary>Returns the next input Token, after being stemmed </summary>
public override Token Next()
{
    Token token = input.Next();
    if (token == null)
        return null;
    stemmer.SetCurrent(token.TermText());
    try
    {
        stemMethod();
    }
    catch (System.Exception e)
    {
        throw new System.SystemException(e.ToString());
    }

    Token newToken = new Token(stemmer.GetCurrent(), token.StartOffset(), token.EndOffset(), token.Type());
    newToken.SetPositionIncrement(token.GetPositionIncrement());
    return newToken;
}
/// <summary>Returns the next input Token, after being stemmed </summary>
public override Token Next()
{
    Token token = input.Next();
    if (token == null)
    {
        return null;
    }
    stemmer.SetCurrent(token.TermText());
    try
    {
        stemMethod.Invoke(stemmer, (System.Object[]) EMPTY_ARGS);
    }
    catch (System.Exception e)
    {
        throw new System.SystemException(e.ToString());
    }

    Token newToken = new Token(stemmer.GetCurrent(), token.StartOffset(), token.EndOffset(), token.Type());
    newToken.SetPositionIncrement(token.GetPositionIncrement());
    return newToken;
}
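// A rough usage sketch, not taken from the filter above: it runs text through a
// Snowball-stemming analysis chain and reads the stemmed terms back one by one.
// The contrib SnowballAnalyzer type, the "English" stemmer name, the "body" field
// name, and the method name are assumptions for illustration only.
private static void PrintStemmedTerms(System.String text)
{
    Analyzer analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer("English");
    TokenStream ts = analyzer.TokenStream("body", new System.IO.StringReader(text));
    for (Token tok = ts.Next(); tok != null; tok = ts.Next())
    {
        // Each token carries the stemmed term plus the position increment copied
        // from the original token.
        System.Console.WriteLine(tok.TermText() + " +" + tok.GetPositionIncrement());
    }
    ts.Close();
}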
private void InitBlock(HighlighterTest enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
    lst = new System.Collections.ArrayList();
    Token t;
    t = new Token("hi", 0, 2);
    lst.Add(t);
    t = new Token("hispeed", 0, 8);
    lst.Add(t);
    t = new Token("speed", 3, 8);
    t.SetPositionIncrement(0);
    lst.Add(t);
    t = new Token("10", 8, 10);
    lst.Add(t);
    t = new Token("foo", 11, 14);
    lst.Add(t);
    iter = lst.GetEnumerator();
}
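// A small follow-on sketch, not part of the test fixture above: because "speed" is
// added with a position increment of 0, it occupies the same position as "hispeed",
// so a phrase query built from either reading can match the same indexed text. The
// field name "text" and the helper name are assumptions for illustration.
private static PhraseQuery BuildOverlapPhrase()
{
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("text", "hi"));
    pq.Add(new Term("text", "speed")); // lands on the position shared with "hispeed"
    return pq;
}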
public override Token Next()
{
    if (currentRealToken == null)
    {
        // No expansion in progress: pull the next real token from the wrapped stream.
        Token nextRealToken = realStream.Next();
        if (nextRealToken == null)
        {
            return null;
        }
        System.String expansions = (System.String) synonyms[nextRealToken.TermText()];
        if (expansions == null)
        {
            return nextRealToken;
        }
        // The token has synonyms: remember it and start handing out its expansions
        // on subsequent calls.
        st = new Tokenizer(expansions, ",");
        if (st.HasMoreTokens())
        {
            currentRealToken = nextRealToken;
        }
        return currentRealToken;
    }
    else
    {
        // Emit the next synonym at the same offsets and the same position
        // (increment 0) as the real token it expands.
        System.String nextExpandedValue = st.NextToken();
        Token expandedToken = new Token(nextExpandedValue, currentRealToken.StartOffset(), currentRealToken.EndOffset());
        expandedToken.SetPositionIncrement(0);
        if (!st.HasMoreTokens())
        {
            currentRealToken = null;
            st = null;
        }
        return expandedToken;
    }
}
/*
 * (non-Javadoc)
 *
 * @see Lucene.Net.Analysis.TokenStream#next()
 */
public override Token Next(Token result)
{
    int posIncr = 1;

    while (true)
    {
        int tokenType = scanner.GetNextToken();

        if (tokenType == StandardTokenizerImpl.YYEOF)
        {
            return null;
        }

        if (scanner.Yylength() <= maxTokenLength)
        {
            result.Clear();
            result.SetPositionIncrement(posIncr);
            scanner.GetText(result);
            int start = scanner.Yychar();
            result.SetStartOffset(start);
            result.SetEndOffset(start + result.TermLength());

            // This 'if' should be removed in the next release. For now, it converts
            // invalid acronyms to HOST. When removed, only the 'else' part should
            // remain.
            if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
            {
                if (replaceInvalidAcronym)
                {
                    result.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
                    result.SetTermLength(result.TermLength() - 1); // remove extra '.'
                }
                else
                {
                    result.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
                }
            }
            else
            {
                result.SetType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
            }
            return result;
        }
        // When we skip a too-long term, we still increment the
        // position increment
        else
            posIncr++;
    }
}
public void TestNegativePositions()
{
    SinkTokenizer tokens = new SinkTokenizer();
    Token t = new Token();
    t.SetTermText("a");
    t.SetPositionIncrement(0);
    tokens.Add(t);
    t.SetTermText("b");
    t.SetPositionIncrement(1);
    tokens.Add(t);
    t.SetTermText("c");
    tokens.Add(t);

    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
    Document doc = new Document();
    doc.Add(new Field("field", tokens));
    w.AddDocument(doc);
    w.Close();

    IndexSearcher s = new IndexSearcher(dir);
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("field", "a"));
    pq.Add(new Term("field", "b"));
    pq.Add(new Term("field", "c"));
    Hits hits = s.Search(pq);
    Assert.AreEqual(1, hits.Length());

    Query q = new SpanTermQuery(new Term("field", "a"));
    hits = s.Search(q);
    Assert.AreEqual(1, hits.Length());

    TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a"));
    Assert.IsTrue(tps.Next());
    Assert.AreEqual(1, tps.Freq());
    // Positions start at -1 before the first token, so a first-token increment of 0
    // leaves "a" at position -1.
    Assert.AreEqual(-1, tps.NextPosition());

    Assert.IsTrue(_TestUtil.CheckIndex(dir));
    s.Close();
    dir.Close();
}
public override Token Next()
{
    if (tokenUpto >= Enclosing_Instance.tokens.Length)
    {
        return null;
    }
    else
    {
        Token t = new Token();
        TestToken testToken = Enclosing_Instance.tokens[tokenUpto++];
        t.SetTermText(testToken.text);
        if (tokenUpto > 1)
        {
            // Increment is the gap from the previous token's absolute position.
            t.SetPositionIncrement(testToken.pos - Enclosing_Instance.tokens[tokenUpto - 2].pos);
        }
        else
        {
            // First token: positions start at -1, so pos + 1 places it at the
            // intended absolute position.
            t.SetPositionIncrement(testToken.pos + 1);
        }
        t.SetStartOffset(testToken.startOffset);
        t.SetEndOffset(testToken.endOffset);
        return t;
    }
}