/// <summary>
/// Verifies every Token constructor path: default, (start,end),
/// (start,end,flags) and (start,end,type), plus the defaults for
/// Type ("word") and Flags (0) after SetTermBuffer.
/// </summary>
public virtual void TestCtor()
{
    Token t = new Token();
    char[] content = "hello".ToCharArray();
    t.SetTermBuffer(content, 0, content.Length);
    // SetTermBuffer must copy the chars: the token's internal buffer
    // is not the caller's array.
    Assert.AreNotEqual(t.TermBuffer(), content);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("word", t.Type); // default type
    Assert.AreEqual(0, t.Flags);     // default flags

    t = new Token(6, 22);
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("(hello,6,22)", t.ToString());
    Assert.AreEqual("word", t.Type);
    Assert.AreEqual(0, t.Flags);

    t = new Token(6, 22, 7);
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("(hello,6,22)", t.ToString());
    Assert.AreEqual(7, t.Flags); // flags from ctor are preserved

    t = new Token(6, 22, "junk");
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("(hello,6,22,type=junk)", t.ToString());
    Assert.AreEqual(0, t.Flags);
}
/// <summary>
/// Emits the whole remaining input as one token on the first call;
/// every later call returns null.
/// </summary>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    if (done)
    {
        return null;
    }
    done = true;
    reusableToken.Clear();
    char[] termBuf = reusableToken.TermBuffer();
    int filled = 0;
    while (true)
    {
        int read = input.Read(termBuf, filled, termBuf.Length - filled);
        if (read <= 0)
        {
            break; // end of input
        }
        filled += read;
        if (filled == termBuf.Length)
        {
            // Buffer is full: grow it so the next Read has room.
            termBuf = reusableToken.ResizeTermBuffer(1 + termBuf.Length);
        }
    }
    reusableToken.SetTermLength(filled);
    return reusableToken;
}
/// <summary>
/// Folds Latin-1 accented characters out of each token's term. The term is
/// rewritten only when it actually contains a character in the
/// U+00C0..U+0178 range; otherwise the token passes through untouched.
/// </summary>
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
    {
        return null;
    }
    char[] termChars = result.TermBuffer();
    int termLen = result.TermLength();
    for (int pos = 0; pos < termLen; pos++)
    {
        char ch = termChars[pos];
        if (ch >= '\u00c0' && ch <= '\u0178')
        {
            RemoveAccents(termChars, termLen);
            result.SetTermBuffer(output, 0, outputPos);
            break;
        }
    }
    return result;
}
/// <summary>
/// Emits the whole remaining input as one token on the first call;
/// every later call returns null.
/// </summary>
public override Token Next(Token result)
{
    if (done)
    {
        return null;
    }
    done = true;
    result.Clear();
    char[] termBuf = result.TermBuffer();
    int filled = 0;
    while (true)
    {
        int read = input.Read(termBuf, filled, termBuf.Length - filled);
        if (read <= 0)
        {
            break; // end of input
        }
        filled += read;
        if (filled == termBuf.Length)
        {
            // Buffer full: grow it so the next Read has room.
            termBuf = result.ResizeTermBuffer(1 + termBuf.Length);
        }
    }
    result.termLength = filled;
    return result;
}
/// <summary>
/// Folds accented characters out of each token's term. The term is rewritten
/// only when it contains at least one character in the U+00C0..U+FB06 range;
/// otherwise the token passes through untouched.
/// </summary>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    Token nextToken = input.Next(reusableToken);
    if (nextToken == null)
    {
        return null;
    }
    char[] termChars = nextToken.TermBuffer();
    int termLen = nextToken.TermLength();
    int pos = 0;
    while (pos < termLen)
    {
        char ch = termChars[pos];
        if (ch >= '\u00c0' && ch <= '\ufb06')
        {
            RemoveAccents(termChars, termLen);
            nextToken.SetTermBuffer(output, 0, outputPos);
            break;
        }
        pos++;
    }
    return nextToken;
}
/// <summary>
/// Checks that term text stays consistent when mixing the string-based
/// (SetTermText/TermText) and char[]-based (SetTermBuffer/TermBuffer) APIs,
/// and that TermText reflects in-place edits to the live buffer.
/// </summary>
public virtual void TestMixedStringArray()
{
    Token token = new Token("hello", 0, 5);
    Assert.AreEqual(token.TermText(), "hello");
    Assert.AreEqual(token.TermLength(), 5);
    Assert.AreEqual(new System.String(token.TermBuffer(), 0, 5), "hello");

    token.SetTermText("hello2");
    Assert.AreEqual(token.TermLength(), 6);
    Assert.AreEqual(new System.String(token.TermBuffer(), 0, 6), "hello2");

    token.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual(token.TermText(), "hello3");

    // TermBuffer() hands out the live buffer, so mutating it must show
    // through TermText().
    char[] liveBuffer = token.TermBuffer();
    liveBuffer[1] = 'o';
    Assert.AreEqual(token.TermText(), "hollo3");
}
// Creates a sub-token covering [termBufferOffset, termBufferOffset + termBufferLength)
// of oriToken's term buffer, shifting offsets so they stay absolute, tags it
// as digit or letter, and enqueues it for later emission.
void AddToken(Token oriToken, int termBufferOffset, int termBufferLength, byte type)
{
    int subStart = oriToken.StartOffset + termBufferOffset;
    Token subToken = new Token(oriToken.TermBuffer(), termBufferOffset, termBufferLength, subStart, subStart + termBufferLength);
    subToken.Type = type == (byte)UnicodeCategory.DecimalDigitNumber ? Word.TYPE_DIGIT : Word.TYPE_LETTER;
    tokenQueue.Enqueue(subToken);
}
/// <summary>
/// Runs the stemmer over each token's term, replacing the term buffer only
/// when stemming actually changed it.
/// </summary>
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
    {
        return null;
    }
    bool changed = stemmer.Stem(result.TermBuffer(), 0, result.termLength);
    if (changed)
    {
        result.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
    }
    return result;
}
/// <summary>
/// ResizeTermBuffer must always yield a buffer of at least the requested
/// size and must never lose the existing term text.
/// </summary>
public virtual void TestResize()
{
    Token token = new Token();
    char[] chars = "hello".ToCharArray();
    token.SetTermBuffer(chars, 0, chars.Length);
    for (int requested = 0; requested < 2000; requested++)
    {
        token.ResizeTermBuffer(requested);
        Assert.IsTrue(requested <= token.TermBuffer().Length);
        Assert.AreEqual("hello", token.Term);
    }
}
// Core char-tokenizer loop (reusable-Token API): accumulates a maximal run of
// characters for which IsTokenChar(c) is true, normalizing each via
// Normalize(c), then sets the token's term length and absolute start/end
// offsets. Returns null at true end of input.
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    reusableToken.Clear();
    int length = 0;
    int start = bufferIndex;
    char[] buffer = reusableToken.TermBuffer();
    while (true)
    {
        if (bufferIndex >= dataLen)
        {
            // Refill ioBuffer. 'offset' accumulates chars consumed in earlier
            // refills so token offsets stay absolute across refills.
            offset += dataLen;
            dataLen = input is Lucene.Net.Index.ReusableStringReader ? ((Lucene.Net.Index.ReusableStringReader) input).Read(ioBuffer) : input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
            if (dataLen <= 0)
            {
                if (length > 0)
                    break; // flush the token accumulated so far
                else
                    return null; // nothing buffered: end of stream
            }
            bufferIndex = 0;
        }
        char c = ioBuffer[bufferIndex++];

        if (IsTokenChar(c))
        {
            // if it's a token char
            if (length == 0)
                // start of token: record its absolute start offset
                start = offset + bufferIndex - 1;
            else if (length == buffer.Length)
                buffer = reusableToken.ResizeTermBuffer(1 + length);

            buffer[length++] = Normalize(c); // buffer it, normalized

            if (length == MAX_WORD_LEN)
                // buffer overflow! emit what we have
                break;
        }
        else if (length > 0)
            // at non-Letter w/ chars
            break; // return 'em
    }
    reusableToken.SetTermLength(length);
    reusableToken.SetStartOffset(start);
    reusableToken.SetEndOffset(start + length);
    return reusableToken;
}
// Core char-tokenizer loop (direct-field variant): accumulates a maximal run
// of characters for which IsTokenChar(c) is true, normalizing each via
// Normalize(c), then writes term length and absolute start/end offsets
// directly into the token's fields. Returns null at true end of input.
public override Token Next(Token token)
{
    token.Clear();
    int length = 0;
    int start = bufferIndex;
    char[] buffer = token.TermBuffer();
    while (true)
    {
        if (bufferIndex >= dataLen)
        {
            // Refill ioBuffer. 'offset' accumulates chars consumed in earlier
            // refills so token offsets stay absolute across refills.
            offset += dataLen;
            dataLen = input is Lucene.Net.Index.DocumentsWriter.ReusableStringReader ? ((Lucene.Net.Index.DocumentsWriter.ReusableStringReader) input).Read(ioBuffer) : input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
            if (dataLen <= 0)
            {
                if (length > 0)
                    break; // flush the token accumulated so far
                else
                    return null; // nothing buffered: end of stream
            }
            bufferIndex = 0;
        }
        char c = ioBuffer[bufferIndex++];

        if (IsTokenChar(c))
        {
            // if it's a token char
            if (length == 0)
                // start of token: record its absolute start offset
                start = offset + bufferIndex - 1;
            else if (length == buffer.Length)
                buffer = token.ResizeTermBuffer(1 + length);

            buffer[length++] = Normalize(c); // buffer it, normalized

            if (length == MAX_WORD_LEN)
                // buffer overflow! emit what we have
                break;
        }
        else if (length > 0)
            // at non-Letter w/ chars
            break; // return 'em
    }
    token.termLength = length;
    token.startOffset = start;
    token.endOffset = start + length;
    return token;
}
/// <summary>
/// Checks term consistency when mixing string-based and char[]-based
/// SetTermBuffer overloads, and that Term reflects in-place edits to
/// the live buffer.
/// </summary>
public virtual void TestMixedStringArray()
{
    Token token = new Token("hello", 0, 5);
    Assert.AreEqual(token.TermLength(), 5);
    Assert.AreEqual(token.Term, "hello");

    token.SetTermBuffer("hello2");
    Assert.AreEqual(token.TermLength(), 6);
    Assert.AreEqual(token.Term, "hello2");

    token.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual(token.Term, "hello3");

    // TermBuffer() hands out the live buffer; mutating it must show in Term.
    char[] liveBuffer = token.TermBuffer();
    liveBuffer[1] = 'o';
    Assert.AreEqual(token.Term, "hollo3");
}
/// <summary>
/// Runs the stemmer over each token's term; the term buffer is replaced
/// only when stemming produced a new form.
/// </summary>
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
    {
        return null;
    }
    char[] termChars = result.TermBuffer();
    if (stemmer.Stem(termChars, 0, result.termLength))
    {
        result.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
    }
    return result;
}
/// <summary>
/// Lowercases each token's term in place.
/// NOTE(review): System.Char.ToLower is culture-sensitive — presumably
/// intended here; confirm against the invariant-culture variant.
/// </summary>
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
    {
        return null;
    }
    char[] termChars = result.TermBuffer();
    int termLen = result.termLength;
    for (int pos = 0; pos < termLen; pos++)
    {
        termChars[pos] = System.Char.ToLower(termChars[pos]);
    }
    return result;
}
/// <summary>
/// Runs the stemmer over each token's term (reusable-Token API); the term
/// buffer is replaced only when stemming produced a new form.
/// </summary>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    Token nextToken = input.Next(reusableToken);
    if (nextToken == null)
    {
        return null;
    }
    char[] termChars = nextToken.TermBuffer();
    int termLen = nextToken.TermLength();
    bool changed = stemmer.Stem(termChars, 0, termLen);
    if (changed)
    {
        nextToken.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
    }
    return nextToken;
}
/// <summary>
/// Lowercases each token's term in place.
/// NOTE(review): System.Char.ToLower is culture-sensitive — presumably
/// intended here; confirm against the invariant-culture variant.
/// </summary>
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
    {
        return null;
    }
    char[] chars = result.TermBuffer();
    int count = result.termLength;
    int idx = 0;
    while (idx < count)
    {
        chars[idx] = System.Char.ToLower(chars[idx]);
        idx++;
    }
    return result;
}
/// <summary>
/// Checks that TermText(), Term() and TermBuffer() stay consistent when
/// mixing string-based and char[]-based setters, and that TermText()
/// reflects in-place edits to the live buffer.
/// </summary>
public virtual void TestMixedStringArray()
{
    Token token = new Token("hello", 0, 5);
    Assert.AreEqual(token.TermText(), "hello");
    Assert.AreEqual(token.TermLength(), 5);
    Assert.AreEqual(token.Term(), "hello");

    token.SetTermText("hello2");
    Assert.AreEqual(token.TermLength(), 6);
    Assert.AreEqual(token.Term(), "hello2");

    token.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual(token.TermText(), "hello3");

    // TermBuffer() hands out the live buffer; mutating it must show
    // through TermText().
    char[] liveBuffer = token.TermBuffer();
    liveBuffer[1] = 'o';
    Assert.AreEqual(token.TermText(), "hollo3");
}
/// <summary>
/// Cloning a Token must deep-copy both the term buffer and the payload:
/// equal values, distinct object identities.
/// </summary>
public virtual void TestClone()
{
    Token original = new Token(0, 5);
    char[] chars = "hello".ToCharArray();
    original.SetTermBuffer(chars, 0, 5);
    char[] originalBuffer = original.TermBuffer();

    Token cloned = (Token)TestSimpleAttributeImpls.AssertCloneIsEqual(original);
    Assert.AreEqual(original.Term, cloned.Term);
    Assert.AreNotSame(originalBuffer, cloned.TermBuffer()); // buffer is copied, not shared

    Payload payload = new Payload(new byte[] { 1, 2, 3, 4 });
    original.Payload = payload;
    cloned = (Token)TestSimpleAttributeImpls.AssertCloneIsEqual(original);
    Assert.AreEqual(payload, cloned.Payload);
    Assert.AreNotSame(payload, cloned.Payload); // payload is copied, not shared
}
/// <summary>
/// Lowercases each token's term in place (reusable-Token API).
/// NOTE(review): System.Char.ToLower is culture-sensitive — presumably
/// intended here; confirm against the invariant-culture variant.
/// </summary>
/// <param name="reusableToken">token to reuse; must not be null</param>
/// <returns>the lowercased token, or null at end of stream</returns>
public override Token Next(Token reusableToken)
{
    // Consistency fix: every sibling reusable-token filter asserts the
    // non-null contract; this one was missing the assert.
    System.Diagnostics.Debug.Assert(reusableToken != null);
    Token nextToken = input.Next(reusableToken);
    if (nextToken != null)
    {
        char[] buffer = nextToken.TermBuffer();
        int length = nextToken.TermLength();
        for (int i = 0; i < length; i++)
        {
            buffer[i] = System.Char.ToLower(buffer[i]);
        }
        return (nextToken);
    }
    else
    {
        return (null);
    }
}
/// <summary>
/// Returns the next input Token whose termText() is not a stop word.
/// When position increments are enabled, the increments of skipped stop
/// words are folded into the next kept token.
/// </summary>
public override Token Next(Token result)
{
    int skippedPositions = 0;
    for (result = input.Next(result); result != null; result = input.Next(result))
    {
        if (stopWords.Contains(result.TermBuffer(), 0, result.termLength))
        {
            // Stop word: remember its increment for the next kept token.
            skippedPositions += result.GetPositionIncrement();
            continue;
        }
        if (enablePositionIncrements)
        {
            result.SetPositionIncrement(result.GetPositionIncrement() + skippedPositions);
        }
        return result;
    }
    return null; // reached end of stream
}
/// <summary>
/// Returns the next input Token whose term() is not a stop word.
/// When position increments are enabled, the increments of skipped stop
/// words are folded into the next kept token.
/// </summary>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    int skippedPositions = 0;
    Token nextToken = input.Next(reusableToken);
    while (nextToken != null)
    {
        if (!stopWords.Contains(nextToken.TermBuffer(), 0, nextToken.TermLength()))
        {
            if (enablePositionIncrements)
            {
                nextToken.SetPositionIncrement(nextToken.GetPositionIncrement() + skippedPositions);
            }
            return nextToken;
        }
        // Stop word: remember its increment for the next kept token.
        skippedPositions += nextToken.GetPositionIncrement();
        nextToken = input.Next(reusableToken);
    }
    return null; // reached end of stream
}
/// <summary>
/// Emits the whole remaining input as one token on the first call;
/// every later call returns null.
/// </summary>
public override Token Next(Token result)
{
    if (done)
    {
        return null;
    }
    done = true;
    result.Clear();
    char[] chars = result.TermBuffer();
    int count = 0;
    for (;;)
    {
        int got = input.Read(chars, count, chars.Length - count);
        if (got <= 0)
        {
            break; // end of input
        }
        count += got;
        if (count == chars.Length)
        {
            // Full: grow the buffer so the next Read has room.
            chars = result.ResizeTermBuffer(1 + chars.Length);
        }
    }
    result.termLength = count;
    return result;
}
/// <summary>
/// Emits the whole remaining input as one token on the first call;
/// every later call returns null.
/// </summary>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    if (done)
    {
        return null;
    }
    done = true;
    reusableToken.Clear();
    char[] chars = reusableToken.TermBuffer();
    int count = 0;
    for (;;)
    {
        int got = input.Read(chars, count, chars.Length - count);
        if (got <= 0)
        {
            break; // end of input
        }
        count += got;
        if (count == chars.Length)
        {
            // Full: grow the buffer so the next Read has room.
            chars = reusableToken.ResizeTermBuffer(1 + chars.Length);
        }
    }
    reusableToken.SetTermLength(count);
    return reusableToken;
}
/// <summary>
/// Folds Latin-1 accented characters out of each token's term. The term
/// is rewritten only when it contains a character in the U+00C0..U+0178
/// range; otherwise the token passes through untouched.
/// </summary>
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
    {
        return null;
    }
    char[] chars = result.TermBuffer();
    int count = result.TermLength();
    int idx = 0;
    while (idx < count)
    {
        char ch = chars[idx];
        if (ch >= '\u00c0' && ch <= '\u0178')
        {
            RemoveAccents(chars, count);
            result.SetTermBuffer(output, 0, outputPos);
            break;
        }
        idx++;
    }
    return result;
}
/// <summary>
/// Returns the next input Token whose termText() is not a stop word.
/// When position increments are enabled, the increments of skipped stop
/// words are folded into the next kept token.
/// </summary>
public override Token Next(Token result)
{
    int skipped = 0;
    while ((result = input.Next(result)) != null)
    {
        bool isStopWord = stopWords.Contains(result.TermBuffer(), 0, result.termLength);
        if (!isStopWord)
        {
            if (enablePositionIncrements)
            {
                result.SetPositionIncrement(result.GetPositionIncrement() + skipped);
            }
            return result;
        }
        // Stop word: remember its increment for the next kept token.
        skipped += result.GetPositionIncrement();
    }
    return null; // reached end of stream
}
/// <summary>
/// Exercises term-buffer growth: doubling content (fast growth, via the
/// char[], (string,int,int) and (string) setter overloads) and appending
/// one char at a time (slow growth).
/// NOTE(review): the expected final capacities (1048576, 32768) assume
/// power-of-two buffer growth in this Token implementation — confirm
/// against Token.ResizeTermBuffer.
/// </summary>
public virtual void TestGrow()
{
    Token t = new Token();
    System.Text.StringBuilder buf = new System.Text.StringBuilder("ab");
    // Fast growth via char[]: content doubles each pass; the token is set
    // BEFORE the append, so the last set is 2 * 2^19 = 1048576 chars.
    for (int i = 0; i < 20; i++)
    {
        char[] content = buf.ToString().ToCharArray();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(buf.Length, t.TermLength());
        Assert.AreEqual(buf.ToString(), t.Term);
        buf.Append(buf.ToString());
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1048576, t.TermBuffer().Length);

    // now as a string, first variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1048576, t.TermBuffer().Length);

    // now as a string, second variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1048576, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(32768, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(32768, t.TermBuffer().Length);
}
// Copies every field of the given legacy Token into this stream's attribute
// instances (term, offsets, type, position increment, flags, payload).
// A null token is ignored.
private void SetCurrentToken(Token token)
{
    if (token == null)
    {
        return;
    }
    ClearAttributes();
    _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
    _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
    _typeAtt.SetType(token.Type());
    _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
    _flagsAtt.SetFlags(token.GetFlags());
    _payloadAtt.SetPayload(token.GetPayload());
}
// Pure delegation: exposes the wrapped token's term buffer without copying.
public char[] TermBuffer()
{
    return delegate_Renamed.TermBuffer();
}
/// <summary>
/// Exercises term-buffer growth: doubling content (fast growth, via the
/// char[], (string,int,int) and (string) setter overloads) and appending
/// one char at a time (slow growth).
/// NOTE(review): the expected final capacities (1179654, 20167) assume the
/// ArrayUtil-style over-allocation growth policy in this Token
/// implementation — confirm against Token.ResizeTermBuffer.
/// </summary>
public virtual void TestGrow()
{
    Token t = new Token();
    System.Text.StringBuilder buf = new System.Text.StringBuilder("ab");
    // Fast growth via char[]: content doubles each pass; the token is set
    // BEFORE the append, so the last set is 2 * 2^19 = 1048576 chars.
    for (int i = 0; i < 20; i++)
    {
        char[] content = buf.ToString().ToCharArray();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(buf.Length, t.TermLength());
        Assert.AreEqual(buf.ToString(), t.Term);
        buf.Append(buf.ToString());
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1179654, t.TermBuffer().Length);

    // now as a string, first variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1179654, t.TermBuffer().Length);

    // now as a string, second variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1179654, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(20167, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(20167, t.TermBuffer().Length);
}
// Core char-tokenizer loop (reusable-Token API, fully-braced variant):
// accumulates a maximal run of characters for which IsTokenChar(c) is true,
// normalizing each via Normalize(c), then sets term length and absolute
// start/end offsets. Returns null at true end of input.
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    reusableToken.Clear();
    int length = 0;
    int start = bufferIndex;
    char[] buffer = reusableToken.TermBuffer();
    while (true)
    {
        if (bufferIndex >= dataLen)
        {
            // Refill ioBuffer. 'offset' accumulates chars consumed in earlier
            // refills so token offsets stay absolute across refills.
            offset += dataLen;
            dataLen = input is Lucene.Net.Index.ReusableStringReader ? ((Lucene.Net.Index.ReusableStringReader)input).Read(ioBuffer) : input.Read((System.Char[])ioBuffer, 0, ioBuffer.Length);
            if (dataLen <= 0)
            {
                if (length > 0)
                {
                    break; // flush the token accumulated so far
                }
                else
                {
                    return (null); // nothing buffered: end of stream
                }
            }
            bufferIndex = 0;
        }
        char c = ioBuffer[bufferIndex++];

        if (IsTokenChar(c))
        {
            // if it's a token char
            if (length == 0)
            {
                // start of token: record its absolute start offset
                start = offset + bufferIndex - 1;
            }
            else if (length == buffer.Length)
            {
                buffer = reusableToken.ResizeTermBuffer(1 + length);
            }
            buffer[length++] = Normalize(c); // buffer it, normalized
            if (length == MAX_WORD_LEN)
            {
                // buffer overflow! emit what we have
                break;
            }
        }
        else if (length > 0)
        {
            // at non-Letter w/ chars
            break; // return 'em
        }
    }
    reusableToken.SetTermLength(length);
    reusableToken.SetStartOffset(start);
    reusableToken.SetEndOffset(start + length);
    return (reusableToken);
}
// Core char-tokenizer loop (direct-field, fully-braced variant): accumulates
// a maximal run of characters for which IsTokenChar(c) is true, normalizing
// each via Normalize(c), then writes term length and absolute start/end
// offsets directly into the token's fields. Returns null at true end of input.
public override Token Next(Token token)
{
    token.Clear();
    int length = 0;
    int start = bufferIndex;
    char[] buffer = token.TermBuffer();
    while (true)
    {
        if (bufferIndex >= dataLen)
        {
            // Refill ioBuffer. 'offset' accumulates chars consumed in earlier
            // refills so token offsets stay absolute across refills.
            offset += dataLen;
            dataLen = input is Lucene.Net.Index.DocumentsWriter.ReusableStringReader ? ((Lucene.Net.Index.DocumentsWriter.ReusableStringReader)input).Read(ioBuffer) : input.Read((System.Char[])ioBuffer, 0, ioBuffer.Length);
            if (dataLen <= 0)
            {
                if (length > 0)
                {
                    break; // flush the token accumulated so far
                }
                else
                {
                    return (null); // nothing buffered: end of stream
                }
            }
            bufferIndex = 0;
        }
        char c = ioBuffer[bufferIndex++];

        if (IsTokenChar(c))
        {
            // if it's a token char
            if (length == 0)
            {
                // start of token: record its absolute start offset
                start = offset + bufferIndex - 1;
            }
            else if (length == buffer.Length)
            {
                buffer = token.ResizeTermBuffer(1 + length);
            }
            buffer[length++] = Normalize(c); // buffer it, normalized
            if (length == MAX_WORD_LEN)
            {
                // buffer overflow! emit what we have
                break;
            }
        }
        else if (length > 0)
        {
            // at non-Letter w/ chars
            break; // return 'em
        }
    }
    token.termLength = length;
    token.startOffset = start;
    token.endOffset = start + length;
    return (token);
}
/// <summary>
/// CopyTo must deep-copy the token: an empty token copies to an empty term,
/// and both the term buffer and the payload are copied by value (equal
/// contents, distinct object identities).
/// </summary>
public virtual void TestCopyTo()
{
    Token source = new Token();
    Token copied = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(source);
    Assert.AreEqual("", source.Term);
    Assert.AreEqual("", copied.Term);

    source = new Token(0, 5);
    char[] chars = "hello".ToCharArray();
    source.SetTermBuffer(chars, 0, 5);
    char[] sourceBuffer = source.TermBuffer();
    copied = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(source);
    Assert.AreEqual(source.Term, copied.Term);
    Assert.AreNotSame(sourceBuffer, copied.TermBuffer()); // buffer copied, not shared

    Payload payload = new Payload(new byte[]{1, 2, 3, 4});
    source.Payload = payload;
    copied = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(source);
    Assert.AreEqual(payload, copied.Payload);
    Assert.AreNotSame(payload, copied.Payload); // payload copied, not shared
}