/// <summary>
/// Checks that the string and char[] overloads of SetTermBuffer can be mixed
/// on one Token, and that writing into the array returned by TermBuffer()
/// is visible through Term.
/// </summary>
public virtual void TestMixedStringArray()
{
    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the right value as "expected".
    Token t = new Token("hello", 0, 5);
    Assert.AreEqual(5, t.TermLength());
    Assert.AreEqual("hello", t.Term);

    // Replace the term through the string overload.
    t.SetTermBuffer("hello2");
    Assert.AreEqual(6, t.TermLength());
    Assert.AreEqual("hello2", t.Term);

    // Replace the term through the char[] overload.
    t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual("hello3", t.Term);

    // Mutating the returned buffer must change what Term reports.
    char[] buffer = t.TermBuffer();
    buffer[1] = 'o';
    Assert.AreEqual("hollo3", t.Term);
}
/// <summary>
/// Fetches the next token from the wrapped input and, when its term holds at
/// least one character in the range U+00C0..U+FB06, calls RemoveAccents and
/// replaces the term with output[0..outputPos).
/// </summary>
/// <param name="reusableToken">Token handed to the input stream; asserted non-null in debug builds.</param>
/// <returns>The token returned by the input, or null when the input returns null.</returns>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    Token nextToken = input.Next(reusableToken);
    if (nextToken == null)
    {
        return null;
    }

    char[] termChars = nextToken.TermBuffer();
    int termLength = nextToken.TermLength();

    // Look for the first character that may need rewriting; a term without
    // any such character is returned exactly as the input produced it.
    bool needsRewrite = false;
    for (int pos = 0; pos < termLength && !needsRewrite; pos++)
    {
        char current = termChars[pos];
        needsRewrite = current >= '\u00c0' && current <= '\ufb06';
    }

    if (needsRewrite)
    {
        // RemoveAccents is defined elsewhere in this class; it is expected to
        // leave its result in output / outputPos, which are read right after.
        RemoveAccents(termChars, termLength);
        nextToken.SetTermBuffer(output, 0, outputPos);
    }

    return nextToken;
}
/// <summary>
/// Fetches the next token from the wrapped input and, when its term holds at
/// least one character in the range U+00C0..U+0178, calls RemoveAccents and
/// replaces the term with output[0..outputPos).
/// </summary>
/// <param name="result">Token passed through to the input stream.</param>
/// <returns>The token returned by the input, or null when the input returns null.</returns>
public override Token Next(Token result)
{
    Token token = input.Next(result);
    if (token == null)
    {
        return null;
    }

    char[] termChars = token.TermBuffer();
    int termLength = token.TermLength();

    // Terms with no character inside the range fall through the loop and are
    // returned unchanged.
    for (int pos = 0; pos < termLength; pos++)
    {
        char current = termChars[pos];
        if (current < '\u00c0' || current > '\u0178')
        {
            continue;
        }

        // First hit: rewrite the whole term once and stop scanning.
        RemoveAccents(termChars, termLength);
        token.SetTermBuffer(output, 0, outputPos);
        break;
    }

    return token;
}
/// <summary>
/// Checks that the string-based (SetTermText / TermText) and char[]-based
/// (SetTermBuffer / TermBuffer) accessors of Token can be mixed and stay
/// consistent with each other.
/// </summary>
public virtual void TestMixedStringArray()
{
    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the right value as "expected".
    Token t = new Token("hello", 0, 5);
    Assert.AreEqual("hello", t.TermText());
    Assert.AreEqual(5, t.TermLength());
    Assert.AreEqual("hello", new System.String(t.TermBuffer(), 0, 5));

    // Set through the string API, read back through the buffer API.
    t.SetTermText("hello2");
    Assert.AreEqual(6, t.TermLength());
    Assert.AreEqual("hello2", new System.String(t.TermBuffer(), 0, 6));

    // Set through the buffer API, read back through the string API.
    t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual("hello3", t.TermText());

    // Make sure that if we get the buffer and change a character,
    // TermText() reflects the change.
    char[] buffer = t.TermBuffer();
    buffer[1] = 'o';
    Assert.AreEqual("hollo3", t.TermText());
}
/// <summary>
/// Checks that the string-based accessors (SetTermText / TermText / Term) and
/// the char[]-based accessors (SetTermBuffer / TermBuffer) of Token can be
/// mixed and stay consistent with each other.
/// </summary>
public virtual void TestMixedStringArray()
{
    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the right value as "expected".
    Token t = new Token("hello", 0, 5);
    Assert.AreEqual("hello", t.TermText());
    Assert.AreEqual(5, t.TermLength());
    Assert.AreEqual("hello", t.Term());

    // Replace the term through the string setter.
    t.SetTermText("hello2");
    Assert.AreEqual(6, t.TermLength());
    Assert.AreEqual("hello2", t.Term());

    // Replace the term through the char[] setter.
    t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual("hello3", t.TermText());

    // Make sure that if we get the buffer and change a character,
    // TermText() reflects the change.
    char[] buffer = t.TermBuffer();
    buffer[1] = 'o';
    Assert.AreEqual("hollo3", t.TermText());
}
/// <summary>
/// Returns the next input Token whose term length lies between min and max,
/// both inclusive. Tokens outside that range are skipped.
/// </summary>
/// <param name="reusableToken">Token handed to the input stream; asserted non-null in debug builds.</param>
/// <returns>The first token with an accepted length, or null once the input returns null.</returns>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    Token candidate;
    while ((candidate = input.Next(reusableToken)) != null)
    {
        int termLength = candidate.TermLength();
        if (termLength < min || termLength > max)
        {
            // note: rejected tokens are ignored entirely; should we index
            // each part of them instead?
            continue;
        }

        return candidate;
    }

    // reached EOS -- nothing left to return
    return null;
}
/// <summary>
/// Fetches the next token from the wrapped input and runs the stemmer over
/// its term. When Stem reports true, the term is replaced with the
/// stemmer's result buffer.
/// </summary>
/// <param name="reusableToken">Token handed to the input stream; asserted non-null in debug builds.</param>
/// <returns>The token returned by the input, or null when the input returns null.</returns>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    Token nextToken = input.Next(reusableToken);
    if (nextToken != null)
    {
        // The term is only overwritten when Stem returns true.
        if (stemmer.Stem(nextToken.TermBuffer(), 0, nextToken.TermLength()))
        {
            nextToken.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
        }
    }

    // Null here means the input returned null.
    return nextToken;
}
/// <summary>
/// Fetches the next token from the wrapped input and lower-cases its term
/// in place, character by character.
/// </summary>
/// <param name="reusableToken">Token passed through to the input stream.</param>
/// <returns>The token returned by the input with its term lower-cased, or null when the input returns null.</returns>
public override Token Next(Token reusableToken)
{
    Token nextToken = input.Next(reusableToken);
    if (nextToken == null)
    {
        return null;
    }

    char[] buffer = nextToken.TermBuffer();
    int length = nextToken.TermLength();
    for (int i = 0; i < length; i++)
    {
        // Char.ToLower(char) uses the current thread culture, so the produced
        // terms would differ between machines (e.g. 'I' under a Turkish
        // culture). The invariant mapping keeps the output stable.
        buffer[i] = System.Char.ToLowerInvariant(buffer[i]);
    }

    return nextToken;
}
/// <summary>
/// Returns the next input Token whose term is not contained in stopWords.
/// When enablePositionIncrements is set, the position increments of the
/// skipped tokens are added to the returned token's position increment.
/// </summary>
/// <param name="reusableToken">Token handed to the input stream; asserted non-null in debug builds.</param>
/// <returns>The first token that is not a stop word, or null once the input returns null.</returns>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    // Sum of the position increments of every stop word dropped so far.
    int skipped = 0;

    Token candidate = input.Next(reusableToken);
    while (candidate != null)
    {
        if (stopWords.Contains(candidate.TermBuffer(), 0, candidate.TermLength()))
        {
            // Stop word: remember its increment and move on to the next token.
            skipped += candidate.GetPositionIncrement();
            candidate = input.Next(reusableToken);
            continue;
        }

        if (enablePositionIncrements)
        {
            candidate.SetPositionIncrement(candidate.GetPositionIncrement() + skipped);
        }

        return candidate;
    }

    // reached EOS -- nothing left to return
    return null;
}
/// <summary>
/// Fetches the next token from the wrapped input and, when its term holds at
/// least one character in the range U+00C0..U+0178, calls RemoveAccents and
/// replaces the term with output[0..outputPos).
/// </summary>
/// <param name="result">Token passed through to the input stream.</param>
/// <returns>The token returned by the input, or null when the input returns null.</returns>
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
    {
        return null;
    }

    char[] chars = result.TermBuffer();
    int count = result.TermLength();

    // Advance past every character that lies outside the rewrite range.
    int idx = 0;
    while (idx < count && (chars[idx] < '\u00c0' || chars[idx] > '\u0178'))
    {
        idx++;
    }

    // Stopping before the end means a character inside the range was found;
    // otherwise the token is returned as-is.
    if (idx < count)
    {
        RemoveAccents(chars, count);
        result.SetTermBuffer(output, 0, outputPos);
    }

    return result;
}
/// <summary>
/// Exercises growth of a Token's term buffer: repeated doubling through each
/// SetTermBuffer overload, then slow one-character-at-a-time growth. After
/// each phase the final term length and the buffer capacity are checked.
/// </summary>
public virtual void TestGrow()
{
    // Doubling growth, char[] overload.
    Token token = new Token();
    System.Text.StringBuilder builder = new System.Text.StringBuilder("ab");
    for (int round = 0; round < 20; round++)
    {
        string snapshot = builder.ToString();
        char[] chars = snapshot.ToCharArray();
        token.SetTermBuffer(chars, 0, chars.Length);
        Assert.AreEqual(builder.Length, token.TermLength());
        Assert.AreEqual(snapshot, token.Term);
        builder.Append(snapshot);
    }
    Assert.AreEqual(1048576, token.TermLength());
    Assert.AreEqual(1048576, token.TermBuffer().Length);

    // Doubling growth, string overload with explicit offset and length.
    token = new Token();
    builder = new System.Text.StringBuilder("ab");
    for (int round = 0; round < 20; round++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text, 0, text.Length);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append(text);
    }
    Assert.AreEqual(1048576, token.TermLength());
    Assert.AreEqual(1048576, token.TermBuffer().Length);

    // Doubling growth, string overload taking the whole string.
    token = new Token();
    builder = new System.Text.StringBuilder("ab");
    for (int round = 0; round < 20; round++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append(text);
    }
    Assert.AreEqual(1048576, token.TermLength());
    Assert.AreEqual(1048576, token.TermBuffer().Length);

    // Slow growth to a long term, one character per step.
    token = new Token();
    builder = new System.Text.StringBuilder("a");
    for (int step = 0; step < 20000; step++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append("a");
    }
    Assert.AreEqual(20000, token.TermLength());
    Assert.AreEqual(32768, token.TermBuffer().Length);

    // Slow growth to a long term again; this repeats the previous section
    // on a fresh Token.
    token = new Token();
    builder = new System.Text.StringBuilder("a");
    for (int step = 0; step < 20000; step++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append("a");
    }
    Assert.AreEqual(20000, token.TermLength());
    Assert.AreEqual(32768, token.TermBuffer().Length);
}
/// <summary>
/// Forwards to the wrapped delegate's TermLength().
/// </summary>
/// <returns>The value reported by delegate_Renamed.TermLength().</returns>
public int TermLength()
{
    int delegatedLength = delegate_Renamed.TermLength();
    return delegatedLength;
}
/// <summary>
/// Exercises growth of a Token's term buffer: repeated doubling through each
/// SetTermBuffer overload, then slow one-character-at-a-time growth. After
/// each phase the final term length and the buffer capacity are checked.
/// </summary>
public virtual void TestGrow()
{
    // Doubling growth, char[] overload.
    Token token = new Token();
    System.Text.StringBuilder builder = new System.Text.StringBuilder("ab");
    for (int round = 0; round < 20; round++)
    {
        string snapshot = builder.ToString();
        char[] chars = snapshot.ToCharArray();
        token.SetTermBuffer(chars, 0, chars.Length);
        Assert.AreEqual(builder.Length, token.TermLength());
        Assert.AreEqual(snapshot, token.Term);
        builder.Append(snapshot);
    }
    Assert.AreEqual(1048576, token.TermLength());
    Assert.AreEqual(1179654, token.TermBuffer().Length);

    // Doubling growth, string overload with explicit offset and length.
    token = new Token();
    builder = new System.Text.StringBuilder("ab");
    for (int round = 0; round < 20; round++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text, 0, text.Length);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append(text);
    }
    Assert.AreEqual(1048576, token.TermLength());
    Assert.AreEqual(1179654, token.TermBuffer().Length);

    // Doubling growth, string overload taking the whole string.
    token = new Token();
    builder = new System.Text.StringBuilder("ab");
    for (int round = 0; round < 20; round++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append(text);
    }
    Assert.AreEqual(1048576, token.TermLength());
    Assert.AreEqual(1179654, token.TermBuffer().Length);

    // Slow growth to a long term, one character per step.
    token = new Token();
    builder = new System.Text.StringBuilder("a");
    for (int step = 0; step < 20000; step++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append("a");
    }
    Assert.AreEqual(20000, token.TermLength());
    Assert.AreEqual(20167, token.TermBuffer().Length);

    // Slow growth to a long term again; this repeats the previous section
    // on a fresh Token.
    token = new Token();
    builder = new System.Text.StringBuilder("a");
    for (int step = 0; step < 20000; step++)
    {
        string text = builder.ToString();
        token.SetTermBuffer(text);
        Assert.AreEqual(text.Length, token.TermLength());
        Assert.AreEqual(text, token.Term);
        builder.Append("a");
    }
    Assert.AreEqual(20000, token.TermLength());
    Assert.AreEqual(20167, token.TermBuffer().Length);
}
/// <summary>
/// Copies the state of <paramref name="token"/> (term, position increment,
/// flags, offsets, type and payload) into this stream's attributes after
/// clearing them. Does nothing when the token is null.
/// </summary>
/// <param name="token">Token to copy from; may be null.</param>
private void SetCurrentToken(Token token)
{
    if (token != null)
    {
        // Clear the attributes first so nothing from a previous token remains.
        ClearAttributes();

        char[] termChars = token.TermBuffer();
        int termLength = token.TermLength();
        _termAtt.SetTermBuffer(termChars, 0, termLength);

        _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
        _flagsAtt.SetFlags(token.GetFlags());
        _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
        _typeAtt.SetType(token.Type());
        _payloadAtt.SetPayload(token.GetPayload());
    }
}