/// <summary>
/// Runs <paramref name="text"/> through <c>MyAnalyzer</c> and concatenates the
/// text of every emitted token. Single-character tokens equal to
/// <c>MyFilter.Separator</c> are dropped (they are filter artifacts, not content).
/// </summary>
/// <param name="text">Raw input text to tokenize.</param>
/// <returns>The concatenated characters of all kept tokens.</returns>
private string GetTokenizerText(string text)
{
    StringBuilder result = new StringBuilder();
    MyAnalyzer ma = new MyAnalyzer(AnalyzerFactory.stopWords);
    Lucene.Net.Analysis.TokenStream ts = ma.TokenStream("", new System.IO.StringReader(text));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = ts.Next()) != null)
        {
            int len = token.TermLength();
            char[] buff = token.TermBuffer();
            if (len == 1)
            {
                // Skip the separator marker inserted by MyFilter.
                if (buff[0] != MyFilter.Separator)
                {
                    result.Append(buff, 0, 1);
                }
            }
            else
            {
                result.Append(buff, 0, len);
            }
        }
    }
    finally
    {
        // Previously the stream leaked if Next()/Append threw; always close it.
        ts.Close();
    }
    return result.ToString();
}
/// <summary>
/// Runs <paramref name="text"/> through <c>MyAnalyzer</c> and returns the total
/// number of characters across all emitted tokens, excluding single-character
/// tokens equal to <c>MyFilter.Separator</c>. Mirrors the counting rule used by
/// <c>GetTokenizerText</c>.
/// </summary>
/// <param name="text">Raw input text to tokenize.</param>
/// <returns>The summed term lengths of all kept tokens.</returns>
private int GetTokenizerLength(string text)
{
    int result = 0;
    MyAnalyzer ma = new MyAnalyzer(AnalyzerFactory.stopWords);
    Lucene.Net.Analysis.TokenStream ts = ma.TokenStream("", new System.IO.StringReader(text));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = ts.Next()) != null)
        {
            int len = token.TermLength();
            if (len == 1)
            {
                char[] buff = token.TermBuffer();
                // Separator markers contribute nothing to the length.
                if (buff[0] != MyFilter.Separator)
                {
                    result++;
                }
            }
            else
            {
                result += len;
            }
        }
    }
    finally
    {
        // Previously the stream leaked if Next() threw; always close it.
        ts.Close();
    }
    return result;
}
/// <summary>
/// Performs application-defined tasks associated with freeing, releasing, or
/// resetting unmanaged resources. Closes the <c>source</c> and <c>result</c>
/// streams if they were opened.
/// </summary>
/// <filterpriority>2</filterpriority>
// NOTE(review): fields are not nulled after Close(), so a second Dispose()
// call will re-invoke Close() on each — presumably Close() tolerates that;
// verify, since IDisposable contracts expect Dispose to be safely repeatable.
public void Dispose()
{
    if (source != null)
    {
        source.Close();
    }
    if (result != null)
    {
        result.Close();
    }
}
/// <summary>
/// Asserts that analyzing <paramref name="input"/> with <paramref name="a"/>
/// produces exactly the tokens in <paramref name="output"/>, in order, with no
/// extra tokens afterwards.
/// </summary>
/// <param name="a">The analyzer under test.</param>
/// <param name="input">Text to feed through the analyzer.</param>
/// <param name="output">Expected token texts, in emission order.</param>
public virtual void AssertAnalyzesTo(Analyzer a, System.String input, System.String[] output)
{
    TokenStream ts = a.TokenStream("dummy", new System.IO.StringReader(input));
    try
    {
        for (int i = 0; i < output.Length; i++)
        {
            Token t = ts.Next();
            Assert.IsNotNull(t);
            // Fix: expected value goes first in NUnit's Assert.AreEqual —
            // the original had (actual, expected), producing misleading
            // failure messages. Now consistent with AssertTokenStreamContents.
            Assert.AreEqual(output[i], t.TermText());
        }
        // No tokens may remain once the expected list is exhausted.
        Assert.IsNull(ts.Next());
    }
    finally
    {
        ts.Close();
    }
}
/// <summary>
/// Asserts that <paramref name="ts"/> emits exactly the tokens in
/// <paramref name="output"/>, optionally checking offsets, types, and position
/// increments per token, plus the stream's final offset after End().
/// Any of the optional arrays may be null to skip that check; when non-null,
/// each must have one entry per expected token.
/// </summary>
/// <param name="ts">The token stream under test (uses the attribute-based API).</param>
/// <param name="output">Expected term texts, in order. Must not be null.</param>
/// <param name="startOffsets">Expected start offsets, or null to skip.</param>
/// <param name="endOffsets">Expected end offsets, or null to skip.</param>
/// <param name="types">Expected token types, or null to skip.</param>
/// <param name="posIncrements">Expected position increments, or null to skip.</param>
/// <param name="finalOffset">Expected end offset after ts.End(), or null to skip.</param>
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset)
{
    Assert.IsNotNull(output);
    // CheckClearAttributesAttribute records whether ClearAttributes() was
    // invoked by the stream chain on each IncrementToken() call.
    ICheckClearAttributesAttribute checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();
    Assert.IsTrue(ts.HasAttribute<ITermAttribute>(), "has no TermAttribute");
    ITermAttribute termAtt = ts.GetAttribute<ITermAttribute>();
    // Only fetch optional attributes when the caller asked to verify them.
    IOffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null)
    {
        Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
        offsetAtt = ts.GetAttribute<IOffsetAttribute>();
    }
    ITypeAttribute typeAtt = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
        typeAtt = ts.GetAttribute<ITypeAttribute>();
    }
    IPositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
        posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
    }
    ts.Reset();
    for (int i = 0; i < output.Length; i++)
    {
        // extra safety to enforce, that the state is not preserved and also assign bogus values
        ts.ClearAttributes();
        termAtt.SetTermBuffer("bogusTerm");
        if (offsetAtt != null)
            offsetAtt.SetOffset(14584724, 24683243);
        if (typeAtt != null)
            typeAtt.Type = "bogusType";
        if (posIncrAtt != null)
            posIncrAtt.PositionIncrement = 45987657;
        checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before
        Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
        // If the bogus values survived, some filter in the chain failed to
        // call ClearAttributes() before populating the token.
        Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");
        Assert.AreEqual(output[i], termAtt.Term, "term " + i);
        if (startOffsets != null)
            Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i);
        if (endOffsets != null)
            Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, "endOffset " + i);
        if (types != null)
            Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
        if (posIncrements != null)
            Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
    }
    // The stream must be exhausted exactly when the expected output is.
    Assert.IsFalse(ts.IncrementToken(), "end of stream");
    ts.End();
    if (finalOffset.HasValue)
        Assert.AreEqual(finalOffset, offsetAtt.EndOffset, "finalOffset ");
    ts.Close();
}
/// <summary>
/// Asserts that <paramref name="ts"/> emits exactly the tokens in
/// <paramref name="output"/>, optionally checking offsets, types, and position
/// increments per token, plus the stream's final offset after End().
/// Legacy variant using the typeof()-based (non-generic) attribute API; mirrors
/// the generic overload elsewhere in this file. Null optional arrays skip
/// the corresponding check; non-null arrays need one entry per token.
/// </summary>
/// <param name="ts">The token stream under test.</param>
/// <param name="output">Expected term texts, in order. Must not be null.</param>
/// <param name="startOffsets">Expected start offsets, or null to skip.</param>
/// <param name="endOffsets">Expected end offsets, or null to skip.</param>
/// <param name="types">Expected token types, or null to skip.</param>
/// <param name="posIncrements">Expected position increments, or null to skip.</param>
/// <param name="finalOffset">Expected end offset after ts.End(), or null to skip.</param>
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset)
{
    Assert.IsNotNull(output);
    // CheckClearAttributesAttribute records whether ClearAttributes() was
    // invoked by the stream chain on each IncrementToken() call.
    CheckClearAttributesAttribute checkClearAtt = (CheckClearAttributesAttribute)ts.AddAttribute(typeof(CheckClearAttributesAttribute));
    Assert.IsTrue(ts.HasAttribute(typeof(TermAttribute)), "has no TermAttribute");
    TermAttribute termAtt = (TermAttribute)ts.GetAttribute(typeof(TermAttribute));
    // Only fetch optional attributes when the caller asked to verify them.
    OffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(OffsetAttribute)), "has no OffsetAttribute");
        offsetAtt = (OffsetAttribute)ts.GetAttribute(typeof(OffsetAttribute));
    }
    TypeAttribute typeAtt = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(TypeAttribute)), "has no TypeAttribute");
        typeAtt = (TypeAttribute)ts.GetAttribute(typeof(TypeAttribute));
    }
    PositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(PositionIncrementAttribute)), "has no PositionIncrementAttribute");
        posIncrAtt = (PositionIncrementAttribute)ts.GetAttribute(typeof(PositionIncrementAttribute));
    }
    ts.Reset();
    for (int i = 0; i < output.Length; i++)
    {
        // extra safety to enforce, that the state is not preserved and also assign bogus values
        ts.ClearAttributes();
        termAtt.SetTermBuffer("bogusTerm");
        if (offsetAtt != null)
        {
            offsetAtt.SetOffset(14584724, 24683243);
        }
        if (typeAtt != null)
        {
            typeAtt.SetType("bogusType");
        }
        if (posIncrAtt != null)
        {
            posIncrAtt.SetPositionIncrement(45987657);
        }
        checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before
        Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
        // If the bogus values survived, some filter in the chain failed to
        // call ClearAttributes() before populating the token.
        Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");
        Assert.AreEqual(output[i], termAtt.Term(), "term " + i);
        if (startOffsets != null)
        {
            Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i);
        }
        if (endOffsets != null)
        {
            Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i);
        }
        if (types != null)
        {
            Assert.AreEqual(types[i], typeAtt.Type(), "type " + i);
        }
        if (posIncrements != null)
        {
            Assert.AreEqual(posIncrements[i], posIncrAtt.GetPositionIncrement(), "posIncrement " + i);
        }
    }
    // The stream must be exhausted exactly when the expected output is.
    Assert.IsFalse(ts.IncrementToken(), "end of stream");
    ts.End();
    if (finalOffset.HasValue)
    {
        Assert.AreEqual(finalOffset, offsetAtt.EndOffset(), "finalOffset ");
    }
    ts.Close();
}
/// <summary>
/// Close the input TokenStream.
/// Delegates to the wrapped <c>input</c> stream so the whole filter chain
/// is closed when the outermost stream is closed.
/// </summary>
public override void Close()
{
    input.Close();
}