/// <summary>
/// Verifies that a StopAnalyzer built from a custom stop-word list removes
/// exactly those words from the token stream.
/// </summary>
public virtual void TestStopList()
{
    System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
    stopWordsSet.Add("good", "good");
    stopWordsSet.Add("test", "test");
    stopWordsSet.Add("analyzer", "analyzer");

    // Copy the stop-word keys into the string[] the StopAnalyzer ctor expects
    // (replaces the previous hand-maintained parallel array — the "{{Aroush}}" TODO).
    System.String[] arrStopWordsSet = new System.String[stopWordsSet.Count];
    stopWordsSet.Keys.CopyTo(arrStopWordsSet, 0);

    StopAnalyzer newStop = new StopAnalyzer(arrStopWordsSet);
    System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream stream = newStop.TokenStream("test", reader);
    Assert.IsNotNull(stream);

    Token token = null;
    try
    {
        while ((token = stream.Next()) != null)
        {
            // No emitted token may be one of the configured stop words.
            System.String text = token.TermText();
            Assert.IsFalse(stopWordsSet.Contains(text));
        }
    }
    catch (System.IO.IOException)
    {
        // Fail explicitly with a message instead of Assert.IsTrue(false).
        Assert.Fail("unexpected IOException while reading the token stream");
    }
}
/// <summary>
/// Returns a new Analyzer instance for the given numeric type code.
/// </summary>
/// <param name="type">0/default = StockFooAnalyzer (reads AppSettings "Data"),
/// 1 = StandardAnalyzer, 2 = SimpleAnalyzer, 3 = StopAnalyzer,
/// 4 = KeywordAnalyzer, 5 = WhitespaceAnalyzer.</param>
/// <returns>A newly constructed Analyzer.</returns>
private Analyzer GetAnalyer(int type)
{
    switch (type)
    {
        case 1:
            return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        case 2:
            return new SimpleAnalyzer();
        case 3:
            return new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        case 4:
            return new KeywordAnalyzer();
        case 5:
            return new WhitespaceAnalyzer();
        default:
            // case 0 and the default both build a StockFooAnalyzer. Read the
            // "Data" setting only here (it is unused by the other analyzers) and
            // fail fast with a clear message instead of a NullReferenceException
            // when the key is missing. AppSettings[...] already returns string,
            // so the previous redundant ToString() is gone.
            string path = System.Configuration.ConfigurationSettings.AppSettings["Data"];
            if (path == null)
                throw new System.InvalidOperationException("AppSettings key 'Data' is missing; it is required by StockFooAnalyzer.");
            return new StockFooAnalyzer(path);
    }
}
/// <summary>
/// Per-test setup: creates the default StopAnalyzer and fills the
/// invalid-token table with every default English stop word.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    stop = new StopAnalyzer();
    inValidTokens = new System.Collections.Hashtable();
    foreach (System.String stopWord in StopAnalyzer.ENGLISH_STOP_WORDS)
    {
        inValidTokens.Add(stopWord, stopWord);
    }
}
/// <summary>
/// Demonstrates per-field analysis, multi-field query parsing with boosts,
/// and basic index maintenance against the shared _directory field.
/// </summary>
public void Code()
{
    Analyzer _keywordanalyzer = new KeywordAnalyzer();
    Analyzer _simpleanalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    Analyzer _stopanalyzer = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    Analyzer _whitespaceanalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    Analyzer _standardanalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // Name fields are analyzed as single keywords; all other fields use the standard analyzer.
    var _perfieldanalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(_standardanalyzer);
    _perfieldanalyzer.AddAnalyzer("firstname", _keywordanalyzer);
    _perfieldanalyzer.AddAnalyzer("lastname", _keywordanalyzer);

    // Dispose writer/reader/searcher deterministically (previously all three leaked).
    using (IndexWriter _writer = new IndexWriter(_directory, _perfieldanalyzer, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        using (IndexReader _reader = _writer.GetReader())
        using (IndexSearcher _searcher = new IndexSearcher(_reader))
        {
            //QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);

            // Search "text" and "title" with boosts; "author" stays unboosted.
            string[] fields = new[] { "text", "title", "author" };
            var boosts = new Dictionary<string, float>();
            boosts.Add("text", 2.0f);
            boosts.Add("title", 1.5f);
            QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, _standardanalyzer, boosts);
            Query query = parser.Parse("lucene is great");

            TopDocs hits = _searcher.Search(query, 1000);
            IEnumerable<Document> docs = hits.ScoreDocs.Select(hit => _searcher.Doc(hit.Doc));
            var books = docs.Select(doc => new Book()
            {
                Text = doc.Get("text"),
                Title = doc.Get("title"),
                Author = doc.Get("author"),
                Length = Int32.Parse(doc.Get("length"))
            });
        }

        _writer.Optimize();
        _writer.Commit();
        _writer.DeleteAll();
    }
}
/// <summary>
/// Checks that a StopAnalyzer built from a custom stop set filters out
/// exactly those words and leaves position increments at 1 (the default).
/// </summary>
public virtual void TestStopList()
{
    System.Collections.Hashtable stopWords = new System.Collections.Hashtable();
    stopWords.Add("good", "good");
    stopWords.Add("test", "test");
    stopWords.Add("analyzer", "analyzer");

    StopAnalyzer analyzer = new StopAnalyzer(stopWords);
    System.IO.StringReader input = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream tokens = analyzer.TokenStream("test", input);
    Assert.IsNotNull(tokens);

    Token current = null;
    while ((current = tokens.Next()) != null)
    {
        // None of the configured stop words may survive filtering.
        System.String text = current.TermText();
        Assert.IsFalse(stopWords.Contains(text));
        // by default stop tokenizer does not apply increments.
        Assert.AreEqual(1, current.GetPositionIncrement());
    }
}
/// <summary>
/// Same stop-list check as the Token-based variant, driven through the
/// attribute-based TokenStream API (IncrementToken / GetAttribute).
/// </summary>
public virtual void TestStopList()
{
    System.Collections.Hashtable stopWords = new System.Collections.Hashtable();
    stopWords.Add("good", "good");
    stopWords.Add("test", "test");
    stopWords.Add("analyzer", "analyzer");

    StopAnalyzer analyzer = new StopAnalyzer(stopWords);
    System.IO.StringReader input = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream tokens = analyzer.TokenStream("test", input);
    Assert.IsNotNull(tokens);

    TermAttribute termAtt = (TermAttribute) tokens.GetAttribute(typeof(TermAttribute));
    PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) tokens.AddAttribute(typeof(PositionIncrementAttribute));

    while (tokens.IncrementToken())
    {
        // None of the configured stop words may survive filtering.
        Assert.IsFalse(stopWords.Contains(termAtt.Term()));
        // by default stop tokenizer does not apply increments.
        Assert.AreEqual(1, posIncrAtt.GetPositionIncrement());
    }
}
/// <summary>
/// Stop-list check against the Version.LUCENE_24 analyzer using the
/// generic attribute API; 2.4 stop filtering applies no increments.
/// </summary>
public virtual void TestStopList()
{
    var stopWords = Support.Compatibility.SetFactory.GetSet<string>();
    stopWords.Add("good");
    stopWords.Add("test");
    stopWords.Add("analyzer");

    var analyzer = new StopAnalyzer(Version.LUCENE_24, stopWords);
    var input = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream tokens = analyzer.TokenStream("test", input);
    Assert.IsNotNull(tokens);

    ITermAttribute termAtt = tokens.GetAttribute<ITermAttribute>();
    IPositionIncrementAttribute posIncrAtt = tokens.AddAttribute<IPositionIncrementAttribute>();

    while (tokens.IncrementToken())
    {
        // None of the configured stop words may survive filtering.
        Assert.IsFalse(stopWords.Contains(termAtt.Term));
        // in 2.4 stop tokenizer does not apply increments.
        Assert.AreEqual(1, posIncrAtt.PositionIncrement);
    }
}
// Stores a reference to the enclosing StopAnalyzer so members of this nested
// type can reach the outer instance (pattern used by the Java-to-C# port,
// since C# nested classes have no implicit outer-instance reference).
private void InitBlock(StopAnalyzer enclosingInstance) { this.enclosingInstance = enclosingInstance; }
/// <summary>
/// Phrase queries against a StopAnalyzer-built index: an exact phrase of
/// non-stop words matches, and because this StopAnalyzer leaves no positional
/// "holes", a phrase spanning a removed stop word also matches.
/// </summary>
public virtual void TestPhraseQueryWithStopAnalyzer()
{
    RAMDirectory directory = new RAMDirectory();
    StopAnalyzer stopAnalyzer = new StopAnalyzer();
    IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory);

    // valid exact phrase query
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "stop"));
    query.Add(new Term("field", "words"));
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length());
    QueryUtils.Check(query, searcher);

    // currently StopAnalyzer does not leave "holes", so this matches.
    query = new PhraseQuery();
    query.Add(new Term("field", "words"));
    query.Add(new Term("field", "here"));
    hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length());
    QueryUtils.Check(query, searcher);

    searcher.Close();
    directory.Close(); // fix: the RAMDirectory itself was never closed
}
/// <summary>
/// Verifies stop-word filtering with position increments enabled: removed
/// stop words must appear as gaps (increments &gt; 1) in the token sequence.
/// The global increment default is restored in finally.
/// </summary>
public virtual void TestStopListPositions()
{
    bool previousDefault = StopFilter.GetEnablePositionIncrementsDefault();
    StopFilter.SetEnablePositionIncrementsDefault(true);
    try
    {
        System.Collections.Hashtable stopWords = new System.Collections.Hashtable();
        stopWords.Add("good", "good");
        stopWords.Add("test", "test");
        stopWords.Add("analyzer", "analyzer");

        StopAnalyzer analyzer = new StopAnalyzer(stopWords);
        System.IO.StringReader input = new System.IO.StringReader("This is a good test of the english stop analyzer with positions");
        // Expected increment for each surviving token; values > 1 mark removed stop words.
        int[] expectedIncr = new int[]{1, 1, 1, 3, 1, 1, 1, 2, 1};
        TokenStream tokens = analyzer.TokenStream("test", input);
        Assert.IsNotNull(tokens);

        int index = 0;
        Token current = null;
        while ((current = tokens.Next()) != null)
        {
            System.String text = current.TermText();
            Assert.IsFalse(stopWords.Contains(text));
            Assert.AreEqual(expectedIncr[index++], current.GetPositionIncrement());
        }
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(previousDefault);
    }
}
/// <summary>
/// Creates a SavedStreams bound to the given enclosing StopAnalyzer
/// (delegates the outer-instance wiring to InitBlock).
/// </summary>
public SavedStreams(StopAnalyzer enclosingInstance) { InitBlock(enclosingInstance); }
/// <summary>
/// Stop words removed by the analyzer must render as "?" holes in
/// PhraseQuery.ToString(), and terms added at an occupied position as
/// "|" alternations.
/// </summary>
public virtual void TestToString()
{
    StopAnalyzer analyzer = new StopAnalyzer();
    // Fix: save and restore the global default so this test no longer leaks
    // state into other tests (mirrors the pattern in TestStopListPositions).
    bool defaultEnable = StopFilter.GetEnablePositionIncrementsDefault();
    StopFilter.SetEnablePositionIncrementsDefault(true);
    try
    {
        QueryParser qp = new QueryParser("field", analyzer);
        qp.SetEnablePositionIncrements(true);

        PhraseQuery q = (PhraseQuery) qp.Parse("\"this hi this is a test is\"");
        Assert.AreEqual("field:\"? hi ? ? ? test\"", q.ToString());

        q.Add(new Term("field", "hello"), 1);
        Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", q.ToString());
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(defaultEnable);
    }
}
/// <summary>
/// Returns an Analyzer for the given AnalyzerType.
/// </summary>
/// <param name="oAnalyzerType">Enumeration value selecting the analyzer.</param>
/// <returns>Analyzer (StandardAnalyzer for StandardAnalyzer and any unrecognized value).</returns>
public static Analyzer GetAnalyzer(AnalyzerType oAnalyzerType)
{
    switch (oAnalyzerType)
    {
        case AnalyzerType.SimpleAnalyzer:
            return new SimpleAnalyzer();
        case AnalyzerType.StopAnalyzer:
            return new StopAnalyzer();
        case AnalyzerType.WhitespaceAnalyzer:
            return new WhitespaceAnalyzer();
        default:
            // Covers AnalyzerType.StandardAnalyzer and any future enum values.
            return new StandardAnalyzer();
    }
}
/// <summary>
/// Phrase queries against a 2.4-mode StopAnalyzer index: an exact phrase of
/// non-stop words matches, and because StopAnalyzer as of 2.4 leaves no
/// positional "holes", a phrase spanning a removed stop word also matches.
/// </summary>
public virtual void TestPhraseQueryWithStopAnalyzer()
{
    RAMDirectory directory = new RAMDirectory();
    StopAnalyzer stopAnalyzer = new StopAnalyzer(Util.Version.LUCENE_24);
    IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory, true);

    // valid exact phrase query
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "stop"));
    query.Add(new Term("field", "words"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(query, searcher);

    // StopAnalyzer as of 2.4 does not leave "holes", so this matches.
    query = new PhraseQuery();
    query.Add(new Term("field", "words"));
    query.Add(new Term("field", "here"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(query, searcher);

    searcher.Close();
    directory.Close(); // fix: the RAMDirectory itself was never closed
}
/// <summary>
/// Default English stop words must be dropped and the surviving tokens
/// lower-cased (FOO -> foo), per the expected output below.
/// </summary>
public virtual void TestStop()
{
    Analyzer analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
    System.String[] expected = new System.String[]{"foo", "bar", "foo", "bar"};
    // No stop words present: every token survives, lower-cased.
    AssertAnalyzesTo(analyzer, "foo bar FOO BAR", expected);
    // "a", "such" and "THESE" are removed as stop words.
    AssertAnalyzesTo(analyzer, "foo a bar such FOO THESE BAR", expected);
}
/// <summary>
/// Same default stop-word check, constructed with the test utility's
/// current version rather than a hard-coded Version constant.
/// </summary>
public virtual void TestStop()
{
    Analyzer analyzer = new StopAnalyzer(_TestUtil.CurrentVersion);
    System.String[] expected = new System.String[] { "foo", "bar", "foo", "bar" };
    // No stop words present: every token survives, lower-cased.
    AssertAnalyzesTo(analyzer, "foo bar FOO BAR", expected);
    // "a", "such" and "THESE" are removed as stop words.
    AssertAnalyzesTo(analyzer, "foo a bar such FOO THESE BAR", expected);
}
/// <summary>
/// Stop words removed by the analyzer must render as "?" holes in
/// PhraseQuery.ToString(), and terms added at an occupied position as
/// "|" alternations.
/// </summary>
public virtual void TestToString()
{
    StopAnalyzer analyzer = new StopAnalyzer(Util.Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "field", analyzer);
    parser.EnablePositionIncrements = true;

    PhraseQuery phrase = (PhraseQuery) parser.Parse("\"this hi this is a test is\"");
    Assert.AreEqual("field:\"? hi ? ? ? test\"", phrase.ToString());

    // Adding a second term at position 1 renders as an alternation.
    phrase.Add(new Term("field", "hello"), 1);
    Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", phrase.ToString());
}
/// <summary>
/// Verifies stop-word filtering with position increments via the attribute
/// API: removed stop words must appear as gaps (increments &gt; 1).
/// </summary>
public virtual void TestStopListPositions()
{
    var stopWords = Support.Compatibility.SetFactory.GetSet<string>();
    stopWords.Add("good");
    stopWords.Add("test");
    stopWords.Add("analyzer");

    var analyzer = new StopAnalyzer(Version.LUCENE_CURRENT, stopWords);
    var input = new System.IO.StringReader("This is a good test of the english stop analyzer with positions");
    // Expected increment for each surviving token; values > 1 mark removed stop words.
    int[] expectedIncr = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
    TokenStream tokens = analyzer.TokenStream("test", input);
    Assert.NotNull(tokens);

    ITermAttribute termAtt = tokens.GetAttribute<ITermAttribute>();
    IPositionIncrementAttribute posIncrAtt = tokens.AddAttribute<IPositionIncrementAttribute>();

    int index = 0;
    while (tokens.IncrementToken())
    {
        string text = termAtt.Term;
        Assert.IsFalse(stopWords.Contains(text));
        Assert.AreEqual(expectedIncr[index++], posIncrAtt.PositionIncrement);
    }
}