Example No. 1
		public virtual void  TestStopList()
		{
			System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
			stopWordsSet.Add("good", "good");
			stopWordsSet.Add("test", "test");
			stopWordsSet.Add("analyzer", "analyzer");

            // {{Aroush  how can we copy 'stopWordsSet' to 'System.String[]'?
            System.String[] arrStopWordsSet = new System.String[3];
            arrStopWordsSet[0] = "good";
            arrStopWordsSet[1] = "test";
            arrStopWordsSet[2] = "analyzer";
            // Aroush}}

			StopAnalyzer newStop = new StopAnalyzer(arrStopWordsSet);
			System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer");
			TokenStream stream = newStop.TokenStream("test", reader);
			Assert.IsTrue(stream != null);
			Token token = null;
			try
			{
				while ((token = stream.Next()) != null)
				{
					System.String text = token.TermText();
					Assert.IsTrue(stopWordsSet.Contains(text) == false);
				}
			}
			catch (System.IO.IOException)
			{
				Assert.Fail("unexpected IOException while reading from the stop analyzer token stream");
			}
		}
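The porting note inside TestStopList asks how to copy 'stopWordsSet' into a 'System.String[]'. A minimal sketch of one answer, assuming the same Hashtable built above: copy its keys into a pre-sized array instead of repeating the literals by hand.

            // Sketch: Hashtable.Keys is an ICollection, so it can be copied straight into an array.
            // (Key order is unspecified, which does not matter for a stop-word list.)
            System.String[] arrStopWordsSet = new System.String[stopWordsSet.Count];
            stopWordsSet.Keys.CopyTo(arrStopWordsSet, 0);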
Example No. 2
 private Analyzer GetAnalyer(int type)
 {
     Analyzer analyzer;
     string path = System.Configuration.ConfigurationSettings.AppSettings["Data"].ToString();
     switch (type)
     {
         case 0:
             analyzer = new StockFooAnalyzer(path);
             break;
         case 1:
             analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
             break;
         case 2:
             analyzer = new SimpleAnalyzer();
             break;
         case 3:
             analyzer = new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
             break;
         case 4:
             analyzer = new KeywordAnalyzer();
             break;
         case 5:
             analyzer = new WhitespaceAnalyzer();
             break;
         default:
             analyzer = new StockFooAnalyzer(path);
             break;
     }
     return analyzer;
 }
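ConfigurationSettings.AppSettings, used in GetAnalyer above, has been flagged obsolete since .NET 2.0. A minimal sketch of the equivalent lookup with ConfigurationManager, assuming the same "Data" key under appSettings and a reference to the System.Configuration assembly:

     // Sketch: ConfigurationManager replaces the obsolete ConfigurationSettings API.
     // The indexer already returns a string (or null if the key is missing), so no ToString() call is needed.
     string path = System.Configuration.ConfigurationManager.AppSettings["Data"];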
Example No. 3
		public override void  SetUp()
		{
			base.SetUp();
			stop = new StopAnalyzer();
			inValidTokens = new System.Collections.Hashtable();

			for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.Length; i++)
			{
				inValidTokens.Add(StopAnalyzer.ENGLISH_STOP_WORDS[i], StopAnalyzer.ENGLISH_STOP_WORDS[i]);
			}
		}
Example No. 4
        public void Code()
        {
            Analyzer _keywordanalyzer    = new KeywordAnalyzer();
            Analyzer _simpleanalyzer     = new Lucene.Net.Analysis.SimpleAnalyzer();
            Analyzer _stopanalyzer       = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Analyzer _whitespaceanalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            Analyzer _standardanalyzer   = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);


            var _perfieldanalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(_standardanalyzer);

            _perfieldanalyzer.AddAnalyzer("firstname", _keywordanalyzer);
            _perfieldanalyzer.AddAnalyzer("lastname", _keywordanalyzer);


            IndexWriter _writer = new IndexWriter(_directory, _perfieldanalyzer, IndexWriter.MaxFieldLength.UNLIMITED);

            IndexReader _reader = _writer.GetReader();

            IndexSearcher _searcher = new IndexSearcher(_reader);


            //QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);

            string[] fields = new[] { "text", "title", "author" };
            var      boosts = new Dictionary <string, float>();

            boosts.Add("text", 2.0f);
            boosts.Add("title", 1.5f);
            QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, _standardanalyzer, boosts);
            Query       query  = parser.Parse("lucene is great");


            TopDocs hits = _searcher.Search(query, 1000);

            IEnumerable <Document> docs = hits.ScoreDocs.Select(hit => _searcher.Doc(hit.Doc));

            var books = docs.Select(doc => new Book()
            {
                Text   = doc.Get("text"),
                Title  = doc.Get("title"),
                Author = doc.Get("author"),
                Length = Int32.Parse(doc.Get("length"))
            });


            _writer.Optimize();
            _writer.Commit();
            _writer.DeleteAll();
        }
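The Code() snippet above never releases the writer, reader, or searcher it opens. A minimal cleanup sketch, assuming Lucene.Net 3.0.x, where these types implement IDisposable:

            // Sketch: release the searcher first, then the reader it wraps, then the writer.
            _searcher.Dispose();
            _reader.Dispose();
            _writer.Dispose();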
Example No. 5
		public virtual void  TestStopList()
		{
			System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
			stopWordsSet.Add("good", "good");
			stopWordsSet.Add("test", "test");
			stopWordsSet.Add("analyzer", "analyzer");
			StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);
			System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer");
			TokenStream stream = newStop.TokenStream("test", reader);
			Assert.IsNotNull(stream);
			Token token = null;
			while ((token = stream.Next()) != null)
			{
				System.String text = token.TermText();
				Assert.IsFalse(stopWordsSet.Contains(text));
				Assert.AreEqual(1, token.GetPositionIncrement()); // by default stop tokenizer does not apply increments.
			}
		}
Example No. 6
		public virtual void  TestStopList()
		{
			System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
			stopWordsSet.Add("good", "good");
			stopWordsSet.Add("test", "test");
			stopWordsSet.Add("analyzer", "analyzer");
			StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);
			System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer");
			TokenStream stream = newStop.TokenStream("test", reader);
			Assert.IsNotNull(stream);
			TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
			PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.AddAttribute(typeof(PositionIncrementAttribute));
			
			while (stream.IncrementToken())
			{
				System.String text = termAtt.Term();
				Assert.IsFalse(stopWordsSet.Contains(text));
				Assert.AreEqual(1, posIncrAtt.GetPositionIncrement()); // by default stop tokenizer does not apply increments.
			}
		}
Example No. 7
		public virtual void  TestStopList()
		{
			var stopWordsSet = Support.Compatibility.SetFactory.GetSet<string>();
			stopWordsSet.Add("good");
			stopWordsSet.Add("test");
			stopWordsSet.Add("analyzer");
			StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
			System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer");
			TokenStream stream = newStop.TokenStream("test", reader);
			Assert.IsNotNull(stream);
            ITermAttribute termAtt = stream.GetAttribute<ITermAttribute>();
            IPositionIncrementAttribute posIncrAtt = stream.AddAttribute<IPositionIncrementAttribute>();
			
			while (stream.IncrementToken())
			{
				System.String text = termAtt.Term;
				Assert.IsFalse(stopWordsSet.Contains(text));
                Assert.AreEqual(1, posIncrAtt.PositionIncrement); // in 2.4 stop tokenizer does not apply increments.
			}
		}
Example No. 8
 private void InitBlock(StopAnalyzer enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
 }
Example No. 9
		public virtual void  TestPhraseQueryWithStopAnalyzer()
		{
			RAMDirectory directory = new RAMDirectory();
			StopAnalyzer stopAnalyzer = new StopAnalyzer();
			IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true);
			Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(directory);
			
			// valid exact phrase query
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("field", "stop"));
			query.Add(new Term("field", "words"));
			Hits hits = searcher.Search(query);
			Assert.AreEqual(1, hits.Length());
			QueryUtils.Check(query, searcher);

			
			// currently StopAnalyzer does not leave "holes", so this matches.
			query = new PhraseQuery();
			query.Add(new Term("field", "words"));
			query.Add(new Term("field", "here"));
			hits = searcher.Search(query);
			Assert.AreEqual(1, hits.Length());
			QueryUtils.Check(query, searcher);

			
			searcher.Close();
		}
Example No. 10
		public virtual void  TestStopListPositions()
		{
			bool defaultEnable = StopFilter.GetEnablePositionIncrementsDefault();
			StopFilter.SetEnablePositionIncrementsDefault(true);
			try
			{
				System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
				stopWordsSet.Add("good", "good");
				stopWordsSet.Add("test", "test");
				stopWordsSet.Add("analyzer", "analyzer");
				StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);
				System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer with positions");
				// expected increments for the surviving tokens: "of" follows the removed
				// "good test" (1 + 2) and "with" follows the removed "analyzer" (1 + 1)
				int[] expectedIncr = new int[]{1, 1, 1, 3, 1, 1, 1, 2, 1};
				TokenStream stream = newStop.TokenStream("test", reader);
				Assert.IsNotNull(stream);
				Token token = null;
				int i = 0;
				while ((token = stream.Next()) != null)
				{
					System.String text = token.TermText();
					Assert.IsFalse(stopWordsSet.Contains(text));
					Assert.AreEqual(expectedIncr[i++], token.GetPositionIncrement());
				}
			}
			finally
			{
				StopFilter.SetEnablePositionIncrementsDefault(defaultEnable);
			}
		}
Example No. 11
 public SavedStreams(StopAnalyzer enclosingInstance)
 {
     InitBlock(enclosingInstance);
 }
Example No. 12
		public virtual void  TestToString()
		{
			StopAnalyzer analyzer = new StopAnalyzer();
			StopFilter.SetEnablePositionIncrementsDefault(true);
			QueryParser qp = new QueryParser("field", analyzer);
			qp.SetEnablePositionIncrements(true);
			PhraseQuery q = (PhraseQuery) qp.Parse("\"this hi this is a test is\"");
			Assert.AreEqual("field:\"? hi ? ? ? test\"", q.ToString());
			q.Add(new Term("field", "hello"), 1);
			Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", q.ToString());
		}
Example No. 13
 private void  InitBlock(StopAnalyzer enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
 }
Example No. 14
 /// <summary>
 /// Returns an Analyzer for the given AnalyzerType
 /// </summary>
 /// <param name="oAnalyzerType">Enumeration value</param>
 /// <returns>Analyzer</returns>
 public static Analyzer GetAnalyzer(AnalyzerType oAnalyzerType)
 {
     Analyzer oAnalyzer = null;
     switch (oAnalyzerType)
     {
         case AnalyzerType.SimpleAnalyzer:
             oAnalyzer = new SimpleAnalyzer();
             break;
         case AnalyzerType.StopAnalyzer:
             oAnalyzer = new StopAnalyzer();
             break;
         case AnalyzerType.WhitespaceAnalyzer:
             oAnalyzer = new WhitespaceAnalyzer();
             break;
         default:
         case AnalyzerType.StandardAnalyzer:
             oAnalyzer = new StandardAnalyzer();
             break;
     }
     return oAnalyzer;
 }
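A hypothetical call site for the factory above, with AnalyzerType assumed to be an enum declared alongside it in the same project:

     // Hypothetical usage: resolve an analyzer by enum value.
     Analyzer oAnalyzer = GetAnalyzer(AnalyzerType.WhitespaceAnalyzer);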
Example No. 15
 public SavedStreams(StopAnalyzer enclosingInstance)
 {
     InitBlock(enclosingInstance);
 }
Example No. 16
		public virtual void  TestPhraseQueryWithStopAnalyzer()
		{
			RAMDirectory directory = new RAMDirectory();
			StopAnalyzer stopAnalyzer = new StopAnalyzer(Util.Version.LUCENE_24);
			IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			doc.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			writer.Close();

		    IndexSearcher searcher = new IndexSearcher(directory, true);
			
			// valid exact phrase query
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("field", "stop"));
			query.Add(new Term("field", "words"));
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			QueryUtils.Check(query, searcher);
			
			
			// StopAnalyzer as of 2.4 does not leave "holes", so this matches.
			query = new PhraseQuery();
			query.Add(new Term("field", "words"));
			query.Add(new Term("field", "here"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			QueryUtils.Check(query, searcher);
			
			
			searcher.Close();
		}
Example No. 17
		public virtual void  TestStop()
		{
			Analyzer a = new StopAnalyzer(Version.LUCENE_CURRENT);
			AssertAnalyzesTo(a, "foo bar FOO BAR", new System.String[]{"foo", "bar", "foo", "bar"});
			AssertAnalyzesTo(a, "foo a bar such FOO THESE BAR", new System.String[]{"foo", "bar", "foo", "bar"});
		}
Example No. 18
 public virtual void TestStop()
 {
     Analyzer a = new StopAnalyzer(_TestUtil.CurrentVersion);
     AssertAnalyzesTo(a, "foo bar FOO BAR", new System.String[] { "foo", "bar", "foo", "bar" });
     AssertAnalyzesTo(a, "foo a bar such FOO THESE BAR", new System.String[] { "foo", "bar", "foo", "bar" });
 }
Example No. 19
		public virtual void  TestToString()
		{
			StopAnalyzer analyzer = new StopAnalyzer(Util.Version.LUCENE_CURRENT);
			QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", analyzer);
			qp.EnablePositionIncrements = true;
			PhraseQuery q = (PhraseQuery) qp.Parse("\"this hi this is a test is\"");
			Assert.AreEqual("field:\"? hi ? ? ? test\"", q.ToString());
			q.Add(new Term("field", "hello"), 1);
			Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", q.ToString());
		}
Example No. 20
		public virtual void  TestStopListPositions()
        {
            var stopWordsSet = Support.Compatibility.SetFactory.GetSet<string>();
            stopWordsSet.Add("good");
            stopWordsSet.Add("test");
            stopWordsSet.Add("analyzer");
            var newStop = new StopAnalyzer(Version.LUCENE_CURRENT, stopWordsSet);
            var reader = new System.IO.StringReader("This is a good test of the english stop analyzer with positions");
            // increments for the surviving tokens; removed stop words ("good", "test", "analyzer")
            // add to the following token's increment
            int[] expectedIncr =                   { 1,   1, 1,          3, 1,  1,      1,            2,   1};
            TokenStream stream = newStop.TokenStream("test", reader);
            Assert.NotNull(stream);
            int i = 0;
            ITermAttribute termAtt = stream.GetAttribute<ITermAttribute>();
            IPositionIncrementAttribute posIncrAtt = stream.AddAttribute<IPositionIncrementAttribute>();

            while (stream.IncrementToken())
            {
                string text = termAtt.Term;
                Assert.IsFalse(stopWordsSet.Contains(text));
                Assert.AreEqual(expectedIncr[i++], posIncrAtt.PositionIncrement);
            }
        }