Esempio n. 1
0
 /// <summary>
 /// Marks this instance as closed and closes the current searcher, if one is set.
 /// Runs entirely under <c>searcherLock</c>; <c>EnsureOpen()</c> is invoked first
 /// (presumably it throws when the instance was already closed - confirm against its definition).
 /// </summary>
 public void Close()
 {
     lock (searcherLock)
     {
         EnsureOpen();
         closed = true;

         // Nothing to release when no searcher was ever installed.
         if (searcher == null)
         {
             return;
         }

         searcher.Close();
         searcher = null;
     }
 }
		// main search method
		/// <summary>
		/// Runs a Lucene query against the index and maps the hits to <see cref="SampleData"/> items.
		/// </summary>
		/// <param name="searchQuery">Query text. Null, empty, or text made up only of '*' and '?' yields an empty result.</param>
		/// <param name="searchField">
		/// Field to search. When null or empty, the fields "Id", "Name" and "Description" are searched
		/// and the hits are returned in index order (<c>Sort.INDEXORDER</c>), not by relevance.
		/// </param>
		/// <returns>At most 1000 mapped results.</returns>
		private static IEnumerable<SampleData> _search(string searchQuery, string searchField = "") {
			// validation: a null query would otherwise fail on Replace() below
			if (string.IsNullOrEmpty(searchQuery)) return new List<SampleData>();
			if (string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", ""))) return new List<SampleData>();

			const int hitsLimit = 1000;

			// the using block disposes the searcher on every exit path,
			// so no explicit Close()/Dispose() calls are needed inside it
			using (var searcher = new IndexSearcher(_directory, false)) {
				var analyzer = new StandardAnalyzer(Version.LUCENE_29);
				try {
					ScoreDoc[] hits;

					// search by single field
					if (!string.IsNullOrEmpty(searchField)) {
						var parser = new QueryParser(Version.LUCENE_29, searchField, analyzer);
						var query = parseQuery(searchQuery, parser);
						hits = searcher.Search(query, hitsLimit).ScoreDocs;
					}
					// search by multiple fields (results in index order)
					else {
						var parser = new MultiFieldQueryParser
							(Version.LUCENE_29, new[] {"Id", "Name", "Description"}, analyzer);
						var query = parseQuery(searchQuery, parser);
						hits = searcher.Search(query, null, hitsLimit, Sort.INDEXORDER).ScoreDocs;
					}

					// map while the searcher is still open: the mapper reads documents through it
					return _mapLuceneToDataList(hits, searcher);
				}
				finally {
					// release the analyzer even when parsing or searching throws
					analyzer.Close();
				}
			}
		}
Esempio n. 3
0
        /// <summary>
        /// Runs a prefix search (<paramref name="key"/> followed by '*') on a single field of the
        /// index at C:\LuceneIndex and returns one page of matching documents.
        /// </summary>
        /// <param name="field">Name of the field to search.</param>
        /// <param name="key">Prefix to look for; a trailing wildcard is appended.</param>
        /// <param name="pageSize">Number of documents per page.</param>
        /// <param name="pageNumber">1-based page number.</param>
        /// <returns>The documents of the requested page plus the total hit count of the query.</returns>
        public static LuceneResult SearchBIMXchange(string field, string key, int pageSize, int pageNumber)
        {
            const string luceneIndexPath = "C:\\LuceneIndex";

            var directory = FSDirectory.Open(new DirectoryInfo(luceneIndexPath));
            try
            {
                var analyzer = new StandardAnalyzer(Version.LUCENE_29);

                var parser = new QueryParser(Version.LUCENE_29, field, analyzer);
                var query = parser.Parse(String.Format("{0}*", key));

                var searcher = new IndexSearcher(directory, true);
                try
                {
                    var topDocs = searcher.Search(query, 1000000);
                    var scoreDocs = topDocs.ScoreDocs;

                    var docs = new List<Document>();
                    var start = (pageNumber - 1) * pageSize;

                    // Bound by the number of hits actually collected: TotalHits counts every
                    // match and may exceed the length of the returned ScoreDocs array.
                    for (var i = start; i < start + pageSize && i < scoreDocs.Length; i++)
                    {
                        docs.Add(searcher.Doc(scoreDocs[i].doc));
                    }

                    return new LuceneResult {Results = docs, TotalCount = topDocs.TotalHits};
                }
                finally
                {
                    // Release the searcher even when the search or a document load throws.
                    searcher.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
		/// <summary>
		/// Indexes three documents in memory and checks the hit counts of MatchAllDocsQuery,
		/// alone and inside boolean queries, before and after deleting one document.
		/// </summary>
		public virtual void  TestQuery()
		{
			// Build a three-document in-memory index.
			RAMDirectory ramDir = new RAMDirectory();
			IndexWriter indexWriter = new IndexWriter(ramDir, new StandardAnalyzer(), true);
			foreach (string text in new string[] { "one", "two", "three four" })
			{
				AddDoc(text, indexWriter);
			}
			indexWriter.Close();
			
			IndexSearcher searcher = new IndexSearcher(ramDir);
			Hits found = searcher.Search(new MatchAllDocsQuery());
			Assert.AreEqual(3, found.Length());
			
			// some artificial queries to trigger the use of skipTo():
			
			BooleanQuery twoMatchAll = new BooleanQuery();
			twoMatchAll.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
			twoMatchAll.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
			found = searcher.Search(twoMatchAll);
			Assert.AreEqual(3, found.Length());
			
			BooleanQuery matchAllAndTerm = new BooleanQuery();
			matchAllAndTerm.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
			matchAllAndTerm.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
			found = searcher.Search(matchAllAndTerm);
			Assert.AreEqual(1, found.Length());
			
			// delete a document through the searcher's reader; it must no longer be matched:
			searcher.GetIndexReader().DeleteDocument(0);
			found = searcher.Search(new MatchAllDocsQuery());
			Assert.AreEqual(2, found.Length());
			
			searcher.Close();
		}
Esempio n. 5
0
        /// <summary>
        /// Checks whether the index holds a document whose "filename" and "LastModified"
        /// (second resolution) terms both match the given file.
        /// </summary>
        /// <param name="fileInfo">File whose name and modification time are looked up.</param>
        /// <returns>
        /// true when at least one document matches; false when none matches or when the
        /// lookup fails (the error message is written to the console).
        /// </returns>
        private bool isInIndex(IndexableFileInfo fileInfo)
        {
            IndexSearcher indexSearcher = new IndexSearcher(this.luceneIndexDir);

            try
            {
                Term nameTerm = new Term("filename", fileInfo.Filename);
                Term modifiedTerm = new Term("LastModified", DateTools.DateToString(fileInfo.LastModified, DateTools.Resolution.SECOND));

                // Both terms are required.
                BooleanQuery criteria = new BooleanQuery();
                criteria.Add(new TermQuery(nameTerm), BooleanClause.Occur.MUST);
                criteria.Add(new TermQuery(modifiedTerm), BooleanClause.Occur.MUST);

                return indexSearcher.Search(criteria).Length() > 0;
            }
            catch (Exception ex)
            {
                // Best effort: a failed lookup is reported and treated as "not indexed".
                Console.Write(ex.Message);
                return false;
            }
            finally
            {
                indexSearcher.Close();
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Round-trips a BooleanQuery through BinaryFormatter and verifies that the copy equals
        /// the original and produces the same hit count against the index in <c>dir</c>.
        /// </summary>
        public void TestBooleanQuerySerialization()
        {
            Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();

            lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Occur.MUST);

            // Serialize to memory and read the query back.
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            Lucene.Net.Search.BooleanQuery lucQuery2;
            System.IO.MemoryStream ms = new System.IO.MemoryStream();
            try
            {
                bf.Serialize(ms, lucQuery);
                ms.Seek(0, System.IO.SeekOrigin.Begin);
                lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
            }
            finally
            {
                ms.Close();
            }

            Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");

            // Each query is run against its own fresh searcher; both searchers are
            // closed, including when the search throws.
            int hitCount;
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount = searcher.Search(lucQuery, 20).TotalHits;
            }
            finally
            {
                searcher.Close();
            }

            int hitCount2;
            searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount2 = searcher.Search(lucQuery2, 20).TotalHits;
            }
            finally
            {
                searcher.Close();
            }

            Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts");
        }
Esempio n. 7
0
        /// <summary>
        /// Verifies that the index contains one document per node reported by the node finder
        /// and that the first node can be found exactly once by its element id.
        /// </summary>
        public void Initialize_Indexes_All_Nodes()
        {
            string elementIdForTestingSearch = _deepNodeFinder.GetNodesForIndexing()[0].Id;
            int expectedNumNodes = _deepNodeFinder.GetNodesForIndexing().Length;

            Assert.AreEqual("usfr-pte_NetCashFlowsProvidedUsedOperatingActivitiesDirectAbstract", elementIdForTestingSearch,
                            "TEST SANITY: element id for test search");
            Assert.AreEqual(1595, expectedNumNodes, "TEST SANITY: Number of nodes in found in the test taxonomy");

            IndexReader indexReader = IndexReader.Open(_indexMgr.LuceneDirectory_ForTesting);
            try
            {
                Assert.AreEqual(expectedNumNodes, indexReader.NumDocs(),
                                "An incorrect number of documents were found in the Lucene directory after initialization");
            }
            finally
            {
                // The reader was previously left open; release it even when the assertion fails.
                indexReader.Close();
            }

            IndexSearcher searcher = new IndexSearcher(_indexMgr.LuceneDirectory_ForTesting);
            try
            {
                Hits results =
                    searcher.Search(new TermQuery(new Term(LuceneNodeIndexer.ELEMENTID_FOR_DELETING_FIELD, elementIdForTestingSearch)));

                Assert.AreEqual(1, results.Length(), "Search results should only have 1 hit");
                Assert.AreEqual(elementIdForTestingSearch, results.Doc(0).Get(LuceneNodeIndexer.ELEMENTID_FIELD),
                                "Search results yielded the wrong element!");
            }
            finally
            {
                searcher.Close();
            }
        }
        /// <summary>
        /// Minimal end-to-end walkthrough: indexes one document in memory, parses a query
        /// against the "postBody" field and iterates over the returned hits.
        /// </summary>
        public void HelloWorldTest()
        {
            Directory directory = new RAMDirectory();
            try
            {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
                IndexWriter writer = new IndexWriter(directory,
                    analyzer,
                    IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    Document doc = new Document();
                    doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
                    doc.Add(new Field("postBody", "sample test", Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc);
                    writer.Optimize();
                    writer.Commit();
                }
                finally
                {
                    writer.Close();
                }

                QueryParser parser = new QueryParser(Version.LUCENE_29, "postBody", analyzer);
                Query query = parser.Parse("sample test");

                //Setup searcher
                IndexSearcher searcher = new IndexSearcher(directory, true);
                try
                {
                    //Do the search
                    var hits = searcher.Search(query, null, 10);

                    // Iterate over the hits that were actually collected: TotalHits counts all
                    // matches and can exceed the 10 entries requested above.
                    for (int i = 0; i < hits.ScoreDocs.Length; i++)
                    {
                        var doc1 = hits.ScoreDocs[i];
                    }
                }
                finally
                {
                    searcher.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
        /// <summary>
        /// Searches the "title", "content" and "keywords" fields of the configured index for
        /// <c>searchInfo.QueryString</c> and converts every hit to a <see cref="NewsModel"/>.
        /// </summary>
        /// <param name="MatchCount">Receives the number of hits for the query.</param>
        /// <returns>One entity per hit, in the order returned by the search.</returns>
        public override List<ISearchEntity> GetSearchResult(out int MatchCount)
        {
            Analyzer analyzer = new StandardAnalyzer();

            IndexSearcher searcher = new IndexSearcher(searchInfo.ConfigElement.IndexDirectory);
            try
            {
                MultiFieldQueryParser parserName = new MultiFieldQueryParser(new string[] { "title", "content", "keywords" }, analyzer);

                Query queryName = parserName.Parse(searchInfo.QueryString);

                Hits hits = searcher.Search(queryName);

                List<ISearchEntity> ResultList = new List<ISearchEntity>();

                for (int i = 0; i < hits.Length(); i++)
                {
                    Document doc = hits.Doc(i);

                    ResultList.Add((ISearchEntity)new NewsModel()
                    {
                        EntityIdentity = Convert.ToInt32(doc.Get("newsid")),
                        Title = Convert.ToString(doc.Get("title")),
                        Content = Convert.ToString(doc.Get("content")),
                        Keywords = doc.Get("keywords")
                    });
                }

                MatchCount = hits.Length();
                return ResultList;
            }
            finally
            {
                // Close the searcher on every path; parsing the user's query string,
                // searching, or converting "newsid" can all throw.
                searcher.Close();
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Replaces the current searcher with a new one opened on <paramref name="dir"/> and
        /// records <paramref name="dir"/> as the spell index.
        /// </summary>
        /// <param name="dir">Directory the new searcher is created for.</param>
        /// <exception cref="AlreadyClosedException">
        /// Thrown when the instance was closed while the new searcher was being opened.
        /// </exception>
        private void SwapSearcher(Directory dir)
        {
            // The searcher is opened before taking the lock: opening may be expensive, and
            // it is preferable to discard the new searcher (if a close happened meanwhile)
            // than to block access to the current one for the duration of the open.
            IndexSearcher replacement = CreateSearcher(dir);

            lock (searcherLock)
            {
                if (closed)
                {
                    replacement.Close();
                    throw new AlreadyClosedException("Spellchecker has been closed");
                }

                // Retire the previous searcher, if any.
                if (searcher != null)
                {
                    searcher.Close();
                }

                // Both fields are updated inside the lock so they always change together.
                searcher = replacement;
                this.spellindex = dir;
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Round-trips a BooleanQuery through BinaryFormatter and verifies that the copy equals
        /// the original and produces the same hit count against the index in <c>dir</c>.
        /// </summary>
        public void TestBooleanQuerySerialization()
        {
            Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();

            lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Occur.MUST);

            // Serialize to memory and read the query back.
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            Lucene.Net.Search.BooleanQuery lucQuery2;
            System.IO.MemoryStream ms = new System.IO.MemoryStream();
            try
            {
                bf.Serialize(ms, lucQuery);
                ms.Seek(0, System.IO.SeekOrigin.Begin);
                lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
            }
            finally
            {
                ms.Close();
            }

            Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");

            // Each query is run against its own fresh searcher; both searchers are
            // closed, including when the search throws.
            int hitCount;
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount = searcher.Search(lucQuery, 20).TotalHits;
            }
            finally
            {
                searcher.Close();
            }

            int hitCount2;
            searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount2 = searcher.Search(lucQuery2, 20).TotalHits;
            }
            finally
            {
                searcher.Close();
            }

            Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts");
        }
		/// <summary>
		/// Indexes six small documents, obtains a reader from the writer and runs
		/// <c>RunTest</c> against a searcher on that reader with both flag values.
		/// </summary>
		public virtual void  TestSorting()
		{
			Directory directory = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMaxBufferedDocs(2);
			writer.SetMergeFactor(1000);
			
			// Each row is a flat list of field name / field value pairs handed to Adoc.
			System.String[][] rows = new System.String[][]
			{
				new System.String[]{"id", "a", "title", "ipod", "str_s", "a"},
				new System.String[]{"id", "b", "title", "ipod ipod", "str_s", "b"},
				new System.String[]{"id", "c", "title", "ipod ipod ipod", "str_s", "c"},
				new System.String[]{"id", "x", "title", "boosted", "str_s", "x"},
				new System.String[]{"id", "y", "title", "boosted boosted", "str_s", "y"},
				new System.String[]{"id", "z", "title", "boosted boosted boosted", "str_s", "z"}
			};
			foreach (System.String[] row in rows)
			{
				writer.AddDocument(Adoc(row));
			}
			
			// The reader is taken from the writer before the writer is closed.
			IndexReader reader = writer.GetReader();
			writer.Close();
			
			IndexSearcher indexSearcher = new IndexSearcher(reader);
			
			RunTest(indexSearcher, true);
			RunTest(indexSearcher, false);
			
			indexSearcher.Close();
			reader.Close();
			directory.Close();
		}
Esempio n. 13
0
        /// <summary>
        /// Searches the "description" field of the index in "./index" and returns the name and
        /// description of every matching document.
        /// </summary>
        /// <param name="terms">Query text in Lucene query syntax.</param>
        /// <returns>One <see cref="Result"/> per hit.</returns>
        public SearchResults Find(string terms)
        {
            Directory directory = FSDirectory.GetDirectory("./index",false);
            try
            {
                var isearcher = new IndexSearcher(directory);
                try
                {
                    // Parse the caller's query against the "description" field.
                    var qp = new QueryParser("description", _analyzer);
                    Query query = qp.Parse(terms);

                    Hits hits = isearcher.Search(query);

                    var sr = new SearchResults();

                    // Iterate through the results:
                    for (int i = 0; i < hits.Length(); i++)
                    {
                        Document hitDoc = hits.Doc(i);

                        sr.Add(new Result() { Name = hitDoc.Get("name"), Description = hitDoc.Get("description") });
                    }

                    return sr;
                }
                finally
                {
                    // Release the searcher even when the query text fails to parse.
                    isearcher.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Searches the "LuceneIndex" directory for documents matching ALL of the given
        /// field/value terms and returns one page of results.
        /// </summary>
        /// <param name="terms">Field name to exact term value; every entry is a required clause.</param>
        /// <param name="pageSize">Number of documents per page.</param>
        /// <param name="pageNumber">1-based page number.</param>
        /// <returns>The documents of the requested page plus the total hit count of the query.</returns>
        public static LuceneResult MultiSearchBIMXchange(Dictionary<string,string> terms, int pageSize, int pageNumber)
        {
            var directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
            try
            {
                var booleanQuery = new BooleanQuery();
                foreach(var term in terms)
                {
                    var query = new TermQuery(new Term(term.Key, term.Value));
                    booleanQuery.Add(query,BooleanClause.Occur.MUST);
                }

                var searcher = new IndexSearcher(directory, true);
                try
                {
                    var start = (pageNumber - 1) * pageSize;

                    // Collect enough hits to cover the requested page. The previous fixed limit
                    // of 10 made every page beyond the tenth hit fail or come back empty.
                    var wanted = start + pageSize;
                    if (wanted < 1)
                    {
                        wanted = 1; // a non-positive limit is not a meaningful request
                    }

                    var topDocs = searcher.Search(booleanQuery, wanted);
                    var scoreDocs = topDocs.ScoreDocs;

                    var docs = new List<Document>();

                    // Bound by the collected hits, not TotalHits: TotalHits counts every match
                    // and may exceed the length of the ScoreDocs array.
                    for (var i = start; i < start + pageSize && i < scoreDocs.Length; i++)
                    {
                        docs.Add(searcher.Doc(scoreDocs[i].doc));
                    }

                    return new LuceneResult {Results = docs, TotalCount = topDocs.TotalHits};
                }
                finally
                {
                    searcher.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Runs a prefix search (the "search" value of <paramref name="data"/> followed by '*')
        /// over the "name" and "summary" fields of the index in "LuceneIndex" under the current
        /// directory, collects the stored "id" of up to 1000 hits and passes those ids to
        /// <c>Search_gl.search</c>.
        /// </summary>
        /// <param name="data">Request data; its "search" string is the query prefix.</param>
        /// <returns>Whatever <c>Search_gl.search</c> returns for the collected ids.</returns>
        public Data searchLucene(Data data)
        {
            Search_gl search = new Search_gl();
            List<string> item = new List<string>();
            Lucene.Net.Store.Directory directory = FSDirectory.Open(new DirectoryInfo(Environment.CurrentDirectory + "\\LuceneIndex"));
            try
            {
                var analyzer = new StandardAnalyzer(Version.LUCENE_29);

                IndexReader reader = IndexReader.Open(directory, true);
                try
                {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    try
                    {
                        // Multi-field search. A wildcard cannot be the first character of the
                        // query, so an empty search text is not supported here.
                        MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] {"name", "summary"}, analyzer);
                        Query query = parser.Parse((data.getString("search")) + "*");

                        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                        searcher.Search(query, collector);
                        ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

                        for (int i = 0; i < hits.Length; i++)
                        {
                            Document doc = searcher.Doc(hits[i].doc);
                            item.Add(doc.Get("id"));
                        }
                    }
                    finally
                    {
                        // The searcher is closed before the reader it was built on.
                        searcher.Close();
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }

            return search.search(data, item.ToArray());
        }
Esempio n. 16
0
        /// <summary>
        /// Click handler: parses the text of <c>txbSearchQuery</c> as a query on the "content"
        /// field (terms combined with AND), collects up to 1000 hits and binds their title,
        /// path and score to <c>dgvResults</c>.
        /// </summary>
        private void btnExecuteSearch_Click(object sender, EventArgs e)
        {
            BindingList<SearchResult> l = new BindingList<SearchResult>();

            Directory indexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(tempPath));
            try
            {
                IndexSearcher searcher = new IndexSearcher(indexDirectory, true); // read-only=true
                try
                {
                    // TODO: QueryParser support for Hebrew terms (most concerning issue is with acronyms - mid-word quotes)
                    QueryParser qp = new QueryParser("content", analyzer);
                    qp.SetDefaultOperator(QueryParser.Operator.AND);
                    Query query = qp.Parse(txbSearchQuery.Text);

                    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;

                    // Iterate through the results:
                    for (int i = 0; i < hits.Length; i++)
                    {
                        Document hitDoc = searcher.Doc(hits[i].doc);
                        SearchResult sr = new SearchResult(hitDoc.GetField("title").StringValue(),
                            hitDoc.GetField("path").StringValue(), hits[i].score);
                        l.Add(sr);
                    }
                }
                finally
                {
                    // The query text comes from the user and may fail to parse;
                    // release the searcher in that case too.
                    searcher.Close();
                }
            }
            finally
            {
                indexDirectory.Close();
            }

            dgvResults.DataSource = l;
        }
 /// <summary>
 /// Counts the documents in <c>directory</c> whose <paramref name="fieldName"/> field
 /// contains exactly the term <paramref name="searchString"/>.
 /// </summary>
 /// <param name="fieldName">Field to look in.</param>
 /// <param name="searchString">Term value to match.</param>
 /// <returns>The hit count reported by <c>TestUtil.hitCount</c>.</returns>
 protected int getHitCount(String fieldName, String searchString)
 {
     IndexSearcher searcher = new IndexSearcher(directory, true); //4
     try
     {
         Term t = new Term(fieldName, searchString);
         Query query = new TermQuery(t); //5
         return TestUtil.hitCount(searcher, query); //6
     }
     finally
     {
         // Close the searcher even when the search throws.
         searcher.Close();
     }
 }
    /// <summary>
    /// Page load handler: tokenizes the session's "KeyWords" value with ChineseAnalyzer and
    /// lists the tokens, then searches the "ItemName" field of the index under /indexFile/
    /// and renders every hit into <c>lbMsg</c>.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        object keywords = Session["KeyWords"];
        if (keywords == null)
        {
            // No search terms stored for this session: there is nothing to analyse or
            // search, and dereferencing the missing value would throw.
            return;
        }
        String text = keywords.ToString();

        ChineseAnalyzer analyzer = new ChineseAnalyzer();
        TokenStream ts = analyzer.TokenStream("ItemName", new System.IO.StringReader(text));
        Lucene.Net.Analysis.Token token;
        try
        {
            int n = 0;
            while ((token = ts.Next()) != null)
            {
                // Token text is derived from user input, so it is HTML-encoded before output.
                this.lbMsg.Text += (n++) + "->" + Server.HtmlEncode(token.TermText()) + " " + token.StartOffset() + " " + token.EndOffset() + " " + token.Type() + "<br>";
            }
        }
        catch
        {
            // Best effort: a tokenizer failure only replaces the diagnostic listing.
            this.lbMsg.Text = "wrong";
        }

        Directory directory = FSDirectory.GetDirectory(Server.MapPath("/indexFile/"), false);
        try
        {
            IndexSearcher isearcher = new IndexSearcher(directory);
            try
            {
                Query query = QueryParser.Parse(text, "ItemName", analyzer);
                Hits hits = isearcher.Search(query);
                this.lbMsg.Text += "<font color=red>共找到" + hits.Length() + "条记录</font><br>";

                for (int i = 0; i < hits.Length(); i++)
                {
                    Document hitDoc = hits.Doc(i);

                    // Stored field values are encoded before being placed into markup; a
                    // missing field encodes to nothing instead of throwing.
                    this.lbMsg.Text += "编号:" + Server.HtmlEncode(hitDoc.Get("ItemID")) + "<br>"
                        + "分类:" + Server.HtmlEncode(hitDoc.Get("CategoryName")) + "<br>"
                        + "专题:" + Server.HtmlEncode(hitDoc.Get("ProductName")) + "<br>"
                        + "标题:<a href=\"" + Server.HtmlEncode(hitDoc.Get("visiturl")) + "\">" + Server.HtmlEncode(hitDoc.Get("ItemName")) + "</a><br>";
                }
            }
            finally
            {
                // Release the searcher even when the query fails to parse.
                isearcher.Close();
            }
        }
        finally
        {
            directory.Close();
        }
    }
Esempio n. 19
0
        /// <summary>
        /// Rebuilds the "LuceneIndex" index from up to 1000 tickets of the current project,
        /// searches the "summary" and "keyName" fields for <paramref name="term"/> and returns
        /// a view listing the matching tickets.
        /// </summary>
        /// <param name="term">Query text in Lucene query syntax.</param>
        /// <returns>A view over a <see cref="SearchIndexModel"/> holding the matching tickets.</returns>
        public virtual ActionResult SearchIndex(string term)
        {
            var tickets = new List<Ticket>();

            //Setup indexer
            Directory directory = FSDirectory.GetDirectory("LuceneIndex", true);
            try
            {
                Analyzer analyzer = new StandardAnalyzer();

                // create == true: the index is rebuilt from scratch on every call.
                IndexWriter writer = new IndexWriter(directory, analyzer, true);
                try
                {
                    foreach (var ticket in _ticketRepo.GetTicketsByProject(CurrentProject, 0, 1000).Items)
                    {
                        AddListingToIndex(ticket, writer);
                    }

                    writer.Optimize();
                }
                finally
                {
                    //Close the writer, also when indexing fails
                    writer.Close();
                }

                //Setup searcher
                IndexSearcher searcher = new IndexSearcher(directory);
                try
                {
                    MultiFieldQueryParser parser = new MultiFieldQueryParser(
                                                 new string[] {
                                                     "summary", "keyName" },
                                                 analyzer);

                    Query query = parser.Parse(term);
                    Hits hits = searcher.Search(query);

                    for (int i = 0; i < hits.Length(); i++)
                    {
                        Document doc = hits.Doc(i);

                        // Hits whose stored "id" is not a valid integer are skipped.
                        int id = 0;
                        if (int.TryParse(doc.Get("id"), out id))
                        {
                            tickets.Add(_ticketRepo.GetTicketById(id));
                        }
                    }
                }
                finally
                {
                    searcher.Close();
                }
            }
            finally
            {
                //Clean up everything
                directory.Close();
            }

            return View(new SearchIndexModel()
            {
                Tickets = tickets
            });
        }
        /// <summary>
        /// Creates 100 analyzer / RAM directory / index writer triples in parallel, indexes one
        /// document into each, disposes everything, searches each directory once, and then
        /// asserts that no instance tracked by CloseableThreadLocalProfiler is still alive
        /// after a forced garbage collection.
        /// </summary>
        public void TestMemLeakage()
        {
            CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = true;

            int instanceCount = 100;
            Analyzer[] analyzerPool = new Analyzer[instanceCount];
            RAMDirectory[] directoryPool = new RAMDirectory[instanceCount];
            IndexWriter[] writerPool = new IndexWriter[instanceCount];

            // Phase 1: create one analyzer, directory and writer per slot.
            System.Threading.Tasks.Parallel.For(0, instanceCount, slot =>
            {
                analyzerPool[slot] = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT);
                directoryPool[slot] = new RAMDirectory();
                writerPool[slot] = new IndexWriter(directoryPool[slot], analyzerPool[slot], true, IndexWriter.MaxFieldLength.UNLIMITED);
            });

            // Phase 2: index a single document through every writer.
            System.Threading.Tasks.Parallel.For(0, instanceCount, slot =>
            {
                Document singleDoc = new Document();
                singleDoc.Add(new Field("field", "some test", Field.Store.NO, Field.Index.ANALYZED));
                writerPool[slot].AddDocument(singleDoc);
            });

            // Phase 3: dispose analyzers and writers.
            System.Threading.Tasks.Parallel.For(0, instanceCount, slot =>
            {
                analyzerPool[slot].Dispose();
                writerPool[slot].Dispose();
            });

            // Phase 4: run one search per directory and close the searcher again.
            System.Threading.Tasks.Parallel.For(0, instanceCount, slot =>
            {
                IndexSearcher slotSearcher = new IndexSearcher(directoryPool[slot]);
                TopDocs found = slotSearcher.Search(new TermQuery(new Term("field", "test")), 10);
                slotSearcher.Close();
            });

            // Phase 5: dispose the directories.
            System.Threading.Tasks.Parallel.For(0, instanceCount, slot => directoryPool[slot].Dispose());

            GC.Collect(GC.MaxGeneration);
            GC.WaitForPendingFinalizers();

            // Count the tracked instances whose weak reference still resolves.
            int survivors = 0;
            foreach (WeakReference tracked in CloseableThreadLocalProfiler.Instances)
            {
                if (tracked.Target != null)
                {
                    survivors++;
                }
            }

            CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = false;

            Assert.AreEqual(0, survivors);
        }
		/// <summary>
		/// Demo entry point: indexes 225 documents carrying HIGH_PRIORITY in PRIORITY_FIELD,
		/// then runs two searches (one plain, one with an OR clause), printing each parsed
		/// query and its hits. Any exception is reported on the console.
		/// </summary>
		public static void  Main(System.String[] args)
		{
			try
			{
				Directory directory = new RAMDirectory();
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(directory, analyzer, true);
				
				int MAX_DOCS = 225;
				
				for (int docNumber = 0; docNumber < MAX_DOCS; docNumber++)
				{
					Lucene.Net.Documents.Document entry = new Lucene.Net.Documents.Document();
					entry.Add(new Field(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.TOKENIZED));
					entry.Add(new Field(ID_FIELD, System.Convert.ToString(docNumber), Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(entry);
				}
				writer.Close();
				
				// first pass: a search without OR; second pass: a new search with OR
				System.String[] queryTexts = new System.String[]{HIGH_PRIORITY, HIGH_PRIORITY + " OR " + MED_PRIORITY};
				foreach (System.String queryText in queryTexts)
				{
					// every pass uses its own searcher and parser
					Searcher searcher = new IndexSearcher(directory);
					Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
					
					Query query = parser.Parse(queryText);
					System.Console.Out.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
					
					Hits hits = searcher.Search(query);
					PrintHits(hits);
					
					searcher.Close();
				}
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
Esempio n. 22
0
		/// <summary>
		/// Method for retrieving a list of documents where the keyword is present
		/// </summary>
		/// <param name="ObjectType">[not implemented] search only available for documents</param>
		/// <param name="Keyword">The word being searched for</param>
		/// <param name="Max">The maximum limit on results returned</param>
		/// <returns>A list of documentnames indexed by the id of the document</returns>
		public static Hashtable Search(Guid ObjectType, string Keyword, int Max) 
		{
			Hashtable results = new Hashtable();
			IndexSearcher searcher = new IndexSearcher(index.Indexer.IndexDirectory);
			try 
			{
				Query query = QueryParser.Parse(Keyword, "Content", new StandardAnalyzer());

				// Sorting
				SortField[] sf = {new SortField("SortText")};
				Hits hits = searcher.Search(query, new Sort(sf));
				if (hits.Length() < Max)
					Max = hits.Length();

				for (int i=0;i<Max;i++) 
				{
					try 
					{
						// Skip hits without an id and ids that were already added; both
						// would make Hashtable.Add throw.
						string id = hits.Doc(i).Get("Id");
						if (id != null && !results.ContainsKey(id))
							results.Add(id, hits.Doc(i).Get("Text"));
					} 
					catch 
					{
						// Best effort: a hit that cannot be read is left out of the
						// result instead of failing the whole search.
					}
				}
			} 
			finally
			{
				// Close the searcher on every path. Errors from parsing or searching
				// propagate to the caller with their original stack trace.
				searcher.Close();
			}

			return results;
			
		}
Esempio n. 23
0
		/// <summary>
		/// Demo entry point: indexes seven short texts into the "contents" field, then runs a
		/// phrase query with a phrase slop of 4 and prints up to ten hits with their scores.
		/// Any exception is reported on the console.
		/// </summary>
		public static void  Main(System.String[] args)
		{
			try
			{
				Directory directory = new RAMDirectory();
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(directory, analyzer, true);
				
				System.String[] texts = new System.String[]{"a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c"};
				foreach (System.String text in texts)
				{
					Document entry = new Document();
					entry.Add(Field.Text("contents", text));
					writer.AddDocument(entry);
				}
				writer.Close();
				
				Searcher searcher = new IndexSearcher(directory);
				
				System.String[] queryTexts = new System.String[]{"\"a c e\""};
				
				QueryParsers.QueryParser parser = new QueryParsers.QueryParser("contents", analyzer);
				parser.SetPhraseSlop(4);
				foreach (System.String queryText in queryTexts)
				{
					Query query = parser.Parse(queryText);
					System.Console.Out.WriteLine("Query: " + query.ToString("contents"));
					
					Hits hits = searcher.Search(query);
					
					System.Console.Out.WriteLine(hits.Length() + " total results");
					
					// print at most the first ten hits
					int rank = 0;
					while (rank < hits.Length() && rank < 10)
					{
						Document hitDoc = hits.Doc(rank);
						System.Console.Out.WriteLine(rank + " " + hits.Score(rank) + " " + hitDoc.Get("contents"));
						rank++;
					}
				}
				searcher.Close();
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
Esempio n. 24
0
        /// <summary>
        /// Console demo: reads lines from standard input and adds them to the "LuceneIndex"
        /// index until an empty line (or end of input) is read, then reads search strings and
        /// runs each against the "postBody" field until an empty line (or end of input).
        /// </summary>
        static void Main(string[] args)
        {
            //Setup indexer

            Directory directory = FSDirectory.GetDirectory("LuceneIndex");
            try
            {
                Analyzer analyzer = new StandardAnalyzer();
                string text = String.Empty;
                int txts;
                int j = 0;

                IndexWriter writer = new IndexWriter(directory, analyzer);
                try
                {
                    // The current MaxDoc value seeds the numbering of the new entries.
                    int totDocs;
                    IndexReader red = IndexReader.Open(directory);
                    try
                    {
                        totDocs = red.MaxDoc();
                    }
                    finally
                    {
                        red.Close();
                    }

                    //Add documents to the index
                    Console.WriteLine("Enter the text you want to add to the index:");
                    Console.Write(">");
                    txts = totDocs;

                    // ReadLine returns null at end of input; comparing only against
                    // String.Empty would then loop forever.
                    while (!String.IsNullOrEmpty(text = Console.ReadLine()))
                    {
                        AddTextToIndex(txts++, text, writer);
                        j++;
                        Console.Write(">");
                    }

                    writer.Optimize();
                    writer.Flush();
                }
                finally
                {
                    //Close the writer
                    writer.Close();
                }

                Console.WriteLine(j + " lines added, "+txts+" documents total");

                //Setup searcher
                IndexSearcher searcher = new IndexSearcher(directory);
                try
                {
                    QueryParser parser = new QueryParser("postBody", analyzer);

                    Console.WriteLine("Enter the search string:");
                    Console.Write(">");

                    while (!String.IsNullOrEmpty(text = Console.ReadLine()))
                    {
                        Search(text, searcher, parser);
                        Console.Write(">");
                    }
                }
                finally
                {
                    searcher.Close();
                }
            }
            finally
            {
                //Clean up everything
                directory.Close();
            }
        }
Esempio n. 25
0
 /// <summary>
 /// Closes the primary searcher and, when present, the secondary searcher, then
 /// hands the corresponding readers to <c>ReleaseReader</c>.
 /// Every step is attempted even if an earlier one throws, so a failing
 /// <c>Close()</c> does not leave the readers unreleased; the exception still propagates.
 /// </summary>
 /// <param name="primary_reader">Reader to release; passed to ReleaseReader unconditionally.</param>
 /// <param name="primary_searcher">Searcher to close; must not be null.</param>
 /// <param name="secondary_reader">Optional second reader; skipped when null.</param>
 /// <param name="secondary_searcher">Optional second searcher; skipped when null.</param>
 private void CloseSearchers(IndexReader primary_reader,
                             LNS.IndexSearcher primary_searcher,
                             IndexReader secondary_reader,
                             LNS.IndexSearcher secondary_searcher)
 {
     try
     {
         try
         {
             primary_searcher.Close();
         }
         finally
         {
             if (secondary_searcher != null)
             {
                 secondary_searcher.Close();
             }
         }
     }
     finally
     {
         // Readers are released after the searchers, same order as before.
         try
         {
             ReleaseReader(primary_reader);
         }
         finally
         {
             if (secondary_reader != null)
             {
                 ReleaseReader(secondary_reader);
             }
         }
     }
 }
Esempio n. 26
0
		/// <summary>
		/// Exercises the deprecated three-argument RangeQuery constructor with one
		/// open end and an exclusive bound at "C", over an index built from A, B, C, D.
		/// </summary>
		public virtual void  TestDeprecatedCstrctors()
		{
			// No lower term, exclusive upper term "C": two hits are expected (A and B).
			Query query = new RangeQuery(null, new Term("content", "C"), false);
			InitializeIndex(new System.String[]{"A", "B", "C", "D"});
			IndexSearcher searcher = new IndexSearcher(dir);
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(2, hits.Length, "A,B,C,D, only A,B in range");
			searcher.Close();
			
			// Exclusive lower term "C", no upper term: one hit is expected (D).
			query = new RangeQuery(new Term("content", "C"), null, false);
			InitializeIndex(new System.String[]{"A", "B", "C", "D"});
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "A,B,C,D, only D in range");
			searcher.Close();
		}
		// search methods
		/// <summary>
		/// Loads every document reported by the reader's unpositioned TermDocs enumerator
		/// and maps the loaded documents to <c>SampleData</c> items.
		/// </summary>
		/// <returns>
		/// The mapped records, or an empty list when the index folder contains no files.
		/// </returns>
		public static IEnumerable<SampleData> GetAllIndexRecords() {
			// validate search index
			if (!System.IO.Directory.EnumerateFiles(_luceneDir).Any()) return new List<SampleData>();

			var docs = new List<Document>();

			// set up lucene searcher; 'using' releases the reader first and then the searcher,
			// also when loading a document throws
			using (var searcher = new IndexSearcher(_directory, false))
			using (var reader = IndexReader.Open(_directory, false)) {
				var term = reader.TermDocs();
				while (term.Next()) docs.Add(searcher.Doc(term.Doc()));
			}

			return _mapLuceneToDataList(docs);
		}
Esempio n. 28
0
        /// <summary>
        /// Runs a query against the library index and returns one result per distinct
        /// fingerprint. Understands the lucene query syntax.
        /// </summary>
        /// <param name="query">Query text; parsed with "content" as the default field.</param>
        /// <returns>
        /// One result (fingerprint and score) for the first hit of each distinct fingerprint,
        /// in hit order. If an exception occurs it is logged as a warning and whatever was
        /// collected so far (possibly nothing) is returned.
        /// </returns>
        public List <Utilities.Language.TextIndexing.IndexResult> GetDocumentsWithQuery(string query)
        {
            List <Utilities.Language.TextIndexing.IndexResult> fingerprints = new List <Utilities.Language.TextIndexing.IndexResult>();
            HashSet <string> fingerprints_already_seen = new HashSet <string>();

            try
            {
                using (Lucene.Net.Index.IndexReader index_reader = Lucene.Net.Index.IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
                {
                    using (Lucene.Net.Search.IndexSearcher index_searcher = new Lucene.Net.Search.IndexSearcher(index_reader))
                    {
                        Lucene.Net.QueryParsers.QueryParser query_parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_29, "content", analyzer);

                        Lucene.Net.Search.Query query_object = query_parser.Parse(query);
                        Lucene.Net.Search.Hits  hits         = index_searcher.Search(query_object);

                        var i = hits.Iterator();
                        while (i.MoveNext())
                        {
                            Lucene.Net.Search.Hit hit = (Lucene.Net.Search.Hit)i.Current;
                            string fingerprint        = hit.Get("fingerprint");

                            // HashSet.Add returns false when the fingerprint was already recorded,
                            // so only the first hit per fingerprint produces a result.
                            if (fingerprints_already_seen.Add(fingerprint))
                            {
                                IndexResult index_result = new IndexResult {
                                    fingerprint = fingerprint, score = hit.GetScore()
                                };
                                fingerprints.Add(index_result);
                            }
                        }

                        // Close the index (explicitly, in addition to the enclosing using blocks)
                        index_searcher.Close();
                    }
                    index_reader.Close();
                }
            }
            catch (Exception ex)
            {
                Logging.Warn(ex, "GetDocumentsWithQuery: There was a problem opening the index file for searching.");
            }

            return(fingerprints);
        }
Esempio n. 29
0
        /// <summary>
        /// Searches the "Text" field of the index stored under <c>path</c> and collects
        /// the "Id" field of every hit.
        /// </summary>
        /// <param name="query">
        /// Query text. It is truncated to <c>TextIndexCfg.MaxQueryLength</c> characters and,
        /// if it cannot be parsed as query syntax, re-parsed in escaped form.
        /// </param>
        /// <param name="tenant">Tenant whose culture selects the analyzer.</param>
        /// <returns>
        /// The collected identifiers; an empty result when the query is null/empty or the
        /// index folder does not exist.
        /// </returns>
        public TextSearchResult Search(string query, Tenant tenant)
        {
            var result = new TextSearchResult(module);

            if (string.IsNullOrEmpty(query) || !Directory.Exists(path))
            {
                return result;
            }

            var dir = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo(path));
            try
            {
                // Created inside the try so the directory is closed even if opening the searcher fails.
                var searcher = new IndexSearcher(dir, false);
                try
                {
                    var analyzer = new AnalyzersProvider().GetAnalyzer(tenant.GetCulture().TwoLetterISOLanguageName);
                    var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Text", analyzer);
                    parser.SetDefaultOperator(QueryParser.Operator.AND);
                    if (TextIndexCfg.MaxQueryLength < query.Length)
                    {
                        query = query.Substring(0, TextIndexCfg.MaxQueryLength);
                    }
                    Query q = null;
                    try
                    {
                        q = parser.Parse(query);
                    }
                    catch (Lucene.Net.QueryParsers.ParseException)
                    {
                        // Deliberately ignored: the escaped form of the text is parsed below instead.
                    }
                    if (q == null)
                    {
                        q = parser.Parse(QueryParser.Escape(query));
                    }

#pragma warning disable 618
                    var hits = searcher.Search(q);
#pragma warning restore 618
                    for (int i = 0; i < hits.Length(); i++)
                    {
                        var doc = hits.Doc(i);
                        result.AddIdentifier(doc.Get("Id"));
                    }
                }
                finally
                {
                    searcher.Close();
                }
            }
            finally
            {
                dir.Close();
            }
            return result;
        }
Esempio n. 30
0
        /// <summary>
        /// Opens the index: creates an empty one first when "segments.gen" is missing,
        /// closes the current searcher if there is one, and assigns a new searcher.
        /// </summary>
        private void OpenIndex()
        {
            // If no index exists yet, create a blank one.
            // NOTE(review): the path is built by plain concatenation, which looks like it
            // assumes IndexDirectory ends with a path separator — confirm.
            bool indexPresent = File.Exists(Directorys.IndexDirectory + "segments.gen");
            if (!indexPresent)
            {
                IndexWriter blankIndex = new IndexWriter(Directorys.IndexDirectory, new ThesaurusAnalyzer(), true);
                blankIndex.Optimize();
                blankIndex.Close();
            }

            // If a searcher was already created, close it before replacing it.
            if (searcher != null)
                searcher.Close();

            searcher = new Lucene.Net.Search.IndexSearcher(Directorys.IndexDirectory);
        }
Esempio n. 31
0
        } // constructor

        /// <summary>
        /// Reports the searcher's MaxDoc value for the index at <c>luceneIndexDir</c>.
        /// </summary>
        /// <returns>
        /// The MaxDoc value, or -1 when opening, querying or closing the searcher throws.
        /// </returns>
        public int getNumDocsInIndex()
        {
            try
            {
                IndexSearcher indexSearcher = new IndexSearcher(luceneIndexDir);
                try
                {
                    return indexSearcher.MaxDoc();
                }
                finally
                {
                    // Always close; a failure here is swallowed below as well.
                    indexSearcher.Close();
                }
            }
            catch
            {
                // Best effort: any failure is reported as -1.
                return -1;
            }
        }
		/// <summary>
		/// Indexes 5000 documents that each carry between one and ten random ints, stored
		/// both as an 11-character zero-padded term ("asc") and as a NumericField ("trie"),
		/// then checks on 50 random inclusive ranges that a TermRangeQuery and a
		/// NumericRangeQuery report the same total hit count.
		/// </summary>
		public virtual void  TestMultiValuedNRQ()
		{
			System.Random random = NewRandom();
			
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
			
			int docsWritten = 0;
			while (docsWritten < 5000)
			{
				Document doc = new Document();
				// Draw the value count first, then the values, to keep the random sequence stable.
				int lastValueIndex = random.Next(10);
				for (int m = 0; m <= lastValueIndex; m++)
				{
					int number = random.Next(System.Int32.MaxValue);
					string paddedNumber = number.ToString().PadLeft(11, '0');
					doc.Add(new Field("asc", paddedNumber, Field.Store.NO, Field.Index.NOT_ANALYZED));
					doc.Add(new NumericField("trie", Field.Store.NO, true).SetIntValue(number));
				}
				writer.AddDocument(doc);
				docsWritten++;
			}
			writer.Close();
			
			Searcher searcher = new IndexSearcher(directory, true);
			for (int round = 0; round < 50; round++)
			{
				int first = random.Next(System.Int32.MaxValue);
				int second = random.Next(System.Int32.MaxValue);
				// Order the two draws so that lower <= upper.
				int lower = System.Math.Min(first, second);
				int upper = System.Math.Max(first, second);
				
				string lowerTerm = lower.ToString().PadLeft(11, '0');
				string upperTerm = upper.ToString().PadLeft(11, '0');
				TermRangeQuery termRange = new TermRangeQuery("asc", lowerTerm, upperTerm, true, true);
				NumericRangeQuery numericRange = NumericRangeQuery.NewIntRange("trie", lower, upper, true, true);
				
				TopDocs termRangeDocs = searcher.Search(termRange, 1);
				TopDocs numericRangeDocs = searcher.Search(numericRange, 1);
				Assert.AreEqual(termRangeDocs.totalHits, numericRangeDocs.totalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
			}
			searcher.Close();
			
			directory.Close();
		}
Esempio n. 33
0
        /// <summary>
        /// Command-line entry point: expands the given query against the index at the
        /// given path and prints the resulting query for the "contents" field.
        /// </summary>
        /// <param name="args">Exactly two values: the index path and the query text.</param>
        public static void Main(String[] args)
        {
            // Anything other than two arguments prints the usage line and exits.
            if (args.Length != 2)
            {
                Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
                return;
            }

            const string field = "contents";
            String indexPath = args[0];
            String query = args[1];

            var directory = FSDirectory.Open(new DirectoryInfo(indexPath));
            var searcher = new IndexSearcher(directory, true);
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);

            Query expanded = Expand(query, searcher, analyzer, field, 0.9f);
            System.Console.Out.WriteLine("Query: " + expanded.ToString(field));

            searcher.Close();
            directory.Close();
        }
        /// <summary>
        /// Searches the product index for <c>searchInfo.QueryString</c> across the
        /// "productname", "keywords" and "description" fields, optionally restricted to
        /// a category path, and maps every hit to a <c>ProductModel</c>.
        /// </summary>
        /// <param name="MatchCount">Receives the number of hits.</param>
        /// <returns>One entity per hit, in the order given by <c>GetSort()</c>.</returns>
        public override List<ISearchEntity> GetSearchResult(out int MatchCount)
        {
            Analyzer analyzer = new StandardAnalyzer();

            IndexSearcher searcher = new IndexSearcher(searchInfo.ConfigElement.IndexDirectory);
            try
            {
                MultiFieldQueryParser parserName = new MultiFieldQueryParser(new string[] { "productname", "keywords", "description" }, analyzer);

                Query queryName = parserName.Parse(searchInfo.QueryString);
                Query queryCategory = new WildcardQuery(new Term("catepath", "*" + searchInfo.Category + "*"));

                BooleanQuery bQuery = new BooleanQuery();
                bQuery.Add(queryName, BooleanClause.Occur.MUST);
                // Category 0 means "no category filter".
                if (searchInfo.Category != 0) bQuery.Add(queryCategory, BooleanClause.Occur.MUST);

                Hits hits = searcher.Search(bQuery, GetSort());

                List<ISearchEntity> ResultList = new List<ISearchEntity>();

                for (int i = 0; i < hits.Length(); i++)
                {
                    Document doc = hits.Doc(i);

                    ResultList.Add((ISearchEntity)new ProductModel()
                    {
                        EntityIdentity = Convert.ToInt32(doc.Get("productid")),
                        ProductName = doc.Get("productname"),
                        CategoryID = Convert.ToInt32(doc.Get("cateid")),
                        CategoryPath = doc.Get("catepath"),
                        Keywords = doc.Get("keywords"),
                        Description = doc.Get("description"),
                        Price = Convert.ToDecimal(doc.Get("price")),
                        CreateTime = Convert.ToDateTime(doc.Get("createtime")),
                        UpdateTime = Convert.ToDateTime(doc.Get("updatetime")),
                        ProductImage = Convert.ToString(doc.Get("mainimage"))
                    });
                }

                MatchCount = hits.Length();
                return ResultList;
            }
            finally
            {
                // Runs on success and on failure (bad query text, conversion error, ...),
                // so the searcher is never left open.
                searcher.Close();
            }
        }
Esempio n. 35
0
		/// <summary>
		/// Checks a TermRangeQuery on "content" from "A" to "C" with both bounds
		/// exclusive: exactly one hit is expected for each of three index states.
		/// </summary>
		public virtual void  TestExclusive()
		{
			Query exclusiveRange = new TermRangeQuery("content", "A", "C", false, false);
			
			// State 1: index built from A, B, C, D.
			InitializeIndex(new System.String[]{"A", "B", "C", "D"});
			IndexSearcher firstSearcher = new IndexSearcher(dir);
			int firstCount = firstSearcher.Search(exclusiveRange, null, 1000).ScoreDocs.Length;
			Assert.AreEqual(1, firstCount, "A,B,C,D, only B in range");
			firstSearcher.Close();
			
			// State 2: index rebuilt from A, B, D.
			InitializeIndex(new System.String[]{"A", "B", "D"});
			IndexSearcher secondSearcher = new IndexSearcher(dir);
			int secondCount = secondSearcher.Search(exclusiveRange, null, 1000).ScoreDocs.Length;
			Assert.AreEqual(1, secondCount, "A,B,D, only B in range");
			secondSearcher.Close();
			
			// State 3: "C" added on top of state 2.
			AddDoc("C");
			IndexSearcher thirdSearcher = new IndexSearcher(dir);
			int thirdCount = thirdSearcher.Search(exclusiveRange, null, 1000).ScoreDocs.Length;
			Assert.AreEqual(1, thirdCount, "C added, still only B in range");
			thirdSearcher.Close();
		}
Esempio n. 36
0
        /// <summary>
        /// Round-trips a RAMDirectory holding one document through binary serialization,
        /// appends a second identical document to the deserialized copy and expects a
        /// search for "value1" to find both (see the issue: LUCENENET-174).
        /// </summary>
        public void Test_Store_RAMDirectory()
        {
            Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

            //Index 1 Doc
            Lucene.Net.Index.IndexWriter  wr  = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
            wr.AddDocument(doc);
            wr.Dispose();

            //now serialize it
            // NOTE(review): BinaryFormatter is unsafe for untrusted data; here it only
            // round-trips an object created by this test.
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            Lucene.Net.Store.RAMDirectory ramDIR2;
            using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
            {
                serializer.Serialize(memoryStream, ramDIR);

                //Close DIR
                ramDIR.Close();
                ramDIR = null;

                //now deserialize
                memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
                ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);
            }

            //Add 1 more doc
            wr  = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
            doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
            wr.AddDocument(doc);
            wr.Dispose();

            //Search
            Lucene.Net.Search.IndexSearcher     s       = new Lucene.Net.Search.IndexSearcher(ramDIR2);
            Lucene.Net.QueryParsers.QueryParser qp      = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_CURRENT, "field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT));
            Lucene.Net.Search.Query             q       = qp.Parse("value1");
            Lucene.Net.Search.TopDocs           topDocs = s.Search(q, 100);
            s.Close();

            // Expected value first, actual second, so a failure report reads correctly.
            Assert.AreEqual(2, topDocs.TotalHits, "See the issue: LUCENENET-174");
        }
Esempio n. 37
0
    /// <summary>
    /// Click handler: searches the "_searchtxt" field for the text in SearchTextBox,
    /// collects the integer "id" of every hit and binds the matching Articles rows
    /// to DataList1. Does nothing when the text box is empty.
    /// </summary>
    /// <param name="sender">Event source; not used.</param>
    /// <param name="e">Event data; not used.</param>
    protected void SearchButton_Click(object sender, EventArgs e)
    {
        if (string.IsNullOrEmpty(SearchTextBox.Text))
        {
            return;
        }

        String srch = SearchTextBox.Text;
        List<int> aIds = new List<int>();

        Lucene.Net.Store.RAMDirectory ramDir = new Lucene.Net.Store.RAMDirectory(luceneDBPath);
        try
        {
            Lucene.Net.Search.IndexSearcher idx = new Lucene.Net.Search.IndexSearcher(ramDir);
            try
            {
                Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("_searchtxt", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
                qp.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.Operator.AND);
                Lucene.Net.Search.BooleanQuery.SetMaxClauseCount(100);

                // Parse can throw on malformed user input; the finally blocks below
                // still release the searcher and the in-memory directory.
                Lucene.Net.Search.Hits hits = idx.Search(qp.Parse(srch));

                for (int i = 0; i < hits.Length(); i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    int aid = 0;
                    // Hits whose "id" is not an integer are skipped.
                    if (int.TryParse(doc.Get("id"), out aid))
                    {
                        aIds.Add(aid);
                    }
                }
            }
            finally
            {
                idx.Close();
            }
        }
        finally
        {
            // Closing the in-memory directory explicitly replaces the forced GC.Collect().
            ramDir.Close();
        }

        using (DataClassesDataContext dtx = new DataClassesDataContext())
        {
            var arts = from a in dtx.Articles
                       where aIds.Contains(a.Id)
                       select a;

            DataList1.DataSource = arts;
            DataList1.DataBind();
        }
    }
        /// <summary>
        /// Wraps a parsed text query in a CustomExternalQuery and checks that all
        /// N_DOCS documents match and that every score equals 1 + (4 * doc) % N_DOCS
        /// within a tolerance of 0.0001.
        /// </summary>
        public void TestCustomExternalQuery()
        {
            QueryParser qp   = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
            String      qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
            Query       q1   = qp.Parse(qtxt);

            Query q = new CustomExternalQuery(q1);

            Log(q);

            IndexSearcher s    = new IndexSearcher(dir);
            TopDocs       hits = s.Search(q, 1000);

            Assert.AreEqual(N_DOCS, hits.TotalHits);
            for (int i = 0; i < N_DOCS; i++)
            {
                int   doc   = hits.ScoreDocs[i].Doc;
                float score = hits.ScoreDocs[i].Score;
                // Expected value first, actual score second, so a failure report reads correctly.
                Assert.AreEqual((float)1 + (4 * doc) % N_DOCS, score, 0.0001, "doc=" + doc);
            }
            s.Close();
        }
Esempio n. 39
0
		/// <summary>
		/// Command-line entry point: expands the given query against the index at the
		/// given path and prints the resulting query for the "contents" field.
		/// </summary>
		/// <param name="args">Exactly two values: the index path and the query text.</param>
		public static void  Main(System.String[] args)
		{
			// Anything other than two arguments prints the usage line and exits.
			if (args.Length != 2)
			{
				System.Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
				return;
			}
			
			System.String indexPath = args[0];
			System.String query = args[1];
			System.String field = "contents";
			
			FSDirectory directory = FSDirectory.GetDirectory(indexPath, false);
			IndexSearcher searcher = new IndexSearcher(directory);
			
			Query expanded = Expand(query, searcher, new StandardAnalyzer(), field, 0.9f);
			System.Console.Out.WriteLine("Query: " + expanded.ToString(field));
			
			searcher.Close();
			directory.Close();
		}
Esempio n. 40
0
		/// <summary>
		/// Checks a TermRangeQuery on "content" from "A" to "C" with both bounds
		/// inclusive against three index states, expecting 3, 2 and 3 hits.
		/// </summary>
		public virtual void  TestInclusive()
		{
			Query inclusiveRange = new TermRangeQuery("content", "A", "C", true, true);
			
			// State 1: index built from A, B, C, D.
			InitializeIndex(new System.String[]{"A", "B", "C", "D"});
			IndexSearcher firstSearcher = new IndexSearcher(dir, true);
			int firstCount = firstSearcher.Search(inclusiveRange, null, 1000).ScoreDocs.Length;
			Assert.AreEqual(3, firstCount, "A,B,C,D - A,B,C in range");
			firstSearcher.Close();
			
			// State 2: index rebuilt from A, B, D.
			InitializeIndex(new System.String[]{"A", "B", "D"});
			IndexSearcher secondSearcher = new IndexSearcher(dir, true);
			int secondCount = secondSearcher.Search(inclusiveRange, null, 1000).ScoreDocs.Length;
			Assert.AreEqual(2, secondCount, "A,B,D - A and B in range");
			secondSearcher.Close();
			
			// State 3: "C" added on top of state 2.
			AddDoc("C");
			IndexSearcher thirdSearcher = new IndexSearcher(dir, true);
			int thirdCount = thirdSearcher.Search(inclusiveRange, null, 1000).ScoreDocs.Length;
			Assert.AreEqual(3, thirdCount, "C added - A, B, C in range");
			thirdSearcher.Close();
		}
Esempio n. 41
0
 /// <summary>
 /// Test teardown: closes the reader, the searcher and the directory, in that order.
 /// </summary>
 public void TearDown()
 {
     reader.Close();
     searcher.Close();
     directory.Close();
 }
Esempio n. 42
0
 /// <summary>
 /// Test teardown: runs the base-class teardown first, then closes the searcher
 /// and the directory.
 /// </summary>
 public override void  TearDown()
 {
     base.TearDown();
     searcher.Close();
     directory.Close();
 }
Esempio n. 43
0
        /// <summary>
        /// Checks that a PhraseQuery gives the expected hit counts both on its own and
        /// as a MUST clause of a BooleanQuery next to a TermQuery, for two successive
        /// index contents and for both clause orders.
        /// </summary>
        public virtual void  TestPhraseQueryInConjunctionScorer()
        {
            RAMDirectory directory = new RAMDirectory();

            // ---- Round 1: two documents sharing the phrase "marketing info" in "source" ----
            IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document sourceOnly = new Document();
            sourceOnly.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(sourceOnly);

            Document withContents = new Document();
            withContents.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
            withContents.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(withContents);

            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory);

            PhraseQuery sourcePhrase = new PhraseQuery();
            sourcePhrase.Add(new Term("source", "marketing"));
            sourcePhrase.Add(new Term("source", "info"));

            ScoreDoc[] found = searcher.Search(sourcePhrase, null, 1000).ScoreDocs;
            Assert.AreEqual(2, found.Length);
            QueryUtils.Check(sourcePhrase, searcher);

            TermQuery foobarTerm = new TermQuery(new Term("contents", "foobar"));
            BooleanQuery termAndPhrase = new BooleanQuery();
            termAndPhrase.Add(foobarTerm, BooleanClause.Occur.MUST);
            termAndPhrase.Add(sourcePhrase, BooleanClause.Occur.MUST);

            found = searcher.Search(termAndPhrase, null, 1000).ScoreDocs;
            Assert.AreEqual(1, found.Length);
            QueryUtils.Check(foobarTerm, searcher);

            searcher.Close();

            // ---- Round 2: the index is re-created with three "contents" documents ----
            writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document first = new Document();
            first.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(first);

            Document second = new Document();
            second.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(second);

            Document third = new Document();
            third.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(third);

            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(directory);

            TermQuery wooTerm = new TermQuery(new Term("contents", "woo"));
            PhraseQuery mapEntryPhrase = new PhraseQuery();
            mapEntryPhrase.Add(new Term("contents", "map"));
            mapEntryPhrase.Add(new Term("contents", "entry"));

            found = searcher.Search(wooTerm, null, 1000).ScoreDocs;
            Assert.AreEqual(3, found.Length);
            found = searcher.Search(mapEntryPhrase, null, 1000).ScoreDocs;
            Assert.AreEqual(2, found.Length);

            // Term clause first, phrase clause second.
            BooleanQuery termThenPhrase = new BooleanQuery();
            termThenPhrase.Add(wooTerm, BooleanClause.Occur.MUST);
            termThenPhrase.Add(mapEntryPhrase, BooleanClause.Occur.MUST);
            found = searcher.Search(termThenPhrase, null, 1000).ScoreDocs;
            Assert.AreEqual(2, found.Length);

            // Same clauses in the opposite order.
            BooleanQuery phraseThenTerm = new BooleanQuery();
            phraseThenTerm.Add(mapEntryPhrase, BooleanClause.Occur.MUST);
            phraseThenTerm.Add(wooTerm, BooleanClause.Occur.MUST);
            found = searcher.Search(phraseThenTerm, null, 1000).ScoreDocs;
            Assert.AreEqual(2, found.Length);
            QueryUtils.Check(phraseThenTerm, searcher);

            searcher.Close();
            directory.Close();
        }
Esempio n. 44
0
        /// <summary>
        /// Indexes four fixed texts and cross-checks term vectors against postings:
        /// every posting frequency must match the frequency recorded in the document's
        /// "field" term vector, a search for "chocolate" must return documents 2, 3, 0
        /// in that order, and the term vector of the second hit must match
        /// <c>test4Map</c>, both directly and through the two sorted term-vector mappers.
        /// An IOException is written to stderr and fails the test.
        /// </summary>
        public virtual void  TestKnownSetOfDocuments()
        {
            System.String test1 = "eating chocolate in a computer lab";                                             //6 terms
            System.String test2 = "computer in a computer lab";                                                     //5 terms
            System.String test3 = "a chocolate lab grows old";                                                      //5 terms
            System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
            // Expected per-term frequencies; compared below against the term vector of hits[1].
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["computer"]  = 1;
            test4Map["old"]       = 1;

            // One document per text; SetupDoc (defined elsewhere) fills in the fields.
            Document testDoc1 = new Document();

            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();

            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();

            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();

            SetupDoc(testDoc4, test4);

            Directory dir = new MockRAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1, null);
                writer.AddDocument(testDoc2, null);
                writer.AddDocument(testDoc3, null);
                writer.AddDocument(testDoc4, null);
                writer.Close();
                IndexSearcher knownSearcher = new IndexSearcher(dir, true, null);
                TermEnum      termEnum      = knownSearcher.reader_ForNUnit.Terms(null);
                TermDocs      termDocs      = knownSearcher.reader_ForNUnit.TermDocs(null);
                //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);

                // Part 1: walk every term and each of its postings, and check the posting
                // frequency against the same term's entry in the document's term vector.
                Similarity sim = knownSearcher.Similarity;
                while (termEnum.Next(null) == true)
                {
                    Term term = termEnum.Term;
                    //System.out.println("Term: " + term);
                    termDocs.Seek(term, null);
                    while (termDocs.Next(null))
                    {
                        int docId = termDocs.Doc;
                        int freq  = termDocs.Freq;
                        //System.out.println("Doc Id: " + docId + " freq " + freq);
                        ITermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field", null);
                        // tf, idf and lNorm are computed but not asserted on.
                        float           tf     = sim.Tf(freq);
                        float           idf    = sim.Idf(knownSearcher.DocFreq(term, null), knownSearcher.MaxDoc);
                        //float qNorm = sim.queryNorm()
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);
                        //float coord = sim.coord()
                        //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
                        Assert.IsTrue(vector != null);
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                        {
                            if (term.Text.Equals(vTerms[i]))
                            {
                                Assert.IsTrue(freqs[i] == freq);
                            }
                        }
                    }
                    //System.out.println("--------");
                }
                // Part 2: search "chocolate"; three of the four documents must match.
                Query      query = new TermQuery(new Term("field", "chocolate"));
                ScoreDoc[] hits  = knownSearcher.Search(query, null, 1000, null).ScoreDocs;
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length == 3);
                float score = hits[0].Score;

                /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
                 * System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
                 * System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " +  hits.doc(2).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
                // Document ids are asserted in hit order: 2, 3, 0.
                Assert.IsTrue(hits[0].Doc == 2);
                Assert.IsTrue(hits[1].Doc == 3);
                Assert.IsTrue(hits[2].Doc == 0);
                // Part 3: the second hit (doc 3) must expose 10 distinct terms whose
                // frequencies equal the values in test4Map.
                ITermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, "field", null);
                Assert.IsTrue(vector2 != null);
                //System.out.println("Vector: " + vector);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    //System.out.println("Term: " + term);
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);
                    System.Int32 freqInt = -1;
                    try
                    {
                        freqInt = (System.Int32)test4Map[term];
                    }
                    catch (Exception)
                    {
                        // A term missing from test4Map makes the unboxing cast throw; fail the test.
                        Assert.IsTrue(false);
                    }
                    Assert.IsTrue(freqInt == freq);
                }
                // Part 4: the same document through a frequency-sorted mapper that collapses all fields.
                SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, mapper, null);
                var vectorEntrySet = mapper.TermVectorEntrySet;
                Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
                TermVectorEntry last = null;
                foreach (TermVectorEntry tve in vectorEntrySet)
                {
                    // The first entry has no predecessor, so it is not checked.
                    if (tve != null && last != null)
                    {
                        Assert.IsTrue(last.Frequency >= tve.Frequency, "terms are not properly sorted");
                        System.Int32 expectedFreq = (System.Int32)test4Map[tve.Term];
                        //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                        Assert.IsTrue(tve.Frequency == 2 * expectedFreq, "Frequency is not correct:");
                    }
                    last = tve;
                }

                // Part 5: the per-field mapper must report two fields, with 10 entries under "field".
                FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, fieldMapper, null);
                var map = fieldMapper.FieldToTerms;
                Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
                vectorEntrySet = map["field"];
                Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
                Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);
                knownSearcher.Close();
            }
            catch (System.IO.IOException e)
            {
                // I/O problems are reported on stderr and turned into a test failure.
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.IsTrue(false);
            }
        }
Esempio n. 45
0
        /// <summary>
        /// Indexes 30 documents with a "noTf" field (term frequencies and positions
        /// omitted) and a regular "tf" field, then runs four term queries and one
        /// conjunction through counting collectors; the conjunction must count 15 hits.
        /// </summary>
        public virtual void  TestBasic()
        {
            Directory dir = new MockRAMDirectory();
            Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);

            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            writer.MergeFactor = 2;
            writer.SetMaxBufferedDocs(2);
            writer.SetSimilarity(new SimpleSimilarity());

            System.String term = "term";
            System.Text.StringBuilder repeated = new System.Text.StringBuilder(265);

            int docNumber = 0;
            while (docNumber < 30)
            {
                // Document n carries the word "term" n + 1 times.
                repeated.Append(term).Append(" ");
                System.String content = repeated.ToString();
                bool evenDoc = docNumber % 2 == 0;

                // Odd documents get the extra token "notf" in the noTf field ...
                Field noTf = new Field("noTf", content + (evenDoc ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
                noTf.OmitTermFreqAndPositions = true;

                // ... and even documents get the extra token "tf" in the tf field.
                Field tf = new Field("tf", content + (evenDoc ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);

                Document document = new Document();
                document.Add(noTf);
                document.Add(tf);
                writer.AddDocument(document);

                docNumber++;
            }

            writer.Optimize();
            // Closing the writer flushes it.
            writer.Close();
            _TestUtil.CheckIndex(dir);

            // Verify the index
            Searcher searcher = new IndexSearcher(dir, true);
            searcher.Similarity = new SimpleSimilarity();

            TermQuery q1 = new TermQuery(new Term("noTf", term));
            TermQuery q2 = new TermQuery(new Term("tf", term));
            TermQuery q3 = new TermQuery(new Term("noTf", "notf"));
            TermQuery q4 = new TermQuery(new Term("tf", "tf"));

            // Each collector is created immediately before its search, in this order;
            // what each one checks is defined in its own class, not shown here.
            searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
            searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
            searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
            searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));

            BooleanQuery conjunction = new BooleanQuery();
            conjunction.Add(q1, Occur.MUST);
            conjunction.Add(q4, Occur.MUST);

            searcher.Search(conjunction, new AnonymousClassCountingHitCollector4(this));
            Assert.IsTrue(15 == CountingHitCollector.GetCount());

            searcher.Close();
            dir.Close();
        }
        /// <summary>
        /// Checks that scores for the term "doc0" in field "contents" are the same whether the
        /// matching documents live in one index or are split across two indexes searched
        /// through the searcher returned by <c>GetMultiSearcherInstance</c>.
        /// </summary>
        /// <param name="nDocs">Document count forwarded to <c>InitIndex</c> for every index built here.</param>
        /// <param name="message">Message attached to every assertion.</param>
        private void  TestNormalization(int nDocs, System.String message)
        {
            Query doc0Query = new TermQuery(new Term("contents", "doc0"));

            // ---- Phase 1: both document sets in a single index ----
            RAMDirectory combinedDir = new MockRAMDirectory();

            InitIndex(combinedDir, nDocs, true, null);   // documents with a single token: "doc0", "doc1", ...
            InitIndex(combinedDir, nDocs, false, "x");   // documents with two tokens: "doc0" and "x", "doc1" and "x", ...

            IndexSearcher combinedSearcher = new IndexSearcher(combinedDir);
            combinedSearcher.SetDefaultFieldSortScoring(true, true);

            ScoreDoc[] singleIndexHits = combinedSearcher.Search(doc0Query, null, 1000).ScoreDocs;

            Assert.AreEqual(2, singleIndexHits.Length, message);

            // Remember the reference scores for the comparisons below.
            float bestScore     = singleIndexHits[0].score;
            float runnerUpScore = singleIndexHits[1].score;

            Assert.IsTrue(bestScore > runnerUpScore, message);

            combinedSearcher.Close();
            combinedDir.Close();

            // ---- Phase 2: the same document sets, one per index ----
            RAMDirectory plainDir  = new MockRAMDirectory();
            RAMDirectory taggedDir = new MockRAMDirectory();

            InitIndex(plainDir, nDocs, true, null);      // documents with a single token: "doc0", "doc1", ...
            InitIndex(taggedDir, nDocs, true, "x");      // documents with two tokens: "doc0" and "x", "doc1" and "x", ...

            IndexSearcher plainSearcher = new IndexSearcher(plainDir);
            plainSearcher.SetDefaultFieldSortScoring(true, true);
            IndexSearcher taggedSearcher = new IndexSearcher(taggedDir);
            taggedSearcher.SetDefaultFieldSortScoring(true, true);

            Searcher multiSearcher = GetMultiSearcherInstance(new Searcher[] { plainSearcher, taggedSearcher });

            ScoreDoc[] multiHits = multiSearcher.Search(doc0Query, null, 1000).ScoreDocs;

            Assert.AreEqual(2, multiHits.Length, message);

            // The scores should be the same (within reason).
            Assert.AreEqual(bestScore, multiHits[0].score, 1e-6, message);       // This will be a document from the first directory
            Assert.AreEqual(runnerUpScore, multiHits[1].score, 1e-6, message);   // This will be a document from the second directory

            // Adding a Sort.RELEVANCE object should not change anything.
            multiHits = multiSearcher.Search(doc0Query, null, 1000, Sort.RELEVANCE).ScoreDocs;

            Assert.AreEqual(2, multiHits.Length, message);

            Assert.AreEqual(bestScore, multiHits[0].score, 1e-6, message);       // This will be a document from the first directory
            Assert.AreEqual(runnerUpScore, multiHits[1].score, 1e-6, message);   // This will be a document from the second directory

            multiSearcher.Close();

            plainDir.Close();
            taggedDir.Close();
        }
Esempio n. 47
0
        /// <summary>
        /// Exercises <c>MatchAllDocsQuery</c> against a three-document index: plain matching,
        /// norm-based ordering, ordering after a norm change, use inside boolean queries,
        /// behavior after a document deletion, and round-tripping its <c>ToString()</c> output
        /// through the query parser (with and without a non-default boost).
        /// </summary>
        /// <remarks>
        /// NOTE(review): the third argument of <c>AddDoc</c> is presumably the boost/norm written
        /// for the "key" field - confirm against <c>AddDoc</c>, which is declared elsewhere.
        /// </remarks>
        public virtual void  TestQuery()
        {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter  iw  = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            iw.SetMaxBufferedDocs(2);             // force multi-segment
            AddDoc("one", iw, 1f);
            AddDoc("two", iw, 20f);
            AddDoc("three four", iw, 300f);
            iw.Close();

            IndexReader   ir       = IndexReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(ir);

            ScoreDoc[] hits;

            // assert with norms scoring turned off: hits come back in insertion order

            hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("one", ir.Document(hits[0].doc).Get("key"));
            Assert.AreEqual("two", ir.Document(hits[1].doc).Get("key"));
            Assert.AreEqual("three four", ir.Document(hits[2].doc).Get("key"));

            // assert with norms scoring turned on: ordering follows the "key" field norms

            MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key");

            hits = searcher.Search(normsQuery, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);

            Assert.AreEqual("three four", ir.Document(hits[0].doc).Get("key"));
            Assert.AreEqual("two", ir.Document(hits[1].doc).Get("key"));
            Assert.AreEqual("one", ir.Document(hits[2].doc).Get("key"));

            // change norm & retest: document 0 ("one") now carries the largest norm
            ir.SetNorm(0, "key", 400f);
            normsQuery = new MatchAllDocsQuery("key");
            hits       = searcher.Search(normsQuery, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);

            Assert.AreEqual("one", ir.Document(hits[0].doc).Get("key"));
            Assert.AreEqual("three four", ir.Document(hits[1].doc).Get("key"));
            Assert.AreEqual("two", ir.Document(hits[2].doc).Get("key"));

            // some artificial queries to trigger the use of skipTo():

            BooleanQuery bq = new BooleanQuery();

            bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
            bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
            hits = searcher.Search(bq, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);

            bq = new BooleanQuery();
            bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
            bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
            hits = searcher.Search(bq, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);

            // delete a document: only two of the three documents should match afterwards
            searcher.GetIndexReader().DeleteDocument(0);
            hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);

            // test parsable toString()
            QueryParser qp = new QueryParser("key", analyzer);

            hits = searcher.Search(qp.Parse(new MatchAllDocsQuery().ToString()), null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);

            // test parsable toString() with non default boost
            Query maq = new MatchAllDocsQuery();

            maq.SetBoost(2.3f);
            Query pq = qp.Parse(maq.ToString());

            hits = searcher.Search(pq, null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);

            searcher.Close();
            ir.Close();
            dir.Close();
        }