/// <summary>
/// MVC action: searches the Lucene index over the Name and Description fields
/// and renders the top 10 hits as a SearchViewModel.
/// </summary>
/// <param name="query">Raw user query, parsed by MultiFieldQueryParser.</param>
/// <returns>The search view populated with up to 10 matching documents.</returns>
public ActionResult Search(string query)
{
    ViewData["Message"] = "query : " + query;

    var searcher = new IndexSearcher(
        new Lucene.Net.Store.SimpleFSDirectory(new DirectoryInfo(Configuration.IndexDirectory)),
        readOnly: true);
    try
    {
        var fieldsToSearchIn = new[] { Configuration.Fields.Name, Configuration.Fields.Description };
        // BrazilianAnalyzer: index presumably holds Portuguese text — TODO confirm.
        var queryanalizer = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fieldsToSearchIn, new BrazilianAnalyzer());

        const int numberOfResults = 10;
        var top10Results = searcher.Search(queryanalizer.Parse(query), numberOfResults);

        var docs = new List<DocumentViewModel>();
        foreach (var scoreDoc in top10Results.scoreDocs)
        {
            var document = searcher.Doc(scoreDoc.doc);
            var name = document.GetField(Configuration.Fields.Name).StringValue();
            var description = document.GetField(Configuration.Fields.Description).StringValue();
            var link = document.GetField(Configuration.Fields.Link).StringValue();
            docs.Add(new DocumentViewModel(name, description, link));
        }
        return View(new SearchViewModel(docs));
    }
    finally
    {
        // BUGFIX: the searcher (and its underlying directory handle) was
        // previously leaked on every request; release it even if Parse throws.
        searcher.Close();
    }
}
/// <summary>
/// Verifies that a reverse sort on the date/time field returns documents in
/// descending order (Document 5 .. Document 1).
/// NOTE(review): deliberately uses SortField.AUTO because the reported problem
/// occurs only on the auto-type-detection path.
/// </summary>
public virtual void TestReverseDateSort()
{
    IndexSearcher searcher = new IndexSearcher(directory);
    // Create a Sort object. reverse is set to true.
    // problem occurs only with SortField.AUTO:
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));
    QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
    Query query = queryParser.Parse("Document");
    // Execute the search and process the search results.
    System.String[] actualOrder = new System.String[5];
    ScoreDoc[] hits = searcher.Search(query, null, 1000, sort).scoreDocs;
    for (int i = 0; i < hits.Length; i++)
    {
        Document document = searcher.Doc(hits[i].doc);
        System.String text = document.Get(TEXT_FIELD);
        actualOrder[i] = text;
    }
    searcher.Close();
    // Set up the expected order (i.e. Document 5, 4, 3, 2, 1).
    System.String[] expectedOrder = new System.String[5];
    expectedOrder[0] = "Document 5";
    expectedOrder[1] = "Document 4";
    expectedOrder[2] = "Document 3";
    expectedOrder[3] = "Document 2";
    expectedOrder[4] = "Document 1";
    Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
}
/// <summary>
/// Runs a prefix search ("key*") on a single field of the BIMXchange index and
/// returns the requested page of documents plus the total hit count.
/// </summary>
/// <param name="field">Index field to search.</param>
/// <param name="key">Term prefix to match.</param>
/// <param name="pageSize">Number of documents per page.</param>
/// <param name="pageNumber">1-based page index.</param>
public static LuceneResult SearchBIMXchange(string field, string key, int pageSize, int pageNumber)
{
    const string luceneIndexPath = "C:\\LuceneIndex";

    var directory = FSDirectory.Open(new DirectoryInfo(luceneIndexPath));
    try
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);
        var parser = new QueryParser(Version.LUCENE_29, field, analyzer);
        var query = parser.Parse(String.Format("{0}*", key));

        var searcher = new IndexSearcher(directory, true);
        try
        {
            var topDocs = searcher.Search(query, 1000000);

            var docs = new List<Document>();
            var start = (pageNumber - 1) * pageSize;
            // Bound by ScoreDocs.Length, not TotalHits: TotalHits may exceed
            // the number of ScoreDocs actually retrieved.
            for (var i = start; i < start + pageSize && i < topDocs.ScoreDocs.Length; i++)
            {
                docs.Add(searcher.Doc(topDocs.ScoreDocs[i].doc));
            }
            return new LuceneResult { Results = docs, TotalCount = topDocs.TotalHits };
        }
        finally
        {
            // BUGFIX: previously only closed on the success path, leaking the
            // searcher/directory whenever Parse or Search threw.
            searcher.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Searches the on-disk index's "description" field for the given terms and
/// returns (name, description) pairs for every hit.
/// </summary>
/// <param name="terms">Query text, parsed with the instance analyzer.</param>
public SearchResults Find(string terms)
{
    Directory directory = FSDirectory.GetDirectory("./index", false);
    // Now search the index:
    var isearcher = new IndexSearcher(directory);
    try
    {
        var qp = new QueryParser("description", _analyzer);
        Query query = qp.Parse(terms);
        Hits hits = isearcher.Search(query);

        var sr = new SearchResults();
        // Iterate through the results:
        for (int i = 0; i < hits.Length(); i++)
        {
            Document hitDoc = hits.Doc(i);
            sr.Add(new Result() { Name = hitDoc.Get("name"), Description = hitDoc.Get("description") });
        }
        return sr;
    }
    finally
    {
        // BUGFIX: searcher/directory were leaked whenever Parse threw; close
        // them unconditionally.
        isearcher.Close();
        directory.Close();
    }
}
/// <summary>
/// Console entry point: opens a local index and exposes it over .NET Remoting
/// as a RemoteSearchable bound at tcp://localhost:1099/Searchable.
/// </summary>
/// <param name="args">Expects exactly one argument: the index directory.</param>
public static void Main(System.String[] args)
{
    System.String indexName = null;
    if (args != null && args.Length == 1)
        indexName = args[0];

    if (indexName == null)
    {
        System.Console.Out.WriteLine("Usage: Lucene.Net.search.RemoteSearchable <index>");
        return;
    }

    // Open the local index and wrap it so remote clients can search it.
    // (The Java original installed an RMISecurityManager here; there is no
    // .NET equivalent, so the dead placeholder block has been removed.)
    Lucene.Net.Search.Searchable local = new IndexSearcher(indexName);
    RemoteSearchable impl = new RemoteSearchable(local);

    // bind the implementation to "Searchable" and block until the operator
    // presses Enter, keeping the remoting endpoint alive.
    System.Runtime.Remoting.RemotingServices.Marshal(impl, "tcp://localhost:1099/Searchable");
    System.Console.ReadLine();
}
/// <summary>
/// Asynchronously searches the "All" field, maps each hit's stored "id" back
/// through LookupTable, and returns the matches with their relevance scores.
/// Returns an empty collection for blank input or on any search error
/// (errors are logged, not rethrown).
/// </summary>
/// <param name="search">User query text.</param>
public Task<SearchResultCollection> Search(string search)
{
    return System.Threading.Tasks.Task.Run(() =>
    {
        var src = new SearchResultCollection();
        if (string.IsNullOrWhiteSpace(search)) return src;
        try
        {
            var parser = new QueryParser(Version.LUCENE_30, "All", analyzer);
            // BUGFIX: removed an unused TermQuery that was built here and
            // never executed; only the parsed query was ever searched.
            using (var indexSearcher = new IndexSearcher(directory, true))
            {
                Query query = parser.Parse(search);
                TopDocs result = indexSearcher.Search(query, 50);
                foreach (ScoreDoc h in result.ScoreDocs)
                {
                    Document doc = indexSearcher.Doc(h.Doc);
                    string id = doc.Get("id");
                    BaseContent value;
                    // Hits whose id is not in the lookup table are skipped.
                    if (LookupTable.TryGetValue(id, out value))
                        src.Add(new SearchResult { Relevance = h.Score, Content = value });
                }
            }
        }
        catch (Exception e)
        {
            Logger.Log("DataServer", "Error lucene search", e.Message, Logger.Level.Error);
        }
        return src;
    });
}
/// <summary>
/// Runs an AND-combined exact-term search (every field/value pair MUST match)
/// over the BIMXchange index and returns the requested page of documents plus
/// the total hit count.
/// </summary>
/// <param name="terms">Field-name/value pairs; all must match.</param>
/// <param name="pageSize">Number of documents per page.</param>
/// <param name="pageNumber">1-based page index.</param>
public static LuceneResult MultiSearchBIMXchange(Dictionary<string, string> terms, int pageSize, int pageNumber)
{
    var directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
    try
    {
        var booleanQuery = new BooleanQuery();
        foreach (var term in terms)
        {
            booleanQuery.Add(new TermQuery(new Term(term.Key, term.Value)), BooleanClause.Occur.MUST);
        }

        var searcher = new IndexSearcher(directory, true);
        try
        {
            // BUGFIX: the old code fetched only the top 10 hits but paged by
            // TotalHits, throwing IndexOutOfRangeException past hit 10.
            // Fetch enough hits to cover the requested page and bound the
            // loop by the ScoreDocs actually retrieved.
            var topDocs = searcher.Search(booleanQuery, Math.Max(1, pageNumber * pageSize));

            var docs = new List<Document>();
            var start = (pageNumber - 1) * pageSize;
            for (var i = start; i < start + pageSize && i < topDocs.ScoreDocs.Length; i++)
            {
                docs.Add(searcher.Doc(topDocs.ScoreDocs[i].doc));
            }
            return new LuceneResult { Results = docs, TotalCount = topDocs.TotalHits };
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Builds a small multi-segment index of "ipod"/"boosted" titled docs and
/// runs RunTest with and without reverse sorting over a single searcher.
/// </summary>
public virtual void TestSorting()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    // Tiny buffer plus a huge merge factor => many small segments, no merging.
    iw.SetMaxBufferedDocs(2);
    iw.SetMergeFactor(1000);

    System.String[][] rows =
    {
        new System.String[] { "id", "a", "title", "ipod", "str_s", "a" },
        new System.String[] { "id", "b", "title", "ipod ipod", "str_s", "b" },
        new System.String[] { "id", "c", "title", "ipod ipod ipod", "str_s", "c" },
        new System.String[] { "id", "x", "title", "boosted", "str_s", "x" },
        new System.String[] { "id", "y", "title", "boosted boosted", "str_s", "y" },
        new System.String[] { "id", "z", "title", "boosted boosted boosted", "str_s", "z" }
    };
    foreach (System.String[] row in rows)
    {
        iw.AddDocument(Adoc(row));
    }

    IndexReader reader = iw.GetReader();
    iw.Close();

    IndexSearcher searcher = new IndexSearcher(reader);
    RunTest(searcher, true);
    RunTest(searcher, false);
    searcher.Close();

    reader.Close();
    dir.Close();
}
/// <summary>
/// Exercises MatchAllDocsQuery alone, combined with itself and with a term
/// query (forcing skipTo), and again after deleting one document.
/// </summary>
public virtual void TestQuery()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
    AddDoc("one", writer);
    AddDoc("two", writer);
    AddDoc("three four", writer);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.Search(new MatchAllDocsQuery());
    Assert.AreEqual(3, hits.Length());

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq);
    Assert.AreEqual(3, hits.Length());

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq);
    Assert.AreEqual(1, hits.Length());

    // delete a document:
    searcher.GetIndexReader().DeleteDocument(0);
    hits = searcher.Search(new MatchAllDocsQuery());
    Assert.AreEqual(2, hits.Length());

    searcher.Close();
}
/// <summary>
/// Set up a new index in RAM with three test phrases and the supplied Analyzer
/// (wrapped in a 2-shingle ShingleAnalyzerWrapper), then open a reader and
/// searcher over it.
/// </summary>
/// <exception cref="Exception"> if an error occurs with index writer or searcher </exception>
public override void SetUp()
{
    base.SetUp();
    analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), 2);
    directory = NewDirectory();

    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    string[] phrases =
    {
        "please divide this sentence into shingles",
        "just another test sentence",
        "a sentence which contains no test"
    };
    foreach (string phrase in phrases)
    {
        Document doc = new Document();
        doc.Add(new TextField("content", phrase, Field.Store.YES));
        writer.AddDocument(doc);
    }
    writer.Dispose();

    reader = DirectoryReader.Open(directory);
    searcher = NewSearcher(reader);
}
/// <summary>Releases the fixture's shared reader and directory and clears the statics.</summary>
public static void Finish()
{
    s.IndexReader.Dispose();
    Dir.Dispose();
    s = null;
    Dir = null;
}
// Called by one node once it has reopened, to notify all
// other nodes. this is just a mock (since it goes and
// directly updates all other nodes, in RAM)... in a real
// env this would hit the wire, sending version &
// collection stats to all other nodes:
internal virtual void BroadcastNodeReopen(int nodeID, long version, IndexSearcher newSearcher)
{
    if (VERBOSE)
    {
        Console.WriteLine("REOPEN: nodeID=" + nodeID + " version=" + version + " maxDoc=" + newSearcher.IndexReader.MaxDoc);
    }

    // Push this node's fresh per-field collection statistics into every
    // peer's cache:
    foreach (string field in FieldsToShare)
    {
        CollectionStatistics freshStats = newSearcher.CollectionStatistics(field);
        foreach (NodeState peer in Nodes)
        {
            // Don't put my own collection stats into the cache;
            // we pull locally:
            if (peer.MyNodeID == nodeID)
            {
                continue;
            }
            peer.CollectionStatsCache[new FieldAndShardVersion(nodeID, version, field)] = freshStats;
        }
    }

    // Finally tell every node (including this one) about the new version.
    foreach (NodeState peer in Nodes)
    {
        peer.UpdateNodeVersion(nodeID, version);
    }
}
/// <summary>
/// Indexes 1000 docs whose term-vector mode cycles with the doc number:
/// positions+offsets when divisible by 6, positions when even, offsets when
/// divisible by 3, plain term vectors otherwise. Opens a read-only searcher.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        bool div2 = i % 2 == 0;
        bool div3 = i % 3 == 0;

        Field.TermVector termVector;
        if (div2 && div3)
        {
            termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        }
        else if (div2)
        {
            termVector = Field.TermVector.WITH_POSITIONS;
        }
        else if (div3)
        {
            termVector = Field.TermVector.WITH_OFFSETS;
        }
        else
        {
            termVector = Field.TermVector.YES;
        }

        Document doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true);
}
/// <summary>
/// Verifies that initialization indexed every node from the test taxonomy and
/// that a known element id can be found again via a term search.
/// </summary>
public void Initialize_Indexes_All_Nodes()
{
    string elementIdForTestingSearch = _deepNodeFinder.GetNodesForIndexing()[0].Id;
    int expectedNumNodes = _deepNodeFinder.GetNodesForIndexing().Length;

    Assert.AreEqual("usfr-pte_NetCashFlowsProvidedUsedOperatingActivitiesDirectAbstract", elementIdForTestingSearch, "TEST SANITY: element id for test search");
    Assert.AreEqual(1595, expectedNumNodes, "TEST SANITY: Number of nodes in found in the test taxonomy");

    IndexReader indexReader = IndexReader.Open(_indexMgr.LuceneDirectory_ForTesting);
    Assert.AreEqual(expectedNumNodes, indexReader.NumDocs(), "An incorrect number of documents were found in the Lucene directory after initialization");

    IndexSearcher searcher = new IndexSearcher(_indexMgr.LuceneDirectory_ForTesting);
    try
    {
        TermQuery byElementId = new TermQuery(new Term(LuceneNodeIndexer.ELEMENTID_FOR_DELETING_FIELD, elementIdForTestingSearch));
        Hits results = searcher.Search(byElementId);
        Assert.AreEqual(1, results.Length(), "Search results should only have 1 hit");
        Assert.AreEqual(elementIdForTestingSearch, results.Doc(0).Get(LuceneNodeIndexer.ELEMENTID_FIELD), "Search results yielded the wrong element!");
    }
    finally
    {
        searcher.Close();
    }
}
/// <summary>
/// Indexes 60 single-field docs, optionally optimizes, then searches for the
/// term "36" through the supplied filter and expects exactly one hit.
/// IOExceptions are converted into test failures.
/// </summary>
public void SearchFiltered(IndexWriter writer, Directory directory, Filter filter, bool optimize)
{
    try
    {
        for (int docNum = 0; docNum < 60; docNum++)
        {
            // Simple docs
            Document doc = new Document();
            doc.Add(new Field(FIELD, docNum.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }
        if (optimize)
        {
            writer.Optimize();
        }
        writer.Close();

        BooleanQuery query = new BooleanQuery();
        query.Add(new TermQuery(new Term(FIELD, "36")), Occur.SHOULD);

        IndexSearcher indexSearcher = new IndexSearcher(directory);
        ScoreDoc[] hits = indexSearcher.Search(query, filter, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length, "Number of matched documents");
    }
    catch (System.IO.IOException e)
    {
        Assert.Fail(e.Message);
    }
}
/// <summary>
/// Idempotently releases the index, analyzer, and directory, nulling each
/// field so a second call is a no-op.
/// </summary>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    if (_index != null)
    {
        _index.Dispose();
        _index = null;
    }
    if (_analyzer != null)
    {
        _analyzer.Dispose();
        _analyzer = null;
    }
    if (_directory != null)
    {
        _directory.Dispose();
        _directory = null;
    }
    _disposed = true;
}
/// <summary>
/// QueryWrapperFilter must not throw for a primitive query, a boolean-composed
/// query, or a non-primitive query (FuzzyQuery, which doesn't implement
/// Query#createWeight), and each filtered search should find the single doc.
/// </summary>
public virtual void TestBasic()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    TermQuery termQuery = new TermQuery(new Term("field", "value"));

    // should not throw exception with primitive query
    QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // should not throw exception with complex primitive query
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
    // BUGFIX: previously wrapped termQuery again, leaving booleanQuery unused
    // and the "complex" case untested; wrap the boolean query as intended.
    qwf = new QueryWrapperFilter(booleanQuery);
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // should not throw exception with non primitive Query (doesn't implement
    // Query#createWeight)
    qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);
}
/// <summary>
/// Checks BooleanQuery/DisjunctionMaxQuery behavior when a sub-query produces
/// a null scorer (an empty PhraseQuery): an optional null-scorer clause is
/// ignored, but a required one yields zero hits, and DisjunctionMaxQuery
/// still matches on its non-null clause.
/// </summary>
public virtual void TestNullOrSubScorer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    IndexSearcher s = new IndexSearcher(r);
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, BooleanClause.Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10).TotalHits);
    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    q.Add(pq, BooleanClause.Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10).TotalHits);
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);
    r.Close();
    w.Close();
    dir.Close();
}
// Releases the fixture searcher after each test.
// NOTE(review): the explicit GC.Collect() looks like an attempt to force
// cleanup of unreferenced index resources between tests — confirm it is still
// needed; forcing a full collection per test slows the suite.
public override void TearDown()
{
    searcher.Close();
    searcher = null;
    GC.Collect();
    base.TearDown();
}
/// <summary>
/// A wildcard query whose pattern contains no wildcard characters should
/// rewrite to a TermQuery under scoring rewrite and to a ConstantScoreQuery
/// under each constant-score rewrite mode, preserving the boost every time.
/// </summary>
public virtual void TestTermWithoutWildcard()
{
    RAMDirectory indexStore = GetIndexStore("field", new System.String[] { "nowildcard", "nowildcardx" });
    IndexSearcher searcher = new IndexSearcher(indexStore, true);

    MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard"));
    AssertMatches(searcher, wq, 1);

    // Scoring rewrite collapses the single matching term to a plain TermQuery.
    wq.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
    wq.Boost = 0.1f;
    Query rewritten = searcher.Rewrite(wq);
    Assert.IsTrue(rewritten is TermQuery);
    Assert.AreEqual(rewritten.Boost, wq.Boost);

    // Each constant-score mode must yield a ConstantScoreQuery, boost intact.
    wq.RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
    wq.Boost = 0.2f;
    rewritten = searcher.Rewrite(wq);
    Assert.True(rewritten is ConstantScoreQuery);
    Assert.AreEqual(rewritten.Boost, wq.Boost);

    wq.RewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
    wq.Boost = 0.3F;
    rewritten = searcher.Rewrite(wq);
    Assert.True(rewritten is ConstantScoreQuery);
    Assert.AreEqual(rewritten.Boost, wq.Boost);

    wq.RewriteMethod = MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
    wq.Boost = 0.4F;
    rewritten = searcher.Rewrite(wq);
    Assert.True(rewritten is ConstantScoreQuery);
    Assert.AreEqual(rewritten.Boost, wq.Boost);
}
/// <summary>
/// Builds three single-segment indexes: Dir holds docs 0-9 while Sdir1/Sdir2
/// each hold the even/odd half. Opens a plain searcher over Dir, a MultiReader
/// searcher over the two disjoint halves, and a MultiReader searcher over
/// Sdir1 + Dir (which duplicates the even docs).
/// </summary>
public void BeforeClass()
{
    Dir = NewDirectory();
    Sdir1 = NewDirectory();
    Sdir2 = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone);
    RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone);
    RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone);
    for (int i = 0; i < 10; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO));
        writer.AddDocument(doc);
        // Even docs go to the first shard, odd docs to the second.
        ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc);
    }
    writer.ForceMerge(1);
    swriter1.ForceMerge(1);
    swriter2.ForceMerge(1);
    writer.Dispose();
    swriter1.Dispose();
    swriter2.Dispose();
    Reader = DirectoryReader.Open(Dir);
    Searcher = NewSearcher(Reader);
    MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true);
    MultiSearcher = NewSearcher(MultiReader);
    MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true);
    MultiSearcherDupls = NewSearcher(MultiReaderDupls);
}
/// <summary>
/// Runs a prefix search over the "name" and "summary" fields of the local
/// LuceneIndex, collects the stored "id" of every hit, and resolves them via
/// Search_gl.search.
/// </summary>
/// <param name="data">Request data; its "search" string is the query prefix.</param>
/// <returns>The Data produced by Search_gl.search for the matched ids.</returns>
public Data searchLucene(Data data)
{
    Search_gl search = new Search_gl();
    List<string> item = new List<string>();
    Lucene.Net.Store.Directory directory = FSDirectory.Open(new DirectoryInfo(Environment.CurrentDirectory + "\\LuceneIndex"));
    var analyzer = new StandardAnalyzer(Version.LUCENE_29);
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "name", "summary" }, analyzer); //search for multifield
        Query query = parser.Parse((data.getString("search")) + "*"); //cant search blank text with wildcard as first character

        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, collector);
        ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
        for (int i = 0; i < hits.Length; i++)
        {
            Document doc = searcher.Doc(hits[i].doc);
            item.Add(doc.Get("id"));
        }
        return search.search(data, item.ToArray());
    }
    finally
    {
        // BUGFIX: reader/searcher were only closed on the success path,
        // leaking them whenever Parse threw; also close the searcher before
        // the reader it wraps.
        searcher.Close();
        reader.Close();
    }
}
/// <summary>
/// Fuzzy-searches the project index over the name and vessel fields — one
/// fuzzy clause per whitespace-separated term — and returns every collected
/// match as a (name, vessel) pair.
/// </summary>
/// <param name="searchTerm">User search input; split on spaces.</param>
public ProjectData[] Search(string searchTerm)
{
    IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory);
    try
    {
        IntegralCollector searcherCollector = new IntegralCollector();
        // Setup the fields to search through
        string[] searchfields = new string[] { "name", "vessel" };

        // Build our booleanquery that will be a combination of all the queries for each individual search term
        var finalQuery = new BooleanQuery();
        var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, searchfields, analyzer);

        // Split the search string into separate search terms by word
        string[] terms = searchTerm.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string term in terms)
        {
            // Strip any user-supplied '~' then append one so every clause is fuzzy.
            finalQuery.Add(parser.Parse(term.Replace("~", "") + "~"), Occur.SHOULD);
        }

        searcher.Search(finalQuery, searcherCollector);

        var results = new ProjectData[searcherCollector.Docs.Count];
        for (int i = 0; i < searcherCollector.Docs.Count; i++)
        {
            var doc = searcher.Doc(searcherCollector.Docs[i]);
            results[i] = new ProjectData(doc.Get("name"), doc.Get("vessel"));
        }
        return results;
    }
    finally
    {
        // BUGFIX: the searcher was never released; close it even on parse errors.
        searcher.Close();
    }
}
/// <summary>
/// Searches the "text" field of the on-disk index, returning up to 20 hits as
/// LuceneDocuments (title + text), the total hit count, and the elapsed time.
/// </summary>
/// <param name="query">Raw query text, parsed with a StandardAnalyzer.</param>
public ISearchResult Search(string query)
{
    var timer = new Stopwatch();
    timer.Start();

    var directory = FSDirectory.Open(new DirectoryInfo(path));
    try
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);
        var searcher = new IndexSearcher(directory, true);
        try
        {
            var queryParser = new QueryParser(Version.LUCENE_29, "text", analyzer);
            var result = searcher.Search(queryParser.Parse(query), 20);

            var docs = (from scoreDoc in result.scoreDocs
                        let doc = searcher.Doc(scoreDoc.doc)
                        let fields = new Dictionary<string, string>
                        {
                            { "title", doc.Get("title") },
                            { "text", doc.Get("text") }
                        }
                        select new LuceneDocument { Id = scoreDoc.doc.ToString(), Fields = fields }).ToList();

            var ret = new SearchResult { Query = query, Total = result.totalHits, Documents = docs, Source = Name };
            timer.Stop();
            ret.Duration = (decimal)timer.Elapsed.TotalSeconds;
            return ret;
        }
        finally
        {
            // BUGFIX: searcher/directory were leaked whenever Parse threw;
            // close them unconditionally.
            searcher.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
// Atomically replaces the current IndexSearcher with one opened over a fresh
// MMapDirectory view of the on-disk index, then closes the old searcher.
// The lastReset timestamp collapses concurrent reset requests: if another
// thread completed a reset after this caller started, this call is a no-op.
// NOTE(review): closing oldSearcher here assumes no search is still using it —
// confirm that readers also acquire searcherLock.
void ResetSearcher()
{
    var start = DateTime.Now;
    searcherLock.EnterWriteLock();
    try
    {
        // Someone else already reopened after we started; nothing to do.
        if (lastReset > start) return;
        lastReset = DateTime.Now;
        var newSearcher = new IndexSearcher(
            MMapDirectory.GetDirectory(this.path, false)
        );
        var oldSearcher = this.searcher;
        this.searcher = newSearcher;
        if (oldSearcher != null)
        {
            oldSearcher.Close();
        }
    }
    finally
    {
        searcherLock.ExitWriteLock();
    }
}
/// <summary>
/// Indexes "MRS. SHABA" with a lower-casing keyword analyzer, dumps every
/// indexed term, and verifies that the quoted prefix query Name:"MRS. S*"
/// still finds the document.
/// </summary>
public void MrsJones()
{
    using (var dir = new RAMDirectory())
    using (var analyzer = new LowerCaseKeywordAnalyzer())
    {
        using (var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var document = new Lucene.Net.Documents.Document();
            document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
            writer.AddDocument(document);
        }

        var searcher = new IndexSearcher(dir, true);

        // Dump every indexed term so the analyzer's output is visible.
        var termEnum = searcher.IndexReader.Terms();
        while (termEnum.Next())
        {
            Console.WriteLine(termEnum.Term.Text);
        }

        var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
        var query = queryParser.Parse("Name:\"MRS. S*\"");
        Console.WriteLine(query);

        var result = searcher.Search(query, 10);
        Assert.NotEqual(0, result.TotalHits);
    }
}
/// <summary>
/// Returns Notes whose title (boosted 4x) or content matches the given prefix.
/// Errors are logged to the console and yield an empty/partial list rather
/// than propagating.
/// </summary>
/// <param name="search">Title/content prefix to match.</param>
public ArrayList getNotesMatchingTitle(string search)
{
    ArrayList snotes = new ArrayList();
    try
    {
        QueryParser parser = new QueryParser("title", analyzer);
        // Prefix-match on title (boosted ^4) and on content.
        string lucsearch = search + "*^4" + " content:" + search + "*";
        Query query = parser.Parse(lucsearch);

        IndexSearcher searcher = new IndexSearcher(lucIdx);
        try
        {
            Hits hits = searcher.Search(query);
            int results = hits.Length();
            Console.WriteLine("Found {0} results", results);
            for (int i = 0; i < results; i++)
            {
                Document doc = hits.Doc(i);
                snotes.Add(new Note(doc.Get("title"), doc.Get("lastmod")));
            }
        }
        finally
        {
            // BUGFIX: the searcher was never closed; release it even when
            // reading a hit throws.
            searcher.Close();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("ERROR Search: " + e.Message);
    }
    return snotes;
}
/// <summary>
/// Builds the shared fixture index: 11377 docs whose single string field is
/// "meaninglessnames" except for two "tangfulin" docs (after doc 5136 and at
/// the end). The SAME Document/Field instances are reused throughout, so each
/// StringValue mutation changes what the next AddDocument call indexes —
/// statement order is load-bearing.
/// </summary>
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory);
    Document doc = new Document();
    Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
    doc.Add(field);
    for (int i = 0; i < 5137; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);
    field.StringValue = "meaninglessnames";
    for (int i = 5138; i < 11377; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);
    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
/// <summary>
/// Indexes 1000 docs whose "field" value is the doc number formatted through a
/// zero-decimal NumberFormatInfo (norms omitted), then opens a reader/searcher.
/// NOTE(review): the Java original used DecimalFormat("000"), which zero-pads
/// to three digits; i.ToString(df) here does not pad — confirm the tests do
/// not rely on lexicographic ordering of these values.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.OmitNorms = true;
    Field field = NewField("field", "", customType);
    doc.Add(field);
    NumberFormatInfo df = new NumberFormatInfo();
    df.NumberDecimalDigits = 0;
    //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++)
    {
        // The same Field instance is mutated and re-indexed for every doc.
        field.StringValue = i.ToString(df);
        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    writer.Dispose();
    Searcher = NewSearcher(Reader);
}
/// <summary>Builds the Weight that scores this phrase query for the given searcher.</summary>
public override Weight CreateWeight(IndexSearcher searcher)
{
    return new PhraseWeight(this, searcher);
}
/// <summary>
/// Exercises constant-score range queries (Csrq) on the random "rand" field of
/// the index built in the base-class setUp: bounded, half-bounded, unbounded,
/// and degenerate single-term ranges with every include/exclude combination
/// (T = inclusive, F = exclusive endpoint).
/// </summary>
public virtual void TestRangeQueryRand()
{
    // NOTE: uses index build in *super* setUp
    IndexReader reader = signedIndexReader;
    IndexSearcher search = NewSearcher(reader);
    string minRP = Pad(signedIndexDir.minR);
    string maxRP = Pad(signedIndexDir.maxR);
    int numDocs = reader.NumDocs;
    assertEquals("num of docs", numDocs, 1 + maxId - minId);
    ScoreDoc[] result;
    // test extremes, bounded on both ends
    result = search.Search(Csrq("rand", minRP, maxRP, T, T), null, numDocs).ScoreDocs;
    assertEquals("find all", numDocs, result.Length);
    result = search.Search(Csrq("rand", minRP, maxRP, T, F), null, numDocs).ScoreDocs;
    assertEquals("all but biggest", numDocs - 1, result.Length);
    result = search.Search(Csrq("rand", minRP, maxRP, F, T), null, numDocs).ScoreDocs;
    assertEquals("all but smallest", numDocs - 1, result.Length);
    result = search.Search(Csrq("rand", minRP, maxRP, F, F), null, numDocs).ScoreDocs;
    assertEquals("all but extremes", numDocs - 2, result.Length);
    // unbounded
    result = search.Search(Csrq("rand", minRP, null, T, F), null, numDocs).ScoreDocs;
    assertEquals("smallest and up", numDocs, result.Length);
    result = search.Search(Csrq("rand", null, maxRP, F, T), null, numDocs).ScoreDocs;
    assertEquals("biggest and down", numDocs, result.Length);
    result = search.Search(Csrq("rand", minRP, null, F, F), null, numDocs).ScoreDocs;
    assertEquals("not smallest, but up", numDocs - 1, result.Length);
    result = search.Search(Csrq("rand", null, maxRP, F, F), null, numDocs).ScoreDocs;
    assertEquals("not biggest, but down", numDocs - 1, result.Length);
    // very small sets
    result = search.Search(Csrq("rand", minRP, minRP, F, F), null, numDocs).ScoreDocs;
    assertEquals("min,min,F,F", 0, result.Length);
    result = search.Search(Csrq("rand", maxRP, maxRP, F, F), null, numDocs).ScoreDocs;
    assertEquals("max,max,F,F", 0, result.Length);
    result = search.Search(Csrq("rand", minRP, minRP, T, T), null, numDocs).ScoreDocs;
    assertEquals("min,min,T,T", 1, result.Length);
    result = search.Search(Csrq("rand", null, minRP, F, T), null, numDocs).ScoreDocs;
    assertEquals("nul,min,F,T", 1, result.Length);
    result = search.Search(Csrq("rand", maxRP, maxRP, T, T), null, numDocs).ScoreDocs;
    assertEquals("max,max,T,T", 1, result.Length);
    result = search.Search(Csrq("rand", maxRP, null, T, F), null, numDocs).ScoreDocs;
    assertEquals("max,nul,T,T", 1, result.Length);
}
/// <summary>
/// Exercises FieldCacheRangeFilter.NewStringRange on the sequential "id"
/// field: bounded, half-bounded, unbounded, and degenerate (empty/single-doc)
/// ranges with every include/exclude combination (T = inclusive endpoint,
/// F = exclusive endpoint). The base query matches every doc, so the filter
/// alone determines the hit count.
/// </summary>
public virtual void TestRangeFilterId()
{
    IndexReader reader = IndexReader.Open(signedIndex.index, true);
    IndexSearcher Search = new IndexSearcher(reader);
    int medId = ((maxId - minId) / 2);
    System.String minIP = Pad(minId);
    System.String maxIP = Pad(maxId);
    System.String medIP = Pad(medId);
    int numDocs = reader.NumDocs();
    Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");
    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    // test id, bounded on both ends
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, maxIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, maxIP, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but last");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, maxIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but first");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, maxIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 2, result.Length, "all but ends");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", medIP, maxIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + maxId - medId, result.Length, "med and up");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, medIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + medId - minId, result.Length, "up to med");
    // unbounded id
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", null, null, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "min and up");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", null, maxIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "max and down");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, null, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not min, but up");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", null, maxIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not max, but down");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", medIP, maxIP, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(maxId - medId, result.Length, "med and up, not max");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, medIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(medId - minId, result.Length, "not min, up to med");
    // very small sets
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, minIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "min,min,F,F");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", medIP, medIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "med,med,F,F");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", maxIP, maxIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "max,max,F,F");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", minIP, minIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "min,min,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", null, minIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "nul,min,F,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", maxIP, maxIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,max,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", maxIP, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,nul,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("id", medIP, medIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "med,med,T,T");
}
/// <summary>
/// Indexes at least ~100k characters of line-file docs, samples roughly 10
/// random terms from the "body" field, records single-threaded TopDocs for
/// each, then re-runs the same searches from 2-5 concurrent threads
/// (ThreadAnonymousClass) against the shared IndexSearcher to check it is
/// safe and consistent under concurrency.
/// </summary>
public virtual void Test()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, analyzer);
    LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
    int charsToIndex = AtLeast(100000);
    int charsIndexed = 0;
    //System.out.println("bytesToIndex=" + charsToIndex);
    while (charsIndexed < charsToIndex)
    {
        Document doc = docs.NextDoc();
        charsIndexed += doc.Get("body").Length;
        w.AddDocument(doc);
        //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
    }
    IndexReader r = w.GetReader();
    //System.out.println("numDocs=" + r.NumDocs);
    w.Dispose();
    IndexSearcher s = NewSearcher(r);
    Terms terms = MultiFields.GetFields(r).GetTerms("body");
    int termCount = 0;
    TermsEnum termsEnum = terms.GetEnumerator();
    while (termsEnum.MoveNext())
    {
        termCount++;
    }
    Assert.IsTrue(termCount > 0);
    // Target ~10 terms to search:
    double chance = 10.0 / termCount;
    termsEnum = terms.GetEnumerator(termsEnum);
    IDictionary<BytesRef, TopDocs> answers = new Dictionary<BytesRef, TopDocs>();
    while (termsEnum.MoveNext())
    {
        if (Random.NextDouble() <= chance)
        {
            // DeepCopyOf: the enum reuses its BytesRef, so keys must be copied.
            BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term);
            answers[term] = s.Search(new TermQuery(new Term("body", term)), 100);
        }
    }
    if (answers.Count > 0)
    {
        // Start all threads simultaneously via the countdown latch.
        CountdownEvent startingGun = new CountdownEvent(1);
        int numThreads = TestUtil.NextInt32(Random, 2, 5);
        ThreadJob[] threads = new ThreadJob[numThreads];
        for (int threadID = 0; threadID < numThreads; threadID++)
        {
            ThreadJob thread = new ThreadAnonymousClass(this, s, answers, startingGun);
            threads[threadID] = thread;
            thread.Start();
        }
        startingGun.Signal();
        foreach (ThreadJob thread in threads)
        {
            thread.Join();
        }
    }
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies wildcard/prefix matching over terms containing backslash escapes:
/// three docs whose leading terms are "\", "\x00079" and "\\". Checks
/// (1) patterns that must match every doc, (2) patterns that must match none,
/// and (3) per-document prefix and wildcard patterns that must match exactly
/// the one expected doc.
/// </summary>
public virtual void TestParsingAndSearching()
{
    string field = "content";
    // One source string per doc; doc i must be the unique match for
    // matchOneDocPrefix[i] and matchOneDocWild[i] below.
    string[] docs = new string[] { "\\ abcdefg1", "\\x00079 hijklmn1", "\\\\ opqrstu1" };

    // queries that should find all docs
    Query[] matchAll = new Query[]
    {
        new WildcardQuery(new Term(field, "*")),
        new WildcardQuery(new Term(field, "*1")),
        new WildcardQuery(new Term(field, "**1")),
        new WildcardQuery(new Term(field, "*?")),
        new WildcardQuery(new Term(field, "*?1")),
        new WildcardQuery(new Term(field, "?*1")),
        new WildcardQuery(new Term(field, "**")),
        new WildcardQuery(new Term(field, "***")),
        new WildcardQuery(new Term(field, "\\\\*"))
    };

    // queries that should find no docs
    Query[] matchNone = new Query[]
    {
        new WildcardQuery(new Term(field, "a*h")),
        new WildcardQuery(new Term(field, "a?h")),
        new WildcardQuery(new Term(field, "*a*h")),
        new WildcardQuery(new Term(field, "?a")),
        new WildcardQuery(new Term(field, "a?"))
    };

    // matchOneDocPrefix[i] holds prefix queries that must match only docs[i].
    PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][]
    {
        new PrefixQuery[] { new PrefixQuery(new Term(field, "a")), new PrefixQuery(new Term(field, "ab")), new PrefixQuery(new Term(field, "abc")) },
        new PrefixQuery[] { new PrefixQuery(new Term(field, "h")), new PrefixQuery(new Term(field, "hi")), new PrefixQuery(new Term(field, "hij")), new PrefixQuery(new Term(field, "\\x0007")) },
        new PrefixQuery[] { new PrefixQuery(new Term(field, "o")), new PrefixQuery(new Term(field, "op")), new PrefixQuery(new Term(field, "opq")), new PrefixQuery(new Term(field, "\\\\")) }
    };

    // matchOneDocWild[i] holds wildcard queries that must match only docs[i].
    WildcardQuery[][] matchOneDocWild = new WildcardQuery[][]
    {
        new WildcardQuery[] { new WildcardQuery(new Term(field, "*a*")), new WildcardQuery(new Term(field, "*ab*")), new WildcardQuery(new Term(field, "*abc**")), new WildcardQuery(new Term(field, "ab*e*")), new WildcardQuery(new Term(field, "*g?")), new WildcardQuery(new Term(field, "*f?1")) },
        new WildcardQuery[] { new WildcardQuery(new Term(field, "*h*")), new WildcardQuery(new Term(field, "*hi*")), new WildcardQuery(new Term(field, "*hij**")), new WildcardQuery(new Term(field, "hi*k*")), new WildcardQuery(new Term(field, "*n?")), new WildcardQuery(new Term(field, "*m?1")), new WildcardQuery(new Term(field, "hij**")) },
        new WildcardQuery[] { new WildcardQuery(new Term(field, "*o*")), new WildcardQuery(new Term(field, "*op*")), new WildcardQuery(new Term(field, "*opq**")), new WildcardQuery(new Term(field, "op*q*")), new WildcardQuery(new Term(field, "*u?")), new WildcardQuery(new Term(field, "*t?1")), new WildcardQuery(new Term(field, "opq**")) }
    };

    // prepare the index
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
    for (int i = 0; i < docs.Length; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField(field, docs[i], Field.Store.NO));
        iw.AddDocument(doc);
    }
    iw.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    // test queries that must find all
    foreach (Query q in matchAll)
    {
        if (VERBOSE)
        {
            Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(docs.Length, hits.Length);
    }

    // test queries that must find none
    foreach (Query q in matchNone)
    {
        if (VERBOSE)
        {
            Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);
    }

    // test the prefix queries find only one doc
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
        {
            Query q = matchOneDocPrefix[i][j];
            if (VERBOSE)
            {
                Console.WriteLine("match 1 prefix: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
            }
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }

    // test the wildcard queries find only one doc
    for (int i = 0; i < matchOneDocWild.Length; i++)
    {
        for (int j = 0; j < matchOneDocWild[i].Length; j++)
        {
            Query q = matchOneDocWild[i][j];
            if (VERBOSE)
            {
                Console.WriteLine("match 1 wild: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
            }
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Runs <paramref name="q"/> against <paramref name="searcher"/> (top 1000,
/// unfiltered) and asserts exactly <paramref name="expectedMatches"/> hits.
/// </summary>
private void AssertMatches(IndexSearcher searcher, Query q, int expectedMatches)
{
    var hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(expectedMatches, hits.Length);
}
/// <summary>
/// LUCENE-2617 regression test: zero-boost clauses whose terms are absent from
/// the index must still affect the score via the coord factor, and clauses that
/// produce a null scorer (e.g. an empty <see cref="PhraseQuery"/>) must be
/// handled: optional null scorers are ignored, a required null scorer yields
/// zero hits, and <see cref="DisjunctionMaxQuery"/> skips a null sub-scorer.
/// </summary>
public virtual void TestNullOrSubScorer()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewTextField("field", "a b c d", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.Reader;
    IndexSearcher s = NewSearcher(r);
    // this test relies upon coord being the default implementation,
    // otherwise scores are different!
    s.Similarity = new DefaultSimilarity();

    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);

    // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
    float score = s.Search(q, 10).MaxScore;
    Query subQuery = new TermQuery(new Term("field", "not_in_index"));
    subQuery.Boost = 0;
    q.Add(subQuery, BooleanClause.Occur.SHOULD);
    float score2 = s.Search(q, 10).MaxScore;
    // One of two optional clauses matched -> coord halves the score.
    Assert.AreEqual(score * .5F, score2, 1e-6);

    // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
    BooleanQuery qq = (BooleanQuery)q.Clone();
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "not_in_index"));
    phrase.Add(new Term("field", "another_not_in_index"));
    phrase.Boost = 0;
    qq.Add(phrase, BooleanClause.Occur.SHOULD);
    score2 = s.Search(qq, 10).MaxScore;
    // One of three optional clauses matched -> coord is 1/3.
    Assert.AreEqual(score * (1 / 3F), score2, 1e-6);

    // now test BooleanScorer2
    subQuery = new TermQuery(new Term("field", "b"));
    subQuery.Boost = 0;
    q.Add(subQuery, BooleanClause.Occur.MUST);
    score2 = s.Search(q, 10).MaxScore;
    Assert.AreEqual(score * (2 / 3F), score2, 1e-6);

    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, BooleanClause.Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10).TotalHits);

    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    q.Add(pq, BooleanClause.Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10).TotalHits);

    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);

    r.Dispose();
    w.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds the shared test fixture: assembles <c>AllSortFields</c> (every
/// supported sort type, forward and reverse, plus missing-first/last and random
/// missing-value variants), then indexes AtLeast(200) docs whose fields are
/// randomly omitted (Random.Next(5) != 4) so missing-value code paths are hit.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    // LUCENENET specific: Moved this logic here to ensure that it is executed
    // after the class is setup - a field is way to early to execute this.
    bool supportsDocValues = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal) == false;

    AllSortFields = new List<SortField>
    {
#pragma warning disable 612,618
        new SortField("byte", SortFieldType.BYTE, false),
        new SortField("short", SortFieldType.INT16, false),
#pragma warning restore 612,618
        new SortField("int", SortFieldType.INT32, false),
        new SortField("long", SortFieldType.INT64, false),
        new SortField("float", SortFieldType.SINGLE, false),
        new SortField("double", SortFieldType.DOUBLE, false),
        new SortField("bytes", SortFieldType.STRING, false),
        new SortField("bytesval", SortFieldType.STRING_VAL, false),
#pragma warning disable 612,618
        new SortField("byte", SortFieldType.BYTE, true),
        new SortField("short", SortFieldType.INT16, true),
#pragma warning restore 612,618
        new SortField("int", SortFieldType.INT32, true),
        new SortField("long", SortFieldType.INT64, true),
        new SortField("float", SortFieldType.SINGLE, true),
        new SortField("double", SortFieldType.DOUBLE, true),
        new SortField("bytes", SortFieldType.STRING, true),
        new SortField("bytesval", SortFieldType.STRING_VAL, true),
        SortField.FIELD_SCORE,
        SortField.FIELD_DOC
    };
    if (supportsDocValues)
    {
        AllSortFields.AddRange(new SortField[]
        {
            new SortField("intdocvalues", SortFieldType.INT32, false),
            new SortField("floatdocvalues", SortFieldType.SINGLE, false),
            new SortField("sortedbytesdocvalues", SortFieldType.STRING, false),
            new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, false),
            new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, false),
            new SortField("intdocvalues", SortFieldType.INT32, true),
            new SortField("floatdocvalues", SortFieldType.SINGLE, true),
            new SortField("sortedbytesdocvalues", SortFieldType.STRING, true),
            new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, true),
            new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, true)
        });
    }

    // Also test missing first / last for the "string" sorts:
    foreach (string field in new string[] { "bytes", "sortedbytesdocvalues" })
    {
        for (int rev = 0; rev < 2; rev++)
        {
            bool reversed = rev == 0;
            SortField sf = new SortField(field, SortFieldType.STRING, reversed);
            sf.MissingValue = SortField.STRING_FIRST;
            AllSortFields.Add(sf);
            sf = new SortField(field, SortFieldType.STRING, reversed);
            sf.MissingValue = SortField.STRING_LAST;
            AllSortFields.Add(sf);
        }
    }

    // For each numeric sort already in the list, add a variant with a random
    // MissingValue. `limit` is captured first so the newly added entries are
    // not revisited by the loop.
    int limit = AllSortFields.Count;
    for (int i = 0; i < limit; i++)
    {
        SortField sf = AllSortFields[i];
        if (sf.Type == SortFieldType.INT32)
        {
            SortField sf2 = new SortField(sf.Field, SortFieldType.INT32, sf.IsReverse);
            sf2.MissingValue = Random.Next();
            AllSortFields.Add(sf2);
        }
        else if (sf.Type == SortFieldType.INT64)
        {
            SortField sf2 = new SortField(sf.Field, SortFieldType.INT64, sf.IsReverse);
            sf2.MissingValue = Random.NextInt64();
            AllSortFields.Add(sf2);
        }
        else if (sf.Type == SortFieldType.SINGLE)
        {
            SortField sf2 = new SortField(sf.Field, SortFieldType.SINGLE, sf.IsReverse);
            sf2.MissingValue = (float)Random.NextDouble();
            AllSortFields.Add(sf2);
        }
        else if (sf.Type == SortFieldType.DOUBLE)
        {
            SortField sf2 = new SortField(sf.Field, SortFieldType.DOUBLE, sf.IsReverse);
            sf2.MissingValue = Random.NextDouble();
            AllSortFields.Add(sf2);
        }
    }

    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, Dir);
    int numDocs = AtLeast(200);
    for (int i = 0; i < numDocs; i++)
    {
        IList<Field> fields = new List<Field>();
        fields.Add(NewTextField("english", English.Int32ToEnglish(i), Field.Store.NO));
        fields.Add(NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
        fields.Add(NewStringField("byte", "" + ((sbyte)Random.Next()).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
        fields.Add(NewStringField("short", "" + ((short)Random.Next()).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
        fields.Add(new Int32Field("int", Random.Next(), Field.Store.NO));
        fields.Add(new Int64Field("long", Random.NextInt64(), Field.Store.NO));
        fields.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO));
        fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO));
        fields.Add(NewStringField("bytes", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
        fields.Add(NewStringField("bytesval", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
        // NOTE(review): "double" is added a second time here (also a few lines
        // above) — verify against the upstream Lucene test whether the
        // duplicate field value is intentional.
        fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO));

        if (supportsDocValues)
        {
            fields.Add(new NumericDocValuesField("intdocvalues", Random.Next()));
            fields.Add(new SingleDocValuesField("floatdocvalues", (float)Random.NextDouble()));
            fields.Add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
            fields.Add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
            fields.Add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
        }
        Document document = new Document();
        document.Add(new StoredField("id", "" + i));
        if (isVerbose)
        {
            Console.WriteLine(" add doc id=" + i);
        }
        foreach (Field field in fields)
        {
            // So we are sometimes missing that field:
            if (Random.Next(5) != 4)
            {
                document.Add(field);
                if (isVerbose)
                {
                    Console.WriteLine(" " + field);
                }
            }
        }
        iw.AddDocument(document);
        // Occasionally commit to produce multiple segments.
        if (Random.Next(50) == 17)
        {
            iw.Commit();
        }
    }
    Reader = iw.GetReader();
    iw.Dispose();
    Searcher = NewSearcher(Reader);
    if (isVerbose)
    {
        Console.WriteLine(" searcher=" + Searcher);
    }
}
/// <summary>
/// Stress test for <c>ControlledRealTimeReopenThread</c>: adds 500 docs one at
/// a time, blocking on <c>WaitForGeneration</c> for each, and asserts the wait
/// never exceeds <c>maxStaleSecs</c>. Every 50 docs a background commit thread
/// is spawned to race the reopen thread; each added doc must be immediately
/// findable via the <c>SearcherManager</c>.
/// </summary>
public virtual void TestCRTReopen()
{
    //test behaving badly

    //should be high enough
    int maxStaleSecs = 20;

    //build crap data just to store it.
    string s = " abcdefghijklmnopqrstuvwxyz ";
    char[] chars = s.ToCharArray();
    StringBuilder builder = new StringBuilder(2048);
    for (int i = 0; i < 2048; i++)
    {
        builder.Append(chars[Random().Next(chars.Length)]);
    }
    string content = builder.ToString();

    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
    IndexWriterConfig config = new IndexWriterConfig(
#pragma warning disable 612, 618
        Version.LUCENE_46,
#pragma warning restore 612, 618
        new MockAnalyzer(Random()));
    config.SetIndexDeletionPolicy(sdp);
    config.SetOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(dir, config);
    SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
    TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
    ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(tiw, sm, maxStaleSecs, 0);

    controlledRealTimeReopenThread.SetDaemon(true);
    controlledRealTimeReopenThread.Start();

    IList<ThreadClass> commitThreads = new List<ThreadClass>();

    for (int i = 0; i < 500; i++)
    {
        if (i > 0 && i % 50 == 0)
        {
            // Spawn a concurrent committer (uses the snapshot policy) to race the reopen thread.
            ThreadClass commitThread = new RunnableAnonymousInnerClassHelper(this, sdp, dir, iw);
            commitThread.Start();
            commitThreads.Add(commitThread);
        }
        Document d = new Document();
        d.Add(new TextField("count", i + "", Field.Store.NO));
        d.Add(new TextField("content", content, Field.Store.YES));
        // NOTE(review): Environment.TickCount is a 32-bit millisecond counter
        // that wraps (~24.9 days); acceptable in a test, but Stopwatch would be
        // immune to wrap-around.
        long start = Environment.TickCount;
        long l = tiw.AddDocument(d);
        controlledRealTimeReopenThread.WaitForGeneration(l);
        long wait = Environment.TickCount - start;
        assertTrue("waited too long for generation " + wait, wait < (maxStaleSecs * 1000));
        IndexSearcher searcher = sm.Acquire();
        TopDocs td = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
        sm.Release(searcher);
        assertEquals(1, td.TotalHits);
    }

    foreach (ThreadClass commitThread in commitThreads)
    {
        commitThread.Join();
    }

    controlledRealTimeReopenThread.Dispose();
    sm.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes four known sentences, then cross-checks stored term vectors against
/// the postings (TermEnum/TermDocs): every vector frequency must equal the
/// postings frequency; a "chocolate" search must return docs in the expected
/// order; and the Sorted/FieldSorted term-vector mappers must return the
/// fourth doc's terms sorted by frequency.
/// </summary>
public virtual void TestKnownSetOfDocuments()
{
    System.String test1 = "eating chocolate in a computer lab"; //6 terms
    System.String test2 = "computer in a computer lab"; //5 terms
    System.String test3 = "a chocolate lab grows old"; //5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
    // Expected term -> frequency map for test4.
    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    // NOTE(review): duplicate assignment of the same key/value ("computer") — redundant but harmless.
    test4Map["computer"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new MockRAMDirectory();

    try
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        Assert.IsTrue(writer != null);
        writer.AddDocument(testDoc1);
        writer.AddDocument(testDoc2);
        writer.AddDocument(testDoc3);
        writer.AddDocument(testDoc4);
        writer.Close();

        IndexSearcher knownSearcher = new IndexSearcher(dir);
        TermEnum termEnum = knownSearcher.reader_ForNUnit.Terms();
        TermDocs termDocs = knownSearcher.reader_ForNUnit.TermDocs();
        Similarity sim = knownSearcher.GetSimilarity();

        // For every term/doc pair, the frequency stored in the term vector must
        // agree with the postings frequency.
        while (termEnum.Next() == true)
        {
            Term term = termEnum.Term();
            termDocs.Seek(term);
            while (termDocs.Next())
            {
                int docId = termDocs.Doc();
                int freq = termDocs.Freq();
                TermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field");
                float tf = sim.Tf(freq);
                float idf = sim.Idf(term, knownSearcher);
                //float qNorm = sim.queryNorm()
                //This is fine since we don't have stop words
                float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);
                //float coord = sim.coord()
                Assert.IsTrue(vector != null);
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text().Equals(vTerms[i]))
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
        }

        Query query = new TermQuery(new Term("field", "chocolate"));
        ScoreDoc[] hits = knownSearcher.Search(query, null, 1000).scoreDocs;
        //doc 3 should be the first hit b/c it is the shortest match
        Assert.IsTrue(hits.Length == 3);
        float score = hits[0].score;
        Assert.IsTrue(hits[0].doc == 2);
        Assert.IsTrue(hits[1].doc == 3);
        Assert.IsTrue(hits[2].doc == 0);

        // hits[1].doc is testDoc4; its vector must contain exactly the 10
        // distinct terms of test4 with the expected frequencies.
        TermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, "field");
        Assert.IsTrue(vector2 != null);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            System.Int32 freqInt = -1;
            try
            {
                freqInt = (System.Int32)test4Map[term];
            }
            catch (Exception)
            {
                // NOTE(review): a missing key becomes a bare failed assertion,
                // losing the offending term; an assert with a message would be clearer.
                Assert.IsTrue(false);
            }
            Assert.IsTrue(freqInt == freq);
        }

        // Mapper collapsing all fields: entries must come back sorted by
        // descending frequency.
        SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, mapper);
        System.Collections.Generic.SortedDictionary<object, object> vectorEntrySet = mapper.GetTermVectorEntrySet();
        Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
        TermVectorEntry last = null;
        foreach (TermVectorEntry tve in vectorEntrySet.Keys)
        {
            if (tve != null && last != null)
            {
                Assert.IsTrue(last.GetFrequency() >= tve.GetFrequency(), "terms are not properly sorted");
                System.Int32 expectedFreq = (System.Int32)test4Map[tve.GetTerm()];
                //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                Assert.IsTrue(tve.GetFrequency() == 2 * expectedFreq, "Frequency is not correct:");
            }
            last = tve;
        }

        // Per-field mapper: two fields, and the "field" entry set matches the above.
        FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, fieldMapper);
        System.Collections.IDictionary map = fieldMapper.GetFieldToTerms();
        Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
        vectorEntrySet = (System.Collections.Generic.SortedDictionary<Object, Object>)map["field"];
        Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
        Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);
        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
/// <summary>
/// Exercises <c>FieldCacheRangeFilter.NewLongRange</c> over the signed "id"
/// index: bounded/unbounded and inclusive/exclusive combinations, single-value
/// ranges, and the overflow / inverse-range special cases.
/// T = inclusive endpoint, F = exclusive endpoint.
/// </summary>
public virtual void TestFieldCacheRangeFilterLongs()
{
    IndexReader reader = IndexReader.Open(signedIndex.index, true);
    IndexSearcher Search = new IndexSearcher(reader);

    int numDocs = reader.NumDocs();
    int medId = ((maxId - minId) / 2);
    System.Int64 minIdO = (long)minId;
    System.Int64 maxIdO = (long)maxId;
    System.Int64 medIdO = (long)medId;

    // The fixture assigns ids densely from minId to maxId, one per doc.
    Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");

    ScoreDoc[] result;
    // Base query; the filter determines which hits survive — assumes every doc
    // in the fixture matches body:body (TODO confirm against SetUp).
    Query q = new TermQuery(new Term("body", "body"));

    // test id, bounded on both ends
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, maxIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, maxIdO, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but last");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, maxIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but first");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, maxIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 2, result.Length, "all but ends");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", medIdO, maxIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + maxId - medId, result.Length, "med and up");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, medIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + medId - minId, result.Length, "up to med");

    // unbounded id (null endpoint = open-ended)
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", null, null, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "min and up");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", null, maxIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "max and down");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, null, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not min, but up");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", null, maxIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not max, but down");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", medIdO, maxIdO, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(maxId - medId, result.Length, "med and up, not max");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, medIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(medId - minId, result.Length, "not min, up to med");

    // very small sets: [x, x) and (x, x] are empty; [x, x] is exactly one doc
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, minIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "min,min,F,F");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", medIdO, medIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "med,med,F,F");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", maxIdO, maxIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "max,max,F,F");

    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", minIdO, minIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "min,min,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", null, minIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "nul,min,F,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", maxIdO, maxIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,max,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", maxIdO, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,nul,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", medIdO, medIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "med,med,T,T");

    // special cases: exclusive bounds at Int64 extremes must not wrap around
    System.Int64 tempAux = (long)System.Int64.MaxValue;
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", tempAux, null, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "overflow special case");
    System.Int64 tempAux2 = (long)System.Int64.MinValue;
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", null, tempAux2, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "overflow special case");
    // lower bound greater than upper bound: empty result
    result = Search.Search(q, FieldCacheRangeFilter.NewLongRange("id", maxIdO, minIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "inverse range");
}
/// <summary>
/// Builds this query's <see cref="Weight"/> by delegating to the anonymous
/// inner-class implementation bound to this instance.
/// </summary>
public override Weight CreateWeight(IndexSearcher searcher)
    => new WeightAnonymousInnerClassHelper(this);
/// <summary>
/// Randomized string-sort test: indexes NUM_DOCS docs whose "string"/"stringdv"
/// value is random and missing ~10% of the time, then for many iterations runs
/// a randomly filtered, randomly shaped query sorted by that field (forward or
/// reverse, missing-first or missing-last) and verifies every returned sort
/// value against an independently sorted expected list.
/// </summary>
public virtual void TestRandomStringSort()
{
    // Private Random seeded from the test Random so index content is reproducible per seed.
    Random random = new Random(Random.Next());

    int NUM_DOCS = AtLeast(100);
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        random, dir);
    bool allowDups = random.NextBoolean();
    ISet<string> seen = new JCG.HashSet<string>();
    int maxLength = TestUtil.NextInt32(random, 5, 100);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }

    int numDocs = 0;
    // docValues[i] is the sort value of doc i (null when the field is missing).
    IList<BytesRef> docValues = new List<BytesRef>();
    // TODO: deletions
    while (numDocs < NUM_DOCS)
    {
        Document doc = new Document();

        // 10% of the time, the document is missing the value:
        BytesRef br;
        if (LuceneTestCase.Random.Next(10) != 7)
        {
            string s;
            if (random.NextBoolean())
            {
                s = TestUtil.RandomSimpleString(random, maxLength);
            }
            else
            {
                s = TestUtil.RandomUnicodeString(random, maxLength);
            }

            if (!allowDups)
            {
                if (seen.Contains(s))
                {
                    continue;
                }
                seen.Add(s);
            }

            if (VERBOSE)
            {
                Console.WriteLine(" " + numDocs + ": s=" + s);
            }

            br = new BytesRef(s);
            if (DefaultCodecSupportsDocValues)
            {
                doc.Add(new SortedDocValuesField("stringdv", br));
                doc.Add(new NumericDocValuesField("id", numDocs));
            }
            else
            {
                doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO));
            }
            doc.Add(NewStringField("string", s, Field.Store.NO));
            docValues.Add(br);
        }
        else
        {
            br = null;
            if (VERBOSE)
            {
                Console.WriteLine(" " + numDocs + ": <missing>");
            }
            docValues.Add(null);
            if (DefaultCodecSupportsDocValues)
            {
                doc.Add(new NumericDocValuesField("id", numDocs));
            }
            else
            {
                doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO));
            }
        }

        doc.Add(new StoredField("id", numDocs));
        writer.AddDocument(doc);
        numDocs++;

        if (random.Next(40) == 17)
        {
            // force flush
            writer.GetReader().Dispose();
        }
    }

    IndexReader r = writer.GetReader();
    writer.Dispose();
    if (VERBOSE)
    {
        Console.WriteLine(" reader=" + r);
    }

    IndexSearcher idxS = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        r, false);
    int ITERS = AtLeast(100);
    for (int iter = 0; iter < ITERS; iter++)
    {
        bool reverse = random.NextBoolean();

        TopFieldDocs hits;
        SortField sf;
        bool sortMissingLast;
        bool missingIsNull;
        if (DefaultCodecSupportsDocValues && random.NextBoolean())
        {
            sf = new SortField("stringdv", SortFieldType.STRING, reverse);
            // Can only use sort missing if the DVFormat
            // supports docsWithField:
            sortMissingLast = DefaultCodecSupportsDocsWithField && Random.NextBoolean();
            missingIsNull = DefaultCodecSupportsDocsWithField;
        }
        else
        {
            sf = new SortField("string", SortFieldType.STRING, reverse);
            sortMissingLast = Random.NextBoolean();
            missingIsNull = true;
        }
        if (sortMissingLast)
        {
            sf.MissingValue = SortField.STRING_LAST;
        }

        Sort sort;
        if (random.NextBoolean())
        {
            sort = new Sort(sf);
        }
        else
        {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        int hitCount = TestUtil.NextInt32(random, 1, r.MaxDoc + 20);
        RandomFilter f = new RandomFilter(random, (float)random.NextDouble(), docValues);
        int queryType = random.Next(3);
        if (queryType == 0)
        {
            // force out of order
            BooleanQuery bq = new BooleanQuery();
            // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2
            // which delegates to BS if there are no mandatory clauses.
            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
            // the clause instead of BQ.
            bq.MinimumNumberShouldMatch = 1;
            hits = idxS.Search(bq, f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
        }
        else if (queryType == 1)
        {
            hits = idxS.Search(new ConstantScoreQuery(f), null, hitCount, sort, random.NextBoolean(), random.NextBoolean());
        }
        else
        {
            hits = idxS.Search(new MatchAllDocsQuery(), f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
        }

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter=" + iter + " " + hits.TotalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }

        // Compute expected results:
        var expected = f.MatchValues.ToList();
        expected.Sort(Comparer<BytesRef>.Create((a, b) =>
        {
            if (a == null)
            {
                if (b == null)
                {
                    return (0);
                }
                if (sortMissingLast)
                {
                    return (1);
                }
                else
                {
                    return (-1);
                }
            }
            else if (b == null)
            {
                if (sortMissingLast)
                {
                    return (-1);
                }
                else
                {
                    return (1);
                }
            }
            else
            {
                return (a.CompareTo(b));
            }
        }));
        if (reverse)
        {
            expected.Reverse();
        }

        if (VERBOSE)
        {
            Console.WriteLine(" expected:");
            for (int idx = 0; idx < expected.Count; idx++)
            {
                BytesRef br = expected[idx];
                if (br == null && missingIsNull == false)
                {
                    br = new BytesRef();
                }
                Console.WriteLine(" " + idx + ": " + (br == null ? "<missing>" : br.Utf8ToString()));
                if (idx == hitCount - 1)
                {
                    break;
                }
            }
        }

        if (VERBOSE)
        {
            Console.WriteLine(" actual:");
            for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
            {
                FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX];
                BytesRef br = (BytesRef)fd.Fields[0];
                Console.WriteLine(" " + hitIDX + ": " + (br == null ? "<missing>" : br.Utf8ToString()) + " id=" + idxS.Doc(fd.Doc).Get("id"));
            }
        }

        for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
        {
            FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX];
            BytesRef br = expected[hitIDX];
            if (br == null && missingIsNull == false)
            {
                br = new BytesRef();
            }

            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null! So we must map
            // null here, too:
            BytesRef br2 = (BytesRef)fd.Fields[0];
            if (br2 == null && missingIsNull == false)
            {
                br2 = new BytesRef();
            }

            Assert.AreEqual(br, br2, "hit=" + hitIDX + " has wrong sort value");
        }
    }

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes documents carrying the extreme values of <c>double</c> and
/// <c>long</c> (plus every distinct NaN bit pattern), then verifies that
/// NumericRangeQuery handles null (open) bounds, infinite bounds, and
/// inclusive vs. exclusive endpoints correctly.
/// </summary>
public virtual void TestInfiniteValues()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

    // Doc 1: both fields at their minimum / negative-infinite extreme.
    var doc = new Document();
    doc.Add(new DoubleField("double", double.NegativeInfinity, Field.Store.NO));
    doc.Add(new Int64Field("long", long.MinValue, Field.Store.NO));
    writer.AddDocument(doc);

    // Doc 2: both fields at their maximum / positive-infinite extreme.
    doc = new Document();
    doc.Add(new DoubleField("double", double.PositiveInfinity, Field.Store.NO));
    doc.Add(new Int64Field("long", long.MaxValue, Field.Store.NO));
    writer.AddDocument(doc);

    // Doc 3: zero for both fields.
    doc = new Document();
    doc.Add(new DoubleField("double", 0.0, Field.Store.NO));
    doc.Add(new Int64Field("long", 0L, Field.Store.NO));
    writer.AddDocument(doc);

    // One extra document per distinct NaN representation.
    foreach (double nan in TestNumericUtils.DOUBLE_NANs)
    {
        doc = new Document();
        doc.Add(new DoubleField("double", nan, Field.Store.NO));
        writer.AddDocument(doc);
    }
    writer.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    // A null bound is unbounded — all 3 non-NaN long docs match, even
    // when the (irrelevant) inclusive flags are false.
    Query query = NumericRangeQuery.NewInt64Range("long", null, null, true, true);
    TopDocs hits = searcher.Search(query, 10);
    Assert.AreEqual(3, hits.ScoreDocs.Length, "Score doc count");

    query = NumericRangeQuery.NewInt64Range("long", null, null, false, false);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(3, hits.ScoreDocs.Length, "Score doc count");

    query = NumericRangeQuery.NewInt64Range("long", long.MinValue, long.MaxValue, true, true);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(3, hits.ScoreDocs.Length, "Score doc count");

    // Exclusive explicit bounds drop the MinValue and MaxValue docs.
    query = NumericRangeQuery.NewInt64Range("long", long.MinValue, long.MaxValue, false, false);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(1, hits.ScoreDocs.Length, "Score doc count");

    // Same pattern for doubles: open bounds match all 3 non-NaN docs.
    query = NumericRangeQuery.NewDoubleRange("double", null, null, true, true);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(3, hits.ScoreDocs.Length, "Score doc count");

    query = NumericRangeQuery.NewDoubleRange("double", null, null, false, false);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(3, hits.ScoreDocs.Length, "Score doc count");

    query = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, true, true);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(3, hits.ScoreDocs.Length, "Score doc count");

    // Exclusive infinite bounds leave only the zero doc.
    query = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, false, false);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(1, hits.ScoreDocs.Length, "Score doc count");

    // A NaN..NaN range matches exactly the NaN documents.
    query = NumericRangeQuery.NewDoubleRange("double", double.NaN, double.NaN, true, true);
    hits = searcher.Search(query, 10);
    Assert.AreEqual(TestNumericUtils.DOUBLE_NANs.Length, hits.ScoreDocs.Length, "Score doc count");

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes a random set of documents, then for many random iterations
/// searches the whole index and each shard (index segment) separately,
/// verifying that <c>TopDocs.Merge</c> over the per-shard results
/// reproduces the single-index top hits for random sorts and hit counts.
/// </summary>
/// <param name="useFrom">when true, exercises the Merge overload that applies a from/size paging window</param>
/// <param name="VERBOSE">when true, prints per-iteration diagnostics</param>
internal virtual void TestSort(bool useFrom, bool VERBOSE)
{
    IndexReader reader = null;
    Directory dir = null;
    if (!VERBOSE)
    {
        Console.WriteLine("Verbosity disabled. Enable manually if needed.");
    }
    // NOTE(review): verbose runs use fewer docs (presumably to keep console
    // output manageable) — confirm this inversion is intentional.
    int numDocs = VERBOSE ? AtLeast(50) : AtLeast(1000);
    //final int numDocs = AtLeast(50);
    string[] tokens = new string[] { "a", "b", "c", "d", "e" };
    if (VERBOSE)
    {
        Console.WriteLine("TEST: make index");
    }
    {
        dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
        // w.setDoRandomForceMerge(false);
        // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100))
        // Pre-build a pool of random "text" values composed of the 5 tokens,
        // so many docs share terms and queries get multiple hits.
        string[] content = new string[AtLeast(20)];
        for (int contentIDX = 0; contentIDX < content.Length; contentIDX++)
        {
            StringBuilder sb = new StringBuilder();
            int numTokens = TestUtil.NextInt(Random(), 1, 10);
            for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++)
            {
                sb.Append(tokens[Random().Next(tokens.Length)]).Append(' ');
            }
            content[contentIDX] = sb.ToString();
        }
        for (int docIDX = 0; docIDX < numDocs; docIDX++)
        {
            Document doc = new Document();
            doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO));
            doc.Add(NewTextField("text", content[Random().Next(content.Length)], Field.Store.NO));
            doc.Add(new SingleField("float", (float)Random().NextDouble(), Field.Store.NO));
            // Occasionally force the extreme int values so sorting is
            // exercised at the boundaries.
            int intValue;
            if (Random().Next(100) == 17)
            {
                intValue = int.MinValue;
            }
            else if (Random().Next(100) == 17)
            {
                intValue = int.MaxValue;
            }
            else
            {
                intValue = Random().Next();
            }
            doc.Add(new Int32Field("int", intValue, Field.Store.NO));
            if (VERBOSE)
            {
                Console.WriteLine(" doc=" + doc);
            }
            w.AddDocument(doc);
        }
        reader = w.Reader;
        w.Dispose();
    }
    // NOTE: sometimes reader has just one segment, which is
    // important to test
    IndexSearcher searcher = NewSearcher(reader);
    IndexReaderContext ctx = searcher.TopReaderContext;
    // Build one ShardSearcher per leaf (segment), recording each leaf's
    // starting doc base so merged hits can be mapped back to their shard.
    ShardSearcher[] subSearchers;
    int[] docStarts;
    if (ctx is AtomicReaderContext)
    {
        // Single-segment index: one shard covering everything.
        subSearchers = new ShardSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx);
        docStarts[0] = 0;
    }
    else
    {
        CompositeReaderContext compCTX = (CompositeReaderContext)ctx;
        int size = compCTX.Leaves.Count;
        subSearchers = new ShardSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++)
        {
            AtomicReaderContext leave = compCTX.Leaves[searcherIDX];
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.Reader.MaxDoc;
        }
    }
    // Candidate sort fields; each iteration picks 1-3 of these at random.
    IList<SortField> sortFields = new List<SortField>();
    sortFields.Add(new SortField("string", SortFieldType.STRING, true));
    sortFields.Add(new SortField("string", SortFieldType.STRING, false));
    sortFields.Add(new SortField("int", SortFieldType.INT32, true));
    sortFields.Add(new SortField("int", SortFieldType.INT32, false));
    sortFields.Add(new SortField("float", SortFieldType.SINGLE, true));
    sortFields.Add(new SortField("float", SortFieldType.SINGLE, false));
    sortFields.Add(new SortField(null, SortFieldType.SCORE, true));
    sortFields.Add(new SortField(null, SortFieldType.SCORE, false));
    sortFields.Add(new SortField(null, SortFieldType.DOC, true));
    sortFields.Add(new SortField(null, SortFieldType.DOC, false));
    for (int iter = 0; iter < 1000 * RANDOM_MULTIPLIER; iter++)
    {
        // TODO: custom FieldComp...
        Query query = new TermQuery(new Term("text", tokens[Random().Next(tokens.Length)]));
        Sort sort;
        if (Random().Next(10) == 4)
        {
            // Sort by score
            sort = null;
        }
        else
        {
            SortField[] randomSortFields = new SortField[TestUtil.NextInt(Random(), 1, 3)];
            for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++)
            {
                randomSortFields[sortIDX] = sortFields[Random().Next(sortFields.Count)];
            }
            sort = new Sort(randomSortFields);
        }
        int numHits = TestUtil.NextInt(Random(), 1, numDocs + 5);
        //final int numHits = 5;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
        }
        int from = -1;
        int size = -1;
        // First search on whole index:
        TopDocs topHits;
        if (sort == null)
        {
            if (useFrom)
            {
                TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random().NextBoolean());
                searcher.Search(query, c);
                // Pick a random paging window [from, from+size) inside the top hits.
                from = TestUtil.NextInt(Random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.GetTopDocs();
                if (from < tempTopHits.ScoreDocs.Length)
                {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                    Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                    tempTopHits.ScoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                }
                else
                {
                    // Window starts past the hit count: expected result is empty.
                    topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                }
            }
            else
            {
                topHits = searcher.Search(query, numHits);
            }
        }
        else
        {
            TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random().NextBoolean());
            searcher.Search(query, c);
            if (useFrom)
            {
                from = TestUtil.NextInt(Random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.GetTopDocs();
                if (from < tempTopHits.ScoreDocs.Length)
                {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                    Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                    tempTopHits.ScoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                }
                else
                {
                    topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                }
            }
            else
            {
                topHits = c.GetTopDocs(0, numHits);
            }
        }
        if (VERBOSE)
        {
            if (useFrom)
            {
                Console.WriteLine("from=" + from + " size=" + size);
            }
            Console.WriteLine(" top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore));
            if (topHits.ScoreDocs != null)
            {
                for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++)
                {
                    ScoreDoc sd = topHits.ScoreDocs[hitIDX];
                    Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score);
                }
            }
        }
        // ... then all shards:
        Weight w = searcher.CreateNormalizedWeight(query);
        TopDocs[] shardHits = new TopDocs[subSearchers.Length];
        for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++)
        {
            TopDocs subHits;
            ShardSearcher subSearcher = subSearchers[shardIDX];
            if (sort == null)
            {
                subHits = subSearcher.Search(w, numHits);
            }
            else
            {
                TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random().NextBoolean());
                subSearcher.Search(w, c);
                subHits = c.GetTopDocs(0, numHits);
            }
            shardHits[shardIDX] = subHits;
            if (VERBOSE)
            {
                Console.WriteLine(" shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? "null" : subHits.ScoreDocs.Length.ToString()));
                if (subHits.ScoreDocs != null)
                {
                    foreach (ScoreDoc sd in subHits.ScoreDocs)
                    {
                        Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score);
                    }
                }
            }
        }
        // Merge:
        TopDocs mergedHits;
        if (useFrom)
        {
            mergedHits = TopDocs.Merge(sort, from, size, shardHits);
        }
        else
        {
            mergedHits = TopDocs.Merge(sort, numHits, shardHits);
        }
        if (mergedHits.ScoreDocs != null)
        {
            // Make sure the returned shards are correct:
            for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++)
            {
                ScoreDoc sd = mergedHits.ScoreDocs[hitIDX];
                Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard");
            }
        }
        TestUtil.AssertEquals(topHits, mergedHits);
    }
    reader.Dispose();
    dir.Dispose();
}
/// <summary> /// Release a searcher previously obtained from {@link /// #acquire}. /// /// <p><b>NOTE</b>: it's fine to call this after close. /// </summary> public virtual void Release(IndexSearcher s) { s.IndexReader.DecRef(); }
/// <summary>
/// One-time index setup: writes <c>noDocs</c> documents, each carrying the
/// same long value under several precision steps (8/6/4/2 and a "no trie"
/// variant), plus unstored ascending fields (distance 1, starting at
/// -noDocs/2) used to test range splitting and inclusive/exclusive bounds.
/// </summary>
public override void BeforeClass()
{
    base.BeforeClass();

    noDocs = AtLeast(4096);
    distance = (1L << 60) / noDocs;
    directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(TestUtil.NextInt32(Random, 100, 1000)).SetMergePolicy(NewLogMergePolicy()));

    // Frozen stored base type; the per-step variants are mutable copies
    // differing only in NumericPrecisionStep.
    FieldType storedLong = new FieldType(Int64Field.TYPE_NOT_STORED);
    storedLong.IsStored = true;
    storedLong.Freeze();
    FieldType storedLong8 = new FieldType(storedLong) { NumericPrecisionStep = 8 };
    FieldType storedLong6 = new FieldType(storedLong) { NumericPrecisionStep = 6 };
    FieldType storedLong4 = new FieldType(storedLong) { NumericPrecisionStep = 4 };
    FieldType storedLong2 = new FieldType(storedLong) { NumericPrecisionStep = 2 };
    // int.MaxValue precision step effectively disables trie indexing.
    FieldType storedLongNone = new FieldType(storedLong) { NumericPrecisionStep = int.MaxValue };

    // Unstored variants back the ascending fields.
    FieldType unstoredLong = Int64Field.TYPE_NOT_STORED;
    FieldType unstoredLong8 = new FieldType(unstoredLong) { NumericPrecisionStep = 8 };
    FieldType unstoredLong6 = new FieldType(unstoredLong) { NumericPrecisionStep = 6 };
    FieldType unstoredLong4 = new FieldType(unstoredLong) { NumericPrecisionStep = 4 };
    FieldType unstoredLong2 = new FieldType(unstoredLong) { NumericPrecisionStep = 2 };

    Int64Field field8 = new Int64Field("field8", 0L, storedLong8);
    Int64Field field6 = new Int64Field("field6", 0L, storedLong6);
    Int64Field field4 = new Int64Field("field4", 0L, storedLong4);
    Int64Field field2 = new Int64Field("field2", 0L, storedLong2);
    Int64Field fieldNoTrie = new Int64Field("field" + int.MaxValue, 0L, storedLongNone);
    Int64Field ascfield8 = new Int64Field("ascfield8", 0L, unstoredLong8);
    Int64Field ascfield6 = new Int64Field("ascfield6", 0L, unstoredLong6);
    Int64Field ascfield4 = new Int64Field("ascfield4", 0L, unstoredLong4);
    Int64Field ascfield2 = new Int64Field("ascfield2", 0L, unstoredLong2);

    // A single reusable Document: field values are updated per iteration.
    Document doc = new Document();
    // fields that have a distance between values, to test general functionality
    doc.Add(field8);
    doc.Add(field6);
    doc.Add(field4);
    doc.Add(field2);
    doc.Add(fieldNoTrie);
    // ascending fields with a distance of 1, beginning at -noDocs/2, to test
    // the correct splitting of ranges and inclusive/exclusive bounds
    doc.Add(ascfield8);
    doc.Add(ascfield6);
    doc.Add(ascfield4);
    doc.Add(ascfield2);

    for (int l = 0; l < noDocs; l++)
    {
        long val = distance * l + startOffset;
        field8.SetInt64Value(val);
        field6.SetInt64Value(val);
        field4.SetInt64Value(val);
        field2.SetInt64Value(val);
        fieldNoTrie.SetInt64Value(val);

        val = l - (noDocs / 2);
        ascfield8.SetInt64Value(val);
        ascfield6.SetInt64Value(val);
        ascfield4.SetInt64Value(val);
        ascfield2.SetInt64Value(val);

        writer.AddDocument(doc);
    }

    reader = writer.GetReader();
    searcher = NewSearcher(reader);
    writer.Dispose();
}
/// <summary>
/// Verifies that a custom analyzer's explicit position increments are
/// honored: checks raw term positions, then runs phrase and multi-phrase
/// queries whose matching depends on those exact positions.
/// </summary>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, store, analyzer);
    Document d = new Document();
    d.Add(NewTextField("field", "bogus", Field.Store.YES));
    writer.AddDocument(d);
    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
    pos.NextDoc();
    // first token should be at position 0
    Assert.AreEqual(0, pos.NextPosition());

    pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
    pos.NextDoc();
    // second token should be at position 2
    Assert.AreEqual(2, pos.NextPosition());

    PhraseQuery q;
    ScoreDoc[] hits;

    // Default (consecutive) positions don't match because of the gap.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // same as previous, just specify positions explicitly.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // phrase query should fail for non existing searched term
    // even if there exist another searched terms in the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // multi-phrase query should succeed for non existing searched term
    // because there exist another searched terms in the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    hits = searcher.Search(mq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    reader.Dispose();
    store.Dispose();
}
public bool DoPrune(double ageSec, IndexSearcher searcher) { return(ageSec > MaxAgeSec); }
public virtual void Test() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir); long startTime = DateTime.Now.Millisecond; // TODO: replace w/ the @nightly test data; make this // into an optional @nightly stress test Document doc = new Document(); Field body = NewTextField("body", "", Field.Store.NO); doc.Add(body); StringBuilder sb = new StringBuilder(); for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int numTerms = Random().Next(10); for (int termCount = 0; termCount < numTerms; termCount++) { sb.Append(Random().NextBoolean() ? "aaa" : "bbb"); sb.Append(' '); } body.StringValue = sb.ToString(); w.AddDocument(doc); sb.Remove(0, sb.Length); } IndexReader r = w.Reader; w.Dispose(); long endTime = DateTime.Now.Millisecond; if (VERBOSE) { Console.WriteLine("BUILD took " + (endTime - startTime)); } IndexSearcher s = NewSearcher(r); AtomicBoolean failed = new AtomicBoolean(); AtomicLong netSearch = new AtomicLong(); ThreadClass[] threads = new ThreadClass[NUM_SEARCH_THREADS]; for (int threadID = 0; threadID < NUM_SEARCH_THREADS; threadID++) { threads[threadID] = new ThreadAnonymousInnerClassHelper(this, s, failed, netSearch); threads[threadID].SetDaemon(true); } foreach (ThreadClass t in threads) { t.Start(); } foreach (ThreadClass t in threads) { t.Join(); } if (VERBOSE) { Console.WriteLine(NUM_SEARCH_THREADS + " threads did " + netSearch.Get() + " searches"); } r.Dispose(); dir.Dispose(); }
/// <summary> /// Expert: Constructs an appropriate Weight implementation for this query. /// /// <p> /// Only implemented by primitive queries, which re-write to themselves. /// </summary> public virtual Weight CreateWeight(IndexSearcher searcher) { throw new System.NotSupportedException("Query " + this + " does not implement createWeight"); }
/// <summary>
/// Indexes random documents large enough to span multiple postings chunks
/// (&gt; 4096 terms each), where documents sometimes reuse sub-phrases of
/// earlier documents; then searches for random sub-phrases taken verbatim
/// from a document and asserts that document is among the hits.
/// </summary>
public virtual void TestRandomPhrases()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());

    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
    // docs[i] holds the exact token sequence indexed for doc i, used later
    // to build phrase queries that are guaranteed to occur in that doc.
    IList<IList<string>> docs = new List<IList<string>>();
    Documents.Document d = new Documents.Document();
    Field f = NewTextField("f", "", Field.Store.NO);
    d.Add(f);

    Random r = Random();

    int NUM_DOCS = AtLeast(10);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        // must be > 4096 so it spans multiple chunks
        int termCount = TestUtil.NextInt(Random(), 4097, 8200);

        IList<string> doc = new List<string>();

        StringBuilder sb = new StringBuilder();
        while (doc.Count < termCount)
        {
            if (r.Next(5) == 1 || docs.Count == 0)
            {
                // make new non-empty-string term
                string term;
                while (true)
                {
                    term = TestUtil.RandomUnicodeString(r);
                    if (term.Length > 0)
                    {
                        break;
                    }
                }
                // Run the term through the analyzer so the recorded tokens
                // match exactly what gets indexed.
                IOException priorException = null;
                TokenStream ts = analyzer.TokenStream("ignore", new StringReader(term));
                try
                {
                    ICharTermAttribute termAttr = ts.AddAttribute<ICharTermAttribute>();
                    ts.Reset();
                    while (ts.IncrementToken())
                    {
                        string text = termAttr.ToString();
                        doc.Add(text);
                        sb.Append(text).Append(' ');
                    }
                    ts.End();
                }
                catch (IOException e)
                {
                    priorException = e;
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(priorException, ts);
                }
            }
            else
            {
                // pick existing sub-phrase
                IList<string> lastDoc = docs[r.Next(docs.Count)];
                int len = TestUtil.NextInt(r, 1, 10);
                int start = r.Next(lastDoc.Count - len);
                for (int k = start; k < start + len; k++)
                {
                    string t = lastDoc[k];
                    doc.Add(t);
                    sb.Append(t).Append(' ');
                }
            }
        }
        docs.Add(doc);
        f.StringValue = sb.ToString();
        w.AddDocument(d);
    }

    IndexReader reader = w.Reader;
    IndexSearcher s = NewSearcher(reader);
    w.Dispose();

    // now search
    int num = AtLeast(10);
    for (int i = 0; i < num; i++)
    {
        // Take a random contiguous run of 2-20 terms from a random doc and
        // phrase-query for it; that doc must be a hit.
        int docID = r.Next(docs.Count);
        IList<string> doc = docs[docID];

        int numTerm = TestUtil.NextInt(r, 2, 20);
        int start = r.Next(doc.Count - numTerm);
        PhraseQuery pq = new PhraseQuery();
        StringBuilder sb = new StringBuilder();
        for (int t = start; t < start + numTerm; t++)
        {
            pq.Add(new Term("f", doc[t]));
            sb.Append(doc[t]).Append(' ');
        }

        TopDocs hits = s.Search(pq, NUM_DOCS);
        bool found = false;
        for (int j = 0; j < hits.ScoreDocs.Length; j++)
        {
            if (hits.ScoreDocs[j].Doc == docID)
            {
                found = true;
                break;
            }
        }

        Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
    }

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies that spans and payloads are returned for matches starting at
/// position 0: checks the raw positions of term "a", then confirms that a
/// SpanNearQuery over "a" and "k" produces the expected span/payload
/// counts via MultiSpansWrapper and PayloadSpanUtil, including a span
/// starting at position 0.
/// </summary>
public virtual void TestPayloadsPos0()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir, new MockPayloadAnalyzer());
    Document doc = new Document();
    doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
    writer.AddDocument(doc);

    IndexReader readerFromWriter = writer.GetReader();
    AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter);

    DocsAndPositionsEnum tp = r.GetTermPositionsEnum(new Term("content", "a"));

    int count = 0;
    Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    // "a" occurs 4 times
    Assert.AreEqual(4, tp.Freq);
    Assert.AreEqual(0, tp.NextPosition());
    Assert.AreEqual(1, tp.NextPosition());
    Assert.AreEqual(3, tp.NextPosition());
    Assert.AreEqual(6, tp.NextPosition());

    // only one doc has "a"
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc());

    IndexSearcher @is = NewSearcher(readerFromWriter);

    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

    count = 0;
    bool sawZero = false;
    if (Verbose)
    {
        Console.WriteLine("\ngetPayloadSpans test");
    }
    // First pass: iterate payload spans, counting payloads and watching
    // for a span that starts at position 0.
    Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
    while (pspans.Next())
    {
        if (Verbose)
        {
            Console.WriteLine("doc " + pspans.Doc + ": span " + pspans.Start + " to " + pspans.End);
        }
        var payloads = pspans.GetPayload();
        sawZero |= pspans.Start == 0;
        foreach (var bytes in payloads)
        {
            count++;
            if (Verbose)
            {
                Console.WriteLine(" payload: " + Encoding.UTF8.GetString(bytes));
            }
        }
    }
    Assert.IsTrue(sawZero);
    Assert.AreEqual(5, count);

    // System.out.println("\ngetSpans test");
    // Second pass: plain spans (no payloads) for the same query.
    Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
    count = 0;
    sawZero = false;
    while (spans.Next())
    {
        count++;
        sawZero |= spans.Start == 0;
        // System.out.println(spans.Doc() + " - " + spans.Start() + " - " +
        // spans.End());
    }
    Assert.AreEqual(4, count);
    Assert.IsTrue(sawZero);

    // System.out.println("\nPayloadSpanUtil test");
    // Third pass: collect payloads through PayloadSpanUtil and make sure
    // the position-0 payload is included.
    sawZero = false;
    PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext);
    var pls = psu.GetPayloadsForQuery(snq);
    count = pls.Count;
    foreach (var bytes in pls)
    {
        string s = Encoding.UTF8.GetString(bytes);
        //System.out.println(s);
        sawZero |= s.Equals("pos: 0", StringComparison.Ordinal);
    }
    Assert.AreEqual(5, count);
    Assert.IsTrue(sawZero);

    writer.Dispose();
    @is.IndexReader.Dispose();
    dir.Dispose();
}
public override Weight CreateWeight(IndexSearcher searcher) { return(AssertingWeight.Wrap(new Random(Random.Next()), @in.CreateWeight(searcher))); }
public ThreadAnonymousInnerClassHelper(TestSearchWithThreads outerInstance, IndexSearcher s, AtomicBoolean failed, AtomicLong netSearch) { this.OuterInstance = outerInstance; this.s = s; this.Failed = failed; this.NetSearch = netSearch; col = new TotalHitCountCollector(); }
/// <summary>
/// Exercises MatchAllDocsQuery on a small multi-segment index: default
/// (norms-off) ordering, norms-based scoring via the field-name overload,
/// re-scoring after a norm change, skipTo via boolean conjunctions,
/// behavior after a deletion, and round-tripping through ToString()/parse.
/// </summary>
public virtual void TestQuery()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    iw.SetMaxBufferedDocs(2); // force multi-segment
    AddDoc("one", iw, 1f);
    AddDoc("two", iw, 20f);
    AddDoc("three four", iw, 300f);
    iw.Close();

    IndexReader ir = IndexReader.Open((Directory)dir, false, null);
    IndexSearcher is_Renamed = new IndexSearcher(ir);
    ScoreDoc[] hits;

    // assert with norms scoring turned off
    // (docs come back in index order)
    hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000, null).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].Doc, null).Get("key", null), "one");
    Assert.AreEqual(ir.Document(hits[1].Doc, null).Get("key", null), "two");
    Assert.AreEqual(ir.Document(hits[2].Doc, null).Get("key", null), "three four");

    // assert with norms scoring turned on
    // (higher boost => higher score => earlier rank)
    MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key");
    hits = is_Renamed.Search(normsQuery, null, 1000, null).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].Doc, null).Get("key", null), "three four");
    Assert.AreEqual(ir.Document(hits[1].Doc, null).Get("key", null), "two");
    Assert.AreEqual(ir.Document(hits[2].Doc, null).Get("key", null), "one");

    // change norm & retest
    ir.SetNorm(0, "key", 400f, null);
    normsQuery = new MatchAllDocsQuery("key");
    hits = is_Renamed.Search(normsQuery, null, 1000, null).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].Doc, null).Get("key", null), "one");
    Assert.AreEqual(ir.Document(hits[1].Doc, null).Get("key", null), "three four");
    Assert.AreEqual(ir.Document(hits[2].Doc, null).Get("key", null), "two");

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), Occur.MUST);
    hits = is_Renamed.Search(bq, null, 1000, null).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), Occur.MUST);
    hits = is_Renamed.Search(bq, null, 1000, null).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // delete a document:
    is_Renamed.IndexReader.DeleteDocument(0, null);
    hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000, null).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString()
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "key", analyzer);
    hits = is_Renamed.Search(qp.Parse(new MatchAllDocsQuery().ToString()), null, 1000, null).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString() with non default boost
    Query maq = new MatchAllDocsQuery();
    maq.Boost = 2.3f;
    Query pq = qp.Parse(maq.ToString());
    hits = is_Renamed.Search(pq, null, 1000, null).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    is_Renamed.Close();
    ir.Close();
    dir.Close();
}
/// <summary>
/// Verifies that a PhraseQuery scores correctly when embedded in a boolean
/// conjunction (ConjunctionScorer), in both clause orders, on two indexes:
/// one where the phrase is adjacent and one where sloppy-0 matching must
/// skip over a term ("map foobarword entry" must NOT match "map entry").
/// </summary>
public virtual void TestPhraseQueryInConjunctionScorer()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

    // Two docs contain the phrase "marketing info"; only one has "foobar".
    Documents.Document doc = new Documents.Document();
    doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
    writer.AddDocument(doc);

    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "foobar", Field.Store.YES));
    doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
    writer.AddDocument(doc);

    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(Random(), phraseQuery, searcher, Similarity);

    // Conjunction of term + phrase narrows to the single doc with both.
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(Random(), termQuery, searcher, Similarity);

    reader.Dispose();

    // Rebuild the index: "map entry" is adjacent in two docs, separated by
    // "foobarword" in the third.
    writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE));
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES));
    writer.AddDocument(doc);

    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES));
    writer.AddDocument(doc);

    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES));
    writer.AddDocument(doc);

    reader = writer.Reader;
    writer.Dispose();

    searcher = NewSearcher(reader);

    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));

    hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    // Conjunction in both clause orders must produce the same 2 hits.
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(Random(), booleanQuery, searcher, Similarity);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Verifies that CachingWrapperFilter interacts correctly with deletions across
/// reader reopens: unchanged deletions produce cache hits (missCount stable),
/// and deleted docs stop matching, either via a reused cache entry that applies
/// deletes dynamically or via a fresh entry (missCount grows).
/// </summary>
public virtual void TestEnforceDeletions()
{
    Directory dir = NewDirectory();
    // SerialMergeScheduler + LogMergePolicy(10): asserts below require no unexpected merges.
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10)));

    // NOTE: cannot use writer.getReader because RIW (on
    // flipping a coin) may give us a newly opened reader,
    // but we use .reopen on this reader below and expect to
    // (must) get an NRT reader:
    DirectoryReader reader = DirectoryReader.Open(writer.IndexWriter, true);
    // same reason we don't wrap?
    IndexSearcher searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);

    // add a doc, refresh the reader, and check that it's there
    Document doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    writer.AddDocument(doc);
    reader = RefreshReader(reader);
    searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);
    TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
    Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

    // Wrap an id:1 filter in the caching filter under test.
    Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));
    CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);
    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.IsTrue(filter.GetSizeInBytes() > 0);
    Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
    Query constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

    // make sure we get a cache hit when we reopen reader
    // that had no change to deletions

    // fake delete (deletes nothing):
    writer.DeleteDocuments(new Term("foo", "bar"));
    IndexReader oldReader = reader;
    reader = RefreshReader(reader);
    // No deletions changed, so the refresh must hand back the very same reader.
    Assert.IsTrue(reader == oldReader);
    int missCount = filter.missCount;
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
    // cache hit:
    Assert.AreEqual(missCount, filter.missCount);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.DeleteDocuments(new Term("id", "1"));
    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry!  Else the assert below may sometimes
    // fail:
    oldReader = reader;
    reader = RefreshReader(reader);
    searcher = NewSearcher(reader, false);
    missCount = filter.missCount;
    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
    // cache hit
    Assert.AreEqual(missCount, filter.missCount);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

    // apply deletes dynamically:
    filter = new CachingWrapperFilter(startFilter);
    writer.AddDocument(doc);
    reader = RefreshReader(reader);
    searcher = NewSearcher(reader, false);
    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
    // Fresh filter instance: the first use must have populated the cache.
    missCount = filter.missCount;
    Assert.IsTrue(missCount > 0);
    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
    Assert.AreEqual(missCount, filter.missCount);

    writer.AddDocument(doc);
    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry!  Else the assert below may sometimes
    // fail:
    oldReader = reader;
    reader = RefreshReader(reader);
    searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);
    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
    // New segment added, so a new cache entry was created.
    Assert.IsTrue(filter.missCount > missCount);
    missCount = filter.missCount;
    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit...");
    Assert.AreEqual(missCount, filter.missCount);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.DeleteDocuments(new Term("id", "1"));
    reader = RefreshReader(reader);
    searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);
    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
    // CWF reused the same entry (it dynamically applied the deletes):
    Assert.AreEqual(missCount, filter.missCount);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
    // CWF reused the same entry (it dynamically applied the deletes):
    Assert.AreEqual(missCount, filter.missCount);

    // NOTE: silliness to make sure JRE does not eliminate
    // our holding onto oldReader to prevent
    // CachingWrapperFilter's WeakHashMap from dropping the
    // entry:
    Assert.IsTrue(oldReader != null);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
} // end of DisjunctionMaxWeight inner class

/// <summary>
/// Creates the <see cref="Weight"/> used to score this query. </summary>
public override Weight CreateWeight(IndexSearcher searcher)
{
    // Scoring is delegated to the dedicated DisjunctionMaxWeight implementation.
    return new DisjunctionMaxWeight(this, searcher);
}
/// <summary>
/// Exercises constant-score range queries over the "id" field: fully bounded,
/// half-open, unbounded, and degenerate single-value / empty ranges, each also
/// run with the CONSTANT_SCORE_AUTO_REWRITE_DEFAULT rewrite method.
/// </summary>
public virtual void TestRangeQueryId()
{
    // NOTE: uses the index built in the *super* class setUp.
    IndexReader reader = signedIndexReader;
    IndexSearcher search = NewSearcher(reader);
    if (Verbose)
    {
        Console.WriteLine("TEST: reader=" + reader);
    }

    int medId = (maxId - minId) / 2;
    string minIP = Pad(minId);
    string maxIP = Pad(maxId);
    string medIP = Pad(medId);
    int numDocs = reader.NumDocs;
    assertEquals("num of docs", numDocs, 1 + maxId - minId);

    // Runs the given range query and checks the number of hits against expectedCount.
    void AssertHits(string label, int expectedCount, Query rangeQuery)
    {
        ScoreDoc[] found = search.Search(rangeQuery, null, numDocs).ScoreDocs;
        assertEquals(label, expectedCount, found.Length);
    }

    // test id, bounded on both ends
    AssertHits("find all", numDocs, Csrq("id", minIP, maxIP, T, T));
    AssertHits("find all", numDocs, Csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("all but last", numDocs - 1, Csrq("id", minIP, maxIP, T, F));
    AssertHits("all but last", numDocs - 1, Csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("all but first", numDocs - 1, Csrq("id", minIP, maxIP, F, T));
    AssertHits("all but first", numDocs - 1, Csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("all but ends", numDocs - 2, Csrq("id", minIP, maxIP, F, F));
    AssertHits("all but ends", numDocs - 2, Csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("med and up", 1 + maxId - medId, Csrq("id", medIP, maxIP, T, T));
    AssertHits("med and up", 1 + maxId - medId, Csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("up to med", 1 + medId - minId, Csrq("id", minIP, medIP, T, T));
    AssertHits("up to med", 1 + medId - minId, Csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));

    // unbounded id
    AssertHits("min and up", numDocs, Csrq("id", minIP, null, T, F));
    AssertHits("max and down", numDocs, Csrq("id", null, maxIP, F, T));
    AssertHits("not min, but up", numDocs - 1, Csrq("id", minIP, null, F, F));
    AssertHits("not max, but down", numDocs - 1, Csrq("id", null, maxIP, F, F));
    AssertHits("med and up, not max", maxId - medId, Csrq("id", medIP, maxIP, T, F));
    AssertHits("not min, up to med", medId - minId, Csrq("id", minIP, medIP, F, T));

    // very small sets
    AssertHits("min,min,F,F", 0, Csrq("id", minIP, minIP, F, F));
    AssertHits("min,min,F,F", 0, Csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("med,med,F,F", 0, Csrq("id", medIP, medIP, F, F));
    AssertHits("med,med,F,F", 0, Csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("max,max,F,F", 0, Csrq("id", maxIP, maxIP, F, F));
    AssertHits("max,max,F,F", 0, Csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("min,min,T,T", 1, Csrq("id", minIP, minIP, T, T));
    AssertHits("min,min,T,T", 1, Csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("nul,min,F,T", 1, Csrq("id", null, minIP, F, T));
    AssertHits("nul,min,F,T", 1, Csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("max,max,T,T", 1, Csrq("id", maxIP, maxIP, T, T));
    AssertHits("max,max,T,T", 1, Csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    // NOTE(review): label says "T,T" but the query uses T,F — message kept
    // byte-identical to the original (same mismatch exists upstream).
    AssertHits("max,nul,T,T", 1, Csrq("id", maxIP, null, T, F));
    AssertHits("max,nul,T,T", 1, Csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
    AssertHits("med,med,T,T", 1, Csrq("id", medIP, medIP, T, T));
    AssertHits("med,med,T,T", 1, Csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
}
/// <summary>
/// Opens the Lucene index at <paramref name="indexPath"/> read-only and wires up
/// a multi-field parser over the server/database/schema/name/equality/inequality/included
/// fields, with descending boosts on the first four and AND as the default operator.
/// </summary>
/// <param name="indexPath">Filesystem path of the Lucene index directory.</param>
/// <param name="resultsLimit">Maximum number of results to return (default 50).</param>
public SqloogleMiaSearcher(string indexPath, int resultsLimit = 50)
{
    // Open the on-disk index read-only and prepare the analysis chain.
    _directory = FSDirectory.Open(new DirectoryInfo(indexPath));
    _searcher = new IndexSearcher(_directory, true);
    _analyzer = new StandardAnalyzer(Version.LUCENE_30);

    // Fields searched by default; per-field boosts weight identifier fields
    // higher than the equality/inequality/included columns.
    var searchableFields = new[] { "server", "database", "schema", "name", "equality", "inequality", "included" };
    var fieldBoosts = new Dictionary<string, float>
    {
        { "server", 0.4F },
        { "database", 0.3F },
        { "schema", 0.2F },
        { "name", 0.1F },
        { "equality", 0.0F },
        { "inequality", 0.0F },
        { "included", 0.0F }
    };

    _parser = new MultiFieldQueryParser(Version.LUCENE_30, searchableFields, _analyzer, fieldBoosts)
    {
        DefaultOperator = QueryParser.Operator.AND
    };

    _resultsLimit = resultsLimit;
    _logger.Trace("Searcher is ready.");
}