public virtual void TestAsterisk() { Directory indexStore = GetIndexStore("body", new string[] { "metal", "metals" }); IndexReader reader = DirectoryReader.Open(indexStore); IndexSearcher searcher = NewSearcher(reader); Query query1 = new TermQuery(new Term("body", "metal")); Query query2 = new WildcardQuery(new Term("body", "metal*")); Query query3 = new WildcardQuery(new Term("body", "m*tal")); Query query4 = new WildcardQuery(new Term("body", "m*tal*")); Query query5 = new WildcardQuery(new Term("body", "m*tals")); BooleanQuery query6 = new BooleanQuery(); query6.Add(query5, BooleanClause.Occur.SHOULD); BooleanQuery query7 = new BooleanQuery(); query7.Add(query3, BooleanClause.Occur.SHOULD); query7.Add(query5, BooleanClause.Occur.SHOULD); // Queries do not automatically lower-case search terms: Query query8 = new WildcardQuery(new Term("body", "M*tal*")); AssertMatches(searcher, query1, 1); AssertMatches(searcher, query2, 2); AssertMatches(searcher, query3, 1); AssertMatches(searcher, query4, 2); AssertMatches(searcher, query5, 1); AssertMatches(searcher, query6, 1); AssertMatches(searcher, query7, 2); AssertMatches(searcher, query8, 0); AssertMatches(searcher, new WildcardQuery(new Term("body", "*tall")), 0); AssertMatches(searcher, new WildcardQuery(new Term("body", "*tal")), 1); AssertMatches(searcher, new WildcardQuery(new Term("body", "*tal*")), 2); reader.Dispose(); indexStore.Dispose(); }
public void AndExtension() { BooleanQuery originalQuery = new BooleanQuery(); BooleanQuery innerQuery = new BooleanQuery(); Term term = new Term("_name", "value1"); TermQuery termQuery1 = new TermQuery(term); innerQuery.Add(termQuery1, Occur.MUST); Term term2 = new Term("_name", "value2"); TermQuery termQuery2 = new TermQuery(term2); innerQuery.Add(termQuery2, Occur.MUST); originalQuery.Add(innerQuery, Occur.MUST); string queryString = originalQuery.ToString(); QueryBuilder builder = new QueryBuilder(); builder.And ( x => x.Term("_name", "value1"), x => x.Term("_name", "value2") ); Query replacementQuery = builder.Build(); string newQueryString = replacementQuery.ToString(); Assert.AreEqual(queryString, newQueryString); Console.Write(queryString); }
public void Cache() { CreateData(); IFullTextSession s = Search.CreateFullTextSession(OpenSession()); s.Transaction.Begin(); BooleanQuery query = new BooleanQuery(); query.Add(new TermQuery(new Term("teacher", "andre")), BooleanClause.Occur.SHOULD); query.Add(new TermQuery(new Term("teacher", "max")), BooleanClause.Occur.SHOULD); query.Add(new TermQuery(new Term("teacher", "aaron")), BooleanClause.Occur.SHOULD); IFullTextQuery ftQuery = s.CreateFullTextQuery(query, typeof(Driver)); Assert.AreEqual(3, ftQuery.ResultSize, "No filter should happen"); ftQuery = s.CreateFullTextQuery(query, typeof(Driver)); ftQuery.EnableFullTextFilter("cachetest"); Assert.AreEqual(0, ftQuery.ResultSize, "Should filter out all"); ftQuery = s.CreateFullTextQuery(query, typeof(Driver)); ftQuery.EnableFullTextFilter("cachetest"); try { int i = ftQuery.ResultSize; } catch (NotSupportedException) { Assert.Fail("Cache does not work"); } s.Transaction.Commit(); s.Close(); DeleteData(); }
public Query GetQuery() { if (String.IsNullOrWhiteSpace(Term) && String.IsNullOrWhiteSpace(Type) && !CreatedOnTo.HasValue && !CreatedOnFrom.HasValue && Parent == null) return new MatchAllDocsQuery(); var booleanQuery = new BooleanQuery(); if (!String.IsNullOrWhiteSpace(Term)) { var indexDefinition = IndexingHelper.Get<AdminWebpageIndexDefinition>(); var analyser = indexDefinition.GetAnalyser(); var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, indexDefinition.SearchableFieldNames, analyser); Query query = Term.SafeGetSearchQuery(parser, analyser); booleanQuery.Add(query, Occur.MUST); } if (CreatedOnFrom.HasValue || CreatedOnTo.HasValue) booleanQuery.Add(GetDateQuery(), Occur.MUST); if (!string.IsNullOrEmpty(Type)) booleanQuery.Add(new TermQuery(new Term(FieldDefinition.GetFieldName<TypeFieldDefinition>(), Type)), Occur.MUST); if (Parent != null) booleanQuery.Add( new TermQuery(new Term(FieldDefinition.GetFieldName<ParentIdFieldDefinition>(), Parent.Id.ToString())), Occur.MUST); return booleanQuery; }
public virtual void TestNullOrSubScorer() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = new IndexSearcher(r); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor float score = s.Search(q, 10).MaxScore; Query subQuery = new TermQuery(new Term("field", "not_in_index")); subQuery.Boost = 0; q.Add(subQuery, Occur.SHOULD); float score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * .5, score2, 1e-6); // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor BooleanQuery qq = (BooleanQuery)q.Clone(); PhraseQuery phrase = new PhraseQuery(); phrase.Add(new Term("field", "not_in_index")); phrase.Add(new Term("field", "another_not_in_index")); phrase.Boost = 0; qq.Add(phrase, Occur.SHOULD); score2 = s.Search(qq, 10).MaxScore; Assert.AreEqual(score * (1.0 / 3), score2, 1e-6); // now test BooleanScorer2 subQuery = new TermQuery(new Term("field", "b")); subQuery.Boost = 0; q.Add(subQuery, Occur.MUST); score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * (2.0 / 3), score2, 1e-6); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).TotalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); q.Add(pq, Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).TotalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); r.Close(); w.Close(); dir.Close(); }
public static Query BuildQuery(string query) { var matchCollection = untokenizedQuery.Matches(query); if (matchCollection.Count == 0) return new QueryParser(Version.LUCENE_CURRENT, "", new StandardAnalyzer(Version.LUCENE_CURRENT)).Parse(query); var sb = new StringBuilder(query); var booleanQuery = new BooleanQuery(); foreach (Match match in matchCollection) { BooleanClause.Occur occur; switch (match.Groups[1].Value) { case "+": occur=BooleanClause.Occur.MUST; break; case "-": occur=BooleanClause.Occur.MUST_NOT; break; default: occur=BooleanClause.Occur.SHOULD; break; } booleanQuery.Add(new TermQuery(new Term(match.Groups[2].Value, match.Groups[3].Value)), occur); sb.Replace(match.Value, ""); } var remaining = sb.ToString().Trim(); if(remaining.Length > 0) { booleanQuery.Add(new QueryParser(Version.LUCENE_CURRENT, "", new StandardAnalyzer(Version.LUCENE_CURRENT)).Parse(remaining), BooleanClause.Occur.SHOULD); } return booleanQuery; }
public virtual void TestQueries() { Single_Renamed = Single(Random()); Parallel_Renamed = Parallel(Random()); QueryTest(new TermQuery(new Term("f1", "v1"))); QueryTest(new TermQuery(new Term("f1", "v2"))); QueryTest(new TermQuery(new Term("f2", "v1"))); QueryTest(new TermQuery(new Term("f2", "v2"))); QueryTest(new TermQuery(new Term("f3", "v1"))); QueryTest(new TermQuery(new Term("f3", "v2"))); QueryTest(new TermQuery(new Term("f4", "v1"))); QueryTest(new TermQuery(new Term("f4", "v2"))); BooleanQuery bq1 = new BooleanQuery(); bq1.Add(new TermQuery(new Term("f1", "v1")), Occur.MUST); bq1.Add(new TermQuery(new Term("f4", "v1")), Occur.MUST); QueryTest(bq1); Single_Renamed.IndexReader.Dispose(); Single_Renamed = null; Parallel_Renamed.IndexReader.Dispose(); Parallel_Renamed = null; Dir.Dispose(); Dir = null; Dir1.Dispose(); Dir1 = null; Dir2.Dispose(); Dir2 = null; }
public virtual void TestMethod() { Directory directory = NewDirectory(); string[] values = new string[] { "1", "2", "3", "4" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); for (int i = 0; i < values.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(FIELD, values[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader ir = writer.Reader; writer.Dispose(); BooleanQuery booleanQuery1 = new BooleanQuery(); booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD); booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD); BooleanQuery query = new BooleanQuery(); query.Add(booleanQuery1, BooleanClause.Occur.MUST); query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT); IndexSearcher indexSearcher = NewSearcher(ir); ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length, "Number of matched documents"); ir.Dispose(); directory.Dispose(); }
protected virtual Lucene.Net.Search.Query VisitWithinRadius(WithinRadiusNode node, LuceneQueryMapperState mappingState) { SpatialContext ctx = SpatialContext.GEO; var strategy = new PointVectorStrategy(ctx, Sitecore.ContentSearch.Spatial.Common.Constants.LocationFieldName); if (node.Latitude is double && node.Longitude is double && node.Radius is double) { var distance = DistanceUtils.Dist2Degrees((double)node.Radius, DistanceUtils.EARTH_MEAN_RADIUS_MI); Circle circle = ctx.MakeCircle((double)node.Longitude, (double)node.Latitude, distance); var spatialArgs = new SpatialArgs(SpatialOperation.IsWithin, circle); var dq = strategy.MakeQuery(spatialArgs); DistanceReverseValueSource valueSource = new DistanceReverseValueSource(strategy, circle.GetCenter(), distance); ValueSourceFilter vsf = new ValueSourceFilter(new QueryWrapperFilter(dq), valueSource, 0, distance); var filteredSpatial = new FilteredQuery(new MatchAllDocsQuery(), vsf); mappingState.FilterQuery = filteredSpatial; Lucene.Net.Search.Query spatialRankingQuery = new FunctionQuery(valueSource); Random r = new Random(DateTime.Now.Millisecond); var randomNumber = r.Next(10000101, 11000101); Lucene.Net.Search.Query dummyQuery = Lucene.Net.Search.NumericRangeQuery.NewIntRange("__smallcreateddate", randomNumber, Int32.Parse(DateTime.Now.ToString("yyyyMMdd")), true, true); BooleanQuery bq = new BooleanQuery(); bq.Add(filteredSpatial, Occur.MUST); bq.Add(spatialRankingQuery, Occur.MUST); bq.Add(dummyQuery, Occur.SHOULD); return bq; } throw new NotSupportedException("Wrong parameter types: radius, latitude and longitude must be of type double"); }
public virtual void TestQuery() { RAMDirectory dir = new RAMDirectory(); IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true); AddDoc("one", iw); AddDoc("two", iw); AddDoc("three four", iw); iw.Close(); IndexSearcher is_Renamed = new IndexSearcher(dir); Hits hits = is_Renamed.Search(new MatchAllDocsQuery()); Assert.AreEqual(3, hits.Length()); // some artificial queries to trigger the use of skipTo(): BooleanQuery bq = new BooleanQuery(); bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); hits = is_Renamed.Search(bq); Assert.AreEqual(3, hits.Length()); bq = new BooleanQuery(); bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST); hits = is_Renamed.Search(bq); Assert.AreEqual(1, hits.Length()); // delete a document: is_Renamed.GetIndexReader().DeleteDocument(0); hits = is_Renamed.Search(new MatchAllDocsQuery()); Assert.AreEqual(2, hits.Length()); is_Renamed.Close(); }
public Query GetQuery() { var booleanQuery = new BooleanQuery { { new TermRangeQuery( FieldDefinition.GetFieldName<PublishedOnFieldDefinition>(), null, DateTools.DateToString(CurrentRequestData.Now, DateTools.Resolution.SECOND), false, true), Occur.MUST } }; if (!String.IsNullOrWhiteSpace(Term)) { var indexDefinition = IndexingHelper.Get<WebpageSearchIndexDefinition>(); var analyser = indexDefinition.GetAnalyser(); var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, indexDefinition.SearchableFieldNames, analyser); Query query = Term.SafeGetSearchQuery(parser, analyser); booleanQuery.Add(query, Occur.MUST); } if (CreatedOnFrom.HasValue || CreatedOnTo.HasValue) booleanQuery.Add(GetDateQuery(), Occur.MUST); if (!string.IsNullOrEmpty(Type)) booleanQuery.Add(new TermQuery(new Term(FieldDefinition.GetFieldName<TypeFieldDefinition>(), Type)), Occur.MUST); if (Parent != null) booleanQuery.Add( new TermQuery(new Term(FieldDefinition.GetFieldName<ParentIdFieldDefinition>(), Parent.Id.ToString())), Occur.MUST); return booleanQuery; }
/// <summary> /// Searches the index. /// </summary> /// <param name="queryText"></param> /// <param name="categoryNames"></param> /// <param name="pageIndex"></param> /// <param name="pageSize"></param> /// <param name="roleIds"></param> /// <returns></returns> public SearchResultCollection Find(string queryText, IList<string> categoryNames, int pageIndex, int pageSize, IEnumerable<int> roleIds) { long startTicks = DateTime.Now.Ticks; // the overall-query BooleanQuery query = new BooleanQuery(); // add our parsed query if (!String.IsNullOrEmpty(queryText)) { Query multiQuery = MultiFieldQueryParser.Parse(new[] { queryText, queryText, queryText }, new[] { "title", "summary", "contents" }, new StandardAnalyzer()); query.Add(multiQuery, BooleanClause.Occur.MUST); } // add the security constraint - must be satisfied query.Add(this.BuildSecurityQuery(roleIds), BooleanClause.Occur.MUST); // Add the category query (if available) if (categoryNames != null) { query.Add(this.BuildCategoryQuery(categoryNames), BooleanClause.Occur.MUST); } IndexSearcher searcher = new IndexSearcher(this._indexDirectory); Hits hits = searcher.Search(query); int start = pageIndex * pageSize; int end = (pageIndex + 1) * pageSize; if (hits.Length() <= end) { end = hits.Length(); } SearchResultCollection results = new SearchResultCollection(end); results.TotalCount = hits.Length(); results.PageIndex = pageIndex; for (int i = start; i < end; i++) { SearchResult result = new SearchResult(); result.Title = hits.Doc(i).Get("title"); result.Summary = hits.Doc(i).Get("summary"); result.Author = hits.Doc(i).Get("author"); result.ModuleType = hits.Doc(i).Get("moduletype"); result.Path = hits.Doc(i).Get("path"); string[] categories = hits.Doc(i).GetValues("category"); result.Category = categories != null ? String.Join(", ", categories) : String.Empty; //Caused 'String was not recognized as a valid DateTime.' when site culture was set //to Africa (and maybe others: "z" Custom Format Specifier) //See: http://stackoverflow.com/questions/284775/how-do-i-parse-and-convert-datetimes-to-the-rfc-822-date-time-format result.DateCreated = Rfc822DateTime.Parse(hits.Doc(i).Get("datecreated")).ToLocalTime(); //result.DateCreated = DateTime.Parse((hits.Doc(i).Get("datecreated"))); result.Score = hits.Score(i); result.Boost = hits.Doc(i).GetBoost(); result.SectionId = Int32.Parse(hits.Doc(i).Get("sectionid")); results.Add(result); } searcher.Close(); results.ExecutionTime = DateTime.Now.Ticks - startTicks; return results; }
public List<int> Get(Query searchQuery) { var indexSearcher = _productSearcher.IndexSearcher; var name = FieldDefinition.GetFieldName<ProductSearchCategoriesDefinition>(); var valueCollector = new ValueCollector(indexSearcher, name); indexSearcher.Search(searchQuery, valueCollector); var categoryIds = valueCollector.Values[name].Select(s => Convert.ToInt32(s)).Distinct().ToList(); if (!categoryIds.Any()) return categoryIds; var mainQuery = new BooleanQuery(); var idsQuery = new BooleanQuery(); const string idFieldName = "id"; foreach (var categoryId in categoryIds) { idsQuery.Add(new TermQuery(new Term(idFieldName, categoryId.ToString())), Occur.SHOULD); } mainQuery.Add(idsQuery, Occur.MUST); var publishedOn = FieldDefinition.GetFieldName<PublishedOnFieldDefinition>(); mainQuery.Add(new TermRangeQuery( publishedOn, null, DateTools.DateToString(CurrentRequestData.Now, DateTools.Resolution.SECOND), false, true), Occur.MUST); var webpageSearcher = _indexSearcher.IndexSearcher; var webpageValueCollector = new ValueCollector(webpageSearcher, idFieldName); webpageSearcher.Search(mainQuery, null, webpageValueCollector); return webpageValueCollector.Values[idFieldName].Select(s => Convert.ToInt32(s)) .Intersect(categoryIds) .ToList(); }
public static BooleanQuery ParseRange(string fieldName, long lowerValue, long upperValue, bool inclusive) { if (lowerValue > upperValue) { return null; } //var rangeQuery = new BooleanQuery(); var dateQuery = new BooleanQuery(); BooleanQuery.SetMaxClauseCount(int.MaxValue); for (long i = lowerValue; i < upperValue; i++) { var term = new Term(fieldName, i.ToString()); var q = new TermQuery(term); dateQuery.Add(q, BooleanClause.Occur.SHOULD); } if (inclusive) { var term = new Term(fieldName, upperValue.ToString()); var q = new TermQuery(term); dateQuery.Add(q, BooleanClause.Occur.SHOULD); } //if (dateQuery.GetClauses() != null || dateQuery.GetClauses().Length != 0) //{ // rangeQuery.Add(dateQuery, BooleanClause.Occur.MUST); //} return dateQuery; }
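A minimal usage sketch for the ParseRange helper above; the field name, bounds, and the searcher variable are illustrative assumptions, not part of the original code:
// Hypothetical call site: build an inclusive term range over day numbers.
BooleanQuery dayRange = ParseRange("daynumber", 20230101, 20230107, true); // field name is an assumption
if (dayRange != null) // ParseRange returns null when lowerValue > upperValue
{
    TopDocs hits = searcher.Search(dayRange, 10); // 'searcher' is an IndexSearcher assumed to be in scope
}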
public virtual void TestBasic() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Close(); TermQuery termQuery = new TermQuery(new Term("field", "value")); // should not throw exception with primitive query QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery); IndexSearcher searcher = new IndexSearcher(dir, true); TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); // should not throw exception with complex primitive query BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT); qwf = new QueryWrapperFilter(termQuery); hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); // should not throw exception with non primitive Query (doesn't implement // Query#createWeight) qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu"))); hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); }
private void HandleRangeFacet(string index, Facet facet, IndexQuery indexQuery, IndexSearcher currentIndexSearcher, Dictionary<string, IEnumerable<FacetValue>> results) { var rangeResults = new List<FacetValue>(); foreach (var range in facet.Ranges) { var baseQuery = database.IndexStorage.GetLuceneQuery(index, indexQuery, database.IndexQueryTriggers); //TODO the built-in parser can't handle [NULL TO 100.0}, i.e. a mix of [ and } //so we need to handle this ourselves (greater and less-than-or-equal) var rangeQuery = database.IndexStorage.GetLuceneQuery(index, new IndexQuery { Query = facet.Name + ":" + range }, database.IndexQueryTriggers); var joinedQuery = new BooleanQuery(); joinedQuery.Add(baseQuery, BooleanClause.Occur.MUST); joinedQuery.Add(rangeQuery, BooleanClause.Occur.MUST); var topDocs = currentIndexSearcher.Search(joinedQuery, null, 1); if (topDocs.TotalHits > 0) { rangeResults.Add(new FacetValue { Count = topDocs.TotalHits, Range = range }); } } results[facet.Name] = rangeResults; }
public virtual void TestBQ14() { BooleanQuery q = new BooleanQuery(true); q.Add(new TermQuery(new Term(FIELD, "QQQQQ")), BooleanClause.Occur.SHOULD); q.Add(new TermQuery(new Term(FIELD, "w1")), BooleanClause.Occur.SHOULD); Qtest(q, new int[] { 0, 1, 2, 3 }); }
public virtual void TestMethod() { RAMDirectory directory = new RAMDirectory(); System.String[] values = new System.String[]{"1", "2", "3", "4"}; try { IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); for (int i = 0; i < values.Length; i++) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.UN_TOKENIZED)); writer.AddDocument(doc); } writer.Close(); BooleanQuery booleanQuery1 = new BooleanQuery(); booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD); booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD); BooleanQuery query = new BooleanQuery(); query.Add(booleanQuery1, BooleanClause.Occur.MUST); query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT); IndexSearcher indexSearcher = new IndexSearcher(directory); Hits hits = indexSearcher.Search(query); Assert.AreEqual(2, hits.Length(), "Number of matched documents"); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } }
public virtual void TestBQ1() { BooleanQuery query = new BooleanQuery(); query.Add(new TermQuery(new Term(FIELD, "w1")), BooleanClause.Occur.MUST); query.Add(new TermQuery(new Term(FIELD, "w2")), BooleanClause.Occur.MUST); Qtest(query, new int[] { 0, 1, 2, 3 }); }
public static Lucene.Net.Search.Query FilterQueryByClasses(IESI.ISet<System.Type> classesAndSubclasses, Lucene.Net.Search.Query luceneQuery) { // A query filter is more practical than manually filtering by class after the query (especially on scrollable resultsets); // it also probably minimises the memory footprint if (classesAndSubclasses == null) { return luceneQuery; } BooleanQuery classFilter = new BooleanQuery(); // annihilate the scoring impact of DocumentBuilder.CLASS_FIELDNAME classFilter.SetBoost(0); foreach (System.Type clazz in classesAndSubclasses) { Term t = new Term(DocumentBuilder.CLASS_FIELDNAME, TypeHelper.LuceneTypeName(clazz)); TermQuery termQuery = new TermQuery(t); classFilter.Add(termQuery, BooleanClause.Occur.SHOULD); } BooleanQuery filteredQuery = new BooleanQuery(); filteredQuery.Add(luceneQuery, BooleanClause.Occur.MUST); filteredQuery.Add(classFilter, BooleanClause.Occur.MUST); return filteredQuery; }
private void AddDocumentPage_INTERNAL(string fingerprint, int page, Document document) { // Write to the index // Utilities.LockPerfTimer l1_clk = Utilities.LockPerfChecker.Start(); lock (index_writer_lock) { // l1_clk.LockPerfTimerStop(); if (null == index_writer) { Logging.Info("+Creating a new lucene IndexWriter"); index_writer = new Lucene.Net.Index.IndexWriter(LIBRARY_INDEX_BASE_PATH, analyzer, IndexWriter.MaxFieldLength.UNLIMITED); Logging.Info("-Creating a new lucene IndexWriter"); } // Delete the document if it already exists Lucene.Net.Search.BooleanQuery bq = new Lucene.Net.Search.BooleanQuery(); bq.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("fingerprint", fingerprint)), Lucene.Net.Search.BooleanClause.Occur.MUST); bq.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("page", System.Convert.ToString(page))), Lucene.Net.Search.BooleanClause.Occur.MUST); index_writer.DeleteDocuments(bq); // Add the new document if (null != document) { index_writer.AddDocument(document); } } }
public void CombinedFilters() { CreateData(); IFullTextSession s = Search.CreateFullTextSession(OpenSession()); s.Transaction.Begin(); BooleanQuery query = new BooleanQuery(); query.Add(new TermQuery(new Term("teacher", "andre")), BooleanClause.Occur.SHOULD); query.Add(new TermQuery(new Term("teacher", "max")), BooleanClause.Occur.SHOULD); query.Add(new TermQuery(new Term("teacher", "aaron")), BooleanClause.Occur.SHOULD); IFullTextQuery ftQuery = s.CreateFullTextQuery(query, typeof(Driver)); ftQuery.EnableFullTextFilter("bestDriver"); ftQuery.EnableFullTextFilter("security").SetParameter("Login", "andre"); Assert.AreEqual(1, ftQuery.ResultSize, "Should filter to limit to Emmanuel"); ftQuery = s.CreateFullTextQuery(query, typeof(Driver)); ftQuery.EnableFullTextFilter("bestDriver"); ftQuery.EnableFullTextFilter("security").SetParameter("login", "andre"); ftQuery.DisableFullTextFilter("security"); ftQuery.DisableFullTextFilter("bestDriver"); Assert.AreEqual(3, ftQuery.ResultSize, "Should not filter anymore"); s.Transaction.Commit(); s.Close(); DeleteData(); }
public virtual void TestAsterisk() { RAMDirectory indexStore = GetIndexStore("body", new System.String[]{"metal", "metals"}); IndexSearcher searcher = new IndexSearcher(indexStore); Query query1 = new TermQuery(new Term("body", "metal")); Query query2 = new WildcardQuery(new Term("body", "metal*")); Query query3 = new WildcardQuery(new Term("body", "m*tal")); Query query4 = new WildcardQuery(new Term("body", "m*tal*")); Query query5 = new WildcardQuery(new Term("body", "m*tals")); BooleanQuery query6 = new BooleanQuery(); query6.Add(query5, false, false); BooleanQuery query7 = new BooleanQuery(); query7.Add(query3, false, false); query7.Add(query5, false, false); // Queries do not automatically lower-case search terms: Query query8 = new WildcardQuery(new Term("body", "M*tal*")); AssertMatches(searcher, query1, 1); AssertMatches(searcher, query2, 2); AssertMatches(searcher, query3, 1); AssertMatches(searcher, query4, 2); AssertMatches(searcher, query5, 1); AssertMatches(searcher, query6, 1); AssertMatches(searcher, query7, 2); AssertMatches(searcher, query8, 0); }
public void TestQuery() { BooleanQuery booleanQuery=new BooleanQuery(); booleanQuery.Add(new TermQuery(new Term("name","medcl")),BooleanClause.Occur.MUST); booleanQuery.Add(new TermQuery(new Term("age","25")),BooleanClause.Occur.MUST); new ElasticSearch.Client.ElasticSearchClient("localhost").Search("index", "type", booleanQuery.ToString()); }
public static Query MergeQueries(Query queryA, Query queryB, BooleanClause.Occur queryAOccurence, BooleanClause.Occur queryBOccurence) { BooleanQuery compoundQuery = new BooleanQuery(); compoundQuery.Add(new BooleanClause(queryA, queryAOccurence)); compoundQuery.Add(new BooleanClause(queryB, queryBOccurence)); return compoundQuery; }
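A brief, hypothetical example of calling the MergeQueries helper above, requiring one term and merely preferring the other; the field names and terms are illustrative assumptions:
Query titleQuery = new TermQuery(new Term("title", "lucene"));
Query bodyQuery = new TermQuery(new Term("body", "search"));
// titleQuery must match; bodyQuery only boosts documents that also contain it.
Query combined = MergeQueries(titleQuery, bodyQuery, BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD);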
public virtual void TestANDImplicit() { BooleanQuery expected = new BooleanQuery(); expected.Add(new TermQuery(new Term("field", "foo")), BooleanClause.Occur.MUST); expected.Add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST); assertEquals(expected, Parse("foo bar")); }
public virtual void TestFlat() { BooleanQuery q = new BooleanQuery(); q.Add(new BooleanClause(t1, Occur.SHOULD)); q.Add(new BooleanClause(t2, Occur.SHOULD)); q.Add(new BooleanClause(c1, Occur.SHOULD)); q.Add(new BooleanClause(c2, Occur.SHOULD)); Assert.AreEqual(1, Search(q)); }
static Query createQuery2(string tag, string value, string value2) { Query query1 = new TermQuery(new Term(tag, value)); Query query2 = new TermQuery(new Term(tag, value2)); BooleanQuery query = new BooleanQuery(); query.Add(query1, Occur.MUST); query.Add(query2, Occur.MUST); return query; }
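As a quick illustration, the createQuery2 helper above could be called like this (the tag and values are hypothetical):
// Matches only documents whose 'category' field contains both terms.
Query bothCategories = createQuery2("category", "books", "fiction");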
public virtual void TestFlat() { BooleanQuery q = new BooleanQuery(); q.Add(new BooleanClause(T1, BooleanClause.Occur.SHOULD)); q.Add(new BooleanClause(T2, BooleanClause.Occur.SHOULD)); q.Add(new BooleanClause(C1, BooleanClause.Occur.SHOULD)); q.Add(new BooleanClause(C2, BooleanClause.Occur.SHOULD)); Assert.AreEqual(1, Search(q)); }
private void AddItemToIndex(AbstactDocument doc, IndexWriter writer) { var query = new BooleanQuery(); query.Add(new TermQuery(new Term("Id", doc.Id.ToString())), Occur.MUST); query.Add(new TermQuery(new Term("Type", doc.Type)), Occur.MUST); writer.DeleteDocuments(query); writer.AddDocument(doc.Document); }
public virtual void TestBQ11() { BooleanQuery query = new BooleanQuery(); query.Add(new TermQuery(new Term(FIELD, "w1")), BooleanClause.Occur.SHOULD); TermQuery boostedQuery = new TermQuery(new Term(FIELD, "w1")); boostedQuery.Boost = 1000; query.Add(boostedQuery, BooleanClause.Occur.SHOULD); Qtest(query, new int[] { 0, 1, 2, 3 }); }
/// <summary> Add a clause to a boolean query.</summary> private static void Add(BooleanQuery q, System.String k, System.String v, float boost) { Query tq = new TermQuery(new Term(k, v)); tq.SetBoost(boost); q.Add(new BooleanClause(tq, BooleanClause.Occur.SHOULD)); }
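A small sketch of how the private Add helper above might be used to assemble an all-optional query with per-field boosts; the field names and boost values are assumptions for illustration:
BooleanQuery q = new BooleanQuery();
Add(q, "title", "lucene", 2.0f);  // title matches weighted twice as heavily
Add(q, "body", "lucene", 1.0f);   // body matches at normal weight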
public void TestBooleanQuerySerialization() { Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery(); lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Occur.MUST); System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); System.IO.MemoryStream ms = new System.IO.MemoryStream(); #pragma warning disable SYSLIB0011 // Type or member is obsolete (BinaryFormatter) bf.Serialize(ms, lucQuery); ms.Seek(0, System.IO.SeekOrigin.Begin); Lucene.Net.Search.BooleanQuery lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms); #pragma warning restore SYSLIB0011 // Type or member is obsolete (BinaryFormatter) ms.Close(); Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization"); using var reader = DirectoryReader.Open(dir); //Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader); int hitCount = searcher.Search(lucQuery, 20).TotalHits; //searcher.Close(); searcher = new Lucene.Net.Search.IndexSearcher(reader); int hitCount2 = searcher.Search(lucQuery2, 20).TotalHits; Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts"); }
public void TestBooleanQuerySerialization() { Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery(); lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Occur.MUST); System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); System.IO.MemoryStream ms = new System.IO.MemoryStream(); bf.Serialize(ms, lucQuery); ms.Seek(0, System.IO.SeekOrigin.Begin); Lucene.Net.Search.BooleanQuery lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms); ms.Close(); Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization"); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true); int hitCount = searcher.Search(lucQuery, 20).TotalHits; searcher.Close(); searcher = new Lucene.Net.Search.IndexSearcher(dir, true); int hitCount2 = searcher.Search(lucQuery2, 20).TotalHits; Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts"); }
public void Test_Util_Parameter() { Lucene.Net.Search.BooleanQuery queryPreSerialized = new Lucene.Net.Search.BooleanQuery(); queryPreSerialized.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("country", "Russia")), Occur.MUST); queryPreSerialized.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("country", "France")), Occur.MUST); //now serialize it System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); System.IO.MemoryStream memoryStream = new System.IO.MemoryStream(); serializer.Serialize(memoryStream, queryPreSerialized); //now deserialize memoryStream.Seek(0, System.IO.SeekOrigin.Begin); Lucene.Net.Search.BooleanQuery queryPostSerialized = (Lucene.Net.Search.BooleanQuery)serializer.Deserialize(memoryStream); memoryStream.Close(); Assert.AreEqual(queryPreSerialized, queryPostSerialized, "See the issue: LUCENENET-170"); }
/// <summary> /// Adds the content sub query. /// </summary> /// <param name="query">The boolean query.</param> /// <param name="key">The field key.</param> /// <param name="value">The field value.</param> /// <param name="matchVariant">The match variant.</param> /// <param name="condition">The condition.</param> private void AddContentSubQuery(LuceneSearch.BooleanQuery query, string key, string value, MatchVariant matchVariant, QueryCondition condition) { if (matchVariant == MatchVariant.NotEquals) { query.Add(new LuceneSearch.TermQuery(new Term(key, value)), LuceneSearch.Occur.MUST_NOT); return; } LuceneSearch.Occur occurrence = this.GetOccur(condition); LuceneSearch.TermRangeQuery rangeQuery = this.GetRangeQuery(key, value, matchVariant); if (rangeQuery != null) { query.Add(rangeQuery, occurrence); return; } string[] keywords = value.Split(' '); if (keywords.Length > 1) { LuceneSearch.PhraseQuery phraseQuery = new Lucene.Net.Search.PhraseQuery(); foreach (string keyword in keywords) { phraseQuery.Add(new Term(key, keyword)); } query.Add(phraseQuery, occurrence); } else if (matchVariant == MatchVariant.Like) { query.Add(new LuceneSearch.WildcardQuery(new Term(key, value + "*")), occurrence); } else { query.Add(new LuceneSearch.TermQuery(new Term(key, value)), occurrence); } }
/// <summary> /// Builds the result query. /// </summary> /// <returns>returns Query object</returns> public Lucene.Net.Search.BooleanQuery BuildResultQuery() { this.AddSearchRoot(this.resultQuery, this.internalQuery.SearchRoot); if (this.internalQuery.FirstNode != null) { Lucene.Net.Search.BooleanQuery condition = this.resultQuery; this.BuildQuery(new Lucene.Net.Search.BooleanQuery(), this.internalQuery.FirstNode); condition.Add(this.resultQuery, Lucene.Net.Search.Occur.MUST); this.resultQuery = condition; } return(this.resultQuery); }
/// <summary> Parses a query, searching on the fields specified. Use this if you need /// to specify certain fields as required, and others as prohibited. /// <p/> /// Usage: /// <code> /// String[] query = {"query1", "query2", "query3"}; /// String[] fields = {"filename", "contents", "description"}; /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, /// BooleanClause.Occur.MUST, /// BooleanClause.Occur.MUST_NOT}; /// MultiFieldQueryParser.parse(query, fields, flags, analyzer); /// </code> /// <p/> /// The code above would construct a query: /// /// <code> /// (filename:query1) +(contents:query2) -(description:query3) /// </code> /// /// </summary> /// <param name="matchVersion">Lucene version to match; this is passed through to /// QueryParser. /// </param> /// <param name="queries">Query strings to parse /// </param> /// <param name="fields">Fields to search on /// </param> /// <param name="flags">Flags describing the fields /// </param> /// <param name="analyzer">Analyzer to use /// </param> /// <throws> ParseException </throws> /// <summary> if query parsing fails /// </summary> /// <throws> IllegalArgumentException </throws> /// <summary> if the lengths of the queries, fields, and flags arrays differ /// </summary> public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, Occur[] flags, Analyzer analyzer) { if (!(queries.Length == fields.Length && queries.Length == flags.Length)) { throw new System.ArgumentException("queries, fields, and flags arrays have different lengths"); } BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.Length; i++) { QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); Query q = qp.Parse(queries[i]); if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0)) { bQuery.Add(q, flags[i]); } } return(bQuery); }
/// <summary> Parses a query, searching on the fields specified. Use this if you need /// to specify certain fields as required, and others as prohibited. /// <p/> /// Usage: /// <code> /// String[] fields = {"filename", "contents", "description"}; /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, /// BooleanClause.Occur.MUST, /// BooleanClause.Occur.MUST_NOT}; /// MultiFieldQueryParser.parse("query", fields, flags, analyzer); /// </code> /// <p/> /// The code above would construct a query: /// /// <code> /// (filename:query) +(contents:query) -(description:query) /// </code> /// /// </summary> /// <param name="matchVersion">Lucene version to match; this is passed through to /// QueryParser. /// </param> /// <param name="query">Query string to parse /// </param> /// <param name="fields">Fields to search on /// </param> /// <param name="flags">Flags describing the fields /// </param> /// <param name="analyzer">Analyzer to use /// </param> /// <throws> ParseException </throws> /// <summary> if query parsing fails /// </summary> /// <throws> IllegalArgumentException </throws> /// <summary> if the length of the fields array differs from the length of /// the flags array /// </summary> public static Query Parse(Version matchVersion, string query, string[] fields, Occur[] flags, Analyzer analyzer) { if (fields.Length != flags.Length) { throw new System.ArgumentException("fields.length != flags.length"); } BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.Length; i++) { QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); Query q = qp.Parse(query); if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0)) { bQuery.Add(q, flags[i]); } } return(bQuery); }
/// <summary> Create the More like query from a PriorityQueue</summary> private Query CreateQuery(PriorityQueue <object[]> q) { BooleanQuery query = new BooleanQuery(); System.Object cur; int qterms = 0; float bestScore = 0; while (((cur = q.Pop()) != null)) { System.Object[] ar = (System.Object[])cur; TermQuery tq = new TermQuery(new Term((System.String)ar[1], (System.String)ar[0])); if (boost) { if (qterms == 0) { bestScore = (float)((System.Single)ar[2]); } float myScore = (float)((System.Single)ar[2]); tq.Boost = myScore / bestScore; } try { query.Add(tq, Occur.SHOULD); } catch (BooleanQuery.TooManyClauses) { break; } qterms++; if (maxQueryTerms > 0 && qterms >= maxQueryTerms) { break; } } return(query); }
/// <summary> /// Adds the search root. /// </summary> /// <param name="query">The query.</param> /// <param name="searchRoot">The search root.</param> protected virtual void AddSearchRoot(LuceneSearch.BooleanQuery query, string searchRoot) { if (!string.IsNullOrEmpty(searchRoot)) { if (ID.IsID(searchRoot)) { searchRoot = this.GetItemPath(new ID(searchRoot)); } else { Item rootItem = this.Database.SelectSingleItem(searchRoot); if (rootItem != null) { searchRoot = this.GetItemPath(rootItem.ID); } } query.Add(new LuceneSearch.TermQuery(new Term(Sitecore.Search.BuiltinFields.Path, searchRoot)), LuceneSearch.Occur.MUST); } }
/// <summary> Create the More like query from a PriorityQueue</summary> private Query CreateQuery(Lucene.Net.Util.PriorityQueue q) { Lucene.Net.Search.BooleanQuery query = new Lucene.Net.Search.BooleanQuery(); Object cur; int qterms = 0; float bestScore = 0; while (((cur = q.Pop()) != null)) { PQRecord ar = (PQRecord)cur; Lucene.Net.Search.TermQuery tq = new Lucene.Net.Search.TermQuery(new Term(ar.topField, ar.word)); if (boost) { if (qterms == 0) { bestScore = ar.score; } float myScore = ar.score; tq.SetBoost(myScore / bestScore); } try { query.Add(tq, Lucene.Net.Search.BooleanClause.Occur.SHOULD); } catch (Lucene.Net.Search.BooleanQuery.TooManyClauses) { break; } qterms++; if (maxQueryTerms > 0 && qterms >= maxQueryTerms) { break; } } return(query); }
//////////////////////////////////////////////////////////////// // // Special logic for handling our set of required queries // // This is the easy case: we just combine all of the queries // into one big BooleanQuery. private static BetterBitArray DoRequiredQueries(LNS.IndexSearcher primary_searcher, ArrayList primary_queries, BetterBitArray primary_whitelist) { LNS.BooleanQuery combined_query; combined_query = new LNS.BooleanQuery(); foreach (LNS.Query query in primary_queries) { combined_query.Add(query, LNS.BooleanClause.Occur.MUST); } LuceneBitArray matches; matches = new LuceneBitArray(primary_searcher, combined_query); if (primary_whitelist != null) { matches.And(primary_whitelist); } return(matches); }
public virtual void TestRandomQueries() { string[] vals = new string[] { "w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz" }; int tot = 0; BooleanQuery q1 = null; try { // increase number of iterations for more complete testing int num = AtLeast(20); for (int i = 0; i < num; i++) { int level = Random.Next(3); q1 = RandBoolQuery(new Random(Random.Next()), Random.NextBoolean(), level, field, vals, null); // Can't sort by relevance since floating point numbers may not quite // match up. Sort sort = Sort.INDEXORDER; QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, q1, Searcher); // baseline sim try { // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop. Searcher.Similarity = BigSearcher.Similarity; // random sim QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, q1, Searcher); } finally { Searcher.Similarity = new DefaultSimilarity(); // restore } TopFieldCollector collector = TopFieldCollector.Create(sort, 1000, false, true, true, true); Searcher.Search(q1, null, collector); ScoreDoc[] hits1 = collector.GetTopDocs().ScoreDocs; collector = TopFieldCollector.Create(sort, 1000, false, true, true, false); Searcher.Search(q1, null, collector); ScoreDoc[] hits2 = collector.GetTopDocs().ScoreDocs; tot += hits2.Length; CheckHits.CheckEqual(q1, hits1, hits2); BooleanQuery q3 = new BooleanQuery(); q3.Add(q1, Occur.SHOULD); q3.Add(new PrefixQuery(new Term("field2", "b")), Occur.SHOULD); TopDocs hits4 = BigSearcher.Search(q3, 1); Assert.AreEqual(MulFactor * collector.TotalHits + NUM_EXTRA_DOCS / 2, hits4.TotalHits); } } catch (Exception) { // For easier debugging Console.WriteLine("failed query: " + q1); throw; } // System.out.println("Total hits:"+tot); }
/// <summary> /// Adds the ID query. /// </summary> /// <param name="query">The boolean query.</param> /// <param name="key">The field key.</param> /// <param name="value">The field value.</param> /// <param name="condition">The condition.</param> private void AddIdQuery(LuceneSearch.BooleanQuery query, string key, string value, QueryCondition condition) { value = this.GetItemPath(new ID(value)); LuceneSearch.Occur occurrence = this.GetOccur(condition); query.Add(new LuceneSearch.TermQuery(new Term(key, value)), occurrence); }
public virtual void TestPhraseQueryInConjunctionScorer() { Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("source", "marketing info", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "foobar", Field.Store.YES)); doc.Add(NewTextField("source", "marketing info", Field.Store.YES)); writer.AddDocument(doc); IndexReader reader = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(reader); PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("source", "marketing")); phraseQuery.Add(new Term("source", "info")); ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check(Random(), phraseQuery, searcher, Similarity); TermQuery termQuery = new TermQuery(new Term("contents", "foobar")); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); QueryUtils.Check(Random(), termQuery, searcher, Similarity); reader.Dispose(); writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE)); doc = new Documents.Document(); doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES)); writer.AddDocument(doc); reader = writer.Reader; writer.Dispose(); searcher = NewSearcher(reader); termQuery = new TermQuery(new Term("contents", "woo")); phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("contents", "map")); phraseQuery.Add(new Term("contents", "entry")); hits = searcher.Search(termQuery, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(phraseQuery, Occur.MUST); booleanQuery.Add(termQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check(Random(), booleanQuery, searcher, Similarity); reader.Dispose(); directory.Dispose(); }
// Random rnd is passed in so that the exact same random query may be created // more than once. public static BooleanQuery RandBoolQuery(Random rnd, bool allowMust, int level, string field, string[] vals, Callback cb) { BooleanQuery current = new BooleanQuery(rnd.Next() < 0); for (int i = 0; i < rnd.Next(vals.Length) + 1; i++) { int qType = 0; // term query if (level > 0) { qType = rnd.Next(10); } Query q; if (qType < 3) { q = new TermQuery(new Term(field, vals[rnd.Next(vals.Length)])); } else if (qType < 4) { Term t1 = new Term(field, vals[rnd.Next(vals.Length)]); Term t2 = new Term(field, vals[rnd.Next(vals.Length)]); PhraseQuery pq = new PhraseQuery(); pq.Add(t1); pq.Add(t2); pq.Slop = 10; // increase possibility of matching q = pq; } else if (qType < 7) { q = new WildcardQuery(new Term(field, "w*")); } else { q = RandBoolQuery(rnd, allowMust, level - 1, field, vals, cb); } int r = rnd.Next(10); Occur occur; if (r < 2) { occur = Occur.MUST_NOT; } else if (r < 5) { if (allowMust) { occur = Occur.MUST; } else { occur = Occur.SHOULD; } } else { occur = Occur.SHOULD; } current.Add(q, occur); } if (cb != null) { cb.PostCreate(current); } return(current); }
public virtual void TestQuery() { RAMDirectory dir = new RAMDirectory(); IndexWriter iw = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null); iw.SetMaxBufferedDocs(2); // force multi-segment AddDoc("one", iw, 1f); AddDoc("two", iw, 20f); AddDoc("three four", iw, 300f); iw.Close(); IndexReader ir = IndexReader.Open((Directory)dir, false, null); IndexSearcher is_Renamed = new IndexSearcher(ir); ScoreDoc[] hits; // assert with norms scoring turned off hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000, null).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(ir.Document(hits[0].Doc, null).Get("key", null), "one"); Assert.AreEqual(ir.Document(hits[1].Doc, null).Get("key", null), "two"); Assert.AreEqual(ir.Document(hits[2].Doc, null).Get("key", null), "three four"); // assert with norms scoring turned on MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key"); hits = is_Renamed.Search(normsQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(ir.Document(hits[0].Doc, null).Get("key", null), "three four"); Assert.AreEqual(ir.Document(hits[1].Doc, null).Get("key", null), "two"); Assert.AreEqual(ir.Document(hits[2].Doc, null).Get("key", null), "one"); // change norm & retest ir.SetNorm(0, "key", 400f, null); normsQuery = new MatchAllDocsQuery("key"); hits = is_Renamed.Search(normsQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(ir.Document(hits[0].Doc, null).Get("key", null), "one"); Assert.AreEqual(ir.Document(hits[1].Doc, null).Get("key", null), "three four"); Assert.AreEqual(ir.Document(hits[2].Doc, null).Get("key", null), "two"); // some artificial queries to trigger the use of skipTo(): BooleanQuery bq = new BooleanQuery(); bq.Add(new MatchAllDocsQuery(), Occur.MUST); bq.Add(new MatchAllDocsQuery(), Occur.MUST); hits = is_Renamed.Search(bq, null, 1000, null).ScoreDocs; Assert.AreEqual(3, hits.Length); bq = new BooleanQuery(); bq.Add(new MatchAllDocsQuery(), Occur.MUST); bq.Add(new TermQuery(new Term("key", "three")), Occur.MUST); hits = is_Renamed.Search(bq, null, 1000, null).ScoreDocs; Assert.AreEqual(1, hits.Length); // delete a document: is_Renamed.IndexReader.DeleteDocument(0, null); hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000, null).ScoreDocs; Assert.AreEqual(2, hits.Length); // test parsable toString() QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "key", analyzer); hits = is_Renamed.Search(qp.Parse(new MatchAllDocsQuery().ToString()), null, 1000, null).ScoreDocs; Assert.AreEqual(2, hits.Length); // test parsable toString() with non default boost Query maq = new MatchAllDocsQuery(); maq.Boost = 2.3f; Query pq = qp.Parse(maq.ToString()); hits = is_Renamed.Search(pq, null, 1000, null).ScoreDocs; Assert.AreEqual(2, hits.Length); is_Renamed.Close(); ir.Close(); dir.Close(); }
// Return all directories with name public ICollection GetAllDirectoryNameInfo(string name) { // First we assemble a query to find all of the directories. string field_name; field_name = PropertyToFieldName(PropertyType.Keyword, Property.IsDirectoryPropKey); LNS.Query isdir_query = new LNS.TermQuery(new Term(field_name, "true")); LNS.Query query = null; if (name == null) { query = isdir_query; } else { string dirname_field; dirname_field = PropertyToFieldName(PropertyType.Text, Property.TextFilenamePropKey); LNS.Query dirname_query; dirname_query = LuceneCommon.StringToQuery(dirname_field, name, null); LNS.BooleanQuery bool_query = new LNS.BooleanQuery(); bool_query.Add(isdir_query, LNS.BooleanClause.Occur.MUST); bool_query.Add(dirname_query, LNS.BooleanClause.Occur.MUST); query = bool_query; } // Then we actually run the query LNS.IndexSearcher searcher; //searcher = new LNS.IndexSearcher (SecondaryStore); searcher = LuceneCommon.GetSearcher(SecondaryStore); BetterBitArray matches; matches = new BetterBitArray(searcher.MaxDoc()); BitArrayHitCollector collector; collector = new BitArrayHitCollector(matches); searcher.Search(query, null, collector); // Finally we pull all of the matching documents, // convert them to NameInfo, and store them in a list. ArrayList match_list = new ArrayList(); int i = 0; while (i < matches.Count) { i = matches.GetNextTrueIndex(i); if (i >= matches.Count) { break; } Document doc; doc = searcher.Doc(i, fields_nameinfo); NameInfo info; info = DocumentToNameInfo(doc); match_list.Add(info); ++i; } LuceneCommon.ReleaseSearcher(searcher); return(match_list); }
public virtual void TestBS2DisjunctionNextVsAdvance() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); int numDocs = AtLeast(300); for (int docUpto = 0; docUpto < numDocs; docUpto++) { string contents = "a"; if (Random.Next(20) <= 16) { contents += " b"; } if (Random.Next(20) <= 8) { contents += " c"; } if (Random.Next(20) <= 4) { contents += " d"; } if (Random.Next(20) <= 2) { contents += " e"; } if (Random.Next(20) <= 1) { contents += " f"; } Document doc = new Document(); doc.Add(new TextField("field", contents, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); w.Dispose(); for (int iter = 0; iter < 10 * RandomMultiplier; iter++) { if (Verbose) { Console.WriteLine("iter=" + iter); } IList <string> terms = new JCG.List <string> { "a", "b", "c", "d", "e", "f" }; int numTerms = TestUtil.NextInt32(Random, 1, terms.Count); while (terms.Count > numTerms) { terms.RemoveAt(Random.Next(terms.Count)); } if (Verbose) { Console.WriteLine(" terms=" + terms); } BooleanQuery q = new BooleanQuery(); foreach (string term in terms) { q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD)); } Weight weight = s.CreateNormalizedWeight(q); Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null); // First pass: just use .NextDoc() to gather all hits IList <ScoreDoc> hits = new JCG.List <ScoreDoc>(); while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore())); } if (Verbose) { Console.WriteLine(" " + hits.Count + " hits"); } // Now, randomly next/advance through the list and // verify exact match: for (int iter2 = 0; iter2 < 10; iter2++) { weight = s.CreateNormalizedWeight(q); scorer = weight.GetScorer(s.m_leafContexts[0], null); if (Verbose) { Console.WriteLine(" iter2=" + iter2); } int upto = -1; while (upto < hits.Count) { int nextUpto; int nextDoc; int left = hits.Count - upto; if (left == 1 || Random.nextBoolean()) { // next nextUpto = 1 + upto; nextDoc = scorer.NextDoc(); } else { // advance int inc = TestUtil.NextInt32(Random, 1, left - 1); nextUpto = inc + upto; nextDoc = scorer.Advance(hits[nextUpto].Doc); } if (nextUpto == hits.Count) { Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc); } else { ScoreDoc hit = hits[nextUpto]; Assert.AreEqual(hit.Doc, nextDoc); // LUCENENET: For some weird reason, on x86 in .NET Framework (optimizations enabled), using == (as they did in Lucene) doesn't work with optimizations enabled, but using AreEqual with epsilon of 0f does. // Test for precise float equality: Assert.AreEqual(hit.Score, scorer.GetScore(), 0f, "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore()); } upto = nextUpto; } } } r.Dispose(); d.Dispose(); }
public BooleanQuery GetCommQuery(string changecommBody, string changecommGroup, string changecommApps, string changecommCI) { Lucene.Net.Analysis.Analyzer commsAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30); Lucene.Net.Search.BooleanQuery.MaxClauseCount = 25000; TextReader textReadCommBody = new StringReader(changecommBody); TextReader textReadCommGroup = new StringReader(changecommGroup); TextReader textReadCommApps = new StringReader(changecommApps); TextReader textReadCommCI = new StringReader(changecommCI); Lucene.Net.Analysis.TokenStream tokenizedCommBody = commsAnalyzer.TokenStream(changecommBody, textReadCommBody); Lucene.Net.Analysis.TokenStream tokenizedCommGroup = commsAnalyzer.TokenStream(changecommGroup, textReadCommGroup); Lucene.Net.Analysis.TokenStream tokenizedCommApps = commsAnalyzer.TokenStream(changecommApps, textReadCommApps); Lucene.Net.Analysis.TokenStream tokenizedCommCI = commsAnalyzer.TokenStream(changecommCI, textReadCommCI); Lucene.Net.Search.BooleanQuery query1 = new Lucene.Net.Search.BooleanQuery(); try { int tokenCount = 0; tokenizedCommBody.Reset(); var termAttrText = tokenizedCommBody.GetAttribute <Lucene.Net.Analysis.Tokenattributes.ITermAttribute>(); while (tokenizedCommBody.IncrementToken()) { tokenCount++; string Term = termAttrText.Term; query1.Add(new Lucene.Net.Search.TermQuery(new Term("change_description", Term)), Lucene.Net.Search.Occur.SHOULD); } } catch (Exception ex) { Debug.WriteLine(ex); } Lucene.Net.Search.BooleanQuery query2 = new Lucene.Net.Search.BooleanQuery(); try { tokenizedCommGroup.Reset(); var termAttrTicker = tokenizedCommGroup.GetAttribute <Lucene.Net.Analysis.Tokenattributes.ITermAttribute>(); int tokenCount = 0; while (tokenizedCommGroup.IncrementToken()) { tokenCount++; string Term = termAttrTicker.Term; query2.Add(new Lucene.Net.Search.TermQuery(new Term("change_group", Term)), Lucene.Net.Search.Occur.SHOULD); } } catch (Exception ex) { Debug.WriteLine(ex); } Lucene.Net.Search.BooleanQuery query3 = new Lucene.Net.Search.BooleanQuery(); try { tokenizedCommApps.Reset(); var termAttrTicker = tokenizedCommApps.GetAttribute <Lucene.Net.Analysis.Tokenattributes.ITermAttribute>(); int tokenCount = 0; while (tokenizedCommApps.IncrementToken()) { tokenCount++; string Term = termAttrTicker.Term; query3.Add(new Lucene.Net.Search.TermQuery(new Term("application", Term)), Lucene.Net.Search.Occur.SHOULD); } } catch (Exception ex) { Debug.WriteLine(ex); } Lucene.Net.Search.BooleanQuery query4 = new Lucene.Net.Search.BooleanQuery(); try { tokenizedCommCI.Reset(); var termAttrTicker = tokenizedCommCI.GetAttribute <Lucene.Net.Analysis.Tokenattributes.ITermAttribute>(); int tokenCount = 0; while (tokenizedCommCI.IncrementToken()) { tokenCount++; string Term = termAttrTicker.Term; query4.Add(new Lucene.Net.Search.TermQuery(new Term("change_CI", Term)), Lucene.Net.Search.Occur.SHOULD); } } catch (Exception ex) { Debug.WriteLine(ex); } Lucene.Net.Search.BooleanQuery comQuery = new Lucene.Net.Search.BooleanQuery(); query4.Boost = 5; query3.MinimumNumberShouldMatch = 1; comQuery.Add(query1, Lucene.Net.Search.Occur.SHOULD); comQuery.Add(query2, Lucene.Net.Search.Occur.SHOULD); comQuery.Add(query3, Lucene.Net.Search.Occur.SHOULD); comQuery.Add(query4, Lucene.Net.Search.Occur.SHOULD); return(comQuery); }
public override Query Rewrite(IndexReader reader) { if (!termLongEnough) { // can only match if it's exact return(new TermQuery(Term)); } int maxSize = BooleanQuery.MaxClauseCount; // TODO: Java uses a PriorityQueue. Using LINQ we can emulate it, but it is considerably slower than the Java counterpart. // This should be a temporary thing, fixed before release. SortedList<ScoreTerm, ScoreTerm> stQueue = new SortedList<ScoreTerm, ScoreTerm>(); FilteredTermEnum enumerator = GetEnum(reader); try { ScoreTerm st = new ScoreTerm(); do { Term t = enumerator.Term; if (t == null) { break; } float score = enumerator.Difference(); // ignore uncompetitive hits if (stQueue.Count >= maxSize && score <= stQueue.Keys.First().score) { continue; } // add new entry in PQ st.term = t; st.score = score; stQueue.Add(st, st); // possibly drop entries from queue if (stQueue.Count > maxSize) { st = stQueue.Keys.First(); stQueue.Remove(st); } else { st = new ScoreTerm(); } } while (enumerator.Next()); } finally { enumerator.Close(); } BooleanQuery query = new BooleanQuery(true); foreach (ScoreTerm st in stQueue.Keys) { TermQuery tq = new TermQuery(st.term); // found a match tq.Boost = Boost * st.score; // set the boost query.Add(tq, Occur.SHOULD); // add to query } return(query); }
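// The TODO above notes that emulating Java's PriorityQueue with a SortedList is considerably
// slower. A plain-C# sketch of the bounded "keep only the best maxSize scores" idea, assuming
// ties are broken by insertion order so equal scores are never silently dropped; this is only
// an illustration of the priority-queue approach, not the fix Lucene.NET actually shipped.
using System.Collections.Generic;

sealed class BoundedTopScores
{
    private readonly int maxSize;
    private long insertionOrder;
    // Ordered ascending, so Min is always the weakest competitive entry.
    private readonly SortedSet<(float Score, long Order, string Term)> entries =
        new SortedSet<(float, long, string)>(
            Comparer<(float Score, long Order, string Term)>.Create((a, b) =>
            {
                int cmp = a.Score.CompareTo(b.Score);
                return cmp != 0 ? cmp : a.Order.CompareTo(b.Order);
            }));

    public BoundedTopScores(int maxSize) { this.maxSize = maxSize; }

    public void Offer(string term, float score)
    {
        // Ignore uncompetitive hits once the structure is full.
        if (entries.Count >= maxSize && score <= entries.Min.Score) return;
        entries.Add((score, insertionOrder++, term));
        if (entries.Count > maxSize) entries.Remove(entries.Min); // drop the weakest entry
    }

    public IEnumerable<(float Score, string Term)> Results()
    {
        foreach (var e in entries) yield return (e.Score, e.Term);
    }
}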
public virtual void TestBasic() { Directory dir = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.MergeFactor = 2; writer.SetMaxBufferedDocs(2); writer.SetSimilarity(new SimpleSimilarity()); System.Text.StringBuilder sb = new System.Text.StringBuilder(265); System.String term = "term"; for (int i = 0; i < 30; i++) { Document d = new Document(); sb.Append(term).Append(" "); System.String content = sb.ToString(); Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED); noTf.OmitTermFreqAndPositions = true; d.Add(noTf); Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED); d.Add(tf); writer.AddDocument(d); //System.out.println(d); } writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(dir); /* * Verify the index */ Searcher searcher = new IndexSearcher(dir, true); searcher.Similarity = new SimpleSimilarity(); Term a = new Term("noTf", term); Term b = new Term("tf", term); Term c = new Term("noTf", "notf"); Term d2 = new Term("tf", "tf"); TermQuery q1 = new TermQuery(a); TermQuery q2 = new TermQuery(b); TermQuery q3 = new TermQuery(c); TermQuery q4 = new TermQuery(d2); searcher.Search(q1, new AnonymousClassCountingHitCollector(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q2, new AnonymousClassCountingHitCollector1(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q3, new AnonymousClassCountingHitCollector2(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q4, new AnonymousClassCountingHitCollector3(this)); //System.out.println(CountingHitCollector.getCount()); BooleanQuery bq = new BooleanQuery(); bq.Add(q1, Occur.MUST); bq.Add(q4, Occur.MUST); searcher.Search(bq, new AnonymousClassCountingHitCollector4(this)); Assert.IsTrue(15 == CountingHitCollector.GetCount()); searcher.Close(); dir.Close(); }
public virtual void TestNullOrSubScorer() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(NewTextField("field", "a b c d", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); // this test relies upon coord being the default implementation, // otherwise scores are different! s.Similarity = new DefaultSimilarity(); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor float score = s.Search(q, 10).MaxScore; Query subQuery = new TermQuery(new Term("field", "not_in_index")); subQuery.Boost = 0; q.Add(subQuery, Occur.SHOULD); float score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * .5F, score2, 1e-6); // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor BooleanQuery qq = (BooleanQuery)q.Clone(); PhraseQuery phrase = new PhraseQuery(); phrase.Add(new Term("field", "not_in_index")); phrase.Add(new Term("field", "another_not_in_index")); phrase.Boost = 0; qq.Add(phrase, Occur.SHOULD); score2 = s.Search(qq, 10).MaxScore; Assert.AreEqual(score * (1 / 3F), score2, 1e-6); // now test BooleanScorer2 subQuery = new TermQuery(new Term("field", "b")); subQuery.Boost = 0; q.Add(subQuery, Occur.MUST); score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * (2 / 3F), score2, 1e-6); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).TotalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); q.Add(pq, Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).TotalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); r.Dispose(); w.Dispose(); dir.Dispose(); }
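// The coord-factor assertions above follow directly from DefaultSimilarity, where
// coord(overlap, maxOverlap) = overlap / maxOverlap: a zero-boost clause contributes nothing to
// the raw score, but it still raises maxOverlap and therefore scales the matching clause down.
// A quick sketch of the expected values used in the assertions; baseScore stands in for the
// single-clause MaxScore measured first.
using System;

static class CoordFactorSketch
{
    static float Coord(int overlap, int maxOverlap) { return (float)overlap / maxOverlap; }

    static void Main()
    {
        float baseScore = 1.0f;                      // single SHOULD clause: coord(1, 1) == 1
        Console.WriteLine(baseScore * Coord(1, 2));  // 0.5       -> Assert.AreEqual(score * .5F, ...)
        Console.WriteLine(baseScore * Coord(1, 3));  // 0.333...  -> Assert.AreEqual(score * (1 / 3F), ...)
        Console.WriteLine(baseScore * Coord(2, 3));  // 0.666...  -> Assert.AreEqual(score * (2 / 3F), ...)
    }
}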
public virtual void TestExplain() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewStringField("id", "0", Field.Store.YES)); doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); // Do ordinary BooleanQuery: BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = GetSearcher(r); TopDocs hits = searcher.Search(bq, 10); Assert.AreEqual(2, hits.TotalHits); Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id")); // Now, resort using PhraseQuery: PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("field", "wizard")); pq.Add(new Term("field", "oz")); Rescorer rescorer = new QueryRescorerAnonymousInnerClassHelper2(this, pq); TopDocs hits2 = rescorer.Rescore(searcher, hits, 10); // Resorting changed the order: Assert.AreEqual(2, hits2.TotalHits); Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id")); int docID = hits2.ScoreDocs[0].Doc; Explanation explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID); string s = explain.ToString(); Assert.IsTrue(s.Contains("TestQueryRescorer+")); Assert.IsTrue(s.Contains("combined first and second pass score")); Assert.IsTrue(s.Contains("first pass score")); Assert.IsTrue(s.Contains("= second pass score")); Assert.AreEqual(hits2.ScoreDocs[0].Score, explain.Value, 0.0f); docID = hits2.ScoreDocs[1].Doc; explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID); s = explain.ToString(); Assert.IsTrue(s.Contains("TestQueryRescorer+")); Assert.IsTrue(s.Contains("combined first and second pass score")); Assert.IsTrue(s.Contains("first pass score")); Assert.IsTrue(s.Contains("no second pass score")); Assert.IsFalse(s.Contains("= second pass score")); Assert.IsTrue(s.Contains("NON-MATCH")); Assert.IsTrue(Math.Abs(hits2.ScoreDocs[1].Score - explain.Value) < 0.0000001f); r.Dispose(); dir.Dispose(); }
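// The QueryRescorerAnonymousInnerClassHelper2 used above is a custom QueryRescorer; the strings
// asserted against Explain ("combined first and second pass score", "no second pass score") come
// from how such a rescorer combines the two passes. A hedged sketch of an equivalent named class;
// the class name is illustrative and the Combine signature is assumed to match Lucene.NET 4.8's
// abstract QueryRescorer. The 2.0f weight mirrors the multiplier the sibling test below passes to
// the static QueryRescorer.Rescore.
using Lucene.Net.Search;

public class FixedWeightRescorer : QueryRescorer
{
    private readonly float weight;

    public FixedWeightRescorer(Query secondPassQuery, float weight = 2.0f)
        : base(secondPassQuery)
    {
        this.weight = weight;
    }

    protected override float Combine(float firstPassScore, bool secondPassMatches, float secondPassScore)
    {
        // Documents matching the second-pass query get boosted; the rest keep their first-pass score.
        return secondPassMatches ? firstPassScore + weight * secondPassScore : firstPassScore;
    }
}
// Usage: Rescorer rescorer = new FixedWeightRescorer(pq); TopDocs rescored = rescorer.Rescore(searcher, hits, 10);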
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewStringField("id", "0", Field.Store.YES)); doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); // Do ordinary BooleanQuery: BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = GetSearcher(r); searcher.Similarity = new DefaultSimilarity(); TopDocs hits = searcher.Search(bq, 10); Assert.AreEqual(2, hits.TotalHits); Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id")); // Now, resort using PhraseQuery: PhraseQuery pq = new PhraseQuery(); pq.Slop = 5; pq.Add(new Term("field", "wizard")); pq.Add(new Term("field", "oz")); TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits2.TotalHits); Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id")); // Resort using SpanNearQuery: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard")); SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz")); SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true); TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits3.TotalHits); Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id")); r.Dispose(); dir.Dispose(); }
/// <summary> /// Searches indexed content. /// </summary> /// <param name="word">Search keyword</param> /// <param name="pagesize">Number of records to display per page</param> /// <param name="pageindex">Current page number</param> /// <returns></returns> public static SearchResult SearchContent(string modcode, string word, int pagesize, int pageindex, string searchparam1, string searchparam2, string searchparam3) { SearchResult searchResult = new SearchResult(); FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory()); IndexSearcher searcher = new IndexSearcher(directory, true); var analyzer = new PanGuAnalyzer(); // Initialize a MultiFieldQueryParser so that multiple fields are queried at once Lucene.Net.QueryParsers.MultiFieldQueryParser parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "title", "content" }, analyzer); parser.DefaultOperator = QueryParser.AND_OPERATOR; // set the default operator before parsing so it actually applies Lucene.Net.Search.Query query = parser.Parse(word); // parse the keyword into a Query Lucene.Net.Search.BooleanQuery boolQuery = new Lucene.Net.Search.BooleanQuery(); boolQuery.Add(query, Occur.MUST); if (!string.IsNullOrEmpty(modcode)) { PhraseQuery queryModCode = new PhraseQuery(); queryModCode.Add(new Term("modcode", modcode)); boolQuery.Add(queryModCode, Occur.MUST); } if (!string.IsNullOrEmpty(searchparam1)) { WildcardQuery query1 = new WildcardQuery(new Term("searchparam1", "*" + searchparam1 + "*")); boolQuery.Add(query1, Occur.MUST); } if (!string.IsNullOrEmpty(searchparam2)) { WildcardQuery query1 = new WildcardQuery(new Term("searchparam2", "*" + searchparam2 + "*")); boolQuery.Add(query1, Occur.MUST); } if (!string.IsNullOrEmpty(searchparam3)) { WildcardQuery query1 = new WildcardQuery(new Term("searchparam3", "*" + searchparam3 + "*")); boolQuery.Add(query1, Occur.MUST); } Sort sort = new Sort(new SortField("date", SortField.STRING, true)); var result = searcher.Search(boolQuery, null, 1000, sort); if (result.TotalHits == 0) { searchResult.count = 0; } else { searchResult.count = result.TotalHits; int startNum = 0, endNum = result.TotalHits; if (pagesize > 0) { // use paging when pagesize > 0 startNum = (pageindex - 1) * pagesize; endNum = startNum + pagesize; } ScoreDoc[] docs = result.ScoreDocs; List<JObject> dataList = new List<JObject>(); for (int i = 0; i < docs.Length; i++) { if (i < startNum) { continue; } if (i >= endNum) { break; } Document doc = searcher.Doc(docs[i].Doc); string id = doc.Get("id").ToString(); string title = doc.Get("title").ToString(); string content = doc.Get("content").ToString(); string date = doc.Get("date").ToString(); string param = doc.Get("param").ToString(); string mcode = doc.Get("modcode").ToString(); string param1 = doc.Get("searchparam1").ToString(); string param2 = doc.Get("searchparam2").ToString(); string param3 = doc.Get("searchparam3").ToString(); JObject obj = new JObject(); obj["id"] = id; // Create the SimpleHTMLFormatter; the parameters are the prefix and suffix wrapped around highlighted words string highLightTag = Util.GetAppSetting("HighLightTag", "<font color=\"red\">|</font>"); string[] tarArr = highLightTag.Split('|'); var simpleHTMLFormatter = new SimpleHTMLFormatter(tarArr[0], tarArr[1]); // Create the Highlighter from the HTMLFormatter and the PanGu segmenter var highlighter = new Highlighter(simpleHTMLFormatter, new PanGu.Segment()); // Set the number of characters in each summary fragment int highlightFragmentSize = Util.GetAppSetting("HighlightFragmentSize", "100").ToInt(); highlighter.FragmentSize = highlightFragmentSize; // Get the best matching summary fragment String bodyPreview = highlighter.GetBestFragment(word, content); string newTitle = highlighter.GetBestFragment(word, title); if (!string.IsNullOrEmpty(newTitle)) { title = newTitle; } obj["title"] = title; 
obj["content"] = bodyPreview; obj["date"] = date; obj["param"] = param; obj["modcode"] = mcode; obj["searchparam1"] = param1; obj["searchparam2"] = param2; obj["searchparam3"] = param3; dataList.Add(obj); } searchResult.data = dataList; } analyzer.Close(); searcher.Dispose(); directory.Dispose(); return(searchResult); }
public virtual void TestDeMorgan() { Directory dir1 = NewDirectory(); RandomIndexWriter iw1 = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir1); Document doc1 = new Document(); doc1.Add(NewTextField("field", "foo bar", Field.Store.NO)); iw1.AddDocument(doc1); IndexReader reader1 = iw1.GetReader(); iw1.Dispose(); Directory dir2 = NewDirectory(); RandomIndexWriter iw2 = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir2); Document doc2 = new Document(); doc2.Add(NewTextField("field", "foo baz", Field.Store.NO)); iw2.AddDocument(doc2); IndexReader reader2 = iw2.GetReader(); iw2.Dispose(); BooleanQuery query = new BooleanQuery(); // Query: +foo -ba* query.Add(new TermQuery(new Term("field", "foo")), Occur.MUST); WildcardQuery wildcardQuery = new WildcardQuery(new Term("field", "ba*")); wildcardQuery.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query.Add(wildcardQuery, Occur.MUST_NOT); MultiReader multireader = new MultiReader(reader1, reader2); IndexSearcher searcher = NewSearcher(multireader); Assert.AreEqual(0, searcher.Search(query, 10).TotalHits); Task foo = new Task(TestDeMorgan); TaskScheduler es = TaskScheduler.Default; searcher = new IndexSearcher(multireader, es); if (Verbose) { Console.WriteLine("rewritten form: " + searcher.Rewrite(query)); } Assert.AreEqual(0, searcher.Search(query, 10).TotalHits); multireader.Dispose(); reader1.Dispose(); reader2.Dispose(); dir1.Dispose(); dir2.Dispose(); }
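// TestDeMorgan's name refers to the rewrite behind the prohibited wildcard: excluding the union
// (bar OR baz) must behave like excluding bar AND excluding baz, even when the wildcard expands to
// different terms in different readers of the MultiReader. A manually expanded equivalent of
// "+foo -ba*" for the two documents indexed above (a sketch, not the rewrite Lucene produces verbatim):
using Lucene.Net.Index;
using Lucene.Net.Search;

static class DeMorganSketch
{
    public static BooleanQuery Expanded()
    {
        var expansion = new BooleanQuery();
        expansion.Add(new TermQuery(new Term("field", "bar")), Occur.SHOULD);
        expansion.Add(new TermQuery(new Term("field", "baz")), Occur.SHOULD);

        var query = new BooleanQuery();
        query.Add(new TermQuery(new Term("field", "foo")), Occur.MUST);
        query.Add(expansion, Occur.MUST_NOT);   // prohibit the whole expansion, not just the local term
        return query;
    }
}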
public virtual void TestPhraseQueryInConjunctionScorer() { RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); Document doc = new Document(); doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); doc = new Document(); doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); writer.Optimize(null); writer.Close(); IndexSearcher searcher = new IndexSearcher(directory, true, null); PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("source", "marketing")); phraseQuery.Add(new Term("source", "info")); ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check(phraseQuery, searcher); TermQuery termQuery = new TermQuery(new Term("contents", "foobar")); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(1, hits.Length); QueryUtils.Check(termQuery, searcher); searcher.Close(); writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); doc = new Document(); doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); doc = new Document(); doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); doc = new Document(); doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); writer.Optimize(null); writer.Close(); searcher = new IndexSearcher(directory, true, null); termQuery = new TermQuery(new Term("contents", "woo")); phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("contents", "map")); phraseQuery.Add(new Term("contents", "entry")); hits = searcher.Search(termQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(3, hits.Length); hits = searcher.Search(phraseQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(phraseQuery, Occur.MUST); booleanQuery.Add(termQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000, null).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check(booleanQuery, searcher); searcher.Close(); directory.Close(); }
public virtual void TestRandomStringSort() { Random random = new Random(Random.Next()); int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif random, dir); bool allowDups = random.NextBoolean(); ISet <string> seen = new JCG.HashSet <string>(); int maxLength = TestUtil.NextInt32(random, 5, 100); if (Verbose) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups); } int numDocs = 0; IList <BytesRef> docValues = new List <BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { Document doc = new Document(); // 10% of the time, the document is missing the value: BytesRef br; if (LuceneTestCase.Random.Next(10) != 7) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random, maxLength); } else { s = TestUtil.RandomUnicodeString(random, maxLength); } if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (Verbose) { Console.WriteLine(" " + numDocs + ": s=" + s); } br = new BytesRef(s); if (DefaultCodecSupportsDocValues) { doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); } else { doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO)); } doc.Add(NewStringField("string", s, Field.Store.NO)); docValues.Add(br); } else { br = null; if (Verbose) { Console.WriteLine(" " + numDocs + ": <missing>"); } docValues.Add(null); if (DefaultCodecSupportsDocValues) { doc.Add(new NumericDocValuesField("id", numDocs)); } else { doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO)); } } doc.Add(new StoredField("id", numDocs)); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.GetReader().Dispose(); } } IndexReader r = writer.GetReader(); writer.Dispose(); if (Verbose) { Console.WriteLine(" reader=" + r); } IndexSearcher idxS = NewSearcher( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif r, false); int ITERS = AtLeast(100); for (int iter = 0; iter < ITERS; iter++) { bool reverse = random.NextBoolean(); TopFieldDocs hits; SortField sf; bool sortMissingLast; bool missingIsNull; if (DefaultCodecSupportsDocValues && random.NextBoolean()) { sf = new SortField("stringdv", SortFieldType.STRING, reverse); // Can only use sort missing if the DVFormat // supports docsWithField: sortMissingLast = DefaultCodecSupportsDocsWithField && Random.NextBoolean(); missingIsNull = DefaultCodecSupportsDocsWithField; } else { sf = new SortField("string", SortFieldType.STRING, reverse); sortMissingLast = Random.NextBoolean(); missingIsNull = true; } if (sortMissingLast) { sf.MissingValue = SortField.STRING_LAST; } Sort sort; if (random.NextBoolean()) { sort = new Sort(sf); } else { sort = new Sort(sf, SortField.FIELD_DOC); } int hitCount = TestUtil.NextInt32(random, 1, r.MaxDoc + 20); RandomFilter f = new RandomFilter(random, (float)random.NextDouble(), docValues); int queryType = random.Next(3); if (queryType == 0) { // force out of order BooleanQuery bq = new BooleanQuery(); // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2 // which delegates to BS if there are no mandatory clauses. bq.Add(new MatchAllDocsQuery(), Occur.SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return // the clause instead of BQ. 
bq.MinimumNumberShouldMatch = 1; hits = idxS.Search(bq, f, hitCount, sort, random.NextBoolean(), random.NextBoolean()); } else if (queryType == 1) { hits = idxS.Search(new ConstantScoreQuery(f), null, hitCount, sort, random.NextBoolean(), random.NextBoolean()); } else { hits = idxS.Search(new MatchAllDocsQuery(), f, hitCount, sort, random.NextBoolean(), random.NextBoolean()); } if (Verbose) { Console.WriteLine("\nTEST: iter=" + iter + " " + hits.TotalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort); } // Compute expected results: var expected = f.matchValues.ToList(); expected.Sort(Comparer <BytesRef> .Create((a, b) => { if (a == null) { if (b == null) { return(0); } if (sortMissingLast) { return(1); } else { return(-1); } } else if (b == null) { if (sortMissingLast) { return(-1); } else { return(1); } } else { return(a.CompareTo(b)); } })); if (reverse) { expected.Reverse(); } if (Verbose) { Console.WriteLine(" expected:"); for (int idx = 0; idx < expected.Count; idx++) { BytesRef br = expected[idx]; if (br == null && missingIsNull == false) { br = new BytesRef(); } Console.WriteLine(" " + idx + ": " + (br == null ? "<missing>" : br.Utf8ToString())); if (idx == hitCount - 1) { break; } } } if (Verbose) { Console.WriteLine(" actual:"); for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++) { FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX]; BytesRef br = (BytesRef)fd.Fields[0]; Console.WriteLine(" " + hitIDX + ": " + (br == null ? "<missing>" : br.Utf8ToString()) + " id=" + idxS.Doc(fd.Doc).Get("id")); } } for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++) { FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX]; BytesRef br = expected[hitIDX]; if (br == null && missingIsNull == false) { br = new BytesRef(); } // Normally, the old codecs (that don't support // docsWithField via doc values) will always return // an empty BytesRef for the missing case; however, // if all docs in a given segment were missing, in // that case it will return null! So we must map // null here, too: BytesRef br2 = (BytesRef)fd.Fields[0]; if (br2 == null && missingIsNull == false) { br2 = new BytesRef(); } Assert.AreEqual(br, br2, "hit=" + hitIDX + " has wrong sort value"); } } r.Dispose(); dir.Dispose(); }