/// <summary>Adds a required (MUST) clause matching the given category id; 0 means "no category filter".</summary>
public AppQuery WithCategory(int category)
{
    if (category == 0)
    {
        return this;
    }

    Query categoryQuery = new TermQuery(new Term("Category", category.ToString()));
    AddQuery(categoryQuery, BooleanClause.Occur.MUST);
    return this;
}
// Weight for a TermQuery: captures the searcher's Similarity and the idf for
// the enclosing query's term at construction time.
public TermWeight(TermQuery enclosingInstance, Searcher searcher)
{
    InitBlock(enclosingInstance);
    this.similarity = Enclosing_Instance.GetSimilarity(searcher);
    // IdfExplain computes idf and also records how it was derived (for Explain()).
    idfExp = similarity.IdfExplain(Enclosing_Instance.term, searcher);
    idf = idfExp.Idf;
}
// Verifies QueryWrapperFilter works with a primitive query, a complex
// (boolean) query, and a non-primitive query (FuzzyQuery, which does not
// implement Query#createWeight).
public virtual void TestBasic()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    TermQuery termQuery = new TermQuery(new Term("field", "value"));

    // should not throw exception with primitive query
    QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // should not throw exception with complex primitive query
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
    // BUG FIX: the complex query was never exercised — the filter previously
    // re-wrapped termQuery, leaving booleanQuery unused.
    qwf = new QueryWrapperFilter(booleanQuery);
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // should not throw exception with non primitive Query (doesn't implement
    // Query#createWeight)
    qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);
}
// Exercises FieldCacheRangeFilter.NewDoubleRange: half-range, complementary
// open ranges, the unbounded range, and the +/- infinity edge cases.
public virtual void TestFieldCacheRangeFilterDoubles()
{
    IndexReader reader = SignedIndexReader;
    IndexSearcher search = NewSearcher(reader);
    int numDocs = reader.NumDocs;
    double? minIdO = Convert.ToDouble(MinId + .5);
    // NOTE(review): the (float) cast narrows the double before computing the
    // midpoint — presumably harmless for these small id values, but confirm it
    // matches the indexed doubles.
    double? medIdO = Convert.ToDouble((float)minIdO + ((MaxId - MinId)) / 2.0);
    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));

    // Inclusive [min, mid] range should cover exactly half the docs.
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", minIdO, medIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs / 2, result.Length, "find all");

    // (-inf, mid] and (mid, +inf) partition the whole doc set.
    int count = 0;
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, medIdO, F, T), numDocs).ScoreDocs;
    count += result.Length;
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", medIdO, null, F, F), numDocs).ScoreDocs;
    count += result.Length;
    // BUG FIX: corrected the "concenatted" typo in the assertion message.
    Assert.AreEqual(numDocs, count, "sum of two concatenated ranges");

    // Fully unbounded range matches everything.
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, null, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");

    // Ranges pinned at +/- infinity (exclusive) must match nothing.
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", Convert.ToDouble(double.PositiveInfinity), null, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, Convert.ToDouble(double.NegativeInfinity), F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");
}
/// <summary>Sorts hits for "document" by DATE_TIME_FIELD descending and checks the resulting order.</summary>
public virtual void TestReverseDateSort()
{
    IndexSearcher searcher = NewSearcher(Reader);
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.Type_e.STRING, true));
    Query query = new TermQuery(new Term(TEXT_FIELD, "document"));

    // Run the search and collect TEXT_FIELD of each hit in rank order.
    ScoreDoc[] hits = searcher.Search(query, null, 1000, sort).ScoreDocs;
    string[] actualOrder = new string[5];
    for (int rank = 0; rank < hits.Length; rank++)
    {
        Document document = searcher.Doc(hits[rank].Doc);
        actualOrder[rank] = document.Get(TEXT_FIELD);
    }

    // Set up the expected order (i.e. Document 5, 4, 3, 2, 1).
    string[] expectedOrder =
    {
        "Document 5",
        "Document 4",
        "Document 3",
        "Document 2",
        "Document 1",
    };
    Assert.AreEqual(Arrays.AsList(expectedOrder), Arrays.AsList(actualOrder));
}
/// <summary>Runs a parsed query over the "All" field and maps hits back to content objects.</summary>
/// <param name="search">Raw user query text; blank input yields an empty result set.</param>
/// <returns>A task producing the (possibly empty) result collection; search errors are logged, not thrown.</returns>
public Task<SearchResultCollection> Search(string search)
{
    return System.Threading.Tasks.Task.Run(() =>
    {
        var src = new SearchResultCollection();
        if (string.IsNullOrWhiteSpace(search)) return src;
        try
        {
            // BUG FIX: removed a dead local — a TermQuery was built here and never used.
            var parser = new QueryParser(Version.LUCENE_30, "All", analyzer);
            using (var indexSearcher = new IndexSearcher(directory, true))
            {
                Query query = parser.Parse(search);
                TopDocs result = indexSearcher.Search(query, 50);
                foreach (ScoreDoc h in result.ScoreDocs)
                {
                    Document doc = indexSearcher.Doc(h.Doc);
                    string id = doc.Get("id");
                    BaseContent value;
                    // Only surface hits that still resolve to known content.
                    if (LookupTable.TryGetValue(id, out value))
                        src.Add(new SearchResult {Relevance = h.Score, Content = value});
                }
            }
        }
        catch (Exception e)
        {
            Logger.Log("DataServer", "Error lucene search", e.Message, Logger.Level.Error);
        }
        return src;
    });
}
/// <summary> Add a clause to a boolean query.</summary>
private static void Add(BooleanQuery q, System.String k, System.String v, float boost)
{
    // Build the boosted term clause, then attach it as an optional (SHOULD) clause.
    TermQuery clause = new TermQuery(new Term(k, v));
    clause.SetBoost(boost);
    q.Add(new BooleanClause(clause, BooleanClause.Occur.SHOULD));
}
/// <summary>
/// Builds a disjunction (OR) of one TermQuery per integral value in
/// [lowerValue, upperValue), plus upperValue itself when <paramref name="inclusive"/> is true.
/// </summary>
/// <returns>The OR-query, or null when the bounds are inverted.</returns>
/// <remarks>
/// One clause per value: cost scales linearly with the range width, which is
/// why the max clause count is raised. NOTE: SetMaxClauseCount mutates a
/// process-wide Lucene setting, not just this query.
/// </remarks>
public static BooleanQuery ParseRange(string fieldName, long lowerValue, long upperValue, bool inclusive)
{
    if (lowerValue > upperValue)
    {
        return null;
    }

    var dateQuery = new BooleanQuery();
    BooleanQuery.SetMaxClauseCount(int.MaxValue);

    for (long i = lowerValue; i < upperValue; i++)
    {
        var term = new Term(fieldName, i.ToString());
        dateQuery.Add(new TermQuery(term), BooleanClause.Occur.SHOULD);
    }

    if (inclusive)
    {
        var term = new Term(fieldName, upperValue.ToString());
        dateQuery.Add(new TermQuery(term), BooleanClause.Occur.SHOULD);
    }

    // Removed long-dead commented-out wrapper code (it also contained an
    // '||' where '&&' was intended, so it should not be resurrected as-is).
    return dateQuery;
}
/// <summary>
/// AND-combines one TermQuery per (field, value) pair and returns the requested
/// page of matching documents plus the total hit count.
/// </summary>
/// <param name="pageSize">Number of documents per page.</param>
/// <param name="pageNumber">1-based page index.</param>
public static LuceneResult MultiSearchBIMXchange(Dictionary<string, string> terms, int pageSize, int pageNumber)
{
    var directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));

    var booleanQuery = new BooleanQuery();
    foreach (var term in terms)
    {
        booleanQuery.Add(new TermQuery(new Term(term.Key, term.Value)), BooleanClause.Occur.MUST);
    }

    var searcher = new IndexSearcher(directory, true);
    var start = (pageNumber - 1) * pageSize;
    // BUG FIX: fetch enough hits to cover the requested page. The old code
    // always asked Lucene for 10 docs but iterated up to TotalHits, which threw
    // IndexOutOfRangeException whenever a page reached past the first 10 matches.
    var needed = start + pageSize;
    if (needed < 1) needed = 1;
    var topDocs = searcher.Search(booleanQuery, needed);

    var docs = new List<Document>();
    // Guard on ScoreDocs.Length (the fetched window), never on TotalHits.
    for (var i = start; i < start + pageSize && i < topDocs.ScoreDocs.Length; i++)
    {
        var scoreDoc = topDocs.ScoreDocs[i];
        docs.Add(searcher.Doc(scoreDoc.doc));
    }

    searcher.Close();
    directory.Close();

    return new LuceneResult { Results = docs, TotalCount = topDocs.TotalHits };
}
// Verifies QueryBuilder.And produces the same query string as a hand-built
// BooleanQuery with two required term clauses.
public void AndExtension()
{
    // Hand-built reference: outer MUST clause wrapping (value1 AND value2).
    BooleanQuery expected = new BooleanQuery();
    BooleanQuery inner = new BooleanQuery();
    inner.Add(new TermQuery(new Term("_name", "value1")), Occur.MUST);
    inner.Add(new TermQuery(new Term("_name", "value2")), Occur.MUST);
    expected.Add(inner, Occur.MUST);
    string queryString = expected.ToString();

    // Same query expressed through the fluent builder.
    QueryBuilder builder = new QueryBuilder();
    builder.And
        (
            x => x.Term("_name", "value1"),
            x => x.Term("_name", "value2")
        );
    Query built = builder.Build();
    string newQueryString = built.ToString();

    Assert.AreEqual(queryString, newQueryString);
    Console.Write(queryString);
}
// Verifies BulkScorer over a TermQuery: both documents containing "all" are
// collected, they score identically, and the score equals the hand-computed
// DefaultSimilarity value.
public virtual void Test()
{
    Term allTerm = new Term(FIELD, "all");
    TermQuery termQuery = new TermQuery(allTerm);

    Weight weight = IndexSearcher.CreateNormalizedWeight(termQuery);
    Assert.IsTrue(IndexSearcher.TopReaderContext is AtomicReaderContext);
    AtomicReaderContext context = (AtomicReaderContext)IndexSearcher.TopReaderContext;
    BulkScorer ts = weight.BulkScorer(context, true, ((AtomicReader)context.Reader()).LiveDocs);
    // we have 2 documents with the term all in them, one document for all the
    // other values
    IList<TestHit> docs = new List<TestHit>();
    // must call next first
    ts.Score(new CollectorAnonymousInnerClassHelper(this, context, docs));
    Assert.IsTrue(docs.Count == 2, "docs Size: " + docs.Count + " is not: " + 2);
    TestHit doc0 = docs[0];
    TestHit doc5 = docs[1];
    // The scores should be the same
    Assert.IsTrue(doc0.Score == doc5.Score, doc0.Score + " does not equal: " + doc5.Score);
    /*
     * Score should be (based on Default Sim.: All floats are approximate tf = 1
     * numDocs = 6 docFreq(all) = 2 idf = ln(6/3) + 1 = 1.693147 idf ^ 2 =
     * 2.8667 boost = 1 lengthNorm = 1 //there is 1 term in every document coord
     * = 1 sumOfSquaredWeights = (idf * boost) ^ 2 = 1.693147 ^ 2 = 2.8667
     * queryNorm = 1 / (sumOfSquaredWeights)^0.5 = 1 /(1.693147) = 0.590
     *
     * score = 1 * 2.8667 * 1 * 1 * 0.590 = 1.69
     */
    // NOTE(review): exact float equality is deliberate — the constant was
    // derived from the same arithmetic the scorer performs.
    Assert.IsTrue(doc0.Score == 1.6931472f, doc0.Score + " does not equal: " + 1.6931472f);
}
// Verifies expression-based rescoring: the first-pass TermQuery ranking is
// reordered by sqrt(_score) + ln(popularity), and Explain() exposes both the
// bound variables and the first-pass details.
public virtual void TestBasic()
{
    // create a sort field and sort by it (reverse order)
    Query query = new TermQuery(new Term("body", "contents"));
    IndexReader r = searcher.IndexReader;
    // Just first pass query
    TopDocs hits = searcher.Search(query, 10);
    AreEqual(3, hits.TotalHits);
    AreEqual("3", r.Document(hits.ScoreDocs[0].Doc).Get("id"));
    AreEqual("1", r.Document(hits.ScoreDocs[1].Doc).Get("id"));
    AreEqual("2", r.Document(hits.ScoreDocs[2].Doc).Get("id"));
    // Now, rescore:
    Expression e = JavascriptCompiler.Compile("sqrt(_score) + ln(popularity)");
    SimpleBindings bindings = new SimpleBindings();
    // "popularity" reads the stored int field; "_score" binds the first-pass score.
    bindings.Add(new SortField("popularity", SortField.Type_e.INT));
    bindings.Add(new SortField("_score", SortField.Type_e.SCORE));
    Rescorer rescorer = e.GetRescorer(bindings);
    hits = rescorer.Rescore(searcher, hits, 10);
    // Rescoring reverses the order of the three hits.
    AreEqual(3, hits.TotalHits);
    AreEqual("2", r.Document(hits.ScoreDocs[0].Doc).Get("id"));
    AreEqual("1", r.Document(hits.ScoreDocs[1].Doc).Get("id"));
    AreEqual("3", r.Document(hits.ScoreDocs[2].Doc).Get("id"));
    string expl = rescorer.Explain(searcher, searcher.Explain(query, hits.ScoreDocs[0].Doc), hits.ScoreDocs[0].Doc).ToString();
    // Confirm the explanation breaks out the individual
    // variables:
    IsTrue(expl.Contains("= variable \"popularity\""));
    // Confirm the explanation includes first pass details:
    IsTrue(expl.Contains("= first pass score"));
    IsTrue(expl.Contains("body:contents in"));
}
// Collects the Lucene doc ids of every document whose groupByField equals the
// given group value.
private IEnumerable<int> GetDocIDs(IndexReader reader, string groupByField, string group)
{
    var groupTerm = new Term(groupByField, group);
    // Cache the wrapped filter so repeated lookups can reuse the doc-id set.
    Filter cachedFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(groupTerm)));
    var docIds = cachedFilter.GetDocIdSet(reader).AsEnumerable();
    return docIds.ToList();
}
// Restricts a query to documents belonging to the given entity classes.
// A query filter is more practical than manually filtering classes after the
// query runs (especially on scrollable result sets) and it probably minimises
// the memory footprint.
public static Lucene.Net.Search.Query FilterQueryByClasses(IESI.ISet<System.Type> classesAndSubclasses, Lucene.Net.Search.Query luceneQuery)
{
    if (classesAndSubclasses == null)
    {
        return luceneQuery;
    }

    BooleanQuery classFilter = new BooleanQuery();
    // Zero boost annihilates the scoring impact of DocumentBuilder.CLASS_FIELDNAME.
    classFilter.SetBoost(0);
    foreach (System.Type clazz in classesAndSubclasses)
    {
        string typeName = TypeHelper.LuceneTypeName(clazz);
        TermQuery termQuery = new TermQuery(new Term(DocumentBuilder.CLASS_FIELDNAME, typeName));
        classFilter.Add(termQuery, BooleanClause.Occur.SHOULD);
    }

    BooleanQuery filteredQuery = new BooleanQuery();
    filteredQuery.Add(luceneQuery, BooleanClause.Occur.MUST);
    filteredQuery.Add(classFilter, BooleanClause.Occur.MUST);
    return filteredQuery;
}
// Verifies that a near-real-time reader obtained from IndexWriter.GetReader
// stays usable after the writer is closed, and that Reopen then throws.
public virtual void TestAfterClose()
{
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetInfoStream(infoStream, null);
    // create the index
    CreateIndexNoClose(false, "test", writer);
    IndexReader r = writer.GetReader(null);
    writer.Close();
    _TestUtil.CheckIndex(dir1);
    // reader should remain usable even after IndexWriter is closed:
    Assert.AreEqual(100, r.NumDocs());
    Query q = new TermQuery(new Term("indexname", "test"));
    Assert.AreEqual(100, new IndexSearcher(r).Search(q, 10, null).TotalHits);
    // ...but reopening through the closed writer must fail:
    Assert.Throws<AlreadyClosedException>(() => r.Reopen(null), "failed to hit AlreadyClosedException");
    r.Close();
    dir1.Close();
}
// Builds the package-search query: per-field boosted parsing combined with an
// exact-id clause, an all-terms (AND) clause, an any-term (OR) clause and
// discounted per-field wildcard prefix clauses.
private static Query ParseQuery(string searchTerm)
{
    var fields = new Dictionary<string, float>
    {
        { "Id", 1.2f },
        { "Title", 1.0f },
        { "Tags", 1.0f },
        { "Description", 0.8f },
        { "Author", 0.6f }
    };

    var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
    searchTerm = QueryParser.Escape(searchTerm).ToLowerInvariant();
    var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields.Keys.ToArray(), analyzer, fields);

    var conjunctionQuery = new BooleanQuery();
    conjunctionQuery.SetBoost(1.5f);
    var disjunctionQuery = new BooleanQuery();
    var wildCardQuery = new BooleanQuery();
    wildCardQuery.SetBoost(0.7f);

    // Exact id matches get the strongest boost of all.
    var exactIdQuery = new TermQuery(new Term("Id-Exact", searchTerm));
    exactIdQuery.SetBoost(2.5f);

    var terms = searchTerm.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var term in terms)
    {
        conjunctionQuery.Add(queryParser.Parse(term), BooleanClause.Occur.MUST);
        disjunctionQuery.Add(queryParser.Parse(term), BooleanClause.Occur.SHOULD);

        foreach (var field in fields)
        {
            var wildCardTermQuery = new WildcardQuery(new Term(field.Key, term + "*"));
            wildCardTermQuery.SetBoost(0.7f * field.Value);
            wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD);
        }
    }

    return conjunctionQuery.Combine(new Query[] { exactIdQuery, conjunctionQuery, disjunctionQuery, wildCardQuery });
}
// Wildcard '*' matching against docs {"metal", "metals"}, using the legacy
// BooleanQuery.Add(query, required, prohibited) overload.
public virtual void TestAsterisk()
{
    RAMDirectory indexStore = GetIndexStore("body", new System.String[]{"metal", "metals"});
    IndexSearcher searcher = new IndexSearcher(indexStore);
    Query query1 = new TermQuery(new Term("body", "metal"));
    Query query2 = new WildcardQuery(new Term("body", "metal*"));
    Query query3 = new WildcardQuery(new Term("body", "m*tal"));
    Query query4 = new WildcardQuery(new Term("body", "m*tal*"));
    Query query5 = new WildcardQuery(new Term("body", "m*tals"));
    // Add(q, false, false) == optional clause (neither required nor prohibited).
    BooleanQuery query6 = new BooleanQuery();
    query6.Add(query5, false, false);
    BooleanQuery query7 = new BooleanQuery();
    query7.Add(query3, false, false);
    query7.Add(query5, false, false);
    // Queries do not automatically lower-case search terms:
    Query query8 = new WildcardQuery(new Term("body", "M*tal*"));
    AssertMatches(searcher, query1, 1);
    AssertMatches(searcher, query2, 2);
    AssertMatches(searcher, query3, 1);
    AssertMatches(searcher, query4, 2);
    AssertMatches(searcher, query5, 1);
    AssertMatches(searcher, query6, 1);
    AssertMatches(searcher, query7, 2);
    AssertMatches(searcher, query8, 0);
}
// LUCENE-2617 regression test: clauses whose terms are absent from the index
// (zero-boost term/phrase queries) must still dilute the score through the
// coord factor, and empty PhraseQueries (null scorers) must be handled.
public virtual void TestNullOrSubScorer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    IndexSearcher s = new IndexSearcher(r);
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);

    // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
    float score = s.Search(q, 10).MaxScore;
    Query subQuery = new TermQuery(new Term("field", "not_in_index"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.SHOULD);
    float score2 = s.Search(q, 10).MaxScore;
    // 1 of 2 clauses matched -> coord halves the score.
    Assert.AreEqual(score * .5, score2, 1e-6);

    // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
    BooleanQuery qq = (BooleanQuery)q.Clone();
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "not_in_index"));
    phrase.Add(new Term("field", "another_not_in_index"));
    phrase.Boost = 0;
    qq.Add(phrase, Occur.SHOULD);
    score2 = s.Search(qq, 10).MaxScore;
    // 1 of 3 clauses matched.
    Assert.AreEqual(score * (1.0 / 3), score2, 1e-6);

    // now test BooleanScorer2
    subQuery = new TermQuery(new Term("field", "b"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.MUST);
    score2 = s.Search(q, 10).MaxScore;
    // 2 of 3 clauses matched.
    Assert.AreEqual(score * (2.0 / 3), score2, 1e-6);

    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10).TotalHits);

    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    q.Add(pq, Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10).TotalHits);

    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);

    r.Close();
    w.Close();
    dir.Close();
}
// Executes a paged Lucene search for packages. Relevance searches are boosted
// by download count; results are filtered to the latest (or latest-stable)
// version of each package. totalHits reports the unpaged match count.
private IQueryable<Package> SearchCore(SearchFilter searchFilter, out int totalHits)
{
    // Fetch enough hits to cover everything up to and including the requested page.
    int numRecords = searchFilter.Skip + searchFilter.Take;

    var searcher = new IndexSearcher(_directory, readOnly: true);
    var query = ParseQuery(searchFilter);

    // IF searching by relevance, boost scores by download count.
    if (searchFilter.SortProperty == SortProperty.Relevance)
    {
        var downloadCountBooster = new FieldScoreQuery("DownloadCount", FieldScoreQuery.Type.INT);
        query = new CustomScoreQuery(query, downloadCountBooster);
    }

    var filterTerm = searchFilter.IncludePrerelease ? "IsLatest" : "IsLatestStable";
    var termQuery = new TermQuery(new Term(filterTerm, Boolean.TrueString));
    var filter = new QueryWrapperFilter(termQuery);

    var results = searcher.Search(query, filter: filter, n: numRecords, sort: new Sort(GetSortField(searchFilter)));
    totalHits = results.totalHits;

    if (results.totalHits == 0 || searchFilter.CountOnly)
    {
        return Enumerable.Empty<Package>().AsQueryable();
    }

    // NOTE(review): the searcher is never closed/disposed here — presumably the
    // shared _directory lifetime is managed elsewhere; confirm.
    var packages = results.scoreDocs
        .Skip(searchFilter.Skip)
        .Select(sd => PackageFromDoc(searcher.Doc(sd.doc)))
        .ToList();
    return packages.AsQueryable();
}
// Rewrites the prefix query into a BooleanQuery that ORs a boosted TermQuery
// for every indexed term (in the same field) starting with the prefix.
public override Query Rewrite(IndexReader reader)
{
    BooleanQuery query = new BooleanQuery(true);
    // Terms(prefix) positions the enumerator at the first term >= prefix.
    TermEnum enumerator = reader.Terms(prefix);
    try
    {
        System.String prefixText = prefix.Text();
        System.String prefixField = prefix.Field();
        do
        {
            Term term = enumerator.Term();
#if !FRAMEWORK_1_1
            if (term != null && term.Text().StartsWith(prefixText, StringComparison.Ordinal) && term.Field() == prefixField)
#else
            if (term != null && term.Text().StartsWith(prefixText) && term.Field() == prefixField)
#endif
            {
                TermQuery tq = new TermQuery(term); // found a match
                tq.SetBoost(GetBoost()); // set the boost
                query.Add(tq, BooleanClause.Occur.SHOULD); // add to query
                //System.out.println("added " + term);
            }
            else
            {
                // Term enumeration is sorted, so the first non-matching term
                // ends the scan.
                break;
            }
        }
        while (enumerator.Next());
    }
    finally
    {
        // Always release the enumerator, even if the loop throws.
        enumerator.Close();
    }
    return query;
}
public Result Search (string term, int count, int start) { try { term = term.ToLower (); Term htTerm = new Term ("hottext", term); Query qq1 = new FuzzyQuery (htTerm); Query qq2 = new TermQuery (htTerm); qq2.Boost = 10f; Query qq3 = new PrefixQuery (htTerm); qq3.Boost = 10f; DisjunctionMaxQuery q1 = new DisjunctionMaxQuery (0f); q1.Add (qq1); q1.Add (qq2); q1.Add (qq3); Query q2 = new TermQuery (new Term ("text", term)); q2.Boost = 3f; Query q3 = new TermQuery (new Term ("examples", term)); q3.Boost = 3f; DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f); q.Add (q1); q.Add (q2); q.Add (q3); TopDocs top = SearchInternal (q, count, start); Result r = new Result (term, searcher, top.ScoreDocs); Results.Add (r); return r; } catch (IOException) { Console.WriteLine ("No index in {0}", dir); return null; } }
// Remoting regression check (LUCENENET-100): searches a remote Searchable over
// tcp remoting and records any failure in LUCENENET_100_Exception for the
// waiting test thread.
void LUCENENET_100_ClientSearch()
{
    try
    {
        Lucene.Net.Search.Searchable s = (Lucene.Net.Search.Searchable)Activator.GetObject(typeof(Lucene.Net.Search.Searchable), @"tcp://localhost:" + ANYPORT + "/Searcher");
        Lucene.Net.Search.MultiSearcher searcher = new Lucene.Net.Search.MultiSearcher(new Lucene.Net.Search.Searchable[] { s });
        Lucene.Net.Search.Query q = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field1", "moon"));
        Lucene.Net.Search.Sort sort = new Lucene.Net.Search.Sort();
        sort.SetSort(new Lucene.Net.Search.SortField("field2", Lucene.Net.Search.SortField.INT));
        Lucene.Net.Search.TopDocs h = searcher.Search(q, null, 100, sort);
        // Expects exactly 2 hits — presumably the server-side fixture indexes
        // two "moon" documents; confirm against the test setup.
        if (h.ScoreDocs.Length != 2)
        {
            LUCENENET_100_Exception = new SupportClassException("Test_Search_FieldDoc Error. ");
        }
    }
    catch (SupportClassException ex)
    {
        LUCENENET_100_Exception = ex;
    }
    catch (Exception ex)
    {
        // Unexpected failures are only logged; the harness checks the flag below.
        Console.WriteLine(ex);
    }
    finally
    {
        // Signal the waiting test thread regardless of outcome.
        LUCENENET_100_testFinished = true;
    }
}
//END
//this method creates document from an ObjectToIndex
public void BuildIndex(FileToIndex file)
{
    using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30))
    {
        using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            //check if document exists, if true deletes existing
            var searchQuery = new TermQuery(new Term("Id", file.Id.ToString()));
            idxw.DeleteDocuments(searchQuery);
            //creation
            Document doc = new Document();
            doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); //the analyzer splits strings into words
            doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED));
            //write the document to the index
            idxw.AddDocument(doc);
            //optimize and close the writer
            idxw.Commit();
            idxw.Optimize();
        }
    }
}
public void TestTermSearch()
{
    using (var dir = FSDirectory.Open(TestEnvironment.TestIndexDirectory))
    using (var indexSearcher = new IndexSearcher(dir))
    {
        // Exactly one indexed book carries subject term "ant":
        //   title=Ant in Action
        //   subject=apache ant build tool junit java development
        var antQuery = new TermQuery(new Term("subject", "ant"));
        var antHits = indexSearcher.Search(antQuery, 10);
        Assert.Equal(1, antHits.TotalHits);

        // Two books carry subject term "junit" ("Ants in Action" and
        // "JUnit in Action, Second Edition"):
        //   title=JUnit in Action, Second Edition
        //   subject=junit unit testing mock objects
        //   title=Ant in Action
        //   subject=apache ant build tool junit java development
        var junitQuery = new TermQuery(new Term("subject", "junit"));
        var junitHits = indexSearcher.Search(junitQuery, 10);
        // ExplainResults(indexSearcher, junitQuery, junitHits);
        Assert.Equal(2, junitHits.TotalHits);
    }
}
/// <summary>
/// Finds beer reviews whose ratings all meet the given minimums, optionally
/// also requiring a review-text term match.
/// </summary>
/// <returns>Up to <paramref name="limit"/> matching reviews.</returns>
public IEnumerable<BeerReview> Get(string reviewText = null, int limit = 10, double minAroma = 0, double minAppearance = 0, double minOverall = 0, double minTaste = 0, double minPalate = 0)
{
    var query = new BooleanQuery();

    // BUG FIX: build the text clause only when text was supplied — the old code
    // unconditionally constructed a TermQuery around a possibly-null term text.
    if (reviewText != null)
    {
        query.Add(new TermQuery(new Term("reviewText", reviewText)), Occur.MUST);
    }

    // Lower-bounded, open-upper-bound range clause per rating dimension.
    Query reviewAppearanceQuery = NumericRangeQuery.NewDoubleRange("reviewAppearance", minAppearance, null, minInclusive: true, maxInclusive: true);
    Query reviewAromaQuery = NumericRangeQuery.NewDoubleRange("reviewAroma", minAroma, null, minInclusive: true, maxInclusive: true);
    Query reviewPalateQuery = NumericRangeQuery.NewDoubleRange("reviewPalate", minPalate, null, minInclusive: true, maxInclusive: true);
    Query reviewTasteQuery = NumericRangeQuery.NewDoubleRange("reviewTaste", minTaste, null, minInclusive: true, maxInclusive: true);
    Query reviewOverallQuery = NumericRangeQuery.NewDoubleRange("reviewOverall", minOverall, null, minInclusive: true, maxInclusive: true);

    query.Add(reviewAppearanceQuery, Occur.MUST);
    query.Add(reviewAromaQuery, Occur.MUST);
    query.Add(reviewOverallQuery, Occur.MUST);
    query.Add(reviewPalateQuery, Occur.MUST);
    query.Add(reviewTasteQuery, Occur.MUST);

    var hits = indexSearcher.Search(query, limit);
    var beers = new List<BeerReview>();
    // Use ScoreDocs.Length directly instead of LINQ Count() over the array.
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        beers.Add(BeerReviewFromDoc(hits.ScoreDocs[i]));
    }
    return beers;
}
// Paged Lucene search returning the database keys of matching packages;
// totalHits reports the full (unpaged) match count.
private static IList<int> SearchCore(SearchFilter searchFilter, out int totalHits)
{
    // No index on disk yet — nothing can match.
    if (!Directory.Exists(LuceneCommon.IndexDirectory))
    {
        totalHits = 0;
        return new int[0];
    }

    SortField sortField = GetSortField(searchFilter);
    // Fetch enough hits to cover everything up to the end of the requested page.
    int numRecords = searchFilter.Skip + searchFilter.Take;

    using (var directory = new LuceneFileSystem(LuceneCommon.IndexDirectory))
    {
        var searcher = new IndexSearcher(directory, readOnly: true);
        var query = ParseQuery(searchFilter);

        // Restrict to the latest (or latest-stable) version of each package.
        var filterTerm = searchFilter.IncludePrerelease ? "IsLatest" : "IsLatestStable";
        var termQuery = new TermQuery(new Term(filterTerm, Boolean.TrueString));
        Filter filter = new QueryWrapperFilter(termQuery);

        var results = searcher.Search(query, filter: filter, n: numRecords, sort: new Sort(sortField));
        var keys = results.scoreDocs.Skip(searchFilter.Skip)
            .Select(c => ParseKey(searcher.Doc(c.doc).Get("Key")))
            .ToList();
        totalHits = results.totalHits;
        searcher.Close();
        return keys;
    }
}
// Wildcard '*' matching against docs {"metal", "metals"}: leading, trailing,
// embedded and combined wildcards, plus wildcard clauses inside BooleanQuery.
public virtual void TestAsterisk()
{
    Directory indexStore = GetIndexStore("body", new string[] { "metal", "metals" });
    IndexReader reader = DirectoryReader.Open(indexStore);
    IndexSearcher searcher = NewSearcher(reader);
    Query query1 = new TermQuery(new Term("body", "metal"));
    Query query2 = new WildcardQuery(new Term("body", "metal*"));
    Query query3 = new WildcardQuery(new Term("body", "m*tal"));
    Query query4 = new WildcardQuery(new Term("body", "m*tal*"));
    Query query5 = new WildcardQuery(new Term("body", "m*tals"));
    BooleanQuery query6 = new BooleanQuery();
    query6.Add(query5, BooleanClause.Occur.SHOULD);
    BooleanQuery query7 = new BooleanQuery();
    query7.Add(query3, BooleanClause.Occur.SHOULD);
    query7.Add(query5, BooleanClause.Occur.SHOULD);
    // Queries do not automatically lower-case search terms:
    Query query8 = new WildcardQuery(new Term("body", "M*tal*"));
    AssertMatches(searcher, query1, 1);
    AssertMatches(searcher, query2, 2);
    AssertMatches(searcher, query3, 1);
    AssertMatches(searcher, query4, 2);
    AssertMatches(searcher, query5, 1);
    AssertMatches(searcher, query6, 1);
    AssertMatches(searcher, query7, 2);
    AssertMatches(searcher, query8, 0);
    // Leading-wildcard edge cases.
    AssertMatches(searcher, new WildcardQuery(new Term("body", "*tall")), 0);
    AssertMatches(searcher, new WildcardQuery(new Term("body", "*tal")), 1);
    AssertMatches(searcher, new WildcardQuery(new Term("body", "*tal*")), 2);
    reader.Dispose();
    indexStore.Dispose();
}
// Weight for a TermQuery (4.x API): snapshots the per-term statistics
// (TermContext) and asks the searcher's Similarity to pre-compute weighting
// stats from collection- and term-level statistics.
public TermWeight(TermQuery outerInstance, IndexSearcher searcher, TermContext termStates)
{
    this.OuterInstance = outerInstance;
    Debug.Assert(termStates != null, "TermContext must not be null");
    this.TermStates = termStates;
    this.Similarity = searcher.Similarity;
    this.Stats = Similarity.ComputeWeight(
        outerInstance.Boost,
        searcher.CollectionStatistics(outerInstance.Term_Renamed.Field()),
        searcher.TermStatistics(outerInstance.Term_Renamed, termStates));
}
// Translates a Lucene TermQuery into a parameterized SQL LIKE predicate
// (substring match on the current field placeholder).
protected override ParameterizedSql BuildQuery(TermQuery termQuery)
{
    var escapedTerm = EscapeForSql(termQuery.Term.Text);
    var parameters = new Dictionary<string, string> { { "field1", escapedTerm } };
    var predicate = FieldPlaceholder + " LIKE '%' + @field1 + '%'";
    return new ParameterizedSql(predicate, parameters);
}
// Builds a TermQuery from an XML element: required (inheritable) fieldName
// attribute, element text as the term value, optional boost defaulting to 1.0.
public virtual Query GetQuery(XmlElement e)
{
    string field = DOMUtils.GetAttributeWithInheritanceOrFail(e, "fieldName");
    string text = DOMUtils.GetNonBlankTextOrFail(e);

    TermQuery termQuery = new TermQuery(new Term(field, text));
    termQuery.Boost = DOMUtils.GetAttribute(e, "boost", 1.0f);
    return termQuery;
}
// Materializes the set of documents carrying the given facet term as a bit set
// sized to the reader's doc space.
public OpenBitSetDISI TermToBitSet(string term, IndexReader indexReader)
{
    var termQuery = new TermQuery(new Term(this.Field, term));
    // Caching wrapper lets repeated facet lookups reuse the computed doc-id set.
    var cachedFilter = new CachingWrapperFilter(new QueryWrapperFilter(termQuery));
    var iterator = cachedFilter.GetDocIdSet(indexReader).Iterator();
    return new OpenBitSetDISI(iterator, indexReader.MaxDoc());
}
/// <summary>Adds an id filter clause; non-positive ids are ignored.</summary>
public ProductQuery WithId(int id)
{
    if (id <= 0)
    {
        return this;
    }

    this.AddQuery(new TermQuery(new Term("id", id.ToString())));
    return this;
}
// Equality contract check: identical TermQueries are equal, and BoostingQueries
// built from the same parts with the same boost are equal.
public void TestBoostingQueryEquals()
{
    var left = new TermQuery(new Term("subject:", "java"));
    var right = new TermQuery(new Term("subject:", "java"));
    Assert.AreEqual(left, right, "Two TermQueries with same attributes should be equal");

    var boosting1 = new BoostingQuery(left, right, 0.1f);
    var boosting2 = new BoostingQuery(left, right, 0.1f);
    Assert.AreEqual(boosting1, boosting2, "BoostingQuery with same attributes is not equal");
}
// Stress test: background threads add/delete documents while the main thread
// repeatedly reopens a near-real-time reader and searches it.
public virtual void TestDuringAddDelete()
{
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetInfoStream(infoStream, null);
    writer.MergeFactor = 2;
    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit(null);
    IndexReader r = writer.GetReader(null);
    int NUM_THREAD = 5;
    float SECONDS = 3;
    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
    // Synchronized list collecting exceptions thrown by the worker threads.
    System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));
    ThreadClass[] threads = new ThreadClass[NUM_THREAD];
    for (int i = 0; i < NUM_THREAD; i++)
    {
        threads[i] = new AnonymousClassThread1(endTime, writer, excs, this);
        threads[i].IsBackground = true;
        threads[i].Start();
    }
    int sum = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        // Reopen returns the same instance when nothing changed; only close the
        // old reader when a new one was actually produced.
        IndexReader r2 = r.Reopen(null);
        if (r2 != r)
        {
            r.Close();
            r = r2;
        }
        Query q = new TermQuery(new Term("indexname", "test"));
        sum += new IndexSearcher(r).Search(q, 10, null).TotalHits;
    }
    for (int i = 0; i < NUM_THREAD; i++)
    {
        threads[i].Join();
    }
    // At least one search must have produced hits, and no worker may have failed.
    Assert.IsTrue(sum > 0);
    Assert.AreEqual(0, excs.Count);
    writer.Close();
    _TestUtil.CheckIndex(dir1);
    r.Close();
    dir1.Close();
}
// Returns a list of all files and directories in dir
static ICollection GetAllItemsInDirectory(DirectoryInfo dir)
{
    // form the query
    string parent_uri_str = PathToUri(dir.FullName).ToString();
    // Instead of taking the painfull way of using BeagrepAnalyzer, lets just add the prefix manually
    // LuceneCommon thinks exposing secret property type encoding is bad, I think so too... except for now
    string key = "prop:k:" + Property.ParentDirUriPropKey;
    //Logger.Log.Debug ("Querying for {0}={1}", parent_uri_str, key);
    LNS.Query query = new LNS.TermQuery(new Term(key, parent_uri_str));

    // do the search
    LNS.IndexSearcher searcher;
    searcher = LuceneCommon.GetSearcher(driver.PrimaryStore);

    // Collect matches as a bit array indexed by Lucene doc id.
    BetterBitArray matches;
    matches = new BetterBitArray(searcher.MaxDoc());

    BitArrayHitCollector collector;
    collector = new BitArrayHitCollector(matches);

    searcher.Search(query, null, collector);

    // Finally we pull all of the matching documents,
    // convert them to Dirent, and store them in a list.
    ArrayList match_list = new ArrayList();
    int i = 0;
    while (i < matches.Count)
    {
        // Jump to the next set bit; the guard below handles running past the end.
        i = matches.GetNextTrueIndex(i);
        if (i >= matches.Count)
        {
            break;
        }

        Document doc;
        doc = searcher.Doc(i);

        Dirent info;
        info = DocumentToDirent(doc);

        match_list.Add(info);

        ++i;
    }

    LuceneCommon.ReleaseSearcher(searcher);
    //Logger.Log.Debug ("Found {0} items in {1}", match_list.Count, dir.FullName);

    return (match_list);
}
/// <summary>
/// Returns the fingerprints of all indexed documents whose "content" field
/// contains the given keyword (after normalization). Index problems are
/// logged and yield an empty set rather than throwing.
/// </summary>
public HashSet<string> GetDocumentsWithWord(string keyword)
{
    HashSet<string> fingerprints = new HashSet<string>();
    try
    {
        keyword = ReasonableWord.MakeReasonableWord(keyword);
        if (null != keyword)
        {
            using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
            {
                using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
                {
                    Lucene.Net.Search.TermQuery term_query = new Lucene.Net.Search.TermQuery(new Term("content", keyword));
                    Lucene.Net.Search.Hits hits = index_searcher.Search(term_query);
                    var i = hits.Iterator();
                    while (i.MoveNext())
                    {
                        Lucene.Net.Search.Hit hit = (Lucene.Net.Search.Hit)i.Current;
                        string fingerprint = hit.Get("fingerprint");
                        fingerprints.Add(fingerprint);
                    }
                    // BUG FIX: removed the explicit Close() calls on the searcher
                    // and reader — the using blocks already dispose both, and
                    // closing before Dispose risked double-close errors.
                }
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, $"GetDocumentsWithWord: There was a problem opening the index file for searching (path: '{LIBRARY_INDEX_BASE_PATH}', keyword: '{keyword}')");
    }
    return fingerprints;
}
/// <summary>
/// Re-indexes a single SampleData record: any existing entry with the same Id
/// is removed first, then a fresh document is written, so the index holds at
/// most one document per Id.
/// </summary>
private static void _addToLuceneIndex(SampleData sampleData, IndexWriter writer)
{
    string id = sampleData.Id.ToString();

    // Drop the stale entry (if any) before re-adding.
    Lucene.Net.Search.TermQuery deleteQuery = new Lucene.Net.Search.TermQuery(new Term("Id", id));
    writer.DeleteDocuments(deleteQuery);

    // Build the replacement document, mapping each db column to a Lucene field.
    // Id is a key, so it is stored but not analyzed; the text columns are analyzed.
    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
    document.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.Add(new Field("Name", sampleData.Name, Field.Store.YES, Field.Index.ANALYZED));
    document.Add(new Field("Description", sampleData.Description, Field.Store.YES, Field.Index.ANALYZED));

    writer.AddDocument(document);
}
/// <summary> Create the "More like this" query from a PriorityQueue of
/// [word, field, score] records.  Pops records best-first, turns each into a
/// SHOULD TermQuery (optionally boosted relative to the best score), and stops
/// at the clause limit or maxQueryTerms.</summary>
private Query CreateQuery(PriorityQueue <object[]> q)
{
    BooleanQuery result = new BooleanQuery();
    float topScore = 0;
    int clauseCount = 0;

    for (object popped = q.Pop(); popped != null; popped = q.Pop())
    {
        object[] record = (object[])popped;
        TermQuery clause = new TermQuery(new Term((string)record[1], (string)record[0]));

        if (boost)
        {
            // The first (highest-scoring) record sets the baseline; each clause
            // is boosted relative to it.
            float score = (float)record[2];
            if (clauseCount == 0)
            {
                topScore = score;
            }
            clause.Boost = score / topScore;
        }

        try
        {
            result.Add(clause, Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // Clause limit reached — return what we have so far.
            break;
        }

        clauseCount++;
        if (maxQueryTerms > 0 && clauseCount >= maxQueryTerms)
        {
            break;
        }
    }
    return(result);
}
/// <summary> Create the "More like this" query from a PriorityQueue of
/// PQRecord entries.  Pops records best-first, turns each into a SHOULD
/// TermQuery (optionally boosted relative to the best score), and stops at
/// the clause limit or maxQueryTerms.</summary>
private Query CreateQuery(Lucene.Net.Util.PriorityQueue q)
{
    Lucene.Net.Search.BooleanQuery result = new Lucene.Net.Search.BooleanQuery();
    float topScore = 0;
    int clauseCount = 0;

    for (Object popped = q.Pop(); popped != null; popped = q.Pop())
    {
        PQRecord record = (PQRecord)popped;
        Lucene.Net.Search.TermQuery clause =
            new Lucene.Net.Search.TermQuery(new Term(record.topField, record.word));

        if (boost)
        {
            // The first popped record carries the best score; every clause is
            // boosted relative to that baseline.
            if (clauseCount == 0)
            {
                topScore = record.score;
            }
            clause.SetBoost(record.score / topScore);
        }

        try
        {
            result.Add(clause, Lucene.Net.Search.BooleanClause.Occur.SHOULD);
        }
        catch (Lucene.Net.Search.BooleanQuery.TooManyClauses)
        {
            // Clause limit reached — return what we have so far.
            break;
        }

        clauseCount++;
        if (maxQueryTerms > 0 && clauseCount >= maxQueryTerms)
        {
            break;
        }
    }
    return(result);
}
/// <summary>
/// Client half of the LUCENENET-100 regression check: connects to the remoted
/// Searchable published at tcp://localhost:ANYPORT/Searcher, wraps it in a
/// MultiSearcher, and runs a sorted term search.  Any exception is captured in
/// LUCENENET_100_Exception for the main test thread; LUCENENET_100_testFinished
/// is always set so the test does not hang.
/// </summary>
void LUCENENET_100_ClientSearch()
{
    try
    {
        Lucene.Net.Search.Searchable remote =
            (Lucene.Net.Search.Searchable)Activator.GetObject(
                typeof(Lucene.Net.Search.Searchable),
                @"tcp://localhost:" + ANYPORT + "/Searcher");
        Lucene.Net.Search.MultiSearcher multiSearcher =
            new Lucene.Net.Search.MultiSearcher(new Lucene.Net.Search.Searchable[] { remote });

        Lucene.Net.Search.Query query =
            new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field1", "moon"));

        Lucene.Net.Search.Sort sortByField2 = new Lucene.Net.Search.Sort();
        sortByField2.SetSort(new Lucene.Net.Search.SortField("field2", Lucene.Net.Search.SortField.INT));

        // The result itself is irrelevant; the call must simply not throw.
        Lucene.Net.Search.TopDocs unused = multiSearcher.Search(query, null, 100, sortByField2);
    }
    catch (Exception ex)
    {
        LUCENENET_100_Exception = ex; // inspected by the main test thread
    }
    finally
    {
        LUCENENET_100_testFinished = true;
    }
}
/// <summary>
/// Returns a Hashtable mapping URI strings to their FullyIndexed flag for all
/// stored documents whose "fixme:account" keyword matches <c>server</c>,
/// filtered to those whose "fixme:file" keyword matches <c>file</c>.
/// </summary>
public Hashtable GetStoredUriStrings(string server, string file)
{
    Hashtable uris = new Hashtable();

    // Filter: documents belonging to the requested file...
    Term term = new Term(PropertyToFieldName(PropertyType.Keyword, "fixme:file"), file);
    LNS.QueryFilter filter = new LNS.QueryFilter(new LNS.TermQuery(term));

    // ...queried by the requested account/server.
    term = new Term(PropertyToFieldName(PropertyType.Keyword, "fixme:account"), server);
    LNS.TermQuery query = new LNS.TermQuery(term);

    LNS.IndexSearcher searcher = LuceneCommon.GetSearcher(PrimaryStore);
    try
    {
        LNS.Hits hits = searcher.Search(query, filter);

        for (int i = 0; i < hits.Length(); i++)
        {
            StoredInfo info = DocumentToStoredInfo(hits.Doc(i));
            uris.Add(info.Uri.ToString(), info.FullyIndexed);
        }
    }
    finally
    {
        // Always hand the searcher back, even if the search or the document
        // conversion throws (the original leaked the searcher on exception).
        LuceneCommon.ReleaseSearcher(searcher);
    }

    return(uris);
}
/// <summary>
/// Looks up a single document by id in the given index.  Returns null when no
/// document with that id exists.  Index failures surface as FaultException so
/// WCF clients receive a serializable fault.
/// </summary>
public JsonDocumentDto Retrieve(BaristaIndexDefinition indexDefinition, string documentId)
{
    if (documentId.IsNullOrWhiteSpace())
    {
        throw new ArgumentNullException("documentId", @"A document Id must be specified.");
    }

    try
    {
        var index = GetOrAddIndex(indexDefinition, true);
        IndexSearcher indexSearcher;
        using (index.GetSearcher(out indexSearcher))
        {
            // Document ids are indexed lower-cased, so normalize before querying.
            var idTerm = new Lucene.Net.Index.Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant());
            var idQuery = new Lucene.Net.Search.TermQuery(idTerm);

            var topDocs = indexSearcher.Search(idQuery, 1);
            if (topDocs.TotalHits == 0)
            {
                return(null);
            }

            var firstResult = RetrieveSearchResults(indexSearcher, topDocs).FirstOrDefault();
            return(firstResult == null ? null : firstResult.Document);
        }
    }
    catch (Exception ex)
    {
        // Service boundary: translate any failure into a fault for the caller.
        throw new FaultException(ex.Message);
    }
}
/// <summary>
/// Shared driver for the TopDocs.Merge tests: builds a random index, splits it
/// into per-segment shard searchers, runs randomized (sorted and score-sorted)
/// queries against both the whole index and the shards, merges the shard hits
/// with TopDocs.Merge, and asserts the merged result equals the single-searcher
/// result (including the correct shard index on every hit).
/// </summary>
/// <param name="useFrom">when true, also exercises the from/size (paging) overload of TopDocs.Merge</param>
/// <param name="VERBOSE">when true, uses a smaller index and prints diagnostics</param>
internal virtual void TestSort(bool useFrom, bool VERBOSE)
{
    IndexReader reader = null;
    Directory dir = null;
    if (!VERBOSE)
    {
        Console.WriteLine("Verbosity disabled. Enable manually if needed.");
    }
    // Verbose runs keep the index small enough to eyeball.
    int numDocs = VERBOSE ? AtLeast(50) : AtLeast(1000);
    //final int numDocs = AtLeast(50);
    string[] tokens = new string[] { "a", "b", "c", "d", "e" };
    if (VERBOSE)
    {
        Console.WriteLine("TEST: make index");
    }
    {
        dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, dir);
        // w.setDoRandomForceMerge(false);

        // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100));

        // Pre-generate a pool of random "text" field values built from the token set.
        string[] content = new string[AtLeast(20)];
        for (int contentIDX = 0; contentIDX < content.Length; contentIDX++)
        {
            StringBuilder sb = new StringBuilder();
            int numTokens = TestUtil.NextInt32(Random, 1, 10);
            for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++)
            {
                sb.Append(tokens[Random.Next(tokens.Length)]).Append(' ');
            }
            content[contentIDX] = sb.ToString();
        }

        for (int docIDX = 0; docIDX < numDocs; docIDX++)
        {
            Document doc = new Document();
            doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
            doc.Add(NewTextField("text", content[Random.Next(content.Length)], Field.Store.NO));
            doc.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO));
            int intValue;
            // Occasionally plant the extreme int values to exercise sort boundaries.
            if (Random.Next(100) == 17)
            {
                intValue = int.MinValue;
            }
            else if (Random.Next(100) == 17)
            {
                intValue = int.MaxValue;
            }
            else
            {
                intValue = Random.Next();
            }
            doc.Add(new Int32Field("int", intValue, Field.Store.NO));
            if (VERBOSE)
            {
                Console.WriteLine(" doc=" + doc);
            }
            w.AddDocument(doc);
        }

        reader = w.GetReader();
        w.Dispose();
    }

    // NOTE: sometimes reader has just one segment, which is
    // important to test
    IndexSearcher searcher = NewSearcher(reader);
    IndexReaderContext ctx = searcher.TopReaderContext;

    // Build one ShardSearcher per leaf (or a single one for an atomic reader),
    // remembering each shard's docBase so hits can be mapped back.
    ShardSearcher[] subSearchers;
    int[] docStarts;

    if (ctx is AtomicReaderContext)
    {
        subSearchers = new ShardSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx);
        docStarts[0] = 0;
    }
    else
    {
        CompositeReaderContext compCTX = (CompositeReaderContext)ctx;
        int size = compCTX.Leaves.Count;
        subSearchers = new ShardSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++)
        {
            AtomicReaderContext leave = compCTX.Leaves[searcherIDX];
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.Reader.MaxDoc;
        }
    }

    // Candidate sort fields (both directions of each type, plus score/doc).
    IList<SortField> sortFields = new List<SortField>();
    sortFields.Add(new SortField("string", SortFieldType.STRING, true));
    sortFields.Add(new SortField("string", SortFieldType.STRING, false));
    sortFields.Add(new SortField("int", SortFieldType.INT32, true));
    sortFields.Add(new SortField("int", SortFieldType.INT32, false));
    sortFields.Add(new SortField("float", SortFieldType.SINGLE, true));
    sortFields.Add(new SortField("float", SortFieldType.SINGLE, false));
    sortFields.Add(new SortField(null, SortFieldType.SCORE, true));
    sortFields.Add(new SortField(null, SortFieldType.SCORE, false));
    sortFields.Add(new SortField(null, SortFieldType.DOC, true));
    sortFields.Add(new SortField(null, SortFieldType.DOC, false));

    for (int iter = 0; iter < 1000 * RandomMultiplier; iter++)
    {
        // TODO: custom FieldComp...
        Query query = new TermQuery(new Term("text", tokens[Random.Next(tokens.Length)]));

        Sort sort;
        if (Random.Next(10) == 4)
        {
            // Sort by score
            sort = null;
        }
        else
        {
            SortField[] randomSortFields = new SortField[TestUtil.NextInt32(Random, 1, 3)];
            for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++)
            {
                randomSortFields[sortIDX] = sortFields[Random.Next(sortFields.Count)];
            }
            sort = new Sort(randomSortFields);
        }

        int numHits = TestUtil.NextInt32(Random, 1, numDocs + 5);
        //final int numHits = 5;

        if (VERBOSE)
        {
            Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
        }

        int from = -1;
        int size = -1;
        // First search on whole index:
        TopDocs topHits;
        if (sort == null)
        {
            if (useFrom)
            {
                TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random.NextBoolean());
                searcher.Search(query, c);
                from = TestUtil.NextInt32(Random, 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.GetTopDocs();
                if (from < tempTopHits.ScoreDocs.Length)
                {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                    Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                    tempTopHits.ScoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                }
                else
                {
                    topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                }
            }
            else
            {
                topHits = searcher.Search(query, numHits);
            }
        }
        else
        {
            TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean());
            searcher.Search(query, c);
            if (useFrom)
            {
                from = TestUtil.NextInt32(Random, 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.GetTopDocs();
                if (from < tempTopHits.ScoreDocs.Length)
                {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                    Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                    tempTopHits.ScoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                }
                else
                {
                    topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                }
            }
            else
            {
                topHits = c.GetTopDocs(0, numHits);
            }
        }

        if (VERBOSE)
        {
            if (useFrom)
            {
                Console.WriteLine("from=" + from + " size=" + size);
            }
            Console.WriteLine(" top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore));
            if (topHits.ScoreDocs != null)
            {
                for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++)
                {
                    ScoreDoc sd = topHits.ScoreDocs[hitIDX];
                    Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score);
                }
            }
        }

        // ... then all shards:
        Weight w = searcher.CreateNormalizedWeight(query);

        TopDocs[] shardHits = new TopDocs[subSearchers.Length];
        for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++)
        {
            TopDocs subHits;
            ShardSearcher subSearcher = subSearchers[shardIDX];
            if (sort == null)
            {
                subHits = subSearcher.Search(w, numHits);
            }
            else
            {
                TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean());
                subSearcher.Search(w, c);
                subHits = c.GetTopDocs(0, numHits);
            }

            shardHits[shardIDX] = subHits;
            if (VERBOSE)
            {
                Console.WriteLine(" shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? "null" : subHits.ScoreDocs.Length.ToString()));
                if (subHits.ScoreDocs != null)
                {
                    foreach (ScoreDoc sd in subHits.ScoreDocs)
                    {
                        Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score);
                    }
                }
            }
        }

        // Merge:
        TopDocs mergedHits;
        if (useFrom)
        {
            mergedHits = TopDocs.Merge(sort, from, size, shardHits);
        }
        else
        {
            mergedHits = TopDocs.Merge(sort, numHits, shardHits);
        }

        if (mergedHits.ScoreDocs != null)
        {
            // Make sure the returned shards are correct:
            for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++)
            {
                ScoreDoc sd = mergedHits.ScoreDocs[hitIDX];
                Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard");
            }
        }

        // The merged shard hits must be identical to the single-searcher hits.
        TestUtil.AssertEquals(topHits, mergedHits);
    }
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Exhaustively exercises TermRangeFilter on the padded "id" field with an
/// English collator: both bounds, each bound half-open, each bound unbounded
/// (null), and degenerate single-value / empty ranges.  Expected counts follow
/// from ids being unique and contiguous in [minId, maxId].
/// </summary>
public virtual void TestRangeFilterIdCollating()
{
    IndexReader reader = IndexReader.Open(signedIndex.index);
    IndexSearcher search = new IndexSearcher(reader);

    // Collator used for range comparison ("en" culture).
    System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("en").CompareInfo;

    int medId = ((maxId - minId) / 2);

    System.String minIP = Pad(minId);
    System.String maxIP = Pad(maxId);
    System.String medIP = Pad(medId);

    int numDocs = reader.NumDocs();

    Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");

    Hits result;
    Query q = new TermQuery(new Term("body", "body"));

    // test id, bounded on both ends
    result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, T, T, c));
    Assert.AreEqual(numDocs, result.Length(), "find all");

    result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, T, F, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "all but last");

    result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, F, T, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "all but first");

    result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, F, F, c));
    Assert.AreEqual(numDocs - 2, result.Length(), "all but ends");

    result = search.Search(q, new TermRangeFilter("id", medIP, maxIP, T, T, c));
    Assert.AreEqual(1 + maxId - medId, result.Length(), "med and up");

    result = search.Search(q, new TermRangeFilter("id", minIP, medIP, T, T, c));
    Assert.AreEqual(1 + medId - minId, result.Length(), "up to med");

    // unbounded id
    result = search.Search(q, new TermRangeFilter("id", minIP, null, T, F, c));
    Assert.AreEqual(numDocs, result.Length(), "min and up");

    result = search.Search(q, new TermRangeFilter("id", null, maxIP, F, T, c));
    Assert.AreEqual(numDocs, result.Length(), "max and down");

    result = search.Search(q, new TermRangeFilter("id", minIP, null, F, F, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "not min, but up");

    result = search.Search(q, new TermRangeFilter("id", null, maxIP, F, F, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "not max, but down");

    result = search.Search(q, new TermRangeFilter("id", medIP, maxIP, T, F, c));
    Assert.AreEqual(maxId - medId, result.Length(), "med and up, not max");

    result = search.Search(q, new TermRangeFilter("id", minIP, medIP, F, T, c));
    Assert.AreEqual(medId - minId, result.Length(), "not min, up to med");

    // very small sets
    result = search.Search(q, new TermRangeFilter("id", minIP, minIP, F, F, c));
    Assert.AreEqual(0, result.Length(), "min,min,F,F");
    result = search.Search(q, new TermRangeFilter("id", medIP, medIP, F, F, c));
    Assert.AreEqual(0, result.Length(), "med,med,F,F");
    result = search.Search(q, new TermRangeFilter("id", maxIP, maxIP, F, F, c));
    Assert.AreEqual(0, result.Length(), "max,max,F,F");

    result = search.Search(q, new TermRangeFilter("id", minIP, minIP, T, T, c));
    Assert.AreEqual(1, result.Length(), "min,min,T,T");
    result = search.Search(q, new TermRangeFilter("id", null, minIP, F, T, c));
    Assert.AreEqual(1, result.Length(), "nul,min,F,T");

    result = search.Search(q, new TermRangeFilter("id", maxIP, maxIP, T, T, c));
    Assert.AreEqual(1, result.Length(), "max,max,T,T");
    // NOTE(review): the filter args here are (maxIP, null, T, F) but the
    // message says "max,nul,T,T" — message/label mismatch in the original.
    result = search.Search(q, new TermRangeFilter("id", maxIP, null, T, F, c));
    Assert.AreEqual(1, result.Length(), "max,nul,T,T");

    result = search.Search(q, new TermRangeFilter("id", medIP, medIP, T, T, c));
    Assert.AreEqual(1, result.Length(), "med,med,T,T");
}
/// <summary>
/// Exercises TermRangeFilter with an English collator on the random "rand"
/// field: both bounds, half-open bounds, unbounded (null) bounds, and
/// degenerate single-value / empty ranges.  Counts follow from minR/maxR
/// being the unique extremes of the field.
/// </summary>
public virtual void TestRangeFilterRandCollating()
{
    // using the unsigned index because collation seems to ignore hyphens
    IndexReader reader = IndexReader.Open(unsignedIndex.index);
    IndexSearcher search = new IndexSearcher(reader);

    // Collator used for range comparison ("en" culture).
    System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("en").CompareInfo;

    System.String minRP = Pad(unsignedIndex.minR);
    System.String maxRP = Pad(unsignedIndex.maxR);

    int numDocs = reader.NumDocs();

    Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");

    Hits result;
    Query q = new TermQuery(new Term("body", "body"));

    // test extremes, bounded on both ends
    result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, T, T, c));
    Assert.AreEqual(numDocs, result.Length(), "find all");

    result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, T, F, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "all but biggest");

    result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, F, T, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "all but smallest");

    result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, F, F, c));
    Assert.AreEqual(numDocs - 2, result.Length(), "all but extremes");

    // unbounded
    result = search.Search(q, new TermRangeFilter("rand", minRP, null, T, F, c));
    Assert.AreEqual(numDocs, result.Length(), "smallest and up");

    result = search.Search(q, new TermRangeFilter("rand", null, maxRP, F, T, c));
    Assert.AreEqual(numDocs, result.Length(), "biggest and down");

    result = search.Search(q, new TermRangeFilter("rand", minRP, null, F, F, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "not smallest, but up");

    result = search.Search(q, new TermRangeFilter("rand", null, maxRP, F, F, c));
    Assert.AreEqual(numDocs - 1, result.Length(), "not biggest, but down");

    // very small sets
    result = search.Search(q, new TermRangeFilter("rand", minRP, minRP, F, F, c));
    Assert.AreEqual(0, result.Length(), "min,min,F,F");
    result = search.Search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F, c));
    Assert.AreEqual(0, result.Length(), "max,max,F,F");

    result = search.Search(q, new TermRangeFilter("rand", minRP, minRP, T, T, c));
    Assert.AreEqual(1, result.Length(), "min,min,T,T");
    result = search.Search(q, new TermRangeFilter("rand", null, minRP, F, T, c));
    Assert.AreEqual(1, result.Length(), "nul,min,F,T");

    result = search.Search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T, c));
    Assert.AreEqual(1, result.Length(), "max,max,T,T");
    // NOTE(review): args are (maxRP, null, T, F) but the message says
    // "max,nul,T,T" — message/label mismatch in the original.
    result = search.Search(q, new TermRangeFilter("rand", maxRP, null, T, F, c));
    Assert.AreEqual(1, result.Length(), "max,nul,T,T");
}
/// <summary>
/// Exhaustively exercises FieldCacheRangeFilter.NewInt64Range on the numeric
/// "id" field: both bounds, half-open bounds, unbounded (null) bounds,
/// degenerate single-value / empty ranges, plus long.MaxValue/MinValue
/// overflow edges and an inverted (max &gt; min reversed) range.
/// </summary>
public virtual void TestFieldCacheRangeFilterLongs()
{
    IndexReader reader = SignedIndexReader;
    IndexSearcher search = NewSearcher(reader);

    int numDocs = reader.NumDocs;
    int medId = ((MaxId - MinId) / 2);
    long? minIdO = Convert.ToInt64(MinId);
    long? maxIdO = Convert.ToInt64(MaxId);
    long? medIdO = Convert.ToInt64(medId);

    Assert.AreEqual(numDocs, 1 + MaxId - MinId, "num of docs");

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));

    // test id, bounded on both ends
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, maxIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, maxIdO, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but last");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, maxIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but first");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, maxIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 2, result.Length, "all but ends");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", medIdO, maxIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + MaxId - medId, result.Length, "med and up");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, medIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + medId - MinId, result.Length, "up to med");

    // unbounded id
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", null, null, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "min and up");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", null, maxIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "max and down");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, null, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not min, but up");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", null, maxIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not max, but down");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", medIdO, maxIdO, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(MaxId - medId, result.Length, "med and up, not max");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, medIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(medId - MinId, result.Length, "not min, up to med");

    // very small sets
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, minIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "min,min,F,F");
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", medIdO, medIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "med,med,F,F");
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", maxIdO, maxIdO, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "max,max,F,F");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", minIdO, minIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "min,min,T,T");
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", null, minIdO, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "nul,min,F,T");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", maxIdO, maxIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,max,T,T");
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", maxIdO, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,nul,T,T");

    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", medIdO, medIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "med,med,T,T");

    // special cases
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", Convert.ToInt64(long.MaxValue), null, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "overflow special case");
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", null, Convert.ToInt64(long.MinValue), F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "overflow special case");
    result = search.Search(q, FieldCacheRangeFilter.NewInt64Range("id", maxIdO, minIdO, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "inverse range");
}
/// <summary>
/// Randomized multi-node "shard searcher" stress test.  Repeatedly: picks a
/// random node, acquires its shard searcher (either fresh, or pinned to the
/// versions of a previous search to simulate a follow-on/paging query), builds
/// a mock single-process MultiReader over the same per-node searcher versions,
/// runs a random term/prefix query with a random sort against both, and asserts
/// the shard result matches the mock result via AssertSame.
/// SearcherExpiredException is an expected outcome throughout (searchers age
/// out after maxSearcherAgeSeconds) and is handled by skipping the iteration.
/// </summary>
public virtual void TestSimple()
{
    int numNodes = TestUtil.NextInt32(Random, 1, 10);

    double runTimeSec = AtLeast(3);

    int minDocsToMakeTerms = TestUtil.NextInt32(Random, 5, 20);

    int maxSearcherAgeSeconds = TestUtil.NextInt32(Random, 1, 3);

    if (Verbose)
    {
        Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
    }

    Start(numNodes, runTimeSec, maxSearcherAgeSeconds);
    JCG.List<PreviousSearchState> priorSearches = new JCG.List<PreviousSearchState>();
    IList<BytesRef> terms = null;
    while (J2N.Time.NanoTime() < endTimeNanos)
    {
        // Roughly 1-in-7 iterations replay a prior search (if any exist).
        bool doFollowon = priorSearches.Count > 0 && Random.Next(7) == 1;

        // Pick a random node; we will run the query on this node:
        int myNodeID = Random.Next(numNodes);

        NodeState.ShardIndexSearcher localShardSearcher;

        PreviousSearchState prevSearchState;

        if (doFollowon)
        {
            // Pretend user issued a followon query:
            prevSearchState = priorSearches[Random.Next(priorSearches.Count)];

            if (Verbose)
            {
                Console.WriteLine("\nTEST: follow-on query age=" + ((J2N.Time.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
            }

            try
            {
                localShardSearcher = m_nodes[myNodeID].Acquire(prevSearchState.Versions);
            }
            catch (SearcherExpiredException see)
            {
                // Expected, sometimes; in a "real" app we would
                // either forward this error to the user ("too
                // much time has passed; please re-run your
                // search") or sneakily just switch to newest
                // searcher w/o telling them...
                if (Verbose)
                {
                    Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                }
                priorSearches.Remove(prevSearchState);
                continue;
            }
        }
        else
        {
            if (Verbose)
            {
                Console.WriteLine("\nTEST: fresh query");
            }
            // Do fresh query:
            localShardSearcher = m_nodes[myNodeID].Acquire();
            prevSearchState = null;
        }

        IndexReader[] subs = new IndexReader[numNodes];

        PreviousSearchState searchState = null;

        try
        {
            // Mock: now make a single reader (MultiReader) from all node
            // searchers. In a real shard env you can't do this... we
            // do it to confirm results from the shard searcher
            // are correct:
            int docCount = 0;
            try
            {
                for (int nodeID = 0; nodeID < numNodes; nodeID++)
                {
                    long subVersion = localShardSearcher.GetNodeVersions()[nodeID];
                    IndexSearcher sub = m_nodes[nodeID].Searchers.Acquire(subVersion);
                    if (sub is null)
                    {
                        // That node's searcher version is gone: release every
                        // sub-reader acquired so far, then report expiry.
                        nodeID--;
                        while (nodeID >= 0)
                        {
                            subs[nodeID].DecRef();
                            subs[nodeID] = null;
                            nodeID--;
                        }
                        throw new SearcherExpiredException("nodeID=" + nodeID + " version=" + subVersion);
                    }
                    subs[nodeID] = sub.IndexReader;
                    docCount += subs[nodeID].MaxDoc;
                }
            }
            catch (SearcherExpiredException see)
            {
                // Expected
                if (Verbose)
                {
                    Console.WriteLine("  searcher expired during mock reader init: " + see);
                }
                continue;
            }

            IndexReader mockReader = new MultiReader(subs);
            IndexSearcher mockSearcher = new IndexSearcher(mockReader);

            Query query;
            Sort sort;

            if (prevSearchState != null)
            {
                query = prevSearchState.Query;
                sort = prevSearchState.Sort;
            }
            else
            {
                // Lazily harvest a term dictionary once the index is big enough.
                if (terms is null && docCount > minDocsToMakeTerms)
                {
                    // TODO: try to "focus" on high freq terms sometimes too
                    // TODO: maybe also periodically reset the terms...?
                    TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").GetEnumerator();
                    terms = new JCG.List<BytesRef>();
                    while (termsEnum.MoveNext())
                    {
                        terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
                    }
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                    }
                    if (terms.Count == 0)
                    {
                        terms = null;
                    }
                }

                if (Verbose)
                {
                    Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                }

                if (terms != null)
                {
                    // Randomly pick a TermQuery or a 1-2 char PrefixQuery over "body".
                    if (Random.NextBoolean())
                    {
                        query = new TermQuery(new Term("body", terms[Random.Next(terms.Count)]));
                    }
                    else
                    {
                        string t = terms[Random.Next(terms.Count)].Utf8ToString();
                        string prefix;
                        if (t.Length <= 1)
                        {
                            prefix = t;
                        }
                        else
                        {
                            prefix = t.Substring(0, TestUtil.NextInt32(Random, 1, 2));
                        }
                        query = new PrefixQuery(new Term("body", prefix));
                    }

                    if (Random.NextBoolean())
                    {
                        sort = null;
                    }
                    else
                    {
                        // TODO: sort by more than 1 field
                        int what = Random.Next(3);
                        if (what == 0)
                        {
                            sort = new Sort(SortField.FIELD_SCORE);
                        }
                        else if (what == 1)
                        {
                            // TODO: this sort doesn't merge
                            // correctly... it's tricky because you
                            // could have > 2.1B docs across all shards:
                            //sort = new Sort(SortField.FIELD_DOC);
                            sort = null;
                        }
                        else if (what == 2)
                        {
                            sort = new Sort(new SortField[] { new SortField("docid", SortFieldType.INT32, Random.NextBoolean()) });
                        }
                        else
                        {
                            sort = new Sort(new SortField[] { new SortField("title", SortFieldType.STRING, Random.NextBoolean()) });
                        }
                    }
                }
                else
                {
                    // No terms harvested yet — skip the query this round.
                    query = null;
                    sort = null;
                }
            }

            if (query != null)
            {
                try
                {
                    searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                }
                catch (SearcherExpiredException see)
                {
                    // Expected; in a "real" app we would
                    // either forward this error to the user ("too
                    // much time has passed; please re-run your
                    // search") or sneakily just switch to newest
                    // searcher w/o telling them...
                    if (Verbose)
                    {
                        Console.WriteLine("  searcher expired during search: " + see);
                        Console.Out.Write(see.StackTrace);
                    }
                    // We can't do this in general: on a very slow
                    // computer it's possible the local searcher
                    // expires before we can finish our search:
                    // assert prevSearchState != null;
                    if (prevSearchState != null)
                    {
                        priorSearches.Remove(prevSearchState);
                    }
                }
            }
        }
        finally
        {
            //m_nodes[myNodeID].Release(localShardSearcher);
            NodeState.Release(localShardSearcher); // LUCENENET: Made Release() static per CA1822 for performance
            foreach (IndexReader sub in subs)
            {
                if (sub != null)
                {
                    sub.DecRef();
                }
            }
        }

        // Occasionally remember this search so a later iteration can replay it;
        // cap the history at 100-200 entries.
        if (searchState != null && searchState.SearchAfterLocal != null && Random.Next(5) == 3)
        {
            priorSearches.Add(searchState);
            if (priorSearches.Count > 200)
            {
                priorSearches.Shuffle(Random);
                priorSearches.RemoveRange(100, priorSearches.Count - 100); // LUCENENET: Converted end index to length
            }
        }
    }

    Finish();
}
/// <summary>
/// Stress test for near-real-time readers: background threads add indexes
/// into the writer for ~3 seconds while the main thread repeatedly reopens
/// writer.GetReader() and verifies the hit count for "indexname:test" never
/// decreases.  Finally asserts no thread threw, no deleted files remain open,
/// and the index passes CheckIndex.
/// </summary>
public virtual void TestDuringAddIndexes_LuceneNet()
{
    MockRAMDirectory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetInfoStream(infoStream, null);
    writer.MergeFactor = 2;

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit(null);

    // Source directories the worker threads will AddIndexes from.
    Directory[] dirs = new Directory[10];
    for (int i = 0; i < 10; i++)
    {
        dirs[i] = new MockRAMDirectory(dir1);
    }

    IndexReader r = writer.GetReader(null);

    int NUM_THREAD = 5;
    float SECONDS = 3;

    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
    // Synchronized list collecting exceptions thrown by the worker threads.
    System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));

    ThreadClass[] threads = new ThreadClass[NUM_THREAD];
    for (int i = 0; i < NUM_THREAD; i++)
    {
        threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this);
        threads[i].IsBackground = true;
        threads[i].Start();
    }

    int lastCount = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        using (IndexReader r2 = writer.GetReader(null))
        {
            Query q = new TermQuery(new Term("indexname", "test"));
            int count = new IndexSearcher(r2).Search(q, 10, null).TotalHits;
            // Docs are only ever added, so the count must be monotonic.
            Assert.IsTrue(count >= lastCount);
            lastCount = count;
        }
    }

    for (int i = 0; i < NUM_THREAD; i++)
    {
        threads[i].Join();
    }

    Assert.AreEqual(0, excs.Count);
    r.Close();
    Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
    writer.Close();

    _TestUtil.CheckIndex(dir1);

    dir1.Close();
}
/// <summary>
/// Unindexes a Node and some full text
/// </summary>
/// <param name="n">Node</param>
/// <param name="text">Full Text</param>
protected override void Unindex(INode n, string text)
{
    // Documents are keyed by the hash of (node, text), so deleting by that
    // hash term removes exactly this node/text pairing from the index.
    string hash = this.GetHash(n, text);
    LucSearch.TermQuery deleteQuery = new LucSearch.TermQuery(new Term(this._schema.HashField, hash));
    this._writer.DeleteDocuments(deleteQuery);
}
// Exhaustively exercises TermRangeFilter over the "id" field (zero-padded
// string ids from MinId..MaxId) for every combination of bounded/unbounded
// endpoints and inclusive/exclusive flags (T = include, F = exclude).
public virtual void TestRangeFilterId()
{
    IndexReader reader = SignedIndexReader;
    IndexSearcher search = NewSearcher(reader);
    int medId = ((MaxId - MinId) / 2);
    // Pad produces fixed-width string forms so lexicographic order matches numeric order.
    string minIP = Pad(MinId);
    string maxIP = Pad(MaxId);
    string medIP = Pad(medId);
    int numDocs = reader.NumDocs;
    Assert.AreEqual(numDocs, 1 + MaxId - MinId, "num of docs"); // one doc per id, no gaps
    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body")); // matches every document
    // test id, bounded on both ends
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, maxIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, maxIP, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but last");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, maxIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but first");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, maxIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 2, result.Length, "all but ends");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", medIP, maxIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + MaxId - medId, result.Length, "med and up");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, medIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1 + medId - MinId, result.Length, "up to med");
    // unbounded id (null endpoint == open-ended range)
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "min and up");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", null, maxIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "max and down");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, null, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not min, but up");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", null, maxIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not max, but down");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", medIP, maxIP, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(MaxId - medId, result.Length, "med and up, not max");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, medIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(medId - MinId, result.Length, "not min, up to med");
    // very small sets (single-point or empty ranges)
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, minIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "min,min,F,F");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", medIP, medIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "med,med,F,F");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", maxIP, maxIP, F, F), numDocs).ScoreDocs;
    Assert.AreEqual(0, result.Length, "max,max,F,F");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", minIP, minIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "min,min,T,T");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", null, minIP, F, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "nul,min,F,T");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", maxIP, maxIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,max,T,T");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", maxIP, null, T, F), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,nul,T,T");
    result = search.Search(q, TermRangeFilter.NewStringRange("id", medIP, medIP, T, T), numDocs).ScoreDocs;
    Assert.AreEqual(1, result.Length, "med,med,T,T");
}
/// <summary>
/// Demo entry point: indexes one document with the JieBa Chinese analyzer,
/// runs an interactive fuzzy-search + highlight loop against the "content"
/// field, then demonstrates the JiebaSegmenter cut modes.
/// Enter an empty line at the search prompt to leave the loop.
/// </summary>
static void Main(string[] args)
{
    var indexWriterConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, new JieBaAnalyzer(TokenizerMode.Default));
    var directory = FSDirectory.Open(new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory + "Lucene"));

    // One document with a handful of analyzed, stored text fields.
    var fieldList = new List<Field>
    {
        new TextField("id", "22", Field.Store.YES),
        new TextField("soc", "呵呵", Field.Store.YES),
        new TextField("shot", "内容分类标准以及为读者提供的任何信息", Field.Store.YES),
        new TextField("content", "《人民日报》(电子版)的一切内容(包括但不限于文字、图片、PDF、图表、标志、标识、商标、版面设计、专栏目录与名称、内容分类标准以及为读者提供的任何信息)仅供人民网读者阅读、学习研究使用,未经人民网股份有限公司及/或相关权利人书面授权,任何单位及个人不得将《人民日报》(电子版)所登载、发布的内容用于商业性目的,包括但不限于转载、复制、发行、制作光盘、数据库、触摸展示等行为方式,或将之在非本站所属的服务器上作镜像。否则,人民网股份有限公司将采取包括但不限于网上公示、向有关部门举报、诉讼等一切合法手段,追究侵权者的法律责任。《人民日报》(电子版)的一切内容(包括但不限于文字、图片、PDF、图表、标志、标识、商标、版面设计、专栏目录与名称、内容分类标准以及为读者提供的任何信息)仅供人民网读者阅读、学习研究使用,未经人民网股份有限公司及/或相关权利人书面授权,任何单位及个人不得将《人民日报》(电子版)所登载、发布的内容用于商业性目的,包括但不限于转载、复制、发行、制作光盘、数据库、触摸展示等行为方式,或将之在非本站所属的服务器上作镜像。否则,人民网股份有限公司将采取包括但不限于网上公示、向有关部门举报、诉讼等一切合法手段,追究侵权者的法律责任。", Field.Store.YES)
    };

    // FIX: the writer was never disposed, so it held the index write lock and
    // file handles for the program's lifetime. It is not used after Commit,
    // so scope it with `using`.
    using (var indexWriter = new IndexWriter(directory, indexWriterConfig))
    {
        indexWriter.AddDocument(fieldList);
        indexWriter.Commit();
    }

    while (true)
    {
        var queryK = Console.ReadLine();
        // FIX: the original loop had no exit, which made all the segmenter
        // demo code below unreachable and fed empty input straight into
        // FuzzyQuery. An empty/blank line now ends the search loop.
        if (string.IsNullOrWhiteSpace(queryK))
        {
            break;
        }

        // FIX: DirectoryReader.Open was called on every iteration without
        // ever being disposed, leaking index file handles; `using` scopes
        // the reader (and its searcher) to a single iteration.
        using (var directoryReader = DirectoryReader.Open(directory))
        {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            var analyzer = new JieBaAnalyzer(TokenizerMode.Search);

            // MultiFieldQueryParser parses across several fields at once (and
            // handles space-separated input). Built here for demonstration /
            // syntax validation only; the search below uses the fuzzy query.
            String[] fields = { "soc", "content" };
            Occur[] clauses = { Occur.SHOULD, Occur.SHOULD };
            Query multiFieldQuery = MultiFieldQueryParser.Parse(LuceneVersion.LUCENE_48, queryK, fields, clauses, analyzer);

            var fuzzy = new FuzzyQuery(new Term("content", queryK));
            TopDocs topDocs = indexSearcher.Search(fuzzy, 100); // top 100 hits
            Console.WriteLine("找到: " + topDocs.TotalHits);

            QueryScorer scorer = new QueryScorer(fuzzy, "content");
            // FIX: "backgroud" was a typo in the emitted inline style, so the
            // highlight span carried a broken CSS property.
            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span style=\"background:red\">", "</span>");
            Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
            foreach (var doc in topDocs.ScoreDocs)
            {
                var returnDoc = indexSearcher.Doc(doc.Doc);
                var resultHiligh = highlighter.GetBestFragments(analyzer, "content", returnDoc.Get("content"), 3);
                Console.WriteLine(string.Join("", resultHiligh));
            }
        }
    }

    // JiebaSegmenter demonstrations (reachable now that the loop can exit).
    var segmenter = new JiebaSegmenter();
    var segments = segmenter.Cut("我来到北京清华大学", cutAll: true); // full mode
    Console.WriteLine("【全模式】:{0}", string.Join("/ ", segments));
    segments = segmenter.Cut("我来到北京清华大学"); // accurate mode (default)
    Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));
    segments = segmenter.Cut("他来到了网易杭研大厦"); // accurate mode with HMM new-word detection
    Console.WriteLine("【新词识别】:{0}", string.Join("/ ", segments));
    segments = segmenter.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"); // search-engine mode
    Console.WriteLine("【搜索引擎模式】:{0}", string.Join("/ ", segments));
    segments = segmenter.Cut("结过婚的和尚未结过婚的");
    Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));
    segments = segmenter.Cut("北京大学生喝进口红酒");
    Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));
    segments = segmenter.Cut("在北京大学生活区喝进口红酒");
    Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));
    segments = segmenter.Cut("腾讯视频致力于打造中国最大的在线视频媒体平台,以丰富的内容、极致的观看体验");
    Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));
    // User-dictionary manipulation: remove then re-add a word before cutting.
    segmenter.DeleteWord("湖南");
    segmenter.AddWord("湖南");
    segments = segmenter.Cut("湖南长沙市天心区");
    Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));
    Console.Read();
}
/// <summary>
/// A plain term query for "tangfulin" must match exactly two documents.
/// </summary>
public virtual void TestTermQuery()
{
    TermQuery termQuery = new TermQuery(new Term(FIELD, "tangfulin"));
    int totalHits = Searcher.Search(termQuery, null, 1000).TotalHits;
    Assert.AreEqual(2, totalHits, "Number of matched documents");
}
// Stress test: background threads add index copies through the shared writer
// while the main thread repeatedly reopens a reader and checks that the hit
// count for the marker term never decreases. Currently ignored (known issue).
public virtual void TestDuringAddIndexes()
{
    Assert.Ignore("Known issue");
    MockRAMDirectory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetInfoStream(infoStream, null);
    writer.MergeFactor = 2; // small merge factor => merges happen frequently during the run
    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit(null);
    // Ten snapshots of the base index for the worker threads to AddIndexes from.
    Directory[] dirs = new Directory[10];
    for (int i = 0; i < 10; i++)
    {
        dirs[i] = new MockRAMDirectory(dir1);
    }
    IndexReader r = writer.GetReader(null);
    int NUM_THREAD = 5;
    float SECONDS = 3;
    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
    // Synchronized list that collects any exceptions thrown by worker threads.
    System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));
    ThreadClass[] threads = new ThreadClass[NUM_THREAD];
    for (int i = 0; i < NUM_THREAD; i++)
    {
        threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this);
        threads[i].IsBackground = true;
        threads[i].Start();
    }
    int lastCount = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        // Reopen returns the same instance when nothing changed; only close
        // the old reader when a genuinely new one was produced.
        IndexReader r2 = r.Reopen(null);
        if (r2 != r)
        {
            r.Close();
            r = r2;
        }
        Query q = new TermQuery(new Term("indexname", "test"));
        int count = new IndexSearcher(r).Search(q, 10, null).TotalHits;
        Assert.IsTrue(count >= lastCount); // docs are only added, never removed
        lastCount = count;
    }
    for (int i = 0; i < NUM_THREAD; i++)
    {
        threads[i].Join();
    }
    Assert.AreEqual(0, excs.Count); // no worker thread may have failed
    r.Close();
    try
    {
        Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
    }
    catch
    {
        //DIGY:
        //I think this is an expected behaviour.
        //There isn't any pending files to be deleted after "writer.Close()".
        //But, since lucene.java's test case is designed that way
        //and I might be wrong, I will add a warning
        // Assert only in debug mode, so that CheckIndex is called during release.
#if DEBUG
        Assert.Inconclusive("", 0, dir1.GetOpenDeletedFiles().Count);
#endif
    }
    writer.Close();
    _TestUtil.CheckIndex(dir1);
    dir1.Close();
}
// Verifies a per-field similarity wrapper that scales scores on field "foo"
// by the per-document "foo_boost" doc value: doc 1 is boosted x2, doc 2 x4,
// while field "bar" (not wrapped) must score identically with and without it.
public virtual void TestSimple()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    Field field = NewTextField("foo", "", Field.Store.NO);
    doc.Add(field);
    // Doc-values field that carries the per-document boost factor.
    Field dvField = new FloatDocValuesField("foo_boost", 0.0F);
    doc.Add(dvField);
    Field field2 = NewTextField("bar", "", Field.Store.NO);
    doc.Add(field2);
    // The same Document instance is reused: mutate field values, then add again.
    field.StringValue = "quick brown fox";
    field2.StringValue = "quick brown fox";
    dvField.FloatValue = 2f; // boost x2
    iw.AddDocument(doc);
    field.StringValue = "jumps over lazy brown dog";
    field2.StringValue = "jumps over lazy brown dog";
    dvField.FloatValue = 4f; // boost x4
    iw.AddDocument(doc);
    IndexReader ir = iw.Reader;
    iw.Dispose();
    // no boosting
    IndexSearcher searcher1 = NewSearcher(ir, false, Similarity);
    Similarity @base = searcher1.Similarity;
    // boosting
    IndexSearcher searcher2 = NewSearcher(ir, false, Similarity);
    searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base);
    // in this case, we searched on field "foo". first document should have 2x the score.
    TermQuery tq = new TermQuery(new Term("foo", "quick"));
    QueryUtils.Check(Random(), tq, searcher1, Similarity);
    QueryUtils.Check(Random(), tq, searcher2, Similarity);
    TopDocs noboost = searcher1.Search(tq, 10);
    TopDocs boost = searcher2.Search(tq, 10);
    Assert.AreEqual(1, noboost.TotalHits);
    Assert.AreEqual(1, boost.TotalHits);
    //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc));
    Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON);
    // this query matches only the second document, which should have 4x the score.
    tq = new TermQuery(new Term("foo", "jumps"));
    QueryUtils.Check(Random(), tq, searcher1, Similarity);
    QueryUtils.Check(Random(), tq, searcher2, Similarity);
    noboost = searcher1.Search(tq, 10);
    boost = searcher2.Search(tq, 10);
    Assert.AreEqual(1, noboost.TotalHits);
    Assert.AreEqual(1, boost.TotalHits);
    Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON);
    // search on on field bar just for kicks, nothing should happen, since we setup
    // our sim provider to only use foo_boost for field foo.
    tq = new TermQuery(new Term("bar", "quick"));
    QueryUtils.Check(Random(), tq, searcher1, Similarity);
    QueryUtils.Check(Random(), tq, searcher2, Similarity);
    noboost = searcher1.Search(tq, 10);
    boost = searcher2.Search(tq, 10);
    Assert.AreEqual(1, noboost.TotalHits);
    Assert.AreEqual(1, boost.TotalHits);
    Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON);
    ir.Dispose();
    dir.Dispose();
}
// LUCENE-2617 regression test: clauses whose terms are absent from the index
// (or whose scorer is null, like an empty PhraseQuery) must still affect the
// score through the coord factor, and a required null-scorer clause must make
// the whole query match nothing.
public virtual void TestNullOrSubScorer()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewTextField("field", "a b c d", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.Reader;
    IndexSearcher s = NewSearcher(r);
    // this test relies upon coord being the default implementation,
    // otherwise scores are different!
    s.Similarity = new DefaultSimilarity();
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
    float score = s.Search(q, 10).MaxScore;
    Query subQuery = new TermQuery(new Term("field", "not_in_index"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.SHOULD);
    float score2 = s.Search(q, 10).MaxScore;
    // 1 of 2 clauses matched => coord halves the score
    Assert.AreEqual(score * .5F, score2, 1e-6);
    // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
    BooleanQuery qq = (BooleanQuery)q.Clone();
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "not_in_index"));
    phrase.Add(new Term("field", "another_not_in_index"));
    phrase.Boost = 0;
    qq.Add(phrase, Occur.SHOULD);
    score2 = s.Search(qq, 10).MaxScore;
    // 1 of 3 clauses matched
    Assert.AreEqual(score * (1 / 3F), score2, 1e-6);
    // now test BooleanScorer2
    subQuery = new TermQuery(new Term("field", "b"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.MUST); // a MUST clause forces the BooleanScorer2 path
    score2 = s.Search(q, 10).MaxScore;
    // 2 of 3 clauses matched
    Assert.AreEqual(score * (2 / 3F), score2, 1e-6);
    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, Occur.SHOULD); // optional null-scorer clause: still matches
    Assert.AreEqual(1, s.Search(q, 10).TotalHits);
    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    q.Add(pq, Occur.MUST); // required null-scorer clause: matches nothing
    Assert.AreEqual(0, s.Search(q, 10).TotalHits);
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq); // dismax tolerates a null-scorer disjunct
    Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);
    r.Dispose();
    w.Dispose();
    dir.Dispose();
}
// Indexes the same token "one" into one field five times with five different
// term-vector configurations (none / vectors / +positions / +offsets / +both)
// and verifies the merged term vector reports one term with frequency 5,
// positions 0..4 and 4-char-spaced offsets.
// NOTE(review): "Vectros" in the name looks like a typo for "Vectors", but the
// name is the test's public identifier, so it is left unchanged.
public virtual void TestMixedVectrosVectors()
{
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true)).SetOpenMode(OpenMode.CREATE));
    Document doc = new Document();
    // ft2: term vectors only
    FieldType ft2 = new FieldType(TextField.TYPE_STORED);
    ft2.StoreTermVectors = true;
    // ft3: term vectors + positions
    FieldType ft3 = new FieldType(TextField.TYPE_STORED);
    ft3.StoreTermVectors = true;
    ft3.StoreTermVectorPositions = true;
    // ft4: term vectors + offsets
    FieldType ft4 = new FieldType(TextField.TYPE_STORED);
    ft4.StoreTermVectors = true;
    ft4.StoreTermVectorOffsets = true;
    // ft5: term vectors + offsets + positions
    FieldType ft5 = new FieldType(TextField.TYPE_STORED);
    ft5.StoreTermVectors = true;
    ft5.StoreTermVectorOffsets = true;
    ft5.StoreTermVectorPositions = true;
    doc.Add(NewTextField("field", "one", Field.Store.YES));
    doc.Add(NewField("field", "one", ft2));
    doc.Add(NewField("field", "one", ft3));
    doc.Add(NewField("field", "one", ft4));
    doc.Add(NewField("field", "one", ft5));
    writer.AddDocument(doc);
    IndexReader reader = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);
    Query query = new TermQuery(new Term("field", "one"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Fields vectors = searcher.IndexReader.GetTermVectors(hits[0].Doc);
    Assert.IsNotNull(vectors);
    Assert.AreEqual(1, vectors.Count);
    Terms vector = vectors.GetTerms("field");
    Assert.IsNotNull(vector);
    Assert.AreEqual(1, vector.Count); // only one distinct term: "one"
    TermsEnum termsEnum = vector.GetIterator(null);
    Assert.IsNotNull(termsEnum.Next());
    Assert.AreEqual("one", termsEnum.Term.Utf8ToString());
    Assert.AreEqual(5, termsEnum.TotalTermFreq); // added to the field 5 times
    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    Assert.IsNotNull(dpEnum);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(5, dpEnum.Freq);
    for (int i = 0; i < 5; i++)
    {
        Assert.AreEqual(i, dpEnum.NextPosition()); // consecutive positions 0..4
    }
    // Re-check via a reused enum, this time validating offsets.
    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
    Assert.IsNotNull(dpEnum);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(5, dpEnum.Freq);
    for (int i = 0; i < 5; i++)
    {
        dpEnum.NextPosition();
        // "one" plus separator => each occurrence starts 4 chars after the previous
        Assert.AreEqual(4 * i, dpEnum.StartOffset);
        Assert.AreEqual(4 * i + 3, dpEnum.EndOffset);
    }
    reader.Dispose();
}
// Stores the reference to the owning TermQuery. NOTE(review): this InitBlock
// pattern appears to come from an automated Java-to-C# port of an inner
// class — confirm before refactoring it away.
private void InitBlock(TermQuery enclosingInstance) { this.enclosingInstance = enclosingInstance; }
// Random rnd is passed in so that the exact same random query may be created
// more than once (the caller can reseed and replay the sequence).
// Builds a random BooleanQuery over the given field/values; `level` bounds
// recursion depth and `allowMust` gates MUST clauses. `cb.PostCreate` is
// invoked on every BooleanQuery built, including nested ones.
public static BooleanQuery RandBoolQuery(Random rnd, bool allowMust, int level, string field, string[] vals, Callback cb)
{
    // NOTE: rnd.Next() is never negative, so coord is never disabled here;
    // kept as-is to preserve the original RNG consumption exactly.
    BooleanQuery result = new BooleanQuery(rnd.Next() < 0);
    // The bound is intentionally re-drawn on every loop check, matching the
    // original's randomized (re-evaluated) termination condition.
    for (int clauseIndex = 0; clauseIndex < rnd.Next(vals.Length) + 1; clauseIndex++)
    {
        int queryType = 0; // term query
        if (level > 0)
        {
            queryType = rnd.Next(10);
        }
        Query clauseQuery;
        if (queryType < 3)
        {
            // 0-2: single term
            clauseQuery = new TermQuery(new Term(field, vals[rnd.Next(vals.Length)]));
        }
        else if (queryType < 4)
        {
            // 3: two-term sloppy phrase
            Term first = new Term(field, vals[rnd.Next(vals.Length)]);
            Term second = new Term(field, vals[rnd.Next(vals.Length)]);
            PhraseQuery phrase = new PhraseQuery();
            phrase.Add(first);
            phrase.Add(second);
            phrase.Slop = 10; // increase possibility of matching
            clauseQuery = phrase;
        }
        else if (queryType < 7)
        {
            // 4-6: wildcard
            clauseQuery = new WildcardQuery(new Term(field, "w*"));
        }
        else
        {
            // 7-9: nested boolean, one level shallower
            clauseQuery = RandBoolQuery(rnd, allowMust, level - 1, field, vals, cb);
        }
        // Pick the occur flag: 20% MUST_NOT, 30% MUST (when allowed), rest SHOULD.
        int occurSelector = rnd.Next(10);
        BooleanClause.Occur occur;
        if (occurSelector < 2)
        {
            occur = BooleanClause.Occur.MUST_NOT;
        }
        else if (occurSelector < 5 && allowMust)
        {
            occur = BooleanClause.Occur.MUST;
        }
        else
        {
            occur = BooleanClause.Occur.SHOULD;
        }
        result.Add(clauseQuery, occur);
    }
    if (cb != null)
    {
        cb.PostCreate(result);
    }
    return result;
}
// Verifies that a PhraseQuery behaves correctly when combined with a TermQuery
// inside a required BooleanQuery (the ConjunctionScorer path), in both clause
// orders, against two small hand-built indexes (legacy 3.x API).
public virtual void TestPhraseQueryInConjunctionScorer()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    // Doc 1: phrase only; doc 2: phrase plus the "foobar" term.
    Document doc = new Document();
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory, true);
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length); // phrase alone matches both docs
    QueryUtils.Check(phraseQuery, searcher);
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length); // conjunction narrows to the doc with "foobar"
    QueryUtils.Check(termQuery, searcher);
    searcher.Close();
    // Second index: three docs; phrase "map entry" only matches when the
    // terms are adjacent ("map foobarword entry" must NOT match).
    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory, true);
    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));
    hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    // Conjunction result must be independent of clause order.
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, Occur.MUST);
    booleanQuery.Add(termQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(booleanQuery, searcher);
    searcher.Close();
    directory.Close();
}
// Verifies that a PhraseQuery behaves correctly when combined with a TermQuery
// inside a required BooleanQuery (the ConjunctionScorer path), in both clause
// orders, against two small indexes (4.x RandomIndexWriter API).
public virtual void TestPhraseQueryInConjunctionScorer()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    // Doc 1: phrase only; doc 2: phrase plus the "foobar" term.
    Documents.Document doc = new Documents.Document();
    doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "foobar", Field.Store.YES));
    doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader reader = writer.GetReader();
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length); // phrase alone matches both docs
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, phraseQuery, searcher);
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length); // conjunction narrows to the doc with "foobar"
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, termQuery, searcher);
    reader.Dispose();
    // Second index (OpenMode.CREATE wipes the first): three docs; phrase
    // "map entry" must not match the non-adjacent "map foobarword entry".
    writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE));
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES));
    writer.AddDocument(doc);
    reader = writer.GetReader();
    writer.Dispose();
    searcher = NewSearcher(reader);
    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));
    hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    // Conjunction result must be independent of clause order.
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, Occur.MUST);
    booleanQuery.Add(termQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, booleanQuery, searcher);
    reader.Dispose();
    directory.Dispose();
}