/// <summary>
/// Initializes a new instance of the <see cref="LuceneSearchResults" /> class.
/// </summary>
/// <param name="searcher">The searcher.</param>
/// <param name="reader">The reader.</param>
/// <param name="docs">The hits.</param>
/// <param name="criteria">The criteria.</param>
/// <param name="query">The query.</param>
public LuceneSearchResults(Searcher searcher, IndexReader reader, TopDocs docs, ISearchCriteria criteria, Query query)
{
    Results = new SearchResults(criteria, null);
    CreateDocuments(searcher, docs);
    CreateFacets(reader, query);
    CreateSuggestions(reader, criteria);
}
/// <summary>
/// Populates the SearchResult object with data from the specified TopDocs object.
/// </summary>
/// <param name="result">The SearchResult to be populated.</param>
/// <param name="topDocs">The TopDocs object returned by Lucene.</param>
/// <param name="categories">The categories.</param>
/// <param name="getDoc">A lambda that returns the Lucene document given the doc id.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="result"/>, <paramref name="topDocs"/>, or <paramref name="getDoc"/> is null.
/// </exception>
public static void PopulateWith(this SearchResult<Guid> result, TopDocs topDocs, IEnumerable<Category> categories, Func<int, LuceneDocument> getDoc)
{
    if (result == null) throw new ArgumentNullException(nameof(result));
    if (topDocs == null) throw new ArgumentNullException(nameof(topDocs));
    if (getDoc == null) throw new ArgumentNullException(nameof(getDoc));

    result.ItemCount = topDocs.ScoreDocs.Length;
    result.TotalHits = topDocs.TotalHits;

    if (result.ItemCount > 0)
    {
        var itemsToSkip = (result.PageNumber - 1) * result.ItemsPerPage;
        var itemsToTake = result.ItemsPerPage;

        var scoreDocs = topDocs.ScoreDocs
            .Skip(itemsToSkip)
            .Take(itemsToTake)
            .ToList();

        var documentIds = new List<Guid>();
        for (var i = 0; i < scoreDocs.Count; i++)
        {
            var sd = scoreDocs[i];
            var doc = getDoc(sd.Doc);
            if (doc == null)
                continue;

            // Skip documents that lack an ID field instead of throwing.
            var idField = doc.GetField(Schema.StandardField.ID);
            if (idField == null)
                continue;

            documentIds.Add(Guid.Parse(idField.StringValue()));
        }

        result.Items = documentIds;
        result.Categories = categories ?? Enumerable.Empty<Category>();
        result.PageCount = ComputePageCount(result.ItemCount, result.ItemsPerPage);
    }
}
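A minimal call-site sketch for the extension above; the searcher, query, and paging values are illustrative assumptions, and getDoc simply delegates to the searcher that produced the hits (LuceneDocument is taken to be an alias for Lucene's Document type):

// Hypothetical usage (names assumed, not from the original source).
var result = new SearchResult<Guid> { PageNumber = 1, ItemsPerPage = 20 };
TopDocs topDocs = searcher.Search(query, 1000);
// Resolve each hit's doc id back through the same searcher.
result.PopulateWith(topDocs, categories: null, getDoc: docId => searcher.Doc(docId));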
public void DoStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, IFormatter formatter)
{
    DoStandardHighlights(analyzer, searcher, hits, query, formatter, false);
}
public object SearchPrevIndex(Dictionary<string, string> dic, int pageIndex, int pageSize, LibHandle handle, string lastFileId)
{
    BooleanQuery bQuery = new BooleanQuery();
    foreach (var item in dic)
    {
        QueryParser parse = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, item.Key, PanGuAnalyzer);
        // Set the default operator before parsing so it actually applies to the query.
        parse.SetDefaultOperator(QueryParser.Operator.AND);
        Query query = parse.Parse(GetKeyWordsSplitBySpace(item.Value));
        bQuery.Add(query, BooleanClause.Occur.MUST);
    }

    IndexSearcher search = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(IndexDic)), true);
    Stopwatch stopwatch = Stopwatch.StartNew();

    // The third SortField constructor argument selects the direction:
    // true = descending, false = ascending.
    Sort sort = new Sort(new SortField("fileId", SortField.STRING, true));
    TopDocs docs = search.Search(bQuery, null, 1000, sort);
    stopwatch.Stop();

    SearchResult doclist = new SearchResult();
    if (docs != null && docs.totalHits > 0)
    {
        doclist.SearchTime = stopwatch.ElapsedMilliseconds;
        doclist.TotalHits = docs.totalHits;

        List<AbstractFileBase> fileList = new List<AbstractFileBase>();
        List<string> docIds = new List<string>();
        Queue docsQueue = new Queue();

        // Only the top 1000 hits were collected, so iterate over the returned
        // ScoreDocs rather than totalHits, which can be larger.
        int hitCount = Math.Min(docs.totalHits, docs.scoreDocs.Length);
        for (int i = (pageIndex - 1) * pageSize; i < hitCount; i++)
        {
            Document doc = search.Doc(docs.scoreDocs[i].doc);
            string fileId = doc.Get("fileId");
            docIds.Add(fileId);
            fileList.Add(new TextFileInfo()
            {
                FileId = fileId,
                Content = doc.Get("content"),
            });

            #region Every 100 iterations, or on the last iteration
            if (((i - (pageIndex - 1) * pageSize) % 100 == 0 && i != 0) || i == hitCount - 1)
            {
                List<string> allowDocIds = DMPermissionControl.Default.FilterDocIds(handle, DMFuncPermissionEnum.Read, docIds);
                foreach (AbstractFileBase file in fileList)
                {
                    if (allowDocIds.Contains(file.FileId))
                    {
                        if (string.Compare(file.FileId, lastFileId) <= 0)
                        {
                            foreach (object o in docsQueue.ToArray())
                            {
                                doclist.Docs.Add((AbstractFileBase)o);
                            }
                            return doclist;
                        }
                        else
                        {
                            if (docsQueue.Count >= pageSize)
                            {
                                docsQueue.Dequeue();
                            }
                            docsQueue.Enqueue(file);
                        }
                    }
                }
                docIds.Clear();
                fileList.Clear();
            }
            #endregion
        }
    }
    return doclist;
}
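Scanning from the first hit on every call makes deep pages progressively more expensive. If the index were on Lucene.NET 4.8 rather than the 2.9 API above, IndexSearcher.SearchAfter could resume from the previous page's last hit instead; a rough sketch under that assumption, with names chosen for illustration:

// Sketch, assuming Lucene.NET 4.8 (the snippet above targets the 2.9 API).
// 'previousPageLast' is the last ScoreDoc of the previous page (null for page 1);
// when a Sort is used it must be the FieldDoc returned by the earlier search.
Sort sort = new Sort(new SortField("fileId", SortFieldType.STRING, true));
TopDocs page = previousPageLast == null
    ? searcher.Search(query, pageSize, sort)
    : searcher.SearchAfter(previousPageLast, query, pageSize, sort);
foreach (ScoreDoc sd in page.ScoreDocs)
{
    Document doc = searcher.Doc(sd.Doc); // project stored fields as needed
}
ScoreDoc nextCursor = page.ScoreDocs.Length > 0 ? page.ScoreDocs[page.ScoreDocs.Length - 1] : null;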
public void TestMultiSearcher()
{
    // setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
    d.Add(f);
    writer1.AddDocument(d);
    writer1.Optimize();
    writer1.Close();
    IndexReader reader1 = IndexReader.Open(ramDir1, true);

    // setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
    d.Add(f);
    writer2.AddDocument(d);
    writer2.Optimize();
    writer2.Close();
    IndexReader reader2 = IndexReader.Open(ramDir2, true);

    var searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1, true);
    searchers[1] = new IndexSearcher(ramDir2, true);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
    parser.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
    query = parser.Parse("multi*");
    Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    // at this point the multisearcher calls combine(query[])
    hits = multiSearcher.Search(query, null, 1000);

    // query = QueryParser.Parse("multi*", FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
    Query[] expandedQueries = new Query[2];
    expandedQueries[0] = query.Rewrite(reader1);
    expandedQueries[1] = query.Rewrite(reader2);
    query = query.Combine(expandedQueries);

    // create an instance of the highlighter with the tags used to surround
    // highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));

    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = multiSearcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        String highlightedText = highlighter.GetBestFragment(tokenStream, text);
        Console.WriteLine(highlightedText);
    }
    Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
}
public void TestSimpleWithScoring()
{
    const string idField = "id";
    const string toField = "movieId";

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // 0
    Document doc = new Document();
    doc.Add(new TextField("description", "A random movie", Field.Store.NO));
    doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
    doc.Add(new TextField(idField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
    doc.Add(new TextField(idField, "2", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 2
    doc = new Document();
    doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
    doc.Add(new TextField(idField, "3", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 3
    doc = new Document();
    doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
    doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
    doc.Add(new TextField(idField, "4", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();

    // 4
    doc = new Document();
    doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
    doc.Add(new TextField(idField, "5", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
    doc.Add(new TextField(idField, "6", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
    w.Dispose();

    // Search for movie via subtitle
    Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
    TopDocs result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(0, result.ScoreDocs[0].Doc);
    assertEquals(3, result.ScoreDocs[1].Doc);

    // Score mode max.
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Max);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(3, result.ScoreDocs[0].Doc);
    assertEquals(0, result.ScoreDocs[1].Doc);

    // Score mode total
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Total);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(0, result.ScoreDocs[0].Doc);
    assertEquals(3, result.ScoreDocs[1].Doc);

    // Score mode avg
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Avg);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(3, result.ScoreDocs[0].Doc);
    assertEquals(0, result.ScoreDocs[1].Doc);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex)
{
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("indexIter=" + indexIter);
        }
        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));
        bool scoreDocsInOrder = TestJoinUtil.Random().NextBoolean();
        IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

        IndexReader topLevelReader = w.Reader;
        w.Dispose();
        for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
        {
            if (VERBOSE)
            {
                Console.WriteLine("searchIter=" + searchIter);
            }
            IndexSearcher indexSearcher = NewSearcher(topLevelReader);

            int r = Random().Next(context.RandomUniqueValues.Length);
            bool from = context.RandomFrom[r];
            string randomValue = context.RandomUniqueValues[r];
            FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context);

            Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (VERBOSE)
            {
                Console.WriteLine("actualQuery=" + actualQuery);
            }
            var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
            ScoreMode scoreMode = (ScoreMode)Random().Next(scoreModeLength);
            if (VERBOSE)
            {
                Console.WriteLine("scoreMode=" + scoreMode);
            }

            Query joinQuery;
            if (from)
            {
                joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
            }
            else
            {
                joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
            }
            if (VERBOSE)
            {
                Console.WriteLine("joinQuery=" + joinQuery);
            }

            // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
            FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
            indexSearcher.Search(joinQuery, new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult, topScoreDocCollector));

            // Asserting bit set...
            if (VERBOSE)
            {
                Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                DocIdSetIterator iterator = expectedResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
                Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                iterator = actualResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
            }
            assertEquals(expectedResult, actualResult);

            // Asserting TopDocs...
            TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
            TopDocs actualTopDocs = topScoreDocCollector.TopDocs();
            assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
            assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
            if (scoreMode == ScoreMode.None)
            {
                continue;
            }
            assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
            for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
            {
                if (VERBOSE)
                {
                    Console.Write(string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
                    Console.Write(string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
                }
                assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
            }
        }
        topLevelReader.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Searches for and displays results.
/// </summary>
/// <param name="search">The text to search for. Supports file title and contents.</param>
/// <param name="writeToConsole">Whether or not to write search status to console (errors still report).</param>
public Task<List<string>> SearchFiles(string search, bool writeToConsole = true)
{
    SearchText = search;
    return Task.Run(() =>
    {
        var matches = new List<string>();
        if (!IndexDirectoryExists() && !DirectoryReader.IndexExists(fSDirectory))
        {
            GeneralHelper.WriteToConsole($"No index available! Please unpack game assets and generate an index.\n");
            return matches;
        }

        try
        {
            using (Analyzer analyzer = new CustomAnalyzer())
            using (IndexReader reader = DirectoryReader.Open(fSDirectory))
            {
                IndexSearcher searcher = new IndexSearcher(reader);
                MultiFieldQueryParser queryParser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, new[] { "title", "body" }, analyzer)
                {
                    AllowLeadingWildcard = true
                };
                Query searchTermQuery = queryParser.Parse('*' + QueryParser.Escape(search.Trim()) + '*');
                BooleanQuery aggregateQuery = new BooleanQuery() { { searchTermQuery, Occur.MUST } };

                if (reader.MaxDoc != 0)
                {
                    var start = DateTime.Now;
                    if (writeToConsole)
                    {
                        GeneralHelper.WriteToConsole("Search started.\n");
                    }

                    // perform search
                    TopDocs topDocs = searcher.Search(aggregateQuery, reader.MaxDoc);

                    if (writeToConsole)
                    {
                        GeneralHelper.WriteToConsole($"Search returned {topDocs.ScoreDocs.Length} results in {TimeSpan.FromTicks(DateTime.Now.Subtract(start).Ticks).TotalMilliseconds} ms\n");
                    }

                    // display results
                    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
                    {
                        float score = scoreDoc.Score;
                        int docId = scoreDoc.Doc;
                        Document doc = searcher.Doc(docId);
                        matches.Add(doc.Get("path"));
                    }
                }
                else
                {
                    GeneralHelper.WriteToConsole("No documents available. Please generate the index again.\n");
                }
            }
        }
        catch
        {
            // Checking if the index is corrupt is slower than just letting it fail
            GeneralHelper.WriteToConsole($"Available index is corrupt. Please rerun the indexer to create a new one.\n");
        }
        return matches.OrderBy(m => m).ToList();
    });
}
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());
public virtual void RunTest(string testName)
{
    Failed.Set(false);
    AddCount.Set(0);
    DelCount.Set(0);
    PackCount.Set(0);

    DateTime t0 = DateTime.UtcNow;

    Random random = new Random(Random().Next());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    DirectoryInfo tempDir = CreateTempDir(testName);
    Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (Dir is BaseDirectoryWrapper)
    {
        ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TEST_NIGHTLY)
    {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy)
        {
            ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
        }
        else if (mp is LogByteSizeMergePolicy)
        {
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
        }
        else if (mp is LogMergePolicy)
        {
            ((LogMergePolicy)mp).MaxMergeDocs = 100000;
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

    if (VERBOSE)
    {
        conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
    }

    Writer = new IndexWriter(Dir, conf);
    TestUtil.ReduceOpenFiles(Writer);

    TaskScheduler es = Random().NextBoolean() ? null : TaskScheduler.Default;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);
    int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

    DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

    ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    for (int thread = 0; thread < indexThreads.Length; thread++)
    {
        indexThreads[thread].Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
    }

    IndexSearcher s = FinalSearcher;
    if (VERBOSE)
    {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    Assert.IsFalse(Failed.Get());

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs.ToList())
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted)
        {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count)
            {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            }
            else
            {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    else
                    {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs)
                {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        }
        else
        {
            // Pack was deleted -- make sure its docs are
            // deleted.  We can't verify packID is deleted
            // because we can re-use packID for update:
            foreach (string subID in subDocs.SubIDs)
            {
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
    docs.Dispose();

    for (int id = 0; id < endID; id++)
    {
        string stringID = "" + id;
        if (!delIDs.Contains(stringID))
        {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1)
            {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray()));
                doFail = true;
            }
        }
    }
    Assert.IsFalse(doFail);

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
    ReleaseSearcher(s);

    Writer.Commit();

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    DoClose();
    Writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*if (es != null)
    {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }*/

    TestUtil.CheckIndex(Dir);
    Dir.Dispose();
    System.IO.Directory.Delete(tempDir.FullName, true);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }
}
private static void DocumentCountImpl(JsonWriter jsonWriter, IndexSearcher searcher, Query query)
{
    TopDocs topDocs = searcher.Search(query, 1);
    ResponseFormatter.WriteV2CountResult(jsonWriter, topDocs.TotalHits);
}
public virtual void TestNRTAndCommit()
{
    Directory dir = NewDirectory();
    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    RandomIndexWriter w = new RandomIndexWriter(Random, cachedDir, conf);
    LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
    int numDocs = TestUtil.NextInt32(Random, 100, 400);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs);
    }

    IList<BytesRef> ids = new List<BytesRef>();
    DirectoryReader r = null;
    for (int docCount = 0; docCount < numDocs; docCount++)
    {
        Document doc = docs.NextDoc();
        ids.Add(new BytesRef(doc.Get("docid")));
        w.AddDocument(doc);
        if (Random.Next(20) == 17)
        {
            if (r == null)
            {
                r = DirectoryReader.Open(w.IndexWriter, false);
            }
            else
            {
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
            }
            Assert.AreEqual(1 + docCount, r.NumDocs);
            IndexSearcher s = NewSearcher(r);
            // Just make sure search can run; we can't assert
            // totHits since it could be 0
            TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10);
            // System.out.println("tot hits " + hits.totalHits);
        }
    }

    if (r != null)
    {
        r.Dispose();
    }

    // Close should force cache to clear since all files are sync'd
    w.Dispose();

    string[] cachedFiles = cachedDir.ListCachedFiles();
    foreach (string file in cachedFiles)
    {
        Console.WriteLine("FAIL: cached file " + file + " remains after sync");
    }
    Assert.AreEqual(0, cachedFiles.Length);

    r = DirectoryReader.Open(dir);
    foreach (BytesRef id in ids)
    {
        Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
    }
    r.Dispose();
    cachedDir.Dispose();
    docs.Dispose();
}
/// <summary>
/// Executes the given query and builds a SearchResult with header metadata and result rows.
/// </summary>
/// <param name="query">The Lucene query to execute.</param>
/// <param name="headerItemXmlNodeList">XML nodes describing the header items to include.</param>
/// <returns>The populated SearchResult.</returns>
public static SearchResult search(Query query, List<XmlNode> headerItemXmlNodeList)
{
    // Use the dataset count as the maximum number of hits to fetch; fall back to 1000.
    int n = 0;
    DatasetManager dm = null;
    try
    {
        dm = new DatasetManager();
        n = dm.DatasetRepo.Get().Count;
        if (n == 0)
            n = 1000;
    }
    catch
    {
        n = 1000;
    }
    finally
    {
        if (dm != null)
            dm.Dispose(); // dm is null if the constructor threw
        if (n <= 0)
            n = 1000;
    }

    TopDocs docs = searcher.Search(query, n);
    SearchResult sro = new SearchResult();
    sro.PageSize = 10;
    sro.CurrentPage = 1;
    sro.NumberOfHits = 100;

    List<HeaderItem> Header = new List<HeaderItem>();
    List<HeaderItem> DefaultHeader = new List<HeaderItem>();

    // create id
    HeaderItem id = new HeaderItem();
    id.DisplayName = "ID";
    id.Name = "ID";
    id.DataType = "Integer";
    sro.Id = id;
    Header.Add(id);
    DefaultHeader.Add(id);

    // create entity
    HeaderItem entity = new HeaderItem();
    entity.DisplayName = "Type";
    entity.Name = "entity_name";
    entity.DataType = "string";
    Header.Add(entity);
    //DefaultHeader.Add(entity);

    foreach (XmlNode ade in headerItemXmlNodeList)
    {
        HeaderItem hi = new HeaderItem();
        hi.Name = ade.Attributes.GetNamedItem("lucene_name").Value;
        hi.DisplayName = ade.Attributes.GetNamedItem("display_name").Value;
        Header.Add(hi);
        if (ade.Attributes.GetNamedItem("default_visible_item").Value.ToLower().Equals("yes"))
        {
            DefaultHeader.Add(hi);
        }
    }

    List<Row> RowList = new List<Row>();
    string valueLastEntity = "";          // stores the entity value of the previous row
    bool moreThanOneEntityFound = false;  // set when more than one entity name is found

    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        Row r = new Row();
        List<object> ValueList = new List<object>();
        ValueList.Add(doc.Get("doc_id"));
        ValueList.Add(doc.Get("gen_entity_name"));

        // check if there is more than one entity in the result list
        if (moreThanOneEntityFound == false && ValueList[1].ToString() != valueLastEntity && valueLastEntity != "")
        {
            moreThanOneEntityFound = true;
        }
        valueLastEntity = ValueList[1].ToString();

        foreach (XmlNode ade in headerItemXmlNodeList)
        {
            String fieldType = ade.Attributes.GetNamedItem("type").Value;
            String luceneName = ade.Attributes.GetNamedItem("lucene_name").Value;
            if (fieldType.ToLower().Equals("facet_field"))
            {
                luceneName = "facet_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("category_field"))
            {
                luceneName = "category_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("property_field"))
            {
                luceneName = "property_" + luceneName;
            }
            ValueList.Add(doc.Get(luceneName));
        }
        r.Values = ValueList;
        RowList.Add(r);
    }

    // show the entity column if more than one entity was found
    if (moreThanOneEntityFound)
    {
        DefaultHeader.Add(entity);
    }

    sro.Header = Header;
    sro.DefaultVisibleHeaderItem = DefaultHeader;
    sro.Rows = RowList;
    return sro;
}
public async Task<IActionResult> Index(string id, string q, PagerParameters pagerParameters)
{
    var siteSettings = await _siteService.GetSiteSettingsAsync();
    Pager pager = new Pager(pagerParameters, siteSettings.PageSize);
    var indexName = "Search";

    if (!String.IsNullOrWhiteSpace(id))
    {
        indexName = id;
    }

    if (!_luceneIndexProvider.Exists(indexName))
    {
        return NotFound();
    }

    if (String.IsNullOrWhiteSpace(q))
    {
        return View(new SearchIndexViewModel
        {
            Pager = pager,
            IndexName = id,
            ContentItems = Enumerable.Empty<ContentItem>()
        });
    }

    var luceneSettings = await _luceneIndexingService.GetLuceneSettingsAsync();

    if (luceneSettings == null)
    {
        Logger.LogInformation("Couldn't execute search. No Lucene settings were defined.");
        return View(new SearchIndexViewModel
        {
            HasMoreResults = false,
            Query = q,
            Pager = pager,
            IndexName = id,
            ContentItems = Enumerable.Empty<ContentItem>()
        });
    }

    var queryParser = new MultiFieldQueryParser(LuceneSettings.DefaultVersion, luceneSettings.SearchFields, new StandardAnalyzer(LuceneSettings.DefaultVersion));
    var query = queryParser.Parse(QueryParser.Escape(q));

    List<int> contentItemIds = new List<int>();

    _luceneIndexProvider.Search(indexName, searcher =>
    {
        // Fetch one more result than PageSize to generate "More" links
        TopScoreDocCollector collector = TopScoreDocCollector.Create(pager.PageSize + 1, true);
        searcher.Search(query, collector);
        TopDocs hits = collector.TopDocs(pager.GetStartIndex(), pager.PageSize + 1);

        foreach (var hit in hits.ScoreDocs)
        {
            var d = searcher.Doc(hit.Doc, IdSet);
            contentItemIds.Add(Convert.ToInt32(d.GetField("ContentItemId").StringValue));
        }
    });

    var contentItems = new List<ContentItem>();
    foreach (var contentItemId in contentItemIds.Take(pager.PageSize))
    {
        var contentItem = await _contentManager.GetAsync(contentItemId);
        if (contentItem != null)
        {
            contentItems.Add(contentItem);
        }
    }

    var model = new SearchIndexViewModel
    {
        HasMoreResults = contentItemIds.Count > pager.PageSize,
        Query = q,
        Pager = pager,
        IndexName = id,
        ContentItems = contentItems
    };

    return View(model);
}
public virtual void TestRandomStoredFields()
{
    Directory dir = NewDirectory();
    Random rand = Random();
    RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
    //w.w.setNoCFSRatio(0.0);
    int docCount = AtLeast(200);
    int fieldCount = TestUtil.NextInt(rand, 1, 5);

    IList<int?> fieldIDs = new List<int?>();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.Tokenized = false;
    Field idField = NewField("id", "", customType);

    for (int i = 0; i < fieldCount; i++)
    {
        fieldIDs.Add(i);
    }

    IDictionary<string, Document> docs = new Dictionary<string, Document>();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: build index docCount=" + docCount);
    }

    FieldType customType2 = new FieldType();
    customType2.Stored = true;
    for (int i = 0; i < docCount; i++)
    {
        Document doc = new Document();
        doc.Add(idField);
        string id = "" + i;
        idField.StringValue = id;
        docs[id] = doc;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: add doc id=" + id);
        }

        foreach (int field in fieldIDs)
        {
            string s;
            if (rand.Next(4) != 3)
            {
                s = TestUtil.RandomUnicodeString(rand, 1000);
                doc.Add(NewField("f" + field, s, customType2));
            }
            else
            {
                s = null;
            }
        }
        w.AddDocument(doc);
        if (rand.Next(50) == 17)
        {
            // mixup binding of field name -> Number every so often
            fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
        }
        if (rand.Next(5) == 3 && i > 0)
        {
            string delID = "" + rand.Next(i);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: delete doc id=" + delID);
            }
            w.DeleteDocuments(new Term("id", delID));
            docs.Remove(delID);
        }
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
    }
    if (docs.Count > 0)
    {
        string[] idsList = docs.Keys.ToArray();

        for (int x = 0; x < 2; x++)
        {
            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
            }

            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                string testID = idsList[rand.Next(idsList.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: test id=" + testID);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                Document doc = r.Document(hits.ScoreDocs[0].Doc);
                Document docExp = docs[testID];
                for (int i = 0; i < fieldCount; i++)
                {
                    Assert.AreEqual(docExp.Get("f" + i), doc.Get("f" + i), "doc " + testID + ", field f" + i + " is wrong");
                }
            }
            r.Dispose();
            w.ForceMerge(1);
        }
    }
    w.Dispose();
    dir.Dispose();
}
public void TestBigDocuments()
{
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    Document emptyDoc = new Document(); // emptyDoc
    Document bigDoc1 = new Document();  // lot of small fields
    Document bigDoc2 = new Document();  // 1 very big field

    Field idField = new StringField("id", "", Field.Store.NO);
    emptyDoc.Add(idField);
    bigDoc1.Add(idField);
    bigDoc2.Add(idField);

    FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.Indexed = false;

    Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
    int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i)
    {
        bigDoc1.Add(smallField);
    }

    Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);
    bigDoc2.Add(bigField);

    int numDocs = AtLeast(5);
    Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i)
    {
        docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i)
    {
        idField.StringValue = "" + i;
        iw.AddDocument(docs[i]);
        if (Random().Next(numDocs) == 0)
        {
            iw.Commit();
        }
    }
    iw.Commit();
    iw.ForceMerge(1); // look at what happens when big docs are merged

    DirectoryReader rd = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i)
    {
        Query query = new TermQuery(new Term("id", "" + i));
        TopDocs topDocs = searcher.Search(query, 1);
        Assert.AreEqual(1, topDocs.TotalHits, "" + i);
        Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
        Assert.IsNotNull(doc);
        IndexableField[] fieldValues = doc.GetFields("fld");
        Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
        if (fieldValues.Length > 0)
        {
            Assert.AreEqual(docs[i].GetFields("fld")[0].BinaryValue(), fieldValues[0].BinaryValue());
        }
    }
    rd.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Searches the specified phrase in the specified search fields.
/// </summary>
/// <param name="wiki">The wiki.</param>
/// <param name="searchFields">The search fields.</param>
/// <param name="phrase">The phrase to search.</param>
/// <param name="searchOption">The search options.</param>
/// <returns>A list of <see cref="SearchResult"/> items.</returns>
public static List<SearchResult> Search(string wiki, SearchField[] searchFields, string phrase, SearchOptions searchOption)
{
    IIndexDirectoryProviderV60 indexDirectoryProvider = Collectors.CollectorsBox.GetIndexDirectoryProvider(wiki);
    Analyzer analyzer = new SimpleAnalyzer();
    IndexSearcher searcher = new IndexSearcher(indexDirectoryProvider.GetDirectory(), false);

    string[] searchFieldsAsString = (from f in searchFields select f.AsString()).ToArray();
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, searchFieldsAsString, analyzer);

    if (searchOption == SearchOptions.AllWords)
    {
        queryParser.DefaultOperator = QueryParser.Operator.AND;
    }
    if (searchOption == SearchOptions.AtLeastOneWord)
    {
        queryParser.DefaultOperator = QueryParser.Operator.OR;
    }
    if (searchOption == SearchOptions.ExactPhrase)
    {
        phrase = String.Format("\"{0}\"", phrase);
    }

    try
    {
        Query query = queryParser.Parse(phrase);
        TopDocs topDocs = searcher.Search(query, 100);

        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b class=\"searchkeyword\">", "</b>"), new QueryScorer(query));

        List<SearchResult> searchResults = new List<SearchResult>(topDocs.TotalHits);
        for (int i = 0; i < Math.Min(100, topDocs.TotalHits); i++)
        {
            Document doc = searcher.Doc(topDocs.ScoreDocs[i].Doc);

            SearchResult result = new SearchResult();
            result.DocumentType = DocumentTypeFromString(doc.GetField(SearchField.DocumentType.AsString()).StringValue);
            result.Relevance = topDocs.ScoreDocs[i].Score * 100;

            switch (result.DocumentType)
            {
                case DocumentType.Page:
                    PageDocument page = new PageDocument();
                    page.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    page.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                    page.Title = doc.GetField(SearchField.Title.AsString()).StringValue;
                    TokenStream tokenStream1 = analyzer.TokenStream(SearchField.Title.AsString(), new StringReader(page.Title));
                    page.HighlightedTitle = highlighter.GetBestFragments(tokenStream1, page.Title, 3, " [...] ");
                    page.Content = doc.GetField(SearchField.Content.AsString()).StringValue;
                    tokenStream1 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(page.Content));
                    page.HighlightedContent = highlighter.GetBestFragments(tokenStream1, page.Content, 3, " [...] ");
                    result.Document = page;
                    break;
                case DocumentType.Message:
                    MessageDocument message = new MessageDocument();
                    message.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    message.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                    message.DateTime = DateTime.Parse(doc.GetField(SearchField.MessageDateTime.AsString()).StringValue);
                    message.Subject = doc.GetField(SearchField.Title.AsString()).StringValue;
                    message.Body = doc.GetField(SearchField.Content.AsString()).StringValue;
                    TokenStream tokenStream2 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(message.Body));
                    message.HighlightedBody = highlighter.GetBestFragments(tokenStream2, message.Body, 3, " [...] ");
                    result.Document = message;
                    break;
                case DocumentType.Attachment:
                    PageAttachmentDocument attachment = new PageAttachmentDocument();
                    attachment.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    attachment.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                    attachment.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue;
                    attachment.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue;
                    TokenStream tokenStream3 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(attachment.FileContent));
                    attachment.HighlightedFileContent = highlighter.GetBestFragments(tokenStream3, attachment.FileContent, 3, " [...] ");
                    result.Document = attachment;
                    break;
                case DocumentType.File:
                    FileDocument file = new FileDocument();
                    file.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    file.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue;
                    file.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue;
                    TokenStream tokenStream4 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(file.FileContent));
                    file.HighlightedFileContent = highlighter.GetBestFragments(tokenStream4, file.FileContent, 3, " [...]");
                    result.Document = file;
                    break;
            }
            searchResults.Add(result);
        }
        searcher.Dispose();
        return searchResults;
    }
    catch (ParseException)
    {
        return new List<SearchResult>(0);
    }
}
public void TestRegexQuery()
{
    const int maxNumFragmentsRequired = 2;

    query = new RegexQuery(new Term(FIELD_NAME, "ken.*"));
    searcher = new IndexSearcher(ramDir, true);
    hits = searcher.Search(query, 100);

    var scorer = new QueryScorer(query, FIELD_NAME);
    var highlighter = new Highlighter(this, scorer);

    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));

        highlighter.TextFragmenter = new SimpleFragmenter(40);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        Console.WriteLine("\t" + result);
    }

    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Executes the given query and builds a SearchResult with header metadata and result rows.
/// </summary>
/// <param name="query">The Lucene query to execute.</param>
/// <param name="headerItemXmlNodeList">XML nodes describing the header items to include.</param>
/// <returns>The populated SearchResult.</returns>
public static SearchResult search(Query query, List<XmlNode> headerItemXmlNodeList)
{
    TopDocs docs = searcher.Search(query, 1000);
    SearchResult sro = new SearchResult();
    sro.PageSize = 10;
    sro.CurrentPage = 1;
    sro.NumberOfHits = 100;

    List<HeaderItem> Header = new List<HeaderItem>();
    List<HeaderItem> DefaultHeader = new List<HeaderItem>();

    // create id
    HeaderItem id = new HeaderItem();
    id.DisplayName = "ID";
    id.Name = "ID";
    id.DataType = "Integer";
    sro.Id = id;
    Header.Add(id);
    DefaultHeader.Add(id);

    foreach (XmlNode ade in headerItemXmlNodeList)
    {
        HeaderItem hi = new HeaderItem();
        hi.Name = ade.Attributes.GetNamedItem("lucene_name").Value;
        hi.DisplayName = ade.Attributes.GetNamedItem("display_name").Value;
        Header.Add(hi);
        if (ade.Attributes.GetNamedItem("default_visible_item").Value.ToLower().Equals("yes"))
        {
            DefaultHeader.Add(hi);
        }
    }

    List<Row> RowList = new List<Row>();
    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        Row r = new Row();
        List<object> ValueList = new List<object>();
        ValueList.Add(doc.Get("doc_id"));

        foreach (XmlNode ade in headerItemXmlNodeList)
        {
            String fieldType = ade.Attributes.GetNamedItem("type").Value;
            String luceneName = ade.Attributes.GetNamedItem("lucene_name").Value;
            if (fieldType.ToLower().Equals("facet_field"))
            {
                luceneName = "facet_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("category_field"))
            {
                luceneName = "category_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("property_field"))
            {
                luceneName = "property_" + luceneName;
            }
            ValueList.Add(doc.Get(luceneName));
        }
        r.Values = ValueList;
        RowList.Add(r);
    }

    sro.Header = Header;
    sro.DefaultVisibleHeaderItem = DefaultHeader;
    sro.Rows = RowList;
    return sro;
}
public void TestConstantScoreMultiTermQuery()
{
    numHighlights = 0;

    query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
    ((WildcardQuery)query).RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
    searcher = new IndexSearcher(ramDir, true);
    // can't rewrite ConstantScore if you want to highlight it -
    // it rewrites to ConstantScoreQuery which cannot be highlighted
    // query = unReWrittenQuery.Rewrite(reader);
    Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    hits = searcher.Search(query, null, 1000);

    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME);
        int maxNumFragmentsRequired = 2;
        String fragmentSeparator = "...";
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        QueryScorer scorer = new QueryScorer(query, FIELD_NAME);

        Highlighter highlighter = new Highlighter(this, scorer);
        highlighter.TextFragmenter = new SimpleFragmenter(20);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        Console.WriteLine("\t" + result);
    }
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

    // try null field
    hits = searcher.Search(query, null, 1000);
    numHighlights = 0;
    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME);
        int maxNumFragmentsRequired = 2;
        String fragmentSeparator = "...";
        TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
        QueryScorer scorer = new QueryScorer(query, null);

        Highlighter highlighter = new Highlighter(this, scorer);
        highlighter.TextFragmenter = new SimpleFragmenter(20);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        Console.WriteLine("\t" + result);
    }
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

    // try default field
    hits = searcher.Search(query, null, 1000);
    numHighlights = 0;
    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME);
        int maxNumFragmentsRequired = 2;
        String fragmentSeparator = "...";
        TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
        QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);

        Highlighter highlighter = new Highlighter(this, scorer);
        highlighter.TextFragmenter = new SimpleFragmenter(20);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        Console.WriteLine("\t" + result);
    }
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Runs an n-gram autocomplete search constrained by the given filter field and returns matching text values.
/// </summary>
/// <param name="origQuery">The base query that results must also match.</param>
/// <param name="queryFilter">The field used to constrain the n-gram search.</param>
/// <param name="searchtext">The text typed so far.</param>
/// <returns>A short list of unique autocomplete suggestions.</returns>
public static IEnumerable<TextValue> doTextSearch(Query origQuery, String queryFilter, String searchtext)
{
    String filter = queryFilter;
    BooleanQuery query = new BooleanQuery();
    query.Add(origQuery, Occur.MUST);

    if (!filter.ToLower().StartsWith("ng_"))
    {
        filter = "ng_" + filter;
    }
    if (filter.ToLower().Equals("ng_all"))
    {
        filter = "ng_all";
        queryFilter = "ng_all";
    }

    HashSet<string> uniqueText = new HashSet<string>();
    searchtext = searchtext.ToLower();

    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, filter, new KeywordAnalyzer());
    parser.DefaultOperator = QueryParser.Operator.AND;
    Query X1 = parser.Parse(searchtext);
    query.Add(X1, Occur.MUST);
    // Query query = parser.Parse("tree data");

    TopDocs tds = searcher.Search(query, 50);
    QueryScorer scorer = new QueryScorer(query, searchtext);
    Analyzer analyzer = new NGramAnalyzer();
    List<TextValue> autoCompleteTextList = new List<TextValue>();

    foreach (ScoreDoc sd in tds.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        String docId = doc.GetField("doc_id").StringValue;

        TermQuery q1 = new TermQuery(new Term("id", docId.ToLower()));
        TermQuery q0 = new TermQuery(new Term("field", queryFilter.ToLower()));
        QueryParser parser1 = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "value", new KeywordAnalyzer());
        parser1.DefaultOperator = QueryParser.Operator.AND;
        Query q2 = parser1.Parse(searchtext);

        BooleanQuery q3 = new BooleanQuery();
        q3.Add(q1, Occur.MUST);
        q3.Add(q2, Occur.MUST);
        q3.Add(q0, Occur.MUST);

        TopDocs tdAutoComp = autoCompleteSearcher.Search(q3, 100);
        foreach (ScoreDoc sdAutoComp in tdAutoComp.ScoreDocs)
        {
            Document docAutoComp = autoCompleteSearcher.Doc(sdAutoComp.Doc);
            String toAdd = docAutoComp.GetField("value").StringValue;
            if (!uniqueText.Contains(toAdd))
            {
                TextValue tv = new TextValue();
                tv.Name = toAdd;
                tv.Value = toAdd;
                autoCompleteTextList.Add(tv);
                uniqueText.Add(toAdd);
            }
        }
        if (autoCompleteTextList.Count > 7)
        {
            break;
        }
    }
    return autoCompleteTextList;
}
public void _Search()
{
    //string request = (searchParams as string);
    string old_request = "";
    string new_request = "";

    while (true)
    {
        lock (_request)
        {
            new_request = _request;
        }

        if (new_request != old_request)
        {
            old_request = new_request;
            if (new_request.Length != 0)
            {
                IndexReader reader = null;
                Stopwatch stopWatch = new Stopwatch();
                stopWatch.Start();

                string index_path = Owl.Properties.Settings.Default.IndexPath;
                try
                {
                    reader = IndexReader.Open(index_path);
                }
                catch
                {
                    status.SearchStatus = string.Format("Problems while opening Index: has it been created in {0} ?", Owl.Properties.Settings.Default.IndexPath);
                }

                int nb_docs = 0;
                int found_docs = 0;
                if (reader != null)
                {
                    try
                    {
                        Searcher searcher = new IndexSearcher(reader);
                        Analyzer analyzer = new StandardAnalyzer();
                        //QueryParser parser = new QueryParser("contents", analyzer);
                        MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "contents", "path" }, analyzer);
                        Query query = parser.Parse(new_request);

                        SimpleCall sc = delegate() { resultItems.Clear(); };
                        Application.Current.Dispatcher.Invoke(DispatcherPriority.Background, sc);

                        //Hits hits = searcher.Search(query);
                        TopDocs docs = searcher.Search(query, null, 100);

                        int num_doc = 1;
                        foreach (ScoreDoc score_doc in docs.scoreDocs)
                        {
                            if (HasRequestChanged(new_request))
                            {
                                break;
                            }
                            Document doc = searcher.Doc(score_doc.doc);
                            System.String path = doc.Get("path");

                            SimpleCall sc2 = delegate()
                            {
                                resultItems.Add(new Result(string.Format("{0} - {2} ({1})%\n{3}",
                                    num_doc++,
                                    (int)((score_doc.score * 100) / docs.GetMaxScore()),
                                    System.IO.Path.GetFileName(path),
                                    System.IO.Path.GetDirectoryName(path)), path));
                            };
                            Application.Current.Dispatcher.Invoke(DispatcherPriority.Background, sc2);
                        }

                        found_docs = docs.scoreDocs.Length;
                        nb_docs = reader.NumDocs();
                        searcher.Close();
                    }
                    //catch (TokenMgrError)
                    //{ }
                    catch (Exception e)
                    {
                        status.SearchStatus = string.Format("Problems with request {0} ", new_request);
                        Log.Error(e);
                    }
                    finally
                    {
                        reader.Close();
                        stopWatch.Stop();
                    }
                }

                //---
                status.SearchStatus = string.Format("{0} results for '{3}' in {1} docs (took {2} ms)", found_docs, nb_docs, stopWatch.ElapsedMilliseconds, new_request);
            }
        }
        else
        {
            _mre.Reset();
            _mre.WaitOne();
        }
        //Thread.Sleep(250);
    }
}
private IEnumerable<IAuditEntry> getResults(TopDocs ids, int page, IndexSearcher searcher)
{
    int skip = page * 20;
    return ids.ScoreDocs.Reverse().Skip(skip).Take(20).Select(x => new BasicAuditEntry(searcher.Doc(x.Doc), x.Doc));
}
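For context, a hedged sketch of a caller for the pager above, assuming the caller collects every hit so the Reverse()/Skip() arithmetic has the full hit list to work from (names illustrative, not from the original source):

// Hypothetical caller: newest-first audit page 2, i.e. entries 40..59 counted from the end.
TopDocs hits = searcher.Search(query, searcher.IndexReader.MaxDoc); // assumption: collect all hits
IEnumerable<IAuditEntry> page = getResults(hits, 2, searcher);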
public override void Run()
{
    try
    {
        IndexReader lastReader = null;
        IndexSearcher lastSearcher = null;

        while (operations.DecrementAndGet() >= 0)
        {
            // bias toward a recently changed doc
            int id = rand.Next(100) < 25 ? outerInstance.lastId : rand.Next(ndocs);

            // when indexing, we update the index, then the model
            // so when querying, we should first check the model, and then the index
            long val;
            DirectoryReader r;
            lock (outerInstance)
            {
                val = outerInstance.committedModel[id];
                r = outerInstance.reader;
                r.IncRef();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": s id=" + id + " val=" + val + " r=" + r.Version);
            }

            // sreq = req("wt","json", "q","id:"+Integer.toString(id), "omitHeader","true");
            IndexSearcher searcher;
            if (r == lastReader)
            {
                // Just re-use lastSearcher, else
                // newSearcher may create too many thread
                // pools (ExecutorService):
                searcher = lastSearcher;
            }
            else
            {
                searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    outerInstance,
#endif
                    r);
                lastReader = r;
                lastSearcher = searcher;
            }

            Query q = new TermQuery(new Term("id", Convert.ToString(id)));
            TopDocs results = searcher.Search(q, 10);

            if (results.TotalHits == 0 && tombstones)
            {
                // if we couldn't find the doc, look for its tombstone
                q = new TermQuery(new Term("id", "-" + Convert.ToString(id)));
                results = searcher.Search(q, 1);
                if (results.TotalHits == 0)
                {
                    if (val == -1L)
                    {
                        // expected... no doc was added yet
                        r.DecRef();
                        continue;
                    }
                    Assert.Fail("No documents or tombstones found for id " + id + ", expected at least " + val + " reader=" + r);
                }
            }

            if (results.TotalHits == 0 && !tombstones)
            {
                // nothing to do - we can't tell anything from a deleted doc without tombstones
            }
            else
            {
                // we should have found the document, or its tombstone
                if (results.TotalHits != 1)
                {
                    Console.WriteLine("FAIL: hits id:" + id + " val=" + val);
                    foreach (ScoreDoc sd in results.ScoreDocs)
                    {
                        Document doc = r.Document(sd.Doc);
                        Console.WriteLine("  docID=" + sd.Doc + " id:" + doc.Get("id") + " foundVal=" + doc.Get(outerInstance.field));
                    }
                    Assert.Fail("id=" + id + " reader=" + r + " totalHits=" + results.TotalHits);
                }
                Document doc_ = searcher.Doc(results.ScoreDocs[0].Doc);
                long foundVal = Convert.ToInt64(doc_.Get(outerInstance.field));
                if (foundVal < Math.Abs(val))
                {
                    Assert.Fail("foundVal=" + foundVal + " val=" + val + " id=" + id + " reader=" + r);
                }
            }
            r.DecRef();
        }
    }
    catch (Exception e)
    {
        operations.Value = (int)-1L;
        Console.WriteLine(Thread.CurrentThread.Name + ": FAILED: unexpected exception");
        Console.WriteLine(e.StackTrace);
        throw new Exception(e.Message, e);
    }
}
public void assertFromTestData(int[] codePointTable)
{
    if (VERBOSE)
    {
        Console.WriteLine("TEST: codePointTable=" + codePointTable);
    }
    Stream stream = GetType().getResourceAsStream("fuzzyTestData.txt");
    TextReader reader = new StreamReader(stream, Encoding.UTF8);

    int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
    int terms = (int)Math.Pow(2, bits);

    Store.Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));

    Document doc = new Document();
    Field field = NewTextField("field", "", Field.Store.NO);
    doc.Add(field);

    for (int i = 0; i < terms; i++)
    {
        field.SetStringValue(MapInt(codePointTable, i));
        writer.AddDocument(doc);
    }

    IndexReader r = writer.Reader;
    IndexSearcher searcher = NewSearcher(r);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: searcher=" + searcher);
    }
    // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
    // otherwise scores are different!
    searcher.Similarity = new DefaultSimilarity();

    writer.Dispose();

    String line;
    int lineNum = 0;
    while ((line = reader.ReadLine()) != null)
    {
        lineNum++;
        String[] @params = line.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
        String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
        int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture);
        int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture);
        float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
#pragma warning disable 612, 618
        SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
#pragma warning restore 612, 618
        q.MultiTermRewriteMethod = new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize);

        int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
        TopDocs docs = searcher.Search(q, expectedResults);
        assertEquals(expectedResults, docs.TotalHits);
        for (int i = 0; i < expectedResults; i++)
        {
            String[] scoreDoc = reader.ReadLine().Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
            assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
        }
    }
    r.Dispose();
    dir.Dispose();
}
public void TestSimple()
{
    const string idField = "id";
    const string toField = "productId";

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // 0
    Document doc = new Document();
    doc.Add(new TextField("description", "random text", Field.Store.NO));
    doc.Add(new TextField("name", "name1", Field.Store.NO));
    doc.Add(new TextField(idField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    doc.Add(new TextField("price", "10.0", Field.Store.NO));
    doc.Add(new TextField(idField, "2", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 2
    doc = new Document();
    doc.Add(new TextField("price", "20.0", Field.Store.NO));
    doc.Add(new TextField(idField, "3", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 3
    doc = new Document();
    doc.Add(new TextField("description", "more random text", Field.Store.NO));
    doc.Add(new TextField("name", "name2", Field.Store.NO));
    doc.Add(new TextField(idField, "4", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();

    // 4
    doc = new Document();
    doc.Add(new TextField("price", "10.0", Field.Store.NO));
    doc.Add(new TextField(idField, "5", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    doc.Add(new TextField("price", "20.0", Field.Store.NO));
    doc.Add(new TextField(idField, "6", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
    w.Dispose();

    // Search for product
    Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None);
    TopDocs result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(4, result.ScoreDocs[0].Doc);
    assertEquals(5, result.ScoreDocs[1].Doc);

    joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher, ScoreMode.None);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(1, result.ScoreDocs[0].Doc);
    assertEquals(2, result.ScoreDocs[1].Doc);

    // Search for offer
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(1, result.TotalHits);
    assertEquals(3, result.ScoreDocs[0].Doc);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary> /// Highlights the top passages from a single field. /// </summary> /// <param name="field">field name to highlight. Must have a stored string value and also be indexed with offsets.</param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <returns> /// Array of formatted snippets corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first sentence for the field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">if <paramref name="field"/> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual string[] Highlight(string field, Query query, IndexSearcher searcher, TopDocs topDocs) { return(Highlight(field, query, searcher, topDocs, 1)); }
private void RecordResultsAlreadySeenForDistinctQuery(IndexSearcher indexSearcher, TopDocs search, int start) { if (fieldsToFetch.IsDistinctQuery == false) { return; } // add results that were already there in previous pages var min = Math.Min(start, search.totalHits); for (int i = 0; i < min; i++) { Document document = indexSearcher.Doc(search.scoreDocs[i].doc); var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch); alreadyReturned.Add(indexQueryResult.Projection); } }
/// <summary> /// Highlights the top-N passages from a single field. /// </summary> /// <param name="field"> /// field name to highlight. /// Must have a stored string value and also be indexed with offsets. /// </param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <param name="maxPassages">The maximum number of top-N ranked passages used to form the highlighted snippets.</param> /// <returns> /// Array of formatted snippets corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first <paramref name="maxPassages"/> sentences from the /// field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">Illegal if <paramref name="field"/> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual string[] Highlight(string field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) { IDictionary <string, string[]> res = HighlightFields(new string[] { field }, query, searcher, topDocs, new int[] { maxPassages }); string[] result; res.TryGetValue(field, out result); return(result); }
public void TestMinShouldMatch() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); string[] docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.5f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.49f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 
2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 4.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0.0f); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); // doc 2 and 3 only get a score from low freq terms assertEquals( new JCG.HashSet <string> { @"2", @"3" }, new JCG.HashSet <string> { r.Document(search.ScoreDocs[1].Doc).Get(@"id"), r.Document(search.ScoreDocs[2].Doc).Get(@"id") }, aggressive: false); } { // only high freq terms around - check that min should match is applied CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 4); } { // only high freq terms around - check that min should match is applied CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 2); assertEquals( new JCG.HashSet <string> { @"0", @"2" }, new JCG.HashSet <string> { r.Document(search.ScoreDocs[0].Doc).Get(@"id"), r.Document(search.ScoreDocs[1].Doc).Get(@"id") }, aggressive: false); } r.Dispose(); w.Dispose(); dir.Dispose(); }
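The knobs the test exercises, in isolation: a hedged sketch of CommonTermsQuery (Lucene.Net.Queries package), given an IndexSearcher s as in the test above.

using Lucene.Net.Index;
using Lucene.Net.Queries;
using Lucene.Net.Search;

// Terms occurring in more than half the documents are treated as high-frequency
// and grouped into a separate (SHOULD) clause instead of dominating scoring.
var query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, 0.5f /* maxTermFrequency */);
query.Add(new Term("field", "is"));    // high-freq in the test corpus
query.Add(new Term("field", "this"));  // high-freq
query.Add(new Term("field", "world")); // low-freq

// Values below 1 are ratios of the clauses; values of 1 and above are absolute counts.
query.LowFreqMinimumNumberShouldMatch = 0.5f;

TopDocs hits = s.Search(query, 10);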
/// <summary> /// Highlights the top passages from multiple fields. /// <para/> /// Conceptually, this behaves as a more efficient form of: /// <code> /// IDictionary<string, string[]> m = new Dictionary<string, string[]>(); /// foreach (string field in fields) /// { /// m[field] = Highlight(field, query, searcher, topDocs); /// } /// return m; /// </code> /// </summary> /// <param name="fields">field names to highlight. Must have a stored string value and also be indexed with offsets.</param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <returns> /// <see cref="T:IDictionary{string, string[]}"/> keyed on field name, containing the array of formatted snippets /// corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first sentence from the field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual IDictionary <string, string[]> HighlightFields(string[] fields, Query query, IndexSearcher searcher, TopDocs topDocs) { int[] maxPassages = new int[fields.Length]; Arrays.Fill(maxPassages, 1); return(HighlightFields(fields, query, searcher, topDocs, maxPassages)); }
public void DoSearching(Query unReWrittenQuery)
{
    searcher = new IndexSearcher(ramDir, true);
    // for any multi-term queries to work (prefix, wildcard, range, fuzzy, etc.)
    // you must use a rewritten query!
    query = unReWrittenQuery.Rewrite(reader);
    Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    hits = searcher.Search(query, null, 1000);
}
/// <summary> /// Highlights the top-N passages from multiple fields. /// <para/> /// Conceptually, this behaves as a more efficient form of: /// <code> /// IDictionary<string, string[]> m = new Dictionary<string, string[]>(); /// foreach (string field in fields) /// { /// m[field] = Highlight(field, query, searcher, topDocs, maxPassages); /// } /// return m; /// </code> /// </summary> /// <param name="fields">field names to highlight. Must have a stored string value and also be indexed with offsets.</param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <param name="maxPassages">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param> /// <returns> /// <see cref="T:IDictionary{string, string[]}"/> keyed on field name, containing the array of formatted snippets /// corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first <paramref name="maxPassages"/> sentences from the /// field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual IDictionary <string, string[]> HighlightFields(string[] fields, Query query, IndexSearcher searcher, TopDocs topDocs, int[] maxPassages) { ScoreDoc[] scoreDocs = topDocs.ScoreDocs; int[] docids = new int[scoreDocs.Length]; for (int i = 0; i < docids.Length; i++) { docids[i] = scoreDocs[i].Doc; } return(HighlightFields(fields, query, searcher, docids, maxPassages)); }
public void DoStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, IFormatter formatter, bool expandMT) { IFragmenter frag = new SimpleFragmenter(20); for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME); int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; IScorer scorer = null; TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); if (Mode == QUERY) { scorer = new QueryScorer(query); } else if (Mode == QUERY_TERM) { scorer = new QueryTermScorer(query); } var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = frag}; String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator); Console.WriteLine("\t" + result); } }
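The same classic-highlighter pattern reduced to a sketch, assuming Lucene.NET 4.8 names (Analyzer.GetTokenStream; the older port above calls TokenStream instead). The "contents" field name is hypothetical, and multi-term queries should be rewritten first, as DoSearching above notes.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Search.Highlight;

// Given: analyzer, searcher, query, and hits = searcher.Search(query, n).
var scorer = new QueryScorer(query);
var highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer)
{
    TextFragmenter = new SimpleFragmenter(20) // ~20-char fragments, as in the test
};

foreach (var scoreDoc in hits.ScoreDocs)
{
    string text = searcher.Doc(scoreDoc.Doc).Get("contents");
    TokenStream tokenStream = analyzer.GetTokenStream("contents", new StringReader(text));
    string best = highlighter.GetBestFragments(tokenStream, text, 2, "...");
    Console.WriteLine(best);
}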
/// <summary> /// Rescore an initial first-pass <seealso cref="TopDocs"/>. /// </summary> /// <param name="searcher"> <seealso cref="IndexSearcher"/> used to produce the /// first pass topDocs </param> /// <param name="firstPassTopDocs"> Hits from the first pass /// search. It's very important that these hits were /// produced by the provided searcher; otherwise the doc /// IDs will not match! </param> /// <param name="topN"> How many re-scored hits to return </param> public abstract TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN);
public void TestNumericRangeQuery() { // doesn't currently highlight, but make sure it doesn't cause exception either query = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true); searcher = new IndexSearcher(ramDir, true); hits = searcher.Search(query, 100); int maxNumFragmentsRequired = 2; QueryScorer scorer = new QueryScorer(query, FIELD_NAME); Highlighter highlighter = new Highlighter(this, scorer); for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(NUMERIC_FIELD_NAME); TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text)); highlighter.TextFragmenter = new SimpleFragmenter(40); String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); //Console.WriteLine("\t" + result); } }
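For the range query above to match anything, the field has to be indexed as a numeric (trie) field. A sketch in the same Lucene.Net 3.x style the test uses; NUMERIC_FIELD_NAME is the test's constant, and writer/searcher are assumed in scope.

// Indexing side: trie-encoded int field.
var doc = new Document();
doc.Add(new NumericField(NUMERIC_FIELD_NAME).SetIntValue(5));
writer.AddDocument(doc);

// Query side: inclusive range [2, 6] — the document above matches.
var rangeQuery = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
TopDocs hits = searcher.Search(rangeQuery, 100);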
/// <summary> /// Creates result document collection from Lucene documents. /// </summary> /// <param name="searcher">The searcher.</param> /// <param name="topDocs">The hits.</param> private void CreateDocuments(Searcher searcher, TopDocs topDocs) { // if no documents found return if (topDocs == null) return; var entries = new List<ResultDocument>(); // get total hits var totalCount = topDocs.TotalHits; var recordsToRetrieve = Results.SearchCriteria.RecordsToRetrieve; var startIndex = Results.SearchCriteria.StartingRecord; if (recordsToRetrieve > totalCount) recordsToRetrieve = totalCount; for (var index = startIndex; index < startIndex + recordsToRetrieve; index++) { if (index >= totalCount) break; var document = searcher.Doc(topDocs.ScoreDocs[index].Doc); var doc = new ResultDocument(); var documentFields = document.GetFields(); using (var fi = documentFields.GetEnumerator()) { while (fi.MoveNext()) { if (fi.Current != null) { var field = fi.Current; // make sure document field doens't exist, if it does, simply add another value if (doc.ContainsKey(field.Name)) { var existingField = doc[field.Name] as DocumentField; if (existingField != null) existingField.AddValue(field.StringValue); } else // add new { doc.Add(new DocumentField(field.Name, field.StringValue)); } } } } entries.Add(doc); } var searchDocuments = new ResultDocumentSet { Name = "Items", Documents = entries.ToArray(), TotalCount = totalCount }; Results.Documents = new[] { searchDocuments }; }
/// <summary>
/// Splits a given index into three indexes for training, test, and cross-validation tasks respectively.
/// </summary>
/// <param name="originalIndex">an <see cref="AtomicReader"/> on the source index</param>
/// <param name="trainingIndex">a <see cref="Directory"/> used to write the training index</param>
/// <param name="testIndex">a <see cref="Directory"/> used to write the test index</param>
/// <param name="crossValidationIndex">a <see cref="Directory"/> used to write the cross-validation index</param>
/// <param name="analyzer"><see cref="Analyzer"/> used to create the new docs</param>
/// <param name="fieldNames">names of fields that need to be put in the new indexes, or <c>null</c> if all should be used</param>
/// <exception cref="IOException">if any writing operation fails on any of the indexes</exception>
public void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames)
{
    // create index writers for the training / test / cross-validation indexes
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));

    try
    {
        int size = originalIndex.MaxDoc;
        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), int.MaxValue);

        // set the type to be indexed and stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.StoreTermVectors = true;
        ft.StoreTermVectorOffsets = true;
        ft.StoreTermVectorPositions = true;

        int b = 0;

        // iterate over existing documents
        foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
        {
            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.Length > 0)
            {
                foreach (string fieldName in fieldNames)
                {
                    doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).ToString(), ft));
                }
            }
            else
            {
                foreach (IndexableField storableField in originalIndex.Document(scoreDoc.Doc).Fields)
                {
                    if (storableField.ReaderValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.ReaderValue, ft));
                    }
                    else if (storableField.BinaryValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.BinaryValue, ft));
                    }
                    else if (storableField.StringValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.StringValue, ft));
                    }
                    else if (storableField.NumericValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.NumericValue.ToString(), ft));
                    }
                }
            }

            // add it to one of the indexes
            if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
            {
                testWriter.AddDocument(doc);
            }
            else if (cvWriter.MaxDoc < size * _crossValidationRatio)
            {
                cvWriter.AddDocument(doc);
            }
            else
            {
                trainingWriter.AddDocument(doc);
            }

            b++;
        }
    }
    catch (Exception e)
    {
        throw new IOException("Exception in DatasetSplitter", e);
    }
    finally
    {
        testWriter.Commit();
        cvWriter.Commit();
        trainingWriter.Commit();

        // close the index writers
        testWriter.Dispose();
        cvWriter.Dispose();
        trainingWriter.Dispose();
    }
}
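Calling the splitter, sketched under the assumption of a double-ratio constructor as in the upstream Lucene classification module; the directory variables and the "body" field name are hypothetical.

// 10% test, 10% cross-validation, remaining ~80% training.
var splitter = new DatasetSplitter(0.1, 0.1);

using (DirectoryReader reader = DirectoryReader.Open(sourceDir))
{
    // Split() wants an AtomicReader view of the (possibly composite) source index.
    AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(reader);
    splitter.Split(atomicReader, trainingDir, testDir, crossValidationDir,
                   new StandardAnalyzer(LuceneVersion.LUCENE_48), "body");
}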
public virtual void TestArbitraryFields() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int NUM_DOCS = AtLeast(27); if (VERBOSE) { Console.WriteLine("TEST: " + NUM_DOCS + " docs"); } int[] fieldsPerDoc = new int[NUM_DOCS]; int baseCount = 0; for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int fieldCount = TestUtil.NextInt32(Random, 1, 17); fieldsPerDoc[docCount] = fieldCount - 1; int finalDocCount = docCount; if (VERBOSE) { Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount); } int finalBaseCount = baseCount; baseCount += fieldCount - 1; w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount)); } IndexReader r = w.GetReader(); w.Dispose(); IndexSearcher s = NewSearcher(r); int counter = 0; for (int id = 0; id < NUM_DOCS; id++) { if (VERBOSE) { Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter); } TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; Document doc = s.Doc(docID); int endCounter = counter + fieldsPerDoc[id]; while (counter < endCounter) { string name = "f" + counter; int fieldID = counter % 10; bool stored = (counter & 1) == 0 || fieldID == 3; bool binary = fieldID == 3; bool indexed = fieldID != 3; string stringValue; if (fieldID != 3 && fieldID != 9) { stringValue = "text " + counter; } else { stringValue = null; } // stored: if (stored) { IIndexableField f = doc.GetField(name); Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); if (binary) { Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); BytesRef b = f.GetBinaryValue(); Assert.IsNotNull(b); Assert.AreEqual(10, b.Length); for (int idx = 0; idx < 10; idx++) { Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]); } } else { Debug.Assert(stringValue != null); Assert.AreEqual(stringValue, f.GetStringValue()); } } if (indexed) { bool tv = counter % 2 == 1 && fieldID != 9; if (tv) { Terms tfv = r.GetTermVectors(docID).GetTerms(name); Assert.IsNotNull(tfv); TermsEnum termsEnum = tfv.GetIterator(null); Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(1, dpEnum.NextPosition()); Assert.AreEqual(new BytesRef("text"), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(0, dpEnum.NextPosition()); Assert.IsNull(termsEnum.Next()); // TODO: offsets } else { Fields vectors = r.GetTermVectors(docID); Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null); } BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST); TopDocs hits2 = s.Search(bq, 1); Assert.AreEqual(1, hits2.TotalHits); Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc); bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST); TopDocs hits3 = s.Search(bq, 1); Assert.AreEqual(1, hits3.TotalHits); 
Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc); } counter++; } } r.Dispose(); dir.Dispose(); }
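Reading a term vector back out, as the assertions above do: a sketch using the same enumerator API as the test (GetIterator / Next returning a BytesRef), given the reader r and a docID; the field name is hypothetical.

// The field must have been indexed with term vectors (FieldType.StoreTermVectors = true).
Fields vectors = r.GetTermVectors(docID);
Terms terms = vectors.GetTerms("f4");
TermsEnum termsEnum = terms.GetIterator(null);
BytesRef term;
while ((term = termsEnum.Next()) != null)
{
    Console.WriteLine(term.Utf8ToString() + " totalTermFreq=" + termsEnum.TotalTermFreq);
}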
private void RetrieveMatches(
    ISearchWrapper wrapper,
    TopDocs hits,
    IList<MatchingDocument> matchList,
    Func<MatchingDocument, bool> doesMatch,
    int first,
    int count)
{
    var scoreDocs = hits.scoreDocs;

    // Skip hits until 'first' matching documents have been passed over;
    // bounds-check before indexing so an empty result set is handled safely.
    int docIndex = 0;
    int matchIndex = 0;
    while (matchIndex < first && docIndex < scoreDocs.Length)
    {
        var doc = wrapper.IndexSearcher.doc(scoreDocs[docIndex].doc);
        ++docIndex;
        if (doesMatch(new MatchingDocument(doc)))
            ++matchIndex;
    }

    // Collect up to 'count' further matches.
    while (matchList.Count < count && docIndex < scoreDocs.Length)
    {
        var doc = wrapper.IndexSearcher.doc(scoreDocs[docIndex].doc);
        ++docIndex;
        var matchingDoc = new MatchingDocument(doc);
        if (doesMatch(matchingDoc))
        {
            matchList.Add(matchingDoc);
        }
    }
}
protected override Result <LightweightHitData> CreateResults(N2.Persistence.Search.Query query, IndexSearcher s, TopDocs hits) { var result = new Result <LightweightHitData>(); result.Total = hits.TotalHits; var resultHits = hits.ScoreDocs.Skip(query.SkipHits).Take(query.TakeHits).Select(hit => { var doc = s.Doc(hit.Doc); int id = int.Parse(doc.Get("ID")); return(new Hit <LightweightHitData> { Content = new LightweightHitData { ID = id, AlteredPermissions = (Security.Permission) int.Parse(doc.Get("AlteredPermissions")), State = (ContentState)int.Parse(doc.Get("State")), Visible = Convert.ToBoolean(doc.Get("Visible")), AuthorizedRoles = doc.Get("Roles").Split(' '), Path = doc.Get("Path") }, Title = doc.Get("Title"), Url = doc.Get("Url"), Score = hit.Score }); }).ToList(); result.Hits = resultHits; result.Count = resultHits.Count; return(result); }
public virtual void TestRollingUpdates_Mem() { Random random = new Random(Random.Next()); BaseDirectoryWrapper dir = NewDirectory(); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues); //provider.register(new MemoryCodec()); if ((!"Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) && LuceneTestCase.Random.NextBoolean()) { Codec.Default = TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(LuceneTestCase.Random.nextBoolean(), random.NextSingle())); } MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random); analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); int SIZE = AtLeast(20); int id = 0; IndexReader r = null; IndexSearcher s = null; int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * LuceneTestCase.Random.NextDouble() : 5 * LuceneTestCase.Random.NextDouble()))); if (VERBOSE) { Console.WriteLine("TEST: numUpdates=" + numUpdates); } int updateCount = 0; // TODO: sometimes update ids not in order... for (int docIter = 0; docIter < numUpdates; docIter++) { Documents.Document doc = docs.NextDoc(); string myID = "" + id; if (id == SIZE - 1) { id = 0; } else { id++; } if (VERBOSE) { Console.WriteLine(" docIter=" + docIter + " id=" + id); } ((Field)doc.GetField("docid")).SetStringValue(myID); Term idTerm = new Term("docid", myID); bool doUpdate; if (s != null && updateCount < SIZE) { TopDocs hits = s.Search(new TermQuery(idTerm), 1); Assert.AreEqual(1, hits.TotalHits); doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc); if (VERBOSE) { if (doUpdate) { Console.WriteLine(" tryDeleteDocument failed"); } else { Console.WriteLine(" tryDeleteDocument succeeded"); } } } else { doUpdate = true; if (VERBOSE) { Console.WriteLine(" no searcher: doUpdate=true"); } } updateCount++; if (doUpdate) { w.UpdateDocument(idTerm, doc); } else { w.AddDocument(doc); } if (docIter >= SIZE && LuceneTestCase.Random.Next(50) == 17) { if (r != null) { r.Dispose(); } bool applyDeletions = LuceneTestCase.Random.NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions); } r = w.GetReader(applyDeletions); if (applyDeletions) { s = NewSearcher(r); } else { s = null; } Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE); updateCount = 0; } } if (r != null) { r.Dispose(); } w.Commit(); Assert.AreEqual(SIZE, w.NumDocs); w.Dispose(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates"); docs.Dispose(); // LUCENE-4455: SegmentInfos infos = new SegmentInfos(); infos.Read(dir); long totalBytes = 0; foreach (SegmentCommitInfo sipc in infos.Segments) { totalBytes += sipc.GetSizeInBytes(); } long totalBytes2 = 0; foreach (string fileName in dir.ListAll()) { if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal)) { totalBytes2 += dir.FileLength(fileName); } } Assert.AreEqual(totalBytes2, totalBytes); dir.Dispose(); }
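The update-or-add core of the test, in isolation: UpdateDocument deletes by term and adds the new version atomically, and a near-real-time reader makes the change searchable. A sketch using the same w / myID / doc names as the test.

// Replace whatever document(s) carry this id, then add the new version atomically.
var idTerm = new Term("docid", myID);
w.UpdateDocument(idTerm, doc);

// Reopen a near-real-time reader (true = apply deletes) to see the change,
// mirroring the w.GetReader(applyDeletions) call in the test.
IndexReader reader = w.GetReader(true);
IndexSearcher searcher = NewSearcher(reader);
TopDocs hit = searcher.Search(new TermQuery(idTerm), 1);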
private static IList <SearchResult> RetrieveSearchResults(IndexSearcher indexSearcher, TopDocs hits) { //iterate over the results. var results = hits.ScoreDocs.AsQueryable() .OrderByDescending(hit => hit.Score) .ToList() .Select(hit => { var jsonDocumentField = indexSearcher.Doc(hit.Doc).GetField(Constants.JsonDocumentFieldName); var fieldDoc = (hit as FieldDoc); if (fieldDoc != null && Double.IsNaN(hit.Score) && fieldDoc.fields.Length > 0 && fieldDoc.fields[0] is float) { hit.Score = (float)fieldDoc.fields[0]; //TODO: is this really true? } if (jsonDocumentField == null) { return new SearchResult { Score = hit.Score, LuceneDocId = hit.Doc, Document = null } } ; return(new SearchResult { Score = hit.Score, LuceneDocId = hit.Doc, Document = JsonConvert.DeserializeObject <JsonDocumentDto>(jsonDocumentField.StringValue) }); }) .ToList(); return(results); }