public override void SetUp() { base.SetUp(); // we generate awful regexps: good for testing. // but for the preflex codec, the test can be very slow, so use fewer iterations. NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField("field", "", Field.Store.YES); doc.Add(field); Terms = new SortedSet<BytesRef>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s = TestUtil.RandomUnicodeString(Random()); field.StringValue = s; Terms.Add(new BytesRef(s)); writer.AddDocument(doc); } TermsAutomaton = BasicAutomata.MakeStringUnion(Terms); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public override void SetUp() { base.SetUp(); PayloadHelper helper = new PayloadHelper(); Searcher_Renamed = helper.SetUp(Random(), Similarity, 1000); IndexReader = Searcher_Renamed.IndexReader; }
public static void AfterClass() { Searcher = null; Reader.Dispose(); Reader = null; Directory.Dispose(); Directory = null; }
public static void AfterClass() { Reader.Dispose(); Directory.Dispose(); Searcher = null; Reader = null; Directory = null; SimplePayloadAnalyzer = null; }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(FIELD, DocFields[i], Field.Store.NO)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public static void search(String indexDir, String q) { Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexDir)); IndexSearcher searcher = new IndexSearcher(dir, true); QueryParser parser = new QueryParser("contents", new StandardAnalyzer(Version.LUCENE_CURRENT)); Query query = parser.Parse(q); Lucene.Net.Search.TopDocs hits = searcher.Search(query, 10); System.Console.WriteLine("Found " + hits.TotalHits + " document(s) that matched query '" + q + "':"); for (int i = 0; i < hits.ScoreDocs.Length; i++) { ScoreDoc scoreDoc = hits.ScoreDocs[i]; Document doc = searcher.Doc(scoreDoc.Doc); System.Console.WriteLine(doc.Get("filename")); } searcher.Close(); }
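// Hedged usage sketch for the search() method above: the index path and query text are assumptions, and the index is expected to have been built with "contents" and "filename" fields as the method implies. public static void Main(string[] args) { search(@"C:\lucene\index", "+net +lucene"); }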
public static void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); writer.AddDocument(Doc(new Field[] { GetField("id", "0"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "1"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "2"), GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "3"), GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"), GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas") })); writer.AddDocument(Doc(new Field[] { GetField("id", "4"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"), GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones") })); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); Field field = NewTextField("field", "", Field.Store.NO); doc.Add(field); field.StringValue = "quick brown fox"; iw.AddDocument(doc); field.StringValue = "jumps over lazy broun dog"; iw.AddDocument(doc); field.StringValue = "jumps over extremely very lazy broxn dog"; iw.AddDocument(doc); Reader = iw.Reader; iw.Dispose(); Searcher = NewSearcher(Reader); }
public virtual void TestRandomSearchPerformance() { IndexSearcher searcher = new IndexSearcher(Reader); foreach (Term t in SampleTerms) { TermQuery query = new TermQuery(t); TopDocs topDocs = searcher.Search(query, 10); Assert.IsTrue(topDocs.TotalHits > 0); } }
protected internal virtual void SmokeTestSearcher(IndexSearcher s) { RunQuery(s, new TermQuery(new Term("body", "united"))); RunQuery(s, new TermQuery(new Term("titleTokenized", "states"))); PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("body", "united")); pq.Add(new Term("body", "states")); RunQuery(s, pq); }
/// <summary> /// Searches the data source using the specified criteria. The criteria is parsed by the query builder specified by <typeparamref name="QueryBuilderType"/>. /// </summary> /// <param name="scope">Name of the application.</param> /// <param name="criteria">The search criteria.</param> /// <returns>The search results.</returns> /// <exception cref="VirtoCommerce.Search.Providers.Lucene.LuceneSearchException">Thrown when the underlying Lucene search fails.</exception> public virtual ISearchResults Search(string scope, ISearchCriteria criteria) { TopDocs docs = null; var folderName = this.GetFolderName(scope, criteria.DocumentType); var dir = FSDirectory.Open(new DirectoryInfo(this.GetDirectoryPath(folderName))); var searcher = new IndexSearcher(dir); var q = (Query)this.QueryBuilder.BuildQuery(criteria); Debug.WriteLine("Search Lucene Query:{0}", (object)q.ToString()); try { var numDocs = criteria.StartingRecord + criteria.RecordsToRetrieve; if (criteria.Sort != null) { var fields = criteria.Sort.GetSort(); docs = searcher.Search( q, null, numDocs, new Sort( fields.Select(field => new SortField(field.FieldName, field.DataType, field.IsDescending)) .ToArray())); } else { docs = searcher.Search(q, numDocs); } } catch (Exception ex) { throw new LuceneSearchException("Search exception", ex); } var results = new LuceneSearchResults(searcher, searcher.IndexReader, docs, criteria, q); // Cleanup here searcher.IndexReader.Dispose(); searcher.Dispose(); return results.Results; }
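// A minimal call sketch for Search() above, not the definitive API: "provider" is an assumed instance of this provider class, and CatalogItemSearchCriteria is an assumed ISearchCriteria implementation; StartingRecord, RecordsToRetrieve and DocumentType mirror the members the method reads. var criteria = new CatalogItemSearchCriteria { DocumentType = "catalogitem", StartingRecord = 0, RecordsToRetrieve = 20 }; ISearchResults results = provider.Search("default", criteria);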
private static IEnumerable <Artifact> _mapLuceneToDataList(IEnumerable <ScoreDoc> hits, IndexSearcher searcher) { return(hits.Select(hit => _mapLuceneDocumentToData(searcher.Doc(hit.Doc))).ToList()); }
private void ReleaseSearcher(IndexSearcher aSearcher) { // don't check if open - always decRef // don't decrement the private searcher - could have been swapped aSearcher.IndexReader.DecRef(); }
public static List<List<FacetReturn>> GetFacets(List<SearchStringModel> _searchQuery) { var ret = new List<List<FacetReturn>>(); var facets = Context.ContentDatabase.GetItem(Constants.FacetFolder).Children; foreach (Item facet in facets) { if (facet.Fields["Enabled"].Value == "1") { var type = Activator.CreateInstance(Type.GetType(facet.Fields["Type"].Value)); if ((type as IFacet).IsNotNull()) { var locationOverride = GetLocationOverride(_searchQuery); var indexName = BucketManager.GetContextIndex(Context.ContentDatabase.GetItem(locationOverride)); using (var searcher = new IndexSearcher(indexName)) using (var context = new SortableIndexSearchContext(searcher.Index)) { var query = SearchHelper.GetBaseQuery(_searchQuery, locationOverride); var queryBase = searcher.ContructQuery(query); var searchBitArray = new QueryFilter(queryBase).Bits(context.Searcher.GetIndexReader()); var res = ((IFacet)type).Filter(queryBase, _searchQuery, locationOverride, searchBitArray); ret.Add(res); } } } } return ret; }
/// <summary> /// Launches a search from a raw Lucene query /// </summary> /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns> /// <param name="rawLuceneQuery">The raw Lucene query to run</param> /// <param name="hitCount">This will output the hitCount of the search</param> /// <param name="pageSize">Number of results per page (defaults to 20)</param> /// <param name="pageNumber">Page of results to return (defaults to 1)</param> /// <param name="indexName">Force query to run on a particular index</param> public static IEnumerable<SitecoreItem> Search(Query rawLuceneQuery, out int hitCount, int pageSize = 20, int pageNumber = 1, string indexName = "itembuckets_buckets") { using (var searcher = new IndexSearcher(indexName)) { var keyValuePair = searcher.RunQuery(rawLuceneQuery, pageSize, pageNumber); hitCount = keyValuePair.Key; return keyValuePair.Value; } }
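// Hedged example of the raw-query overload above: the field name "_name" is an assumption about the index schema; everything else uses the method's defaults. int hitCount; IEnumerable<SitecoreItem> results = Search(new TermQuery(new Term("_name", "home")), out hitCount, pageSize: 10, pageNumber: 1);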
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); // Uses default $facets field: Facets facets; if (Random.NextBoolean()) { facets = new FastTaxonomyFacetCounts(taxoReader, config, c); } else { OrdinalsReader ordsReader = new DocValuesOrdinalsReader(); if (Random.NextBoolean()) { ordsReader = new CachedOrdinalsReader(ordsReader); } facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c); } // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non existent dim")); // Smoke test PrintTaxonomyStats: string result; using (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true) { AutoFlush = true }) { PrintTaxonomyStats.PrintStats(taxoReader, w, true); } result = bos.ToString(); } Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Dispose(writer,
taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public void TestNestedSorting() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); IList <Document> docs = new List <Document>(); Document document = new Document(); document.Add(new StringField("field2", "a", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "b", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "c", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "a", Field.Store.NO)); docs.Add(document); w.AddDocuments(docs); w.Commit(); docs.Clear(); document = new Document(); document.Add(new StringField("field2", "c", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "d", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "e", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "b", Field.Store.NO)); docs.Add(document); w.AddDocuments(docs); docs.Clear(); document = new Document(); document.Add(new StringField("field2", "e", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "f", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "g", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "c", Field.Store.NO)); docs.Add(document); w.AddDocuments(docs); docs.Clear(); document = new Document(); document.Add(new StringField("field2", "g", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "h", Field.Store.NO)); document.Add(new StringField("filter_1", "F", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "i", Field.Store.NO)); document.Add(new StringField("filter_1", "F", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "d", Field.Store.NO)); docs.Add(document); w.AddDocuments(docs); w.Commit(); docs.Clear(); document = new Document(); document.Add(new StringField("field2", "i", Field.Store.NO)); document.Add(new StringField("filter_1", "F", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "j", Field.Store.NO)); document.Add(new StringField("filter_1", "F", 
Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "k", Field.Store.NO)); document.Add(new StringField("filter_1", "F", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "f", Field.Store.NO)); docs.Add(document); w.AddDocuments(docs); docs.Clear(); document = new Document(); document.Add(new StringField("field2", "k", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "l", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "m", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "g", Field.Store.NO)); docs.Add(document); w.AddDocuments(docs); // This doc will not be included, because it doesn't have nested docs document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "h", Field.Store.NO)); w.AddDocument(document); docs.Clear(); document = new Document(); document.Add(new StringField("field2", "m", Field.Store.NO)); document.Add(new StringField("filter_1", "T", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "n", Field.Store.NO)); document.Add(new StringField("filter_1", "F", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("field2", "o", Field.Store.NO)); document.Add(new StringField("filter_1", "F", Field.Store.NO)); docs.Add(document); document = new Document(); document.Add(new StringField("__type", "parent", Field.Store.NO)); document.Add(new StringField("field1", "i", Field.Store.NO)); docs.Add(document); w.AddDocuments(docs); w.Commit(); // Some garbage docs, just to check if the NestedFieldComparer can deal with this. 
document = new Document(); document.Add(new StringField("fieldXXX", "x", Field.Store.NO)); w.AddDocument(document); document = new Document(); document.Add(new StringField("fieldXXX", "x", Field.Store.NO)); w.AddDocument(document); document = new Document(); document.Add(new StringField("fieldXXX", "x", Field.Store.NO)); w.AddDocument(document); IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.IndexWriter, false)); w.Dispose(); Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent"))); Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2"))); ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None); // Sort by field ascending, order first ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, Wrap(parentFilter), Wrap(childFilter)); Sort sort = new Sort(sortField); TopFieldDocs topDocs = searcher.Search(query, 5, sort); assertEquals(7, topDocs.TotalHits); assertEquals(5, topDocs.ScoreDocs.Length); assertEquals(3, topDocs.ScoreDocs[0].Doc); assertEquals("a", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString()); assertEquals(7, topDocs.ScoreDocs[1].Doc); assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString()); assertEquals(11, topDocs.ScoreDocs[2].Doc); assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString()); assertEquals(15, topDocs.ScoreDocs[3].Doc); assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString()); assertEquals(19, topDocs.ScoreDocs[4].Doc); assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString()); // Sort by field ascending, order last sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, true, Wrap(parentFilter), Wrap(childFilter)); sort = new Sort(sortField); topDocs = searcher.Search(query, 5, sort); assertEquals(7, topDocs.TotalHits); assertEquals(5, topDocs.ScoreDocs.Length); assertEquals(3, topDocs.ScoreDocs[0].Doc); assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString()); assertEquals(7, topDocs.ScoreDocs[1].Doc); assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString()); assertEquals(11, topDocs.ScoreDocs[2].Doc); assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString()); assertEquals(15, topDocs.ScoreDocs[3].Doc); assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString()); assertEquals(19, topDocs.ScoreDocs[4].Doc); assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString()); // Sort by field descending, order last sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter)); sort = new Sort(sortField); topDocs = searcher.Search(query, 5, sort); assertEquals(topDocs.TotalHits, 7); assertEquals(5, topDocs.ScoreDocs.Length); assertEquals(28, topDocs.ScoreDocs[0].Doc); assertEquals("o", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString()); assertEquals(23, topDocs.ScoreDocs[1].Doc); assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString()); assertEquals(19, topDocs.ScoreDocs[2].Doc); assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString()); assertEquals(15, 
topDocs.ScoreDocs[3].Doc); assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString()); assertEquals(11, topDocs.ScoreDocs[4].Doc); assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString()); // Sort by field descending, order last, sort filter (filter_1:T) childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T")))); query = new ToParentBlockJoinQuery( new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None); sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter)); sort = new Sort(sortField); topDocs = searcher.Search(query, 5, sort); assertEquals(6, topDocs.TotalHits); assertEquals(5, topDocs.ScoreDocs.Length); assertEquals(23, topDocs.ScoreDocs[0].Doc); assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString()); assertEquals(28, topDocs.ScoreDocs[1].Doc); assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString()); assertEquals(11, topDocs.ScoreDocs[2].Doc); assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString()); assertEquals(15, topDocs.ScoreDocs[3].Doc); assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString()); assertEquals(7, topDocs.ScoreDocs[4].Doc); assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString()); searcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public IndexSearcherHoldingState(IndexSearcher indexSearcher) { IndexSearcher = indexSearcher; MemoryStatistics.RegisterLowMemoryHandler(this); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); // Reused across documents, to add the necessary facet // fields: Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("Author", "Bob")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 20, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 30, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 40, Field.Store.NO)); doc.Add(new FacetField("Author", "Susan")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 45, Field.Store.NO)); doc.Add(new FacetField("Author", "Frank")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query and one of the // Facets.search utility methods: searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num")); // Retrieve & verify results: Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString()); taxoReader.Dispose(); searcher.IndexReader.Dispose(); dir.Dispose(); taxoDir.Dispose(); }
/// <summary> /// Assigns the additional parameters. /// </summary> /// <param name="settings">The settings dictionary.</param> public override void AssignSettings(Dictionary <string, string> settings) { _checkIndexes = bool.Parse(settings["CheckIndexes"]); bool autoCommit = bool.Parse(settings["AutoCommit"]); string luceneDotNetIndexDirectory = settings["LuceneDotNetIndexDirectory"]; string currentCrawlDirectory = Path.Combine(luceneDotNetIndexDirectory, "CurrentCrawl"); //create required directories... if (!Directory.Exists(luceneDotNetIndexDirectory)) { Directory.CreateDirectory(luceneDotNetIndexDirectory); } if (!Directory.Exists(currentCrawlDirectory)) { Directory.CreateDirectory(currentCrawlDirectory); } //create lucene.net directories... _luceneDotNetIndexDirectory = FSDirectory.Open(new DirectoryInfo(luceneDotNetIndexDirectory)); _currentCrawlDirectory = FSDirectory.Open(new DirectoryInfo(currentCrawlDirectory)); _standardAnalyzer = new StandardAnalyzer(); //delete any stale lock - a crawl may have been prematurely terminated, likely by the user; write.lock prevents us from writing to the index on subsequent crawls. if (File.Exists(Path.Combine(luceneDotNetIndexDirectory, "write.lock"))) { File.Delete(Path.Combine(luceneDotNetIndexDirectory, "write.lock")); } //do the same for the current crawl directory... if (File.Exists(Path.Combine(currentCrawlDirectory, "write.lock"))) { File.Delete(Path.Combine(currentCrawlDirectory, "write.lock")); } ManageIndexes(); TearDownIndexWriter(); _indexFiles = bool.Parse(settings["IndexFiles"]); _indexImages = bool.Parse(settings["IndexImages"]); _indexWebPages = bool.Parse(settings["IndexWebPages"]); //check to see if we have requested to rebuild the index. if (bool.Parse(settings["RebuildIndexOnLoad"])) { string tempDirectory = Path.Combine(luceneDotNetIndexDirectory, "Temp"); int fileIDLowerBound = int.Parse(settings["FileIDLowerBound"]); int fileIDUpperBound = int.Parse(settings["FileIDUpperBound"]); int imageIDLowerBound = int.Parse(settings["ImageIDLowerBound"]); int imageIDUpperBound = int.Parse(settings["ImageIDUpperBound"]); int webPageIDLowerBound = int.Parse(settings["WebPageIDLowerBound"]); int webPageIDUpperBound = int.Parse(settings["WebPageIDUpperBound"]); RebuildIndexes(tempDirectory, fileIDLowerBound, fileIDUpperBound, imageIDLowerBound, imageIDUpperBound, webPageIDLowerBound, webPageIDUpperBound); TearDownIndexWriter(); } //switch back to the _current if (autoCommit) { //NOTE: autoCommit was disabled in Lucene.net 2.4. The threads now check when to Commit(); _autoCommit = true; _autoCommitLock = new object(); _lastCommitDateTime = DateTime.Now; //_indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, true, _standardAnalyzer, false); _indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, _standardAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); } else { _indexWriter = new IndexWriter(_currentCrawlDirectory, _standardAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); } SetIndexWriterDefaults(); _indexSearcher = new IndexSearcher(_luceneDotNetIndexDirectory, true); }
/// <summary> /// Suggest similar words (optionally restricted to a field of an index). /// <para> /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms /// is not the same as the edit distance strategy used to calculate the best /// matching spell-checked word from the hits that Lucene found, one usually has /// to retrieve a couple of numSug's in order to get the true best match. /// </para> /// <para> /// I.e. if numSug == 1, don't count on that suggestion being the best one. /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion. /// </para> /// </summary> /// <param name="word"> the word you want a spell check done on </param> /// <param name="numSug"> the number of suggested words </param> /// <param name="ir"> the indexReader of the user index (can be null see field param) </param> /// <param name="field"> the field of the user index: if field is not null, the suggested /// words are restricted to the words present in this field. </param> /// <param name="suggestMode"> /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param> /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param> /// <exception cref="System.IO.IOException"> if the underlying index throws an <see cref="System.IO.IOException"/> </exception> /// <exception cref="ObjectDisposedException"> if the <see cref="SpellChecker"/> is already disposed </exception> /// <returns> string[] the sorted list of the suggest words with these 2 criteria: /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity /// of the suggest words in the field of the user index /// </returns> public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy) { // obtainSearcher calls ensureOpen IndexSearcher indexSearcher = ObtainSearcher(); try { if (ir == null || field == null) { suggestMode = SuggestMode.SUGGEST_ALWAYS; } if (suggestMode == SuggestMode.SUGGEST_ALWAYS) { ir = null; field = null; } int lengthWord = word.Length; int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0; int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0; // if the word exists in the real index and we don't care for word frequency, return the word itself if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0) { return(new string[] { word }); } BooleanQuery query = new BooleanQuery(); string[] grams; string key; for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++) { key = "gram" + ng; // form key grams = FormGrams(word, ng); // form word into ngrams (allow dups too) if (grams.Length == 0) { continue; // hmm } if (bStart > 0) // should we boost prefixes? 
{ Add(query, "start" + ng, grams[0], bStart); // matches start of word } if (bEnd > 0) // should we boost suffixes { Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word } for (int i = 0; i < grams.Length; i++) { Add(query, key, grams[i]); } } int maxHits = 10 * numSug; // System.out.println("Q: " + query); ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs; // System.out.println("HITS: " + hits.length()); SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparer); // go thru more than 'maxr' matches in case the distance filter triggers int stop = Math.Min(hits.Length, maxHits); SuggestWord sugWord = new SuggestWord(); for (int i = 0; i < stop; i++) { sugWord.String = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word // don't suggest a word for itself, that would be silly if (sugWord.String.Equals(word, StringComparison.Ordinal)) { continue; } // edit distance sugWord.Score = sd.GetDistance(word, sugWord.String); if (sugWord.Score < accuracy) { continue; } if (ir != null && field != null) // use the user index { sugWord.Freq = ir.DocFreq(new Term(field, sugWord.String)); // freq in the index // don't suggest a word that is not present in the field if ((suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.Freq) || sugWord.Freq < 1) { continue; } } sugQueue.InsertWithOverflow(sugWord); if (sugQueue.Count == numSug) { // if queue full, maintain the minScore score accuracy = sugQueue.Top.Score; } sugWord = new SuggestWord(); } // convert to array string string[] list = new string[sugQueue.Count]; for (int i = sugQueue.Count - 1; i >= 0; i--) { list[i] = sugQueue.Pop().String; } return(list); } finally { ReleaseSearcher(indexSearcher); } }
public virtual void TestRandom() { string[] tokens = GetRandomTokens(10); Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, indexDir); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt32(Random, 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new FacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(tw, doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.GetReader()); // NRT open var tr = new DirectoryTaxonomyReader(tw); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random.Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = GetTaxonomyFacetCounts(tr, config, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List <Dictionary <string, int?> >(); for (int i = 0; i < numDims; i++) { expectedCounts.Add(new Dictionary <string, int?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken, StringComparison.Ordinal)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { int?v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null; if (v == null) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); int totCount = 0; foreach (KeyValuePair <string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties SortTies(actual); Assert.AreEqual(expected, actual); } IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir); }
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) { for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) { if (VERBOSE) { Console.WriteLine("indexIter=" + indexIter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)) .SetMergePolicy(NewLogMergePolicy())); bool scoreDocsInOrder = TestJoinUtil.Random.NextBoolean(); IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder); IndexReader topLevelReader = w.GetReader(); w.Dispose(); for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) { if (VERBOSE) { Console.WriteLine("searchIter=" + searchIter); } IndexSearcher indexSearcher = NewSearcher(topLevelReader); int r = Random.Next(context.RandomUniqueValues.Length); bool from = context.RandomFrom[r]; string randomValue = context.RandomUniqueValues[r]; FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context); Query actualQuery = new TermQuery(new Term("value", randomValue)); if (VERBOSE) { Console.WriteLine("actualQuery=" + actualQuery); } var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length; ScoreMode scoreMode = (ScoreMode)Random.Next(scoreModeLength); if (VERBOSE) { Console.WriteLine("scoreMode=" + scoreMode); } Query joinQuery; if (from) { joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode); } else { joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode); } if (VERBOSE) { Console.WriteLine("joinQuery=" + joinQuery); } // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector... FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc); TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false); indexSearcher.Search(joinQuery, new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult, topScoreDocCollector)); // Asserting bit set... if (VERBOSE) { Console.WriteLine("expected cardinality:" + expectedResult.Cardinality()); DocIdSetIterator iterator = expectedResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } Console.WriteLine("actual cardinality:" + actualResult.Cardinality()); iterator = actualResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } } assertEquals(expectedResult, actualResult); // Asserting TopDocs... 
TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context); TopDocs actualTopDocs = topScoreDocCollector.GetTopDocs(); assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits); assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length); if (scoreMode == ScoreMode.None) { continue; } assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f); for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++) { if (VERBOSE) { Console.WriteLine(string.Format("Expected doc: {0} | Actual doc: {1}", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc)); Console.WriteLine(string.Format("Expected score: {0} | Actual score: {1}", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score)); } assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f); Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f); } } topLevelReader.Dispose(); dir.Dispose(); } }
public override Weight CreateWeight(IndexSearcher searcher) { return(new ToChildBlockJoinWeight(this, _parentQuery.CreateWeight(searcher), _parentsFilter, _doScores)); }
/// <summary> /// An extension of Item that allows you to launch a Search from an item /// </summary> /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns> /// <param name="startLocationItem">The start location of the search</param> /// <param name="hitCount">This will output the hitCount of the search</param> /// <param name="currentSearchString">The raw JSON Parse query</param> /// <param name="indexName">Force query to run on a particular index</param> /// <param name="sortField">Sort query by field (must be in index)</param> /// <param name="sortDirection">Sort in either "asc" or "desc"</param> /// <example>BucketManager.Search(Sitecore.Context.Item, SearchModel)</example> public static IEnumerable<SitecoreItem> Search(Item startLocationItem, out int hitCount, List<SearchStringModel> currentSearchString, string indexName = "itembuckets_buckets", string sortField = "", string sortDirection = "") { var refinements = new SafeDictionary<string>(); var searchStringModels = SearchHelper.GetTags(currentSearchString); if (searchStringModels.Count > 0) { foreach (var ss in searchStringModels) { var query = ss.Value; if (query.Contains("tagid=")) { query = query.Split('|')[1].Replace("tagid=", string.Empty); } var db = Context.ContentDatabase ?? Context.Database; refinements.Add("_tags", db.GetItem(query).ID.ToString()); } } using (var searcher = new IndexSearcher(indexName)) { var keyValuePair = searcher.GetItems(new DateRangeSearchParam { FullTextQuery = SearchHelper.GetText(currentSearchString), RelatedIds = null, SortDirection = sortDirection, TemplateIds = SearchHelper.GetTemplates(currentSearchString), LocationIds = startLocationItem.ID.ToGuid().ToEnumerable(), SortByField = sortField, Refinements = refinements}); hitCount = keyValuePair.Key; return keyValuePair.Value; } }
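// Hedged companion to the <example> tag above: the SearchStringModel member names (Type, Value) are assumptions about that class, and "_name" is an assumed indexed field. int hitCount; var searchString = new List<SearchStringModel> { new SearchStringModel { Type = "text", Value = "sample" } }; IEnumerable<SitecoreItem> results = BucketManager.Search(Sitecore.Context.Item, out hitCount, searchString, sortField: "_name", sortDirection: "asc");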
public void TestSimple() { const string idField = "id"; const string toField = "productId"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMergePolicy(NewLogMergePolicy())); // 0 Document doc = new Document(); doc.Add(new TextField("description", "random text", Field.Store.NO)); doc.Add(new TextField("name", "name1", Field.Store.NO)); doc.Add(new TextField(idField, "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "2", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "3", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); doc.Add(new TextField("description", "more random text", Field.Store.NO)); doc.Add(new TextField("name", "name2", Field.Store.NO)); doc.Add(new TextField(idField, "4", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 4 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "5", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "6", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.GetReader()); w.Dispose(); // Search for product Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None); TopDocs result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(4, result.ScoreDocs[0].Doc); assertEquals(5, result.ScoreDocs[1].Doc); joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher, ScoreMode.None); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(1, result.ScoreDocs[0].Doc); assertEquals(2, result.ScoreDocs[1].Doc); // Search for offer joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None); result = indexSearcher.Search(joinQuery, 10); assertEquals(1, result.TotalHits); assertEquals(3, result.ScoreDocs[0].Doc); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
protected internal override void DoSearching(TaskScheduler es, DateTime stopTime) { bool anyOpenDelFiles = false; DirectoryReader r = DirectoryReader.Open(Writer, true); while (DateTime.UtcNow < stopTime && !Failed.Get()) { if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: now reopen r=" + r); } DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } } else { if (VERBOSE) { Console.WriteLine("TEST: now close reader=" + r); } r.Dispose(); Writer.Commit(); ISet<string> openDeletedFiles = ((MockDirectoryWrapper)Dir).OpenDeletedFiles; if (openDeletedFiles.Count > 0) { Console.WriteLine("OBD files: " + openDeletedFiles); } anyOpenDelFiles |= openDeletedFiles.Count > 0; //Assert.AreEqual("open but deleted: " + openDeletedFiles, 0, openDeletedFiles.Size()); if (VERBOSE) { Console.WriteLine("TEST: now open"); } r = DirectoryReader.Open(Writer, true); } if (VERBOSE) { Console.WriteLine("TEST: got new reader=" + r); } //System.out.println("numDocs=" + r.NumDocs + " //openDelFileCount=" + dir.openDeleteFileCount()); if (r.NumDocs > 0) { FixedSearcher = new IndexSearcher(r, es); SmokeTestSearcher(FixedSearcher); RunSearchThreads(DateTime.UtcNow.AddMilliseconds(500)); } } r.Dispose(); //System.out.println("numDocs=" + r.NumDocs + " openDelFileCount=" + dir.openDeleteFileCount()); ISet<string> openDeletedFiles_ = ((MockDirectoryWrapper)Dir).OpenDeletedFiles; if (openDeletedFiles_.Count > 0) { Console.WriteLine("OBD files: " + openDeletedFiles_); } anyOpenDelFiles |= openDeletedFiles_.Count > 0; Assert.IsFalse(anyOpenDelFiles, "saw non-zero open-but-deleted count"); }
public override Weight CreateWeight(IndexSearcher searcher) { Weight baseWeight = baseQuery.CreateWeight(searcher); object[] drillDowns = new object[drillDownQueries.Length]; for (int dim = 0; dim < drillDownQueries.Length; dim++) { Query query = drillDownQueries[dim]; Filter filter = DrillDownQuery.GetFilter(query); if (filter != null) { drillDowns[dim] = filter; } else { // TODO: would be nice if we could say "we will do no // scoring" here.... drillDowns[dim] = searcher.Rewrite(query).CreateWeight(searcher); } } return new WeightAnonymousInnerClassHelper(this, baseWeight, drillDowns); }
// [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestBigDocuments() { // "big" as "much bigger than the chunk size" // for this test we force a FS dir // we can't just use newFSDirectory, because this test doesn't really index anything. // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments"))); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } Document emptyDoc = new Document(); // emptyDoc Document bigDoc1 = new Document(); // lot of small fields Document bigDoc2 = new Document(); // 1 very big field Field idField = new StringField("id", "", Field.Store.NO); emptyDoc.Add(idField); bigDoc1.Add(idField); bigDoc2.Add(idField); FieldType onlyStored = new FieldType(StringField.TYPE_STORED); onlyStored.IsIndexed = false; Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored); int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000); for (int i = 0; i < numFields; ++i) { bigDoc1.Add(smallField); } Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored); bigDoc2.Add(bigField); int numDocs = AtLeast(5); Document[] docs = new Document[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2)); } for (int i = 0; i < numDocs; ++i) { idField.SetStringValue("" + i); iw.AddDocument(docs[i]); if (Random().Next(numDocs) == 0) { iw.Commit(); } } iw.Commit(); iw.ForceMerge(1); // look at what happens when big docs are merged DirectoryReader rd = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(rd); for (int i = 0; i < numDocs; ++i) { Query query = new TermQuery(new Term("id", "" + i)); TopDocs topDocs = searcher.Search(query, 1); Assert.AreEqual(1, topDocs.TotalHits, "" + i); Document doc = rd.Document(topDocs.ScoreDocs[0].Doc); Assert.IsNotNull(doc); IIndexableField[] fieldValues = doc.GetFields("fld"); Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length); if (fieldValues.Length > 0) { Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue()); } } rd.Dispose(); iw.Dispose(); dir.Dispose(); }
/// <summary> /// Indexes the data from the given <see cref="IDictionary"/>. </summary> /// <param name="dict"> Dictionary to index </param> /// <param name="config"> <see cref="IndexWriterConfig"/> to use </param> /// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param> /// <exception cref="ObjectDisposedException"> if the <see cref="SpellChecker"/> is already disposed </exception> /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception> public void IndexDictionary(IDictionary dict, IndexWriterConfig config, bool fullMerge) { lock (modifyCurrentIndexLock) { EnsureOpen(); Directory dir = this.spellIndex; using (var writer = new IndexWriter(dir, config)) { IndexSearcher indexSearcher = ObtainSearcher(); IList <TermsEnum> termsEnums = new List <TermsEnum>(); IndexReader reader = indexSearcher.IndexReader; if (reader.MaxDoc > 0) { foreach (AtomicReaderContext ctx in reader.Leaves) { Terms terms = ctx.AtomicReader.GetTerms(F_WORD); if (terms != null) { termsEnums.Add(terms.GetIterator(null)); } } } bool isEmpty = termsEnums.Count == 0; try { IBytesRefIterator iter = dict.GetEntryIterator(); BytesRef currentTerm; while ((currentTerm = iter.Next()) != null) { string word = currentTerm.Utf8ToString(); int len = word.Length; if (len < 3) { continue; // too short; we bail, but "too long" is fine... } if (!isEmpty) { foreach (TermsEnum te in termsEnums) { if (te.SeekExact(currentTerm)) { goto termsContinue; } } } // ok index the word var doc = CreateDocument(word, GetMin(len), GetMax(len)); writer.AddDocument(doc); termsContinue: ; } } finally { ReleaseSearcher(indexSearcher); } if (fullMerge) { writer.ForceMerge(1); } } // TODO: this isn't that great, maybe in the future SpellChecker should take // IWC in its ctor / keep its writer open? // also re-open the spell index to see our own changes when the next suggestion // is fetched: SwapSearcher(dir); } }
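// A minimal sketch of feeding IndexDictionary() above from a one-word-per-line text file via Lucene.NET's PlainTextDictionary; the file path, version and analyzer choice are assumptions. using (var reader = new StreamReader("words.txt")) { var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)); spellChecker.IndexDictionary(new PlainTextDictionary(reader), config, fullMerge: true); }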
private static void GenerateHighlights(IList<Document> documents, IndexWriter writer, SearchCriteria criteria) { var documentHighlightMap = documents.ToDictionary(c => c._id.ToString()); var reader = DirectoryReader.Open(writer, true, true); var queryParser = new HighlighterQueryParser(writer.GetAnalyzer()); queryParser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE); var query = queryParser.Parse(criteria.Query) .Rewrite(reader); var highlighter = CreateHighlighter(); var fieldQuery = highlighter.GetFieldQuery(query); var searcher = new IndexSearcher(reader); var topFieldDocs = searcher.Search(query, documents.Count, Sort.RELEVANCE); var scoreDocs = topFieldDocs.ScoreDocs; foreach (var sd in scoreDocs) { var bestFragments = highlighter.GetBestFragments(fieldQuery, reader, sd.Doc, Schema.StandardField.FULL_TEXT, FRAGMENT_SIZE, FRAGMENT_COUNT); var document = searcher.Doc(sd.Doc); var docId = document.Get(Schema.StandardField.ID); if (documentHighlightMap.ContainsKey(docId) && bestFragments.Length > 0) { var dictionary = documentHighlightMap[docId].AsDictionary(); var highlight = String.Join($"{Environment.NewLine} ... {Environment.NewLine}", bestFragments); dictionary[HIGHLIGHT_FIELD_NAME] = highlight; } } }
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestRandomStoredFields() { Directory dir = NewDirectory(); Random rand = Random(); RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20))); //w.w.setNoCFSRatio(0.0); int docCount = AtLeast(200); int fieldCount = TestUtil.NextInt(rand, 1, 5); IList<int?> fieldIDs = new List<int?>(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.IsTokenized = false; Field idField = NewField("id", "", customType); for (int i = 0; i < fieldCount; i++) { fieldIDs.Add(i); } IDictionary<string, Document> docs = new Dictionary<string, Document>(); if (VERBOSE) { Console.WriteLine("TEST: build index docCount=" + docCount); } FieldType customType2 = new FieldType(); customType2.IsStored = true; for (int i = 0; i < docCount; i++) { Document doc = new Document(); doc.Add(idField); string id = "" + i; idField.SetStringValue(id); docs[id] = doc; if (VERBOSE) { Console.WriteLine("TEST: add doc id=" + id); } foreach (int field in fieldIDs) { string s; if (rand.Next(4) != 3) { s = TestUtil.RandomUnicodeString(rand, 1000); doc.Add(NewField("f" + field, s, customType2)); } else { s = null; } } w.AddDocument(doc); if (rand.Next(50) == 17) { // mixup binding of field name -> Number every so often Collections.Shuffle(fieldIDs); } if (rand.Next(5) == 3 && i > 0) { string delID = "" + rand.Next(i); if (VERBOSE) { Console.WriteLine("TEST: delete doc id=" + delID); } w.DeleteDocuments(new Term("id", delID)); docs.Remove(delID); } } if (VERBOSE) { Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields"); } if (docs.Count > 0) { string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/); for (int x = 0; x < 2; x++) { IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: cycle x=" + x + " r=" + r); } int num = AtLeast(1000); for (int iter = 0; iter < num; iter++) { string testID = idsList[rand.Next(idsList.Length)]; if (VERBOSE) { Console.WriteLine("TEST: test id=" + testID); } TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1); Assert.AreEqual(1, hits.TotalHits); Document doc = r.Document(hits.ScoreDocs[0].Doc); Document docExp = docs[testID]; for (int i = 0; i < fieldCount; i++) { assertEquals("doc " + testID + ", field f" + i + " is wrong", docExp.Get("f" + i), doc.Get("f" + i)); } } r.Dispose(); w.ForceMerge(1); } } w.Dispose(); dir.Dispose(); }
private int RunQuery(IndexSearcher s, Query q) { s.Search(q, 10); int hitCount = s.Search(q, null, 10, new Sort(new SortField("title", SortField.Type_e.STRING))).TotalHits; if (DefaultCodecSupportsDocValues()) { Sort dvSort = new Sort(new SortField("title", SortField.Type_e.STRING)); int hitCount2 = s.Search(q, null, 10, dvSort).TotalHits; Assert.AreEqual(hitCount, hitCount2); } return hitCount; }
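The doc-values cross-check in RunQuery only holds if the "title" field is indexed both as an inverted field (for the field-cache sort) and as doc values (for dvSort). A sketch of indexing one field both ways (the field name matches the query above; the value is illustrative):

using Lucene.Net.Documents;
using Lucene.Net.Util;

var doc = new Document();
string title = "lucene in action";
// Inverted, untokenized copy: backs the classic field-cache STRING sort.
doc.Add(new StringField("title", title, Field.Store.NO));
// Column-stride doc-values copy: backs the dvSort path checked above.
doc.Add(new SortedDocValuesField("title", new BytesRef(title)));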
private static IEnumerable <int> Search(Directory directory, Query query, IEnumerable <int> sortOrder, bool reversed = false) { var searcher = new IndexSearcher(directory, true); return(Search(searcher, query, sortOrder, reversed)); }
protected internal virtual void ReleaseSearcher(IndexSearcher s) { }
public void TestSimpleWithScoring() { const string idField = "id"; const string toField = "movieId"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMergePolicy(NewLogMergePolicy())); // 0 Document doc = new Document(); doc.Add(new TextField("description", "A random movie", Field.Store.NO)); doc.Add(new TextField("name", "Movie 1", Field.Store.NO)); doc.Add(new TextField(idField, "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO)); doc.Add(new TextField(idField, "2", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO)); doc.Add(new TextField(idField, "3", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); doc.Add(new TextField("description", "A second random movie", Field.Store.NO)); doc.Add(new TextField("name", "Movie 2", Field.Store.NO)); doc.Add(new TextField(idField, "4", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 4 doc = new Document(); doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO)); doc.Add(new TextField(idField, "5", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO)); doc.Add(new TextField(idField, "6", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.GetReader()); w.Dispose(); // Search for movie via subtitle Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max); TopDocs result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(0, result.ScoreDocs[0].Doc); assertEquals(3, result.ScoreDocs[1].Doc); // Score mode max. joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Max); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(3, result.ScoreDocs[0].Doc); assertEquals(0, result.ScoreDocs[1].Doc); // Score mode total joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Total); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(0, result.ScoreDocs[0].Doc); assertEquals(3, result.ScoreDocs[1].Doc); //Score mode avg joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Avg); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(3, result.ScoreDocs[0].Doc); assertEquals(0, result.ScoreDocs[1].Doc); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
internal SearchWrapper(SearcherManager searcherManager) { _searcherManager = searcherManager; _indexSearcher = (IndexSearcher) _searcherManager.Acquire(); }
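Acquire must always be balanced by Release so the manager can close superseded readers. A minimal sketch of the full lifecycle such a wrapper implies (the IDisposable shape is an assumption):

using System;
using Lucene.Net.Search;

internal sealed class SearchWrapper : IDisposable
{
    private readonly SearcherManager _searcherManager;
    private readonly IndexSearcher _indexSearcher;

    internal SearchWrapper(SearcherManager searcherManager)
    {
        _searcherManager = searcherManager;
        _indexSearcher = _searcherManager.Acquire(); // ref-counted; must be released
    }

    public IndexSearcher Searcher => _indexSearcher;

    public void Dispose()
    {
        // Hand the searcher back so MaybeRefresh() can retire the old reader:
        _searcherManager.Release(_indexSearcher);
    }
}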
// Make sure the documents returned by the search match the expected list // Copied from TestSort.java private void AssertMatches(IndexSearcher searcher, Query query, Sort sort, string expectedResult) { ScoreDoc[] result = searcher.Search(query, null, 1000, sort).ScoreDocs; StringBuilder buff = new StringBuilder(10); int n = result.Length; for (int i = 0; i < n; ++i) { Document doc = searcher.Doc(result[i].Doc); IndexableField[] v = doc.GetFields("tracer"); for (int j = 0; j < v.Length; ++j) { buff.Append(v[j].StringValue); } } Assert.AreEqual(expectedResult, buff.ToString()); }
public virtual void TestHugeLabel() { Directory indexDir = NewDirectory(), taxoDir = NewDirectory(); IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1)); FacetsConfig config = new FacetsConfig(); // Add one huge label: string bigs = null; int ordinal = -1; int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator bigs = TestUtil.RandomSimpleString(Random(), len, len); FacetField ff = new FacetField("dim", bigs); FacetLabel cp = new FacetLabel("dim", bigs); ordinal = taxoWriter.AddCategory(cp); Document doc = new Document(); doc.Add(ff); indexWriter.AddDocument(config.Build(taxoWriter, doc)); // Add tiny ones to cause a re-hash for (int i = 0; i < 3; i++) { string s = TestUtil.RandomSimpleString(Random(), 1, 10); taxoWriter.AddCategory(new FacetLabel("dim", s)); doc = new Document(); doc.Add(new FacetField("dim", s)); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } // when too large components were allowed to be added, this resulted in a new added category Assert.AreEqual(ordinal, taxoWriter.AddCategory(cp)); IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); var taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = new IndexSearcher(indexReader); DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig()); ddq.Add("dim", bigs); Assert.AreEqual(1, searcher.Search(ddq, 10).TotalHits); IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir); }
private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, bool multipleValuesPerDocument, bool scoreDocsInOrder) { IndexIterationContext context = new IndexIterationContext(); int numRandomValues = nDocs / 2; context.RandomUniqueValues = new string[numRandomValues]; ISet <string> trackSet = new HashSet <string>(); context.RandomFrom = new bool[numRandomValues]; for (int i = 0; i < numRandomValues; i++) { string uniqueRandomValue; do { uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random); // uniqueRandomValue = TestUtil.randomSimpleString(random); } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue)); // Generate unique values and empty strings aren't allowed. trackSet.Add(uniqueRandomValue); context.RandomFrom[i] = Random.NextBoolean(); context.RandomUniqueValues[i] = uniqueRandomValue; } RandomDoc[] docs = new RandomDoc[nDocs]; for (int i = 0; i < nDocs; i++) { string id = Convert.ToString(i); int randomI = Random.Next(context.RandomUniqueValues.Length); string value = context.RandomUniqueValues[randomI]; Document document = new Document(); document.Add(NewTextField(Random, "id", id, Field.Store.NO)); document.Add(NewTextField(Random, "value", value, Field.Store.NO)); bool from = context.RandomFrom[randomI]; int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1; docs[i] = new RandomDoc(id, numberOfLinkValues, value, from); for (int j = 0; j < numberOfLinkValues; j++) { string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)]; docs[i].LinkValues.Add(linkValue); if (from) { if (!context.FromDocuments.ContainsKey(linkValue)) { context.FromDocuments[linkValue] = new List <RandomDoc>(); } if (!context.RandomValueFromDocs.ContainsKey(value)) { context.RandomValueFromDocs[value] = new List <RandomDoc>(); } context.FromDocuments[linkValue].Add(docs[i]); context.RandomValueFromDocs[value].Add(docs[i]); document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO)); } else { if (!context.ToDocuments.ContainsKey(linkValue)) { context.ToDocuments[linkValue] = new List <RandomDoc>(); } if (!context.RandomValueToDocs.ContainsKey(value)) { context.RandomValueToDocs[value] = new List <RandomDoc>(); } context.ToDocuments[linkValue].Add(docs[i]); context.RandomValueToDocs[value].Add(docs[i]); document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO)); } } RandomIndexWriter w; if (from) { w = fromWriter; } else { w = toWriter; } w.AddDocument(document); if (Random.Next(10) == 4) { w.Commit(); } if (VERBOSE) { Console.WriteLine("Added document[" + docs[i].Id + "]: " + document); } } // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for // any ScoreMode. 
IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader()); IndexSearcher toSearcher = NewSearcher(toWriter.GetReader()); for (int i = 0; i < context.RandomUniqueValues.Length; i++) { string uniqueRandomValue = context.RandomUniqueValues[i]; string fromField; string toField; IDictionary <string, IDictionary <int, JoinScore> > queryVals; if (context.RandomFrom[i]) { fromField = "from"; toField = "to"; queryVals = context.FromHitsToJoinScore; } else { fromField = "to"; toField = "from"; queryVals = context.ToHitsToJoinScore; } IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>(); if (multipleValuesPerDocument) { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores)); } else { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores)); } IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>(); if (multipleValuesPerDocument) { if (scoreDocsInOrder) { AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader); Terms terms = slowCompositeReader.GetTerms(toField); if (terms != null) { DocsEnum docsEnum = null; TermsEnum termsEnum = null; SortedSet <BytesRef> joinValues = new SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer); joinValues.AddAll(joinValueToJoinScores.Keys); foreach (BytesRef joinValue in joinValues) { termsEnum = terms.GetIterator(termsEnum); if (termsEnum.SeekExact(joinValue)) { docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE); JoinScore joinScore = joinValueToJoinScores[joinValue]; for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc()) { // First encountered join value determines the score. // Something to keep in mind for many-to-many relations. if (!docToJoinScore.ContainsKey(doc)) { docToJoinScore[doc] = joinScore; } } } } } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores, docToJoinScore)); } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousInnerClassHelper6(this, toField, joinValueToJoinScores, docToJoinScore)); } queryVals[uniqueRandomValue] = docToJoinScore; } fromSearcher.IndexReader.Dispose(); toSearcher.IndexReader.Dispose(); return(context); }
public virtual void TestAddIndexOnDiskFull() { // MemoryCodec, since it uses FST, is not necessarily // "additive", ie if you add up N small FSTs, then merge // them, the merged result can easily be larger than the // sum because the merged FST may use array encoding for // some arcs (which uses more space): string idFormat = TestUtil.GetPostingsFormat("id"); string contentFormat = TestUtil.GetPostingsFormat("content"); AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal)); int START_COUNT = 57; int NUM_DIR = TEST_NIGHTLY ? 50 : 5; int END_COUNT = START_COUNT + NUM_DIR * (TEST_NIGHTLY ? 25 : 5); // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = NewDirectory(); IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); for (int j = 0; j < 25; j++) { AddDocWithIndex(writer, 25 * i + j); } writer.Dispose(); string[] files = dirs[i].ListAll(); for (int j = 0; j < files.Length; j++) { inputDiskUsage += dirs[i].FileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: MockDirectoryWrapper startDir = NewMockDirectory(); IndexWriter indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); for (int j = 0; j < START_COUNT; j++) { AddDocWithIndex(indWriter, j); } indWriter.Dispose(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = DirectoryReader.Open(startDir); Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq"); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(57, hits.Length, "first number of hits"); reader.Dispose(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by 2000 // bytes each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. 
// String[] files = startDir.ListAll(); long diskUsage = startDir.GetSizeInBytes(); long startDiskUsage = 0; string[] files_ = startDir.ListAll(); for (int i = 0; i < files_.Length; i++) { startDiskUsage += startDir.FileLength(files_[i]); } for (int iter = 0; iter < 3; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + TestUtil.NextInt32(Random, 50, 200); int method = iter; bool success = false; bool done = false; string methodName; if (0 == method) { methodName = "addIndexes(Directory[]) + forceMerge(1)"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexes(Directory[])"; } while (!done) { if (VERBOSE) { Console.WriteLine("TEST: cycle..."); } // Make a new dir that will enforce disk usage: MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory(startDir, NewIOContext(Random))); indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy(false))); IOException err = null; IMergeScheduler ms = indWriter.Config.MergeScheduler; for (int x = 0; x < 2; x++) { if (ms is IConcurrentMergeScheduler) // this test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. { if (0 == x) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } else { ((IConcurrentMergeScheduler)ms).ClearSuppressExceptions(); } } // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double)diskFree) / diskUsage; long thisDiskFree; string testName = null; if (0 == x) { dir.RandomIOExceptionRateOnOpen = Random.NextDouble() * 0.01; thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (VERBOSE) { testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; } } else { dir.RandomIOExceptionRateOnOpen = 0.0; thisDiskFree = 0; rate = 0.0; if (VERBOSE) { testName = "disk full test " + methodName + " with unlimited disk space"; } } if (VERBOSE) { Console.WriteLine("\ncycle: " + testName); } dir.TrackDiskUsage = true; dir.MaxSizeInBytes = thisDiskFree; dir.RandomIOExceptionRate = rate; try { if (0 == method) { if (VERBOSE) { Console.WriteLine("TEST: now addIndexes count=" + dirs.Length); } indWriter.AddIndexes(dirs); if (VERBOSE) { Console.WriteLine("TEST: now forceMerge"); } indWriter.ForceMerge(1); } else if (1 == method) { IndexReader[] readers = new IndexReader[dirs.Length]; for (int i = 0; i < dirs.Length; i++) { readers[i] = DirectoryReader.Open(dirs[i]); } try { indWriter.AddIndexes(readers); } finally { for (int i = 0; i < dirs.Length; i++) { readers[i].Dispose(); } } } else { indWriter.AddIndexes(dirs); } success = true; if (VERBOSE) { Console.WriteLine(" success!"); } if (0 == x) { done = true; } } catch (IOException e) { success = false; err = e; if (VERBOSE) { Console.WriteLine(" hit IOException: " + e); Console.WriteLine(e.StackTrace); } if (1 == x) { Console.WriteLine(e.StackTrace); Assert.Fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done TestUtil.SyncConcurrentMerges(indWriter); if (VERBOSE) { Console.WriteLine(" now test readers"); } // Finally, verify index is not corrupt, and, 
if // we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): dir.RandomIOExceptionRateOnOpen = 0.0; try { reader = DirectoryReader.Open(dir); } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.DocFreq(searchTerm); if (success) { if (result != START_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = NewSearcher(reader); try { hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs; } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (result2 != result) { Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } reader.Dispose(); if (VERBOSE) { Console.WriteLine(" count is " + result); } if (done || result == END_COUNT) { break; } } if (VERBOSE) { Console.WriteLine(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes"); } // Make sure we don't hit disk full during close below: dir.MaxSizeInBytes = 0; dir.RandomIOExceptionRate = 0.0; dir.RandomIOExceptionRateOnOpen = 0.0; indWriter.Dispose(); // Wait for all BG threads to finish else // dir.Dispose() will throw IOException because // there are still open files TestUtil.SyncConcurrentMerges(ms); dir.Dispose(); // Try again with more free space: diskFree += TEST_NIGHTLY ? TestUtil.NextInt32(Random, 4000, 8000) : TestUtil.NextInt32(Random, 40000, 80000); } } startDir.Dispose(); foreach (Directory dir in dirs) { dir.Dispose(); } }
public void TestInsideBooleanQuery() { const string idField = "id"; const string toField = "productId"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMergePolicy(NewLogMergePolicy())); // 0 Document doc = new Document(); doc.Add(new TextField("description", "random text", Field.Store.NO)); doc.Add(new TextField("name", "name1", Field.Store.NO)); doc.Add(new TextField(idField, "7", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "2", Field.Store.NO)); doc.Add(new TextField(toField, "7", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "3", Field.Store.NO)); doc.Add(new TextField(toField, "7", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); doc.Add(new TextField("description", "more random text", Field.Store.NO)); doc.Add(new TextField("name", "name2", Field.Store.NO)); doc.Add(new TextField(idField, "0", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 4 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "5", Field.Store.NO)); doc.Add(new TextField(toField, "0", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "6", Field.Store.NO)); doc.Add(new TextField(toField, "0", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); IndexSearcher indexSearcher = new IndexSearcher(w.GetReader()); w.Dispose(); // Search for product Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg); BooleanQuery bq = new BooleanQuery(); bq.Add(joinQuery, Occur.SHOULD); bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD); indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper(this)); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public static void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetSimilarity(Similarity)); //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { Document doc = new Document(); doc.Add(NewTextField("field", English.IntToEnglish(i), Field.Store.YES)); string txt = English.IntToEnglish(i) + ' ' + English.IntToEnglish(i + 1); doc.Add(NewTextField("field2", txt, Field.Store.YES)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); Searcher.Similarity = Similarity; }
public IEnumerable <Group> MapSearchResults(IEnumerable <ScoreDoc> scoreDocs, IndexSearcher searcher) { return(scoreDocs.Select(scoreDoc => MapSearchResult(searcher.Doc(scoreDoc.Doc))).ToList()); }
// Test using various international locales with accented characters (which // sort differently depending on locale) // // Copied (and slightly modified) from // Lucene.Net.Search.TestSort.testInternationalSort() // // TODO: this test is really fragile. there are already 3 different cases, // depending upon unicode version. public virtual void TestCollationKeySort(Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer, string usResult, string frResult, string svResult, string dkResult) { Directory indexStore = NewDirectory(); IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))); // document data: // the tracer field is used to determine which document was hit string[][] sortData = new string[][] { new string[] { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" }, new string[] { "B", "y", "HAT", "HAT", "HAT", "HAT" }, new string[] { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" }, new string[] { "D", "y", "HUT", "HUT", "HUT", "HUT" }, new string[] { "E", "x", "peach", "peach", "peach", "peach" }, new string[] { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" }, new string[] { "G", "x", "sin", "sin", "sin", "sin" }, new string[] { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" }, new string[] { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" }, new string[] { "J", "y", "HOT", "HOT", "HOT", "HOT" } }; FieldType customType = new FieldType(); customType.Stored = true; for (int i = 0; i < sortData.Length; ++i) { Document doc = new Document(); doc.Add(new Field("tracer", sortData[i][0], customType)); doc.Add(new TextField("contents", sortData[i][1], Field.Store.NO)); if (sortData[i][2] != null) { doc.Add(new TextField("US", usAnalyzer.TokenStream("US", new StringReader(sortData[i][2])))); } if (sortData[i][3] != null) { doc.Add(new TextField("France", franceAnalyzer.TokenStream("France", new StringReader(sortData[i][3])))); } if (sortData[i][4] != null) { doc.Add(new TextField("Sweden", swedenAnalyzer.TokenStream("Sweden", new StringReader(sortData[i][4])))); } if (sortData[i][5] != null) { doc.Add(new TextField("Denmark", denmarkAnalyzer.TokenStream("Denmark", new StringReader(sortData[i][5])))); } writer.AddDocument(doc); } writer.ForceMerge(1); writer.Dispose(); IndexReader reader = DirectoryReader.Open(indexStore); IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(); Query queryX = new TermQuery(new Term("contents", "x")); Query queryY = new TermQuery(new Term("contents", "y")); sort.SetSort(new SortField("US", SortField.Type_e.STRING)); AssertMatches(searcher, queryY, sort, usResult); sort.SetSort(new SortField("France", SortField.Type_e.STRING)); AssertMatches(searcher, queryX, sort, frResult); sort.SetSort(new SortField("Sweden", SortField.Type_e.STRING)); AssertMatches(searcher, queryY, sort, svResult); sort.SetSort(new SortField("Denmark", SortField.Type_e.STRING)); AssertMatches(searcher, queryY, sort, dkResult); reader.Dispose(); indexStore.Dispose(); }
public override void CreateWeight(IDictionary context, IndexSearcher searcher) { m_source.CreateWeight(context, searcher); }
public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd) { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); doc.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES)); doc.Add(new StringField("body", "body", Field.Store.YES)); writer.AddDocument(doc); writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(reader); Query query = new TermQuery(new Term("body", "body")); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when Farsi is not supported). ScoreDoc[] result = searcher.Search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).ScoreDocs; Assert.AreEqual(0, result.Length, "The index Term should not be included."); result = searcher.Search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).ScoreDocs; Assert.AreEqual(1, result.Length, "The index Term should be included."); reader.Dispose(); dir.Dispose(); }
public override Weight CreateWeight(IndexSearcher searcher) { return(new CustomWeight(this, searcher)); }
/// <summary> /// Using a strongly typed List of SearchStringModel, you can run a search based off a JSON String /// </summary> /// <param name="itm"> /// The itm. /// </param> /// <param name="currentSearchString"> /// The current Search String. /// </param> /// <param name="hitCount"> /// The hit Count. /// </param> /// <param name="indexName"> /// The index Name. /// </param> /// <param name="sortField"> /// The sort Field. /// </param> /// <param name="sortDirection"> /// The sort Direction. /// </param> /// <param name="pageSize"> /// The page Size. /// </param> /// <param name="pageNumber"> /// The page Number. /// </param> /// <param name="parameters"> /// The parameters. /// </param> /// <returns> /// IEnumerable List of Results that have been typed to a smaller version of the Item Object /// </returns> public static IEnumerable<SitecoreItem> FullSearch(Item itm, List<SearchStringModel> currentSearchString, out int hitCount, string indexName = "itembuckets_buckets", string sortField = "", string sortDirection = "", int pageSize = 0, int pageNumber = 0, object[] parameters = null) { var startDate = DateTime.Now; var endDate = DateTime.Now.AddDays(1); var locationSearch = LocationFilter; var refinements = new SafeDictionary<string>(); var searchStringModels = SearchHelper.GetTags(currentSearchString); if (searchStringModels.Count > 0) { foreach (var ss in searchStringModels) { var query = ss.Value; if (query.Contains("tagid=")) { query = query.Split('|')[1].Replace("tagid=", string.Empty); } var db = Context.ContentDatabase ?? Context.Database; refinements.Add("_tags", db.GetItem(query).ID.ToString()); } } var author = SearchHelper.GetAuthor(currentSearchString); var languages = SearchHelper.GetLanguages(currentSearchString); if (languages.Length > 0) { refinements.Add("_language", languages); } var references = SearchHelper.GetReferences(currentSearchString); var custom = SearchHelper.GetCustom(currentSearchString); if (custom.Length > 0) { var customSearch = custom.Split('|'); if (customSearch.Length > 0) { try { refinements.Add(customSearch[0], customSearch[1]); } catch (Exception exc) { Log.Error("Could not parse the custom search query", exc); } } } var search = SearchHelper.GetField(currentSearchString); if (search.Length > 0) { var customSearch = search; refinements.Add(customSearch, SearchHelper.GetText(currentSearchString)); } var fileTypes = SearchHelper.GetFileTypes(currentSearchString); if (fileTypes.Length > 0) { refinements.Add("extension", SearchHelper.GetFileTypes(currentSearchString)); } var s = SearchHelper.GetSite(currentSearchString); if (s.Length > 0) { SiteContext siteContext = SiteContextFactory.GetSiteContext(SiteManager.GetSite(s).Name); var db = Context.ContentDatabase ??
Context.Database; var startItemId = db.GetItem(siteContext.StartPath); locationSearch = startItemId.ID.ToString(); } var culture = CultureInfo.CreateSpecificCulture("en-US"); var startFlag = true; var endFlag = true; if (SearchHelper.GetStartDate(currentSearchString).Any()) { if (!DateTime.TryParse(SearchHelper.GetStartDate(currentSearchString), culture, DateTimeStyles.None, out startDate)) { startDate = DateTime.Now; } startFlag = false; } if (SearchHelper.GetEndDate(currentSearchString).Any()) { if (!DateTime.TryParse(SearchHelper.GetEndDate(currentSearchString), culture, DateTimeStyles.None, out endDate)) { endDate = DateTime.Now.AddDays(1); } endFlag = false; } using (var searcher = new IndexSearcher(indexName)) { var location = IdHelper.ParseId(SearchHelper.GetLocation(currentSearchString, locationSearch)); var locationIdFromItem = itm != null ? itm.ID.ToGuid().ToEnumerable() : null; var rangeSearch = new DateRangeSearchParam { ID = SearchHelper.GetID(currentSearchString).IsEmpty() ? SearchHelper.GetRecent(currentSearchString) : SearchHelper.GetID(currentSearchString), ShowAllVersions = false, FullTextQuery = SearchHelper.GetText(currentSearchString), Refinements = refinements, RelatedIds = references.Any() ? IdHelper.ParseId(references) : null, SortDirection = sortDirection, TemplateIds = SearchHelper.GetTemplates(currentSearchString), LocationIds = !location.Any() ? locationIdFromItem : location, Language = languages, SortByField = sortField, PageNumber = pageNumber, PageSize = pageSize, Author = author == string.Empty ? string.Empty : author, }; if (!startFlag || !endFlag) { rangeSearch.Ranges = new List<DateRangeSearchParam.DateRangeField> { new DateRangeSearchParam.DateRangeField(SearchFieldIDs.CreatedDate, startDate, endDate) { InclusiveStart = true, InclusiveEnd = true } }; } var returnResult = searcher.GetItems(rangeSearch); hitCount = returnResult.Key; return returnResult.Value; } }
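A usage sketch for FullSearch, assuming it hangs off BucketManager like the Search overloads below, and that SearchStringModel exposes a (type, value) constructor; both are assumptions, and all values are illustrative:

// Hypothetical call: full-text "tim" in English, sorted by name, page 2 of 20.
var searchQuery = new List<SearchStringModel>
{
    new SearchStringModel("text", "tim"),     // (type, value) ctor assumed
    new SearchStringModel("language", "en")
};
int hitCount;
IEnumerable<SitecoreItem> results = BucketManager.FullSearch(
    Sitecore.Context.Item,
    searchQuery,
    out hitCount,
    sortField: "_name",
    sortDirection: "desc",
    pageSize: 20,
    pageNumber: 2);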
public override void CreateWeight(IDictionary context, IndexSearcher searcher) { ifSource.CreateWeight(context, searcher); trueSource.CreateWeight(context, searcher); falseSource.CreateWeight(context, searcher); }
/// <summary> /// An extension of Item that allows you to launch a Search from an item /// </summary> /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns> /// <param name="startLocationItem">The start location of the search</param> /// <param name="refinements">A collection of refinements to the query</param> /// <param name="hitCount">This will output the hitCount of the search</param> /// <param name="relatedIds">Pipe delimited string of Id to query by Links to and from items</param> /// <param name="indexName">Force query to run on a particular index</param> /// <param name="text">The raw text query</param> /// <param name="templates">Pipe delimited string of Id of Templates</param> /// <param name="location">Override the location of the search with an Id</param> /// <param name="language">Query by the two letter ISO country code</param> /// <param name="id">Query by ID</param> /// <param name="sortField">Sort query by field (must be in index)</param> /// <param name="sortDirection">Sort in either "asc" or "desc"</param> /// <param name="itemName">Query by item name</param> /// <param name="startDate">mm/dd/yyyy format of start date</param> /// <param name="endDate">mm/dd/yyyy format of end date</param> /// <param name="numberOfItemsToReturn">0-XXXXXX (The bigger this number is the less performant it will be)</param> /// <example>BucketManager.Search(Sitecore.Context.Item, text: "Tim", templates: "TemplateGUID")</example> /// <example>BucketManager.Search(Sitecore.Context.Item, text: "Tim", relatedIds: "ItemGUID", sortField: "_name")</example> public static IEnumerable<SitecoreItem> Search(Item startLocationItem, SafeDictionary<string> refinements, out int hitCount, IEnumerable<Guid> relatedIds = null, string indexName = "itembuckets_buckets", string text = "", IEnumerable<Guid> templates = null, string location = "", string language = "en", string id = "", string sortField = "", string sortDirection = "", string itemName = "", string startDate = "", string endDate = "", int numberOfItemsToReturn = 20) { using (var searcher = new IndexSearcher(indexName)) { var culture = CultureInfo.CreateSpecificCulture("en-US"); var startDateOut = DateTime.Now; var endDateOut = DateTime.Now.AddDays(1); var startFlag = true; var endFlag = true; if (!DateTime.TryParse(startDate, culture, DateTimeStyles.None, out startDateOut)) { startDateOut = DateTime.Now; startFlag = false; } if (!DateTime.TryParse(endDate, culture, DateTimeStyles.None, out endDateOut)) { endDateOut = DateTime.Now.AddDays(1); endFlag = false; } if (startLocationItem.IsNull()) { Log.Warn("You are trying to run an Search on an item that has a start location of null", null); hitCount = 0; return new List<SitecoreItem>(); } var dateSearchParam = new DateRangeSearchParam { ItemName = itemName, FullTextQuery = text, RelatedIds = relatedIds, TemplateIds = templates, LocationIds = startLocationItem.ID.ToGuid().ToEnumerable(), Language = language, SortDirection = sortDirection, Refinements = refinements, ID = id, SortByField = sortField, PageSize = numberOfItemsToReturn }; if (startFlag || endFlag) { dateSearchParam.Ranges = new List<DateRangeSearchParam.DateRangeField> { new DateRangeSearchParam.DateRangeField( SearchFieldIDs.CreatedDate, startDateOut, endDateOut) { InclusiveStart = true, InclusiveEnd = true } }; } var keyValuePair = searcher.GetItems(dateSearchParam); hitCount = keyValuePair.Key; return keyValuePair.Value; } }
public void TestFuzzinessLong() { Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); addDoc("aaaaaaa", writer); addDoc("segment", writer); IndexReader reader = writer.Reader; IndexSearcher searcher = NewSearcher(reader); writer.Dispose(); SlowFuzzyQuery query; // not similar enough: query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // edit distance to "aaaaaaa" = 3, this matches because the string is longer than // in testDefaultFuzziness so a bigger difference is allowed: query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa")); // now with prefix query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa")); query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa")); query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // no match, more than half of the characters is wrong: query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // now with prefix query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // "student" and "stellent" are indeed similar to "segment" by default: query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); // now with prefix query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // "student" doesn't match anymore thanks to increased minimum similarity: query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); try { query = new SlowFuzzyQuery(new Term("field", "student"), 1.1f); fail("Expected IllegalArgumentException"); } #pragma warning disable 168 catch (ArgumentException e) #pragma warning restore 168 { // expecting exception } try { query = new SlowFuzzyQuery(new Term("field", "student"), -0.1f); fail("Expected IllegalArgumentException"); } #pragma warning disable 168 catch (ArgumentException e) #pragma warning restore 168 { // expecting exception } reader.Dispose(); 
directory.Dispose(); }
/// <summary> /// An extension of Item that allows you to launch a Search from an item /// </summary> /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns> /// <param name="startLocationItem">The start location of the search</param> /// <param name="queryParser">The raw JSON Parse query</param> /// <param name="hitCount">This will output the hitCount of the search</param> /// <param name="indexName">Force query to run on a particular index</param> public static IEnumerable<SitecoreItem> Search(Item startLocationItem, SearchParam queryParser, out int hitCount, string indexName = "itembuckets_buckets") { using (var searcher = new IndexSearcher(indexName)) { var keyValuePair = searcher.GetItems(queryParser); hitCount = keyValuePair.Key; return keyValuePair.Value; } }
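The same call with a prebuilt parameter object; this assumes DateRangeSearchParam derives from SearchParam, as its use with GetItems elsewhere in this code suggests, and the property values are illustrative:

int hitCount;
var param = new DateRangeSearchParam
{
    FullTextQuery = "tim",
    Language = "en",
    PageSize = 20
};
IEnumerable<SitecoreItem> results =
    BucketManager.Search(Sitecore.Context.Item, param, out hitCount);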
public void TestFuzziness() { //every test with SlowFuzzyQuery.defaultMinSimilarity //is exercising the Automaton, not the brute force linear method Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); addDoc("aaaaa", writer); addDoc("aaaab", writer); addDoc("aaabb", writer); addDoc("aabbb", writer); addDoc("abbbb", writer); addDoc("bbbbb", writer); addDoc("ddddd", writer); IndexReader reader = writer.Reader; IndexSearcher searcher = NewSearcher(reader); writer.Dispose(); SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); // same with prefix query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(2, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); // test scoring query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals("3 documents should match", 3, hits.Length); List <String> order = Arrays.AsList("bbbbb", "abbbb", "aabbb"); for (int i = 0; i < hits.Length; i++) { string term = searcher.Doc(hits[i].Doc).Get("field"); //System.out.println(hits[i].score); assertEquals(order[i], term); } // test pq size by supplying maxExpansions=2 // This query would normally return 3 documents, because 3 terms match (see above): query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals("only 2 documents should match", 2, hits.Length); order = Arrays.AsList("bbbbb", "abbbb"); for (int i = 0; i < hits.Length; i++) { string term = searcher.Doc(hits[i].Doc).Get("field"); //System.out.println(hits[i].score); assertEquals(order[i], term); } // not similar enough: query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3 hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // query identical to a word in the index: query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); // 
default allows for up to two edits: assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); // query similar to a word in the index: query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); // now with prefix query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(2, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); // now with prefix query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5); hits = 
searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // different field = no match: query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); reader.Dispose(); directory.Dispose(); }
protected internal override void ReleaseSearcher(IndexSearcher s) { if (s != FixedSearcher) { // Final searcher: s.IndexReader.Dispose(); } }
protected override void DoSearching(TaskScheduler es, long stopTime) { bool anyOpenDelFiles = false; DirectoryReader r = DirectoryReader.Open(Writer, true); while (DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() < stopTime && !Failed.Get()) { if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: now reopen r=" + r); } DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } } else { if (VERBOSE) { Console.WriteLine("TEST: now close reader=" + r); } r.Dispose(); Writer.Commit(); ISet<string> openDeletedFiles = ((MockDirectoryWrapper)Dir).OpenDeletedFiles; if (openDeletedFiles.Count > 0) { Console.WriteLine("OBD files: " + openDeletedFiles); } anyOpenDelFiles |= openDeletedFiles.Count > 0; //Assert.AreEqual("open but deleted: " + openDeletedFiles, 0, openDeletedFiles.Size()); if (VERBOSE) { Console.WriteLine("TEST: now open"); } r = DirectoryReader.Open(Writer, true); } if (VERBOSE) { Console.WriteLine("TEST: got new reader=" + r); } //System.out.println("numDocs=" + r.NumDocs + " //openDelFileCount=" + dir.openDeleteFileCount()); if (r.NumDocs > 0) { FixedSearcher = new IndexSearcher(r, es); SmokeTestSearcher(FixedSearcher); RunSearchThreads(DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() + 500); } } r.Dispose(); //System.out.println("numDocs=" + r.NumDocs + " openDelFileCount=" + dir.openDeleteFileCount()); ISet<string> openDeletedFiles_ = ((MockDirectoryWrapper)Dir).OpenDeletedFiles; if (openDeletedFiles_.Count > 0) { Console.WriteLine("OBD files: " + openDeletedFiles_); } anyOpenDelFiles |= openDeletedFiles_.Count > 0; Assert.IsFalse(anyOpenDelFiles, "saw non-zero open-but-deleted count"); }
// LUCENE-1404 private int HitCount(IndexSearcher searcher, string word) { return searcher.Search(new TermQuery(new Term("text", word)), 10).TotalHits; }
public virtual void TestRollingUpdates_Mem() { Random random = new Random(Random().Next()); BaseDirectoryWrapper dir = NewDirectory(); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); //provider.register(new MemoryCodec()); // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean()) //{ // Codec.Default = // TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.NextFloat())); //} MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); int SIZE = AtLeast(20); int id = 0; IndexReader r = null; IndexSearcher s = null; int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble()))); if (VERBOSE) { Console.WriteLine("TEST: numUpdates=" + numUpdates); } int updateCount = 0; // TODO: sometimes update ids not in order... for (int docIter = 0; docIter < numUpdates; docIter++) { Documents.Document doc = docs.NextDoc(); string myID = "" + id; if (id == SIZE - 1) { id = 0; } else { id++; } if (VERBOSE) { Console.WriteLine(" docIter=" + docIter + " id=" + id); } ((Field)doc.GetField("docid")).StringValue = myID; Term idTerm = new Term("docid", myID); bool doUpdate; if (s != null && updateCount < SIZE) { TopDocs hits = s.Search(new TermQuery(idTerm), 1); Assert.AreEqual(1, hits.TotalHits); doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc); if (VERBOSE) { if (doUpdate) { Console.WriteLine(" tryDeleteDocument failed"); } else { Console.WriteLine(" tryDeleteDocument succeeded"); } } } else { doUpdate = true; if (VERBOSE) { Console.WriteLine(" no searcher: doUpdate=true"); } } updateCount++; if (doUpdate) { w.UpdateDocument(idTerm, doc); } else { w.AddDocument(doc); } if (docIter >= SIZE && Random().Next(50) == 17) { if (r != null) { r.Dispose(); } bool applyDeletions = Random().NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions); } r = w.GetReader(applyDeletions); if (applyDeletions) { s = NewSearcher(r); } else { s = null; } Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE); updateCount = 0; } } if (r != null) { r.Dispose(); } w.Commit(); Assert.AreEqual(SIZE, w.NumDocs()); w.Dispose(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates"); docs.Dispose(); // LUCENE-4455: SegmentInfos infos = new SegmentInfos(); infos.Read(dir); long totalBytes = 0; foreach (SegmentCommitInfo sipc in infos.Segments) { totalBytes += sipc.SizeInBytes(); } long totalBytes2 = 0; foreach (string fileName in dir.ListAll()) { if (!fileName.StartsWith(IndexFileNames.SEGMENTS)) { totalBytes2 += dir.FileLength(fileName); } } Assert.AreEqual(totalBytes2, totalBytes); dir.Dispose(); }