Пример #1
0
        public override void SetUp()
        {
            base.SetUp();
            // we generate aweful regexps: good for testing.
            // but for preflex codec, the test can be very slow, so use less iterations.
            NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.YES);
            doc.Add(field);
            Terms = new SortedSet<BytesRef>();

            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.StringValue = s;
                Terms.Add(new BytesRef(s));
                writer.AddDocument(doc);
            }

            TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Пример #2
0
 public override void SetUp()
 {
     base.SetUp();
     PayloadHelper helper = new PayloadHelper();
     Searcher_Renamed = helper.SetUp(Random(), Similarity, 1000);
     IndexReader = Searcher_Renamed.IndexReader;
 }
 public static void AfterClass()
 {
     Searcher = null;
     Reader.Dispose();
     Reader = null;
     Directory.Dispose();
     Directory = null;
 }
Пример #4
0
 public static void AfterClass()
 {
     Reader.Dispose();
     Directory.Dispose();
     Searcher = null;
     Reader = null;
     Directory = null;
     SimplePayloadAnalyzer = null;
 }
Пример #5
0
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
     for (int i = 0; i < DocFields.Length; i++)
     {
         Document doc = new Document();
         doc.Add(NewTextField(FIELD, DocFields[i], Field.Store.NO));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
     Searcher = NewSearcher(Reader);
 }
Пример #6
0
    public static void search(String indexDir, String q)
    {
        Directory dir = FSDriecotry.Open(new System.IO.FileInfo(indexDir));
           IndexSearcher searcher = new IndexSearcher(dir, true);
           QueryParser parser = new QueryParser("contents", new StandardAnalyzer(Version.LUCENE_CURRENT));

           Query query = parser.Parser(q);
           Lucene.Net.Saerch.TopDocs hits = searher.Search(query, 10);
           System.Console.WriteLine("Found " + hits.totalHits + " document(s) that matched query '" + q + "':");
           for (int i = 0; i < hits.scoreDocs.Length; i++) {
           ScoreDoc scoreDoc = hits.ScoreDoc[i];
           Document doc = searcher.Doc(scoreDoc.doc);
           System.Console.WriteLine(doc.Get("filename"));
           }
          searcher.Close();
    }
        public static void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            writer.AddDocument(Doc(new Field[] { GetField("id", "0"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "1"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "2"), GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "3"), GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"), GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "4"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"), GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones") }));
            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
            Document doc = new Document();
            Field field = NewTextField("field", "", Field.Store.NO);
            doc.Add(field);

            field.StringValue = "quick brown fox";
            iw.AddDocument(doc);
            field.StringValue = "jumps over lazy broun dog";
            iw.AddDocument(doc);
            field.StringValue = "jumps over extremely very lazy broxn dog";
            iw.AddDocument(doc);
            Reader = iw.Reader;
            iw.Dispose();
            Searcher = NewSearcher(Reader);
        }
 public virtual void TestRandomSearchPerformance()
 {
     IndexSearcher searcher = new IndexSearcher(Reader);
     foreach (Term t in SampleTerms)
     {
         TermQuery query = new TermQuery(t);
         TopDocs topDocs = searcher.Search(query, 10);
         Assert.IsTrue(topDocs.TotalHits > 0);
     }
 }
 protected internal virtual void SmokeTestSearcher(IndexSearcher s)
 {
     RunQuery(s, new TermQuery(new Term("body", "united")));
     RunQuery(s, new TermQuery(new Term("titleTokenized", "states")));
     PhraseQuery pq = new PhraseQuery();
     pq.Add(new Term("body", "united"));
     pq.Add(new Term("body", "states"));
     RunQuery(s, pq);
 }
        /// <summary>
        ///     Searches the datasource using the specified criteria. Criteria is parsed by the query builder specified by
        ///     <typeparamref
        ///         name="QueryBuilderType" />
        ///     .
        /// </summary>
        /// <param name="scope">Name of the application.</param>
        /// <param name="criteria">The criteria.</param>
        /// <returns></returns>
        /// <exception cref="VirtoCommerce.Search.Providers.Lucene.LuceneSearchException"></exception>
        public virtual ISearchResults Search(string scope, ISearchCriteria criteria)
        {
            TopDocs docs = null;

            var folderName = this.GetFolderName(scope, criteria.DocumentType);

            var dir = FSDirectory.Open(new DirectoryInfo(this.GetDirectoryPath(folderName)));
            var searcher = new IndexSearcher(dir);

            var q = (Query)this.QueryBuilder.BuildQuery(criteria);

            Debug.WriteLine("Search Lucene Query:{0}", (object)q.ToString());

            try
            {
                var numDocs = criteria.StartingRecord + criteria.RecordsToRetrieve;

                if (criteria.Sort != null)
                {
                    var fields = criteria.Sort.GetSort();

                    docs = searcher.Search(
                        q,
                        null,
                        numDocs,
                        new Sort(
                            fields.Select(field => new SortField(field.FieldName, field.DataType, field.IsDescending))
                                  .ToArray()));
                }
                else
                {
                    docs = searcher.Search(q, numDocs);
                }
            }
            catch (Exception ex)
            {
                throw new LuceneSearchException("Search exception", ex);
            }

            var results = new LuceneSearchResults(searcher, searcher.IndexReader, docs, criteria, q);

            // Cleanup here
            searcher.IndexReader.Dispose();
            searcher.Dispose();
            return results.Results;
        }
Пример #12
0
 private static IEnumerable <Artifact> _mapLuceneToDataList(IEnumerable <ScoreDoc> hits, IndexSearcher searcher)
 {
     return(hits.Select(hit => _mapLuceneDocumentToData(searcher.Doc(hit.Doc))).ToList());
 }
Пример #13
0
 private void ReleaseSearcher(IndexSearcher aSearcher)
 {
     // don't check if open - always decRef
     // don't decrement the private searcher - could have been swapped
     aSearcher.IndexReader.DecRef();
 }
Пример #14
0
        public static List<List<FacetReturn>> GetFacets(List<SearchStringModel> _searchQuery)
        {
            var ret = new List<List<FacetReturn>>();
            var facets = Context.ContentDatabase.GetItem(Constants.FacetFolder).Children;
            foreach (Item facet in facets)
            {
                if (facet.Fields["Enabled"].Value == "1")
                {
                    var type = Activator.CreateInstance(Type.GetType(facet.Fields["Type"].Value));
                    if ((type as IFacet).IsNotNull())
                    {
                        var locationOverride = GetLocationOverride(_searchQuery);
                        var indexName = BucketManager.GetContextIndex(Context.ContentDatabase.GetItem(locationOverride));
                        using (var searcher = new IndexSearcher(indexName))
                        using (var context = new SortableIndexSearchContext(searcher.Index))
                        {
                            var query = SearchHelper.GetBaseQuery(_searchQuery, locationOverride);
                            var queryBase = searcher.ContructQuery(query);
                            var searchBitArray = new QueryFilter(queryBase).Bits(context.Searcher.GetIndexReader());
                            var res = ((IFacet)type).Filter(queryBase, _searchQuery, locationOverride, searchBitArray);
                            ret.Add(res);
                        }
                    }
                }
            }

            return ret;
        }
Пример #15
0
 /// <summary>
 /// An extension of Item that allows you to launch a Search from an item
 /// </summary>
 /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns>
 /// <param name="startLocationItem">The start location of the search</param>
 /// <param name="queryParser">The raw JSON Parse query</param>
 /// <param name="hitCount">This will output the hitCount of the search</param>
 /// <param name="indexName">Force query to run on a particular index</param>
 public static IEnumerable<SitecoreItem> Search(Query rawLuceneQuery, out int hitCount, int pageSize = 20, int pageNumber = 1, string indexName = "itembuckets_buckets")
 {
     using (var searcher = new IndexSearcher(indexName))
     {
         var keyValuePair = searcher.RunQuery(rawLuceneQuery, pageSize, pageNumber);
         hitCount = keyValuePair.Key;
         return keyValuePair.Value;
     }
 }
Пример #16
0
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            // Uses default $facets field:
            Facets facets;

            if (Random.NextBoolean())
            {
                facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            }
            else
            {
                OrdinalsReader ordsReader = new DocValuesOrdinalsReader();
                if (Random.NextBoolean())
                {
                    ordsReader = new CachedOrdinalsReader(ordsReader);
                }
                facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
            }

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
Пример #17
0
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetHierarchical("Publish Date", true);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            doc.Add(new FacetField("Author", "Bob"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "15"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "20"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Susan"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "7"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Frank"));
            doc.Add(new FacetField("Publish Date", "1999", "5", "5"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c);

            // Retrieve & verify results:
            Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n  2010 (2)\n  2012 (2)\n  1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString());
            Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", facets.GetTopChildren(10, "Author").ToString());

            // Now user drills down on Publish Date/2010:
            DrillDownQuery q2 = new DrillDownQuery(config);

            q2.Add("Publish Date", "2010");
            c = new FacetsCollector();
            searcher.Search(q2, c);
            facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString());

            Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa"));

            Assert.Null(facets.GetTopChildren(10, "Non exitent dim"));

            // Smoke test PrintTaxonomyStats:
            string result;

            using (ByteArrayOutputStream bos = new ByteArrayOutputStream())
            {
                using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true)
                {
                    AutoFlush = true
                })
                {
                    PrintTaxonomyStats.PrintStats(taxoReader, w, true);
                }
                result = bos.ToString();
            }
            Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1);
            // Make sure at least a few nodes of the tree came out:
            Assert.True(result.IndexOf("  /1999", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("  /2012", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("      /20", StringComparison.Ordinal) != -1);

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
Пример #18
0
        public void TestNestedSorting()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                                                                            new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            IList <Document> docs     = new List <Document>();
            Document         document = new Document();

            document.Add(new StringField("field2", "a", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "b", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "c", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "a", Field.Store.NO));
            docs.Add(document);
            w.AddDocuments(docs);
            w.Commit();

            docs.Clear();
            document = new Document();
            document.Add(new StringField("field2", "c", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "d", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "e", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "b", Field.Store.NO));
            docs.Add(document);
            w.AddDocuments(docs);

            docs.Clear();
            document = new Document();
            document.Add(new StringField("field2", "e", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "f", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "g", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "c", Field.Store.NO));
            docs.Add(document);
            w.AddDocuments(docs);

            docs.Clear();
            document = new Document();
            document.Add(new StringField("field2", "g", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "h", Field.Store.NO));
            document.Add(new StringField("filter_1", "F", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "i", Field.Store.NO));
            document.Add(new StringField("filter_1", "F", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "d", Field.Store.NO));
            docs.Add(document);
            w.AddDocuments(docs);
            w.Commit();

            docs.Clear();
            document = new Document();
            document.Add(new StringField("field2", "i", Field.Store.NO));
            document.Add(new StringField("filter_1", "F", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "j", Field.Store.NO));
            document.Add(new StringField("filter_1", "F", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "k", Field.Store.NO));
            document.Add(new StringField("filter_1", "F", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "f", Field.Store.NO));
            docs.Add(document);
            w.AddDocuments(docs);

            docs.Clear();
            document = new Document();
            document.Add(new StringField("field2", "k", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "l", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "m", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "g", Field.Store.NO));
            docs.Add(document);
            w.AddDocuments(docs);

            // This doc will not be included, because it doesn't have nested docs
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "h", Field.Store.NO));
            w.AddDocument(document);

            docs.Clear();
            document = new Document();
            document.Add(new StringField("field2", "m", Field.Store.NO));
            document.Add(new StringField("filter_1", "T", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "n", Field.Store.NO));
            document.Add(new StringField("filter_1", "F", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("field2", "o", Field.Store.NO));
            document.Add(new StringField("filter_1", "F", Field.Store.NO));
            docs.Add(document);
            document = new Document();
            document.Add(new StringField("__type", "parent", Field.Store.NO));
            document.Add(new StringField("field1", "i", Field.Store.NO));
            docs.Add(document);
            w.AddDocuments(docs);
            w.Commit();

            // Some garbage docs, just to check if the NestedFieldComparer can deal with this.
            document = new Document();
            document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
            w.AddDocument(document);
            document = new Document();
            document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
            w.AddDocument(document);
            document = new Document();
            document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
            w.AddDocument(document);

            IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.IndexWriter, false));

            w.Dispose();
            Filter parentFilter          = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
            Filter childFilter           = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
            ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);

            // Sort by field ascending, order first
            ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, Wrap(parentFilter), Wrap(childFilter));
            Sort         sort    = new Sort(sortField);
            TopFieldDocs topDocs = searcher.Search(query, 5, sort);

            assertEquals(7, topDocs.TotalHits);
            assertEquals(5, topDocs.ScoreDocs.Length);
            assertEquals(3, topDocs.ScoreDocs[0].Doc);
            assertEquals("a", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
            assertEquals(7, topDocs.ScoreDocs[1].Doc);
            assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
            assertEquals(11, topDocs.ScoreDocs[2].Doc);
            assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
            assertEquals(15, topDocs.ScoreDocs[3].Doc);
            assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
            assertEquals(19, topDocs.ScoreDocs[4].Doc);
            assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

            // Sort by field ascending, order last
            sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
            sort      = new Sort(sortField);
            topDocs   = searcher.Search(query, 5, sort);
            assertEquals(7, topDocs.TotalHits);
            assertEquals(5, topDocs.ScoreDocs.Length);
            assertEquals(3, topDocs.ScoreDocs[0].Doc);
            assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
            assertEquals(7, topDocs.ScoreDocs[1].Doc);
            assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
            assertEquals(11, topDocs.ScoreDocs[2].Doc);
            assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
            assertEquals(15, topDocs.ScoreDocs[3].Doc);
            assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
            assertEquals(19, topDocs.ScoreDocs[4].Doc);
            assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

            // Sort by field descending, order last
            sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
            sort      = new Sort(sortField);
            topDocs   = searcher.Search(query, 5, sort);
            assertEquals(topDocs.TotalHits, 7);
            assertEquals(5, topDocs.ScoreDocs.Length);
            assertEquals(28, topDocs.ScoreDocs[0].Doc);
            assertEquals("o", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
            assertEquals(23, topDocs.ScoreDocs[1].Doc);
            assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
            assertEquals(19, topDocs.ScoreDocs[2].Doc);
            assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
            assertEquals(15, topDocs.ScoreDocs[3].Doc);
            assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
            assertEquals(11, topDocs.ScoreDocs[4].Doc);
            assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

            // Sort by field descending, order last, sort filter (filter_1:T)
            childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
            query       = new ToParentBlockJoinQuery(
                new FilteredQuery(new MatchAllDocsQuery(), childFilter),
                new FixedBitSetCachingWrapperFilter(parentFilter),
                ScoreMode.None);
            sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
            sort      = new Sort(sortField);
            topDocs   = searcher.Search(query, 5, sort);
            assertEquals(6, topDocs.TotalHits);
            assertEquals(5, topDocs.ScoreDocs.Length);
            assertEquals(23, topDocs.ScoreDocs[0].Doc);
            assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
            assertEquals(28, topDocs.ScoreDocs[1].Doc);
            assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
            assertEquals(11, topDocs.ScoreDocs[2].Doc);
            assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
            assertEquals(15, topDocs.ScoreDocs[3].Doc);
            assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
            assertEquals(7, topDocs.ScoreDocs[4].Doc);
            assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

            searcher.IndexReader.Dispose();
            dir.Dispose();
        }
Пример #19
0
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=3 childCount=3\n  foo1 (1)\n  foo2 (1)\n  foo3 (1)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=2 childCount=2\n  bar1 (1)\n  bar2 (1)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=1 childCount=1\n  baz1 (1)\n", results[2].ToString());

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
Пример #20
0
            public IndexSearcherHoldingState(IndexSearcher indexSearcher)
            {
                IndexSearcher = indexSearcher;

                MemoryStatistics.RegisterLowMemoryHandler(this);
            }
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FacetsConfig config = new FacetsConfig();

            // Reused across documents, to add the necessary facet
            // fields:
            Document doc = new Document();

            doc.Add(new Int32Field("num", 10, Field.Store.NO));
            doc.Add(new FacetField("Author", "Bob"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 20, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 30, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 40, Field.Store.NO));
            doc.Add(new FacetField("Author", "Susan"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 45, Field.Store.NO));
            doc.Add(new FacetField("Author", "Frank"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query and one of the
            // Facets.search utility methods:
            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num"));

            // Retrieve & verify results:
            Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n  Lisa (50.0)\n  Frank (45.0)\n  Susan (40.0)\n  Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString());

            taxoReader.Dispose();
            searcher.IndexReader.Dispose();
            dir.Dispose();
            taxoDir.Dispose();
        }
Пример #22
0
        /// <summary>
        ///     Assigns the additional parameters.
        /// </summary>
        /// <param name = "settings"></param>
        public override void AssignSettings(Dictionary <string, string> settings)
        {
            _checkIndexes = bool.Parse(settings["CheckIndexes"]);

            bool autoCommit = bool.Parse(settings["AutoCommit"]);

            string luceneDotNetIndexDirectory = settings["LuceneDotNetIndexDirectory"];
            string currentCrawlDirectory      = Path.Combine(luceneDotNetIndexDirectory, "CurrentCrawl");

            //create required directories...
            if (!Directory.Exists(luceneDotNetIndexDirectory))
            {
                Directory.CreateDirectory(luceneDotNetIndexDirectory);
            }

            if (!Directory.Exists(currentCrawlDirectory))
            {
                Directory.CreateDirectory(currentCrawlDirectory);
            }

            //create lucene.net directories...
            _luceneDotNetIndexDirectory = FSDirectory.Open(new DirectoryInfo(luceneDotNetIndexDirectory));
            _currentCrawlDirectory      = FSDirectory.Open(new DirectoryInfo(currentCrawlDirectory));

            _standardAnalyzer = new StandardAnalyzer();

            //delete the lock - a crawl may have been prematurely terminated, likely by the user's election.  write.lock prevents us from writing to the index on subsequent crawls.
            if (File.Exists(Path.Combine(luceneDotNetIndexDirectory, "write.lock")))
            {
                File.Delete(Path.Combine(luceneDotNetIndexDirectory, "write.lock"));
            }

            //delete the lock - a crawl may have been prematurely terminated, likely by the user's election.  write.lock prevents us from writing to the index on subsequent crawls.
            if (File.Exists(Path.Combine(currentCrawlDirectory, "write.lock")))
            {
                File.Delete(Path.Combine(currentCrawlDirectory, "write.lock"));
            }

            File.Delete(Path.Combine(currentCrawlDirectory, "write.lock"));

            ManageIndexes();

            TearDownIndexWriter();

            _indexFiles    = bool.Parse(settings["IndexFiles"]);
            _indexImages   = bool.Parse(settings["IndexImages"]);
            _indexWebPages = bool.Parse(settings["IndexWebPages"]);

            //check to see if we have requested to rebuild the index.
            if (bool.Parse(settings["RebuildIndexOnLoad"]))
            {
                string tempDirectory       = Path.Combine(luceneDotNetIndexDirectory, "Temp");
                int    fileIDLowerBound    = int.Parse(settings["FileIDLowerBound"]);
                int    fileIDUpperBound    = int.Parse(settings["FileIDUpperBound"]);
                int    imageIDLowerBound   = int.Parse(settings["ImageIDLowerBound"]);
                int    imageIDUpperBound   = int.Parse(settings["ImageIDUpperBound"]);
                int    webPageIDLowerBound = int.Parse(settings["WebPageIDLowerBound"]);
                int    webPageIDUpperBound = int.Parse(settings["WebPageIDUpperBound"]);

                RebuildIndexes(tempDirectory, fileIDLowerBound, fileIDUpperBound, imageIDLowerBound, imageIDUpperBound, webPageIDLowerBound, webPageIDUpperBound);

                TearDownIndexWriter();
            }

            //switch back to the _current
            if (autoCommit)
            {
                //NOTE: autoCommit was disabled in Lucene.net 2.4.  The threads now check when to Commit();
                _autoCommit         = true;
                _autoCommitLock     = new object();
                _lastCommitDateTime = DateTime.Now;
                //_indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, true, _standardAnalyzer, false);
                _indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, _standardAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
            }
            else
            {
                _indexWriter = new IndexWriter(_currentCrawlDirectory, _standardAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            }

            SetIndexWriterDefaults();

            _indexSearcher = new IndexSearcher(_luceneDotNetIndexDirectory, true);
        }
Пример #23
0
        /// <summary>
        /// Suggest similar words (optionally restricted to a field of an index).
        /// <para>
        /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms
        /// is not the same as the edit distance strategy used to calculate the best
        /// matching spell-checked word from the hits that Lucene found, one usually has
        /// to retrieve a couple of numSug's in order to get the true best match.
        /// </para>
        /// <para>
        /// I.e. if numSug == 1, don't count on that suggestion being the best one.
        /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
        /// </para>
        /// </summary>
        /// <param name="word"> the word you want a spell check done on </param>
        /// <param name="numSug"> the number of suggested words </param>
        /// <param name="ir"> the indexReader of the user index (can be null see field param) </param>
        /// <param name="field"> the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field. </param>
        /// <param name="suggestMode">
        /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param>
        /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
        /// <exception cref="System.IO.IOException"> if the underlying index throws an <see cref="System.IO.IOException"/> </exception>
        /// <exception cref="ObjectDisposedException"> if the <see cref="SpellChecker"/> is already disposed </exception>
        /// <returns> string[] the sorted list of the suggest words with these 2 criteria:
        /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
        /// of the suggest words in the field of the user index
        ///  </returns>
        public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy)
        {
            // obtainSearcher calls ensureOpen
            IndexSearcher indexSearcher = ObtainSearcher();

            try
            {
                if (ir == null || field == null)
                {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }
                if (suggestMode == SuggestMode.SUGGEST_ALWAYS)
                {
                    ir    = null;
                    field = null;
                }

                int lengthWord = word.Length;

                int freq     = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
                int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
                // if the word exists in the real index and we don't care for word frequency, return the word itself
                if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0)
                {
                    return(new string[] { word });
                }

                BooleanQuery query = new BooleanQuery();
                string[]     grams;
                string       key;

                for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
                {
                    key = "gram" + ng;           // form key

                    grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                    if (grams.Length == 0)
                    {
                        continue; // hmm
                    }

                    if (bStart > 0)                                            // should we boost prefixes?
                    {
                        Add(query, "start" + ng, grams[0], bStart);            // matches start of word
                    }
                    if (bEnd > 0)                                              // should we boost suffixes
                    {
                        Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
                    }
                    for (int i = 0; i < grams.Length; i++)
                    {
                        Add(query, key, grams[i]);
                    }
                }

                int maxHits = 10 * numSug;

                //    System.out.println("Q: " + query);
                ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
                //    System.out.println("HITS: " + hits.length());
                SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparer);

                // go thru more than 'maxr' matches in case the distance filter triggers
                int         stop    = Math.Min(hits.Length, maxHits);
                SuggestWord sugWord = new SuggestWord();
                for (int i = 0; i < stop; i++)
                {
                    sugWord.String = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word

                    // don't suggest a word for itself, that would be silly
                    if (sugWord.String.Equals(word, StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // edit distance
                    sugWord.Score = sd.GetDistance(word, sugWord.String);
                    if (sugWord.Score < accuracy)
                    {
                        continue;
                    }

                    if (ir != null && field != null)                                // use the user index
                    {
                        sugWord.Freq = ir.DocFreq(new Term(field, sugWord.String)); // freq in the index
                        // don't suggest a word that is not present in the field
                        if ((suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.Freq) || sugWord.Freq < 1)
                        {
                            continue;
                        }
                    }
                    sugQueue.InsertWithOverflow(sugWord);
                    if (sugQueue.Count == numSug)
                    {
                        // if queue full, maintain the minScore score
                        accuracy = sugQueue.Top.Score;
                    }
                    sugWord = new SuggestWord();
                }

                // convert to array string
                string[] list = new string[sugQueue.Count];
                for (int i = sugQueue.Count - 1; i >= 0; i--)
                {
                    list[i] = sugQueue.Pop().String;
                }

                return(list);
            }
            finally
            {
                ReleaseSearcher(indexSearcher);
            }
        }
Пример #24
0
        public virtual void TestRandom()
        {
            string[]        tokens   = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexDir);
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt32(Random, 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.GetReader());

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random.Next(tokens.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = GetTaxonomyFacetCounts(tr, config, fc);

                // Slow, yet hopefully bug-free, faceting:
                var expectedCounts = new List <Dictionary <string, int?> >();
                for (int i = 0; i < numDims; i++)
                {
                    expectedCounts.Add(new Dictionary <string, int?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken, StringComparison.Ordinal))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                int?v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null;
                                if (v == null)
                                {
                                    expectedCounts[j][doc.dims[j]] = 1;
                                }
                                else
                                {
                                    expectedCounts[j][doc.dims[j]] = (int)v + 1;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    int totCount = 0;
                    foreach (KeyValuePair <string, int?> ent in expectedCounts[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totCount += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totCount > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                Assert.AreEqual(expected, actual);
            }

            IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
Пример #25
0
        private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
                                       int numberOfDocumentsToIndex)
        {
            for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("indexIter=" + indexIter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                              NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                                                              .SetMergePolicy(NewLogMergePolicy()));
                bool scoreDocsInOrder         = TestJoinUtil.Random.NextBoolean();
                IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
                                                              scoreDocsInOrder);

                IndexReader topLevelReader = w.GetReader();
                w.Dispose();
                for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("searchIter=" + searchIter);
                    }
                    IndexSearcher indexSearcher = NewSearcher(topLevelReader);

                    int         r              = Random.Next(context.RandomUniqueValues.Length);
                    bool        from           = context.RandomFrom[r];
                    string      randomValue    = context.RandomUniqueValues[r];
                    FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
                                                                      context);

                    Query actualQuery = new TermQuery(new Term("value", randomValue));
                    if (VERBOSE)
                    {
                        Console.WriteLine("actualQuery=" + actualQuery);
                    }

                    var       scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
                    ScoreMode scoreMode       = (ScoreMode)Random.Next(scoreModeLength);
                    if (VERBOSE)
                    {
                        Console.WriteLine("scoreMode=" + scoreMode);
                    }

                    Query joinQuery;
                    if (from)
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    else
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("joinQuery=" + joinQuery);
                    }

                    // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
                    FixedBitSet          actualResult         = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
                    indexSearcher.Search(joinQuery,
                                         new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult,
                                                                                 topScoreDocCollector));
                    // Asserting bit set...
                    if (VERBOSE)
                    {
                        Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                        DocIdSetIterator iterator = expectedResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                        Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                        iterator = actualResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                    }
                    assertEquals(expectedResult, actualResult);

                    // Asserting TopDocs...
                    TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
                    TopDocs actualTopDocs   = topScoreDocCollector.GetTopDocs();
                    assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
                    assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
                    if (scoreMode == ScoreMode.None)
                    {
                        continue;
                    }

                    assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
                    for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
                    {
                        if (VERBOSE)
                        {
                            string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                            string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score);
                        }
                        assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                        Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
                    }
                }
                topLevelReader.Dispose();
                dir.Dispose();
            }
        }
Пример #26
0
 public override Weight CreateWeight(IndexSearcher searcher)
 {
     return(new ToChildBlockJoinWeight(this, _parentQuery.CreateWeight(searcher), _parentsFilter, _doScores));
 }
Пример #27
0
        /// <summary>
        /// An extension of Item that allows you to launch a Search from an item
        /// </summary>
        /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns>
        /// <param name="startLocationItem">The start location of the search</param>
        /// <param name="hitCount">This will output the hitCount of the search</param>
        /// <param name="currentSearchString">The raw JSON Parse query</param>
        /// <param name="indexName">Force query to run on a particular index</param>
        /// <param name="sortField">Sort query by field (must be in index)</param>
        /// <param name="sortDirection">Sort in either "asc" or "desc"</param>
        /// <example>BucketManager.Search(Sitecore.Context.Item, SearchModel)</example>
        public static IEnumerable<SitecoreItem> Search(Item startLocationItem, out int hitCount, List<SearchStringModel> currentSearchString, string indexName = "itembuckets_buckets", string sortField = "", string sortDirection = "")
        {
            var refinements = new SafeDictionary<string>();
            var searchStringModels = SearchHelper.GetTags(currentSearchString);

            if (searchStringModels.Count > 0)
            {
                foreach (var ss in searchStringModels)
                {
                    var query = ss.Value;
                    if (query.Contains("tagid="))
                    {
                        query = query.Split('|')[1].Replace("tagid=", string.Empty);
                    }
                    var db = Context.ContentDatabase ?? Context.Database;
                    refinements.Add("_tags", db.GetItem(query).ID.ToString());
                }
            }
            using (var searcher = new IndexSearcher(indexName))
            {
                var keyValuePair = searcher.GetItems(new DateRangeSearchParam { FullTextQuery = SearchHelper.GetText(currentSearchString),
                    RelatedIds = null,
                    SortDirection = sortDirection,
                    TemplateIds = SearchHelper.GetTemplates(currentSearchString),
                    LocationIds = startLocationItem.ID.ToGuid().ToEnumerable(),
                    SortByField = sortField, Refinements = refinements});
                hitCount = keyValuePair.Key;
                return keyValuePair.Value;
            }
        }
Пример #28
0
        public void TestSimple()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")),
                                                       indexSearcher, ScoreMode.None);

            TopDocs result = indexSearcher.Search(joinQuery, 10);

            assertEquals(2, result.TotalHits);
            assertEquals(4, result.ScoreDocs[0].Doc);
            assertEquals(5, result.ScoreDocs[1].Doc);

            joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(1, result.ScoreDocs[0].Doc);
            assertEquals(2, result.ScoreDocs[1].Doc);

            // Search for offer
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(1, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Пример #29
0
        protected internal override void DoSearching(TaskScheduler es, DateTime stopTime)
        {
            bool anyOpenDelFiles = false;

            DirectoryReader r = DirectoryReader.Open(Writer, true);

            while (DateTime.UtcNow < stopTime && !Failed.Get())
            {
                if (Random().NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now reopen r=" + r);
                    }
                    DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                    if (r2 != null)
                    {
                        r.Dispose();
                        r = r2;
                    }
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now close reader=" + r);
                    }
                    r.Dispose();
                    Writer.Commit();
                    ISet<string> openDeletedFiles = ((MockDirectoryWrapper)Dir).OpenDeletedFiles;
                    if (openDeletedFiles.Count > 0)
                    {
                        Console.WriteLine("OBD files: " + openDeletedFiles);
                    }
                    anyOpenDelFiles |= openDeletedFiles.Count > 0;
                    //Assert.AreEqual("open but deleted: " + openDeletedFiles, 0, openDeletedFiles.Size());
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now open");
                    }
                    r = DirectoryReader.Open(Writer, true);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: got new reader=" + r);
                }
                //System.out.println("numDocs=" + r.NumDocs + "
                //openDelFileCount=" + dir.openDeleteFileCount());

                if (r.NumDocs > 0)
                {
                    FixedSearcher = new IndexSearcher(r, es);
                    SmokeTestSearcher(FixedSearcher);
                    RunSearchThreads(DateTime.UtcNow.AddMilliseconds(500));
                }
            }
            r.Dispose();

            //System.out.println("numDocs=" + r.NumDocs + " openDelFileCount=" + dir.openDeleteFileCount());
            ISet<string> openDeletedFiles_ = ((MockDirectoryWrapper)Dir).OpenDeletedFiles;
            if (openDeletedFiles_.Count > 0)
            {
                Console.WriteLine("OBD files: " + openDeletedFiles_);
            }
            anyOpenDelFiles |= openDeletedFiles_.Count > 0;

            Assert.IsFalse(anyOpenDelFiles, "saw non-zero open-but-deleted count");
        }
Пример #30
0
        public override Weight CreateWeight(IndexSearcher searcher)
        {
            Weight baseWeight = baseQuery.CreateWeight(searcher);
            object[] drillDowns = new object[drillDownQueries.Length];
            for (int dim = 0; dim < drillDownQueries.Length; dim++)
            {
                Query query = drillDownQueries[dim];
                Filter filter = DrillDownQuery.GetFilter(query);
                if (filter != null)
                {
                    drillDowns[dim] = filter;
                }
                else
                {
                    // TODO: would be nice if we could say "we will do no
                    // scoring" here....
                    drillDowns[dim] = searcher.Rewrite(query).CreateWeight(searcher);
                }
            }

            return new WeightAnonymousInnerClassHelper(this, baseWeight, drillDowns);
        }
Пример #31
0
        // [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestBigDocuments()
        {
            // "big" as "much bigger than the chunk size"
            // for this test we force a FS dir
            // we can't just use newFSDirectory, because this test doesn't really index anything.
            // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
            Directory         dir    = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            Document emptyDoc = new Document(); // emptyDoc
            Document bigDoc1  = new Document(); // lot of small fields
            Document bigDoc2  = new Document(); // 1 very big field

            Field idField = new StringField("id", "", Field.Store.NO);

            emptyDoc.Add(idField);
            bigDoc1.Add(idField);
            bigDoc2.Add(idField);

            FieldType onlyStored = new FieldType(StringField.TYPE_STORED);

            onlyStored.IsIndexed = false;

            Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
            int   numFields  = RandomInts.NextIntBetween(Random(), 500000, 1000000);

            for (int i = 0; i < numFields; ++i)
            {
                bigDoc1.Add(smallField);
            }

            Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);

            bigDoc2.Add(bigField);

            int numDocs = AtLeast(5);

            Document[] docs = new Document[numDocs];
            for (int i = 0; i < numDocs; ++i)
            {
                docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
            }
            for (int i = 0; i < numDocs; ++i)
            {
                idField.SetStringValue("" + i);
                iw.AddDocument(docs[i]);
                if (Random().Next(numDocs) == 0)
                {
                    iw.Commit();
                }
            }
            iw.Commit();
            iw.ForceMerge(1); // look at what happens when big docs are merged
            DirectoryReader rd       = DirectoryReader.Open(dir);
            IndexSearcher   searcher = new IndexSearcher(rd);

            for (int i = 0; i < numDocs; ++i)
            {
                Query   query   = new TermQuery(new Term("id", "" + i));
                TopDocs topDocs = searcher.Search(query, 1);
                Assert.AreEqual(1, topDocs.TotalHits, "" + i);
                Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
                Assert.IsNotNull(doc);
                IIndexableField[] fieldValues = doc.GetFields("fld");
                Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
                if (fieldValues.Length > 0)
                {
                    Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue());
                }
            }
            rd.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Пример #32
0
        /// <summary>
        /// Indexes the data from the given <see cref="IDictionary"/>. </summary>
        /// <param name="dict"> Dictionary to index </param>
        /// <param name="config"> <see cref="IndexWriterConfig"/> to use </param>
        /// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param>
        /// <exception cref="ObjectDisposedException"> if the <see cref="SpellChecker"/> is already disposed </exception>
        /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
        public void IndexDictionary(IDictionary dict, IndexWriterConfig config, bool fullMerge)
        {
            lock (modifyCurrentIndexLock)
            {
                EnsureOpen();
                Directory dir = this.spellIndex;
                using (var writer = new IndexWriter(dir, config))
                {
                    IndexSearcher     indexSearcher = ObtainSearcher();
                    IList <TermsEnum> termsEnums    = new List <TermsEnum>();

                    IndexReader reader = searcher.IndexReader;
                    if (reader.MaxDoc > 0)
                    {
                        foreach (AtomicReaderContext ctx in reader.Leaves)
                        {
                            Terms terms = ctx.AtomicReader.GetTerms(F_WORD);
                            if (terms != null)
                            {
                                termsEnums.Add(terms.GetIterator(null));
                            }
                        }
                    }

                    bool isEmpty = termsEnums.Count == 0;

                    try
                    {
                        IBytesRefIterator iter = dict.GetEntryIterator();
                        BytesRef          currentTerm;

                        while ((currentTerm = iter.Next()) != null)
                        {
                            string word = currentTerm.Utf8ToString();
                            int    len  = word.Length;
                            if (len < 3)
                            {
                                continue; // too short we bail but "too long" is fine...
                            }

                            if (!isEmpty)
                            {
                                foreach (TermsEnum te in termsEnums)
                                {
                                    if (te.SeekExact(currentTerm))
                                    {
                                        goto termsContinue;
                                    }
                                }
                            }

                            // ok index the word
                            var doc = CreateDocument(word, GetMin(len), GetMax(len));
                            writer.AddDocument(doc);
termsContinue:
                            ;
                        }
                    }
                    finally
                    {
                        ReleaseSearcher(indexSearcher);
                    }
                    if (fullMerge)
                    {
                        writer.ForceMerge(1);
                    }
                }
                // TODO: this isn't that great, maybe in the future SpellChecker should take
                // IWC in its ctor / keep its writer open?

                // also re-open the spell index to see our own changes when the next suggestion
                // is fetched:
                SwapSearcher(dir);
            }
        }
Пример #33
0
        private static void GenerateHighlights(IList<Document> documents, IndexWriter writer, SearchCriteria criteria)
        {
            var documentHightlightMap = documents.ToDictionary(c => c._id.ToString());

            var reader = DirectoryReader.Open(writer, true, true);
            var queryParser = new HighlighterQueryParser(writer.GetAnalyzer());
            queryParser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);

            var query = queryParser.Parse(criteria.Query)
                                   .Rewrite(reader);

            var highlighter = CreateHighlighter();
            var fieldQuery = highlighter.GetFieldQuery(query);

            var searcher = new IndexSearcher(reader);
            var topFieldDocs = searcher.Search(query, documents.Count, Sort.RELEVANCE);
            var scoreDocs = topFieldDocs.ScoreDocs;

            foreach (var sd in scoreDocs)
            {
                var bestFragments = highlighter.GetBestFragments(fieldQuery, reader, sd.Doc, Schema.StandardField.FULL_TEXT, FRAGMENT_SIZE, FRAGMENT_COUNT);
                var document = searcher.Doc(sd.Doc);
                var docId = document.Get(Schema.StandardField.ID);

                if (documentHightlightMap.ContainsKey(docId) && bestFragments.Length > 0)
                {
                    var dictionary = documentHightlightMap[docId].AsDictionary();
                    var highlight = String.Join($"{Environment.NewLine} ... {Environment.NewLine}", bestFragments);
                    dictionary[HIGHLIGHT_FIELD_NAME] = highlight;
                }
            }
        }
Пример #34
0
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestRandomStoredFields()
        {
            Directory         dir  = NewDirectory();
            Random            rand = Random();
            RandomIndexWriter w    = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
            //w.w.setNoCFSRatio(0.0);
            int docCount   = AtLeast(200);
            int fieldCount = TestUtil.NextInt(rand, 1, 5);

            IList <int?> fieldIDs = new List <int?>();

            FieldType customType = new FieldType(TextField.TYPE_STORED);

            customType.IsTokenized = false;
            Field idField = NewField("id", "", customType);

            for (int i = 0; i < fieldCount; i++)
            {
                fieldIDs.Add(i);
            }

            IDictionary <string, Document> docs = new Dictionary <string, Document>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: build index docCount=" + docCount);
            }

            FieldType customType2 = new FieldType();

            customType2.IsStored = true;
            for (int i = 0; i < docCount; i++)
            {
                Document doc = new Document();
                doc.Add(idField);
                string id = "" + i;
                idField.SetStringValue(id);
                docs[id] = doc;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: add doc id=" + id);
                }

                foreach (int field in fieldIDs)
                {
                    string s;
                    if (rand.Next(4) != 3)
                    {
                        s = TestUtil.RandomUnicodeString(rand, 1000);
                        doc.Add(NewField("f" + field, s, customType2));
                    }
                    else
                    {
                        s = null;
                    }
                }
                w.AddDocument(doc);
                if (rand.Next(50) == 17)
                {
                    // mixup binding of field name -> Number every so often
                    Collections.Shuffle(fieldIDs);
                }
                if (rand.Next(5) == 3 && i > 0)
                {
                    string delID = "" + rand.Next(i);
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: delete doc id=" + delID);
                    }
                    w.DeleteDocuments(new Term("id", delID));
                    docs.Remove(delID);
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
            }
            if (docs.Count > 0)
            {
                string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/);

                for (int x = 0; x < 2; x++)
                {
                    IndexReader   r = w.Reader;
                    IndexSearcher s = NewSearcher(r);

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
                    }

                    int num = AtLeast(1000);
                    for (int iter = 0; iter < num; iter++)
                    {
                        string testID = idsList[rand.Next(idsList.Length)];
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: test id=" + testID);
                        }
                        TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                        Assert.AreEqual(1, hits.TotalHits);
                        Document doc    = r.Document(hits.ScoreDocs[0].Doc);
                        Document docExp = docs[testID];
                        for (int i = 0; i < fieldCount; i++)
                        {
                            assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.Get("f" + i), doc.Get("f" + i));
                        }
                    }
                    r.Dispose();
                    w.ForceMerge(1);
                }
            }
            w.Dispose();
            dir.Dispose();
        }
 private int RunQuery(IndexSearcher s, Query q)
 {
     s.Search(q, 10);
     int hitCount = s.Search(q, null, 10, new Sort(new SortField("title", SortField.Type_e.STRING))).TotalHits;
     if (DefaultCodecSupportsDocValues())
     {
         Sort dvSort = new Sort(new SortField("title", SortField.Type_e.STRING));
         int hitCount2 = s.Search(q, null, 10, dvSort).TotalHits;
         Assert.AreEqual(hitCount, hitCount2);
     }
     return hitCount;
 }
Пример #36
0
        private static IEnumerable <int> Search(Directory directory, Query query, IEnumerable <int> sortOrder, bool reversed = false)
        {
            var searcher = new IndexSearcher(directory, true);

            return(Search(searcher, query, sortOrder, reversed));
        }
 protected internal virtual void ReleaseSearcher(IndexSearcher s)
 {
 }
Пример #38
0
        public void TestSimpleWithScoring()
        {
            const string idField = "id";
            const string toField = "movieId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "A random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for movie via subtitle
            Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField,
                                                       new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
            TopDocs result = indexSearcher.Search(joinQuery, 10);

            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            // Score mode max.
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Max);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            // Score mode total
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Total);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            //Score mode avg
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Avg);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Пример #39
0
 internal SearchWrapper(SearcherManager searcherManager)
 {
     _searcherManager = searcherManager;
     _indexSearcher = (IndexSearcher) _searcherManager.acquire();
 }
Пример #40
0
 // Make sure the documents returned by the search match the expected list
 // Copied from TestSort.java
 private void AssertMatches(IndexSearcher searcher, Query query, Sort sort, string expectedResult)
 {
     ScoreDoc[] result = searcher.Search(query, null, 1000, sort).ScoreDocs;
     StringBuilder buff = new StringBuilder(10);
     int n = result.Length;
     for (int i = 0; i < n; ++i)
     {
         Document doc = searcher.Doc(result[i].Doc);
         IndexableField[] v = doc.GetFields("tracer");
         for (int j = 0; j < v.Length; ++j)
         {
             buff.Append(v[j].StringValue);
         }
     }
     Assert.AreEqual(expectedResult, buff.ToString());
 }
        public virtual void TestHugeLabel()
        {
            Directory indexDir = NewDirectory(), taxoDir = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
            FacetsConfig config = new FacetsConfig();

            // Add one huge label:
            string bigs = null;
            int ordinal = -1;

            int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator
            bigs = TestUtil.RandomSimpleString(Random(), len, len);
            FacetField ff = new FacetField("dim", bigs);
            FacetLabel cp = new FacetLabel("dim", bigs);
            ordinal = taxoWriter.AddCategory(cp);
            Document doc = new Document();
            doc.Add(ff);
            indexWriter.AddDocument(config.Build(taxoWriter, doc));

            // Add tiny ones to cause a re-hash
            for (int i = 0; i < 3; i++)
            {
                string s = TestUtil.RandomSimpleString(Random(), 1, 10);
                taxoWriter.AddCategory(new FacetLabel("dim", s));
                doc = new Document();
                doc.Add(new FacetField("dim", s));
                indexWriter.AddDocument(config.Build(taxoWriter, doc));
            }

            // when too large components were allowed to be added, this resulted in a new added category
            Assert.AreEqual(ordinal, taxoWriter.AddCategory(cp));

            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = new IndexSearcher(indexReader);
            DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig());
            ddq.Add("dim", bigs);
            Assert.AreEqual(1, searcher.Search(ddq, 10).TotalHits);

            IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
        }
Пример #42
0
        private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
                                                    bool multipleValuesPerDocument, bool scoreDocsInOrder)
        {
            IndexIterationContext context = new IndexIterationContext();
            int numRandomValues           = nDocs / 2;

            context.RandomUniqueValues = new string[numRandomValues];
            ISet <string> trackSet = new HashSet <string>();

            context.RandomFrom = new bool[numRandomValues];
            for (int i = 0; i < numRandomValues; i++)
            {
                string uniqueRandomValue;
                do
                {
                    uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    //        uniqueRandomValue = TestUtil.randomSimpleString(random);
                } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));
                // Generate unique values and empty strings aren't allowed.
                trackSet.Add(uniqueRandomValue);
                context.RandomFrom[i]         = Random.NextBoolean();
                context.RandomUniqueValues[i] = uniqueRandomValue;
            }

            RandomDoc[] docs = new RandomDoc[nDocs];
            for (int i = 0; i < nDocs; i++)
            {
                string   id       = Convert.ToString(i);
                int      randomI  = Random.Next(context.RandomUniqueValues.Length);
                string   value    = context.RandomUniqueValues[randomI];
                Document document = new Document();
                document.Add(NewTextField(Random, "id", id, Field.Store.NO));
                document.Add(NewTextField(Random, "value", value, Field.Store.NO));

                bool from = context.RandomFrom[randomI];
                int  numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1;
                docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
                for (int j = 0; j < numberOfLinkValues; j++)
                {
                    string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)];
                    docs[i].LinkValues.Add(linkValue);
                    if (from)
                    {
                        if (!context.FromDocuments.ContainsKey(linkValue))
                        {
                            context.FromDocuments[linkValue] = new List <RandomDoc>();
                        }
                        if (!context.RandomValueFromDocs.ContainsKey(value))
                        {
                            context.RandomValueFromDocs[value] = new List <RandomDoc>();
                        }

                        context.FromDocuments[linkValue].Add(docs[i]);
                        context.RandomValueFromDocs[value].Add(docs[i]);
                        document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO));
                    }
                    else
                    {
                        if (!context.ToDocuments.ContainsKey(linkValue))
                        {
                            context.ToDocuments[linkValue] = new List <RandomDoc>();
                        }
                        if (!context.RandomValueToDocs.ContainsKey(value))
                        {
                            context.RandomValueToDocs[value] = new List <RandomDoc>();
                        }

                        context.ToDocuments[linkValue].Add(docs[i]);
                        context.RandomValueToDocs[value].Add(docs[i]);
                        document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO));
                    }
                }

                RandomIndexWriter w;
                if (from)
                {
                    w = fromWriter;
                }
                else
                {
                    w = toWriter;
                }

                w.AddDocument(document);
                if (Random.Next(10) == 4)
                {
                    w.Commit();
                }
                if (VERBOSE)
                {
                    Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
                }
            }

            // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
            // any ScoreMode.
            IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader());
            IndexSearcher toSearcher   = NewSearcher(toWriter.GetReader());

            for (int i = 0; i < context.RandomUniqueValues.Length; i++)
            {
                string uniqueRandomValue = context.RandomUniqueValues[i];
                string fromField;
                string toField;
                IDictionary <string, IDictionary <int, JoinScore> > queryVals;
                if (context.RandomFrom[i])
                {
                    fromField = "from";
                    toField   = "to";
                    queryVals = context.FromHitsToJoinScore;
                }
                else
                {
                    fromField = "to";
                    toField   = "from";
                    queryVals = context.ToHitsToJoinScore;
                }
                IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
                }
                else
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
                }

                IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    if (scoreDocsInOrder)
                    {
                        AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                        Terms        terms = slowCompositeReader.GetTerms(toField);
                        if (terms != null)
                        {
                            DocsEnum             docsEnum   = null;
                            TermsEnum            termsEnum  = null;
                            SortedSet <BytesRef> joinValues =
                                new SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                            joinValues.AddAll(joinValueToJoinScores.Keys);
                            foreach (BytesRef joinValue in joinValues)
                            {
                                termsEnum = terms.GetIterator(termsEnum);
                                if (termsEnum.SeekExact(joinValue))
                                {
                                    docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                                    JoinScore joinScore = joinValueToJoinScores[joinValue];

                                    for (int doc = docsEnum.NextDoc();
                                         doc != DocIdSetIterator.NO_MORE_DOCS;
                                         doc = docsEnum.NextDoc())
                                    {
                                        // First encountered join value determines the score.
                                        // Something to keep in mind for many-to-many relations.
                                        if (!docToJoinScore.ContainsKey(doc))
                                        {
                                            docToJoinScore[doc] = joinScore;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        toSearcher.Search(new MatchAllDocsQuery(),
                                          new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores,
                                                                                  docToJoinScore));
                    }
                }
                else
                {
                    toSearcher.Search(new MatchAllDocsQuery(),
                                      new CollectorAnonymousInnerClassHelper6(this, toField, joinValueToJoinScores,
                                                                              docToJoinScore));
                }
                queryVals[uniqueRandomValue] = docToJoinScore;
            }

            fromSearcher.IndexReader.Dispose();
            toSearcher.IndexReader.Dispose();

            return(context);
        }
Пример #43
0
        public virtual void TestAddIndexOnDiskFull()
        {
            // MemoryCodec, since it uses FST, is not necessarily
            // "additive", ie if you add up N small FSTs, then merge
            // them, the merged result can easily be larger than the
            // sum because the merged FST may use array encoding for
            // some arcs (which uses more space):

            string idFormat      = TestUtil.GetPostingsFormat("id");
            string contentFormat = TestUtil.GetPostingsFormat("content");

            AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal));

            int START_COUNT = 57;
            int NUM_DIR     = TEST_NIGHTLY ? 50 : 5;
            int END_COUNT   = START_COUNT + NUM_DIR * (TEST_NIGHTLY ? 25 : 5);

            // Build up a bunch of dirs that have indexes which we
            // will then merge together by calling addIndexes(*):
            Directory[] dirs           = new Directory[NUM_DIR];
            long        inputDiskUsage = 0;

            for (int i = 0; i < NUM_DIR; i++)
            {
                dirs[i] = NewDirectory();
                IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                for (int j = 0; j < 25; j++)
                {
                    AddDocWithIndex(writer, 25 * i + j);
                }
                writer.Dispose();
                string[] files = dirs[i].ListAll();
                for (int j = 0; j < files.Length; j++)
                {
                    inputDiskUsage += dirs[i].FileLength(files[j]);
                }
            }

            // Now, build a starting index that has START_COUNT docs.  We
            // will then try to addIndexes into a copy of this:
            MockDirectoryWrapper startDir  = NewMockDirectory();
            IndexWriter          indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            for (int j = 0; j < START_COUNT; j++)
            {
                AddDocWithIndex(indWriter, j);
            }
            indWriter.Dispose();

            // Make sure starting index seems to be working properly:
            Term        searchTerm = new Term("content", "aaa");
            IndexReader reader     = DirectoryReader.Open(startDir);

            Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq");

            IndexSearcher searcher = NewSearcher(reader);

            ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs;
            Assert.AreEqual(57, hits.Length, "first number of hits");
            reader.Dispose();

            // Iterate with larger and larger amounts of free
            // disk space.  With little free disk space,
            // addIndexes will certainly run out of space &
            // fail.  Verify that when this happens, index is
            // not corrupt and index in fact has added no
            // documents.  Then, we increase disk space by 2000
            // bytes each iteration.  At some point there is
            // enough free disk space and addIndexes should
            // succeed and index should show all documents were
            // added.

            // String[] files = startDir.ListAll();
            long diskUsage = startDir.GetSizeInBytes();

            long startDiskUsage = 0;

            string[] files_ = startDir.ListAll();
            for (int i = 0; i < files_.Length; i++)
            {
                startDiskUsage += startDir.FileLength(files_[i]);
            }

            for (int iter = 0; iter < 3; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }

                // Start with 100 bytes more than we are currently using:
                long diskFree = diskUsage + TestUtil.NextInt32(Random, 50, 200);

                int method = iter;

                bool success = false;
                bool done    = false;

                string methodName;
                if (0 == method)
                {
                    methodName = "addIndexes(Directory[]) + forceMerge(1)";
                }
                else if (1 == method)
                {
                    methodName = "addIndexes(IndexReader[])";
                }
                else
                {
                    methodName = "addIndexes(Directory[])";
                }

                while (!done)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle...");
                    }

                    // Make a new dir that will enforce disk usage:
                    MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory(startDir, NewIOContext(Random)));
                    indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy(false)));
                    IOException err = null;

                    IMergeScheduler ms = indWriter.Config.MergeScheduler;
                    for (int x = 0; x < 2; x++)
                    {
                        if (ms is IConcurrentMergeScheduler)
                        // this test intentionally produces exceptions
                        // in the threads that CMS launches; we don't
                        // want to pollute test output with these.
                        {
                            if (0 == x)
                            {
                                ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                            }
                            else
                            {
                                ((IConcurrentMergeScheduler)ms).ClearSuppressExceptions();
                            }
                        }

                        // Two loops: first time, limit disk space &
                        // throw random IOExceptions; second time, no
                        // disk space limit:

                        double rate      = 0.05;
                        double diskRatio = ((double)diskFree) / diskUsage;
                        long   thisDiskFree;

                        string testName = null;

                        if (0 == x)
                        {
                            dir.RandomIOExceptionRateOnOpen = Random.NextDouble() * 0.01;
                            thisDiskFree = diskFree;
                            if (diskRatio >= 2.0)
                            {
                                rate /= 2;
                            }
                            if (diskRatio >= 4.0)
                            {
                                rate /= 2;
                            }
                            if (diskRatio >= 6.0)
                            {
                                rate = 0.0;
                            }
                            if (VERBOSE)
                            {
                                testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
                            }
                        }
                        else
                        {
                            dir.RandomIOExceptionRateOnOpen = 0.0;
                            thisDiskFree = 0;
                            rate         = 0.0;
                            if (VERBOSE)
                            {
                                testName = "disk full test " + methodName + " with unlimited disk space";
                            }
                        }

                        if (VERBOSE)
                        {
                            Console.WriteLine("\ncycle: " + testName);
                        }

                        dir.TrackDiskUsage        = true;
                        dir.MaxSizeInBytes        = thisDiskFree;
                        dir.RandomIOExceptionRate = rate;

                        try
                        {
                            if (0 == method)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: now addIndexes count=" + dirs.Length);
                                }
                                indWriter.AddIndexes(dirs);
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: now forceMerge");
                                }
                                indWriter.ForceMerge(1);
                            }
                            else if (1 == method)
                            {
                                IndexReader[] readers = new IndexReader[dirs.Length];
                                for (int i = 0; i < dirs.Length; i++)
                                {
                                    readers[i] = DirectoryReader.Open(dirs[i]);
                                }
                                try
                                {
                                    indWriter.AddIndexes(readers);
                                }
                                finally
                                {
                                    for (int i = 0; i < dirs.Length; i++)
                                    {
                                        readers[i].Dispose();
                                    }
                                }
                            }
                            else
                            {
                                indWriter.AddIndexes(dirs);
                            }

                            success = true;
                            if (VERBOSE)
                            {
                                Console.WriteLine("  success!");
                            }

                            if (0 == x)
                            {
                                done = true;
                            }
                        }
                        catch (IOException e)
                        {
                            success = false;
                            err     = e;
                            if (VERBOSE)
                            {
                                Console.WriteLine("  hit IOException: " + e);
                                Console.WriteLine(e.StackTrace);
                            }

                            if (1 == x)
                            {
                                Console.WriteLine(e.StackTrace);
                                Assert.Fail(methodName + " hit IOException after disk space was freed up");
                            }
                        }

                        // Make sure all threads from
                        // ConcurrentMergeScheduler are done
                        TestUtil.SyncConcurrentMerges(indWriter);

                        if (VERBOSE)
                        {
                            Console.WriteLine("  now test readers");
                        }

                        // Finally, verify index is not corrupt, and, if
                        // we succeeded, we see all docs added, and if we
                        // failed, we see either all docs or no docs added
                        // (transactional semantics):
                        dir.RandomIOExceptionRateOnOpen = 0.0;
                        try
                        {
                            reader = DirectoryReader.Open(dir);
                        }
                        catch (IOException e)
                        {
                            Console.WriteLine(e.StackTrace);
                            Assert.Fail(testName + ": exception when creating IndexReader: " + e);
                        }
                        int result = reader.DocFreq(searchTerm);
                        if (success)
                        {
                            if (result != START_COUNT)
                            {
                                Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT);
                            }
                        }
                        else
                        {
                            // On hitting exception we still may have added
                            // all docs:
                            if (result != START_COUNT && result != END_COUNT)
                            {
                                Console.WriteLine(err.StackTrace);
                                Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
                            }
                        }

                        searcher = NewSearcher(reader);
                        try
                        {
                            hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs;
                        }
                        catch (IOException e)
                        {
                            Console.WriteLine(e.StackTrace);
                            Assert.Fail(testName + ": exception when searching: " + e);
                        }
                        int result2 = hits.Length;
                        if (success)
                        {
                            if (result2 != result)
                            {
                                Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
                            }
                        }
                        else
                        {
                            // On hitting exception we still may have added
                            // all docs:
                            if (result2 != result)
                            {
                                Console.WriteLine(err.StackTrace);
                                Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
                            }
                        }

                        reader.Dispose();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  count is " + result);
                        }

                        if (done || result == END_COUNT)
                        {
                            break;
                        }
                    }

                    if (VERBOSE)
                    {
                        Console.WriteLine("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes);
                    }

                    if (done)
                    {
                        // Javadocs state that temp free Directory space
                        // required is at most 2X total input size of
                        // indices so let's make sure:
                        Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes");
                    }

                    // Make sure we don't hit disk full during close below:
                    dir.MaxSizeInBytes              = 0;
                    dir.RandomIOExceptionRate       = 0.0;
                    dir.RandomIOExceptionRateOnOpen = 0.0;

                    indWriter.Dispose();

                    // Wait for all BG threads to finish else
                    // dir.Dispose() will throw IOException because
                    // there are still open files
                    TestUtil.SyncConcurrentMerges(ms);

                    dir.Dispose();

                    // Try again with more free space:
                    diskFree += TEST_NIGHTLY ? TestUtil.NextInt32(Random, 4000, 8000) : TestUtil.NextInt32(Random, 40000, 80000);
                }
            }

            startDir.Dispose();
            foreach (Directory dir in dirs)
            {
                dir.Dispose();
            }
        }
Пример #44
0
        public void TestInsideBooleanQuery()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "0", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
                                                       new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);

            BooleanQuery bq = new BooleanQuery();

            bq.Add(joinQuery, Occur.SHOULD);
            bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD);

            indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper(this));

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Пример #45
0
        public static void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetSimilarity(Similarity));
            //writer.infoStream = System.out;
            for (int i = 0; i < 1000; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("field", English.IntToEnglish(i), Field.Store.YES));
                string txt = English.IntToEnglish(i) + ' ' + English.IntToEnglish(i + 1);
                doc.Add(NewTextField("field2", txt, Field.Store.YES));
                writer.AddDocument(doc);
            }
            Reader = writer.Reader;
            writer.Dispose();

            Searcher = NewSearcher(Reader);
            Searcher.Similarity = Similarity;
        }
Пример #46
0
 public IEnumerable <Group> MapSearchResults(IEnumerable <ScoreDoc> scoreDocs, IndexSearcher searcher)
 {
     return(scoreDocs.Select(scoreDoc => MapSearchResult(searcher.Doc(scoreDoc.Doc))).ToList());
 }
Пример #47
0
        // Test using various international locales with accented characters (which
        // sort differently depending on locale)
        //
        // Copied (and slightly modified) from
        // Lucene.Net.Search.TestSort.testInternationalSort()
        //
        // TODO: this test is really fragile. there are already 3 different cases,
        // depending upon unicode version.
        public virtual void TestCollationKeySort(Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer, string usResult, string frResult, string svResult, string dkResult)
        {
            Directory indexStore = NewDirectory();
            IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            // document data:
            // the tracer field is used to determine which document was hit
            string[][] sortData = new string[][] { new string[] { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" }, new string[] { "B", "y", "HAT", "HAT", "HAT", "HAT" }, new string[] { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" }, new string[] { "D", "y", "HUT", "HUT", "HUT", "HUT" }, new string[] { "E", "x", "peach", "peach", "peach", "peach" }, new string[] { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" }, new string[] { "G", "x", "sin", "sin", "sin", "sin" }, new string[] { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" }, new string[] { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" }, new string[] { "J", "y", "HOT", "HOT", "HOT", "HOT" } };

            FieldType customType = new FieldType();
            customType.Stored = true;

            for (int i = 0; i < sortData.Length; ++i)
            {
                Document doc = new Document();
                doc.Add(new Field("tracer", sortData[i][0], customType));
                doc.Add(new TextField("contents", sortData[i][1], Field.Store.NO));
                if (sortData[i][2] != null)
                {
                    doc.Add(new TextField("US", usAnalyzer.TokenStream("US", new StringReader(sortData[i][2]))));
                }
                if (sortData[i][3] != null)
                {
                    doc.Add(new TextField("France", franceAnalyzer.TokenStream("France", new StringReader(sortData[i][3]))));
                }
                if (sortData[i][4] != null)
                {
                    doc.Add(new TextField("Sweden", swedenAnalyzer.TokenStream("Sweden", new StringReader(sortData[i][4]))));
                }
                if (sortData[i][5] != null)
                {
                    doc.Add(new TextField("Denmark", denmarkAnalyzer.TokenStream("Denmark", new StringReader(sortData[i][5]))));
                }
                writer.AddDocument(doc);
            }
            writer.ForceMerge(1);
            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(indexStore);
            IndexSearcher searcher = new IndexSearcher(reader);

            Sort sort = new Sort();
            Query queryX = new TermQuery(new Term("contents", "x"));
            Query queryY = new TermQuery(new Term("contents", "y"));

            sort.SetSort(new SortField("US", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, usResult);

            sort.SetSort(new SortField("France", SortField.Type_e.STRING));
            AssertMatches(searcher, queryX, sort, frResult);

            sort.SetSort(new SortField("Sweden", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, svResult);

            sort.SetSort(new SortField("Denmark", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, dkResult);
            reader.Dispose();
            indexStore.Dispose();
        }
Пример #48
0
 public override void CreateWeight(IDictionary context, IndexSearcher searcher)
 {
     m_source.CreateWeight(context, searcher);
 }
Пример #49
0
        public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd)
        {
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document doc = new Document();
            doc.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES));
            doc.Add(new StringField("body", "body", Field.Store.YES));
            writer.AddDocument(doc);
            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("body", "body"));

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // index Term below should NOT be returned by a TermRangeFilter with a Farsi
            // Collator (or an Arabic one for the case when Farsi searcher not
            // supported).
            ScoreDoc[] result = searcher.Search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).ScoreDocs;
            Assert.AreEqual(0, result.Length, "The index Term should not be included.");

            result = searcher.Search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).ScoreDocs;
            Assert.AreEqual(1, result.Length, "The index Term should be included.");

            reader.Dispose();
            dir.Dispose();
        }
Пример #50
0
 public override Weight CreateWeight(IndexSearcher searcher)
 {
     return(new CustomWeight(this, searcher));
 }
Пример #51
0
        /// <summary>
        /// Using a strongly types List of SearchStringModel, you can run a search based off a JSON String
        /// </summary>
        /// <param name="itm">
        /// The itm.
        /// </param>
        /// <param name="currentSearchString">
        /// The current Search String.
        /// </param>
        /// <param name="hitCount">
        /// The hit Count.
        /// </param>
        /// <param name="indexName">
        /// The index Name.
        /// </param>
        /// <param name="sortField">
        /// The sort Field.
        /// </param>
        /// <param name="sortDirection">
        /// The sort Direction.
        /// </param>
        /// <param name="pageSize">
        /// The page Size.
        /// </param>
        /// <param name="pageNumber">
        /// The page Number.
        /// </param>
        /// <param name="parameters">
        /// The parameters.
        /// </param>
        /// <returns>
        /// IEnumreable List of Results that have been typed to a smaller version of the Item Object
        /// </returns>
        public static IEnumerable<SitecoreItem> FullSearch(Item itm, List<SearchStringModel> currentSearchString, out int hitCount, string indexName = "itembuckets_buckets", string sortField = "", string sortDirection = "", int pageSize = 0, int pageNumber = 0, object[] parameters = null)
        {
            var startDate = DateTime.Now;
            var endDate = DateTime.Now.AddDays(1);
            var locationSearch = LocationFilter;
            var refinements = new SafeDictionary<string>();
            var searchStringModels = SearchHelper.GetTags(currentSearchString);

            if (searchStringModels.Count > 0)
            {
                foreach (var ss in searchStringModels)
                {
                    var query = ss.Value;
                    if (query.Contains("tagid="))
                    {
                        query = query.Split('|')[1].Replace("tagid=", string.Empty);
                    }
                    var db = Context.ContentDatabase ?? Context.Database;
                    refinements.Add("_tags", db.GetItem(query).ID.ToString());
                }
            }

            var author = SearchHelper.GetAuthor(currentSearchString);

            var languages = SearchHelper.GetLanguages(currentSearchString);
            if (languages.Length > 0)
            {
                refinements.Add("_language", languages);
            }

            var references = SearchHelper.GetReferences(currentSearchString);

            var custom = SearchHelper.GetCustom(currentSearchString);
            if (custom.Length > 0)
            {
                var customSearch = custom.Split('|');
                if (customSearch.Length > 0)
                {
                    try
                    {
                        refinements.Add(customSearch[0], customSearch[1]);
                    }
                    catch (Exception exc)
                    {
                        Log.Error("Could not parse the custom search query", exc);
                    }
                }
            }

            var search = SearchHelper.GetField(currentSearchString);
            if (search.Length > 0)
            {
                var customSearch = search;
                refinements.Add(customSearch, SearchHelper.GetText(currentSearchString));
            }

            var fileTypes = SearchHelper.GetFileTypes(currentSearchString);
            if (fileTypes.Length > 0)
            {
                refinements.Add("extension", SearchHelper.GetFileTypes(currentSearchString));
            }

            var s = SearchHelper.GetSite(currentSearchString);
            if (s.Length > 0)
            {
                SiteContext siteContext = SiteContextFactory.GetSiteContext(SiteManager.GetSite(s).Name);
                var db = Context.ContentDatabase ?? Context.Database;
                var startItemId = db.GetItem(siteContext.StartPath);
                locationSearch = startItemId.ID.ToString();
            }

            var culture = CultureInfo.CreateSpecificCulture("en-US");
            var startFlag = true;
            var endFlag = true;
            if (SearchHelper.GetStartDate(currentSearchString).Any())
            {
                if (!DateTime.TryParse(SearchHelper.GetStartDate(currentSearchString), culture, DateTimeStyles.None, out startDate))
                {
                    startDate = DateTime.Now;
                }

                startFlag = false;
            }

            if (SearchHelper.GetEndDate(currentSearchString).Any())
            {
                if (!DateTime.TryParse(SearchHelper.GetEndDate(currentSearchString), culture, DateTimeStyles.None, out endDate))
                {
                    endDate = DateTime.Now.AddDays(1);
                }

                endFlag = false;
            }

            using (var searcher = new IndexSearcher(indexName))
            {
                var location = IdHelper.ParseId(SearchHelper.GetLocation(currentSearchString, locationSearch));
                var locationIdFromItem = itm != null ? itm.ID.ToGuid().ToEnumerable() : null;
                var rangeSearch = new DateRangeSearchParam
                {
                    ID = SearchHelper.GetID(currentSearchString).IsEmpty() ? SearchHelper.GetRecent(currentSearchString) : SearchHelper.GetID(currentSearchString),
                    ShowAllVersions = false,
                    FullTextQuery = SearchHelper.GetText(currentSearchString),
                    Refinements = refinements,
                    RelatedIds = references.Any() ? IdHelper.ParseId(references) : null,
                    SortDirection = sortDirection,
                    TemplateIds = SearchHelper.GetTemplates(currentSearchString),
                    LocationIds = !location.Any() ? locationIdFromItem : location,
                    Language = languages,
                    SortByField = sortField,
                    PageNumber = pageNumber,
                    PageSize = pageSize,
                    Author = author == string.Empty ? string.Empty : author,
                };

                if (!startFlag || !endFlag)
                {
                    rangeSearch.Ranges = new List<DateRangeSearchParam.DateRangeField>
                                             {
                                                 new DateRangeSearchParam.DateRangeField(SearchFieldIDs.CreatedDate, startDate, endDate)
                                                     {
                                                         InclusiveStart = true, InclusiveEnd = true
                                                     }
                                             };
                }

                var returnResult = searcher.GetItems(rangeSearch);
                hitCount = returnResult.Key;
                return returnResult.Value;
            }
        }
Пример #52
0
 public override void CreateWeight(IDictionary context, IndexSearcher searcher)
 {
     ifSource.CreateWeight(context, searcher);
     trueSource.CreateWeight(context, searcher);
     falseSource.CreateWeight(context, searcher);
 }
Пример #53
0
        /// <summary>
        /// An extension of Item that allows you to launch a Search from an item
        /// </summary>
        /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns>
        /// <param name="startLocationItem">The start location of the search</param>
        /// <param name="refinements">A collection of refinements to the query</param>
        /// <param name="hitCount">This will output the hitCount of the search</param>
        /// <param name="relatedIds">Pipe delimited string of Id to query by Links to and from items</param>
        /// <param name="indexName">Force query to run on a particular index</param>
        /// <param name="text">The raw text query</param>
        /// <param name="templates">Pipe delimited string of Id of Templates</param>
        /// <param name="location">Override the location of the search with an Id</param>
        /// <param name="language">Query by the two letter ISO country code</param>
        /// <param name="id">Query by ID</param>
        /// <param name="sortField">Sort query by field (must be in index)</param>
        /// <param name="sortDirection">Sort in either "asc" or "desc"</param>
        /// <param name="itemName">Query by item name</param>
        /// <param name="startDate">mm/dd/yyyy format of start date</param>
        /// <param name="endDate">mm/dd/yyyy format of end date</param>
        /// <param name="numberOfItemsToReturn">0-XXXXXX (The bigger this number is the less performant it will be)</param>
        /// <example>BucketManager.Search(Sitecore.Context.Item, text: "Tim", templates: "TemplateGUID")</example>
        /// <example>BucketManager.Search(Sitecore.Context.Item, text: "Tim", relatedIds: "ItemGUID", sortField: "_name")</example>
        public static IEnumerable<SitecoreItem> Search(Item startLocationItem, SafeDictionary<string> refinements, out int hitCount, IEnumerable<Guid> relatedIds = null, string indexName = "itembuckets_buckets", string text = "", IEnumerable<Guid> templates = null, string location = "", string language = "en", string id = "", string sortField = "", string sortDirection = "", string itemName = "", string startDate = "", string endDate = "", int numberOfItemsToReturn = 20)
        {
            using (var searcher = new IndexSearcher(indexName))
            {
                var culture = CultureInfo.CreateSpecificCulture("en-US");
                var startDateOut = DateTime.Now;
                var endDateOut = DateTime.Now.AddDays(1);
                var startFlag = true;
                var endFlag = true;
                if (!DateTime.TryParse(startDate, culture, DateTimeStyles.None, out startDateOut))
                {
                    startDateOut = DateTime.Now;
                    startFlag = false;
                }

                if (!DateTime.TryParse(endDate, culture, DateTimeStyles.None, out endDateOut))
                {
                    endDateOut = DateTime.Now.AddDays(1);
                    endFlag = false;
                }

                if (startLocationItem.IsNull())
                {
                    Log.Warn("You are trying to run an Search on an item that has a start location of null", null);
                    hitCount = 0;
                    return new List<SitecoreItem>();
                }

                var dateSearchParam = new DateRangeSearchParam
                                          {
                                              ItemName = itemName,
                                              FullTextQuery = text,
                                              RelatedIds = relatedIds,
                                              TemplateIds = templates,
                                              LocationIds = startLocationItem.ID.ToGuid().ToEnumerable(),
                                              Language = language,
                                              SortDirection = sortDirection,
                                              Refinements = refinements,
                                              ID = id,
                                              SortByField = sortField,
                                              PageSize = numberOfItemsToReturn
                                          };

                if (startFlag || endFlag)
                {
                    dateSearchParam.Ranges = new List<DateRangeSearchParam.DateRangeField>
                                                 {
                                                     new DateRangeSearchParam.DateRangeField(
                                                         SearchFieldIDs.CreatedDate,
                                                         startDateOut,
                                                         endDateOut)
                                                         {
                                                             InclusiveStart = true, InclusiveEnd = true
                                                         }
                                                 };
                }

                var keyValuePair = searcher.GetItems(dateSearchParam);
                hitCount = keyValuePair.Key;
                return keyValuePair.Value;
            }
        }
Пример #54
0
        public void TestFuzzinessLong()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            addDoc("aaaaaaa", writer);
            addDoc("segment", writer);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            writer.Dispose();

            SlowFuzzyQuery query;

            // not similar enough:
            query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);
            // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
            // in testDefaultFuzziness so a bigger difference is allowed:
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // no match, more than half of the characters is wrong:
            query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // "student" and "stellent" are indeed similar to "segment" by default:
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // "student" doesn't match anymore thanks to increased minimum similarity:
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            try
            {
                query = new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
                fail("Expected IllegalArgumentException");
            }
#pragma warning disable 168
            catch (ArgumentException e)
#pragma warning restore 168
            {
                // expecting exception
            }
            try
            {
                query = new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
                fail("Expected IllegalArgumentException");
            }
#pragma warning disable 168
            catch (ArgumentException e)
#pragma warning restore 168
            {
                // expecting exception
            }

            reader.Dispose();
            directory.Dispose();
        }
Пример #55
0
 /// <summary>
 /// An extension of Item that allows you to launch a Search from an item
 /// </summary>
 /// <returns>List of Results of Type IEnumerable List of SitecoreItem (which implements IItem)</returns>
 /// <param name="startLocationItem">The start location of the search</param>
 /// <param name="queryParser">The raw JSON Parse query</param>
 /// <param name="hitCount">This will output the hitCount of the search</param>
 /// <param name="indexName">Force query to run on a particular index</param>
 public static IEnumerable<SitecoreItem> Search(Item startLocationItem, SearchParam queryParser, out int hitCount, string indexName = "itembuckets_buckets")
 {
     using (var searcher = new IndexSearcher(indexName))
     {
         var keyValuePair = searcher.GetItems(queryParser);
         hitCount = keyValuePair.Key;
         return keyValuePair.Value;
     }
 }
Пример #56
0
        public void TestFuzziness()
        {
            //every test with SlowFuzzyQuery.defaultMinSimilarity
            //is exercising the Automaton, not the brute force linear method

            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            addDoc("aaaaa", writer);
            addDoc("aaaab", writer);
            addDoc("aaabb", writer);
            addDoc("aabbb", writer);
            addDoc("abbbb", writer);
            addDoc("bbbbb", writer);
            addDoc("ddddd", writer);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            writer.Dispose();

            SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);

            // same with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(2, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);

            // test scoring
            query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals("3 documents should match", 3, hits.Length);
            List <String> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");

            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].score);
                assertEquals(order[i], term);
            }

            // test pq size by supplying maxExpansions=2
            // This query would normally return 3 documents, because 3 terms match (see above):
            query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals("only 2 documents should match", 2, hits.Length);
            order = Arrays.AsList("bbbbb", "abbbb");
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].score);
                assertEquals(order[i], term);
            }

            // not similar enough:
            query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0);   // edit distance to "aaaaa" = 3
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // query identical to a word in the index:
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            // default allows for up to two edits:
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // query similar to a word in the index:
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(2, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);


            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);


            // different field = no match:
            query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            reader.Dispose();
            directory.Dispose();
        }
Пример #57
0
 protected internal override void ReleaseSearcher(IndexSearcher s)
 {
     if (s != FixedSearcher)
     {
         // Final searcher:
         s.IndexReader.Dispose();
     }
 }
Пример #58
0
        protected override void DoSearching(TaskScheduler es, long stopTime)
        {
            bool anyOpenDelFiles = false;

            DirectoryReader r = DirectoryReader.Open(Writer, true);

            while (DateTime.Now.Millisecond < stopTime && !Failed.Get())
            {
                if (Random().NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now reopen r=" + r);
                    }
                    DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                    if (r2 != null)
                    {
                        r.Dispose();
                        r = r2;
                    }
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now close reader=" + r);
                    }
                    r.Dispose();
                    Writer.Commit();
                    ISet <string> openDeletedFiles = ((MockDirectoryWrapper)Dir).OpenDeletedFiles;
                    if (openDeletedFiles.Count > 0)
                    {
                        Console.WriteLine("OBD files: " + openDeletedFiles);
                    }
                    anyOpenDelFiles |= openDeletedFiles.Count > 0;
                    //Assert.AreEqual("open but deleted: " + openDeletedFiles, 0, openDeletedFiles.Size());
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now open");
                    }
                    r = DirectoryReader.Open(Writer, true);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: got new reader=" + r);
                }
                //System.out.println("numDocs=" + r.NumDocs + "
                //openDelFileCount=" + dir.openDeleteFileCount());

                if (r.NumDocs > 0)
                {
                    FixedSearcher = new IndexSearcher(r, es);
                    SmokeTestSearcher(FixedSearcher);
                    RunSearchThreads(DateTime.Now.Millisecond + 500);
                }
            }
            r.Dispose();

            //System.out.println("numDocs=" + r.NumDocs + " openDelFileCount=" + dir.openDeleteFileCount());
            ISet <string> openDeletedFiles_ = ((MockDirectoryWrapper)Dir).OpenDeletedFiles;

            if (openDeletedFiles_.Count > 0)
            {
                Console.WriteLine("OBD files: " + openDeletedFiles_);
            }
            anyOpenDelFiles |= openDeletedFiles_.Count > 0;

            Assert.IsFalse(anyOpenDelFiles, "saw non-zero open-but-deleted count");
        }
Пример #59
0
 // LUCENE-1404
 private int HitCount(IndexSearcher searcher, string word)
 {
     return searcher.Search(new TermQuery(new Term("text", word)), 10).TotalHits;
 }
Пример #60
0
        public virtual void TestRollingUpdates_Mem()
        {
            Random random             = new Random(Random().Next());
            BaseDirectoryWrapper dir  = NewDirectory();
            LineFileDocs         docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

            //provider.register(new MemoryCodec());
            // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over
            //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
            //{
            //    Codec.Default =
            //        TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.NextFloat()));
            //}

            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter   w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int           SIZE       = AtLeast(20);
            int           id         = 0;
            IndexReader   r          = null;
            IndexSearcher s          = null;
            int           numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;

            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc  = docs.NextDoc();
                string             myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).StringValue = myID;

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && Random().Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = Random().NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs());

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            long totalBytes = 0;

            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.SizeInBytes();
            }
            long totalBytes2 = 0;

            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }