예제 #1
0
        public virtual void TestAddSameDocTwice()
        {
            // LUCENE-5367: this was a problem with the previous code, making sure it
            // works with the new code.
            Directory indexDir = NewDirectory(), taxoDir = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig facetsConfig = new FacetsConfig();
            Document doc = new Document();
            doc.Add(new FacetField("a", "b"));
            doc = facetsConfig.Build(taxoWriter, doc);
            // these two addDocument() used to fail
            indexWriter.AddDocument(doc);
            indexWriter.AddDocument(doc);
            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);
            FacetsCollector fc = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), fc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, fc);
            FacetResult res = facets.GetTopChildren(10, "a");
            Assert.AreEqual(1, res.LabelValues.Length);
            Assert.AreEqual(2, res.LabelValues[0].Value);
            IOUtils.Close(indexReader, taxoReader);

            IOUtils.Close(indexDir, taxoDir);
        }
        public virtual void TestDefault()
        {
            Directory indexDir = NewDirectory();
            Directory taxoDir = NewDirectory();

            // create and open an index writer
            var iw = new RandomIndexWriter(Random(), indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
            // create and open a taxonomy writer
            var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
            var config = Config;

            seedIndex(tw, iw, config);

            IndexReader ir = iw.Reader;
            tw.Commit();

            // prepare index reader and taxonomy.
            var tr = new DirectoryTaxonomyReader(taxoDir);

            // prepare searcher to search against
            IndexSearcher searcher = NewSearcher(ir);

            FacetsCollector sfc = PerformSearch(tr, ir, searcher);

            // Obtain facets results and hand-test them
            AssertCorrectResults(GetTaxonomyFacetCounts(tr, config, sfc));

            assertOrdinalsExist("$facets", ir);

            IOUtils.Close(tr, ir, iw, tw, indexDir, taxoDir);
        }
예제 #3
0
        public override void BeforeClass()
        {
            base.BeforeClass();

            dir     = NewDirectory();
            taxoDir = NewDirectory();
            // preparations - index, taxonomy, content

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            // Cannot mix ints & floats in the same indexed field:
            config = new FacetsConfig();
            config.SetIndexFieldName("int", "$facets.int");
            config.SetMultiValued("int", true);
            config.SetIndexFieldName("float", "$facets.float");
            config.SetMultiValued("float", true);

            var writer = new RandomIndexWriter(Random, dir, Similarity, TimeZone);

            // index documents, 50% have only 'b' and all have 'a'
            for (int i = 0; i < 110; i++)
            {
                Document doc = new Document();
                // every 11th document is added empty, this used to cause the association
                // aggregators to go into an infinite loop
                if (i % 11 != 0)
                {
                    doc.Add(new Int32AssociationFacetField(2, "int", "a"));
                    doc.Add(new SingleAssociationFacetField(0.5f, "float", "a"));
                    if (i % 2 == 0) // 50
                    {
                        doc.Add(new Int32AssociationFacetField(3, "int", "b"));
                        doc.Add(new SingleAssociationFacetField(0.2f, "float", "b"));
                    }
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.GetReader();
            writer.Dispose();
            taxoReader = new DirectoryTaxonomyReader(taxoDir);
        }
예제 #4
0
        public virtual void TestSegmentsWithoutCategoriesOrResults()
        {
            // tests the accumulator when there are segments with no results
            var indexDir = NewDirectory();
            var taxoDir  = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            //iwc.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges
            IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

            var          taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config     = new FacetsConfig();

            indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
            indexTwoDocs(taxoWriter, indexWriter, null, true);    // 2nd segment, with content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true);  // 3rd segment ok
            indexTwoDocs(taxoWriter, indexWriter, null, false);   // 4th segment, no content, or categories
            indexTwoDocs(taxoWriter, indexWriter, null, true);    // 5th segment, with content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true);  // 6th segment, with content, with categories
            indexTwoDocs(taxoWriter, indexWriter, null, true);    // 7th segment, with content, no categories
            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader   = DirectoryReader.Open(indexDir);
            var             taxoReader    = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher   indexSearcher = NewSearcher(indexReader);

            // search for "f:a", only segments 1 and 3 should match results
            Query           q   = new TermQuery(new Term("f", "a"));
            FacetsCollector sfc = new FacetsCollector();

            indexSearcher.Search(q, sfc);
            Facets      facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            FacetResult result = facets.GetTopChildren(10, "A");

            Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children");
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(2, (int)labelValue.Value, "wrong weight for child " + labelValue.Label);
            }

            IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
        }
        public virtual void TestLabelWithDelimiter()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            RandomIndexWriter writer  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("dim", true);

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "test\u001Fone"));
            doc.Add(new FacetField("dim", "test\u001Etwo"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

            FacetResult result = facets.GetTopChildren(10, "dim");

            Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n  test\u001Fone (1)\n  test\u001Etwo (1)\n", result.ToString());
            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
예제 #6
0
        public virtual void TestDifferentFieldsAndText()
        {
            Directory indexDir = NewDirectory();
            Directory taxoDir  = NewDirectory();

            // create and open an index writer
            var iw = new RandomIndexWriter(Random, indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false)));
            // create and open a taxonomy writer
            var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = Config;

            config.SetIndexFieldName("Band", "$bands");
            config.SetIndexFieldName("Composer", "$composers");
            seedIndex(tw, iw, config);

            IndexReader ir = iw.GetReader();

            tw.Commit();

            // prepare index reader and taxonomy.
            var tr = new DirectoryTaxonomyReader(taxoDir);

            // prepare searcher to search against
            IndexSearcher searcher = NewSearcher(ir);

            FacetsCollector sfc = PerformSearch(tr, ir, searcher);

            IDictionary <string, Facets> facetsMap = new Dictionary <string, Facets>();

            facetsMap["Band"]     = GetTaxonomyFacetCounts(tr, config, sfc, "$bands");
            facetsMap["Composer"] = GetTaxonomyFacetCounts(tr, config, sfc, "$composers");
            Facets facets = new MultiFacets(facetsMap, GetTaxonomyFacetCounts(tr, config, sfc));

            // Obtain facets results and hand-test them
            AssertCorrectResults(facets);
            assertOrdinalsExist("$facets", ir);
            assertOrdinalsExist("$bands", ir);
            assertOrdinalsExist("$composers", ir);

            IOUtils.Dispose(tr, ir, iw, tw, indexDir, taxoDir);
        }
        /// <summary>Build the example index.</summary>
        private void Index()
        {
            using (IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(EXAMPLE_VERSION,
                                                                                             new WhitespaceAnalyzer(EXAMPLE_VERSION))))
                // Writes facet ords to a separate directory from the main index
                using (DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir))
                {
                    Document doc = new Document();
                    doc.Add(new TextField("c", "foo bar", Field.Store.NO));
                    doc.Add(new NumericDocValuesField("popularity", 5L));
                    doc.Add(new FacetField("A", "B"));
                    indexWriter.AddDocument(config.Build(taxoWriter, doc));

                    doc = new Document();
                    doc.Add(new TextField("c", "foo foo bar", Field.Store.NO));
                    doc.Add(new NumericDocValuesField("popularity", 3L));
                    doc.Add(new FacetField("A", "C"));
                    indexWriter.AddDocument(config.Build(taxoWriter, doc));
                }// Disposes indexWriter and taxoWriter
        }
        public virtual void TestSumScoreAggregator()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            IndexWriter             iw         = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            FacetsConfig config = new FacetsConfig();

            for (int i = AtLeast(30); i > 0; --i)
            {
                Document doc = new Document();
                if (Random().NextBoolean()) // don't match all documents
                {
                    doc.Add(new StringField("f", "v", Field.Store.NO));
                }
                doc.Add(new FacetField("dim", "a"));
                iw.AddDocument(config.Build(taxoWriter, doc));
            }

            DirectoryReader         r          = DirectoryReader.Open(iw, true);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector    fc  = new FacetsCollector(true);
            ConstantScoreQuery csq = new ConstantScoreQuery(new MatchAllDocsQuery());

            csq.Boost = 2.0f;

            TopDocs td = FacetsCollector.Search(NewSearcher(r), csq, 10, fc);

            Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, fc, new TaxonomyFacetSumValueSource.ScoreValueSource());

            int expected = (int)(td.MaxScore * td.TotalHits);

            Assert.AreEqual(expected, (int)facets.GetSpecificValue("dim", "a"));

            IOUtils.Close(iw, taxoWriter, taxoReader, taxoDir, r, indexDir);
        }
예제 #9
0
        /// <summary>Build the example index.</summary>
        private void Index()
        {
            using IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(EXAMPLE_VERSION,
                                                                                            new WhitespaceAnalyzer(EXAMPLE_VERSION)));

            // Writes facet ords to a separate directory from the main index
            using DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            indexWriter.AddDocument(config.Build(taxoWriter, new Document
            {
                new FacetField("Author", "Bob"),
                new FacetField("Publish Date", "2010", "10", "15")
            }));

            indexWriter.AddDocument(config.Build(taxoWriter, new Document
            {
                new FacetField("Author", "Lisa"),
                new FacetField("Publish Date", "2010", "10", "20")
            }));

            indexWriter.AddDocument(config.Build(taxoWriter, new Document
            {
                new FacetField("Author", "Lisa"),
                new FacetField("Publish Date", "2012", "1", "1")
            }));

            indexWriter.AddDocument(config.Build(taxoWriter, new Document
            {
                new FacetField("Author", "Susan"),
                new FacetField("Publish Date", "2012", "1", "7")
            }));

            indexWriter.AddDocument(config.Build(taxoWriter, new Document
            {
                new FacetField("Author", "Frank"),
                new FacetField("Publish Date", "1999", "5", "5")
            }));
        }
예제 #10
0
        public virtual void TestDetectHierarchicalField()
        {
            Store.Directory dir        = NewDirectory();
            Store.Directory taxoDir    = NewDirectory();
            var             taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
            var             writer     = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig    config     = new FacetsConfig();

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path", "other"));
            try
            {
                config.Build(taxoWriter, doc);
                Fail("did not hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }
            IOUtils.Close(writer, taxoWriter, dir, taxoDir);
        }
예제 #11
0
        public virtual void TestDetectHierarchicalField()
        {
            Store.Directory dir        = NewDirectory();
            Store.Directory taxoDir    = NewDirectory();
            var             taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
            var             writer     = new RandomIndexWriter(Random, dir);
            FacetsConfig    config     = new FacetsConfig();

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path", "other"));
            try
            {
                config.Build(taxoWriter, doc);
                fail("did not hit expected exception");
            }
            catch (Exception iae) when(iae.IsIllegalArgumentException())
            {
                // expected
            }
            IOUtils.Dispose(writer, taxoWriter, dir, taxoDir);
        }
예제 #12
0
        public virtual void TestMixedTypesInSameIndexField()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            TaxonomyWriter    taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig      config     = new FacetsConfig();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new IntAssociationFacetField(14, "a", "x"));
            doc.Add(new FloatAssociationFacetField(55.0f, "b", "y"));
            try
            {
                writer.AddDocument(config.Build(taxoWriter, doc));
                Fail("did not hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }
            IOUtils.Close(writer, taxoWriter, dir, taxoDir);
        }
예제 #13
0
        public override void BeforeClass() // LUCENENET specific - renamed from BeforeClassCountingFacetsAggregatorTest() to ensure calling order
        {
            base.BeforeClass();

            indexDir = NewDirectory();
            taxoDir  = NewDirectory();

            // create an index which has:
            // 1. Segment with no categories, but matching results
            // 2. Segment w/ categories, but no results
            // 3. Segment w/ categories and results
            // 4. Segment w/ categories, but only some results

            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            conf.MergePolicy = NoMergePolicy.COMPOUND_FILES; // prevent merges, so we can control the index segments
            IndexWriter     indexWriter = new IndexWriter(indexDir, conf);
            ITaxonomyWriter taxoWriter  = new DirectoryTaxonomyWriter(taxoDir);

            allExpectedCounts  = newCounts();
            termExpectedCounts = newCounts();

            // segment w/ no categories
            IndexDocsNoFacets(indexWriter);

            // segment w/ categories, no content
            IndexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);

            // segment w/ categories and content
            IndexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);

            // segment w/ categories and some content
            IndexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);

            IOUtils.Dispose(indexWriter, taxoWriter);
        }
예제 #14
0
        public virtual void TestMixedTypesInSameIndexField()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            ITaxonomyWriter   taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig      config     = new FacetsConfig();
            RandomIndexWriter writer     = new RandomIndexWriter(Random, dir);

            Document doc = new Document();

            doc.Add(new Int32AssociationFacetField(14, "a", "x"));
            doc.Add(new SingleAssociationFacetField(55.0f, "b", "y"));
            try
            {
                writer.AddDocument(config.Build(taxoWriter, doc));
                fail("did not hit expected exception");
            }
            catch (Exception exc) when(exc.IsIllegalArgumentException())
            {
                // expected
            }
            IOUtils.Dispose(writer, taxoWriter, dir, taxoDir);
        }
예제 #15
0
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            // Uses default $facets field:
            Facets facets;

            if (Random().NextBoolean())
            {
                facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            }
            else
            {
                OrdinalsReader ordsReader = new DocValuesOrdinalsReader();
                if (Random().NextBoolean())
                {
                    ordsReader = new CachedOrdinalsReader(ordsReader);
                }
                facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
            }

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();

            // Reused across documents, to add the necessary facet
            // fields:
            Document doc = new Document();

            doc.Add(new IntField("num", 10, Field.Store.NO));
            doc.Add(new FacetField("Author", "Bob"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 20, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 30, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 40, Field.Store.NO));
            doc.Add(new FacetField("Author", "Susan"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 45, Field.Store.NO));
            doc.Add(new FacetField("Author", "Frank"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query and one of the
            // Facets.search utility methods:
            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

            // Retrieve & verify results:
            Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n  Lisa (50.0)\n  Frank (45.0)\n  Susan (40.0)\n  Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString());

            taxoReader.Dispose();
            searcher.IndexReader.Dispose();
            dir.Dispose();
            taxoDir.Dispose();
        }
 public virtual void TestWriterLock()
 {
     // native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
     var indexDir = new RAMDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     tw.AddCategory(new FacetLabel("hi", "there"));
     tw.Commit();
     // we deliberately not close the write now, and keep it open and
     // locked.
     // Verify that the writer worked:
     var tr = new DirectoryTaxonomyReader(indexDir);
     Assert.AreEqual(2, tr.GetOrdinal(new FacetLabel("hi", "there")));
     // Try to open a second writer, with the first one locking the directory.
     // We expect to get a LockObtainFailedException.
     try
     {
         Assert.Null(new DirectoryTaxonomyWriter(indexDir));
         Fail("should have failed to write in locked directory");
     }
     catch (LockObtainFailedException)
     {
         // this is what we expect to happen.
     }
     // Remove the lock, and now the open should succeed, and we can
     // write to the new writer.
     DirectoryTaxonomyWriter.Unlock(indexDir);
     var tw2 = new DirectoryTaxonomyWriter(indexDir);
     tw2.AddCategory(new FacetLabel("hey"));
     tw2.Dispose();
     // See that the writer indeed wrote:
     var newtr = TaxonomyReader.OpenIfChanged(tr);
     Assert.NotNull(newtr);
     tr.Dispose();
     tr = newtr;
     Assert.AreEqual(3, tr.GetOrdinal(new FacetLabel("hey")));
     tr.Dispose();
     tw.Dispose();
     indexDir.Dispose();
 }
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=3 childCount=3\n  foo1 (1)\n  foo2 (1)\n  foo3 (1)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=2 childCount=2\n  bar1 (1)\n  bar2 (1)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=1 childCount=1\n  baz1 (1)\n", results[2].ToString());

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
        public virtual void TestNrt()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcololicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();
            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter w = new IndexWriter(dir, iwc);
            var tw = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
        public virtual void TestSeparateReaderAndWriter()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            tw.Commit();
            var tr = new DirectoryTaxonomyReader(indexDir);

            Assert.AreEqual(1, tr.Count); // the empty taxonomy has size 1 (the root)
            tw.AddCategory(new FacetLabel("Author"));
            Assert.AreEqual(1, tr.Count); // still root only...
            Assert.Null(TaxonomyReader.OpenIfChanged(tr)); // this is not enough, because tw.Commit() hasn't been done yet
            Assert.AreEqual(1, tr.Count); // still root only...
            tw.Commit();
            Assert.AreEqual(1, tr.Count); // still root only...
            var newTaxoReader = TaxonomyReader.OpenIfChanged(tr);
            Assert.NotNull(newTaxoReader);
            tr.Dispose();
            tr = newTaxoReader;

            int author = 1;
            try
            {
                Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, tr.ParallelTaxonomyArrays.Parents[author]);
                // ok
            }
            catch (System.IndexOutOfRangeException)
            {
                Fail("After category addition, commit() and refresh(), getParent for " + author + " should NOT throw exception");
            }
            Assert.AreEqual(2, tr.Count); // finally, see there are two categories

            // now, add another category, and verify that after commit and refresh
            // the parent of this category is correct (this requires the reader
            // to correctly update its prefetched parent vector), and that the
            // old information also wasn't ruined:
            tw.AddCategory(new FacetLabel("Author", "Richard Dawkins"));
            int dawkins = 2;
            tw.Commit();
            newTaxoReader = TaxonomyReader.OpenIfChanged(tr);
            Assert.NotNull(newTaxoReader);
            tr.Dispose();
            tr = newTaxoReader;
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            Assert.AreEqual(author, parents[dawkins]);
            Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, parents[author]);
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]);
            Assert.AreEqual(3, tr.Count);
            tw.Dispose();
            tr.Dispose();
            indexDir.Dispose();
        }
 public virtual void TestWriterTwice3()
 {
     var indexDir = NewDirectory();
     // First, create and fill the taxonomy
     var tw = new DirectoryTaxonomyWriter(indexDir);
     FillTaxonomy(tw);
     tw.Dispose();
     // Now, open the same taxonomy and add the same categories again.
     // After a few categories, the LuceneTaxonomyWriter implementation
     // will stop looking for each category on disk, and rather read them
     // all into memory and close it's reader. The bug was that it closed
     // the reader, but forgot that it did (because it didn't set the reader
     // reference to null).
     tw = new DirectoryTaxonomyWriter(indexDir);
     FillTaxonomy(tw);
     // Add one new category, just to make commit() do something:
     tw.AddCategory(new FacetLabel("hi"));
     // Do a commit(). Here was a bug - if tw had a reader open, it should
     // be reopened after the commit. However, in our case the reader should
     // not be open (as explained above) but because it was not set to null,
     // we forgot that, tried to reopen it, and got an AlreadyClosedException.
     tw.Commit();
     Assert.AreEqual(ExpectedCategories.Length + 1, tw.Count);
     tw.Dispose();
     indexDir.Dispose();
 }
 public virtual void TestWriterTwice()
 {
     var indexDir = NewDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     FillTaxonomy(tw);
     // run fillTaxonomy again - this will try to add the same categories
     // again, and check that we see the same ordinal paths again, not
     // different ones. 
     FillTaxonomy(tw);
     // Let's check the number of categories again, to see that no
     // extraneous categories were created:
     Assert.AreEqual(ExpectedCategories.Length, tw.Count);
     tw.Dispose();
     indexDir.Dispose();
 }
 public virtual void TestWriterTwice2()
 {
     var indexDir = NewDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     FillTaxonomy(tw);
     tw.Dispose();
     tw = new DirectoryTaxonomyWriter(indexDir);
     // run fillTaxonomy again - this will try to add the same categories
     // again, and check that we see the same ordinals again, not different
     // ones, and that the number of categories hasn't grown by the new
     // additions
     FillTaxonomy(tw);
     Assert.AreEqual(ExpectedCategories.Length, tw.Count);
     tw.Dispose();
     indexDir.Dispose();
 }
 public virtual void TestWriter()
 {
     var indexDir = NewDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     FillTaxonomy(tw);
     // Also check ITaxonomyWriter.getSize() - see that the taxonomy's size
     // is what we expect it to be.
     Assert.AreEqual(ExpectedCategories.Length, tw.Count);
     tw.Dispose();
     indexDir.Dispose();
 }
        public virtual void TestNrt()
        {
            var dir = NewDirectory();
            var writer = new DirectoryTaxonomyWriter(dir);
            var reader = new DirectoryTaxonomyReader(writer);

            FacetLabel cp = new FacetLabel("a");
            writer.AddCategory(cp);
            var newReader = TaxonomyReader.OpenIfChanged(reader);
            Assert.NotNull(newReader, "expected a new instance");
            Assert.AreEqual(2, newReader.Count);
            Assert.AreNotSame(TaxonomyReader.INVALID_ORDINAL, newReader.GetOrdinal(cp));
            reader.Dispose();
            reader = newReader;

            writer.Dispose();
            reader.Dispose();

            dir.Dispose();
        }
        public virtual void TestWriterCheckPaths2()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            FillTaxonomy(tw);
            CheckPaths(tw);
            FillTaxonomy(tw);
            CheckPaths(tw);
            tw.Dispose();

            tw = new DirectoryTaxonomyWriter(indexDir);
            CheckPaths(tw);
            FillTaxonomy(tw);
            CheckPaths(tw);
            tw.Dispose();
            indexDir.Dispose();
        }
        public virtual void TestReplaceTaxonomyNrt()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var tw = new DirectoryTaxonomyWriter(taxoDir);

            Store.Directory taxoDir2 = NewDirectory();
            var tw2 = new DirectoryTaxonomyWriter(taxoDir2);
            tw2.Dispose();

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);
            w.AddDocument(new Document());
            tw.ReplaceTaxonomy(taxoDir2);
            taxoDir2.Dispose();

            try
            {
                mgr.MaybeRefresh();
                Fail("should have hit exception");
            }
            catch (InvalidOperationException)
            {
                // expected
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
        public virtual void TestWriterSimpler()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            Assert.AreEqual(1, tw.Count); // the root only
            // Test that adding a new top-level category works
            Assert.AreEqual(1, tw.AddCategory(new FacetLabel("a")));
            Assert.AreEqual(2, tw.Count);
            // Test that adding the same category again is noticed, and the
            // same ordinal (and not a new one) is returned.
            Assert.AreEqual(1, tw.AddCategory(new FacetLabel("a")));
            Assert.AreEqual(2, tw.Count);
            // Test that adding another top-level category returns a new ordinal,
            // not the same one
            Assert.AreEqual(2, tw.AddCategory(new FacetLabel("b")));
            Assert.AreEqual(3, tw.Count);
            // Test that adding a category inside one of the above adds just one
            // new ordinal:
            Assert.AreEqual(3, tw.AddCategory(new FacetLabel("a", "c")));
            Assert.AreEqual(4, tw.Count);
            // Test that adding the same second-level category doesn't do anything:
            Assert.AreEqual(3, tw.AddCategory(new FacetLabel("a", "c")));
            Assert.AreEqual(4, tw.Count);
            // Test that adding a second-level category with two new components
            // indeed adds two categories
            Assert.AreEqual(5, tw.AddCategory(new FacetLabel("d", "e")));
            Assert.AreEqual(6, tw.Count);
            // Verify that the parents were added above in the order we expected
            Assert.AreEqual(4, tw.AddCategory(new FacetLabel("d")));
            // Similar, but inside a category that already exists:
            Assert.AreEqual(7, tw.AddCategory(new FacetLabel("b", "d", "e")));
            Assert.AreEqual(8, tw.Count);
            // And now inside two levels of categories that already exist:
            Assert.AreEqual(8, tw.AddCategory(new FacetLabel("b", "d", "f")));
            Assert.AreEqual(9, tw.Count);

            tw.Dispose();
            indexDir.Dispose();
        }
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();
            IndexWriter     w        = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);

            // first empty commit
            w.Commit();
            tw.Commit();
            var          mgr    = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);

            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
        }
 public virtual void TestRootOnly()
 {
     var indexDir = NewDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     // right after opening the index, it should already contain the
     // root, so have size 1:
     Assert.AreEqual(1, tw.Count);
     tw.Dispose();
     var tr = new DirectoryTaxonomyReader(indexDir);
     Assert.AreEqual(1, tr.Count);
     Assert.AreEqual(0, tr.GetPath(0).Length);
     Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.ParallelTaxonomyArrays.Parents[0]);
     Assert.AreEqual(0, tr.GetOrdinal(new FacetLabel()));
     tr.Dispose(true);
     indexDir.Dispose();
 }
        private void AssertConsistentYoungestChild(FacetLabel abPath, int abOrd, int abYoungChildBase1, int abYoungChildBase2, int retry, int numCategories)
        {
            var indexDir = new SlowRAMDirectory(-1, null); // no slowness for intialization
            var tw = new DirectoryTaxonomyWriter(indexDir);
            tw.AddCategory(new FacetLabel("a", "0"));
            tw.AddCategory(abPath);
            tw.Commit();

            
            var tr = new DirectoryTaxonomyReader(indexDir);
            for (int i = 0; i < numCategories; i++)
            {
                var cp = new FacetLabel("a", "b", Convert.ToString(i));
                tw.AddCategory(cp);
                Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(cp), "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed");
            }
            tw.Dispose();

            
            var stop = new AtomicBoolean(false);
            Exception[] error = new Exception[] { null };
            int[] retrieval = new int[] { 0 };

            var thread = new ThreadAnonymousInnerClassHelper(this, abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, tr, stop, error, retrieval);
            thread.Start();

            indexDir.SleepMillis = 1; // some delay for refresh
            var newTaxoReader = TaxonomyReader.OpenIfChanged(tr);
            if (newTaxoReader != null)
            {
                newTaxoReader.Dispose();
            }

            stop.Set(true);
            thread.Join();
            Assert.Null(error[0], "Unexpcted exception at retry " + retry + " retrieval " + retrieval[0] + ": \n" + stackTraceStr(error[0]));

            tr.Dispose();
        }
 public virtual void TestRootOnly2()
 {
     var indexDir = NewDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     tw.Commit();
     var tr = new DirectoryTaxonomyReader(indexDir);
     Assert.AreEqual(1, tr.Count);
     Assert.AreEqual(0, tr.GetPath(0).Length);
     Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.ParallelTaxonomyArrays.Parents[0]);
     Assert.AreEqual(0, tr.GetOrdinal(new FacetLabel()));
     tw.Dispose();
     tr.Dispose(true);
     indexDir.Dispose();
 }
        public virtual void TestSeparateReaderAndWriter2()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            tw.Commit();
            var tr = new DirectoryTaxonomyReader(indexDir);

            // Test getOrdinal():
            FacetLabel author = new FacetLabel("Author");

            Assert.AreEqual(1, tr.Count); // the empty taxonomy has size 1 (the root)
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author));
            tw.AddCategory(author);
            // before commit and refresh, no change:
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author));
            Assert.AreEqual(1, tr.Count); // still root only...
            Assert.Null(TaxonomyReader.OpenIfChanged(tr)); // this is not enough, because tw.Commit() hasn't been done yet
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author));
            Assert.AreEqual(1, tr.Count); // still root only...
            tw.Commit();
            // still not enough before refresh:
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author));
            Assert.AreEqual(1, tr.Count); // still root only...
            var newTaxoReader = TaxonomyReader.OpenIfChanged(tr);
            Assert.NotNull(newTaxoReader);
            tr.Dispose();
            tr = newTaxoReader;
            Assert.AreEqual(1, tr.GetOrdinal(author));
            Assert.AreEqual(2, tr.Count);
            tw.Dispose();
            tr.Dispose();
            indexDir.Dispose();
        }
예제 #34
0
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new Int32Field("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
예제 #35
0
        public virtual void TestRandom()
        {
            string[]        tokens   = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexDir);
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt32(Random, 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                testDoc.value = Random.NextSingle();
                doc.Add(new SingleDocValuesField("value", testDoc.value));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.GetReader());

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            ValueSource values = new SingleFieldSource("value");

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random.Next(tokens.Length)];
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values);

                // Slow, yet hopefully bug-free, faceting:
                var expectedValues = new List <Dictionary <string, float?> >(numDims);
                for (int i = 0; i < numDims; i++)
                {
                    expectedValues.Add(new Dictionary <string, float?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken, StringComparison.Ordinal))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                if (!expectedValues[j].TryGetValue(doc.dims[j], out float?v) || v == null)
                                {
                                    expectedValues[j][doc.dims[j]] = doc.value;
                                }
                                else
                                {
                                    expectedValues[j][doc.dims[j]] = (float)v + doc.value;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    float totValue = 0;
                    foreach (KeyValuePair <string, float?> ent in expectedValues[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totValue += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totValue > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totValue, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                if (Verbose)
                {
                    Console.WriteLine("expected=\n" + expected.ToString());
                    Console.WriteLine("actual=\n" + actual.ToString());
                }

                AssertFloatValuesEquals(expected, actual);
            }

            IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new IntField("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new IntField("num", 20, Field.Store.NO));
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new IntField("num", 30, Field.Store.NO));
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n  foo3 (30.0)\n  foo2 (20.0)\n  foo1 (10.0)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n  bar2 (30.0)\n  bar1 (20.0)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n  baz1 (30.0)\n", results[2].ToString());

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
예제 #37
0
        public virtual void TestRandom()
        {
            string[]        tokens   = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            RandomIndexWriter w      = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt(Random(), 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.Reader);

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random().Next(tokens.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = GetTaxonomyFacetCounts(tr, config, fc);

                // Slow, yet hopefully bug-free, faceting:
                var expectedCounts = new List <Dictionary <string, int?> >();
                for (int i = 0; i < numDims; i++)
                {
                    expectedCounts.Add(new Dictionary <string, int?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                int?v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null;
                                if (v == null)
                                {
                                    expectedCounts[j][doc.dims[j]] = 1;
                                }
                                else
                                {
                                    expectedCounts[j][doc.dims[j]] = (int)v + 1;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    int totCount = 0;
                    foreach (KeyValuePair <string, int?> ent in expectedCounts[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totCount += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totCount > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                Assert.AreEqual(expected, actual);
            }

            IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
        public virtual void TestReplaceTaxonomyDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var tw = new DirectoryTaxonomyWriter(taxoDir);
            w.Commit();
            tw.Commit();

            Store.Directory taxoDir2 = NewDirectory();
            var tw2 = new DirectoryTaxonomyWriter(taxoDir2);
            tw2.AddCategory(new FacetLabel("a", "b"));
            tw2.Dispose();

            var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            SearcherAndTaxonomy pair = mgr.Acquire();
            try
            {
                Assert.AreEqual(1, pair.taxonomyReader.Size);
            }
            finally
            {
                mgr.Release(pair);
            }

            w.AddDocument(new Document());
            tw.ReplaceTaxonomy(taxoDir2);
            taxoDir2.Dispose();
            w.Commit();
            tw.Commit();

            mgr.MaybeRefresh();
            pair = mgr.Acquire();
            try
            {
                Assert.AreEqual(3, pair.taxonomyReader.Size);
            }
            finally
            {
                mgr.Release(pair);
            }

            IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
        }
        public static void BeforeClassCountingFacetsAggregatorTest()
        {
            indexDir = NewDirectory();
            taxoDir = NewDirectory();

            // create an index which has:
            // 1. Segment with no categories, but matching results
            // 2. Segment w/ categories, but no results
            // 3. Segment w/ categories and results
            // 4. Segment w/ categories, but only some results

            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            //conf.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges, so we can control the index segments
            IndexWriter indexWriter = new IndexWriter(indexDir, conf);
            TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            allExpectedCounts = newCounts();
            termExpectedCounts = newCounts();

            // segment w/ no categories
            IndexDocsNoFacets(indexWriter);

            // segment w/ categories, no content
            IndexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);

            // segment w/ categories and content
            IndexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);

            // segment w/ categories and some content
            IndexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);

            IOUtils.Close(indexWriter, taxoWriter);
        }
예제 #40
0
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetHierarchical("Publish Date", true);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new FacetField("Author", "Bob"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "15"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "20"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Susan"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "7"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Frank"));
            doc.Add(new FacetField("Publish Date", "1999", "5", "5"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c);

            // Retrieve & verify results:
            Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n  2010 (2)\n  2012 (2)\n  1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString());
            Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", facets.GetTopChildren(10, "Author").ToString());

            // Now user drills down on Publish Date/2010:
            DrillDownQuery q2 = new DrillDownQuery(config);

            q2.Add("Publish Date", "2010");
            c = new FacetsCollector();
            searcher.Search(q2, c);
            facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString());

            Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa"));

            Assert.Null(facets.GetTopChildren(10, "Non exitent dim"));

            // Smoke test PrintTaxonomyStats:
            string result;

            using (ByteArrayOutputStream bos = new ByteArrayOutputStream())
            {
                using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true)
                {
                    AutoFlush = true
                })
                {
                    PrintTaxonomyStats.PrintStats(taxoReader, w, true);
                }
                result = bos.ToString();
            }
            Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1);
            // Make sure at least a few nodes of the tree came out:
            Assert.True(result.IndexOf("  /1999", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("  /2012", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("      /20", StringComparison.Ordinal) != -1);

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
        public virtual void TestCustom()
        {
            Directory indexDir = NewDirectory();
            Directory taxoDir = NewDirectory();

            // create and open an index writer
            RandomIndexWriter iw = new RandomIndexWriter(Random(), indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
            // create and open a taxonomy writer
            var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = Config;
            config.SetIndexFieldName("Author", "$author");
            seedIndex(tw, iw, config);

            IndexReader ir = iw.Reader;
            tw.Commit();

            // prepare index reader and taxonomy.
            var tr = new DirectoryTaxonomyReader(taxoDir);

            // prepare searcher to search against
            IndexSearcher searcher = NewSearcher(ir);

            FacetsCollector sfc = PerformSearch(tr, ir, searcher);

            IDictionary<string, Facets> facetsMap = new Dictionary<string, Facets>();
            facetsMap["Author"] = GetTaxonomyFacetCounts(tr, config, sfc, "$author");
            Facets facets = new MultiFacets(facetsMap, GetTaxonomyFacetCounts(tr, config, sfc));

            // Obtain facets results and hand-test them
            AssertCorrectResults(facets);

            assertOrdinalsExist("$facets", ir);
            assertOrdinalsExist("$author", ir);

            IOUtils.Close(tr, ir, iw, tw, indexDir, taxoDir);
        }
예제 #42
0
        /// <summary>
        /// Add text to the existing index.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="addTextData">The text data to add.</param>
        /// <param name="config">The facet configuration information.</param>
        public void AddText(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, Dictionary <FacetField, AddTextData[]> addTextData, FacetsConfig config)
        {
            long totalTextLength           = 0;
            long maxTextLengthBeforeCommit = 30000000L;

            // For each text facet.
            foreach (KeyValuePair <FacetField, AddTextData[]> item in addTextData)
            {
                // If text exists.
                if (item.Value != null && item.Value.Length > 0)
                {
                    // Add the text.
                    FieldType nameFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Add the text.
                    FieldType completeFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Add the text.
                    FieldType textFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = false,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // For each text.
                    foreach (AddTextData data in item.Value)
                    {
                        // Should the data be stored.
                        completeFieldType.Stored = data.StoreText;

                        // Create the document.
                        Lucene.Net.Documents.Document document     = new Lucene.Net.Documents.Document();
                        Lucene.Net.Documents.Field    textName     = new Field("textname", data.Name.ToLower(), nameFieldType);
                        Lucene.Net.Documents.Field    textComplete = new Field("textcomplete", data.Text.ToLower(), completeFieldType);

                        document.Add(item.Key);
                        document.Add(textName);
                        document.Add(textComplete);

                        // Split the white spaces from the text.
                        string[] words = data.Text.Words();

                        // If words exist.
                        if (words != null && words.Length > 0)
                        {
                            // Add the query for each word.
                            for (int j = 0; j < words.Length; j++)
                            {
                                // Format the word.
                                string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                // If a word exists.
                                if (!String.IsNullOrEmpty(word))
                                {
                                    Lucene.Net.Documents.Field textData = new Field("facetcontent", word, textFieldType);
                                    document.Add(textData);
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(config.Build(facetWriter, document));

                        // Commit after a set number of documents.
                        totalTextLength += (long)data.Text.Length;
                        if (totalTextLength > maxTextLengthBeforeCommit)
                        {
                            // Commit the index.
                            writer.Commit();
                            facetWriter.Commit();
                            totalTextLength = 0;
                        }
                    }
                }
            }

            // Commit the index.
            writer.Commit();
            facetWriter.Commit();
        }
        public void BeforeClassDrillDownQueryTest()
        {
            dir = NewDirectory();
            Random r = Random();
            RandomIndexWriter writer = new RandomIndexWriter(r, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(r, MockTokenizer.KEYWORD, false)));

            taxoDir = NewDirectory();
            ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            config = new FacetsConfig();

            // Randomize the per-dim config:
            config.SetHierarchical("a", Random().NextBoolean());
            config.SetMultiValued("a", Random().NextBoolean());
            if (Random().NextBoolean())
            {
                config.SetIndexFieldName("a", "$a");
            }
            config.SetRequireDimCount("a", true);

            config.SetHierarchical("b", Random().NextBoolean());
            config.SetMultiValued("b", Random().NextBoolean());
            if (Random().NextBoolean())
            {
                config.SetIndexFieldName("b", "$b");
            }
            config.SetRequireDimCount("b", true);

            for (int i = 0; i < 100; i++)
            {
                Document doc = new Document();
                if (i % 2 == 0) // 50
                {
                    doc.Add(new TextField("content", "foo", Field.Store.NO));
                }
                if (i % 3 == 0) // 33
                {
                    doc.Add(new TextField("content", "bar", Field.Store.NO));
                }
                if (i % 4 == 0) // 25
                {
                    if (r.NextBoolean())
                    {
                        doc.Add(new FacetField("a", "1"));
                    }
                    else
                    {
                        doc.Add(new FacetField("a", "2"));
                    }
                }
                if (i % 5 == 0) // 20
                {
                    doc.Add(new FacetField("b", "1"));
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.Reader;
            writer.Dispose();

            taxo = new DirectoryTaxonomyReader(taxoDir);
        }
예제 #44
0
        /// <summary>
        /// Add documents.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param>
        /// <param name="files">The list of files that are to be added.</param>
        /// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param>
        /// <param name="facetField">The facet field information.</param>
        /// <param name="config">The facet configuration information.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter,
                                 DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config)
        {
            FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = false,
                Stored       = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };
            FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = documents.TokenizeContent,
                Stored       = documents.StoreContent,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // For each file.
            for (int i = 0; i < files.Length; i++)
            {
                // If the file exists
                if (File.Exists(files[i]))
                {
                    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

                    try
                    {
                        FileInfo fileInfo = new FileInfo(files[i]);
                        string   file     = files[i].Replace(directoryInfo.Root.FullName, "").ToLower();

                        Lucene.Net.Documents.Field path     = new Field("path", file.ToLower().Replace("\\", "/"), pathFieldType);
                        Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType);

                        // Add the fields.
                        document.Add(facetField);
                        document.Add(path);
                        document.Add(modified);

                        // Create the stream reader.
                        OpenDocument(files[i]);
                        string content = Nequeo.Xml.Document.ExtractContent(_xDocument);

                        // If content exists.
                        if (!String.IsNullOrEmpty(content))
                        {
                            // Split the white spaces from the text.
                            string[] words = content.Words();

                            // If words exist.
                            if (words != null && words.Length > 0)
                            {
                                // Add the query for each word.
                                for (int j = 0; j < words.Length; j++)
                                {
                                    // Format the word.
                                    string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                    // If a word exists.
                                    if (!String.IsNullOrEmpty(word))
                                    {
                                        Lucene.Net.Documents.Field contentField = new Field("facetcontent", word, contentFieldType);
                                        document.Add(contentField);
                                    }
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(config.Build(facetWriter, document));
                        _document.Close();

                        // Commit after a set number of documents.
                        documents.TotalDocumentSize += fileInfo.Length;
                        if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                        {
                            // Commit the index.
                            writer.Commit();
                            facetWriter.Commit();
                            documents.TotalDocumentSize = 0;
                        }
                    }
                    catch (Exception)
                    {
                        throw;
                    }
                    finally
                    {
                        CloseDocument();
                    }
                }
            }
        }
예제 #45
0
        public virtual void TestMixedRangeAndNonRangeTaxonomy()
        {
            Directory               d  = NewDirectory();
            RandomIndexWriter       w  = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            Directory               td = NewDirectory();
            DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            for (long l = 0; l < 100; l++)
            {
                Document doc = new Document();
                // For computing range facet counts:
                doc.Add(new NumericDocValuesField("field", l));
                // For drill down by numeric range:
                doc.Add(new Int64Field("field", l, Field.Store.NO));

                if ((l & 3) == 0)
                {
                    doc.Add(new FacetField("dim", "a"));
                }
                else
                {
                    doc.Add(new FacetField("dim", "b"));
                }
                w.AddDocument(config.Build(tw, doc));
            }

            IndexReader r = w.Reader;

            var tr = new DirectoryTaxonomyReader(tw);

            IndexSearcher s = NewSearcher(r);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: searcher=" + s);
            }

            DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr);

            // First search, no drill downs:
            DrillDownQuery      ddq = new DrillDownQuery(config);
            DrillSidewaysResult dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(100, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            // Second search, drill down on dim=b:
            ddq = new DrillDownQuery(config);
            ddq.Add("dim", "b");
            dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(75, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=16 childCount=5\n  less than 10 (7)\n  less than or equal to 10 (8)\n  over 90 (7)\n  90 or above (8)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            // Third search, drill down on "less than or equal to 10":
            ddq = new DrillDownQuery(config);
            ddq.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true));
            dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(11, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n  b (8)\n  a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());
            IOUtils.Close(tw, tr, td, w, r, d);
        }
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var tw = new DirectoryTaxonomyWriter(taxoDir);
            // first empty commit
            w.Commit();
            tw.Commit();
            var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
        }
        public virtual void TestReaderBasic()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            FillTaxonomy(tw);
            tw.Dispose();
            var tr = new DirectoryTaxonomyReader(indexDir);

            // test TaxonomyReader.getSize():
            Assert.AreEqual(ExpectedCategories.Length, tr.Count);

            // test round trips of ordinal => category => ordinal
            for (int i = 0; i < tr.Count; i++)
            {
                Assert.AreEqual(i, tr.GetOrdinal(tr.GetPath(i)));
            }

            // test TaxonomyReader.getCategory():
            for (int i = 1; i < tr.Count; i++)
            {
                FacetLabel expectedCategory = new FacetLabel(ExpectedCategories[i]);
                FacetLabel category = tr.GetPath(i);
                if (!expectedCategory.Equals(category))
                {
                    Fail("For ordinal " + i + " expected category " + Showcat(expectedCategory) + ", but got " + Showcat(category));
                }
            }
            //  (also test invalid ordinals:)
            Assert.Null(tr.GetPath(-1));
            Assert.Null(tr.GetPath(tr.Count));
            Assert.Null(tr.GetPath(TaxonomyReader.INVALID_ORDINAL));

            // test TaxonomyReader.GetOrdinal():
            for (int i = 1; i < ExpectedCategories.Length; i++)
            {
                int expectedOrdinal = i;
                int ordinal = tr.GetOrdinal(new FacetLabel(ExpectedCategories[i]));
                if (expectedOrdinal != ordinal)
                {
                    Fail("For category " + Showcat(ExpectedCategories[i]) + " expected ordinal " + expectedOrdinal + ", but got " + ordinal);
                }
            }
            // (also test invalid categories:)
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(new FacetLabel("non-existant")));
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(new FacetLabel("Author", "Jules Verne")));

            tr.Dispose();
            indexDir.Dispose();
        }
예제 #48
0
        /// <summary>
        /// Initializes a new instance of the <see cref="LuceneIndex" /> class.
        /// </summary>
        /// <param name="indexPath">The path to the directory that will contain the Lucene index files.</param>
        /// <param name="schema">The schema.</param>
        /// <exception cref="System.ArgumentNullException"></exception>
        public LuceneIndex(string indexPath, Schema schema)
        {
            if (String.IsNullOrWhiteSpace(indexPath))
                throw new ArgumentNullException(nameof(indexPath)); 
            if (schema == null)
                throw new ArgumentNullException(nameof(schema));

            IndexPath = indexPath;
            Schema = schema;

            if (System.IO.Directory.Exists(IndexPath))
            {
                if (Schema.IsDefault())
                    throw new InvalidOperationException($"There is an existing index on '{IndexPath}'.");
            }                
            else
            {
                System.IO.Directory.CreateDirectory(IndexPath);
            }                        

            _indexDirectory = new MMapDirectory(Paths.get(IndexPath));

            var taxonomyIndexPath = System.IO.Path.Combine(IndexPath, "taxonomy");
            if (!System.IO.Directory.Exists(taxonomyIndexPath))            
                System.IO.Directory.CreateDirectory(taxonomyIndexPath);

            _taxonomyDirectory = new MMapDirectory(Paths.get(taxonomyIndexPath));         
                           
            _compositeAnalyzer = new CompositeAnalyzer(Schema);            

            _ramBufferSizeMB = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.RAMBufferSizeMB"] ?? "128");

            var config = new IndexWriterConfig(_compositeAnalyzer)                            
                            .SetOpenMode(IndexWriterConfigOpenMode.CREATE_OR_APPEND)
                            .SetRAMBufferSizeMB(_ramBufferSizeMB)
                            .SetCommitOnClose(true);                            
            
            _indexWriter = new IndexWriter(_indexDirectory, config);
            _taxonomyWriter = new DirectoryTaxonomyWriter(_taxonomyDirectory, IndexWriterConfigOpenMode.CREATE_OR_APPEND);

            _searcherTaxonomyManager = new SearcherTaxonomyManager(_indexWriter, true, null, _taxonomyWriter);            
            _facetBuilder = new LuceneFacetBuilder(_taxonomyWriter);                        

            _refreshIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexSearcher.RefreshIntervalSeconds"] ?? "0.5");    
            _commitIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.CommitIntervalSeconds"] ?? "60");
           
            _writeAllowedFlag = new ManualResetEventSlim(true);

            _refreshTimer = new Timer(o => Refresh(), null, TimeSpan.FromSeconds(_refreshIntervalSeconds), TimeSpan.FromSeconds(_refreshIntervalSeconds));
            _commitTimer = new Timer(o => Commit(), null, TimeSpan.FromSeconds(_commitIntervalSeconds), TimeSpan.FromSeconds(_commitIntervalSeconds));

        }
예제 #49
0
        public override void BeforeClass() // LUCENENET specific - renamed from BeforeClassDrillDownQueryTest() to ensure calling order
        {
            base.BeforeClass();

            dir = NewDirectory();
            Random            r      = Random;
            RandomIndexWriter writer = new RandomIndexWriter(r, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(r, MockTokenizer.KEYWORD, false)));

            taxoDir = NewDirectory();
            ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            config = new FacetsConfig();

            // Randomize the per-dim config:
            config.SetHierarchical("a", Random.NextBoolean());
            config.SetMultiValued("a", Random.NextBoolean());
            if (Random.NextBoolean())
            {
                config.SetIndexFieldName("a", "$a");
            }
            config.SetRequireDimCount("a", true);

            config.SetHierarchical("b", Random.NextBoolean());
            config.SetMultiValued("b", Random.NextBoolean());
            if (Random.NextBoolean())
            {
                config.SetIndexFieldName("b", "$b");
            }
            config.SetRequireDimCount("b", true);

            for (int i = 0; i < 100; i++)
            {
                Document doc = new Document();
                if (i % 2 == 0) // 50
                {
                    doc.Add(new TextField("content", "foo", Field.Store.NO));
                }
                if (i % 3 == 0) // 33
                {
                    doc.Add(new TextField("content", "bar", Field.Store.NO));
                }
                if (i % 4 == 0) // 25
                {
                    if (r.NextBoolean())
                    {
                        doc.Add(new FacetField("a", "1"));
                    }
                    else
                    {
                        doc.Add(new FacetField("a", "2"));
                    }
                }
                if (i % 5 == 0) // 20
                {
                    doc.Add(new FacetField("b", "1"));
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.GetReader();
            writer.Dispose();

            taxo = new DirectoryTaxonomyReader(taxoDir);
        }
        public virtual void TestReaderParent()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            FillTaxonomy(tw);
            tw.Dispose();
            var tr = new DirectoryTaxonomyReader(indexDir);

            // check that the parent of the root ordinal is the invalid ordinal:
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parents[0]);

            // check parent of non-root ordinals:
            for (int ordinal = 1; ordinal < tr.Count; ordinal++)
            {
                FacetLabel me = tr.GetPath(ordinal);
                int parentOrdinal = parents[ordinal];
                FacetLabel parent = tr.GetPath(parentOrdinal);
                if (parent == null)
                {
                    Fail("Parent of " + ordinal + " is " + parentOrdinal + ", but this is not a valid category.");
                }
                // verify that the parent is indeed my parent, according to the strings
                if (!me.Subpath(me.Length - 1).Equals(parent))
                {
                    Fail("Got parent " + parentOrdinal + " for ordinal " + ordinal + " but categories are " + Showcat(parent) + " and " + Showcat(me) + " respectively.");
                }
            }

            tr.Dispose();
            indexDir.Dispose();
        }
예제 #51
0
        /// <summary>
        /// Add documents to the existing index.
        /// </summary>
        /// <param name="directoryIndexInfo">The directory infomation where the index files are located.</param>
        /// <param name="directoryFacetInfo">The directory infomation where the facet files are to be placed.</param>
        /// <param name="facetData">The complete facet information used to build the index information.</param>
        public void AddMultiFacetDocuments(DirectoryInfo directoryIndexInfo, DirectoryInfo directoryFacetInfo, FacetData facetData)
        {
            Lucene.Net.Index.IndexWriter writer      = null;
            DirectoryTaxonomyWriter      facetWriter = null;

            Lucene.Net.Store.Directory directory      = null;
            Lucene.Net.Store.Directory directoryFacet = null;

            try
            {
                if (facetData != null)
                {
                    // Create the analyzer.
                    SimpleAnalyzer   simpleAnalyzer   = new Analyzer.SimpleAnalyzer();
                    StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

                    // Create the index writer.
                    directory = FSDirectory.Open(directoryIndexInfo);
                    IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
                    indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);

                    // Open existing or create new.
                    writer = new IndexWriter(directory, indexConfig);

                    // Create the facet writer.
                    directoryFacet = FSDirectory.Open(directoryFacetInfo);
                    facetWriter    = new DirectoryTaxonomyWriter(directoryFacet, IndexWriterConfig.OpenMode_e.APPEND);

                    // Create the facet filter.
                    FacetFilter filter = new FacetFilter();
                    filter.AddDocuments(writer, facetWriter, facetData);

                    // Commit the index.
                    writer.Commit();
                    facetWriter.Commit();
                }
            }
            catch (Exception)
            {
                throw;
            }
            finally
            {
                if (writer != null)
                {
                    writer.Dispose();
                }

                if (facetWriter != null)
                {
                    facetWriter.Dispose();
                }

                if (directory != null)
                {
                    directory.Dispose();
                }

                if (directoryFacet != null)
                {
                    directoryFacet.Dispose();
                }
            }
        }
        public virtual void TestWriterParent2()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            FillTaxonomy(tw);
            tw.Commit();
            var tr = new DirectoryTaxonomyReader(indexDir);

            CheckWriterParent(tr, tw);

            tw.Dispose();
            tr.Dispose();
            indexDir.Dispose();
        }
예제 #53
0
        /// <summary>
        /// Add text to the existing index.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="addFileData">The file data to add.</param>
        /// <param name="config">The facet configuration information.</param>
        public void AddFile(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, Dictionary <FacetField, FileFacetModel> addFileData, FacetsConfig config)
        {
            Nequeo.IO.Directory directory = new Nequeo.IO.Directory();

            // For each file facet.
            foreach (KeyValuePair <FacetField, FileFacetModel> item in addFileData)
            {
                // Select the document format filter.
                // If html has been selected.
                if (item.Value.Documents.SupportedDocuments.HasFlag(SupportedDocuments.Html))
                {
                    // Create the html filter.
                    HtmlFilter htmlFilter = new HtmlFilter();
                    string[]   files      = directory.GetFiles(item.Value.DirectoryInfo.FullName, item.Value.Documents.GetFormattedSearchPatterns(SupportedDocuments.Html));
                    htmlFilter.AddDocuments(writer, facetWriter, item.Value.DirectoryInfo, files, item.Value.Documents, item.Key, config);
                }

                // If pdf has been selected.
                if (item.Value.Documents.SupportedDocuments.HasFlag(SupportedDocuments.Pdf))
                {
                    // Create the pdf filter.
                    PdfFilter pdfFilter = new PdfFilter();
                    string[]  files     = directory.GetFiles(item.Value.DirectoryInfo.FullName, item.Value.Documents.GetFormattedSearchPatterns(SupportedDocuments.Pdf));
                    pdfFilter.AddDocuments(writer, facetWriter, item.Value.DirectoryInfo, files, item.Value.Documents, item.Key, config);
                }

                // If rtf has been selected.
                if (item.Value.Documents.SupportedDocuments.HasFlag(SupportedDocuments.Rtf))
                {
                    // Create the rtf filter.
                    RtfFilter rtfFilter = new RtfFilter();
                    string[]  files     = directory.GetFiles(item.Value.DirectoryInfo.FullName, item.Value.Documents.GetFormattedSearchPatterns(SupportedDocuments.Rtf));
                    rtfFilter.AddDocuments(writer, facetWriter, item.Value.DirectoryInfo, files, item.Value.Documents, item.Key, config);
                }

                // If txt has been selected.
                if (item.Value.Documents.SupportedDocuments.HasFlag(SupportedDocuments.Txt))
                {
                    // Create the txt filter.
                    TxtFilter txtFilter = new TxtFilter();
                    string[]  files     = directory.GetFiles(item.Value.DirectoryInfo.FullName, item.Value.Documents.GetFormattedSearchPatterns(SupportedDocuments.Txt));
                    txtFilter.AddDocuments(writer, facetWriter, item.Value.DirectoryInfo, files, item.Value.Documents, item.Key, config);
                }

                // If xml has been selected.
                if (item.Value.Documents.SupportedDocuments.HasFlag(SupportedDocuments.Xml))
                {
                    // Create the xml filter.
                    XmlFilter xmlFilter = new XmlFilter();
                    string[]  files     = directory.GetFiles(item.Value.DirectoryInfo.FullName, item.Value.Documents.GetFormattedSearchPatterns(SupportedDocuments.Xml));
                    xmlFilter.AddDocuments(writer, facetWriter, item.Value.DirectoryInfo, files, item.Value.Documents, item.Key, config);
                }

                // If docx has been selected.
                if (item.Value.Documents.SupportedDocuments.HasFlag(SupportedDocuments.Docx))
                {
                    // Create the docx filter.
                    MSDocFilter docxFilter = new MSDocFilter();
                    string[]    files      = directory.GetFiles(item.Value.DirectoryInfo.FullName, item.Value.Documents.GetFormattedSearchPatterns(SupportedDocuments.Docx));
                    docxFilter.AddDocuments(writer, facetWriter, item.Value.DirectoryInfo, files, item.Value.Documents, item.Key, config);
                }
            }
        }
 public virtual void TestChildrenArrays()
 {
     var indexDir = NewDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     FillTaxonomy(tw);
     tw.Dispose();
     var tr = new DirectoryTaxonomyReader(indexDir);
     ParallelTaxonomyArrays ca = tr.ParallelTaxonomyArrays;
     int[] youngestChildArray = ca.Children;
     Assert.AreEqual(tr.Count, youngestChildArray.Length);
     int[] olderSiblingArray = ca.Siblings;
     Assert.AreEqual(tr.Count, olderSiblingArray.Length);
     for (int i = 0; i < ExpectedCategories.Length; i++)
     {
         // find expected children by looking at all expectedCategories
         // for children
         List<int?> expectedChildren = new List<int?>();
         for (int j = ExpectedCategories.Length - 1; j >= 0; j--)
         {
             if (ExpectedCategories[j].Length != ExpectedCategories[i].Length + 1)
             {
                 continue; // not longer by 1, so can't be a child
             }
             bool ischild = true;
             for (int k = 0; k < ExpectedCategories[i].Length; k++)
             {
                 if (!ExpectedCategories[j][k].Equals(ExpectedCategories[i][k]))
                 {
                     ischild = false;
                     break;
                 }
             }
             if (ischild)
             {
                 expectedChildren.Add(j);
             }
         }
         // check that children and expectedChildren are the same, with the
         // correct reverse (youngest to oldest) order:
         if (expectedChildren.Count == 0)
         {
             Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
         }
         else
         {
             int child = youngestChildArray[i];
             Assert.AreEqual((int)expectedChildren[0], child);
             for (int j = 1; j < expectedChildren.Count; j++)
             {
                 child = olderSiblingArray[child];
                 Assert.AreEqual((int)expectedChildren[j], child);
                 // if child is INVALID_ORDINAL we should stop, but
                 // AssertEquals would fail in this case anyway.
             }
             // When we're done comparing, olderSiblingArray should now point
             // to INVALID_ORDINAL, saying there are no more children. If it
             // doesn't, we found too many children...
             Assert.AreEqual(-1, olderSiblingArray[child]);
         }
     }
     tr.Dispose();
     indexDir.Dispose();
 }
        public virtual void TestRandomSampling()
        {
            Directory dir     = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter       writer     = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            FacetsConfig config = new FacetsConfig();

            int numDocs = AtLeast(10000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10, CultureInfo.InvariantCulture)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random;

            // NRT open
            IndexSearcher searcher   = NewSearcher(writer.GetReader());
            var           taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            IOUtils.Dispose(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64());

            // There should be no divisions by zero
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.IsNotNull(collectRandomZeroResults.GetMatchingDocs());

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs())
            {
                Assert.AreEqual(0, doc.TotalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i %
            // 10) are hits.
            // there is a REAL small chance that one of the 5 values will be missed when
            // sampling.
            // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
            // missing) ~ 10^-193
            // so that is probably not going to happen.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextInt64());      // no sampling
            RandomSamplingFacetsCollector random10Percent  = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts  = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts     = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            FacetResult random10Result  = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult     = exactFacetCounts.GetTopChildren(10, "iMod10");

            Assert.AreEqual(random100Result, exactResult);

            // we should have five children, but there is a small chance we have less.
            // (see above).
            Assert.IsTrue(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.IsTrue(random10Result.ChildCount >= 1);

            // now calculate some statistics to determine if the sampled result is 'ok'.
            // because random sampling is used, the results will vary each time.
            int sum = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.Value;
            }
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.Value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // we query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2
            float targetMu = numDocs / (5.0f * 2.0f);

            // the average should be in the range and the standard deviation should not
            // be too great
            Assert.IsTrue(sigma < 200);
            Assert.IsTrue(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        public virtual void TestChildrenArraysInvariants()
        {
            var indexDir = NewDirectory();
            var tw = new DirectoryTaxonomyWriter(indexDir);
            FillTaxonomy(tw);
            tw.Dispose();
            var tr = new DirectoryTaxonomyReader(indexDir);
            ParallelTaxonomyArrays ca = tr.ParallelTaxonomyArrays;
            int[] children = ca.Children;
            Assert.AreEqual(tr.Count, children.Length);
            int[] olderSiblingArray = ca.Siblings;
            Assert.AreEqual(tr.Count, olderSiblingArray.Length);

            // test that the "youngest child" of every category is indeed a child:
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            for (int i = 0; i < tr.Count; i++)
            {
                int youngestChild = children[i];
                if (youngestChild != TaxonomyReader.INVALID_ORDINAL)
                {
                    Assert.AreEqual(i, parents[youngestChild]);
                }
            }

            // test that the "older sibling" of every category is indeed older (lower)
            // (it can also be INVALID_ORDINAL, which is lower than any ordinal)
            for (int i = 0; i < tr.Count; i++)
            {
                Assert.True(olderSiblingArray[i] < i, "olderSiblingArray[" + i + "] should be <" + i);
            }

            // test that the "older sibling" of every category is indeed a sibling
            // (they share the same parent)
            for (int i = 0; i < tr.Count; i++)
            {
                int sibling = olderSiblingArray[i];
                if (sibling == TaxonomyReader.INVALID_ORDINAL)
                {
                    continue;
                }
                Assert.AreEqual(parents[i], parents[sibling]);
            }

            // And now for slightly more complex (and less "invariant-like"...)
            // tests:

            // test that the "youngest child" is indeed the youngest (so we don't
            // miss the first children in the chain)
            for (int i = 0; i < tr.Count; i++)
            {
                // Find the really youngest child:
                int j;
                for (j = tr.Count - 1; j > i; j--)
                {
                    if (parents[j] == i)
                    {
                        break; // found youngest child
                    }
                }
                if (j == i) // no child found
                {
                    j = TaxonomyReader.INVALID_ORDINAL;
                }
                Assert.AreEqual(j, children[i]);
            }

            // test that the "older sibling" is indeed the least oldest one - and
            // not a too old one or -1 (so we didn't miss some children in the
            // middle or the end of the chain).
            for (int i = 0; i < tr.Count; i++)
            {
                // Find the youngest older sibling:
                int j;
                for (j = i - 1; j >= 0; j--)
                {
                    if (parents[j] == parents[i])
                    {
                        break; // found youngest older sibling
                    }
                }
                if (j < 0) // no sibling found
                {
                    j = TaxonomyReader.INVALID_ORDINAL;
                }
                Assert.AreEqual(j, olderSiblingArray[i]);
            }

            tr.Dispose();
            indexDir.Dispose();
        }
        public virtual void TestNrt()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcololicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();

            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter  w      = new IndexWriter(dir, iwc);
            var          tw     = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
 public virtual void TestChildrenArraysGrowth()
 {
     var indexDir = NewDirectory();
     var tw = new DirectoryTaxonomyWriter(indexDir);
     tw.AddCategory(new FacetLabel("hi", "there"));
     tw.Commit();
     var tr = new DirectoryTaxonomyReader(indexDir);
     ParallelTaxonomyArrays ca = tr.ParallelTaxonomyArrays;
     Assert.AreEqual(3, tr.Count);
     Assert.AreEqual(3, ca.Siblings.Length);
     Assert.AreEqual(3, ca.Children.Length);
     Assert.True(Arrays.Equals(new int[] { 1, 2, -1 }, ca.Children));
     Assert.True(Arrays.Equals(new int[] { -1, -1, -1 }, ca.Siblings));
     tw.AddCategory(new FacetLabel("hi", "ho"));
     tw.AddCategory(new FacetLabel("hello"));
     tw.Commit();
     // Before refresh, nothing changed..
     ParallelTaxonomyArrays newca = tr.ParallelTaxonomyArrays;
     Assert.AreSame(newca, ca); // we got exactly the same object
     Assert.AreEqual(3, tr.Count);
     Assert.AreEqual(3, ca.Siblings.Length);
     Assert.AreEqual(3, ca.Children.Length);
     // After the refresh, things change:
     var newtr = TaxonomyReader.OpenIfChanged(tr);
     Assert.NotNull(newtr);
     tr.Dispose();
     tr = newtr;
     ca = tr.ParallelTaxonomyArrays;
     Assert.AreEqual(5, tr.Count);
     Assert.AreEqual(5, ca.Siblings.Length);
     Assert.AreEqual(5, ca.Children.Length);
     Assert.True(Arrays.Equals(new int[] { 4, 3, -1, -1, -1 }, ca.Children));
     Assert.True(Arrays.Equals(new int[] { -1, -1, -1, 2, 1 }, ca.Siblings));
     tw.Dispose();
     tr.Dispose();
     indexDir.Dispose();
 }
예제 #59
0
        /// <summary>
        /// Creates near-real-time searcher and taxonomy reader
        ///  from the corresponding writers.
        /// </summary>
        public SearcherTaxonomyManager(IndexWriter writer, bool applyAllDeletes, SearcherFactory searcherFactory, DirectoryTaxonomyWriter taxoWriter)
        {
            if (searcherFactory == null)
            {
                searcherFactory = new SearcherFactory();
            }
            this.searcherFactory = searcherFactory;
            this.taxoWriter      = taxoWriter;
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            Current        = new SearcherAndTaxonomy(SearcherManager.GetSearcher(searcherFactory, DirectoryReader.Open(writer, applyAllDeletes)), taxoReader);
            this.taxoEpoch = taxoWriter.TaxonomyEpoch;
        }
        public virtual void TestTaxonomyReaderRefreshRaces()
        {
            // compute base child arrays - after first chunk, and after the other
            var indexDirBase = NewDirectory();
            var twBase = new DirectoryTaxonomyWriter(indexDirBase);
            twBase.AddCategory(new FacetLabel("a", "0"));
            FacetLabel abPath = new FacetLabel("a", "b");
            twBase.AddCategory(abPath);
            twBase.Commit();
            var trBase = new DirectoryTaxonomyReader(indexDirBase);

            ParallelTaxonomyArrays ca1 = trBase.ParallelTaxonomyArrays;

            int abOrd = trBase.GetOrdinal(abPath);
            int abYoungChildBase1 = ca1.Children[abOrd];

            int numCategories = AtLeast(800);
            for (int i = 0; i < numCategories; i++)
            {
                twBase.AddCategory(new FacetLabel("a", "b", Convert.ToString(i)));
            }
            twBase.Dispose();

            var newTaxoReader = TaxonomyReader.OpenIfChanged(trBase);
            Assert.NotNull(newTaxoReader);
            trBase.Dispose();
            trBase = newTaxoReader;

            ParallelTaxonomyArrays ca2 = trBase.ParallelTaxonomyArrays;
            int abYoungChildBase2 = ca2.Children[abOrd];

            int numRetries = AtLeast(50);
            for (int retry = 0; retry < numRetries; retry++)
            {
                AssertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories);
            }

            trBase.Dispose();
            indexDirBase.Dispose();
        }