public void TestWithValueSource()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random, dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertNotNull(doc);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, 10);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
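        // A hypothetical sketch of the GenerateIndexDocuments(...) helper these tests
        // rely on; the real implementation is not part of this excerpt. Inferred from
        // usage: it returns documents keyed by their unique FIELD_NAME term, each
        // carrying the payload and weight fields the dictionaries read back. The
        // field values shown here are illustrative assumptions.
        private IDictionary<string, Document> GenerateIndexDocumentsSketch(int ndocs)
        {
            IDictionary<string, Document> docs = new Dictionary<string, Document>();
            for (int i = 0; i < ndocs; i++)
            {
                string term = "field_" + i; // unique suggestion term, also the map key
                Document doc = new Document();
                doc.Add(new TextField(FIELD_NAME, term, Field.Store.YES));
                doc.Add(new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)));
                doc.Add(new Int64Field(WEIGHT_FIELD_NAME_1, 10 + i, Field.Store.YES));
                doc.Add(new Int64Field(WEIGHT_FIELD_NAME_2, 20 + i, Field.Store.YES));
                doc.Add(new Int64Field(WEIGHT_FIELD_NAME_3, 30 + i, Field.Store.YES));
                docs[term] = doc;
            }
            return docs;
        }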
        /*
         * LUCENE-3528 - NRTManager hangs in certain situations
         */
        public virtual void TestThreadStarvationNoDeleteNRTReader()
        {
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            conf.SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES);
            Directory      d      = NewDirectory();
            CountDownLatch latch  = new CountDownLatch(1);
            CountDownLatch signal = new CountDownLatch(1);

            LatchedIndexWriter  _writer = new LatchedIndexWriter(d, conf, latch, signal);
            TrackingIndexWriter writer  = new TrackingIndexWriter(_writer);
            SearcherManager     manager = new SearcherManager(_writer, false, null);
            Document            doc     = new Document();

            doc.Add(NewTextField("test", "test", Field.Store.YES));
            writer.AddDocument(doc);
            manager.MaybeRefresh();
            ThreadClass t = new ThreadAnonymousInnerClassHelper(this, latch, signal, writer, manager);

            t.Start();
            _writer.WaitAfterUpdate = true;                                    // wait in addDocument to let some reopens go through
            long lastGen = writer.UpdateDocument(new Term("foo", "bar"), doc); // once this returns the doc is already reflected in the last reopen

            Assert.IsFalse(manager.IsSearcherCurrent());                       // false since there is a delete in the queue

            IndexSearcher searcher = manager.Acquire();

            try
            {
                Assert.AreEqual(2, searcher.IndexReader.NumDocs());
            }
            finally
            {
                manager.Release(searcher);
            }
            ControlledRealTimeReopenThread <IndexSearcher> thread = new ControlledRealTimeReopenThread <IndexSearcher>(writer, manager, 0.01, 0.01);

            thread.Start(); // start reopening
            if (VERBOSE)
            {
                Console.WriteLine("waiting now for generation " + lastGen);
            }

            AtomicBoolean finished = new AtomicBoolean(false);
            ThreadClass   waiter   = new ThreadAnonymousInnerClassHelper2(this, lastGen, thread, finished);

            waiter.Start();
            manager.MaybeRefresh();
            waiter.Join(1000);
            if (!finished.Get())
            {
                waiter.Interrupt();
                Assert.Fail("thread deadlocked on waitForGeneration");
            }
            thread.Dispose();
            thread.Join();
            IOUtils.Close(manager, _writer, d);
        }
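        // A minimal sketch of the "waiter" thread created above; the actual
        // ThreadAnonymousInnerClassHelper2 is not shown in this excerpt. Inferred from
        // the failure message: it blocks in WaitForGeneration(lastGen) and flips the
        // finished flag once the reopen thread catches up.
        private sealed class WaiterThreadSketch : ThreadClass
        {
            private readonly long lastGen;
            private readonly ControlledRealTimeReopenThread<IndexSearcher> reopenThread;
            private readonly AtomicBoolean finished;

            public WaiterThreadSketch(long lastGen, ControlledRealTimeReopenThread<IndexSearcher> reopenThread, AtomicBoolean finished)
            {
                this.lastGen = lastGen;
                this.reopenThread = reopenThread;
                this.finished = finished;
            }

            public override void Run()
            {
                reopenThread.WaitForGeneration(lastGen); // deadlocks here if reopens are starved
                finished.Set(true);
            }
        }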
        public virtual void TestLongFieldCache()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            cfg.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw    = new RandomIndexWriter(Random, dir, cfg);
            Document          doc   = new Document();
            Int64Field        field = new Int64Field("f", 0L, Field.Store.YES);

            doc.Add(field);
            long[] values = new long[TestUtil.NextInt32(Random, 1, 10)];
            for (int i = 0; i < values.Length; ++i)
            {
                long v;
                switch (Random.Next(10))
                {
                case 0:
                    v = long.MinValue;
                    break;

                case 1:
                    v = 0;
                    break;

                case 2:
                    v = long.MaxValue;
                    break;

                default:
                    v = TestUtil.NextInt64(Random, -10, 10);
                    break;
                }
                values[i] = v;
                if (v == 0 && Random.NextBoolean())
                {
                    // missing
                    iw.AddDocument(new Document());
                }
                else
                {
                    field.SetInt64Value(v);
                    iw.AddDocument(doc);
                }
            }
            iw.ForceMerge(1);
            DirectoryReader reader = iw.GetReader();
            Int64s          longs  = FieldCache.DEFAULT.GetInt64s(GetOnlySegmentReader(reader), "f", false);

            for (int i = 0; i < values.Length; ++i)
            {
                Assert.AreEqual(values[i], longs.Get(i));
            }
            reader.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();

            dir = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwConfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw       = new RandomIndexWriter(Random, dir, iwConfig);
            Document          document = new Document();
            Field             idField  = new StringField("id", "", Field.Store.NO);

            document.Add(idField);
            Field byteField = new StringField("byte", "", Field.Store.NO);

            document.Add(byteField);
            Field doubleField = new StringField("double", "", Field.Store.NO);

            document.Add(doubleField);
            Field floatField = new StringField("float", "", Field.Store.NO);

            document.Add(floatField);
            Field intField = new StringField("int", "", Field.Store.NO);

            document.Add(intField);
            Field longField = new StringField("long", "", Field.Store.NO);

            document.Add(longField);
            Field shortField = new StringField("short", "", Field.Store.NO);

            document.Add(shortField);
            Field stringField = new StringField("string", "", Field.Store.NO);

            document.Add(stringField);
            Field textField = new TextField("text", "", Field.Store.NO);

            document.Add(textField);

            foreach (string[] doc in documents)
            {
                idField.SetStringValue(doc[0]);
                byteField.SetStringValue(doc[1]);
                doubleField.SetStringValue(doc[2]);
                floatField.SetStringValue(doc[3]);
                intField.SetStringValue(doc[4]);
                longField.SetStringValue(doc[5]);
                shortField.SetStringValue(doc[6]);
                stringField.SetStringValue(doc[7]);
                textField.SetStringValue(doc[8]);
                iw.AddDocument(document);
            }

            reader   = iw.GetReader();
            searcher = NewSearcher(reader);
            iw.Dispose();
        }
        private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
        {
            LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
            logDocMergePolicy.NoCFSRatio = 0.0; // make sure we use plain files
            conf.SetMergePolicy(logDocMergePolicy);

            IndexWriter writer = new IndexWriter(dir, conf);
            return writer;
        }
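        // Example usage (hypothetical): the helper simply forces a non-compound
        // LogDocMergePolicy onto whatever config the caller has built.
        // IndexWriter w = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));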
        public void TestWhichMTQMatched()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.GetReader();

            iw.Dispose();

            IndexSearcher          searcher    = NewSearcher(ir);
            ICUPostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            BooleanQuery           query       = new BooleanQuery();

            query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("body", "one")), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("body", "se*")), Occur.SHOULD);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(1, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);

            // Default formatter just bolds each hit:
            assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

            // Now use our own formatter, that also stuffs the
            // matching term's text into the result:
            highlighter = new PostingsHighlighterAnalyzerAndFormatterHelper(analyzer, new PassageFormatterHelper());

            assertEquals(1, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);

            // Default formatter bolds each hit:
            assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);

            ir.Dispose();
            dir.Dispose();
        }
        public void TestWildcardInBoolean()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This is a test.");
            iw.AddDocument(doc);
            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.GetReader();

            iw.Dispose();

            IndexSearcher          searcher    = NewSearcher(ir);
            ICUPostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            BooleanQuery           query       = new BooleanQuery();

            query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(2, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // must not
            query = new BooleanQuery();
            query.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("bogus", "te*")), Occur.MUST_NOT);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            ir.Dispose();
            dir.Dispose();
        }
        private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
        {
            LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();

            logDocMergePolicy.NoCFSRatio = 0.0; // make sure we use plain files
            conf.SetMergePolicy(logDocMergePolicy);

            IndexWriter writer = new IndexWriter(dir, conf);

            return(writer);
        }
        public void TestWithContexts()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), true, true);
            IDictionary <string, Document> docs = res.Value;
            List <string> invalidDocTerms       = res.Key;

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();
            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertNotNull(doc);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
                ISet <BytesRef>        oriCtxs    = new HashSet <BytesRef>();
                IEnumerable <BytesRef> contextSet = inputIterator.Contexts;
                foreach (IIndexableField ctxf in doc.GetFields(CONTEXT_FIELD_NAME))
                {
                    oriCtxs.add(ctxf.GetBinaryValue());
                }
                assertEquals(oriCtxs.size(), contextSet.Count());
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public void TestWithContext()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random, dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]    toAdd         = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2), new Int64FieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary      dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
            IInputEnumerator inputIterator = dictionary.GetEntryEnumerator();

            while (inputIterator.MoveNext())
            {
                string   field = inputIterator.Current.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
                assertTrue(inputIterator.Current.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));

                // LUCENENET NOTE: This test was once failing because we used SCG.HashSet<T>, whose
                // Equals() implementation does not check for set equality. As a result, SortedInputEnumerator
                // had been modified to reverse the results to get the test to pass. Using JCG.HashSet<T>
                // ensures set equality (equality that ignores the order of items) is respected, so
                // SortedInputEnumerator has also had the specific sorting removed. (See the sketch
                // after this method.)
                ISet <BytesRef> originalCtxs = new JCG.HashSet <BytesRef>();
                foreach (IIndexableField ctxf in doc.GetFields(CONTEXTS_FIELD_NAME))
                {
                    originalCtxs.add(ctxf.GetBinaryValue());
                }
                assertEquals(originalCtxs, inputIterator.Contexts);
            }
            assertTrue(docs.Count == 0);
            ir.Dispose();
            dir.Dispose();
        }
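        // A minimal illustration of the LUCENENET NOTE above, assuming JCG refers to
        // J2N.Collections.Generic: SCG.HashSet<T> inherits Object.Equals (reference
        // equality), while JCG.HashSet<T> implements structural, order-insensitive
        // set equality, which is what assertEquals(originalCtxs, ...) needs.
        private static void SetEqualitySketch()
        {
            var scgA = new System.Collections.Generic.HashSet<int> { 1, 2, 3 };
            var scgB = new System.Collections.Generic.HashSet<int> { 3, 2, 1 };
            bool scgEqual = scgA.Equals(scgB); // false: reference equality only

            var jcgA = new J2N.Collections.Generic.HashSet<int> { 1, 2, 3 };
            var jcgB = new J2N.Collections.Generic.HashSet<int> { 3, 2, 1 };
            bool jcgEqual = jcgA.Equals(jcgB); // true: same items, order ignored
        }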
        public void TestSpanNot()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This is a test.");
            iw.AddDocument(doc);
            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.Reader;

            iw.Dispose();

            IndexSearcher       searcher    = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            // (Ported from a Java anonymous class that overrides getIndexAnalyzer(String field)
            // to return the index-time analyzer; see the sketch after this method.)
            SpanQuery include = new SpanMultiTermQueryWrapper <WildcardQuery>(new WildcardQuery(new Term("body", "te*")));
            SpanQuery exclude = new SpanTermQuery(new Term("body", "bogus"));
            Query     query   = new SpanNotQuery(include, exclude);
            TopDocs   topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(2, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            ir.Dispose();
            dir.Dispose();
        }
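        // A plausible sketch of PostingsHighlighterAnalyzerHelper, mirroring the ported
        // Java anonymous class noted above; the actual helper class is not in this excerpt.
        internal sealed class PostingsHighlighterAnalyzerHelperSketch : PostingsHighlighter
        {
            private readonly Analyzer analyzer;

            public PostingsHighlighterAnalyzerHelperSketch(Analyzer analyzer)
            {
                this.analyzer = analyzer;
            }

            protected override Analyzer GetIndexAnalyzer(string field)
            {
                return analyzer; // lets the highlighter re-analyze content for MultiTermQueries
            }
        }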
        public virtual void TestDeletePartiallyWrittenFilesIfAbort()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            iwConf.SetCodec(CompressingCodec.RandomInstance(Random()));
            // disable CFS because this test checks file names
            iwConf.SetMergePolicy(NewLogMergePolicy(false));
            iwConf.SetUseCompoundFile(false);
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            Document validDoc = new Document();

            validDoc.Add(new IntField("id", 0, Field.Store.YES));
            iw.AddDocument(validDoc);
            iw.Commit();

            // make sure that #writeField will fail, triggering an abort
            Document  invalidDoc = new Document();
            FieldType fieldType  = new FieldType();

            fieldType.Stored = true;
            invalidDoc.Add(new FieldAnonymousInnerClassHelper(this, fieldType));

            Assert.Throws <ArgumentException>(() =>
            {
                try
                {
                    iw.AddDocument(invalidDoc);
                    iw.Commit();
                }
                finally
                {
                    int counter = 0;
                    foreach (string fileName in dir.ListAll())
                    {
                        if (fileName.EndsWith(".fdt") || fileName.EndsWith(".fdx"))
                        {
                            counter++;
                        }
                    }
                    // Only one .fdt and one .fdx file must have been found
                    Assert.AreEqual(2, counter);
                    iw.Dispose();
                    dir.Dispose();
                }
            });
        }
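        // A hedged sketch of FieldAnonymousInnerClassHelper, inferred from the comment
        // above ("#writeField will fail"): a stored field that exposes no string,
        // numeric, reader, or binary value, so the stored-fields writer throws while
        // the document is partially written and the segment must be aborted.
        internal sealed class InvalidFieldSketch : Field
        {
            public InvalidFieldSketch(FieldType fieldType)
                : base("invalid", fieldType)
            {
                // FieldsData is intentionally left null: StoredFieldsWriter.WriteField
                // finds nothing to write and throws an ArgumentException.
            }
        }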
        /// <summary>
        /// Override this to customize index settings, e.g. which
        /// codec to use.
        /// </summary>
        protected internal virtual IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion,
                                                                          Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode)
        {
            IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);

            iwc.SetCodec(new Lucene46Codec());
            iwc.SetOpenMode(openMode);

            // This way all merged segments will be sorted at
            // merge time, allowing for per-segment early termination
            // when those segments are searched:
            iwc.SetMergePolicy(new SortingMergePolicy(iwc.MergePolicy, SORT));

            return(iwc);
        }
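        // Example of the customization hook described above (hypothetical subclass),
        // swapping the codec while keeping the SortingMergePolicy wiring intact:
        // protected internal override IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion,
        //                                                                    Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode)
        // {
        //     IndexWriterConfig iwc = base.GetIndexWriterConfig(matchVersion, indexAnalyzer, openMode);
        //     iwc.SetCodec(new Lucene46Codec()); // or any codec suited to the suggester index
        //     return iwc;
        // }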
        public void TestCustomK1()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This has only foo foo. " +
                                "On the other hand this sentence contains both foo and bar. " +
                                "This has only bar bar bar bar bar bar bar bar bar bar bar bar.");
            iw.AddDocument(doc);

            IndexReader ir = iw.Reader;

            iw.Dispose();

            IndexSearcher       searcher    = NewSearcher(ir);
            PostingsHighlighter highlighter = new CustomK1PostingsHighlighter();
            // (Ported from a Java anonymous PostingsHighlighter(10000) whose getScorer(String field)
            // returned new PassageScorer(0, 0.75f, 87); see the sketch after this method.)
            BooleanQuery query = new BooleanQuery();

            query.Add(new TermQuery(new Term("body", "foo")), Occur.SHOULD);
            query.Add(new TermQuery(new Term("body", "bar")), Occur.SHOULD);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(1, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs, 1);
            assertEquals(1, snippets.Length);
            assertTrue(snippets[0].StartsWith("On the other hand", StringComparison.Ordinal));

            ir.Dispose();
            dir.Dispose();
        }
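        // A sketch of CustomK1PostingsHighlighter matching the ported Java noted above:
        // with k1 = 0 the BM25-style passage scorer saturates term frequency immediately,
        // so the passage containing both "foo" and "bar" wins over repetition-heavy ones.
        internal sealed class CustomK1PostingsHighlighterSketch : PostingsHighlighter
        {
            public CustomK1PostingsHighlighterSketch() : base(10000) { }

            protected override PassageScorer GetScorer(string field)
            {
                return new PassageScorer(0, 0.75f, 87); // k1 = 0, b = 0.75, pivot = 87
            }
        }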
        public override void BeforeClass()
        {
            base.BeforeClass();

            // NOTE: turn off compound file, this test will open some index files directly.
            OldFormatImpersonationIsActive = true;
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetUseCompoundFile(false);

            TermIndexInterval   = config.TermIndexInterval;
            IndexDivisor        = TestUtil.NextInt32(Random, 1, 10);
            NUMBER_OF_DOCUMENTS = AtLeast(100);
            NUMBER_OF_FIELDS    = AtLeast(Math.Max(10, 3 * TermIndexInterval * IndexDivisor / NUMBER_OF_DOCUMENTS));

            Directory = NewDirectory();

            config.SetCodec(new PreFlexRWCodec());
            LogMergePolicy mp = NewLogMergePolicy();

            // NOTE: turn off compound file, this test will open some index files directly.
            mp.NoCFSRatio = 0.0;
            config.SetMergePolicy(mp);

            Populate(Directory, config);

            DirectoryReader r0      = IndexReader.Open(Directory);
            SegmentReader   r       = LuceneTestCase.GetOnlySegmentReader(r0);
            string          segment = r.SegmentName;

            r.Dispose();

            FieldInfosReader infosReader     = (new PreFlexRWCodec()).FieldInfosFormat.FieldInfosReader;
            FieldInfos       fieldInfos      = infosReader.Read(Directory, segment, "", IOContext.READ_ONCE);
            string           segmentFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
            long             tiiFileLength   = Directory.FileLength(segmentFileName);
            IndexInput       input           = Directory.OpenInput(segmentFileName, NewIOContext(Random));

            TermEnum = new SegmentTermEnum(Directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), NewIOContext(Random)), fieldInfos, false);
            int totalIndexInterval = TermEnum.indexInterval * IndexDivisor;

            SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);

            Index = new TermInfosReaderIndex(indexEnum, IndexDivisor, tiiFileLength, totalIndexInterval);
            indexEnum.Dispose();
            input.Dispose();

            Reader      = IndexReader.Open(Directory);
            SampleTerms = Sample(Random, Reader, 1000);
        }
        public void TestWithoutPayload()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary <string, Document> docs = res.Value;
            List <string> invalidDocTerms       = res.Key;

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();
            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                var      field = f.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? weightField.GetInt64ValueOrDefault() : 0);
                assertEquals(inputIterator.Payload, null);
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }

            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwConfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw       = new RandomIndexWriter(Random, dir, iwConfig);
            Document          document = new Document();
            Field             idField  = new StringField("id", "", Field.Store.NO);

            document.Add(idField);
            iw.AddDocument(document);
            ir  = iw.GetReader();
            @is = NewSearcher(ir);
            iw.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();
            Analyzer = new MockAnalyzer(Random());
            Dir      = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, Analyzer);

            config.SetMergePolicy(NewLogMergePolicy()); // we will use docids to validate
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, config);

            writer.AddDocument(Doc("lucene", "lucene is a very popular search engine library"));
            writer.AddDocument(Doc("solr", "solr is a very popular search server and is using lucene"));
            writer.AddDocument(Doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
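        // A hypothetical sketch of the Doc(id, text) helper used in SetUp above; the
        // real field names are not visible in this excerpt, so "id" and "body" are
        // assumptions, and the name is suffixed to avoid clashing with the real helper.
        private Document DocSketch(string id, string text)
        {
            Document doc = new Document();
            doc.Add(NewStringField("id", id, Field.Store.YES));
            doc.Add(NewTextField("body", text, Field.Store.YES));
            return doc;
        }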
        public void TestWithContext()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]  toAdd         = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2), new Int64FieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertNotNull(doc);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
                ISet <BytesRef> originalCtxs = new HashSet <BytesRef>();
                foreach (IIndexableField ctxf in doc.GetFields(CONTEXTS_FIELD_NAME))
                {
                    originalCtxs.add(ctxf.GetBinaryValue());
                }
                assertEquals(originalCtxs, inputIterator.Contexts);
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
        public void TestCustomB()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This is a test.  This test is a better test but the sentence is excruiatingly long, " +
                                "you have no idea how painful it was for me to type this long sentence into my IDE.");
            iw.AddDocument(doc);

            IndexReader ir = iw.Reader;

            iw.Dispose();

            IndexSearcher       searcher    = NewSearcher(ir);
            PostingsHighlighter highlighter = new CustomBPostingsHighlighter();
            // (Ported from a Java anonymous PostingsHighlighter(10000) whose getScorer(String field)
            // returned new PassageScorer(1.2f, 0, 87); see the sketch after this method.)
            Query   query   = new TermQuery(new Term("body", "test"));
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(1, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs, 1);
            assertEquals(1, snippets.Length);
            assertTrue(snippets[0].StartsWith("This <b>test</b> is a better <b>test</b>", StringComparison.Ordinal));

            ir.Dispose();
            dir.Dispose();
        }
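        // Analogous to the CustomK1 sketch earlier: per the ported Java noted above,
        // CustomBPostingsHighlighter uses b = 0 so longer passages are not penalized.
        internal sealed class CustomBPostingsHighlighterSketch : PostingsHighlighter
        {
            public CustomBPostingsHighlighterSketch() : base(10000) { }

            protected override PassageScorer GetScorer(string field)
            {
                return new PassageScorer(1.2f, 0, 87); // k1 = 1.2, b = 0, pivot = 87
            }
        }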
        public void TestBasic()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]  toAdd         = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2), new Int64FieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertNotNull(doc);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
        public void TestEmptyReader()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            // Make sure the index is created even though no documents are added
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);

            writer.Commit();
            writer.Dispose();
            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();

            assertNull(inputIterator.Next());
            assertEquals(inputIterator.Weight, 0);
            assertNull(inputIterator.Payload);

            ir.Dispose();
            dir.Dispose();
        }
        public void TestWithoutPayload()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random, dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]  toAdd         = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2), new Int64FieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd));
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertEquals(inputIterator.Payload, null);
            }
            assertTrue(docs.Count == 0);
            ir.Dispose();
            dir.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            var iw = new RandomIndexWriter(Random, dir, iwc);

            var doc = new Document
            {
                NewStringField("id", "1", Field.Store.YES),
                NewTextField("body", "some contents and more contents", Field.Store.NO),
                new NumericDocValuesField("popularity", 5)
            };

            iw.AddDocument(doc);

            doc = new Document
            {
                NewStringField("id", "2", Field.Store.YES),
                NewTextField("body", "another document with different contents", Field.Store.NO),
                new NumericDocValuesField("popularity", 20)
            };
            iw.AddDocument(doc);

            doc = new Document
            {
                NewStringField("id", "3", Field.Store.YES),
                NewTextField("body", "crappy contents", Field.Store.NO),
                new NumericDocValuesField("popularity", 2)
            };
            iw.AddDocument(doc);
            iw.ForceMerge(1);

            reader = iw.GetReader();
            iw.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();

            dir = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwConfig.SetMergePolicy(NewLogMergePolicy());
            iwConfig.SetSimilarity(sim);
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConfig);

            Document doc = new Document();

            doc.Add(new TextField("text", "this is a test test test", Field.Store.NO));
            iw.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("text", "second test", Field.Store.NO));
            iw.AddDocument(doc);

            reader   = iw.GetReader();
            searcher = NewSearcher(reader);
            iw.Dispose();
        }
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary <string, Document> docs = res.Value;
            List <string> invalidDocTerms       = res.Key;
            Random        rand       = Random();
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                IIndexableField f2 = doc.GetField(FIELD_NAME);
                if (rand.nextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.GetStringValue()))
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.size()];
            for (int i = 0; i < termsToDel.size(); i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                assertTrue(toDel != null);
                docs.Remove(termToDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertEquals(ir.NumDocs, docs.size());
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                var      field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertNotNull(doc);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertEquals(inputIterator.Payload, null);
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));
            Random        rand       = Random();
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1)
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.size()];
            for (int i = 0; i < termsToDel.size(); i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                docs.Remove(termToDel);
                assertTrue(null != toDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
            assertEquals(ir.NumDocs, docs.size());
            ValueSource[] toAdd = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) };

            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertNotNull(doc);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, w2 + w1);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
        public void Test()
        {
            RandomIndexWriter writer;
            DirectoryReader   indexReader;
            int numParents        = AtLeast(200);
            IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            cfg.SetMergePolicy(NewLogMergePolicy());
            using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg))
            {
                Document parentDoc = new Document();
                NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
                parentDoc.Add(parentVal);
                StringField parent = new StringField("parent", "true", Field.Store.YES);
                parentDoc.Add(parent);
                for (int i = 0; i < numParents; ++i)
                {
                    List <Document> documents   = new List <Document>();
                    int             numChildren = Random().nextInt(10);
                    for (int j = 0; j < numChildren; ++j)
                    {
                        Document childDoc = new Document();
                        childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5)));
                        documents.Add(childDoc);
                    }
                    parentVal.SetInt64Value(Random().nextInt(50));
                    documents.Add(parentDoc);
                    writer.AddDocuments(documents);
                }
                writer.ForceMerge(1);
                indexReader = writer.Reader;
            }

            AtomicReader     reader        = GetOnlySegmentReader(indexReader);
            Filter           parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
            FixedBitSet      parentBits    = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
            NumericDocValues parentValues  = reader.GetNumericDocValues("parent_val");

            NumericDocValues childValues = reader.GetNumericDocValues("child_val");

            Sort parentSort = new Sort(new SortField("parent_val", SortFieldType.INT64));
            Sort childSort  = new Sort(new SortField("child_val", SortFieldType.INT64));

            Sort   sort   = new Sort(new SortField("custom", new BlockJoinComparerSource(parentsFilter, parentSort, childSort)));
            Sorter sorter = new Sorter(sort);

            Sorter.DocMap docMap = sorter.Sort(reader);
            assertEquals(reader.MaxDoc, docMap.Count);

            int[] children       = new int[1];
            int   numChildren2   = 0;
            int   previousParent = -1;

            for (int i = 0; i < docMap.Count; ++i)
            {
                int oldID = docMap.NewToOld(i);
                if (parentBits.Get(oldID))
                {
                    // check that we have the right children
                    for (int j = 0; j < numChildren2; ++j)
                    {
                        assertEquals(oldID, parentBits.NextSetBit(children[j]));
                    }
                    // check that children are sorted
                    for (int j = 1; j < numChildren2; ++j)
                    {
                        int doc1 = children[j - 1];
                        int doc2 = children[j];
                        if (childValues.Get(doc1) == childValues.Get(doc2))
                        {
                            assertTrue(doc1 < doc2); // sort is stable
                        }
                        else
                        {
                            assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                        }
                    }
                    // check that parents are sorted
                    if (previousParent != -1)
                    {
                        if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                        {
                            assertTrue(previousParent < oldID);
                        }
                        else
                        {
                            assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                        }
                    }
                    // reset
                    previousParent = oldID;
                    numChildren2   = 0;
                }
                else
                {
                    children = ArrayUtil.Grow(children, numChildren2 + 1);
                    children[numChildren2++] = oldID;
                }
            }
            indexReader.Dispose();
            writer.w.Directory.Dispose();
        }
        public void TestRanges()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This is a test.");
            iw.AddDocument(doc);
            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.GetReader();

            iw.Dispose();

            IndexSearcher          searcher    = NewSearcher(ir);
            ICUPostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            Query   query   = TermRangeQuery.NewStringRange("body", "ta", "tf", true, true);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(2, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // null start
            query   = TermRangeQuery.NewStringRange("body", null, "tf", true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This <b>is</b> <b>a</b> <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.", snippets[1]);

            // null end
            query   = TermRangeQuery.NewStringRange("body", "ta", null, true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("<b>This</b> is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // exact start inclusive
            query   = TermRangeQuery.NewStringRange("body", "test", "tf", true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // exact end inclusive
            query   = TermRangeQuery.NewStringRange("body", "ta", "test", true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // exact start exclusive: "test" itself falls outside the range, so nothing is highlighted
            // (the MatchAllDocsQuery SHOULD clause keeps both docs in the results)
            BooleanQuery bq = new BooleanQuery();

            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            bq.Add(TermRangeQuery.NewStringRange("body", "test", "tf", false, true), Occur.SHOULD);
            topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", bq, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            // exact end exclusive: "test" falls outside the range, so nothing is highlighted
            bq = new BooleanQuery();
            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            bq.Add(TermRangeQuery.NewStringRange("body", "ta", "test", true, false), Occur.SHOULD);
            topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", bq, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            // wrong field: the range targets "bogus", so no terms in "body" are highlighted
            bq = new BooleanQuery();
            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            bq.Add(TermRangeQuery.NewStringRange("bogus", "ta", "tf", true, true), Occur.SHOULD);
            topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", bq, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            ir.Dispose();
            dir.Dispose();
        }
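        /*
         * The PostingsHighlighterAnalyzerHelper used above is not defined in this
         * snippet. A plausible sketch (an assumption, mirroring how the postings
         * highlighter tests usually expose the index-time analyzer): multi-term
         * queries such as TermRangeQuery carry no concrete terms to look up, so
         * the highlighter re-analyzes the text and needs the analyzer handed back.
         */
        internal class PostingsHighlighterAnalyzerHelperSketch : ICUPostingsHighlighter
        {
            private readonly Analyzer analyzer;

            public PostingsHighlighterAnalyzerHelperSketch(Analyzer analyzer)
            {
                this.analyzer = analyzer;
            }

            // Return the index-time analyzer so range/wildcard terms can be highlighted.
            protected override Analyzer GetIndexAnalyzer(string field)
            {
                return analyzer;
            }
        }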
        public virtual void TestNrt()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcoholicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();

            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter  w      = new IndexWriter(dir, iwc);
            var          tw     = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
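        /*
         * The ThreadAnonymousInnerClassHelper reopener above is not shown in this
         * snippet. A minimal sketch of what such a thread typically does (an
         * assumption, following the usual SearcherTaxonomyManager pattern) is to
         * refresh the manager in a loop until the indexer signals stop:
         */
        private sealed class ReopenerSketch : ThreadClass
        {
            private readonly AtomicBoolean stop;
            private readonly SearcherTaxonomyManager mgr;

            public ReopenerSketch(AtomicBoolean stop, SearcherTaxonomyManager mgr)
            {
                this.stop = stop;
                this.mgr  = mgr;
            }

            public override void Run()
            {
                while (!stop.Get())
                {
                    // Publish a fresh SearcherAndTaxonomy pair so searches see new docs/ordinals
                    mgr.MaybeRefresh();
                }
            }
        }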
        public void TestSearchAfterWhenSortingByFunctionValues()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy()); // depends on docid order
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);

            Document doc   = new Document();
            Field    field = new StringField("value", "", Field.Store.YES);

            doc.Add(field);

            // Save docs unsorted (decreasing value n, n-1, ...)
            const int NUM_VALS = 5;

            for (int val = NUM_VALS; val > 0; val--)
            {
                field.SetStringValue(Convert.ToString(val));
                writer.AddDocument(doc);
            }

            // Open index
            IndexReader reader = writer.GetReader();

            writer.Dispose();
            IndexSearcher searcher = NewSearcher(reader);

            // Get ValueSource from FieldCache
            Int32FieldSource src = new Int32FieldSource("value");
            // ...and make it a sort criterion
            SortField sf      = src.GetSortField(false).Rewrite(searcher);
            Sort      orderBy = new Sort(sf);

            // Get hits sorted by our FunctionValues (ascending values)
            Query   q    = new MatchAllDocsQuery();
            TopDocs hits = searcher.Search(q, reader.MaxDoc, orderBy);

            assertEquals(NUM_VALS, hits.ScoreDocs.Length);
            // Verify that sorting works in general
            int i = 0;

            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                int valueFromDoc = Convert.ToInt32(reader.Document(hit.Doc).Get("value"));
                assertEquals(++i, valueFromDoc);
            }

            // Now get hits after hit #2 (zero-based index 1) using IndexSearcher.SearchAfter()
            int      afterIdx = 1;
            FieldDoc afterHit = (FieldDoc)hits.ScoreDocs[afterIdx];

            hits = searcher.SearchAfter(afterHit, q, reader.MaxDoc, orderBy);

            // Expected # of hits: NUM_VALS - 2
            assertEquals(NUM_VALS - (afterIdx + 1), hits.ScoreDocs.Length);

            // Verify that hits are actually "after"
            int afterValue = (int)((double?)afterHit.Fields[0]);

            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                int val = Convert.ToInt32(reader.Document(hit.Doc).Get("value"));
                assertTrue(afterValue <= val);
                assertFalse(hit.Doc == afterHit.Doc);
            }
            reader.Dispose();
            dir.Dispose();
        }
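        /*
         * A minimal usage sketch, not part of the original test: SearchAfter is the
         * standard way to page deeply through sorted results. The cursor passed in
         * is simply the last ScoreDoc of the previous page (a FieldDoc when a Sort
         * is in effect, as in the test above).
         */
        private static int CountAllHitsByPaging(IndexSearcher searcher, Query q, Sort orderBy, int pageSize)
        {
            int      total = 0;
            ScoreDoc after = null;

            while (true)
            {
                TopDocs page = after == null
                    ? searcher.Search(q, pageSize, orderBy)
                    : searcher.SearchAfter(after, q, pageSize, orderBy);
                if (page.ScoreDocs.Length == 0)
                {
                    break; // ran out of hits
                }
                total += page.ScoreDocs.Length;
                after  = page.ScoreDocs[page.ScoreDocs.Length - 1]; // cursor for the next page
            }
            return total;
        }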