/// <summary>
        /// Called only from <see cref="DoOpenIfChanged()"/>. Wraps the given index reader
        /// and reuses the supplied caches and taxonomy arrays when they are provided.
        /// If the taxonomy has been recreated, pass <c>null</c> for the caches and for
        /// the parent/children arrays so that fresh ones are used instead.
        /// </summary>
        internal DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter, LRUHashMap<FacetLabel, IntClass> ordinalCache, LRUHashMap<int, FacetLabel> categoryCache, TaxonomyIndexArrays taxoArrays)
        {
            this.indexReader = indexReader;
            this.taxoWriter = taxoWriter;

            // Without a writer there is no epoch to track; -1 stands in for it.
            if (taxoWriter == null)
            {
                this.taxoEpoch = -1;
            }
            else
            {
                this.taxoEpoch = taxoWriter.TaxonomyEpoch;
            }

            // Share the caller's cache instances when given (note the protective code in
            // getOrdinal and getPath); otherwise start with empty default-sized caches.
            this.ordinalCache = ordinalCache ?? new LRUHashMap<FacetLabel, IntClass>(DEFAULT_CACHE_VALUE);
            this.categoryCache = categoryCache ?? new LRUHashMap<int, FacetLabel>(DEFAULT_CACHE_VALUE);

            // Derive new arrays from the previous ones against the new index reader,
            // or leave them unset when nothing was handed over.
            if (taxoArrays != null)
            {
                this.taxoArrays = new TaxonomyIndexArrays(indexReader, taxoArrays);
            }
            else
            {
                this.taxoArrays = null;
            }
        }
示例#2
0
        /// <summary>
        /// Verifies that asking the field cache for fields that are stored but not
        /// indexed yields default/empty values and leaves no entries in the cache.
        /// </summary>
        public virtual void TestNonIndexedFields()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);
            Document storedOnlyDoc = new Document();

            // One stored-only field per field cache accessor exercised below.
            string[] storedFieldNames =
            {
                "bogusbytes",
                "bogusshorts",
                "bogusints",
                "boguslongs",
                "bogusfloats",
                "bogusdoubles",
                "bogusterms",
                "bogustermsindex",
                "bogusmultivalued",
                "bogusbits"
            };
            foreach (string storedFieldName in storedFieldNames)
            {
                storedOnlyDoc.Add(new StoredField(storedFieldName, "bogus"));
            }
            indexWriter.AddDocument(storedOnlyDoc);
            DirectoryReader directoryReader = indexWriter.GetReader();

            indexWriter.Dispose();

            AtomicReader segmentReader = GetOnlySegmentReader(directoryReader);

            IFieldCache fieldCache = FieldCache.DEFAULT;

            // Start from an empty cache so the final check is meaningful.
            fieldCache.PurgeAllCaches();
            Assert.AreEqual(0, fieldCache.GetCacheEntries().Length);

#pragma warning disable 612, 618
            Bytes byteValues = fieldCache.GetBytes(segmentReader, "bogusbytes", true);
            Assert.AreEqual((byte)0, byteValues.Get(0));

            Int16s shortValues = fieldCache.GetInt16s(segmentReader, "bogusshorts", true);
            Assert.AreEqual(0, shortValues.Get(0));
#pragma warning restore 612, 618

            Int32s intValues = fieldCache.GetInt32s(segmentReader, "bogusints", true);
            Assert.AreEqual(0, intValues.Get(0));

            Int64s longValues = fieldCache.GetInt64s(segmentReader, "boguslongs", true);
            Assert.AreEqual(0, longValues.Get(0));

            Singles floatValues = fieldCache.GetSingles(segmentReader, "bogusfloats", true);
            Assert.AreEqual(0, floatValues.Get(0), 0.0f);

            Doubles doubleValues = fieldCache.GetDoubles(segmentReader, "bogusdoubles", true);
            Assert.AreEqual(0, doubleValues.Get(0), 0.0D);

            BytesRef scratchBytes = new BytesRef();
            BinaryDocValues termValues = fieldCache.GetTerms(segmentReader, "bogusterms", true);
            termValues.Get(0, scratchBytes);
            Assert.AreEqual(0, scratchBytes.Length);

            SortedDocValues termsIndex = fieldCache.GetTermsIndex(segmentReader, "bogustermsindex");
            Assert.AreEqual(-1, termsIndex.GetOrd(0));
            termsIndex.Get(0, scratchBytes);
            Assert.AreEqual(0, scratchBytes.Length);

            SortedSetDocValues multiValued = fieldCache.GetDocTermOrds(segmentReader, "bogusmultivalued");
            multiValued.SetDocument(0);
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, multiValued.NextOrd());

            IBits docsWithField = fieldCache.GetDocsWithField(segmentReader, "bogusbits");
            Assert.IsFalse(docsWithField.Get(0));

            // None of the lookups above may have populated the cache.
            Assert.AreEqual(0, fieldCache.GetCacheEntries().Length);
            directoryReader.Dispose();
            directory.Dispose();
        }
 /// <summary>
 /// Forwards <paramref name="threadName"/> to the base constructor and keeps
 /// references to the owning test, the index reader and the ordinals reader.
 /// </summary>
 public ThreadAnonymousClass(TestCachedOrdinalsReader outerInstance, string threadName, DirectoryReader reader, CachedOrdinalsReader ordsReader)
     : base(threadName)
 {
     this.ordsReader = ordsReader;
     this.reader = reader;
     this.outerInstance = outerInstance;
 }
示例#4
0
 /// <summary>
 /// Opens a <see cref="DirectoryReader"/> on top of the given <see cref="IndexWriter"/>,
 /// passing <c>false</c> as the second argument of <c>DirectoryReader.Open</c>.
 /// </summary>
 /// <param name="writer"> the writer to open the reader from </param>
 /// <returns> the newly opened reader </returns>
 protected virtual DirectoryReader OpenIndexReader(IndexWriter writer)
 {
     DirectoryReader openedReader = DirectoryReader.Open(writer, false);
     return openedReader;
 }
示例#5
0
        /// <summary>
        /// Implements the opening of a new <see cref="DirectoryTaxonomyReader"/> instance if
        /// the taxonomy has changed.
        ///
        /// <para>
        /// <b>NOTE:</b> the returned <see cref="DirectoryTaxonomyReader"/> shares the
        /// ordinal and category caches with this reader. This is not expected to cause
        /// any issues, unless the two instances continue to live. The reader
        /// guarantees that the two instances cannot affect each other in terms of
        /// correctness of the caches, however if the size of the cache is changed
        /// through <see cref="SetCacheSize(int)"/>, it will affect both reader instances.
        /// </para>
        /// </summary>
        /// <returns>
        /// a new reader over the changed index, or <c>null</c> when the underlying
        /// index reader reports no changes
        /// </returns>
        protected override TaxonomyReader DoOpenIfChanged()
        {
            EnsureOpen();

            // This works for both NRT and non-NRT readers (i.e. an NRT reader remains NRT).
            var r2 = DirectoryReader.OpenIfChanged(indexReader);

            if (r2 == null)
            {
                return(null); // no changes, nothing to do
            }

            // check if the taxonomy was recreated
            bool success = false;

            try
            {
                bool recreated = false;
                if (taxoWriter == null)
                {
                    // not NRT, check epoch from commit data.
                    // Use TryGetValue rather than the indexer: the indexer throws
                    // KeyNotFoundException when a commit carries no epoch entry, whereas
                    // the logic below expects a missing epoch to show up as null.
                    string t1;
                    string t2;
                    indexReader.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t1);
                    r2.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t2);
                    if (t1 == null)
                    {
                        if (t2 != null)
                        {
                            recreated = true;
                        }
                    }
                    else if (!t1.Equals(t2))
                    {
                        // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
                        // it's ok to use String.equals because we require the two epoch values to be the same.
                        recreated = true;
                    }
                }
                else
                {
                    // NRT, compare current taxoWriter.epoch() vs the one that was given at construction
                    if (taxoEpoch != taxoWriter.TaxonomyEpoch)
                    {
                        recreated = true;
                    }
                }

                DirectoryTaxonomyReader newtr;
                if (recreated)
                {
                    // if recreated, do not reuse anything from this instance. the information
                    // will be lazily computed by the new instance when needed.
                    newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
                }
                else
                {
                    newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
                }

                success = true;
                return(newtr);
            }
            finally
            {
                // On any failure the freshly opened reader has no owner yet; close it here.
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(r2);
                }
            }
        }
示例#6
0
 /// <summary>
 /// Reports whether an index exists in the directory returned by
 /// <c>GetDirectory</c> for <paramref name="appname"/>.
 /// </summary>
 private bool IndexExists(string appname)
 {
     var appDirectory = GetDirectory(appname);
     bool exists = DirectoryReader.IndexExists(appDirectory);
     return exists;
 }
示例#7
0
        /// <summary>
        /// Indexes a random number of documents carrying an "id" and a "dv" doc-values
        /// field of the requested <paramref name="type"/>, then reads every document back
        /// through the matching value source and checks the values against what was written.
        /// </summary>
        private void DoTest(DocValuesType type)
        {
            Directory directory = NewDirectory();
            IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            int docCount = AtLeast(50);
            Field idField = new NumericDocValuesField("id", 0);
            Field valueField;

            // Pick the doc-values field implementation for the type under test.
            switch (type)
            {
                case DocValuesType.BINARY:
                    valueField = new BinaryDocValuesField("dv", new BytesRef());
                    break;

                case DocValuesType.SORTED:
                    valueField = new SortedDocValuesField("dv", new BytesRef());
                    break;

                case DocValuesType.NUMERIC:
                    valueField = new NumericDocValuesField("dv", 0);
                    break;

                default:
                    throw AssertionError.Create();
            }

            // A single document instance is reused; only the field values change per doc.
            Document doc = new Document();
            doc.Add(idField);
            doc.Add(valueField);

            object[] expectedValues = new object[docCount];

            RandomIndexWriter writer = new RandomIndexWriter(Random, directory, writerConfig);

            for (int docIndex = 0; docIndex < docCount; ++docIndex)
            {
                idField.SetInt64Value(docIndex);
                switch (type)
                {
                    case DocValuesType.SORTED:
                    case DocValuesType.BINARY:
                        // Retry until a non-empty random string comes out.
                        do
                        {
                            expectedValues[docIndex] = TestUtil.RandomSimpleString(Random, 20);
                        } while (((string)expectedValues[docIndex]).Length == 0);
                        valueField.SetBytesValue(new BytesRef((string)expectedValues[docIndex]));
                        break;

                    case DocValuesType.NUMERIC:
                        int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int
                        expectedValues[docIndex] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                        valueField.SetInt64Value((long)expectedValues[docIndex]);
                        break;
                }
                writer.AddDocument(doc);
                if (Random.NextBoolean() && docIndex % 10 == 9)
                {
                    writer.Commit();
                }
            }
            writer.Dispose();

            DirectoryReader reader = DirectoryReader.Open(directory);

            foreach (AtomicReaderContext leaf in reader.Leaves)
            {
                Int64FieldSource idSource = new Int64FieldSource("id");
                FunctionValues idValues = idSource.GetValues(null, leaf);
                ValueSource valueSource;
                switch (type)
                {
                    case DocValuesType.BINARY:
                    case DocValuesType.SORTED:
                        valueSource = new BytesRefFieldSource("dv");
                        break;

                    case DocValuesType.NUMERIC:
                        valueSource = new Int64FieldSource("dv");
                        break;

                    default:
                        throw AssertionError.Create();
                }
                FunctionValues docValues = valueSource.GetValues(null, leaf);
                BytesRef scratch = new BytesRef();
                int maxDoc = 0;
                for (int docId = 0; docId < (maxDoc = leaf.AtomicReader.MaxDoc); ++docId)
                {
                    assertTrue(docValues.Exists(docId));
                    if (valueSource is BytesRefFieldSource)
                    {
                        assertTrue(docValues.ObjectVal(docId) is string);
                    }
                    else if (valueSource is Int64FieldSource)
                    {
                        assertTrue(docValues.ObjectVal(docId) is J2N.Numerics.Int64);
                        assertTrue(docValues.BytesVal(docId, scratch));
                    }
                    else
                    {
                        throw AssertionError.Create();
                    }

                    // The "id" value is the index into the expected-values array.
                    object expected = expectedValues[idValues.Int32Val(docId)];

                    if (type == DocValuesType.SORTED)
                    {
                        docValues.OrdVal(docId); // no exception
                        assertTrue(docValues.NumOrd >= 1);
                    }

                    if (type == DocValuesType.SORTED || type == DocValuesType.BINARY)
                    {
                        // Each accessor is deliberately queried twice.
                        assertEquals(expected, docValues.ObjectVal(docId));
                        assertEquals(expected, docValues.StrVal(docId));
                        assertEquals(expected, docValues.ObjectVal(docId));
                        assertEquals(expected, docValues.StrVal(docId));
                        assertTrue(docValues.BytesVal(docId, scratch));
                        assertEquals(new BytesRef((string)expected), scratch);
                    }
                    else if (type == DocValuesType.NUMERIC)
                    {
                        assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), docValues.Int64Val(docId));
                    }
                }
            }
            reader.Dispose();
            directory.Dispose();
        }
 /// <summary>
 /// Wraps the supplied reader in a new <see cref="AssertingDirectoryReader"/>.
 /// </summary>
 protected override DirectoryReader DoWrapDirectoryReader(DirectoryReader @in)
 {
     DirectoryReader wrapped = new AssertingDirectoryReader(@in);
     return wrapped;
 }
示例#9
0
        /// <summary>
        /// Builds the fixtures shared by the tests: a small index with one document per
        /// entry of <c>DocFields</c>, and a big index made by repeatedly adding a copy of
        /// itself until it holds at least 3000 documents, plus <c>NUM_EXTRA_DOCS</c>
        /// additional "field2" documents.
        /// </summary>
        public override void BeforeClass()
        {
            base.BeforeClass();

            // --- small index ---
            Directory = NewDirectory();
            RandomIndexWriter smallWriter = new RandomIndexWriter(Random, Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

            foreach (var docText in DocFields)
            {
                Document smallDoc = new Document();
                smallDoc.Add(NewTextField(field, docText, Field.Store.NO));
                smallWriter.AddDocument(smallDoc);
            }
            smallWriter.Dispose();
            LittleReader = DirectoryReader.Open(Directory);
            Searcher = NewSearcher(LittleReader);
            // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
            Searcher.Similarity = new DefaultSimilarity();

            // --- big index, seeded from the small one ---
            Dir2 = new MockDirectoryWrapper(Random, new RAMDirectory(Directory, IOContext.DEFAULT));

            // First multiply small test index:
            MulFactor = 1;
            int docCount = 0;

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now copy index...");
            }
            do
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: cycle...");
                }
                // Snapshot the big index and add the snapshot back into it.
                Directory snapshot = new MockDirectoryWrapper(Random, new RAMDirectory(Dir2, IOContext.DEFAULT));
                RandomIndexWriter doublingWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, Dir2);
                doublingWriter.AddIndexes(snapshot);
                docCount = doublingWriter.MaxDoc;
                doublingWriter.Dispose();
                MulFactor *= 2;
            } while (docCount < 3000);

            RandomIndexWriter bigWriter = new RandomIndexWriter(Random, Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));

            // First half of the extra documents.
            Document extraDoc = new Document();
            extraDoc.Add(NewTextField("field2", "xxx", Field.Store.NO));
            for (int added = 0; added < NUM_EXTRA_DOCS / 2; added++)
            {
                bigWriter.AddDocument(extraDoc);
            }

            // Second half of the extra documents, with different text.
            extraDoc = new Document();
            extraDoc.Add(NewTextField("field2", "big bad bug", Field.Store.NO));
            for (int added = 0; added < NUM_EXTRA_DOCS / 2; added++)
            {
                bigWriter.AddDocument(extraDoc);
            }
            Reader = bigWriter.GetReader();
            BigSearcher = NewSearcher(Reader);
            bigWriter.Dispose();
        }
示例#10
0
        /// <summary>
        /// Runs one read task: optionally warms the reader by loading every live
        /// document, optionally runs a search, and optionally traverses, retrieves and
        /// highlights the hits.
        /// </summary>
        /// <returns>
        /// a work counter: one per non-null document loaded during warming, one for the
        /// search, one per traversed hit, one per retrieved document, plus whatever the
        /// highlighter reports
        /// </returns>
        public override int DoLogic()
        {
            int res = 0;

            // open reader or use existing one
            IndexSearcher searcher = RunData.GetIndexSearcher();

            IndexReader reader;

            bool closeSearcher;

            if (searcher == null)
            {
                // open our own reader
                Directory dir = RunData.Directory;
                reader        = DirectoryReader.Open(dir);
                searcher      = new IndexSearcher(reader);
                closeSearcher = true;
            }
            else
            {
                // use existing one; this passes +1 ref to us
                reader        = searcher.IndexReader;
                closeSearcher = false;
            }

            // optionally warm and add num docs traversed to count
            if (WithWarm)
            {
                Document doc      = null;
                IBits    liveDocs = MultiFields.GetLiveDocs(reader);
                for (int m = 0; m < reader.MaxDoc; m++)
                {
                    // a null liveDocs is treated as "every document is live"
                    if (null == liveDocs || liveDocs.Get(m))
                    {
                        doc  = reader.Document(m);
                        res += (doc == null ? 0 : 1);
                    }
                }
            }

            if (WithSearch)
            {
                res++;
                Query   q       = queryMaker.MakeQuery();
                Sort    sort    = Sort;
                TopDocs hits    = null;
                int     numHits = NumHits;
                if (numHits > 0)
                {
                    if (WithCollector == false)
                    {
                        if (sort != null)
                        {
                            // TODO: instead of always passing false we
                            // should detect based on the query; if we make
                            // the IndexSearcher search methods that take
                            // Weight public again, we can go back to
                            // pulling the Weight ourselves:
                            TopFieldCollector collector = TopFieldCollector.Create(sort, numHits,
                                                                                   true, WithScore,
                                                                                   WithMaxScore,
                                                                                   false);
                            searcher.Search(q, null, collector);
                            hits = collector.GetTopDocs();
                        }
                        else
                        {
                            hits = searcher.Search(q, numHits);
                        }
                    }
                    else
                    {
                        // A custom collector is used; its results are not exposed as
                        // TopDocs, so hits stays null on this path.
                        ICollector collector = CreateCollector();
                        searcher.Search(q, null, collector);
                        //hits = collector.topDocs();
                    }

                    string printHitsField = RunData.Config.Get("print.hits.field", null);
                    if (hits != null && printHitsField != null && printHitsField.Length > 0)
                    {
                        Console.WriteLine("totalHits = " + hits.TotalHits);
                        Console.WriteLine("maxDoc()  = " + reader.MaxDoc);
                        Console.WriteLine("numDocs() = " + reader.NumDocs);
                        for (int i = 0; i < hits.ScoreDocs.Length; i++)
                        {
                            int      docID = hits.ScoreDocs[i].Doc;
                            Document doc   = reader.Document(docID);
                            Console.WriteLine("  " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
                        }
                    }

                    // hits is null when a custom collector ran above; without this guard
                    // the traversal would fail with a NullReferenceException.
                    if (WithTraverse && hits != null)
                    {
                        ScoreDoc[] scoreDocs     = hits.ScoreDocs;
                        int        traversalSize = Math.Min(scoreDocs.Length, TraversalSize);

                        if (traversalSize > 0)
                        {
                            bool                 retrieve     = WithRetrieve;
                            int                  numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
                            Analyzer             analyzer     = RunData.Analyzer;
                            BenchmarkHighlighter highlighter  = null;
                            if (numHighlight > 0)
                            {
                                highlighter = GetBenchmarkHighlighter(q);
                            }
                            for (int m = 0; m < traversalSize; m++)
                            {
                                int id = scoreDocs[m].Doc;
                                res++;
                                if (retrieve)
                                {
                                    Document document = RetrieveDoc(reader, id);
                                    res += document != null ? 1 : 0;
                                    // only the first numHighlight hits are highlighted
                                    if (numHighlight > 0 && m < numHighlight)
                                    {
                                        ICollection <string> fieldsToHighlight = GetFieldsToHighlight(document);
                                        foreach (string field in fieldsToHighlight)
                                        {
                                            string text = document.Get(field);
                                            res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }

            if (closeSearcher)
            {
                reader.Dispose();
            }
            else
            {
                // Release our +1 ref from above
                reader.DecRef();
            }
            return(res);
        }
示例#11
0
        /// <summary>
        /// Build the suggest index, using up to the specified
        ///  amount of temporary RAM while building.  Note that
        ///  the weights for the suggestions are ignored.
        /// </summary>
        /// <param name="iterator"> source of the suggestion surface forms; must carry neither payloads nor contexts </param>
        /// <param name="ramBufferSizeMB"> RAM buffer size, in MB, given to the temporary index writer </param>
        /// <exception cref="System.ArgumentException">
        /// if the iterator has payloads or contexts, if no suggestion produced any term,
        /// or if a term holds more grams than the configured maximum
        /// </exception>
        /// <exception cref="InvalidOperationException"> if the temporary index files cannot be removed </exception>
        public virtual void Build(InputIterator iterator, double ramBufferSizeMB)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();
            // TODO: messy ... java7 has Files.createTempDirectory
            // ... but 4.x is java6:
            File   tempIndexPath = null;
            Random random        = new Random();

            // Keep trying random names until a fresh temporary directory gets created.
            while (true)
            {
                tempIndexPath = new File(directory, prefix + ".index." + random.Next(int.MaxValue));
                if (tempIndexPath.mkdir())
                {
                    break;
                }
            }

            Directory dir = FSDirectory.Open(tempIndexPath);

            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, indexAnalyzer);

            iwc.OpenMode        = IndexWriterConfig.OpenMode_e.CREATE;
            iwc.RAMBufferSizeMB = ramBufferSizeMB;
            IndexWriter writer = new IndexWriter(dir, iwc);

            var ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: if only we had IndexOptions.TERMS_ONLY...
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms    = true;
            ft.Freeze();

            // One document/field pair is reused for every suggestion.
            Document doc   = new Document();
            Field    field = new Field("body", "", ft);

            doc.Add(field);

            totTokens = 0;
            IndexReader reader = null;

            bool success = false;

            count = 0;
            try
            {
                // Index every surface form as the "body" of its own document.
                while (true)
                {
                    BytesRef surfaceForm = iterator.Next();
                    if (surfaceForm == null)
                    {
                        break;
                    }
                    field.StringValue = surfaceForm.Utf8ToString();
                    writer.AddDocument(doc);
                    count++;
                }
                reader = DirectoryReader.Open(writer, false);

                Terms terms = MultiFields.GetTerms(reader, "body");
                if (terms == null)
                {
                    throw new System.ArgumentException("need at least one suggestion");
                }

                // Move all ngrams into an FST:
                TermsEnum termsEnum = terms.Iterator(null);

                Outputs <long?> outputs = PositiveIntOutputs.Singleton;
                Builder <long?> builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

                IntsRef scratchInts = new IntsRef();
                while (true)
                {
                    BytesRef term = termsEnum.Next();
                    if (term == null)
                    {
                        break;
                    }
                    int ngramCount = CountGrams(term);
                    if (ngramCount > grams)
                    {
                        throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                    }
                    // Only single-gram terms add to the running token total.
                    if (ngramCount == 1)
                    {
                        totTokens += termsEnum.TotalTermFreq();
                    }

                    builder.Add(Lucene.Net.Util.Fst.Util.ToIntsRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq()));
                }

                fst = builder.Finish();
                if (fst == null)
                {
                    throw new System.ArgumentException("need at least one suggestion");
                }
                //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                /*
                 * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                 * Util.toDot(fst, pw, true, true);
                 * pw.close();
                 */

                success = true;
            }
            finally
            {
                try
                {
                    if (success)
                    {
                        IOUtils.Close(writer, reader);
                    }
                    else
                    {
                        IOUtils.CloseWhileHandlingException(writer, reader);
                    }
                }
                finally
                {
                    // Remove the temporary index. The directory is disposed in its own
                    // finally so it is released even when a deletion fails and throws.
                    try
                    {
                        foreach (string file in dir.ListAll())
                        {
                            File path = new File(tempIndexPath, file);
                            if (path.Delete() == false)
                            {
                                throw new InvalidOperationException("failed to remove " + path);
                            }
                        }

                        if (tempIndexPath.Delete() == false)
                        {
                            throw new InvalidOperationException("failed to remove " + tempIndexPath);
                        }
                    }
                    finally
                    {
                        dir.Dispose();
                    }
                }
            }
        }
示例#12
0
        /// <summary>
        /// Checks that disposing the spell checker closes its searchers and that every
        /// later operation on it fails with <see cref="ObjectDisposedException"/>.
        /// </summary>
        public void TestClose()
        {
            using (IndexReader reader = DirectoryReader.Open(userindex))
            {
                spellChecker.ClearIndex();
                string fieldName = "field1";

                // Index both fields and compare the document counts after each step.
                Addwords(reader, spellChecker, "field1");
                int docsAfterField1 = this.NumDoc();
                Addwords(reader, spellChecker, "field2");
                int docsAfterField2 = this.NumDoc();
                assertEquals(docsAfterField2, docsAfterField1 + 1);

                CheckCommonSuggestions(reader);
                AssertLastSearcherOpen(4);
                spellChecker.Dispose();
                AssertSearchersClosed();

                // LUCENENET NOTE: Per MSDN, calling Dispose() multiple times
                // should be a safe operation. http://stackoverflow.com/a/5306896/181087
                // Certainly, there shouldn't be a problem with calling Dispose() within
                // a using block if you decide to free up resources early.
                // For that reason no check is made here that a second
                // spellChecker.Dispose() throws ObjectDisposedException.

                // Suggesting after disposal must fail.
                try
                {
                    CheckCommonSuggestions(reader);
                    fail("spellchecker was already closed");
                }
                catch (ObjectDisposedException)
                {
                    // expected
                }

                // Clearing the index after disposal must fail.
                try
                {
                    spellChecker.ClearIndex();
                    fail("spellchecker was already closed");
                }
                catch (ObjectDisposedException)
                {
                    // expected
                }

                // Indexing a dictionary after disposal must fail.
                try
                {
                    spellChecker.IndexDictionary(new LuceneDictionary(reader, fieldName), NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
                    fail("spellchecker was already closed");
                }
                catch (ObjectDisposedException)
                {
                    // expected
                }

                // Swapping the spell index after disposal must fail.
                try
                {
                    spellChecker.SetSpellIndex(spellindex);
                    fail("spellchecker was already closed");
                }
                catch (ObjectDisposedException)
                {
                    // expected
                }

                assertEquals(4, searchers.Count);
                AssertSearchersClosed();
            }
        }
示例#13
0
        /// <summary>
        /// Exercises <c>SuggestSimilar</c> on "field1" under each <see cref="SuggestMode"/>
        /// and checks the number of suggestions and, where applicable, the first one.
        /// </summary>
        public void TestSuggestModes()
        {
            using (IndexReader reader = DirectoryReader.Open(userindex))
            {
                spellChecker.ClearIndex();
                Addwords(reader, spellChecker, "field1");

                // SUGGEST_WHEN_NOT_IN_INDEX
                string[] eightyWhenMissing = spellChecker.SuggestSimilar("eighty", 2, reader, "field1",
                                                                         SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
                assertEquals(1, eightyWhenMissing.Length);
                assertEquals("eighty", eightyWhenMissing[0]);

                string[] eightWhenMissing = spellChecker.SuggestSimilar("eight", 2, reader, "field1",
                                                                        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
                assertEquals(1, eightWhenMissing.Length);
                assertEquals("eight", eightWhenMissing[0]);

                // SUGGEST_MORE_POPULAR
                string[] eightyMorePopular = spellChecker.SuggestSimilar("eighty", 5, reader, "field1",
                                                                         SuggestMode.SUGGEST_MORE_POPULAR);
                assertEquals(5, eightyMorePopular.Length);
                assertEquals("eight", eightyMorePopular[0]);

                string[] twentyMorePopular = spellChecker.SuggestSimilar("twenty", 5, reader, "field1",
                                                                         SuggestMode.SUGGEST_MORE_POPULAR);
                assertEquals(1, twentyMorePopular.Length);
                assertEquals("twenty-one", twentyMorePopular[0]);

                string[] eightMorePopular = spellChecker.SuggestSimilar("eight", 5, reader, "field1",
                                                                        SuggestMode.SUGGEST_MORE_POPULAR);
                assertEquals(0, eightMorePopular.Length);

                // SUGGEST_ALWAYS
                string[] eightyAlways = spellChecker.SuggestSimilar("eighty", 5, reader, "field1",
                                                                    SuggestMode.SUGGEST_ALWAYS);
                assertEquals(5, eightyAlways.Length);
                assertEquals("eight", eightyAlways[0]);

                string[] eightAlways = spellChecker.SuggestSimilar("eight", 5, reader, "field1",
                                                                   SuggestMode.SUGGEST_ALWAYS);
                assertEquals(5, eightAlways.Length);
                assertEquals("eighty", eightAlways[0]);
            }
        }
示例#14
0
 /// <summary>
 /// Creates a new read-only <see cref="IndexSearcher"/> over a reader opened on
 /// the given directory. Overridable for testing purposes.
 /// </summary>
 /// <param name="dir"> the directory used to open the searcher </param>
 /// <returns> a new read-only IndexSearcher </returns>
 /// <exception cref="IOException"> if there is a low-level IO error </exception>
 internal virtual IndexSearcher CreateSearcher(Directory dir)
 {
     DirectoryReader directoryReader = DirectoryReader.Open(dir);
     return new IndexSearcher(directoryReader);
 }
示例#15
0
        public virtual void TestInfiniteValues()
        {
            // Indexes the lowest, highest and zero values of the "float" and "int"
            // fields plus one float-only document per NaN constant, then checks
            // the hit counts of open, inclusive and exclusive range queries.
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            Document lowest = new Document();
            lowest.Add(new SingleField("float", float.NegativeInfinity, Field.Store.NO));
            lowest.Add(new Int32Field("int", int.MinValue, Field.Store.NO));
            indexWriter.AddDocument(lowest);

            Document highest = new Document();
            highest.Add(new SingleField("float", float.PositiveInfinity, Field.Store.NO));
            highest.Add(new Int32Field("int", int.MaxValue, Field.Store.NO));
            indexWriter.AddDocument(highest);

            Document zero = new Document();
            zero.Add(new SingleField("float", 0.0f, Field.Store.NO));
            zero.Add(new Int32Field("int", 0, Field.Store.NO));
            indexWriter.AddDocument(zero);

            // NaN documents only carry the "float" field.
            foreach (float nan in TestNumericUtils.FLOAT_NANs)
            {
                Document nanDoc = new Document();
                nanDoc.Add(new SingleField("float", nan, Field.Store.NO));
                indexWriter.AddDocument(nanDoc);
            }

            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(reader);

            // --- int field ---

            // Unbounded range, inclusive flags.
            Query rangeQuery = NumericRangeQuery.NewInt32Range("int", null, null, true, true);
            TopDocs found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(3, found.ScoreDocs.Length, "Score doc count");

            // Unbounded range, exclusive flags.
            rangeQuery = NumericRangeQuery.NewInt32Range("int", null, null, false, false);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(3, found.ScoreDocs.Length, "Score doc count");

            // Explicit extreme bounds, inclusive.
            rangeQuery = NumericRangeQuery.NewInt32Range("int", int.MinValue, int.MaxValue, true, true);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(3, found.ScoreDocs.Length, "Score doc count");

            // Explicit extreme bounds, exclusive: one hit expected.
            rangeQuery = NumericRangeQuery.NewInt32Range("int", int.MinValue, int.MaxValue, false, false);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(1, found.ScoreDocs.Length, "Score doc count");

            // --- float field ---

            // Unbounded range, inclusive flags.
            rangeQuery = NumericRangeQuery.NewSingleRange("float", null, null, true, true);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(3, found.ScoreDocs.Length, "Score doc count");

            // Unbounded range, exclusive flags.
            rangeQuery = NumericRangeQuery.NewSingleRange("float", null, null, false, false);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(3, found.ScoreDocs.Length, "Score doc count");

            // Infinite bounds, inclusive.
            rangeQuery = NumericRangeQuery.NewSingleRange("float", float.NegativeInfinity, float.PositiveInfinity, true, true);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(3, found.ScoreDocs.Length, "Score doc count");

            // Infinite bounds, exclusive: one hit expected.
            rangeQuery = NumericRangeQuery.NewSingleRange("float", float.NegativeInfinity, float.PositiveInfinity, false, false);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(1, found.ScoreDocs.Length, "Score doc count");

            // NaN..NaN is expected to match exactly the NaN documents.
            rangeQuery = NumericRangeQuery.NewSingleRange("float", float.NaN, float.NaN, true, true);
            found = searcher.Search(rangeQuery, 10);
            Assert.AreEqual(TestNumericUtils.FLOAT_NANs.Length, found.ScoreDocs.Length, "Score doc count");

            reader.Dispose();
            directory.Dispose();
        }
示例#16
0
        /// <summary>
        /// Checks whether a Lucene index exists at the given file-system path.
        /// </summary>
        /// <param name="luceneIndex">Path of the index directory; validated with <c>RequireNotNullOrEmpty</c>.</param>
        /// <returns>The result of <c>DirectoryReader.IndexExists</c> for the directory opened on that path.</returns>
        public static bool IndexExists(string luceneIndex)
        {
            luceneIndex.RequireNotNullOrEmpty(nameof(luceneIndex));

            // The directory is only needed for the duration of the check; dispose it
            // afterwards so the handle opened by FSDirectory.Open is not leaked.
            using (var directory = FSDirectory.Open(luceneIndex))
            {
                return DirectoryReader.IndexExists(directory);
            }
        }
示例#17
0
        public virtual void TestParsingAndSearching()
        {
            // Indexes three single-field documents and verifies which wildcard and
            // prefix queries match all of them, none of them, or exactly one.
            string field = "content";

            string[] docs = new string[]
            {
                "\\ abcdefg1",
                "\\x00079 hijklmn1",
                "\\\\ opqrstu1"
            };

            // queries that should find all docs
            Query[] matchAll = new Query[]
            {
                new WildcardQuery(new Term(field, "*")),
                new WildcardQuery(new Term(field, "*1")),
                new WildcardQuery(new Term(field, "**1")),
                new WildcardQuery(new Term(field, "*?")),
                new WildcardQuery(new Term(field, "*?1")),
                new WildcardQuery(new Term(field, "?*1")),
                new WildcardQuery(new Term(field, "**")),
                new WildcardQuery(new Term(field, "***")),
                new WildcardQuery(new Term(field, "\\\\*"))
            };

            // queries that should find no docs
            Query[] matchNone = new Query[]
            {
                new WildcardQuery(new Term(field, "a*h")),
                new WildcardQuery(new Term(field, "a?h")),
                new WildcardQuery(new Term(field, "*a*h")),
                new WildcardQuery(new Term(field, "?a")),
                new WildcardQuery(new Term(field, "a?"))
            };

            // prefix queries: row i must match only docs[i]
            PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][]
            {
                new PrefixQuery[]
                {
                    new PrefixQuery(new Term(field, "a")),
                    new PrefixQuery(new Term(field, "ab")),
                    new PrefixQuery(new Term(field, "abc"))
                },
                new PrefixQuery[]
                {
                    new PrefixQuery(new Term(field, "h")),
                    new PrefixQuery(new Term(field, "hi")),
                    new PrefixQuery(new Term(field, "hij")),
                    new PrefixQuery(new Term(field, "\\x0007"))
                },
                new PrefixQuery[]
                {
                    new PrefixQuery(new Term(field, "o")),
                    new PrefixQuery(new Term(field, "op")),
                    new PrefixQuery(new Term(field, "opq")),
                    new PrefixQuery(new Term(field, "\\\\"))
                }
            };

            // wildcard queries: row i must match only docs[i]
            WildcardQuery[][] matchOneDocWild = new WildcardQuery[][]
            {
                new WildcardQuery[]
                {
                    new WildcardQuery(new Term(field, "*a*")),
                    new WildcardQuery(new Term(field, "*ab*")),
                    new WildcardQuery(new Term(field, "*abc**")),
                    new WildcardQuery(new Term(field, "ab*e*")),
                    new WildcardQuery(new Term(field, "*g?")),
                    new WildcardQuery(new Term(field, "*f?1"))
                },
                new WildcardQuery[]
                {
                    new WildcardQuery(new Term(field, "*h*")),
                    new WildcardQuery(new Term(field, "*hi*")),
                    new WildcardQuery(new Term(field, "*hij**")),
                    new WildcardQuery(new Term(field, "hi*k*")),
                    new WildcardQuery(new Term(field, "*n?")),
                    new WildcardQuery(new Term(field, "*m?1")),
                    new WildcardQuery(new Term(field, "hij**"))
                },
                new WildcardQuery[]
                {
                    new WildcardQuery(new Term(field, "*o*")),
                    new WildcardQuery(new Term(field, "*op*")),
                    new WildcardQuery(new Term(field, "*opq**")),
                    new WildcardQuery(new Term(field, "op*q*")),
                    new WildcardQuery(new Term(field, "*u?")),
                    new WildcardQuery(new Term(field, "*t?1")),
                    new WildcardQuery(new Term(field, "opq**"))
                }
            };

            // prepare the index
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            foreach (string content in docs)
            {
                Document document = new Document();
                document.Add(NewTextField(field, content, Field.Store.NO));
                indexWriter.AddDocument(document);
            }
            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(reader);

            // test queries that must find all
            foreach (Query q in matchAll)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] found = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(docs.Length, found.Length);
            }

            // test queries that must find none
            foreach (Query q in matchNone)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] found = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(0, found.Length);
            }

            // test the prefix queries find only one doc
            for (int docId = 0; docId < matchOneDocPrefix.Length; docId++)
            {
                foreach (Query q in matchOneDocPrefix[docId])
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 prefix: doc=" + docs[docId] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] found = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, found.Length);
                    Assert.AreEqual(docId, found[0].Doc);
                }
            }

            // test the wildcard queries find only one doc
            for (int docId = 0; docId < matchOneDocWild.Length; docId++)
            {
                foreach (Query q in matchOneDocWild[docId])
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 wild: doc=" + docs[docId] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] found = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, found.Length);
                    Assert.AreEqual(docId, found[0].Doc);
                }
            }

            reader.Dispose();
            directory.Dispose();
        }
示例#18
0
        // Attempts a one-time bulk fill of the category cache from the index.
        //
        // We need to guarantee that if several threads call this concurrently, only
        // one executes it, and after it returns, the cache is updated and is either
        // complete or not. The whole body therefore runs under lock (this), and the
        // shouldFillCache flag is cleared before any work starts so later callers
        // return immediately.
        private void PerhapsFillCache()
        {
            lock (this)
            {
                // Not enough cache misses observed yet to justify a fill.
                if (cacheMisses.Get() < cacheMissesUntilFill)
                {
                    return;
                }

                if (!shouldFillCache)
                {
                    // we already filled the cache once, there's no need to re-fill it
                    return;
                }
                shouldFillCache = false;

                InitReaderManager();

                // Set when iteration stops early because the cache reported it is full.
                bool            aborted = false;
                DirectoryReader reader  = readerManager.Acquire();
                try
                {
                    // Both enums are passed back into Iterator()/Docs() on each use
                    // (presumably so they can be reused across segments/terms).
                    TermsEnum termsEnum = null;
                    DocsEnum  docsEnum  = null;
                    foreach (AtomicReaderContext ctx in reader.Leaves)
                    {
                        Terms terms = ctx.AtomicReader.Terms(Consts.FULL);
                        if (terms != null) // cannot really happen, but be on the safe side
                        {
                            termsEnum = terms.Iterator(termsEnum);
                            while (termsEnum.Next() != null)
                            {
                                if (!cache.Full)
                                {
                                    BytesRef t = termsEnum.Term();
                                    // Since we guarantee uniqueness of categories, each term has exactly
                                    // one document. Also, since we do not allow removing categories (and
                                    // hence documents), there are no deletions in the index. Therefore, it
                                    // is sufficient to call next(), and then doc(), exactly once with no
                                    // 'validation' checks.
                                    FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(t.Utf8ToString()));
                                    docsEnum = termsEnum.Docs(null, docsEnum, DocsEnum.FLAG_NONE);
                                    // The segment-local doc id plus the segment's DocBase is stored
                                    // as the cached value for this category.
                                    bool res = cache.Put(cp, docsEnum.NextDoc() + ctx.DocBase);
                                    Debug.Assert(!res, "entries should not have been evicted from the cache");
                                }
                                else
                                {
                                    // the cache is full and the next put() will evict entries from it, therefore abort the iteration.
                                    aborted = true;
                                    break;
                                }
                            }
                        }
                        // Propagate the inner break out of the per-segment loop as well.
                        if (aborted)
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    // Always hand the acquired reader back to the manager.
                    readerManager.Release(reader);
                }

                // The cache is complete only if every term was visited without aborting.
                cacheIsComplete = !aborted;
                if (cacheIsComplete)
                {
                    // NOTE(review): this lock is re-entrant and already held by the
                    // enclosing lock (this), so the nested block adds no extra exclusion.
                    lock (this)
                    {
                        // everything is in the cache, so no need to keep readerManager open.
                        // this block is executed in a sync block so that it works well with
                        // initReaderManager called in parallel.
                        readerManager.Dispose();
                        readerManager            = null;
                        initializedReaderManager = false;
                    }
                }
            }
        }
示例#19
0
        /// <summary>Simple command-line based search demo.</summary>
        /// <param name="args">
        /// <c>args[0]</c> is the index directory. The remaining arguments are options:
        /// <c>-f|--field</c>, <c>-r|--repeat</c>, <c>-qf|--queries-file</c>,
        /// <c>-q|--query</c>, <c>--raw</c> and <c>-p|--page-size</c>. The legacy
        /// spellings <c>-field</c> and <c>--paging</c> are still accepted.
        /// </param>
        public static void Main(string[] args)
        {
            // The <CONSOLE_APP_NAME> should be the assembly name of the application
            // this code is compiled into. In .NET Framework, it is the name of the EXE file.
            // In .NET Core, you have the option of compiling this into either a DLL or an EXE
            // (see https://docs.microsoft.com/en-us/dotnet/core/deploying/index).
            // In the first case, the <CONSOLE_APP_NAME> will be "dotnet <DLL_NAME>.dll".
            string usage = "Usage: <CONSOLE_APP_NAME> <INDEX_DIRECTORY> [-f|--field <FIELD>] " +
                           "[-r|--repeat <NUMBER>] [-qf|--queries-file <PATH>] [-q|--query <QUERY>] " +
                           "[--raw] [-p|--page-size <NUMBER>]\n\n" +
                           "Use no --query or --queries-file option for interactive mode.\n\n" +
                           "See http://lucene.apache.org/core/4_8_0/demo/ for details.";

            if (args.Length < 1 ||
                "?".Equals(args[0], StringComparison.Ordinal) ||
                "-h".Equals(args[0], StringComparison.Ordinal) ||
                "--help".Equals(args[0], StringComparison.Ordinal))
            {
                Console.WriteLine(usage);
                Environment.Exit(0);
            }

            string index       = args[0];
            string field       = "contents";
            string queries     = null;
            int    repeat      = 0;
            bool   raw         = false;
            string queryString = null;
            int    hitsPerPage = 10;

            // Returns the value that follows the option at args[position] and advances
            // position onto it. Prints the usage text and exits with code 1 when the
            // option is the last argument, instead of indexing past the end of args.
            string ReadOptionValue(ref int position)
            {
                if (position + 1 >= args.Length)
                {
                    Console.WriteLine("Missing value for option " + args[position] + ".\n\n" + usage);
                    Environment.Exit(1);
                }
                position++;
                return args[position];
            }

            // args[0] is the index directory, so option scanning starts at 1.
            // A switch on string compares ordinally, like the explicit comparisons it replaces.
            for (int i = 1; i < args.Length; i++)
            {
                switch (args[i])
                {
                    case "-f":
                    case "--field":
                    case "-field": // legacy spelling, kept for backward compatibility
                        field = ReadOptionValue(ref i);
                        break;

                    case "-qf":
                    case "--queries-file":
                        queries = ReadOptionValue(ref i);
                        break;

                    case "-q":
                    case "--query":
                        queryString = ReadOptionValue(ref i);
                        break;

                    case "-r":
                    case "--repeat":
                        repeat = int.Parse(ReadOptionValue(ref i), CultureInfo.InvariantCulture);
                        break;

                    case "--raw":
                        raw = true;
                        break;

                    case "-p":
                    case "--page-size":
                    case "--paging": // legacy spelling, kept for backward compatibility
                        hitsPerPage = int.Parse(ReadOptionValue(ref i), CultureInfo.InvariantCulture);
                        if (hitsPerPage <= 0)
                        {
                            Console.WriteLine("There must be at least 1 hit per page.");
                            Environment.Exit(1);
                        }
                        break;
                }
            }

            // Using declarations dispose in reverse order: reader first, then directory.
            using FSDirectory indexDirectory = FSDirectory.Open(index);
            using IndexReader reader = DirectoryReader.Open(indexDirectory);
            IndexSearcher searcher = new IndexSearcher(reader);
            // :Post-Release-Update-Version.LUCENE_XY:
            Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);

            // Only the queries-file reader is owned (and disposed) here; Console.In is
            // never disposed. A using declaration over null is a no-op.
            using StreamReader queriesReader = queries != null
                ? new StreamReader(new FileStream(queries, FileMode.Open, FileAccess.Read), Encoding.UTF8)
                : null;
            TextReader input = queriesReader != null ? (TextReader)queriesReader : Console.In;

            // :Post-Release-Update-Version.LUCENE_XY:
            QueryParser parser = new QueryParser(LuceneVersion.LUCENE_48, field, analyzer);

            while (true)
            {
                if (queries is null && queryString is null)
                {
                    // prompt the user
                    Console.WriteLine("Enter query (or press Enter to exit): ");
                }

                string line = queryString ?? input.ReadLine();

                // End of input or an empty line ends the session.
                if (line is null || line.Length == 0)
                {
                    break;
                }

                line = line.Trim();
                if (line.Length == 0)
                {
                    break;
                }

                Query query = parser.Parse(line);
                Console.WriteLine("Searching for: " + query.ToString(field));

                if (repeat > 0) // repeat & time as benchmark
                {
                    DateTime start = DateTime.UtcNow;
                    for (int i = 0; i < repeat; i++)
                    {
                        searcher.Search(query, null, 100);
                    }
                    DateTime end = DateTime.UtcNow;
                    Console.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
                }

                // Interactive paging only applies when neither a query nor a queries file was given.
                DoPagingSearch(searcher, query, hitsPerPage, raw, queries is null && queryString is null);

                // A query passed on the command line is executed exactly once.
                if (queryString != null)
                {
                    break;
                }
            }
        }
示例#20
0
 /// <summary>
 /// Refreshes the reader stored under <paramref name="appName"/>.
 /// An <see cref="IndexReader"/> is an instance of the index at a given point in time,
 /// so the stored reader is replaced by the result of <c>DirectoryReader.OpenIfChanged</c>
 /// when that call returns a reader; otherwise the stored reader is kept.
 /// Maybe change this method later?
 /// </summary>
 /// <param name="appName">Key of the reader entry in <c>_readers</c>.</param>
 private void UpdateReader(string appName)
 {
     // NOTE(review): the previous reader is not disposed when it is replaced;
     // confirm who owns its lifetime.
     var current = _readers[appName];
     var refreshed = DirectoryReader.OpenIfChanged(current as DirectoryReader);
     _readers[appName] = refreshed ?? current;
 }
        public void Index()
        {
            // Pass 1: walk every document of the entities index and accumulate the
            // range, domain and frequency tables.
            // Pass 2: build one output document per filtered subject group.
            var writerConfig = LuceneIndexDefaults.CreateStandardIndexWriterConfig();

            long processed = 0;

            // Read all lines in the file (IEnumerable, yield)
            // and group them by QCode.
            var groupedSubjects = FileHelper.GetInputLines(InputFilename).GroupBySubject();

            using var entitiesDirectory = FSDirectory.Open(EntitiesIndexPath);
            using var entitiesReader = DirectoryReader.Open(entitiesDirectory);
            var totalDocs = entitiesReader.MaxDoc;

            for (var docId = 0; docId < totalDocs; docId++)
            {
                var storedDoc = entitiesReader.Document(docId);
                var entity = storedDoc.MapEntity();
                var reversePropertyIds = entity.ReverseProperties.Select(p => p.Id.ToInt()).ToList();
                var propertyIds = entity.Properties.Select(p => p.Id.ToInt()).ToList();

                //TODO: Use constant:
                var nonTypePropertyIds = propertyIds.Where(id => !id.Equals(31)).ToList();
                var typeIds = entity.ParentTypes.Select(t => t.ToInt()).ToList();
                // Only referenced by the commented-out check below.
                var isType = entity.IsType;

                //Range
                //TODO: Use constant:
                //if (isType)
                RangeDictionary.AddSafe(31, typeIds);

                foreach (var reverseId in reversePropertyIds)
                {
                    RangeDictionary.AddSafe(reverseId, typeIds);
                }

                //Domain
                DomainDictionary.AddSafe(31, typeIds);

                foreach (var otherId in nonTypePropertyIds)
                {
                    DomainDictionary.AddSafe(otherId, typeIds);
                }

                //Frequency: count one occurrence per property id of this entity
                foreach (var countedId in propertyIds)
                {
                    if (!FrequencyHashTable.ContainsKey(countedId))
                    {
                        FrequencyHashTable.Add(countedId, 0);
                    }
                    FrequencyHashTable[countedId] = (int)FrequencyHashTable[countedId] + 1;
                }

                LogMessage(processed++, "Frequency, Domain, Range", false);
            }
            LogMessage(processed, "Frequency, Domain, Range");
            processed = 0;

            using (var outputDirectory = FSDirectory.Open(OutputDirectory.GetOrCreateDirectory()))
            {
                using var indexWriter = new IndexWriter(outputDirectory, writerConfig);
                foreach (var subjects in groupedSubjects.Where(FilterGroups))
                {
                    var outputDoc = new Document();

                    foreach (var frequencyField in FrequencyGetField(subjects))
                    {
                        outputDoc.Add(frequencyField);
                    }

                    foreach (var domainField in DomainGetField(subjects))
                    {
                        outputDoc.Add(domainField);
                    }

                    foreach (var rangeField in RangeGetField(subjects))
                    {
                        outputDoc.Add(rangeField);
                    }

                    // The rank field added so far (if any) supplies the boost; otherwise 0.0.
                    var rankField = outputDoc.Fields.FirstOrDefault(f => f.Name.Equals(Labels.Rank.ToString()));
                    var boost = rankField != null ? (double)rankField.GetDoubleValue() : 0.0;

                    // Every indexer receives the boost before any of them produces fields.
                    foreach (var indexer in FieldIndexers)
                    {
                        indexer.Boost = boost;
                    }

                    foreach (var indexer in FieldIndexers)
                    {
                        foreach (var indexedField in indexer.GetField(subjects))
                        {
                            outputDoc.Add(indexedField);
                        }
                    }

                    LogProgress(processed++);

                    indexWriter.AddDocument(outputDoc);
                }
            }

            LogProgress(processed, true);
        }
        /// <summary>
        /// Executes a search against the index in <c>_directory</c>.
        /// </summary>
        /// <param name="options">Search options (keywords, fields, boosts, ordering, score threshold, type filter, paging).</param>
        /// <param name="safeSearch">When true, the keywords are escaped with <c>QueryParserBase.Escape</c> before parsing.</param>
        /// <returns>
        /// The matching documents after paging; <c>TotalHits</c> is the number of
        /// matches that passed the score/type filter before paging was applied.
        /// </returns>
        private ILuceneSearchResultCollection PerformSearch(SearchOptions options, bool safeSearch)
        {
            // Result set
            ILuceneSearchResultCollection results = new LuceneSearchResultCollection();

            using var reader = DirectoryReader.Open(_directory);
            var searcher = new IndexSearcher(reader);
            Query query;

            // Safe search: escape query-syntax characters.
            // Note that this overwrites Keywords on the caller's options object.
            if (safeSearch)
            {
                options.Keywords = QueryParserBase.Escape(options.Keywords);
            }

            if (options.Fields.Count == 1)
            {
                // Single-field search
                var queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields[0], _analyzer);
                query = queryParser.Parse(options.Keywords);
            }
            else
            {
                // Multi-field search
                var multiFieldQueryParser = new MultiFieldQueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields.ToArray(), _analyzer, options.Boosts);
                query = GetFuzzyquery(multiFieldQueryParser, options.Keywords);
            }

            // Sort by score first, then by the requested string fields.
            var sortFields = new List<SortField>
            {
                SortField.FIELD_SCORE
            };

            sortFields.AddRange(options.OrderBy.Select(sortField => new SortField(sortField, SortFieldType.STRING)));

            var sort = new Sort(sortFields.ToArray());

            // Keep only hits at or above the requested score ...
            Expression<Func<ScoreDoc, bool>> scoreFilter = m => m.Score >= options.Score;
            if (options.Type != null)
            {
                // ... and, when a type is requested, only documents stored with that type.
                scoreFilter = scoreFilter.And(m => options.Type.AssemblyQualifiedName == searcher.Doc(m.Doc).Get("Type"));
            }

            // Materialise the filtered hits once. The predicate may load stored
            // documents, so enumerating the deferred query twice (once to count,
            // once to page) would repeat that work for every hit.
            var matches = searcher.Search(query, null, options.MaximumNumberOfHits, sort, true, true)
                                  .ScoreDocs
                                  .Where(scoreFilter.Compile())
                                  .ToList();

            results.TotalHits = matches.Count;

            // Paging
            IEnumerable<ScoreDoc> page = matches;
            if (options.Skip.HasValue)
            {
                page = page.Skip(options.Skip.Value);
            }
            if (options.Take.HasValue)
            {
                page = page.Take(options.Take.Value);
            }

            // Build the result set
            foreach (var match in page)
            {
                var doc = searcher.Doc(match.Doc);
                results.Results.Add(new LuceneSearchResult()
                {
                    Score    = match.Score,
                    Document = doc
                });
            }

            return results;
        }
示例#23
0
        public virtual void TestNonIndexedFields()
        {
            // Adds one document whose fields are all StoredField instances, then
            // asks the field cache for each of them and checks that default values
            // come back and that nothing ends up cached.
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            Document storedOnly = new Document();

            string[] fieldNames =
            {
                "bogusbytes",
                "bogusshorts",
                "bogusints",
                "boguslongs",
                "bogusfloats",
                "bogusdoubles",
                "bogusterms",
                "bogustermsindex",
                "bogusmultivalued",
                "bogusbits"
            };
            foreach (string fieldName in fieldNames)
            {
                storedOnly.Add(new StoredField(fieldName, "bogus"));
            }
            indexWriter.AddDocument(storedOnly);
            DirectoryReader directoryReader = indexWriter.Reader;

            indexWriter.Dispose();

            AtomicReader segmentReader = GetOnlySegmentReader(directoryReader);

            IFieldCache fieldCache = FieldCache.DEFAULT;

            // Start from an empty cache so the final check is meaningful.
            fieldCache.PurgeAllCaches();
            Assert.AreEqual(0, fieldCache.CacheEntries.Length);

            // Numeric getters: value 0 is expected for document 0.
            Bytes byteValues = fieldCache.GetBytes(segmentReader, "bogusbytes", true);
            Assert.AreEqual(0, byteValues.Get(0));

            Shorts shortValues = fieldCache.GetShorts(segmentReader, "bogusshorts", true);
            Assert.AreEqual(0, shortValues.Get(0));

            Ints intValues = fieldCache.GetInts(segmentReader, "bogusints", true);
            Assert.AreEqual(0, intValues.Get(0));

            Longs longValues = fieldCache.GetLongs(segmentReader, "boguslongs", true);
            Assert.AreEqual(0, longValues.Get(0));

            Floats floatValues = fieldCache.GetFloats(segmentReader, "bogusfloats", true);
            Assert.AreEqual(0, floatValues.Get(0), 0.0f);

            Doubles doubleValues = fieldCache.GetDoubles(segmentReader, "bogusdoubles", true);
            Assert.AreEqual(0, doubleValues.Get(0), 0.0D);

            // Term getters: an empty value is expected.
            BytesRef termBytes = new BytesRef();
            BinaryDocValues termValues = fieldCache.GetTerms(segmentReader, "bogusterms", true);
            termValues.Get(0, termBytes);
            Assert.AreEqual(0, termBytes.Length);

            SortedDocValues termsIndex = fieldCache.GetTermsIndex(segmentReader, "bogustermsindex");
            Assert.AreEqual(-1, termsIndex.GetOrd(0));
            termsIndex.Get(0, termBytes);
            Assert.AreEqual(0, termBytes.Length);

            // Multi-valued getter: no ordinals are expected for document 0.
            SortedSetDocValues docTermOrds = fieldCache.GetDocTermOrds(segmentReader, "bogusmultivalued");
            docTermOrds.Document = 0;
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, docTermOrds.NextOrd());

            Bits docsWithField = fieldCache.GetDocsWithField(segmentReader, "bogusbits");
            Assert.IsFalse(docsWithField.Get(0));

            // check that we cached nothing
            Assert.AreEqual(0, fieldCache.CacheEntries.Length);
            directoryReader.Dispose();
            directory.Dispose();
        }
示例#24
0
 /// <summary>
 /// Opens a <see cref="DirectoryReader"/> over the supplied <see cref="Directory"/>.
 /// </summary>
 /// <param name="directory">The directory to open the reader on.</param>
 /// <returns>The reader returned by <c>DirectoryReader.Open</c>.</returns>
 protected virtual DirectoryReader OpenIndexReader(Directory directory)
     => DirectoryReader.Open(directory);
        public void TestRandomDiscreteMultiValueHighlighting()
        {
            String[]
            randomValues = new String[3 + Random().nextInt(10 * RANDOM_MULTIPLIER)];
            for (int i = 0; i < randomValues.Length; i++)
            {
                String randomValue;
                do
                {
                    randomValue = TestUtil.RandomSimpleString(Random());
                } while ("".equals(randomValue));
                randomValues[i] = randomValue;
            }

            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                Random(),
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            FieldType customType = new FieldType(TextField.TYPE_STORED);

            customType.StoreTermVectors         = (true);
            customType.StoreTermVectorOffsets   = (true);
            customType.StoreTermVectorPositions = (true);

            int             numDocs   = randomValues.Length * 5;
            int             numFields = 2 + Random().nextInt(5);
            int             numTerms  = 2 + Random().nextInt(3);
            List <Doc>      docs      = new List <Doc>(numDocs);
            List <Document> documents = new List <Document>(numDocs);
            IDictionary <String, ISet <int> > valueToDocId = new HashMap <String, ISet <int> >();

            for (int i = 0; i < numDocs; i++)
            {
                Document   document = new Document();
                String[][] fields   = RectangularArrays.ReturnRectangularArray <string>(numFields, numTerms); //new String[numFields][numTerms];
                for (int j = 0; j < numFields; j++)
                {
                    String[] fieldValues = new String[numTerms];
                    fieldValues[0] = getRandomValue(randomValues, valueToDocId, i);
                    StringBuilder builder = new StringBuilder(fieldValues[0]);
                    for (int k = 1; k < numTerms; k++)
                    {
                        fieldValues[k] = getRandomValue(randomValues, valueToDocId, i);
                        builder.append(' ').append(fieldValues[k]);
                    }
                    document.Add(new Field(F, builder.toString(), customType));
                    fields[j] = fieldValues;
                }
                docs.Add(new Doc(fields));
                documents.Add(document);
            }
            writer.AddDocuments(documents);
            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(dir);

            try
            {
                int highlightIters = 1 + Random().nextInt(120 * RANDOM_MULTIPLIER);
                for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++)
                {
                    String queryTerm = randomValues[Random().nextInt(randomValues.Length)];
                    var    iter      = valueToDocId[queryTerm].GetEnumerator();
                    iter.MoveNext();
                    int randomHit = iter.Current;
                    List <StringBuilder> builders = new List <StringBuilder>();
                    foreach (String[] fieldValues in docs[randomHit].fieldValues)
                    {
                        StringBuilder builder = new StringBuilder();
                        bool          hit     = false;
                        for (int i = 0; i < fieldValues.Length; i++)
                        {
                            if (queryTerm.equals(fieldValues[i]))
                            {
                                builder.append("<b>").append(queryTerm).append("</b>");
                                hit = true;
                            }
                            else
                            {
                                builder.append(fieldValues[i]);
                            }
                            if (i != fieldValues.Length - 1)
                            {
                                builder.append(' ');
                            }
                        }
                        if (hit)
                        {
                            builders.Add(builder);
                        }
                    }

                    FieldQuery     fq    = new FieldQuery(tq(queryTerm), true, true);
                    FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);

                    FieldPhraseList       fpl  = new FieldPhraseList(stack, fq);
                    SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
                    FieldFragList         ffl  = sflb.CreateFieldFragList(fpl, 300);

                    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
                    sfb.IsDiscreteMultiValueHighlighting = (true);
                    String[] actualFragments = sfb.CreateFragments(reader, randomHit, F, ffl, numFields);
                    assertEquals(builders.size(), actualFragments.Length);
                    for (int i = 0; i < actualFragments.Length; i++)
                    {
                        assertEquals(builders[i].toString(), actualFragments[i]);
                    }
                }
            }
            finally
            {
                reader.Dispose();
                dir.Dispose();
            }
        }
示例#26
0
        /// <summary>
        /// Build the suggest index, using up to the specified
        /// amount of temporary RAM while building.  Note that
        /// the weights for the suggestions are ignored.
        /// <para>
        /// Every surface form from <paramref name="iterator"/> is indexed into a temporary
        /// on-disk index (field "body", analyzed with the index analyzer); the resulting terms
        /// are then copied into an FST keyed by term bytes with an encoded total term
        /// frequency as output. The temporary index directory is removed before returning.
        /// </para>
        /// </summary>
        /// <param name="iterator">Source of the suggestions; must not carry payloads or contexts.</param>
        /// <param name="ramBufferSizeMB">RAM buffer size, in MB, handed to the temporary index writer.</param>
        /// <exception cref="System.ArgumentException">
        /// The iterator has payloads or contexts, no terms were produced for the "body" field,
        /// a term's gram count exceeds the configured maximum, or the finished FST is <c>null</c>.
        /// </exception>
        /// <exception cref="System.InvalidOperationException">The temporary index directory could not be deleted.</exception>
        public virtual void Build(IInputIterator iterator, double ramBufferSizeMB)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();

            // LUCENENET specific - using GetRandomFileName() instead of picking a random int
            DirectoryInfo tempIndexPath = null;

            // Keep generating "<TypeName>.index.<random>" paths under the temp dir until the
            // directory is confirmed to exist after Create().
            // NOTE(review): Create() does not fail when the directory already exists, so this
            // loop does not guarantee the directory is new - confirm that is acceptable.
            while (true)
            {
                tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName())));
                tempIndexPath.Create();
                if (System.IO.Directory.Exists(tempIndexPath.FullName))
                {
                    break;
                }
            }

            Directory dir = FSDirectory.Open(tempIndexPath);

            try
            {
                // OpenMode.CREATE plus the caller-supplied RAM buffer size for the temporary index.
#pragma warning disable 612, 618
                IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
                iwc.SetOpenMode(OpenMode.CREATE);
                iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
                IndexWriter writer = new IndexWriter(dir, iwc);

                var ft = new FieldType(TextField.TYPE_NOT_STORED);
                // TODO: if only we had IndexOptions.TERMS_ONLY...
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                ft.OmitNorms    = true;
                ft.Freeze();

                // A single Document/Field pair is reused for every suggestion; only the
                // field's string value changes between AddDocument calls.
                Document doc   = new Document();
                Field    field = new Field("body", "", ft);
                doc.Add(field);

                totTokens = 0;
                IndexReader reader = null;

                // Set to true only once the FST has been built; selects the disposal
                // strategy in the finally block below.
                bool success = false;
                count = 0;
                try
                {
                    // Index every surface form; a null return from Next() ends the input.
                    while (true)
                    {
                        BytesRef surfaceForm = iterator.Next();
                        if (surfaceForm == null)
                        {
                            break;
                        }
                        field.SetStringValue(surfaceForm.Utf8ToString());
                        writer.AddDocument(doc);
                        count++;
                    }
                    // Open a reader directly on the (still open) writer.
                    reader = DirectoryReader.Open(writer, false);

                    // No terms for "body" means nothing usable was indexed.
                    Terms terms = MultiFields.GetTerms(reader, "body");
                    if (terms == null)
                    {
                        throw new System.ArgumentException("need at least one suggestion");
                    }

                    // Move all ngrams into an FST:
                    TermsEnum termsEnum = terms.GetIterator(null);

                    Outputs <long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder <long?> builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

                    // Scratch buffer reused for the term -> Int32sRef conversion of every term.
                    Int32sRef scratchInts = new Int32sRef();
                    while (true)
                    {
                        BytesRef term = termsEnum.Next();
                        if (term == null)
                        {
                            break;
                        }
                        // NOTE(review): CountGrams is defined elsewhere; judging by the message
                        // below it counts grams via separator bytes in the term - confirm.
                        int ngramCount = CountGrams(term);
                        if (ngramCount > grams)
                        {
                            throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                        }
                        // Only single-gram terms contribute to the total token count.
                        if (ngramCount == 1)
                        {
                            totTokens += termsEnum.TotalTermFreq;
                        }

                        // FST entry: term bytes -> EncodeWeight(total term frequency).
                        builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
                    }

                    fst = builder.Finish();
                    if (fst == null)
                    {
                        throw new System.ArgumentException("need at least one suggestion");
                    }
                    //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                    /*
                     * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                     * Util.toDot(fst, pw, true, true);
                     * pw.close();
                     */

                    success = true;
                }
                finally
                {
                    // Writer and reader are always released; which IOUtils method is used
                    // depends on whether the build succeeded.
                    // NOTE(review): DisposeWhileHandlingException presumably suppresses errors
                    // raised during disposal so the original failure propagates - confirm.
                    if (success)
                    {
                        IOUtils.Dispose(writer, reader);
                    }
                    else
                    {
                        IOUtils.DisposeWhileHandlingException(writer, reader);
                    }
                }
            }
            finally
            {
                try
                {
                    IOUtils.Dispose(dir);
                }
                finally
                {
                    // LUCENENET specific - since we are removing the entire directory anyway,
                    // it doesn't make sense to first do a loop in order remove the files.
                    // Let the System.IO.Directory.Delete() method handle that.
                    // We also need to dispose the Directory instance first before deleting from disk.
                    try
                    {
                        System.IO.Directory.Delete(tempIndexPath.FullName, true);
                    }
                    catch (Exception e)
                    {
                        // Any deletion failure is rethrown as InvalidOperationException with the
                        // original exception attached as the inner exception.
                        throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
                    }
                }
            }
        }
        /// <summary>
        /// Randomized test of the all-group-heads collectors.
        /// <para>
        /// Each run indexes a random number of documents carrying a (possibly missing) group value,
        /// three sort keys, a content string and a numeric id, then performs 100 searches and checks
        /// that the group heads returned by a randomly chosen collector match the heads computed by
        /// <c>CreateExpectedGroupHeads</c>.
        /// </para>
        /// </summary>
        public void TestRandom()
        {
            int numberOfRuns = TestUtil.NextInt32(Random, 3, 6);

            for (int iter = 0; iter < numberOfRuns; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
                }

                int numDocs   = TestUtil.NextInt32(Random, 100, 1000) * RANDOM_MULTIPLIER;
                int numGroups = TestUtil.NextInt32(Random, 1, numDocs);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
                }

                // Pool of non-empty random values; used both as group values and as sort keys.
                List<BytesRef> groups = new List<BytesRef>();
                for (int i = 0; i < numGroups; i++)
                {
                    string randomValue;
                    do
                    {
                        // B/c of DV based impl we can't see the difference between an empty string and a null value.
                        // For that reason we don't generate empty string groups.
                        randomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    } while ("".Equals(randomValue, StringComparison.Ordinal));
                    groups.Add(new BytesRef(randomValue));
                }

                // Each content string is "real<0..2> " followed by zero to nine "fake " tokens, so
                // every document matches exactly one of the three "realN" term queries used below.
                string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: create fake content");
                }
                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.append("real").append(Random.nextInt(3)).append(' ');
                    int fakeCount = Random.nextInt(10);
                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
                    {
                        sb.append("fake ");
                    }
                    contentStrings[contentIDX] = sb.toString();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + sb.toString());
                    }
                }

                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
                    Random,
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                         new MockAnalyzer(Random)));
                // Doc values fields are only added when the codec is not the pre-flex "Lucene3x" codec.
                bool          preFlex   = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
                bool          canUseIDV = !preFlex;
                DocValuesType valueType = vts[Random.nextInt(vts.Length)];

                // Two reusable documents sharing the same Field instances: "doc" carries the group
                // field(s), "docNoGroup" omits them. Field values are overwritten per indexed document.
                Document doc        = new Document();
                Document docNoGroup = new Document();
                Field    group      = NewStringField("group", "", Field.Store.NO);
                doc.Add(group);
                Field valuesField = null;
                if (canUseIDV)
                {
                    switch (valueType)
                    {
                        case DocValuesType.BINARY:
                            valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                            break;

                        case DocValuesType.SORTED:
                            valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                            break;

                        default:
                            fail("unhandled type");
                            break;
                    }
                    doc.Add(valuesField);
                }
                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
                doc.Add(sort1);
                docNoGroup.Add(sort1);
                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
                doc.Add(sort2);
                docNoGroup.Add(sort2);
                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
                doc.Add(sort3);
                docNoGroup.Add(sort3);
                Field content = NewTextField("content", "", Field.Store.NO);
                doc.Add(content);
                docNoGroup.Add(content);
                Int32Field id = new Int32Field("id", 0, Field.Store.NO);
                doc.Add(id);
                docNoGroup.Add(id);

                GroupDoc[] groupDocs = new GroupDoc[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    BytesRef groupValue;
                    if (Random.nextInt(24) == 17)
                    {
                        // So we test the "doc doesn't have the group'd
                        // field" case:
                        groupValue = null;
                    }
                    else
                    {
                        groupValue = groups[Random.nextInt(groups.size())];
                    }

                    GroupDoc groupDoc = new GroupDoc(
                        i,
                        groupValue,
                        groups[Random.nextInt(groups.size())],
                        groups[Random.nextInt(groups.size())],
                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                        contentStrings[Random.nextInt(contentStrings.Length)]
                        );

                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
                    }

                    groupDocs[i] = groupDoc;
                    if (groupDoc.group != null)
                    {
                        group.SetStringValue(groupDoc.group.Utf8ToString());
                        if (canUseIDV)
                        {
                            valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString()));
                        }
                    }
                    sort1.SetStringValue(groupDoc.sort1.Utf8ToString());
                    sort2.SetStringValue(groupDoc.sort2.Utf8ToString());
                    sort3.SetStringValue(groupDoc.sort3.Utf8ToString());
                    content.SetStringValue(groupDoc.content);
                    id.SetInt32Value(groupDoc.id);
                    if (groupDoc.group == null)
                    {
                        w.AddDocument(docNoGroup);
                    }
                    else
                    {
                        w.AddDocument(doc);
                    }
                }

                DirectoryReader r = w.GetReader();
                w.Dispose();

                // NOTE: intentional but temporary field cache insanity!
                // docIdToFieldId maps a Lucene doc id to the value of its "id" field;
                // fieldIdToDocID is the inverse mapping.
                FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);
                int[]             fieldIdToDocID = new int[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    int fieldId = docIdToFieldId.Get(i);
                    fieldIdToDocID[fieldId] = i;
                }

                try
                {
                    IndexSearcher s = NewSearcher(r);
                    // Doc values are not used when the searcher ended up on a SlowCompositeReaderWrapper.
                    if (typeof(SlowCompositeReaderWrapper).GetTypeInfo().IsAssignableFrom(s.IndexReader.GetType()))
                    {
                        canUseIDV = false;
                    }
                    else
                    {
                        canUseIDV = !preFlex;
                    }

                    // Record every document's score. Each document must be hit by exactly one of
                    // the three queries: its score is still 0 before assignment and non-zero afterwards.
                    for (int contentID = 0; contentID < 3; contentID++)
                    {
                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                        foreach (ScoreDoc hit in hits)
                        {
                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                            assertTrue(gd.score == 0.0);
                            gd.score = hit.Score;
                            int docId = gd.id;
                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                        }
                    }

                    foreach (GroupDoc gd in groupDocs)
                    {
                        assertTrue(gd.score != 0.0);
                    }

                    for (int searchIter = 0; searchIter < 100; searchIter++)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: searchIter=" + searchIter);
                        }

                        string searchTerm      = "real" + Random.nextInt(3);
                        bool   sortByScoreOnly = Random.nextBoolean();
                        Sort   sortWithinGroup = GetRandomSort(sortByScoreOnly);
                        AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                        int[] actualGroupHeads   = allGroupHeadsCollector.RetrieveGroupHeads();
                        // The actual group heads contains Lucene ids. Need to change them into our id value.
                        for (int i = 0; i < actualGroupHeads.Length; i++)
                        {
                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                        }
                        // Allows us the easily iterate and assert the actual and expected results.
                        Array.Sort(expectedGroupHeads);
                        Array.Sort(actualGroupHeads);

                        if (VERBOSE)
                        {
                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                            Console.WriteLine("Sort within group: " + sortWithinGroup);
                            Console.WriteLine("Num group: " + numGroups);
                            Console.WriteLine("Num doc: " + numDocs);
                            Console.WriteLine("\n=== Expected: \n");
                            foreach (int expectedDocId in expectedGroupHeads)
                            {
                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                                string   expectedGroup    = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                                // Alignment must precede the format string: {index,alignment:format}.
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1,5:0.0#######} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                                  expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                                    );
                            }
                            Console.WriteLine("\n=== Actual: \n");
                            foreach (int actualDocId in actualGroupHeads)
                            {
                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
                                string   actualGroup    = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                                // Alignment must precede the format string: {index,alignment:format}.
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1,5:0.0#######} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                                  actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                                    );
                            }
                            Console.WriteLine("\n===================================================================================");
                        }

                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
                    }
                }
                finally
                {
                    // Always drop the field cache entries created for this reader.
                    QueryUtils.PurgeFieldCache(r);
                }

                r.Dispose();
                dir.Dispose();
            }
        }
示例#28
0
        /// <summary>
        /// Indexes one document that has a binary, a sorted, a numeric and (when the codec supports
        /// it) a sorted-set doc values field, then verifies for each field which
        /// <c>FieldCache</c> / <c>DocTermOrds</c> accessors return the indexed value and which ones
        /// reject the field with an <see cref="InvalidOperationException"/>.
        /// </summary>
        public virtual void TestDocValuesIntegration()
        {
            AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues);

            Directory directory = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory, config);

            Document document = new Document();
            document.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
            document.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
            document.Add(new NumericDocValuesField("numeric", 42));
            if (DefaultCodecSupportsSortedSet)
            {
                document.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
                document.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
            }
            indexWriter.AddDocument(document);

            DirectoryReader directoryReader = indexWriter.GetReader();
            indexWriter.Dispose();
            AtomicReader ar = GetOnlySegmentReader(directoryReader);

            BytesRef scratch = new BytesRef();

            // ---- Binary type: only GetTerms() (and GetDocsWithField()) may succeed ----
            ExpectInvalidOperation(() => FieldCache.DEFAULT.GetInt32s(ar, "binary", false));

            BinaryDocValues binaryTerms = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
            binaryTerms.Get(0, scratch);
            Assert.AreEqual("binary value", scratch.Utf8ToString());

            ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTermsIndex(ar, "binary"));
            ExpectInvalidOperation(() => FieldCache.DEFAULT.GetDocTermOrds(ar, "binary"));
            ExpectInvalidOperation(() => new DocTermOrds(ar, null, "binary"));

            IBits binaryDocsWithField = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
            Assert.IsTrue(binaryDocsWithField.Get(0));

            // ---- Sorted type: GetTerms(), GetTermsIndex() and GetDocTermOrds() succeed ----
            ExpectInvalidOperation(() => FieldCache.DEFAULT.GetInt32s(ar, "sorted", false));
            ExpectInvalidOperation(() => new DocTermOrds(ar, null, "sorted"));

            BinaryDocValues sortedTerms = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
            sortedTerms.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedDocValues sortedIndex = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
            Assert.AreEqual(0, sortedIndex.GetOrd(0));
            Assert.AreEqual(1, sortedIndex.ValueCount);
            sortedIndex.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedSetDocValues sortedOrds = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
            sortedOrds.SetDocument(0);
            Assert.AreEqual(0, sortedOrds.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedOrds.NextOrd());
            Assert.AreEqual(1, sortedOrds.ValueCount);

            IBits sortedDocsWithField = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
            Assert.IsTrue(sortedDocsWithField.Get(0));

            // ---- Numeric type: only the numeric accessors (here GetInt32s()) succeed ----
            Int32s numericValues = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
            Assert.AreEqual(42, numericValues.Get(0));

            ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTerms(ar, "numeric", true));
            ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTermsIndex(ar, "numeric"));
            ExpectInvalidOperation(() => FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric"));
            ExpectInvalidOperation(() => new DocTermOrds(ar, null, "numeric"));

            IBits numericDocsWithField = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
            Assert.IsTrue(numericDocsWithField.Get(0));

            // ---- SortedSet type: only GetDocTermOrds() succeeds ----
            if (DefaultCodecSupportsSortedSet)
            {
                ExpectInvalidOperation(() => FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false));
                ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTerms(ar, "sortedset", true));
                ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset"));
                ExpectInvalidOperation(() => new DocTermOrds(ar, null, "sortedset"));

                SortedSetDocValues sortedSetOrds = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
                sortedSetOrds.SetDocument(0);
                Assert.AreEqual(0, sortedSetOrds.NextOrd());
                Assert.AreEqual(1, sortedSetOrds.NextOrd());
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSetOrds.NextOrd());
                Assert.AreEqual(2, sortedSetOrds.ValueCount);

                IBits sortedSetDocsWithField = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
                Assert.IsTrue(sortedSetDocsWithField.Get(0));
            }

            directoryReader.Dispose();
            directory.Dispose();
        }

        /// <summary>
        /// Invokes <paramref name="action"/> and fails the test if it returns normally; an
        /// <see cref="InvalidOperationException"/> thrown by the action is swallowed as the
        /// expected outcome. Any other exception propagates to the caller.
        /// </summary>
        private static void ExpectInvalidOperation(Action action)
        {
            try
            {
                action();
                Assert.Fail();
            }
            catch (InvalidOperationException)
            {
                // expected
            }
        }
        /// <summary>
        /// One-time fixture setup. Builds three indexes from ten documents whose "data" field holds
        /// the values 0 through 9: <c>dir</c> receives all of them, <c>sdir1</c> the even-numbered
        /// and <c>sdir2</c> the odd-numbered ones. It then opens a plain reader/searcher over
        /// <c>dir</c>, a multi reader/searcher over <c>sdir1</c> + <c>sdir2</c>, and a second multi
        /// reader/searcher over <c>sdir1</c> + <c>dir</c> (so the even documents appear twice).
        /// </summary>
        public override void BeforeClass()
        {
            base.BeforeClass();

            dir   = NewDirectory();
            sdir1 = NewDirectory();
            sdir2 = NewDirectory();

            RandomIndexWriter fullWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, new MockAnalyzer(Random));
            RandomIndexWriter evenWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, sdir1, new MockAnalyzer(Random));
            RandomIndexWriter oddWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, sdir2, new MockAnalyzer(Random));

            for (int docNumber = 0; docNumber < 10; docNumber++)
            {
                Document document = new Document();
                document.Add(NewStringField("data", Convert.ToString(docNumber), Field.Store.NO));

                // Every document goes to the full index and to exactly one of the split indexes.
                fullWriter.AddDocument(document);
                if (docNumber % 2 == 0)
                {
                    evenWriter.AddDocument(document);
                }
                else
                {
                    oddWriter.AddDocument(document);
                }
            }

            // Merge each index down to a single segment, then close the writers.
            fullWriter.ForceMerge(1);
            evenWriter.ForceMerge(1);
            oddWriter.ForceMerge(1);
            fullWriter.Dispose();
            evenWriter.Dispose();
            oddWriter.Dispose();

            reader   = DirectoryReader.Open(dir);
            searcher = NewSearcher(reader);

            // Disjoint halves: even documents + odd documents.
            IndexReader evenReader = DirectoryReader.Open(sdir1);
            IndexReader oddReader  = DirectoryReader.Open(sdir2);
            multiReader   = new MultiReader(new IndexReader[] { evenReader, oddReader }, true);
            multiSearcher = NewSearcher(multiReader);

            // Overlapping sub-readers: even documents + the full index.
            IndexReader evenReaderAgain = DirectoryReader.Open(sdir1);
            IndexReader fullReader      = DirectoryReader.Open(dir);
            multiReaderDupls   = new MultiReader(new IndexReader[] { evenReaderAgain, fullReader }, true);
            multiSearcherDupls = NewSearcher(multiReaderDupls);
        }
示例#30
0
        /// <summary>
        /// Picks a random document id, loads the matching document through a
        /// <c>LazyTestingStoredFieldVisitor</c> and verifies, in three passes over the
        /// resulting document, which fields are <c>LazyDocument.LazyField</c> instances
        /// and which of them report <c>HasBeenLoaded</c>.
        /// </summary>
        public void TestLazy()
        {
            int targetId = Random.nextInt(NUM_DOCS);

            // The using statement disposes the reader on every exit path.
            using (IndexReader openReader = DirectoryReader.Open(dir))
            {
                Query idQuery = new TermQuery(new Term("docid", "" + targetId));
                IndexSearcher idSearcher = NewSearcher(openReader);
                ScoreDoc[] matches = idSearcher.Search(idQuery, 100).ScoreDocs;
                assertEquals("Too many docs", 1, matches.Length);

                LazyTestingStoredFieldVisitor visitor = new LazyTestingStoredFieldVisitor(
                    new LazyDocument(openReader, matches[0].Doc), FIELDS);
                openReader.Document(matches[0].Doc, visitor);
                Document loaded = visitor.doc;

                int totalValues = 0;
                IDictionary<string, int> perFieldCounts = new JCG.Dictionary<string, int>();

                // Pass 1: everything except docid must be a LazyField that is not loaded yet.
                foreach (IIndexableField current in loaded)
                {
                    totalValues++;
                    string name = current.Name;

                    if (name.Equals("never_load", StringComparison.Ordinal))
                    {
                        fail("never_load was loaded");
                    }
                    if (name.Equals("load_later", StringComparison.Ordinal))
                    {
                        fail("load_later was loaded on first pass");
                    }

                    if (name.Equals("docid", StringComparison.Ordinal))
                    {
                        assertFalse(name, current is LazyDocument.LazyField);
                        continue;
                    }

                    // TryGetValue leaves the out value at 0 when the key is absent.
                    perFieldCounts.TryGetValue(name, out int seenSoFar);
                    perFieldCounts.Put(name, seenSoFar + 1);

                    LazyDocument.LazyField lazy = current as LazyDocument.LazyField;
                    assertTrue(name + " is " + current.GetType(), lazy != null);
                    assertFalse(name + " is loaded", lazy.HasBeenLoaded);
                }

                Console.WriteLine("numFieldValues == " + totalValues);
                assertEquals("numFieldValues", 1 + (NUM_VALUES * FIELDS.Length),
                             totalValues);

                foreach (var entry in perFieldCounts)
                {
                    assertEquals("fieldName count: " + entry.Key,
                                 NUM_VALUES, entry.Value);
                }

                // Read one value of one randomly chosen field.
                string fieldName = FIELDS[Random.nextInt(FIELDS.Length)];
                IIndexableField[] fieldValues = loaded.GetFields(fieldName);
                assertEquals("#vals in field: " + fieldName,
                             NUM_VALUES, fieldValues.Length);
                int valNum = Random.nextInt(fieldValues.Length);
                assertEquals(targetId + "_" + fieldName + "_" + valNum,
                             fieldValues[valNum].GetStringValue());

                // Pass 2: exactly the values of fieldName should now report as loaded.
                foreach (IIndexableField current in loaded)
                {
                    string name = current.Name;

                    if (name.Equals("never_load", StringComparison.Ordinal))
                    {
                        fail("never_load was loaded");
                    }
                    if (name.Equals("load_later", StringComparison.Ordinal))
                    {
                        fail("load_later was loaded too soon");
                    }

                    if (name.Equals("docid", StringComparison.Ordinal))
                    {
                        assertFalse(name, current is LazyDocument.LazyField);
                        continue;
                    }

                    LazyDocument.LazyField lazy = current as LazyDocument.LazyField;
                    assertTrue(name + " is " + current.GetType(), lazy != null);
                    assertEquals(name + " is loaded?",
                                 lazy.Name.Equals(fieldName, StringComparison.Ordinal), lazy.HasBeenLoaded);
                }

                // Visit the same hit again, this time asking only for "load_later"
                // (a new LazyDocument is constructed for this visitor).
                visitor = new LazyTestingStoredFieldVisitor(
                    new LazyDocument(openReader, matches[0].Doc), "load_later");
                openReader.Document(matches[0].Doc, visitor);
                loaded = visitor.doc;

                // Pass 3: same loaded/unloaded expectation as pass 2, without the
                // load_later restriction.
                foreach (IIndexableField current in loaded)
                {
                    string name = current.Name;

                    if (name.Equals("never_load", StringComparison.Ordinal))
                    {
                        fail("never_load was loaded");
                    }

                    if (name.Equals("docid", StringComparison.Ordinal))
                    {
                        assertFalse(name, current is LazyDocument.LazyField);
                        continue;
                    }

                    LazyDocument.LazyField lazy = current as LazyDocument.LazyField;
                    assertTrue(name + " is " + current.GetType(), lazy != null);
                    assertEquals(name + " is loaded?",
                                 lazy.Name.Equals(fieldName, StringComparison.Ordinal), lazy.HasBeenLoaded);
                }

                // The document behind the lazy wrapper must not contain never_load either.
                assertNull("never_load was loaded in wrapped doc",
                           visitor.lazyDoc.GetDocument().GetField("never_load"));
            }
        }
示例#31
0
        /// <summary>
        /// Construct a Taxonomy writer.
        /// </summary>
        /// <param name="directory">
        ///    The <see cref="Directory"/> in which to store the taxonomy. Note that
        ///    the taxonomy is written directly to that directory (not to a
        ///    subdirectory of it). </param>
        /// <param name="openMode">
        ///    Specifies how to open a taxonomy for writing: <c>APPEND</c>
        ///    means open an existing index for append (failing if the index does
        ///    not yet exist). <c>CREATE</c> means create a new index (first
        ///    deleting the old one if it already existed).
        ///    <c>CREATE_OR_APPEND</c> appends to an existing index if there
        ///    is one, otherwise it creates a new index. </param>
        /// <param name="cache">
        ///    A <see cref="TaxonomyWriterCache"/> implementation which determines
        ///    the in-memory caching policy. See for example
        ///    <see cref="LruTaxonomyWriterCache"/> and <see cref="Cl2oTaxonomyWriterCache"/>.
        ///    If null, <see cref="DefaultTaxonomyWriterCache()"/> is used. </param>
        /// <exception cref="CorruptIndexException">
        ///     if the taxonomy is corrupted. </exception>
        /// <exception cref="LockObtainFailedException">
        ///     if the taxonomy is locked by another writer. If it is known
        ///     that no other concurrent writer is active, the lock might
        ///     have been left around by an old dead process, and should be
        ///     removed by unlocking the directory (<c>unlock(Directory)</c> in the
        ///     original Java API; confirm the .NET name). </exception>
        /// <exception cref="IOException">
        ///     if another error occurred. </exception>
        public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache)
        {
            dir = directory;
            IndexWriterConfig config = CreateIndexWriterConfig(openMode);

            indexWriter = OpenIndexWriter(dir, config);

            // verify (to some extent) that merge policy in effect would preserve category docids
            if (indexWriter != null)
            {
                Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging none-adjacent segments is not allowed");
            }

            // after we opened the writer, and the index is locked, it's safe to check
            // the commit data and read the index epoch.
            // The config created above is authoritative for the open mode from here on.
            openMode = config.OpenMode ?? OpenMode.CREATE_OR_APPEND;
            if (!DirectoryReader.IndexExists(directory))
            {
                indexEpoch = 1;
            }
            else
            {
                string epochStr = null;
                IDictionary<string, string> commitData = ReadCommitData(directory);
                if (commitData != null)
                {
                    // Single lookup; leaves epochStr null when the key is absent.
                    commitData.TryGetValue(INDEX_EPOCH, out epochStr);
                }
                // no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack
                // of a better value. The stored epoch is parsed as a base-16 number.
                indexEpoch = epochStr == null ? 1 : Convert.ToInt64(epochStr, 16);
            }

            // Opening in CREATE mode bumps the epoch.
            if (openMode == OpenMode.CREATE)
            {
                ++indexEpoch;
            }

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.OmitNorms      = true;
            parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
            fullPathField     = new StringField(Consts.FULL, "", Field.Store.YES);

            // OpenIndexWriter may hand back null; in that case nextID and the cache are left unset.
            if (indexWriter == null)
            {
                return;
            }

            nextID = indexWriter.MaxDoc;

            if (cache == null)
            {
                cache = DefaultTaxonomyWriterCache();
            }
            this.cache = cache;

            if (nextID == 0)
            {
                cacheIsComplete = true;
                // Make sure that the taxonomy always contain the root category
                // with category id 0.
                AddCategory(new FacetLabel());
            }
            else
            {
                // There are some categories on the disk, which we have not yet
                // read into the cache, and therefore the cache is incomplete.
                // We choose not to read all the categories into the cache now,
                // to avoid terrible performance when a taxonomy index is opened
                // to add just a single category. We will do it later, after we
                // notice a few cache misses.
                cacheIsComplete = false;
            }
        }
        /// <summary>
        /// Creates a <see cref="DirectoryTaxonomyReader"/> on top of an existing
        /// <see cref="DirectoryTaxonomyWriter"/> (for NRT).
        /// </summary>
        /// <param name="taxoWriter">
        ///          The <see cref="DirectoryTaxonomyWriter"/> whose newly added
        ///          categories this reader should see in real time. Its epoch and
        ///          internal index writer are read here. </param>
        public DirectoryTaxonomyReader(DirectoryTaxonomyWriter taxoWriter)
        {
            this.taxoWriter = taxoWriter;
            this.taxoEpoch = taxoWriter.TaxonomyEpoch;
            this.indexReader = OpenIndexReader(taxoWriter.InternalIndexWriter);

            // Both caches are created with the default size, DEFAULT_CACHE_VALUE.
            this.ordinalCache = new LRUHashMap<FacetLabel, IntClass>(DEFAULT_CACHE_VALUE);
            this.categoryCache = new LRUHashMap<int, FacetLabel>(DEFAULT_CACHE_VALUE);
        }
示例#33
0
        /// <summary>
        /// Resolves <paramref name="categoryPath"/> to its ordinal, consulting the
        /// cache first and the on-disk index only when the cache cannot answer.
        /// </summary>
        /// <param name="categoryPath"> The category to look up. </param>
        /// <returns>
        /// The category's ordinal, or a negative number when the category is not
        /// yet part of the taxonomy.
        /// </returns>
        protected virtual int FindCategory(FacetLabel categoryPath)
        {
            lock (this)
            {
                // A cache hit, or a miss on a complete cache, is a final answer.
                int ordinal = cache.Get(categoryPath);
                if (cacheIsComplete || ordinal >= 0)
                {
                    return ordinal;
                }

                cacheMisses.IncrementAndGet();

                // Loading every category from disk is deferred until several misses
                // have accumulated: doing it on the first miss (or when the writer
                // is opened) would heavily penalise opening a taxonomy just to add
                // one category.
                PerhapsFillCache();
                ordinal = cache.Get(categoryPath);
                if (cacheIsComplete || ordinal >= 0)
                {
                    // Either the fill produced the category, or the cache is now
                    // complete and its answer is final.
                    return ordinal;
                }

                // Cache is still incomplete and does not know the category:
                // the on-disk index has to be consulted.
                InitReaderManager();

                int foundDoc = -1;
                DirectoryReader reader = readerManager.Acquire();
                try
                {
                    string pathText = FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length);
                    BytesRef wantedTerm = new BytesRef(pathText);
                    TermsEnum reusableEnum = null; // handed back to Iterator() for reuse
                    DocsEnum reusableDocs = null;  // handed back to Docs() for reuse

                    foreach (AtomicReaderContext leaf in reader.Leaves)
                    {
                        Terms fullPathTerms = leaf.AtomicReader.Terms(Consts.FULL);
                        if (fullPathTerms == null)
                        {
                            continue;
                        }

                        reusableEnum = fullPathTerms.Iterator(reusableEnum);
                        if (!reusableEnum.SeekExact(wantedTerm))
                        {
                            continue;
                        }

                        // liveDocs is null because the taxonomy has no deletes; freqs are not needed.
                        reusableDocs = reusableEnum.Docs(null, reusableDocs, 0);
                        // A found term has exactly one document; DocBase shifts the
                        // segment-local id by the leaf's base.
                        foundDoc = reusableDocs.NextDoc() + leaf.DocBase;
                        break;
                    }
                }
                finally
                {
                    readerManager.Release(reader);
                }

                if (foundDoc > 0)
                {
                    AddToCache(categoryPath, foundDoc);
                }
                return foundDoc;
            }
        }
        /// <summary>
        /// Opens a reader over the taxonomy stored in the given <see cref="Directory"/>.
        /// No taxonomy writer is attached, and the epoch is set to <c>-1</c>.
        /// </summary>
        /// <param name="directory">
        ///          The <see cref="Directory"/> that holds the taxonomy. </param>
        /// <exception cref="CorruptIndexException">
        ///           if the Taxonomy is corrupt. </exception>
        /// <exception cref="IOException">
        ///           if another error occurred. </exception>
        public DirectoryTaxonomyReader(Directory directory)
        {
            this.indexReader = OpenIndexReader(directory);
            this.taxoWriter = null;
            this.taxoEpoch = -1;

            // Both caches are created with the default size, DEFAULT_CACHE_VALUE.
            this.ordinalCache = new LRUHashMap<FacetLabel, IntClass>(DEFAULT_CACHE_VALUE);
            this.categoryCache = new LRUHashMap<int, FacetLabel>(DEFAULT_CACHE_VALUE);
        }
 /// <summary>
 /// Wraps <paramref name="in"/>, handing the base constructor a new
 /// <see cref="AssertingSubReaderWrapper"/>.
 /// </summary>
 public AssertingDirectoryReader(DirectoryReader @in) : base(@in, new AssertingSubReaderWrapper()) { }