Example #1
        /// <summary>
        /// Annotates the given sequence of <see cref="Document"/> objects by adding a <b>_highlight</b> field;
        /// the <b>_highlight</b> field will contain the best matching text fragment from the <see cref="Document"/> 
        /// object's full-text field.
        /// </summary>
        /// <param name="hits">The sequence of <see cref="Document"/> objects.</param>
        /// <param name="criteria">The search criteria that produced the hits.</param>
        /// <returns>
        /// The original sequence of Document objects, with a <b>_highlight</b> field added to each Document.
        /// </returns>
        public static IEnumerable<Document> GenerateHighlights(this IEnumerable<Document> hits, SearchCriteria criteria)
        {
            if (hits == null)
                throw new ArgumentNullException(nameof(hits));
            if (criteria == null)
                throw new ArgumentNullException(nameof(criteria));
            if (String.IsNullOrWhiteSpace(criteria.Query))
                throw new ArgumentException("SearchCriteria.Query cannot be empty");

            var documents = hits.ToList();
            try
            {
                var indexDirectory = new RAMDirectory();
                var analyzer = new FullTextAnalyzer();
                var config = new IndexWriterConfig(analyzer).SetRAMBufferSizeMB(_ramBufferSizeMB);
                var writer = new IndexWriter(indexDirectory, config);

                BuidIndex(documents, writer);
                GenerateHighlights(documents, writer, criteria);

                writer.DeleteAll();
                writer.Commit();
                writer.Close();
                indexDirectory.Close();
            }
            catch (Exception ex)
            {
                _log.Error(ex);
            }

            return documents;
        }
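
A minimal usage sketch for the extension method above. The ExecuteSearch helper and the settable SearchCriteria.Query property are assumptions for illustration; Document.Get is the standard Lucene.NET field accessor.

        // Hypothetical usage: ExecuteSearch() stands in for whatever search produced the hits.
        IEnumerable<Document> searchResults = ExecuteSearch();
        var criteria = new SearchCriteria { Query = "lucene highlighting" };

        foreach (Document hit in searchResults.GenerateHighlights(criteria))
        {
            // Each hit now carries a _highlight field with its best-matching fragment.
            Console.WriteLine(hit.Get("_highlight"));
        }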
Example #2
	  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2)
	  {
		this.input = input;
		this.dir1 = dir1;
		this.dir2 = dir2;
		this.docsInFirstIndex = docsInFirstIndex;
		this.config1 = config1;
		this.config2 = config2;
	  }
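
A hedged sketch of how this constructor is typically driven. PKIndexSplitter.Split() and TermRangeFilter are Lucene.NET types; the "id" field and the "5000" boundary value are illustrative.

	  // Sketch: split one index into two around a primary-key boundary.
	  // Docs matching the filter land in dir1; everything else lands in dir2.
	  var splitter = new PKIndexSplitter(
	      input, dir1, dir2,
	      new TermRangeFilter("id", null, new BytesRef("5000"), true, false),
	      new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)),
	      new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)));
	  splitter.Split();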
Example #3
 public override void SetUp()
 {
     base.SetUp();
     Dir = NewFSDirectory(CreateTempDir("testDFBlockSize"));
     Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     Iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
     Iw = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)Iwc.Clone());
     Iw.RandomForceMerge = false; // we will force merge ourselves
 }
Example #4
        private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
        {
            LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
            logDocMergePolicy.NoCFSRatio = 0.0; // make sure we use plain files
            conf.SetMergePolicy(logDocMergePolicy);

            IndexWriter writer = new IndexWriter(dir, conf);
            return writer;
        }
Example #5
        public ThumbnailIndexer(IndexPreferences indexPreferences)
        {
            this.indexPreferences = indexPreferences;

            if (!System.IO.Directory.Exists(Preferences.Instance.ThumbIndexFolder))
            {
                logger.Info("Creating thumbs folder: '{0}'", Preferences.Instance.ThumbIndexFolder);
                System.IO.Directory.CreateDirectory(Preferences.Instance.ThumbIndexFolder);
            }

            var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing());
            writer = new IndexWriter(
                FSDirectory.open(new java.io.File(Preferences.Instance.ThumbIndexFolder)),
                config);
            writer.commit();

            searchManager = new SearchManager(writer.getDirectory());

            tasks.Add(Task.Run(DequeueFiles));
            tasks.Add(Task.Run(DequeueFiles));
            tasks.Add(Task.Run(CommitTask));
        }
Example #6
        public DocumentIndexer()
        {
            if (!System.IO.Directory.Exists(Preferences.Instance.MainIndexFolder))
            {
                logger.Info("Creating main index folder: '{0}'", Preferences.Instance.MainIndexFolder);
                System.IO.Directory.CreateDirectory(Preferences.Instance.MainIndexFolder);
            }
            else
            {
                logger.Info("Updating index at '{0}'", Preferences.Instance.MainIndexFolder);
            }

            var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing());
            mainWriter = new IndexWriter(
                FSDirectory.open(new java.io.File(Preferences.Instance.MainIndexFolder)),
                config);
            taxonomyWriter = new DirectoryTaxonomyWriter(
                FSDirectory.open(new java.io.File(Preferences.Instance.FacetIndexFolder)),
                IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

            indexPreferences = new IndexPreferences(mainWriter);
            thumbnailIndexer = new ThumbnailIndexer(indexPreferences);
            startTime = DateTime.Now;
        }
Example #7
        /// <summary>
        /// Build the suggest index, using up to the specified
        /// amount of temporary RAM while building.  Note that
        /// the weights for the suggestions are ignored.
        /// </summary>
        public virtual void Build(IInputEnumerator enumerator, double ramBufferSizeMB)
        {
            // LUCENENET: Added guard clause for null
            if (enumerator is null)
            {
                throw new ArgumentNullException(nameof(enumerator));
            }

            if (enumerator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();

            // LUCENENET specific - using GetRandomFileName() instead of picking a random int
            DirectoryInfo tempIndexPath; // LUCENENET: IDE0059: Remove unnecessary value assignment

            while (true)
            {
                tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName())));
                tempIndexPath.Create();
                if (System.IO.Directory.Exists(tempIndexPath.FullName))
                {
                    break;
                }
            }

            Directory dir = FSDirectory.Open(tempIndexPath);

            try
            {
#pragma warning disable 612, 618
                IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
                iwc.SetOpenMode(OpenMode.CREATE);
                iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
                IndexWriter writer = new IndexWriter(dir, iwc);

                var ft = new FieldType(TextField.TYPE_NOT_STORED);
                // TODO: if only we had IndexOptions.TERMS_ONLY...
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                ft.OmitNorms    = true;
                ft.Freeze();

                Document doc   = new Document();
                Field    field = new Field("body", "", ft);
                doc.Add(field);

                totTokens = 0;
                IndexReader reader = null;

                bool success = false;
                count = 0;
                try
                {
                    while (enumerator.MoveNext())
                    {
                        BytesRef surfaceForm = enumerator.Current;
                        field.SetStringValue(surfaceForm.Utf8ToString());
                        writer.AddDocument(doc);
                        count++;
                    }

                    reader = DirectoryReader.Open(writer, false);

                    Terms terms = MultiFields.GetTerms(reader, "body");
                    if (terms is null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }

                    // Move all ngrams into an FST:
                    TermsEnum termsEnum = terms.GetEnumerator(null);

                    Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

                    Int32sRef scratchInts = new Int32sRef();
                    while (termsEnum.MoveNext())
                    {
                        BytesRef term       = termsEnum.Term;
                        int      ngramCount = CountGrams(term);
                        if (ngramCount > grams)
                        {
                            throw new ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                        }
                        if (ngramCount == 1)
                        {
                            totTokens += termsEnum.TotalTermFreq;
                        }

                        builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
                    }

                    fst = builder.Finish();
                    if (fst is null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }
                    //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                    /*
                     * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                     * Util.toDot(fst, pw, true, true);
                     * pw.close();
                     */

                    success = true;
                }
                finally
                {
                    if (success)
                    {
                        IOUtils.Dispose(writer, reader);
                    }
                    else
                    {
                        IOUtils.DisposeWhileHandlingException(writer, reader);
                    }
                }
            }
            finally
            {
                try
                {
                    IOUtils.Dispose(dir);
                }
                finally
                {
                    // LUCENENET specific - since we are removing the entire directory anyway,
                    // it doesn't make sense to first do a loop in order to remove the files.
                    // Let the System.IO.Directory.Delete() method handle that.
                    // We also need to dispose the Directory instance first before deleting from disk.
                    try
                    {
                        System.IO.Directory.Delete(tempIndexPath.FullName, true);
                    }
                    catch (Exception e)
                    {
                        throw IllegalStateException.Create("failed to remove " + tempIndexPath, e);
                    }
                }
            }
        }
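
A hedged usage sketch of the Build method above: assuming it belongs to a FreeTextSuggester-style lookup, the suggester can be fed from the terms of an existing index field via LuceneDictionary. The index path, field name, and the GetEntryEnumerator() call are assumptions of this sketch.

            // Sketch only: build the suggester from an existing index's "body" field.
            var suggester = new FreeTextSuggester(new StandardAnalyzer(LuceneVersion.LUCENE_48));
            using (var reader = DirectoryReader.Open(FSDirectory.Open("/path/to/index")))
            {
                var dict = new LuceneDictionary(reader, "body");
                suggester.Build(dict.GetEntryEnumerator(), ramBufferSizeMB: 64.0);
            }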
Example #8
 public IndexWriterWrapper(LDirectory directory, IndexWriterConfig config) : base(directory, config)
 {
     IsClosing = false;
 }
Example #9
        public virtual void TestNrt()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcoholicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();

            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter  w      = new IndexWriter(dir, iwc);
            var          tw     = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            //int ordLimit = TestNightly ? 100000 : 6000;
            // LUCENENET specific: 100000 facets takes about 2-3 hours. To keep it under
            // the 1 hour free limit of Azure DevOps, this was reduced to 30000.
            int ordLimit = TestNightly ? 30000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousClass(stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop)
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.IsTrue(result.ChildCount > 0);
                            Assert.IsTrue(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, dir);
        }
Example #10
        public virtual void TestChangeCodecAndMerge()
        {
            Directory dir = NewDirectory();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: make new index");
            }
            IndexWriterConfig iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                                            new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetCodec(new MockCodec());

            iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10);
            IndexWriter writer = NewWriter(dir, iwconf);

            AddDocs(writer, 10);
            writer.Commit();
            AssertQuery(new Term("content", "aaa"), dir, 10);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: addDocs3");
            }
            AddDocs3(writer, 10);
            writer.Commit();
            writer.Dispose();

            AssertQuery(new Term("content", "ccc"), dir, 10);
            AssertQuery(new Term("content", "aaa"), dir, 10);
            Codec codec = iwconf.Codec;

            iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                     .SetOpenMode(OpenMode.APPEND).SetCodec(codec);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setNoCFSRatio(0.0);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10);
            iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);

            iwconf.SetCodec(new MockCodec2()); // uses standard for field content
            writer = NewWriter(dir, iwconf);
            // swap in new codec for currently written segments
            if (VERBOSE)
            {
                Console.WriteLine("TEST: add docs w/ Standard codec for content field");
            }
            AddDocs2(writer, 10);
            writer.Commit();
            codec = iwconf.Codec;
            Assert.AreEqual(30, writer.MaxDoc);
            AssertQuery(new Term("content", "bbb"), dir, 10);
            AssertQuery(new Term("content", "ccc"), dir, 10); ////
            AssertQuery(new Term("content", "aaa"), dir, 10);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: add more docs w/ new codec");
            }
            AddDocs2(writer, 10);
            writer.Commit();
            AssertQuery(new Term("content", "ccc"), dir, 10);
            AssertQuery(new Term("content", "bbb"), dir, 20);
            AssertQuery(new Term("content", "aaa"), dir, 10);
            Assert.AreEqual(40, writer.MaxDoc);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now optimize");
            }
            writer.ForceMerge(1);
            Assert.AreEqual(40, writer.MaxDoc);
            writer.Dispose();
            AssertQuery(new Term("content", "ccc"), dir, 10);
            AssertQuery(new Term("content", "bbb"), dir, 20);
            AssertQuery(new Term("content", "aaa"), dir, 10);

            dir.Dispose();
        }
Example #11
 /// <summary>
 /// Open internal index writer, which contains the taxonomy data.
 /// <para>
 /// Extensions may provide their own <seealso cref="IndexWriter"/> implementation or instance.
 /// <br/><b>NOTE:</b> the instance this method returns will be closed upon calling
 /// <seealso cref="#close()"/>.
 /// <br/><b>NOTE:</b> the merge policy in effect must not merge non-adjacent segments. See the
 /// comment in <seealso cref="#createIndexWriterConfig(IndexWriterConfig.OpenMode)"/> for the logic behind this.
 ///
 /// </para>
 /// </summary>
 /// <seealso cref="#createIndexWriterConfig(IndexWriterConfig.OpenMode)"/>
 /// <param name="directory">
 ///          the <seealso cref="Directory"/> on top of which an <seealso cref="IndexWriter"/>
 ///          should be opened. </param>
 /// <param name="config">
 ///          configuration for the internal index writer. </param>
 protected virtual IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
 {
     return new IndexWriter(directory, config);
 }
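
Because this hook is virtual, extensions can intercept writer creation. A minimal sketch of a subclass that does so (the logging is illustrative; the base configuration, including its merge policy, is left untouched):

 // Sketch: override the hook to observe or customize how the writer is opened.
 public class LoggingTaxonomyWriter : DirectoryTaxonomyWriter
 {
     public LoggingTaxonomyWriter(Directory directory) : base(directory) { }

     protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
     {
         Console.WriteLine("Opening taxonomy IndexWriter on " + directory);
         return base.OpenIndexWriter(directory, config);
     }
 }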
Example #12
        public async Task CreateIndexFileAsync(bool forceCreate)
        {
            SpinWait.SpinUntil(() => 0 == Interlocked.Read(ref this.ActiveSearchCount));

            Interlocked.Exchange(ref this.IsSearcherReady, 0);
            this.OnIndexCreating();

            DirectoryInfo indexDirectoryInfo = new DirectoryInfo(this.IndexPath);

            if (!forceCreate && this.CanReuseIndex(indexDirectoryInfo))
            {
                return;
            }

            this.Dispose(true);

            var files = indexDirectoryInfo.GetFiles();

            Array.ForEach(files, file =>
            {
                if (!file.Name.Equals("placeholder.txt", StringComparison.OrdinalIgnoreCase) &&
                    !file.Name.Equals("work.lock", StringComparison.OrdinalIgnoreCase))
                {
                    file.Delete();
                }
            });

            using (SimpleFSDirectory fsDirectory = new SimpleFSDirectory(indexDirectoryInfo))
            {
                using (var analyzer = new AdrivaAnalyzer(LuceneVersion.LUCENE_48))
                {
                    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
                    using (IndexWriter writer = new IndexWriter(fsDirectory, indexWriterConfig))
                    {
                        RawDataResult result = new RawDataResult
                        {
                            HasMore = false
                        };

                        do
                        {
                            result = await this.GetRawDataAsync(result.LastRowId);

                            var rawDataEnumerator = result.Items.GetEnumerator();

                            while (rawDataEnumerator.MoveNext())
                            {
                                Document document = this.ResolveDocument(rawDataEnumerator.Current);
                                if (null != document)
                                {
                                    writer.AddDocument(document, analyzer);
                                }
                            }
                        } while (result.HasMore);

                        writer.Commit();
                    }
                }
            }

            this.OnIndexCreated();
        }
Example #13
            public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
            {
                this.OuterInstance = outerInstance;
                MyNodeID = nodeID;
                Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));
                // TODO: set warmer
                MockAnalyzer analyzer = new MockAnalyzer(Random());
                analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
                IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
                iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
                if (VERBOSE)
                {
                    iwc.InfoStream = new PrintStreamInfoStream(Console.Out);
                }
                Writer = new IndexWriter(Dir, iwc);
                Mgr = new SearcherManager(Writer, true, null);
                Searchers = new SearcherLifetimeManager();

                // Init w/ 0s... caller above will do initial
                // "broadcast" by calling initSearcher:
                CurrentNodeVersions = new long[numNodes];
            }
Example #14
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.MergePolicy"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="mergePolicy">the <see cref="MergePolicy"/> to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetMergePolicy(this IndexWriterConfig config, MergePolicy mergePolicy)
 {
     config.MergePolicy = mergePolicy;
     return config;
 }
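
A minimal sketch of how these builder-style extensions compose into a fluent configuration, assuming the SetXxx extension methods shown in these examples are in scope; the index path is illustrative.

 // Sketch: chain the SetXxx extensions when building a writer configuration.
 var config = new IndexWriterConfig(LuceneVersion.LUCENE_48,
                                    new StandardAnalyzer(LuceneVersion.LUCENE_48))
     .SetOpenMode(OpenMode.CREATE_OR_APPEND)
     .SetMergePolicy(new TieredMergePolicy())
     .SetWriteLockTimeout(10000); // milliseconds

 using (var writer = new IndexWriter(FSDirectory.Open("/path/to/index"), config))
 {
     // ... add documents ...
     writer.Commit();
 }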
Example #15
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.WriteLockTimeout"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="writeLockTimeout">the write lock timeout, in milliseconds</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetWriteLockTimeout(this IndexWriterConfig config, long writeLockTimeout)
 {
     config.WriteLockTimeout = writeLockTimeout;
     return config;
 }
Example #16
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.MergeScheduler"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="mergeScheduler">the <see cref="IMergeScheduler"/> to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetMergeScheduler(this IndexWriterConfig config, IMergeScheduler mergeScheduler)
 {
     config.MergeScheduler = mergeScheduler;
     return config;
 }
Example #17
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.Similarity"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="similarity">the <see cref="Similarity"/> implementation to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetSimilarity(this IndexWriterConfig config, Similarity similarity)
 {
     config.Similarity = similarity;
     return config;
 }
Example #18
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.IndexCommit"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="commit">the <see cref="IndexCommit"/> point to open the index from</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetIndexCommit(this IndexWriterConfig config, IndexCommit commit)
 {
     config.IndexCommit = commit;
     return config;
 }
Example #19
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.IndexDeletionPolicy"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="deletionPolicy">the <see cref="IndexDeletionPolicy"/> to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetIndexDeletionPolicy(this IndexWriterConfig config, IndexDeletionPolicy deletionPolicy)
 {
     config.IndexDeletionPolicy = deletionPolicy;
     return config;
 }
Example #20
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.OpenMode"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="openMode">the <see cref="OpenMode"/> to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetOpenMode(this IndexWriterConfig config, OpenMode openMode)
 {
     config.OpenMode = openMode;
     return config;
 }
Example #21
        public override void Dispose()
        {
            lock (this)
            {
                // files that we tried to delete, but couldn't because readers were open.
                // all that matters is that we tried! (they will eventually go away)
                ISet<string> pendingDeletions = new HashSet<string>(OpenFilesDeleted);
                MaybeYield();
                if (OpenFiles == null)
                {
                    OpenFiles = new Dictionary<string, int>();
                    OpenFilesDeleted = new HashSet<string>();
                }
                if (OpenFiles.Count > 0)
                {
                    // print the first one as its very verbose otherwise
                    Exception cause = null;
                    IEnumerator<Exception> stacktraces = OpenFileHandles.Values.GetEnumerator();
                    if (stacktraces.MoveNext())
                    {
                        cause = stacktraces.Current;
                    }

                    // RuntimeException instead of System.IO.IOException because
                    // super() does not throw System.IO.IOException currently:
                    throw new Exception("MockDirectoryWrapper: cannot close: there are still open files: "
                        + String.Join(" ,", OpenFiles.ToArray().Select(x => x.Key)), cause);
                }
                if (OpenLocks.Count > 0)
                {
                    throw new Exception("MockDirectoryWrapper: cannot close: there are still open locks: "
                        + String.Join(" ,", OpenLocks.ToArray()));
                }

                IsOpen = false;
                if (CheckIndexOnClose)
                {
                    RandomIOExceptionRate_Renamed = 0.0;
                    RandomIOExceptionRateOnOpen_Renamed = 0.0;
                    if (DirectoryReader.IndexExists(this))
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("\nNOTE: MockDirectoryWrapper: now crush");
                        }
                        Crash(); // corrupt any unsynced-files
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
                        }
                        TestUtil.CheckIndex(this, CrossCheckTermVectorsOnClose);

                        // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
                        if (AssertNoUnreferencedFilesOnClose)
                        {
                            // now look for unreferenced files: discount ones that we tried to delete but could not
                            HashSet<string> allFiles = new HashSet<string>(Arrays.AsList(ListAll()));
                            allFiles.RemoveAll(pendingDeletions);
                            string[] startFiles = allFiles.ToArray(/*new string[0]*/);
                            IndexWriterConfig iwc = new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null);
                            iwc.SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
                            (new IndexWriter(@in, iwc)).Rollback();
                            string[] endFiles = @in.ListAll();

                            ISet<string> startSet = new SortedSet<string>(Arrays.AsList(startFiles));
                            ISet<string> endSet = new SortedSet<string>(Arrays.AsList(endFiles));

                            if (pendingDeletions.Contains("segments.gen") && endSet.Contains("segments.gen"))
                            {
                                // this is possible if we hit an exception while writing segments.gen; we try to delete it
                                // and it ends up in pendingDeletions (but IFD won't remove it).
                                startSet.Add("segments.gen");
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("MDW: Unreferenced check: Ignoring segments.gen that we could not delete.");
                                }
                            }

                            // it's possible we cannot delete the segments_N on Windows if someone has it open and
                            // maybe other files too, depending on timing. Normally someone on Windows wouldn't have
                            // an issue (IFD would nuke this stuff eventually), but we pass NoDeletionPolicy...
                            foreach (string file in pendingDeletions)
                            {
                                if (file.StartsWith("segments") && !file.Equals("segments.gen") && endSet.Contains(file))
                                {
                                    startSet.Add(file);
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("MDW: Unreferenced check: Ignoring segments file: " + file + " that we could not delete.");
                                    }
                                    SegmentInfos sis = new SegmentInfos();
                                    try
                                    {
                                        sis.Read(@in, file);
                                    }
                                    catch (System.IO.IOException ioe)
                                    {
                                        // OK: likely some of the .si files were deleted
                                    }

                                    try
                                    {
                                        ISet<string> ghosts = new HashSet<string>(sis.Files(@in, false));
                                        foreach (string s in ghosts)
                                        {
                                            if (endSet.Contains(s) && !startSet.Contains(s))
                                            {
                                                Debug.Assert(pendingDeletions.Contains(s));
                                                if (LuceneTestCase.VERBOSE)
                                                {
                                                    Console.WriteLine("MDW: Unreferenced check: Ignoring referenced file: " + s + " " + "from " + file + " that we could not delete.");
                                                }
                                                startSet.Add(s);
                                            }
                                        }
                                    }
                                    catch (Exception t)
                                    {
                                        Console.Error.WriteLine("ERROR processing leftover segments file " + file + ":");
                                        Console.WriteLine(t.ToString());
                                        Console.Write(t.StackTrace);
                                    }
                                }
                            }

                            startFiles = startSet.ToArray(/*new string[0]*/);
                            endFiles = endSet.ToArray(/*new string[0]*/);

                            if (!Arrays.Equals(startFiles, endFiles))
                            {
                                IList<string> removed = new List<string>();
                                foreach (string fileName in startFiles)
                                {
                                    if (!endSet.Contains(fileName))
                                    {
                                        removed.Add(fileName);
                                    }
                                }

                                IList<string> added = new List<string>();
                                foreach (string fileName in endFiles)
                                {
                                    if (!startSet.Contains(fileName))
                                    {
                                        added.Add(fileName);
                                    }
                                }

                                string extras;
                                if (removed.Count != 0)
                                {
                                    extras = "\n\nThese files were removed: " + removed;
                                }
                                else
                                {
                                    extras = "";
                                }

                                if (added.Count != 0)
                                {
                                    extras += "\n\nThese files were added (waaaaaaaaaat!): " + added;
                                }

                                if (pendingDeletions.Count != 0)
                                {
                                    extras += "\n\nThese files we had previously tried to delete, but couldn't: " + pendingDeletions;
                                }

                                Debug.Assert(false, "unreferenced files: before delete:\n    " + Arrays.ToString(startFiles) + "\n  after delete:\n    " + Arrays.ToString(endFiles) + extras);
                            }

                            DirectoryReader ir1 = DirectoryReader.Open(this);
                            int numDocs1 = ir1.NumDocs;
                            ir1.Dispose();
                            (new IndexWriter(this, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null))).Dispose();
                            DirectoryReader ir2 = DirectoryReader.Open(this);
                            int numDocs2 = ir2.NumDocs;
                            ir2.Dispose();
                            Debug.Assert(numDocs1 == numDocs2, "numDocs changed after opening/closing IW: before=" + numDocs1 + " after=" + numDocs2);
                        }
                    }
                }
                @in.Dispose();
            }
        }
Example #22
        // NOTE: not a test; just here to make sure the code fragment
        // in the javadocs is correct!
        public virtual void VerifyCompiles()
        {
            Analyzer analyzer = null;

            Directory fsDir = FSDirectory.Open(new DirectoryInfo("/path/to/index"));
            NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0);
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            IndexWriter writer = new IndexWriter(cachedFSDir, conf);
        }
Example #23
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.Codec"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="codec">the <see cref="Codec"/> to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetCodec(this IndexWriterConfig config, Codec codec)
 {
     config.Codec = codec;
     return config;
 }
Example #24
        // -- IndexWriterConfig.cs (members)

        /// <summary>
        /// Builder-style method for <see cref="IndexWriterConfig.DefaultWriteLockTimeout"/>.
        /// Note that this sets the static default used by all subsequently created
        /// <see cref="IndexWriterConfig"/> instances; the <paramref name="config"/> instance
        /// itself is not modified, and because the method returns <c>void</c> it cannot be chained.
        /// </summary>
        /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
        /// <param name="writeLockTimeout">the default write lock timeout, in milliseconds</param>
        public static void SetDefaultWriteLockTimeout(this IndexWriterConfig config, long writeLockTimeout)
        {
            IndexWriterConfig.DefaultWriteLockTimeout = writeLockTimeout;
        }
Example #25
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.IndexerThreadPool"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="threadPool">the <see cref="DocumentsWriterPerThreadPool"/> to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 internal static IndexWriterConfig SetIndexerThreadPool(this IndexWriterConfig config, DocumentsWriterPerThreadPool threadPool)
 {
     config.IndexerThreadPool = threadPool;
     return config;
 }
Example #26
        /// <summary>
        /// Construct a Taxonomy writer.
        /// </summary>
        /// <param name="directory">
        ///    The <seealso cref="Directory"/> in which to store the taxonomy. Note that
        ///    the taxonomy is written directly to that directory (not to a
        ///    subdirectory of it). </param>
        /// <param name="openMode">
        ///    Specifies how to open a taxonomy for writing: <code>APPEND</code>
        ///    means open an existing index for append (failing if the index does
        ///    not yet exist). <code>CREATE</code> means create a new index (first
        ///    deleting the old one if it already existed).
        ///    <code>APPEND_OR_CREATE</code> appends to an existing index if there
        ///    is one, otherwise it creates a new index. </param>
        /// <param name="cache">
        ///    A <seealso cref="TaxonomyWriterCache"/> implementation which determines
        ///    the in-memory caching policy. See for example
        ///    <seealso cref="LruTaxonomyWriterCache"/> and <seealso cref="Cl2oTaxonomyWriterCache"/>.
        ///    If null or missing, <seealso cref="#defaultTaxonomyWriterCache()"/> is used. </param>
        /// <exception cref="CorruptIndexException">
        ///     if the taxonomy is corrupted. </exception>
        /// <exception cref="LockObtainFailedException">
        ///     if the taxonomy is locked by another writer. If it is known
        ///     that no other concurrent writer is active, the lock might
        ///     have been left around by an old dead process, and should be
        ///     removed using <seealso cref="#unlock(Directory)"/>. </exception>
        /// <exception cref="IOException">
        ///     if another error occurred. </exception>
        public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache)
        {
            dir = directory;
            IndexWriterConfig config = CreateIndexWriterConfig(openMode);

            indexWriter = OpenIndexWriter(dir, config);

            // verify (to some extent) that merge policy in effect would preserve category docids
            if (indexWriter != null)
            {
                Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging non-adjacent segments is not allowed");
            }

            // after we opened the writer, and the index is locked, it's safe to check
            // the commit data and read the index epoch
            openMode = config.OpenMode.HasValue ? config.OpenMode.Value : OpenMode.CREATE_OR_APPEND;
            if (!DirectoryReader.IndexExists(directory))
            {
                indexEpoch = 1;
            }
            else
            {
                string epochStr = null;
                IDictionary<string, string> commitData = ReadCommitData(directory);
                if (commitData != null && commitData.ContainsKey(INDEX_EPOCH))
                {
                    epochStr = commitData[INDEX_EPOCH];
                }
                // no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack
                // of a better value.
                indexEpoch = epochStr == null ? 1 : Convert.ToInt64(epochStr, 16);
            }

            if (openMode == OpenMode.CREATE)
            {
                ++indexEpoch;
            }

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.OmitNorms      = true;
            parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
            fullPathField     = new StringField(Consts.FULL, "", Field.Store.YES);

            if (indexWriter == null)
            {
                return;
            }

            nextID = indexWriter.MaxDoc;

            if (cache == null)
            {
                cache = DefaultTaxonomyWriterCache();
            }
            this.cache = cache;

            if (nextID == 0)
            {
                cacheIsComplete = true;
                // Make sure that the taxonomy always contains the root category
                // with category id 0.
                AddCategory(new FacetLabel());
            }
            else
            {
                // There are some categories on the disk, which we have not yet
                // read into the cache, and therefore the cache is incomplete.
                // We choose not to read all the categories into the cache now,
                // to avoid terrible performance when a taxonomy index is opened
                // to add just a single category. We will do it later, after we
                // notice a few cache misses.
                cacheIsComplete = false;
            }
        }
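
A hedged usage sketch of this constructor; the taxonomy path and cache size are illustrative, and LruTaxonomyWriterCache is one of the cache implementations the remarks above mention.

            // Sketch: open (or create) a taxonomy index with an LRU caching policy.
            Directory taxoDir = FSDirectory.Open("/path/to/taxonomy");
            var taxoWriter = new DirectoryTaxonomyWriter(
                taxoDir,
                OpenMode.CREATE_OR_APPEND,
                new LruTaxonomyWriterCache(1024));

            taxoWriter.AddCategory(new FacetLabel("Author", "Mark Twain"));
            taxoWriter.Commit();
            taxoWriter.Dispose();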
Example #27
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.MaxThreadStates"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="maxThreadStates">the maximum number of thread states</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetMaxThreadStates(this IndexWriterConfig config, int maxThreadStates)
 {
     config.MaxThreadStates = maxThreadStates;
     return config;
 }
Example #28
        public virtual void TestPostings()
        {
            Directory         dir = NewFSDirectory(CreateTempDir("postings"));
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetCodec(Codec.ForName("Lucene40"));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            Document doc = new Document();

            // id field
            FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);

            idType.StoreTermVectors = true;
            Field idField = new Field("id", "", idType);

            doc.Add(idField);

            // title field: short text field
            FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);

            titleType.StoreTermVectors         = true;
            titleType.StoreTermVectorPositions = true;
            titleType.StoreTermVectorOffsets   = true;
            titleType.IndexOptions             = IndexOptions();
            Field titleField = new Field("title", "", titleType);

            doc.Add(titleField);

            // body field: long text field
            FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);

            bodyType.StoreTermVectors         = true;
            bodyType.StoreTermVectorPositions = true;
            bodyType.StoreTermVectorOffsets   = true;
            bodyType.IndexOptions             = IndexOptions();
            Field bodyField = new Field("body", "", bodyType);

            doc.Add(bodyField);

            int numDocs = AtLeast(1000);

            for (int i = 0; i < numDocs; i++)
            {
                idField.SetStringValue(Convert.ToString(i));
                titleField.SetStringValue(FieldValue(1));
                bodyField.SetStringValue(FieldValue(3));
                iw.AddDocument(doc);
                if (Random().Next(20) == 0)
                {
                    iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
                }
            }
            if (Random().NextBoolean())
            {
                // delete 1-100% of docs
                iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)]));
            }
            iw.Dispose();
            dir.Dispose(); // checkindex
        }
Example #29
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.UseReaderPooling"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="readerPooling"><c>true</c> to enable reader pooling; otherwise <c>false</c></param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetReaderPooling(this IndexWriterConfig config, bool readerPooling)
 {
     config.UseReaderPooling = readerPooling;
     return config;
 }
Example #30
        public virtual void TestDocValuesIntegration()
        {
            AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues);
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
            doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
            doc.Add(new NumericDocValuesField("numeric", 42));
            if (DefaultCodecSupportsSortedSet)
            {
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
            }
            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();
            AtomicReader ar = GetOnlySegmentReader(ir);

            BytesRef scratch = new BytesRef();

            // Binary type: can be retrieved via getTerms()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "binary", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
            binary.Get(0, scratch);
            Assert.AreEqual("binary value", scratch.Utf8ToString());

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
            Assert.IsTrue(bits.Get(0));

            // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "sorted", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "sorted");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
            binary.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
            Assert.AreEqual(0, sorted.GetOrd(0));
            Assert.AreEqual(1, sorted.ValueCount);
            sorted.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
            sortedSet.SetDocument(0);
            Assert.AreEqual(0, sortedSet.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
            Assert.AreEqual(1, sortedSet.ValueCount);

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
            Assert.IsTrue(bits.Get(0));

            // Numeric type: can be retrieved via getInts() and so on
            Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
            Assert.AreEqual(42, numeric.Get(0));

            try
            {
                FieldCache.DEFAULT.GetTerms(ar, "numeric", true);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
            Assert.IsTrue(bits.Get(0));

            // SortedSet type: can be retrieved via getDocTermOrds()
            if (DefaultCodecSupportsSortedSet)
            {
                try
                {
                    FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTerms(ar, "sortedset", true);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    new DocTermOrds(ar, null, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
                sortedSet.SetDocument(0);
                Assert.AreEqual(0, sortedSet.NextOrd());
                Assert.AreEqual(1, sortedSet.NextOrd());
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
                Assert.AreEqual(2, sortedSet.ValueCount);

                bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
                Assert.IsTrue(bits.Get(0));
            }

            ir.Dispose();
            dir.Dispose();
        }
Example #31
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.IndexingChain"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="indexingChain">the <see cref="DocumentsWriterPerThread.IndexingChain"/> to use</param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 internal static IndexWriterConfig SetIndexingChain(this IndexWriterConfig config, DocumentsWriterPerThread.IndexingChain indexingChain)
 {
     config.IndexingChain = indexingChain;
     return config;
 }
Example #32
        public static IndexWriterConfig CreateWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit)
        {
            // :Post-Release-Update-Version.LUCENE_XY:
            LuceneVersion     version = (LuceneVersion)Enum.Parse(typeof(LuceneVersion), config.Get("writer.version", LuceneVersion.LUCENE_48.ToString()));
            IndexWriterConfig iwConf  = new IndexWriterConfig(version, runData.Analyzer);

            iwConf.OpenMode = mode;
            IndexDeletionPolicy indexDeletionPolicy = GetIndexDeletionPolicy(config);

            iwConf.IndexDeletionPolicy = indexDeletionPolicy;
            if (commit != null)
            {
                iwConf.IndexCommit = commit;
            }


            string mergeScheduler = config.Get("merge.scheduler",
                                               "Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net");

#if !FEATURE_CONCURRENTMERGESCHEDULER
            // LUCENENET specific - hack to get our TaskMergeScheduler
            // when a ConcurrentMergeScheduler is requested.
            if (mergeScheduler.Contains(".ConcurrentMergeScheduler,"))
            {
                mergeScheduler = "Lucene.Net.Index.TaskMergeScheduler, Lucene.Net";
            }
#endif
            Type mergeSchedulerType = Type.GetType(mergeScheduler);
            if (mergeSchedulerType == null)
            {
                throw new Exception("Unrecognized merge scheduler type '" + mergeScheduler + "'");
            }
            else if (mergeSchedulerType.Equals(typeof(NoMergeScheduler)))
            {
                iwConf.MergeScheduler = NoMergeScheduler.INSTANCE;
            }
            else
            {
                try
                {
                    iwConf.MergeScheduler = (IMergeScheduler)Activator.CreateInstance(mergeSchedulerType);
                }
                catch (Exception e)
                {
                    throw new Exception("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
                }

                if (mergeScheduler.Equals("Lucene.Net.Index.ConcurrentMergeScheduler", StringComparison.Ordinal))
                {
#if FEATURE_CONCURRENTMERGESCHEDULER
                    ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler)iwConf.MergeScheduler;
                    int maxThreadCount           = config.Get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.DEFAULT_MAX_THREAD_COUNT);
                    int maxMergeCount            = config.Get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.DEFAULT_MAX_MERGE_COUNT);
#else
                    TaskMergeScheduler cms = (TaskMergeScheduler)iwConf.MergeScheduler;
                    int maxThreadCount     = config.Get("concurrent.merge.scheduler.max.thread.count", 1);
                    int maxMergeCount      = config.Get("concurrent.merge.scheduler.max.merge.count", 2);
#endif
                    cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount);
                }
            }

            string defaultCodec = config.Get("default.codec", null);
            if (defaultCodec != null)
            {
                try
                {
                    Type clazz = Type.GetType(defaultCodec);
                    iwConf.Codec = (Codec)Activator.CreateInstance(clazz);
                }
                catch (Exception e)
                {
                    throw new Exception("Couldn't instantiate Codec: " + defaultCodec, e);
                }
            }

            string mergePolicy = config.Get("merge.policy",
                                            "Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net");
            bool isCompound      = config.Get("compound", true);
            Type mergePolicyType = Type.GetType(mergePolicy);
            if (mergePolicyType == null)
            {
                throw new Exception("Unrecognized merge policy type '" + mergePolicy + "'");
            }
            else if (mergePolicyType.Equals(typeof(NoMergePolicy)))
            {
                iwConf.MergePolicy = isCompound ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES;
            }
            else
            {
                try
                {
                    iwConf.MergePolicy = (MergePolicy)Activator.CreateInstance(mergePolicyType);
                }
                catch (Exception e)
                {
                    throw new Exception("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
                }
                iwConf.MergePolicy.NoCFSRatio = isCompound ? 1.0 : 0.0;
                if (iwConf.MergePolicy is LogMergePolicy)
                {
                    LogMergePolicy logMergePolicy = (LogMergePolicy)iwConf.MergePolicy;
                    logMergePolicy.MergeFactor = config.Get("merge.factor", OpenIndexTask.DEFAULT_MERGE_PFACTOR);
                }
            }
            double ramBuffer   = config.Get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
            int    maxBuffered = config.Get("max.buffered", OpenIndexTask.DEFAULT_MAX_BUFFERED);
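            // Assignment order matters below: each setter rejects a combination that
            // would leave both flush triggers disabled at once.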
            if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH)
            {
                iwConf.RAMBufferSizeMB = ramBuffer;
                iwConf.MaxBufferedDocs = maxBuffered;
            }
            else
            {
                iwConf.MaxBufferedDocs = maxBuffered;
                iwConf.RAMBufferSizeMB = ramBuffer;
            }

            return(iwConf);
        }
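        // For reference, the benchmark property keys this method consults, taken from
        // the config.Get calls above (the values shown are illustrative placeholders,
        // not authoritative defaults):
        //
        //   writer.version=LUCENE_48
        //   merge.scheduler=Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net
        //   concurrent.merge.scheduler.max.thread.count=1
        //   concurrent.merge.scheduler.max.merge.count=2
        //   default.codec=Lucene.Net.Codecs.Lucene46.Lucene46Codec, Lucene.Net
        //   merge.policy=Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net
        //   compound=true
        //   merge.factor=10
        //   ram.flush.mb=16.0
        //   max.buffered=-1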
Ejemplo n.º 33
0
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.FlushPolicy"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="flushPolicy"></param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 internal static IndexWriterConfig SetFlushPolicy(this IndexWriterConfig config, FlushPolicy flushPolicy)
 {
     config.FlushPolicy = flushPolicy;
     return(config);
 }
Ejemplo n.º 34
0
        public virtual void TestNRTAndCommit()
        {
            Directory           dir       = NewDirectory();
            NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
            MockAnalyzer        analyzer  = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            RandomIndexWriter w    = new RandomIndexWriter(Random, cachedDir, conf);
            LineFileDocs      docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            int numDocs            = TestUtil.NextInt32(Random, 100, 400);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs);
            }

            IList <BytesRef> ids = new List <BytesRef>();
            DirectoryReader  r   = null;

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = docs.NextDoc();
                ids.Add(new BytesRef(doc.Get("docid")));
                w.AddDocument(doc);
                if (Random.Next(20) == 17)
                {
                    if (r == null)
                    {
                        r = DirectoryReader.Open(w.IndexWriter, false);
                    }
                    else
                    {
                        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                        if (r2 != null)
                        {
                            r.Dispose();
                            r = r2;
                        }
                    }
                    Assert.AreEqual(1 + docCount, r.NumDocs);
                    IndexSearcher s = NewSearcher(r);
                    // Just make sure search can run; we can't assert
                    // totHits since it could be 0
                    TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10);
                    // System.out.println("tot hits " + hits.totalHits);
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            // Close should force cache to clear since all files are sync'd
            w.Dispose();

            string[] cachedFiles = cachedDir.ListCachedFiles();
            foreach (string file in cachedFiles)
            {
                Console.WriteLine("FAIL: cached file " + file + " remains after sync");
            }
            Assert.AreEqual(0, cachedFiles.Length);

            r = DirectoryReader.Open(dir);
            foreach (BytesRef id in ids)
            {
                Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
            }
            r.Dispose();
            cachedDir.Dispose();
            docs.Dispose();
        }
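        // A hedged aside on the NRTCachingDirectory used above: its two double arguments
        // are size caps in megabytes. Newly flushed or merged segments estimated below
        // the first cap are held in RAM while the total cache stays under the second
        // cap; larger output goes straight to the wrapped directory. A minimal
        // standalone sketch (the path and analyzer are placeholders):
        private static IndexWriter OpenNrtCachedWriter(Analyzer analyzer)
        {
            Directory fsDir = FSDirectory.Open("/path/to/index");
            // Cache segments under 5 MB in RAM, up to 60 MB total.
            NRTCachingDirectory cachingDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
            IndexWriterConfig conf = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
            return new IndexWriter(cachingDir, conf);
        }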
Ejemplo n.º 35
0
 /// <summary>
 /// Builder method for <see cref="IndexWriterConfig.RAMPerThreadHardLimitMB"/>.
 /// </summary>
 /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
 /// <param name="perThreadHardLimitMB"></param>
 /// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
 public static IndexWriterConfig SetRAMPerThreadHardLimitMB(this IndexWriterConfig config, int perThreadHardLimitMB)
 {
     config.RAMPerThreadHardLimitMB = perThreadHardLimitMB;
     return(config);
 }
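 // A hedged sketch of the fluent idiom these builder extensions restore, combined
 // with the stock SetOpenMode/SetRAMBufferSizeMB helpers assumed available from the
 // Lucene.Net.Index.Extensions namespace (the analyzer parameter is a placeholder):
 internal static IndexWriterConfig BuildExampleConfig(Analyzer analyzer)
 {
     return new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
         .SetOpenMode(OpenMode.CREATE_OR_APPEND)
         .SetRAMBufferSizeMB(64.0)
         .SetRAMPerThreadHardLimitMB(512);
 }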
Ejemplo n.º 36
0
        private void CreateIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, bool negateFilter)
        {
            bool success = false;
            IndexWriter w = new IndexWriter(target, config);
            try
            {
                IList<AtomicReaderContext> leaves = reader.Leaves;
                IndexReader[] subReaders = new IndexReader[leaves.Count];
                int i = 0;
                foreach (AtomicReaderContext ctx in leaves)
                {
                    // Wrap each leaf so that only documents accepted by the filter
                    // (or rejected, when negateFilter is true) are copied over.
                    subReaders[i++] = new DocumentFilteredAtomicIndexReader(ctx, preserveFilter, negateFilter);
                }
                w.AddIndexes(subReaders);
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(w);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(w);
                }
            }
        }
Ejemplo n.º 37
0
 private void Populate(Directory directory, IndexWriterConfig config)
 {
     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config);
     for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++)
     {
         Document document = new Document();
         for (int f = 0; f < NUMBER_OF_FIELDS; f++)
         {
             document.Add(NewStringField("field" + f, Text, Field.Store.NO));
         }
         writer.AddDocument(document);
     }
     writer.ForceMerge(1);
     writer.Dispose();
 }
Ejemplo n.º 38
0
      public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2)
          : this(input, dir1, dir2, new TermRangeFilter(midTerm.Field, null, midTerm.Bytes, true, false), config1, config2)
      {
      }
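      // Usage note: a minimal, hedged sketch of splitting an index at a primary-key
      // boundary using this constructor. The paths, analyzer, and the "id" field are
      // placeholders, and Split() is assumed from the companion method of this class.
      private static void SplitExample(Analyzer analyzer)
      {
          Directory source = FSDirectory.Open("/path/to/source");
          Directory below = FSDirectory.Open("/path/to/below");  // docs with id < "5000"
          Directory above = FSDirectory.Open("/path/to/above");  // all remaining docs
          PKIndexSplitter splitter = new PKIndexSplitter(source, below, above,
              new Term("id", "5000"),
              new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer),
              new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer));
          splitter.Split();
      }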
Ejemplo n.º 39
0
        /// <summary>
        /// Initializes a new instance of the <see cref="LuceneIndex" /> class.
        /// </summary>
        /// <param name="indexPath">The path to the directory that will contain the Lucene index files.</param>
        /// <param name="schema">The schema.</param>
        /// <exception cref="System.ArgumentNullException"></exception>
        public LuceneIndex(string indexPath, Schema schema)
        {
            if (String.IsNullOrWhiteSpace(indexPath))
                throw new ArgumentNullException(nameof(indexPath)); 
            if (schema == null)
                throw new ArgumentNullException(nameof(schema));

            IndexPath = indexPath;
            Schema = schema;

            if (System.IO.Directory.Exists(IndexPath))
            {
                if (Schema.IsDefault())
                    throw new InvalidOperationException($"There is an existing index on '{IndexPath}'.");
            }                
            else
            {
                System.IO.Directory.CreateDirectory(IndexPath);
            }                        

            _indexDirectory = new MMapDirectory(Paths.get(IndexPath));

            var taxonomyIndexPath = System.IO.Path.Combine(IndexPath, "taxonomy");
            if (!System.IO.Directory.Exists(taxonomyIndexPath))            
                System.IO.Directory.CreateDirectory(taxonomyIndexPath);

            _taxonomyDirectory = new MMapDirectory(Paths.get(taxonomyIndexPath));         
                           
            _compositeAnalyzer = new CompositeAnalyzer(Schema);            

            _ramBufferSizeMB = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.RAMBufferSizeMB"] ?? "128");

            var config = new IndexWriterConfig(_compositeAnalyzer)                            
                            .SetOpenMode(IndexWriterConfigOpenMode.CREATE_OR_APPEND)
                            .SetRAMBufferSizeMB(_ramBufferSizeMB)
                            .SetCommitOnClose(true);                            
            
            _indexWriter = new IndexWriter(_indexDirectory, config);
            _taxonomyWriter = new DirectoryTaxonomyWriter(_taxonomyDirectory, IndexWriterConfigOpenMode.CREATE_OR_APPEND);

            _searcherTaxonomyManager = new SearcherTaxonomyManager(_indexWriter, true, null, _taxonomyWriter);            
            _facetBuilder = new LuceneFacetBuilder(_taxonomyWriter);                        

            _refreshIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexSearcher.RefreshIntervalSeconds"] ?? "0.5");    
            _commitIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.CommitIntervalSeconds"] ?? "60");
           
            _writeAllowedFlag = new ManualResetEventSlim(true);

            _refreshTimer = new Timer(o => Refresh(), null, TimeSpan.FromSeconds(_refreshIntervalSeconds), TimeSpan.FromSeconds(_refreshIntervalSeconds));
            _commitTimer = new Timer(o => Commit(), null, TimeSpan.FromSeconds(_commitIntervalSeconds), TimeSpan.FromSeconds(_commitIntervalSeconds));

        }
Ejemplo n.º 40
0
        public virtual void TestBooleanSpanQuery()
        {
            bool failed = false;
            int hits = 0;
            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            string FIELD = "content";
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            BooleanQuery query = new BooleanQuery();
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1, BooleanClause.Occur.SHOULD);
            query.Add(sq2, BooleanClause.Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);
            hits = collector.TopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(false, failed, "Bug in boolean query composed of span queries");
            Assert.AreEqual(1, hits, "Bug in boolean query composed of span queries");
            directory.Dispose();
        }
Ejemplo n.º 41
0
        /// <summary>Index all text files under a directory.</summary>
        public static void Main(string[] args)
        {
            // The <CONSOLE_APP_NAME> should be the assembly name of the application
            // this code is compiled into. In .NET Framework, it is the name of the EXE file.
            // In .NET Core, you have the option of compiling this into either an EXE or a DLL
            // (see https://docs.microsoft.com/en-us/dotnet/core/deploying/index).
            // In the latter case, the <CONSOLE_APP_NAME> will be "dotnet <DLL_NAME>.dll".
            string usage = "Usage: <CONSOLE_APP_NAME> <INDEX_DIRECTORY> <SOURCE_DIRECTORY> "
                           + "[-u|--update]\n\n"
                           + "This indexes the documents in <SOURCE_DIRECTORY>, creating a Lucene index"
                           + "in <INDEX_DIRECTORY> that can be searched with the search-files demo.";

            // Validate required arguments are present.
            // If not, show usage information.
            if (args.Length < 2)
            {
                Console.WriteLine(usage);
                Environment.Exit(1);
            }
            string indexPath  = args[0];
            string sourcePath = args[1];
            bool   create     = true;

            for (int i = 0; i < args.Length; i++)
            {
                if ("-u".Equals(args[i], StringComparison.Ordinal) || "--update".Equals(args[i], StringComparison.Ordinal))
                {
                    create = false;
                }
            }

            DirectoryInfo sourceDirectory = new DirectoryInfo(sourcePath);

            if (!sourceDirectory.Exists)
            {
                Console.WriteLine("Source directory '" + sourcePath + "' does not exist, please check the path");
                Environment.Exit(1);
            }

            DateTime start = DateTime.UtcNow;

            try
            {
                Console.WriteLine("Indexing to directory '" + indexPath + "'...");

                Store.Directory dir = FSDirectory.Open(indexPath);
                // :Post-Release-Update-Version.LUCENE_XY:
                Analyzer          analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
                IndexWriterConfig iwc      = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);

                if (create)
                {
                    // Create a new index in the directory, removing any
                    // previously indexed documents:
                    iwc.OpenMode = OpenMode.CREATE;
                }
                else
                {
                    // Add new documents to an existing index:
                    iwc.OpenMode = OpenMode.CREATE_OR_APPEND;
                }

                // Optional: for better indexing performance, if you
                // are indexing many documents, increase the RAM
                // buffer.
                //
                // iwc.RAMBufferSizeMB = 256.0;

                using (IndexWriter writer = new IndexWriter(dir, iwc))
                {
                    IndexDocs(writer, sourceDirectory);

                    // NOTE: if you want to maximize search performance,
                    // you can optionally call forceMerge here.  This can be
                    // a terribly costly operation, so generally it's only
                    // worth it when your index is relatively static (ie
                    // you're done adding documents to it):
                    //
                    // writer.ForceMerge(1);
                }

                DateTime end = DateTime.UtcNow;
                Console.WriteLine((end - start).TotalMilliseconds + " total milliseconds");
            }
            catch (IOException e)
            {
                Console.WriteLine(" caught a " + e.GetType() +
                                  "\n with message: " + e.Message);
            }
        }
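        // For example, hypothetical invocations matching the usage text above
        // (the assembly name and paths are placeholders):
        //
        //   dotnet index-files.dll C:\my-index C:\my-docs
        //   dotnet index-files.dll C:\my-index C:\my-docs --update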
Ejemplo n.º 42
0
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random, dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));
            Random        rand       = Random;
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1)
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.size()];
            for (int i = 0; i < termsToDel.size(); i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                docs.Remove(termToDel);
                assertTrue(null != toDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
            assertEquals(ir.NumDocs, docs.size());
            ValueSource[] toAdd = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) };

            IDictionary      dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputEnumerator inputIterator = dictionary.GetEntryEnumerator();

            while (inputIterator.MoveNext())
            {
                string   field = inputIterator.Current.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                assertTrue(inputIterator.Current.Equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, w2 + w1);
                assertTrue(inputIterator.Payload.Equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }
            assertTrue(docs.Count == 0);
            ir.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 43
0
        static void Main()
        {
            // default AzureDirectory stores cache in local temp folder
            CloudStorageAccount cloudStorageAccount;
            CloudStorageAccount.TryParse(CloudConfigurationManager.GetSetting("blobStorage"), out cloudStorageAccount);
            //AzureDirectory azureDirectory = new AzureDirectory(cloudStorageAccount, "TestTest", new RAMDirectory());
            //AzureDirectory azureDirectory = new AzureDirectory(cloudStorageAccount, "TestTest", FSDirectory.Open(@"c:\test"));
            var azureDirectory = new AzureDirectory(cloudStorageAccount, "TestTest" /* default is FSDirectory.Open(@"%temp%/AzureDirectory/TestTest"); */ );

            IndexWriter indexWriter = null;
            while (indexWriter == null)
            {
                try
                {
                    var config = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_CURRENT, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
                    indexWriter = new IndexWriter(azureDirectory, config);
                }
                catch (LockObtainFailedException)
                {
                    Console.WriteLine("Lock is taken, waiting for timeout...");
                    Thread.Sleep(1000);
                }
            }
            Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
            //indexWriter.setRAMBufferSizeMB(10.0);
            //indexWriter.SetUseCompoundFile(false);
            //indexWriter.SetMaxMergeDocs(10000);
            //indexWriter.SetMergeFactor(100);

            for (int iDoc = 0; iDoc < 10000; iDoc++)
            {
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);
                var doc = new Document();
                doc.add(new TextField("id", DateTime.Now.ToFileTimeUtc().ToString(CultureInfo.InvariantCulture), Field.Store.YES));
                doc.add(new TextField("Title", GeneratePhrase(10), Field.Store.YES));
                doc.add(new TextField("Body", GeneratePhrase(40), Field.Store.YES));
                indexWriter.addDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.numDocs());

            Console.Write("Flushing and disposing writer...");
            // Potentially expensive: this ensures that all writes are committed to blob storage
            indexWriter.commit();
            indexWriter.close();

            Console.WriteLine("done");
            Console.WriteLine("Hit Key to search again");
            Console.ReadKey();

            IndexSearcher searcher;
            using (new AutoStopWatch("Creating searcher"))
            {
                searcher = new IndexSearcher(DirectoryReader.open(azureDirectory));
            }
            SearchForPhrase(searcher, "dog");
            SearchForPhrase(searcher, Random.Next(32768).ToString(CultureInfo.InvariantCulture));
            SearchForPhrase(searcher, Random.Next(32768).ToString(CultureInfo.InvariantCulture));
            Console.WriteLine("Hit a key to dispose and exit");
            Console.ReadKey();
        }
Ejemplo n.º 44
0
 /// <summary>
 /// Opens the internal index writer, which holds the taxonomy data.
 /// <para>
 /// Extensions may provide their own <see cref="IndexWriter"/> implementation or instance.
 /// <br/><b>NOTE:</b> the instance returned by this method will be disposed upon calling
 /// <see cref="Dispose()"/>.
 /// <br/><b>NOTE:</b> the merge policy in effect must not merge non-adjacent segments; see the
 /// comment in <see cref="CreateIndexWriterConfig(IndexWriterConfig.OpenMode)"/> for the logic behind this.
 /// </para>
 /// </summary>
 /// <seealso cref="CreateIndexWriterConfig(IndexWriterConfig.OpenMode)"/>
 /// <param name="directory">
 ///          the <see cref="Directory"/> on top of which an <see cref="IndexWriter"/>
 ///          should be opened. </param>
 /// <param name="config">
 ///          configuration for the internal index writer. </param>
 protected virtual IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
 {
     return new IndexWriter(directory, config);
 }
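 // Because OpenIndexWriter is virtual, a subclass can observe or adjust writer
 // creation. A minimal, hedged sketch (the class name is hypothetical, it assumes
 // the single-Directory constructor of DirectoryTaxonomyWriter, and it merely wraps
 // the base behavior):
 private sealed class VerboseTaxonomyWriter : DirectoryTaxonomyWriter
 {
     public VerboseTaxonomyWriter(Directory directory)
         : base(directory)
     {
     }

     protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
     {
         // Inspect or tweak the config here before the internal writer is created.
         Console.WriteLine("Opening taxonomy index writer in " + directory);
         return base.OpenIndexWriter(directory, config);
     }
 }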
Ejemplo n.º 45
0
        private void CreateRandomIndexes()
        {
            dir1 = NewDirectory();
            dir2 = NewDirectory();
            int           numDocs     = AtLeast(150);
            int           numTerms    = TestUtil.NextInt32(Random, 1, numDocs / 5);
            ISet <string> randomTerms = new HashSet <string>();

            while (randomTerms.size() < numTerms)
            {
                randomTerms.add(TestUtil.RandomSimpleString(Random));
            }
            terms = new List <string>(randomTerms);
            long seed = Random.NextInt64();
            IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));

            iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
            RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
            RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);

            for (int i = 0; i < numDocs; ++i)
            {
                if (Random.nextInt(5) == 0 && i != numDocs - 1)
                {
                    string term = RandomPicks.RandomFrom(Random, terms);
                    iw1.DeleteDocuments(new Term("s", term));
                    iw2.DeleteDocuments(new Term("s", term));
                }
                Document doc = randomDocument();
                iw1.AddDocument(doc);
                iw2.AddDocument(doc);
                if (Random.nextInt(8) == 0)
                {
                    iw1.Commit();
                    iw2.Commit();
                }
            }
            // Make sure we have something to merge
            iw1.Commit();
            iw2.Commit();
            Document doc2 = randomDocument();

            // NOTE: don't use RIW.AddDocument directly, since it sometimes commits,
            // which may trigger a merge, in which case ForceMerge may not do anything.
            // With field updates this is a problem, since the updates can go into the
            // single segment in the index, and therefore the index won't be sorted.
            // This would break the test's later assumption that the index is sorted
            // by SortingMP.
            iw1.IndexWriter.AddDocument(doc2);
            iw2.IndexWriter.AddDocument(doc2);

            if (DefaultCodecSupportsFieldUpdates)
            {
                // update NDV of docs belonging to one term (covers many documents)
                long   value = Random.NextInt64();
                string term  = RandomPicks.RandomFrom(Random, terms);
                iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
                iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
            }

            iw1.ForceMerge(1);
            iw2.ForceMerge(1);
            iw1.Dispose();
            iw2.Dispose();
            reader       = DirectoryReader.Open(dir1);
            sortedReader = DirectoryReader.Open(dir2);
        }
Ejemplo n.º 46
0
 public void TestCustomMergeScheduler()
 {
     // we don't really need to execute anything, just to make sure the custom MS
     // compiles. But ensure that it can be used as well, e.g., no other hidden
     // dependencies or something. Therefore, don't use any random API !
     Directory dir = new RAMDirectory();
     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
     conf.SetMergeScheduler(new ReportingMergeScheduler());
     IndexWriter writer = new IndexWriter(dir, conf);
     writer.AddDocument(new Document());
     writer.Commit(); // trigger flush
     writer.AddDocument(new Document());
     writer.Commit(); // trigger flush
     writer.ForceMerge(1);
     writer.Dispose();
     dir.Dispose();
 }
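 // The ReportingMergeScheduler type exercised above is not shown here. A minimal,
 // hedged sketch of such a scheduler, modeled on the Java original; it assumes
 // IndexWriter.GetNextMerge() and IndexWriter.Merge(OneMerge) are accessible from
 // user code, as they are in the Java test:
 private class ReportingMergeScheduler : MergeScheduler
 {
     public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
     {
         MergePolicy.OneMerge merge;
         // Drain and run every pending merge on the calling thread, logging each one.
         while ((merge = writer.GetNextMerge()) != null)
         {
             Console.WriteLine("now run one merge");
             writer.Merge(merge);
         }
     }

     protected override void Dispose(bool disposing)
     {
         // no resources to release
     }
 }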
Ejemplo n.º 47
0
        private void DoTest(DocValuesType type)
        {
            Directory         d        = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            int   nDocs = AtLeast(50);
            Field id    = new NumericDocValuesField("id", 0);
            Field f;

            switch (type)
            {
            case DocValuesType.BINARY:
                f = new BinaryDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.SORTED:
                f = new SortedDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.NUMERIC:
                f = new NumericDocValuesField("dv", 0);
                break;

            default:
                throw new InvalidOperationException();
            }
            Document document = new Document();

            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);

            for (int i = 0; i < nDocs; ++i)
            {
                id.SetInt64Value(i);
                switch (type)
                {
                case DocValuesType.SORTED:
                case DocValuesType.BINARY:
                    do
                    {
                        vals[i] = TestUtil.RandomSimpleString(Random, 20);
                    } while (((string)vals[i]).Length == 0);
                    f.SetBytesValue(new BytesRef((string)vals[i]));
                    break;

                case DocValuesType.NUMERIC:
                    int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31);     // keep it an int
                    vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                    f.SetInt64Value((long)vals[i]);
                    break;
                }
                iw.AddDocument(document);
                if (Random.NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);

            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);
                ValueSource    vs;
                switch (type)
                {
                case DocValuesType.BINARY:
                case DocValuesType.SORTED:
                    vs = new BytesRefFieldSource("dv");
                    break;

                case DocValuesType.NUMERIC:
                    vs = new Int64FieldSource("dv");
                    break;

                default:
                    throw new InvalidOperationException();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef       bytes  = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is Int64FieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is long?);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    object expected = vals[ids.Int32Val(i)];
                    switch (type)
                    {
                    case DocValuesType.SORTED:
                        values.OrdVal(i);     // no exception
                        assertTrue(values.NumOrd >= 1);
                        goto case DocValuesType.BINARY;

                    case DocValuesType.BINARY:
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertTrue(values.BytesVal(i, bytes));
                        assertEquals(new BytesRef((string)expected), bytes);
                        break;

                    case DocValuesType.NUMERIC:
                        assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                        break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }