/// <summary> /// Annotates the given sequence of <see cref="Document"/> objects by adding a <b>_highlight</b> field; /// the <b>_highlight</b> field will contain the best matching text fragment from the <see cref="Document"/> /// object's full-text field. /// </summary> /// <param name="hits">The sequence of <see cref="Document"/> objects.</param> /// <param name="criteria">The search criteria that produced the hits.</param> /// <returns> /// The original sequence of Document objects, with a <b>_highlight</b> field added to each Document. /// </returns> public static IEnumerable<Document> GenerateHighlights(this IEnumerable<Document> hits, SearchCriteria criteria) { if (hits == null) throw new ArgumentNullException(nameof(hits)); if (criteria == null) throw new ArgumentNullException(nameof(criteria)); if (String.IsNullOrWhiteSpace(criteria.Query)) throw new ArgumentException("SearchCriteria.Query cannot be empty"); var documents = hits.ToList(); try { var indexDirectory = new RAMDirectory(); var analyzer = new FullTextAnalyzer(); var config = new IndexWriterConfig(analyzer).SetRAMBufferSizeMB(_ramBufferSizeMB); var writer = new IndexWriter(indexDirectory, config); BuildIndex(documents, writer); GenerateHighlights(documents, writer, criteria); writer.DeleteAll(); writer.Commit(); writer.Close(); indexDirectory.Close(); } catch (Exception ex) { _log.Error(ex); } return documents; }
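A minimal usage sketch for the extension method above; searchResults, and a settable SearchCriteria.Query property, are assumptions about this codebase rather than Lucene API:

// Hypothetical call site: searchResults is an IEnumerable<Document> returned
// by an earlier search in this codebase; SearchCriteria is the codebase's own type.
var criteria = new SearchCriteria { Query = "jabberwocky" };
var highlighted = searchResults.GenerateHighlights(criteria).ToList();
// Each Document now carries a "_highlight" field holding its best matching fragment.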
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) { this.input = input; this.dir1 = dir1; this.dir2 = dir2; this.docsInFirstIndex = docsInFirstIndex; this.config1 = config1; this.config2 = config2; }
public override void SetUp() { base.SetUp(); Dir = NewFSDirectory(CreateTempDir("testDFBlockSize")); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); Iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())); Iw = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)Iwc.Clone()); Iw.RandomForceMerge = false; // we will force merge ourselves }
private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf) { LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy(); logDocMergePolicy.NoCFSRatio = 0.0; // make sure we use plain // files conf.SetMergePolicy(logDocMergePolicy); IndexWriter writer = new IndexWriter(dir, conf); return writer; }
public ThumbnailIndexer(IndexPreferences indexPreferences) { this.indexPreferences = indexPreferences; if (!System.IO.Directory.Exists(Preferences.Instance.ThumbIndexFolder)) { logger.Info("Creating thumbs folder: '{0}'", Preferences.Instance.ThumbIndexFolder); System.IO.Directory.CreateDirectory(Preferences.Instance.ThumbIndexFolder); } var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing()); writer = new IndexWriter( FSDirectory.open(new java.io.File(Preferences.Instance.ThumbIndexFolder)), config); writer.commit(); searchManager = new SearchManager(writer.getDirectory()); tasks.Add(Task.Run(DequeueFiles)); tasks.Add(Task.Run(DequeueFiles)); tasks.Add(Task.Run(CommitTask)); }
public DocumentIndexer() { if (!System.IO.Directory.Exists(Preferences.Instance.MainIndexFolder)) { logger.Info("Creating main index folder: '{0}'", Preferences.Instance.MainIndexFolder); System.IO.Directory.CreateDirectory(Preferences.Instance.MainIndexFolder); } else { logger.Info("Updating index at '{0}'", Preferences.Instance.MainIndexFolder); } var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing()); mainWriter = new IndexWriter( FSDirectory.open(new java.io.File(Preferences.Instance.MainIndexFolder)), config); taxonomyWriter = new DirectoryTaxonomyWriter( FSDirectory.open(new java.io.File(Preferences.Instance.FacetIndexFolder)), IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexPreferences = new IndexPreferences(mainWriter); thumbnailIndexer = new ThumbnailIndexer(indexPreferences); startTime = DateTime.Now; }
/// <summary> /// Build the suggest index, using up to the specified /// amount of temporary RAM while building. Note that /// the weights for the suggestions are ignored. /// </summary> public virtual void Build(IInputEnumerator enumerator, double ramBufferSizeMB) { // LUCENENET: Added guard clause for null if (enumerator is null) { throw new ArgumentNullException(nameof(enumerator)); } if (enumerator.HasPayloads) { throw new ArgumentException("this suggester doesn't support payloads"); } if (enumerator.HasContexts) { throw new ArgumentException("this suggester doesn't support contexts"); } string prefix = this.GetType().Name; var directory = OfflineSorter.DefaultTempDir(); // LUCENENET specific - using GetRandomFileName() instead of picking a random int DirectoryInfo tempIndexPath; // LUCENENET: IDE0059: Remove unnecessary value assignment while (true) { tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName()))); tempIndexPath.Create(); if (System.IO.Directory.Exists(tempIndexPath.FullName)) { break; } } Directory dir = FSDirectory.Open(tempIndexPath); try { #pragma warning disable 612, 618 IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer); #pragma warning restore 612, 618 iwc.SetOpenMode(OpenMode.CREATE); iwc.SetRAMBufferSizeMB(ramBufferSizeMB); IndexWriter writer = new IndexWriter(dir, iwc); var ft = new FieldType(TextField.TYPE_NOT_STORED); // TODO: if only we had IndexOptions.TERMS_ONLY... ft.IndexOptions = IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; ft.Freeze(); Document doc = new Document(); Field field = new Field("body", "", ft); doc.Add(field); totTokens = 0; IndexReader reader = null; bool success = false; count = 0; try { while (enumerator.MoveNext()) { BytesRef surfaceForm = enumerator.Current; field.SetStringValue(surfaceForm.Utf8ToString()); writer.AddDocument(doc); count++; } reader = DirectoryReader.Open(writer, false); Terms terms = MultiFields.GetTerms(reader, "body"); if (terms is null) { throw new ArgumentException("need at least one suggestion"); } // Move all ngrams into an FST: TermsEnum termsEnum = terms.GetEnumerator(null); Outputs <long?> outputs = PositiveInt32Outputs.Singleton; Builder <long?> builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs); Int32sRef scratchInts = new Int32sRef(); while (termsEnum.MoveNext()) { BytesRef term = termsEnum.Term; int ngramCount = CountGrams(term); if (ngramCount > grams) { throw new ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams); } if (ngramCount == 1) { totTokens += termsEnum.TotalTermFreq; } builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq)); } fst = builder.Finish(); if (fst is null) { throw new ArgumentException("need at least one suggestion"); } //System.out.println("FST: " + fst.getNodeCount() + " nodes"); /* * PrintWriter pw = new PrintWriter("/x/tmp/out.dot"); * Util.toDot(fst, pw, true, true); * pw.close(); */ success = true; } finally { if (success) { IOUtils.Dispose(writer, reader); } else { IOUtils.DisposeWhileHandlingException(writer, reader); } } } finally { try { IOUtils.Dispose(dir); } finally { // LUCENENET specific - since we are removing the entire directory anyway, // it doesn't make sense to first do a loop in order remove the files. 
// Let the System.IO.Directory.Delete() method handle that. // We also need to dispose the Directory instance first before deleting from disk. try { System.IO.Directory.Delete(tempIndexPath.FullName, true); } catch (Exception e) { throw IllegalStateException.Create("failed to remove " + tempIndexPath, e); } } } }
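A hedged usage sketch for the Build overload above. Judging by the n-gram and separator logic, this is FreeTextSuggester.Build from Lucene.Net.Suggest; LuceneDictionary supplies an IInputEnumerator over an existing index field. The directory variable and field name are placeholders:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Search.Spell;
using Lucene.Net.Search.Suggest.Analyzing;
using Lucene.Net.Util;

var suggester = new FreeTextSuggester(new StandardAnalyzer(LuceneVersion.LUCENE_48));
using (IndexReader reader = DirectoryReader.Open(indexDir)) // indexDir: an existing Lucene.Net.Store.Directory
{
    // Enumerate the terms of the "body" field; this suggester ignores the weights.
    suggester.Build(new LuceneDictionary(reader, "body").GetEntryEnumerator(), ramBufferSizeMB: 10.0);
}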
public IndexWriterWrapper(LDirectory directory, IndexWriterConfig config) : base(directory, config) { IsClosing = false; }
public virtual void TestNrt() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); // Don't allow tiny maxBufferedDocs; it can make this // test too slow: iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs)); // MockRandom/AlcoholicMergePolicy are too slow: TieredMergePolicy tmp = new TieredMergePolicy(); tmp.FloorSegmentMB = .001; iwc.SetMergePolicy(tmp); IndexWriter w = new IndexWriter(dir, iwc); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("field", true); AtomicBoolean stop = new AtomicBoolean(); // How many unique facets to index before stopping: //int ordLimit = TestNightly ? 100000 : 6000; // LUCENENET specific: 100000 facets takes about 2-3 hours. To keep it under // the 1 hour free limit of Azure DevOps, this was reduced to 30000. int ordLimit = TestNightly ? 30000 : 6000; var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop); var mgr = new SearcherTaxonomyManager(w, true, null, tw); var reopener = new ThreadAnonymousClass(stop, mgr); reopener.Name = "reopener"; reopener.Start(); indexer.Name = "indexer"; indexer.Start(); try { while (!stop) { SearcherAndTaxonomy pair = mgr.Acquire(); try { //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize()); FacetsCollector sfc = new FacetsCollector(); pair.Searcher.Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "field"); if (pair.Searcher.IndexReader.NumDocs > 0) { //System.out.println(pair.taxonomyReader.getSize()); Assert.IsTrue(result.ChildCount > 0); Assert.IsTrue(result.LabelValues.Length > 0); } //if (VERBOSE) { //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0))); //} } finally { mgr.Release(pair); } } } finally { indexer.Join(); reopener.Join(); } if (Verbose) { Console.WriteLine("TEST: now stop"); } IOUtils.Dispose(mgr, tw, w, taxoDir, dir); }
public virtual void TestChangeCodecAndMerge() { Directory dir = NewDirectory(); if (VERBOSE) { Console.WriteLine("TEST: make new index"); } IndexWriterConfig iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetCodec(new MockCodec()); iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10); IndexWriter writer = NewWriter(dir, iwconf); AddDocs(writer, 10); writer.Commit(); AssertQuery(new Term("content", "aaa"), dir, 10); if (VERBOSE) { Console.WriteLine("TEST: addDocs3"); } AddDocs3(writer, 10); writer.Commit(); writer.Dispose(); AssertQuery(new Term("content", "ccc"), dir, 10); AssertQuery(new Term("content", "aaa"), dir, 10); Codec codec = iwconf.Codec; iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetOpenMode(OpenMode.APPEND).SetCodec(codec); // ((LogMergePolicy)iwconf.getMergePolicy()).setNoCFSRatio(0.0); // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10); iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwconf.SetCodec(new MockCodec2()); // uses standard for field content writer = NewWriter(dir, iwconf); // swap in new codec for currently written segments if (VERBOSE) { Console.WriteLine("TEST: add docs w/ Standard codec for content field"); } AddDocs2(writer, 10); writer.Commit(); codec = iwconf.Codec; Assert.AreEqual(30, writer.MaxDoc); AssertQuery(new Term("content", "bbb"), dir, 10); AssertQuery(new Term("content", "ccc"), dir, 10); //// AssertQuery(new Term("content", "aaa"), dir, 10); if (VERBOSE) { Console.WriteLine("TEST: add more docs w/ new codec"); } AddDocs2(writer, 10); writer.Commit(); AssertQuery(new Term("content", "ccc"), dir, 10); AssertQuery(new Term("content", "bbb"), dir, 20); AssertQuery(new Term("content", "aaa"), dir, 10); Assert.AreEqual(40, writer.MaxDoc); if (VERBOSE) { Console.WriteLine("TEST: now optimize"); } writer.ForceMerge(1); Assert.AreEqual(40, writer.MaxDoc); writer.Dispose(); AssertQuery(new Term("content", "ccc"), dir, 10); AssertQuery(new Term("content", "bbb"), dir, 20); AssertQuery(new Term("content", "aaa"), dir, 10); dir.Dispose(); }
/// <summary> /// Opens the internal index writer, which contains the taxonomy data. /// <para> /// Extensions may provide their own <seealso cref="IndexWriter"/> implementation or instance. /// <br><b>NOTE:</b> the instance this method returns will be closed upon calling /// <seealso cref="#close()"/>. /// <br><b>NOTE:</b> the merge policy in effect must not merge non-adjacent segments. See the /// comment in <seealso cref="#createIndexWriterConfig(IndexWriterConfig.OpenMode)"/> for the logic behind this. /// /// </para> /// </summary> /// <seealso cref= #createIndexWriterConfig(IndexWriterConfig.OpenMode) /// </seealso> /// <param name="directory"> /// the <seealso cref="Directory"/> on top of which an <seealso cref="IndexWriter"/> /// should be opened. </param> /// <param name="config"> /// configuration for the internal index writer. </param> protected virtual IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config) { return new IndexWriter(directory, config); }
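Because OpenIndexWriter is protected virtual, a subclass can wrap or replace the internal writer. A minimal sketch; the class name and logging are hypothetical:

using System;
using Lucene.Net.Facet.Taxonomy.Directory;
using Lucene.Net.Index;
using Directory = Lucene.Net.Store.Directory;

public class TracingTaxonomyWriter : DirectoryTaxonomyWriter
{
    public TracingTaxonomyWriter(Directory directory) : base(directory) { }

    protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
    {
        Console.WriteLine("opening internal taxonomy IndexWriter, OpenMode=" + config.OpenMode);
        return base.OpenIndexWriter(directory, config); // keep the default writer
    }
}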
public async Task CreateIndexFileAsync(bool forceCreate) { SpinWait.SpinUntil(() => 0 == Interlocked.Read(ref this.ActiveSearchCount)); Interlocked.Exchange(ref this.IsSearcherReady, 0); this.OnIndexCreating(); DirectoryInfo indexDirectoryInfo = new DirectoryInfo(this.IndexPath); if (!forceCreate && this.CanReuseIndex(indexDirectoryInfo)) { return; } this.Dispose(true); var files = indexDirectoryInfo.GetFiles(); Array.ForEach(files, file => { if (!file.Name.Equals("placeholder.txt", StringComparison.OrdinalIgnoreCase) && !file.Name.Equals("work.lock", StringComparison.OrdinalIgnoreCase)) { file.Delete(); } }); using (SimpleFSDirectory fsDirectory = new SimpleFSDirectory(indexDirectoryInfo)) { using (var analyzer = new AdrivaAnalyzer(LuceneVersion.LUCENE_48)) { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer) { }; using (IndexWriter writer = new IndexWriter(fsDirectory, indexWriterConfig)) { RawDataResult result = new RawDataResult { HasMore = false }; do { result = await this.GetRawDataAsync(result.LastRowId); var rawDataEnumerator = result.Items.GetEnumerator(); while (rawDataEnumerator.MoveNext()) { Document document = this.ResolveDocument(rawDataEnumerator.Current); if (null != document) { writer.AddDocument(document, analyzer); } } } while (result.HasMore); writer.Commit(); } } } this.OnIndexCreated(); }
public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes) { this.OuterInstance = outerInstance; MyNodeID = nodeID; Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase")); // TODO: set warmer MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); if (VERBOSE) { iwc.InfoStream = new PrintStreamInfoStream(Console.Out); } Writer = new IndexWriter(Dir, iwc); Mgr = new SearcherManager(Writer, true, null); Searchers = new SearcherLifetimeManager(); // Init w/ 0s... caller above will do initial // "broadcast" by calling initSearcher: CurrentNodeVersions = new long[numNodes]; }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.MergePolicy"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="mergePolicy"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetMergePolicy(this IndexWriterConfig config, MergePolicy mergePolicy) { config.MergePolicy = mergePolicy; return(config); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.WriteLockTimeout"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="writeLockTimeout"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetWriteLockTimeout(this IndexWriterConfig config, long writeLockTimeout) { config.WriteLockTimeout = writeLockTimeout; return(config); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.MergeScheduler"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="mergeScheduler"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetMergeScheduler(this IndexWriterConfig config, IMergeScheduler mergeScheduler) { config.MergeScheduler = mergeScheduler; return(config); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.Similarity"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="similarity"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetSimilarity(this IndexWriterConfig config, Similarity similarity) { config.Similarity = similarity; return(config); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.IndexCommit"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="commit"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetIndexCommit(this IndexWriterConfig config, IndexCommit commit) { config.IndexCommit = commit; return(config); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.IndexDeletionPolicy"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="deletionPolicy"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetIndexDeletionPolicy(this IndexWriterConfig config, IndexDeletionPolicy deletionPolicy) { config.IndexDeletionPolicy = deletionPolicy; return(config); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.OpenMode"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="openMode"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetOpenMode(this IndexWriterConfig config, OpenMode openMode) { config.OpenMode = openMode; return(config); }
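The Set* extensions above return the config instance, so they chain fluently. A sketch assuming Lucene.NET 4.8 (where these extensions appear to live in Lucene.Net.Index.Extensions):

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
IndexWriterConfig config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
    .SetOpenMode(OpenMode.CREATE_OR_APPEND)  // extension from above
    .SetMergePolicy(new TieredMergePolicy())
    .SetWriteLockTimeout(2000);              // milliseconds
using (var writer = new IndexWriter(FSDirectory.Open("index"), config))
{
    writer.Commit();
}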
public override void Dispose() { lock (this) { // files that we tried to delete, but couldn't because readers were open. // all that matters is that we tried! (they will eventually go away) ISet<string> pendingDeletions = new HashSet<string>(OpenFilesDeleted); MaybeYield(); if (OpenFiles == null) { OpenFiles = new Dictionary<string, int>(); OpenFilesDeleted = new HashSet<string>(); } if (OpenFiles.Count > 0) { // print the first one as it's very verbose otherwise Exception cause = null; IEnumerator<Exception> stacktraces = OpenFileHandles.Values.GetEnumerator(); if (stacktraces.MoveNext()) { cause = stacktraces.Current; } // RuntimeException instead of System.IO.IOException because // super() does not throw System.IO.IOException currently: throw new Exception("MockDirectoryWrapper: cannot close: there are still open files: " + String.Join(" ,", OpenFiles.ToArray().Select(x => x.Key)), cause); } if (OpenLocks.Count > 0) { throw new Exception("MockDirectoryWrapper: cannot close: there are still open locks: " + String.Join(" ,", OpenLocks.ToArray())); } IsOpen = false; if (CheckIndexOnClose) { RandomIOExceptionRate_Renamed = 0.0; RandomIOExceptionRateOnOpen_Renamed = 0.0; if (DirectoryReader.IndexExists(this)) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("\nNOTE: MockDirectoryWrapper: now crush"); } Crash(); // corrupt any unsynced-files if (LuceneTestCase.VERBOSE) { Console.WriteLine("\nNOTE: MockDirectoryWrapper: now run CheckIndex"); } TestUtil.CheckIndex(this, CrossCheckTermVectorsOnClose); // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles if (AssertNoUnreferencedFilesOnClose) { // now look for unreferenced files: discount ones that we tried to delete but could not HashSet<string> allFiles = new HashSet<string>(Arrays.AsList(ListAll())); allFiles.RemoveAll(pendingDeletions); string[] startFiles = allFiles.ToArray(/*new string[0]*/); IndexWriterConfig iwc = new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null); iwc.SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); (new IndexWriter(@in, iwc)).Rollback(); string[] endFiles = @in.ListAll(); ISet<string> startSet = new SortedSet<string>(Arrays.AsList(startFiles)); ISet<string> endSet = new SortedSet<string>(Arrays.AsList(endFiles)); if (pendingDeletions.Contains("segments.gen") && endSet.Contains("segments.gen")) { // this is possible if we hit an exception while writing segments.gen, we try to delete it // and it ends up in pendingDeletions (but IFD won't remove this). startSet.Add("segments.gen"); if (LuceneTestCase.VERBOSE) { Console.WriteLine("MDW: Unreferenced check: Ignoring segments.gen that we could not delete."); } } // it's possible we cannot delete the segments_N on windows if someone has it open and // maybe other files too, depending on timing. normally someone on windows wouldn't have // an issue (IFD would nuke this stuff eventually), but we pass NoDeletionPolicy...
foreach (string file in pendingDeletions) { if (file.StartsWith("segments") && !file.Equals("segments.gen") && endSet.Contains(file)) { startSet.Add(file); if (LuceneTestCase.VERBOSE) { Console.WriteLine("MDW: Unreferenced check: Ignoring segments file: " + file + " that we could not delete."); } SegmentInfos sis = new SegmentInfos(); try { sis.Read(@in, file); } catch (System.IO.IOException ioe) { // OK: likely some of the .si files were deleted } try { ISet<string> ghosts = new HashSet<string>(sis.Files(@in, false)); foreach (string s in ghosts) { if (endSet.Contains(s) && !startSet.Contains(s)) { Debug.Assert(pendingDeletions.Contains(s)); if (LuceneTestCase.VERBOSE) { Console.WriteLine("MDW: Unreferenced check: Ignoring referenced file: " + s + " " + "from " + file + " that we could not delete."); } startSet.Add(s); } } } catch (Exception t) { Console.Error.WriteLine("ERROR processing leftover segments file " + file + ":"); Console.WriteLine(t.ToString()); Console.Write(t.StackTrace); } } } startFiles = startSet.ToArray(/*new string[0]*/); endFiles = endSet.ToArray(/*new string[0]*/); if (!Arrays.Equals(startFiles, endFiles)) { IList<string> removed = new List<string>(); foreach (string fileName in startFiles) { if (!endSet.Contains(fileName)) { removed.Add(fileName); } } IList<string> added = new List<string>(); foreach (string fileName in endFiles) { if (!startSet.Contains(fileName)) { added.Add(fileName); } } string extras; if (removed.Count != 0) { extras = "\n\nThese files were removed: " + removed; } else { extras = ""; } if (added.Count != 0) { extras += "\n\nThese files were added (waaaaaaaaaat!): " + added; } if (pendingDeletions.Count != 0) { extras += "\n\nThese files we had previously tried to delete, but couldn't: " + pendingDeletions; } Debug.Assert(false, "unreferenced files: before delete:\n " + Arrays.ToString(startFiles) + "\n after delete:\n " + Arrays.ToString(endFiles) + extras); } DirectoryReader ir1 = DirectoryReader.Open(this); int numDocs1 = ir1.NumDocs; ir1.Dispose(); (new IndexWriter(this, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null))).Dispose(); DirectoryReader ir2 = DirectoryReader.Open(this); int numDocs2 = ir2.NumDocs; ir2.Dispose(); Debug.Assert(numDocs1 == numDocs2, "numDocs changed after opening/closing IW: before=" + numDocs1 + " after=" + numDocs2); } } } @in.Dispose(); } }
// NOTE: not a test; just here to make sure the code frag // in the javadocs is correct! public virtual void VerifyCompiles() { Analyzer analyzer = null; Directory fsDir = FSDirectory.Open(new DirectoryInfo("/path/to/index")); NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer); IndexWriter writer = new IndexWriter(cachedFSDir, conf); }
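To exercise the fragment rather than merely compile it, the method body could continue as below (a sketch; the null analyzer above would have to be replaced with a real one before AddDocument would work):

Document doc = new Document();
doc.Add(new TextField("body", "hello world", Field.Store.NO));
writer.AddDocument(doc);
writer.Commit();       // newly flushed segments stay in the RAM cache until sync'd
writer.Dispose();
cachedFSDir.Dispose(); // also closes the wrapped FSDirectory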
/// <summary> /// Builder method for <see cref="IndexWriterConfig.Codec"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="codec"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetCodec(this IndexWriterConfig config, Codec codec) { config.Codec = codec; return(config); }
// -- IndexWriterConfig.cs (members) /// <summary> /// Sets the static <see cref="IndexWriterConfig.DefaultWriteLockTimeout"/>, the default write lock timeout /// used by <see cref="IndexWriterConfig"/> instances created afterwards. Unlike the other Set* extensions, /// this mutates a static value and returns nothing, so it cannot be chained. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance (unused; present so the call reads like the other builders)</param> /// <param name="writeLockTimeout">the default timeout, in milliseconds</param> public static void SetDefaultWriteLockTimeout(this IndexWriterConfig config, long writeLockTimeout) { IndexWriterConfig.DefaultWriteLockTimeout = writeLockTimeout; }
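Unlike the other Set* extensions, this one mutates a process-wide static and returns void, so it cannot participate in a fluent chain:

config.SetDefaultWriteLockTimeout(2000); // milliseconds
// Affects IndexWriterConfig instances created after this call; existing
// instances keep whatever WriteLockTimeout they already have.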
/// <summary> /// Builder method for <see cref="IndexWriterConfig.IndexerThreadPool"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="threadPool"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> internal static IndexWriterConfig SetIndexerThreadPool(this IndexWriterConfig config, DocumentsWriterPerThreadPool threadPool) { config.IndexerThreadPool = threadPool; return(config); }
/// <summary> /// Construct a Taxonomy writer. /// </summary> /// <param name="directory"> /// The <seealso cref="Directory"/> in which to store the taxonomy. Note that /// the taxonomy is written directly to that directory (not to a /// subdirectory of it). </param> /// <param name="openMode"> /// Specifies how to open a taxonomy for writing: <code>APPEND</code> /// means open an existing index for append (failing if the index does /// not yet exist). <code>CREATE</code> means create a new index (first /// deleting the old one if it already existed). /// <code>CREATE_OR_APPEND</code> appends to an existing index if there /// is one, otherwise it creates a new index. </param> /// <param name="cache"> /// A <seealso cref="TaxonomyWriterCache"/> implementation which determines /// the in-memory caching policy. See for example /// <seealso cref="LruTaxonomyWriterCache"/> and <seealso cref="Cl2oTaxonomyWriterCache"/>. /// If null or missing, <seealso cref="#defaultTaxonomyWriterCache()"/> is used. </param> /// <exception cref="CorruptIndexException"> /// if the taxonomy is corrupted. </exception> /// <exception cref="LockObtainFailedException"> /// if the taxonomy is locked by another writer. If it is known /// that no other concurrent writer is active, the lock might /// have been left around by an old dead process, and should be /// removed using <seealso cref="#unlock(Directory)"/>. </exception> /// <exception cref="IOException"> /// if another error occurred. </exception> public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache) { dir = directory; IndexWriterConfig config = CreateIndexWriterConfig(openMode); indexWriter = OpenIndexWriter(dir, config); // verify (to some extent) that merge policy in effect would preserve category docids if (indexWriter != null) { Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging non-adjacent segments is not allowed"); } // after we opened the writer, and the index is locked, it's safe to check // the commit data and read the index epoch openMode = config.OpenMode.HasValue ? config.OpenMode.Value : OpenMode.CREATE_OR_APPEND; if (!DirectoryReader.IndexExists(directory)) { indexEpoch = 1; } else { string epochStr = null; IDictionary <string, string> commitData = ReadCommitData(directory); if (commitData != null && commitData.ContainsKey(INDEX_EPOCH)) { epochStr = commitData[INDEX_EPOCH]; } // no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack // of a better value. indexEpoch = epochStr == null ? 1 : Convert.ToInt64(epochStr, 16); } if (openMode == OpenMode.CREATE) { ++indexEpoch; } FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.OmitNorms = true; parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft); fullPathField = new StringField(Consts.FULL, "", Field.Store.YES); if (indexWriter == null) { return; } nextID = indexWriter.MaxDoc; if (cache == null) { cache = DefaultTaxonomyWriterCache(); } this.cache = cache; if (nextID == 0) { cacheIsComplete = true; // Make sure that the taxonomy always contains the root category // with category id 0. AddCategory(new FacetLabel()); } else { // There are some categories on the disk, which we have not yet // read into the cache, and therefore the cache is incomplete. // We choose not to read all the categories into the cache now, // to avoid terrible performance when a taxonomy index is opened // to add just a single category.
// We will do it later, after we notice a few cache misses. cacheIsComplete = false; } }
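A hedged construction sketch for the cache parameter documented above, using the LruTaxonomyWriterCache named in the doc comment (the directory variable and capacity are placeholders):

using Lucene.Net.Facet.Taxonomy.Directory;
using Lucene.Net.Facet.Taxonomy.WriterCache;
using Lucene.Net.Index;

var taxoWriter = new DirectoryTaxonomyWriter(
    taxoDir, // a Lucene.Net.Store.Directory holding the taxonomy
    OpenMode.CREATE_OR_APPEND,
    new LruTaxonomyWriterCache(4096)); // cap the in-memory category cache at 4096 entries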
/// <summary> /// Builder method for <see cref="IndexWriterConfig.MaxThreadStates"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="maxThreadStates"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetMaxThreadStates(this IndexWriterConfig config, int maxThreadStates) { config.MaxThreadStates = maxThreadStates; return(config); }
public virtual void TestPostings() { Directory dir = NewFSDirectory(CreateTempDir("postings")); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetCodec(Codec.ForName("Lucene40")); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); Document doc = new Document(); // id field FieldType idType = new FieldType(StringField.TYPE_NOT_STORED); idType.StoreTermVectors = true; Field idField = new Field("id", "", idType); doc.Add(idField); // title field: short text field FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED); titleType.StoreTermVectors = true; titleType.StoreTermVectorPositions = true; titleType.StoreTermVectorOffsets = true; titleType.IndexOptions = IndexOptions(); Field titleField = new Field("title", "", titleType); doc.Add(titleField); // body field: long text field FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED); bodyType.StoreTermVectors = true; bodyType.StoreTermVectorPositions = true; bodyType.StoreTermVectorOffsets = true; bodyType.IndexOptions = IndexOptions(); Field bodyField = new Field("body", "", bodyType); doc.Add(bodyField); int numDocs = AtLeast(1000); for (int i = 0; i < numDocs; i++) { idField.SetStringValue(Convert.ToString(i)); titleField.SetStringValue(FieldValue(1)); bodyField.SetStringValue(FieldValue(3)); iw.AddDocument(doc); if (Random().Next(20) == 0) { iw.DeleteDocuments(new Term("id", Convert.ToString(i))); } } if (Random().NextBoolean()) { // delete 1-100% of docs iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)])); } iw.Dispose(); dir.Dispose(); // checkindex }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.UseReaderPooling"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="readerPooling"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetReaderPooling(this IndexWriterConfig config, bool readerPooling) { config.UseReaderPooling = readerPooling; return(config); }
public virtual void TestDocValuesIntegration() { AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value"))); doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value"))); doc.Add(new NumericDocValuesField("numeric", 42)); if (DefaultCodecSupportsSortedSet) { doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1"))); doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2"))); } iw.AddDocument(doc); DirectoryReader ir = iw.GetReader(); iw.Dispose(); AtomicReader ar = GetOnlySegmentReader(ir); BytesRef scratch = new BytesRef(); // Binary type: can be retrieved via getTerms() try { FieldCache.DEFAULT.GetInt32s(ar, "binary", false); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true); binary.Get(0, scratch); Assert.AreEqual("binary value", scratch.Utf8ToString()); try { FieldCache.DEFAULT.GetTermsIndex(ar, "binary"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetDocTermOrds(ar, "binary"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "binary"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary"); Assert.IsTrue(bits.Get(0)); // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds() try { FieldCache.DEFAULT.GetInt32s(ar, "sorted", false); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "sorted"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true); binary.Get(0, scratch); Assert.AreEqual("sorted value", scratch.Utf8ToString()); SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted"); Assert.AreEqual(0, sorted.GetOrd(0)); Assert.AreEqual(1, sorted.ValueCount); sorted.Get(0, scratch); Assert.AreEqual("sorted value", scratch.Utf8ToString()); SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted"); sortedSet.SetDocument(0); Assert.AreEqual(0, sortedSet.NextOrd()); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); Assert.AreEqual(1, sortedSet.ValueCount); bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted"); Assert.IsTrue(bits.Get(0)); // Numeric type: can be retrieved via getInts() and so on Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false); Assert.AreEqual(42, numeric.Get(0)); try { FieldCache.DEFAULT.GetTerms(ar, "numeric", true); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetTermsIndex(ar, "numeric"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { 
FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "numeric"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric"); Assert.IsTrue(bits.Get(0)); // SortedSet type: can be retrieved via getDocTermOrds() if (DefaultCodecSupportsSortedSet) { try { FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetTerms(ar, "sortedset", true); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "sortedset"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset"); sortedSet.SetDocument(0); Assert.AreEqual(0, sortedSet.NextOrd()); Assert.AreEqual(1, sortedSet.NextOrd()); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); Assert.AreEqual(2, sortedSet.ValueCount); bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset"); Assert.IsTrue(bits.Get(0)); } ir.Dispose(); dir.Dispose(); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.IndexingChain"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="indexingChain"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> internal static IndexWriterConfig SetIndexingChain(this IndexWriterConfig config, DocumentsWriterPerThread.IndexingChain indexingChain) { config.IndexingChain = indexingChain; return(config); }
public static IndexWriterConfig CreateWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit) { // :Post-Release-Update-Version.LUCENE_XY: LuceneVersion version = (LuceneVersion)Enum.Parse(typeof(LuceneVersion), config.Get("writer.version", LuceneVersion.LUCENE_48.ToString())); IndexWriterConfig iwConf = new IndexWriterConfig(version, runData.Analyzer); iwConf.OpenMode = mode; IndexDeletionPolicy indexDeletionPolicy = GetIndexDeletionPolicy(config); iwConf.IndexDeletionPolicy = indexDeletionPolicy; if (commit != null) { iwConf.IndexCommit = commit; } string mergeScheduler = config.Get("merge.scheduler", "Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net"); #if !FEATURE_CONCURRENTMERGESCHEDULER // LUCENENET specific - hack to get our TaskMergeScheduler // when a ConcurrentMergeScheduler is requested. if (mergeScheduler.Contains(".ConcurrentMergeScheduler,")) { mergeScheduler = "Lucene.Net.Index.TaskMergeScheduler, Lucene.Net"; } #endif Type mergeSchedulerType = Type.GetType(mergeScheduler); if (mergeSchedulerType == null) { throw new Exception("Unrecognized merge scheduler type '" + mergeScheduler + "'"); } else if (mergeSchedulerType.Equals(typeof(NoMergeScheduler))) { iwConf.MergeScheduler = NoMergeScheduler.INSTANCE; } else { try { iwConf.MergeScheduler = (IMergeScheduler)Activator.CreateInstance(mergeSchedulerType); } catch (Exception e) { throw new Exception("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e); } if (mergeScheduler.Equals("Lucene.Net.Index.ConcurrentMergeScheduler", StringComparison.Ordinal)) { #if FEATURE_CONCURRENTMERGESCHEDULER ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler)iwConf.MergeScheduler; int maxThreadCount = config.Get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.DEFAULT_MAX_THREAD_COUNT); int maxMergeCount = config.Get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.DEFAULT_MAX_MERGE_COUNT); #else TaskMergeScheduler cms = (TaskMergeScheduler)iwConf.MergeScheduler; int maxThreadCount = config.Get("concurrent.merge.scheduler.max.thread.count", 1); int maxMergeCount = config.Get("concurrent.merge.scheduler.max.merge.count", 2); #endif cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount); } } string defaultCodec = config.Get("default.codec", null); if (defaultCodec != null) { try { Type clazz = Type.GetType(defaultCodec); iwConf.Codec = (Codec)Activator.CreateInstance(clazz); } catch (Exception e) { throw new Exception("Couldn't instantiate Codec: " + defaultCodec, e); } } string mergePolicy = config.Get("merge.policy", "Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net"); bool isCompound = config.Get("compound", true); Type mergePolicyType = Type.GetType(mergePolicy); if (mergePolicyType == null) { throw new Exception("Unrecognized merge policy type '" + mergePolicy + "'"); } else if (mergePolicyType.Equals(typeof(NoMergePolicy))) { iwConf.MergePolicy = isCompound ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES; } else { try { iwConf.MergePolicy = (MergePolicy)Activator.CreateInstance(mergePolicyType); } catch (Exception e) { throw new Exception("unable to instantiate class '" + mergePolicy + "' as merge policy", e); } iwConf.MergePolicy.NoCFSRatio = isCompound ? 
1.0 : 0.0; if (iwConf.MergePolicy is LogMergePolicy) { LogMergePolicy logMergePolicy = (LogMergePolicy)iwConf.MergePolicy; logMergePolicy.MergeFactor = config.Get("merge.factor", OpenIndexTask.DEFAULT_MERGE_PFACTOR); } } double ramBuffer = config.Get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB); int maxBuffered = config.Get("max.buffered", OpenIndexTask.DEFAULT_MAX_BUFFERED); if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) { iwConf.RAMBufferSizeMB = ramBuffer; iwConf.MaxBufferedDocs = maxBuffered; } else { iwConf.MaxBufferedDocs = maxBuffered; iwConf.RAMBufferSizeMB = ramBuffer; } return(iwConf); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.FlushPolicy"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="flushPolicy"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> internal static IndexWriterConfig SetFlushPolicy(this IndexWriterConfig config, FlushPolicy flushPolicy) { config.FlushPolicy = flushPolicy; return(config); }
public virtual void TestNRTAndCommit() { Directory dir = NewDirectory(); NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); RandomIndexWriter w = new RandomIndexWriter(Random, cachedDir, conf); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); int numDocs = TestUtil.NextInt32(Random, 100, 400); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs); } IList <BytesRef> ids = new List <BytesRef>(); DirectoryReader r = null; for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = docs.NextDoc(); ids.Add(new BytesRef(doc.Get("docid"))); w.AddDocument(doc); if (Random.Next(20) == 17) { if (r == null) { r = DirectoryReader.Open(w.IndexWriter, false); } else { DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } } Assert.AreEqual(1 + docCount, r.NumDocs); IndexSearcher s = NewSearcher(r); // Just make sure search can run; we can't assert // totHits since it could be 0 TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10); // System.out.println("tot hits " + hits.totalHits); } } if (r != null) { r.Dispose(); } // Close should force cache to clear since all files are sync'd w.Dispose(); string[] cachedFiles = cachedDir.ListCachedFiles(); foreach (string file in cachedFiles) { Console.WriteLine("FAIL: cached file " + file + " remains after sync"); } Assert.AreEqual(0, cachedFiles.Length); r = DirectoryReader.Open(dir); foreach (BytesRef id in ids) { Assert.AreEqual(1, r.DocFreq(new Term("docid", id))); } r.Dispose(); cachedDir.Dispose(); docs.Dispose(); }
/// <summary> /// Builder method for <see cref="IndexWriterConfig.RAMPerThreadHardLimitMB"/>. /// </summary> /// <param name="config">this <see cref="IndexWriterConfig"/> instance</param> /// <param name="perThreadHardLimitMB"></param> /// <returns>this <see cref="IndexWriterConfig"/> instance</returns> public static IndexWriterConfig SetRAMPerThreadHardLimitMB(this IndexWriterConfig config, int perThreadHardLimitMB) { config.RAMPerThreadHardLimitMB = perThreadHardLimitMB; return(config); }
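These expert settings chain like the rest; a sketch combining several of the extensions defined in this set (the analyzer variable is a placeholder):

IndexWriterConfig tuned = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
    .SetMaxThreadStates(4)             // limit concurrent indexing thread states
    .SetReaderPooling(true)            // pool readers for cheaper NRT reopens
    .SetRAMPerThreadHardLimitMB(512);  // hard per-thread flush ceiling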
private void CreateIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, bool negateFilter) { bool success = false; IndexWriter w = new IndexWriter(target, config); try { IList<AtomicReaderContext> leaves = reader.Leaves; IndexReader[] subReaders = new IndexReader[leaves.Count]; int i = 0; foreach (AtomicReaderContext ctx in leaves) { subReaders[i++] = new DocumentFilteredAtomicIndexReader(ctx, preserveFilter, negateFilter); } w.AddIndexes(subReaders); success = true; } finally { if (success) { IOUtils.Dispose(w); } else { IOUtils.DisposeWhileHandlingException(w); } } }
private void Populate(Directory directory, IndexWriterConfig config) { RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config); for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) { Document document = new Document(); for (int f = 0; f < NUMBER_OF_FIELDS; f++) { document.Add(NewStringField("field" + f, Text, Field.Store.NO)); } writer.AddDocument(document); } writer.ForceMerge(1); writer.Dispose(); }
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) : this(input, dir1, dir2, new TermRangeFilter(midTerm.Field, null, midTerm.Bytes, true, false), config1, config2) { }
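A hedged usage sketch: with the midTerm overload, documents whose primary key sorts below midTerm land in dir1 and the rest in dir2 (directory variables and the "id" field are placeholders; Split() is the upstream entry point):

var splitter = new PKIndexSplitter(
    input, dir1, dir2,
    new Term("id", "m"), // ids < "m" go to dir1, the rest to dir2
    new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)),
    new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)));
splitter.Split(); // reads `input` and writes both target indexes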
/// <summary> /// Initializes a new instance of the <see cref="LuceneIndex" /> class. /// </summary> /// <param name="indexPath">The path to the directory that will contain the Lucene index files.</param> /// <param name="schema">The schema.</param> /// <exception cref="System.ArgumentNullException"></exception> public LuceneIndex(string indexPath, Schema schema) { if (String.IsNullOrWhiteSpace(indexPath)) throw new ArgumentNullException(nameof(indexPath)); if (schema == null) throw new ArgumentNullException(nameof(schema)); IndexPath = indexPath; Schema = schema; if (System.IO.Directory.Exists(IndexPath)) { if (Schema.IsDefault()) throw new InvalidOperationException($"There is an existing index on '{IndexPath}'."); } else { System.IO.Directory.CreateDirectory(IndexPath); } _indexDirectory = new MMapDirectory(Paths.get(IndexPath)); var taxonomyIndexPath = System.IO.Path.Combine(IndexPath, "taxonomy"); if (!System.IO.Directory.Exists(taxonomyIndexPath)) System.IO.Directory.CreateDirectory(taxonomyIndexPath); _taxonomyDirectory = new MMapDirectory(Paths.get(taxonomyIndexPath)); _compositeAnalyzer = new CompositeAnalyzer(Schema); _ramBufferSizeMB = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.RAMBufferSizeMB"] ?? "128"); var config = new IndexWriterConfig(_compositeAnalyzer) .SetOpenMode(IndexWriterConfigOpenMode.CREATE_OR_APPEND) .SetRAMBufferSizeMB(_ramBufferSizeMB) .SetCommitOnClose(true); _indexWriter = new IndexWriter(_indexDirectory, config); _taxonomyWriter = new DirectoryTaxonomyWriter(_taxonomyDirectory, IndexWriterConfigOpenMode.CREATE_OR_APPEND); _searcherTaxonomyManager = new SearcherTaxonomyManager(_indexWriter, true, null, _taxonomyWriter); _facetBuilder = new LuceneFacetBuilder(_taxonomyWriter); _refreshIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexSearcher.RefreshIntervalSeconds"] ?? "0.5"); _commitIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.CommitIntervalSeconds"] ?? "60"); _writeAllowedFlag = new ManualResetEventSlim(true); _refreshTimer = new Timer(o => Refresh(), null, TimeSpan.FromSeconds(_refreshIntervalSeconds), TimeSpan.FromSeconds(_refreshIntervalSeconds)); _commitTimer = new Timer(o => Commit(), null, TimeSpan.FromSeconds(_commitIntervalSeconds), TimeSpan.FromSeconds(_commitIntervalSeconds)); }
public virtual void TestBooleanSpanQuery() { bool failed = false; int hits = 0; Directory directory = NewDirectory(); Analyzer indexerAnalyzer = new MockAnalyzer(Random()); IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer); IndexWriter writer = new IndexWriter(directory, config); string FIELD = "content"; Document d = new Document(); d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES)); writer.AddDocument(d); writer.Dispose(); IndexReader indexReader = DirectoryReader.Open(directory); IndexSearcher searcher = NewSearcher(indexReader); BooleanQuery query = new BooleanQuery(); SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork")); SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork")); query.Add(sq1, BooleanClause.Occur.SHOULD); query.Add(sq2, BooleanClause.Occur.SHOULD); TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true); searcher.Search(query, collector); hits = collector.TopDocs().ScoreDocs.Length; foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs) { Console.WriteLine(scoreDoc.Doc); } indexReader.Dispose(); Assert.AreEqual(failed, false, "Bug in boolean query composed of span queries"); Assert.AreEqual(hits, 1, "Bug in boolean query composed of span queries"); directory.Dispose(); }
/// <summary>Index all text files under a directory.</summary> public static void Main(string[] args) { // The <CONSOLE_APP_NAME> should be the assembly name of the application // this code is compiled into. In .NET Framework, it is the name of the EXE file. // In .NET Core, you have the option of compiling this into either an EXE or a DLL // (see https://docs.microsoft.com/en-us/dotnet/core/deploying/index). // In the latter case, the <CONSOLE_APP_NAME> will be "dotnet <DLL_NAME>.dll". string usage = "Usage: <CONSOLE_APP_NAME> <INDEX_DIRECTORY> <SOURCE_DIRECTORY> " + "[-u|--update]\n\n" + "This indexes the documents in <SOURCE_DIRECTORY>, creating a Lucene index " + "in <INDEX_DIRECTORY> that can be searched with the search-files demo."; // Validate required arguments are present. // If not, show usage information. if (args.Length < 2) { Console.WriteLine(usage); Environment.Exit(1); } string indexPath = args[0]; string sourcePath = args[1]; bool create = true; for (int i = 0; i < args.Length; i++) { if ("-u".Equals(args[i], StringComparison.Ordinal) || "--update".Equals(args[i], StringComparison.Ordinal)) { create = false; } } DirectoryInfo sourceDirectory = new DirectoryInfo(sourcePath); if (!sourceDirectory.Exists) { Console.WriteLine("Source directory '" + sourcePath + "' does not exist, please check the path"); Environment.Exit(1); } DateTime start = DateTime.UtcNow; try { Console.WriteLine("Indexing to directory '" + indexPath + "'..."); Store.Directory dir = FSDirectory.Open(indexPath); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48); IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.OpenMode = OpenMode.CREATE; } else { // Add new documents to an existing index: iwc.OpenMode = OpenMode.CREATE_OR_APPEND; } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. // // iwc.RAMBufferSizeMB = 256.0; using (IndexWriter writer = new IndexWriter(dir, iwc)) { IndexDocs(writer, sourceDirectory); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.ForceMerge(1); } DateTime end = DateTime.UtcNow; Console.WriteLine((end - start).TotalMilliseconds + " total milliseconds"); } catch (IOException e) { Console.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message); } }
public void TestWithDeletions() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc); IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100)); Random rand = Random; List <string> termsToDel = new List <string>(); foreach (Document doc in docs.Values) { if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1) { termsToDel.Add(doc.Get(FIELD_NAME)); } writer.AddDocument(doc); } writer.Commit(); Term[] delTerms = new Term[termsToDel.size()]; for (int i = 0; i < termsToDel.size(); i++) { delTerms[i] = new Term(FIELD_NAME, termsToDel[i]); } foreach (Term delTerm in delTerms) { writer.DeleteDocuments(delTerm); } writer.Commit(); writer.Dispose(); foreach (string termToDel in termsToDel) { var toDel = docs[termToDel]; docs.Remove(termToDel); assertTrue(null != toDel); } IndexReader ir = DirectoryReader.Open(dir); assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0); assertEquals(ir.NumDocs, docs.size()); ValueSource[] toAdd = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) }; IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME); IInputEnumerator inputIterator = dictionary.GetEntryEnumerator(); while (inputIterator.MoveNext()) { string field = inputIterator.Current.Utf8ToString(); Document doc = docs[field]; docs.Remove(field); long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault(); long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault(); assertTrue(inputIterator.Current.equals(new BytesRef(doc.Get(FIELD_NAME)))); assertEquals(inputIterator.Weight, w2 + w1); assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue())); } assertTrue(docs.Count == 0); ir.Dispose(); dir.Dispose(); }
static void Main() { // default AzureDirectory stores cache in local temp folder CloudStorageAccount cloudStorageAccount; CloudStorageAccount.TryParse(CloudConfigurationManager.GetSetting("blobStorage"), out cloudStorageAccount); //AzureDirectory azureDirectory = new AzureDirectory(cloudStorageAccount, "TestTest", new RAMDirectory()); //AzureDirectory azureDirectory = new AzureDirectory(cloudStorageAccount, "TestTest", FSDirectory.Open(@"c:\test")); var azureDirectory = new AzureDirectory(cloudStorageAccount, "TestTest" /* default is FSDirectory.Open(@"%temp%/AzureDirectory/TestTest"); */ ); IndexWriter indexWriter = null; while (indexWriter == null) { try { var config = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_CURRENT, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT)); indexWriter = new IndexWriter(azureDirectory, config); } catch (LockObtainFailedException) { Console.WriteLine("Lock is taken, waiting for timeout..."); Thread.Sleep(1000); } } Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index"); //indexWriter.setRAMBufferSizeMB(10.0); //indexWriter.SetUseCompoundFile(false); //indexWriter.SetMaxMergeDocs(10000); //indexWriter.SetMergeFactor(100); for (int iDoc = 0; iDoc < 10000; iDoc++) { if (iDoc % 10 == 0) Console.WriteLine(iDoc); var doc = new Document(); doc.add(new TextField("id", DateTime.Now.ToFileTimeUtc().ToString(CultureInfo.InvariantCulture), Field.Store.YES)); doc.add(new TextField("Title", GeneratePhrase(10), Field.Store.YES)); doc.add(new TextField("Body", GeneratePhrase(40), Field.Store.YES)); indexWriter.addDocument(doc); } Console.WriteLine("Total docs is {0}", indexWriter.numDocs()); Console.Write("Flushing and disposing writer..."); // Potentially Expensive: this ensures that all writes are commited to blob storage indexWriter.commit(); indexWriter.close(); Console.WriteLine("done"); Console.WriteLine("Hit Key to search again"); Console.ReadKey(); IndexSearcher searcher; using (new AutoStopWatch("Creating searcher")) { searcher = new IndexSearcher(DirectoryReader.open(azureDirectory)); } SearchForPhrase(searcher, "dog"); SearchForPhrase(searcher, Random.Next(32768).ToString(CultureInfo.InvariantCulture)); SearchForPhrase(searcher, Random.Next(32768).ToString(CultureInfo.InvariantCulture)); Console.WriteLine("Hit a key to dispose and exit"); Console.ReadKey(); }
private void CreateRandomIndexes() { dir1 = NewDirectory(); dir2 = NewDirectory(); int numDocs = AtLeast(150); int numTerms = TestUtil.NextInt32(Random, 1, numDocs / 5); ISet <string> randomTerms = new HashSet <string>(); while (randomTerms.size() < numTerms) { randomTerms.add(TestUtil.RandomSimpleString(Random)); } terms = new List <string>(randomTerms); long seed = Random.NextInt64(); IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed))); IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed))); iwc2.SetMergePolicy(NewSortingMergePolicy(sort)); RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1); RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2); for (int i = 0; i < numDocs; ++i) { if (Random.nextInt(5) == 0 && i != numDocs - 1) { string term = RandomPicks.RandomFrom(Random, terms); iw1.DeleteDocuments(new Term("s", term)); iw2.DeleteDocuments(new Term("s", term)); } Document doc = randomDocument(); iw1.AddDocument(doc); iw2.AddDocument(doc); if (Random.nextInt(8) == 0) { iw1.Commit(); iw2.Commit(); } } // Make sure we have something to merge iw1.Commit(); iw2.Commit(); Document doc2 = randomDocument(); // NOTE: don't use RIW.addDocument directly, since it sometimes commits // which may trigger a merge, in which case forceMerge may not do anything. // With field updates this is a problem, since the updates can go into the // single segment in the index, and therefore the index won't be sorted. // This hurts the assumption of the test later on, that the index is sorted // by SortingMP. iw1.IndexWriter.AddDocument(doc2); iw2.IndexWriter.AddDocument(doc2); if (DefaultCodecSupportsFieldUpdates) { // update NDV of docs belonging to one term (covers many documents) long value = Random.NextInt64(); string term = RandomPicks.RandomFrom(Random, terms); iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value); iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value); } iw1.ForceMerge(1); iw2.ForceMerge(1); iw1.Dispose(); iw2.Dispose(); reader = DirectoryReader.Open(dir1); sortedReader = DirectoryReader.Open(dir2); }
public void TestCustomMergeScheduler() { // we don't really need to execute anything, just to make sure the custom MS // compiles. But ensure that it can be used as well, e.g., no other hidden // dependencies or something. Therefore, don't use any random API ! Directory dir = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); conf.SetMergeScheduler(new ReportingMergeScheduler()); IndexWriter writer = new IndexWriter(dir, conf); writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.ForceMerge(1); writer.Dispose(); dir.Dispose(); }
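For reference, a trivial scheduler like the ReportingMergeScheduler used above might look like the following sketch; it drains pending merges on the calling thread and logs each one. Member names (GetNextMerge, the MergeTrigger signature) follow the Lucene.NET 4.8 API as best as can be inferred here and should be treated as assumptions:

internal sealed class ReportingMergeScheduler : MergeScheduler
{
    public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
    {
        MergePolicy.OneMerge merge;
        while ((merge = writer.GetNextMerge()) != null) // dequeue pending merges (assumed member name)
        {
            Console.WriteLine("executing a merge of " + merge.Segments.Count + " segments");
            writer.Merge(merge); // run it synchronously on this thread
        }
    }

    protected override void Dispose(bool disposing) { } // nothing to clean up
}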
private void DoTest(DocValuesType type) { Directory d = NewDirectory(); IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); int nDocs = AtLeast(50); Field id = new NumericDocValuesField("id", 0); Field f; switch (type) { case DocValuesType.BINARY: f = new BinaryDocValuesField("dv", new BytesRef()); break; case DocValuesType.SORTED: f = new SortedDocValuesField("dv", new BytesRef()); break; case DocValuesType.NUMERIC: f = new NumericDocValuesField("dv", 0); break; default: throw new InvalidOperationException(); } Document document = new Document(); document.Add(id); document.Add(f); object[] vals = new object[nDocs]; RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig); for (int i = 0; i < nDocs; ++i) { id.SetInt64Value(i); switch (type) { case DocValuesType.SORTED: case DocValuesType.BINARY: do { vals[i] = TestUtil.RandomSimpleString(Random, 20); } while (((string)vals[i]).Length == 0); f.SetBytesValue(new BytesRef((string)vals[i])); break; case DocValuesType.NUMERIC: int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue)); f.SetInt64Value((long)vals[i]); break; } iw.AddDocument(document); if (Random.NextBoolean() && i % 10 == 9) { iw.Commit(); } } iw.Dispose(); DirectoryReader rd = DirectoryReader.Open(d); foreach (AtomicReaderContext leave in rd.Leaves) { FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave); ValueSource vs; switch (type) { case DocValuesType.BINARY: case DocValuesType.SORTED: vs = new BytesRefFieldSource("dv"); break; case DocValuesType.NUMERIC: vs = new Int64FieldSource("dv"); break; default: throw new InvalidOperationException(); } FunctionValues values = vs.GetValues(null, leave); BytesRef bytes = new BytesRef(); for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i) { assertTrue(values.Exists(i)); if (vs is BytesRefFieldSource) { assertTrue(values.ObjectVal(i) is string); } else if (vs is Int64FieldSource) { assertTrue(values.ObjectVal(i) is long?); assertTrue(values.BytesVal(i, bytes)); } else { throw new InvalidOperationException(); } object expected = vals[ids.Int32Val(i)]; switch (type) { case DocValuesType.SORTED: values.OrdVal(i); // no exception assertTrue(values.NumOrd >= 1); goto case DocValuesType.BINARY; case DocValuesType.BINARY: assertEquals(expected, values.ObjectVal(i)); assertEquals(expected, values.StrVal(i)); assertEquals(expected, values.ObjectVal(i)); assertEquals(expected, values.StrVal(i)); assertTrue(values.BytesVal(i, bytes)); assertEquals(new BytesRef((string)expected), bytes); break; case DocValuesType.NUMERIC: assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i)); break; } } } rd.Dispose(); d.Dispose(); }