// Builds an in-memory test index with a fixed set of attribute documents and
// wires up the AttributesFacetHandler so the browser can facet over them.
public void Init()
{
    facetHandlers = new List<IFacetHandler>();
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);
    selectionProperties = new Dictionary<string, string>();

    IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
    writerConfig.SetOpenMode(OpenMode.CREATE);

    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1", "prop5=val1"));
    indexWriter.AddDocument(Doc("prop1=val2", "prop3=val1", "prop7=val7"));
    indexWriter.AddDocument(Doc("prop1=val2", "prop3=val2", "prop3=val3"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1", "prop4=val2", "prop4=val3"));
    indexWriter.Commit();

    attributesFacetHandler = new AttributesFacetHandler(
        AttributeHandlerName, AttributeHandlerName, null, null,
        new Dictionary<string, string>());
    facetHandlers.Add(attributesFacetHandler);

    DirectoryReader indexReader = DirectoryReader.Open(directory);
    boboReader = BoboMultiReader.GetInstance(indexReader, facetHandlers);
    foreach (BoboSegmentReader segment in boboReader.GetSubReaders())
    {
        attributesFacetHandler.LoadFacetData(segment);
    }
    browser = new BoboBrowser(boboReader);
}
#pragma warning disable IDE0060 // Remove unused parameter
// Creates per-node index/search state: a fresh FS directory, an IndexWriter
// over a randomized MockAnalyzer, and the searcher managers used by the test.
// The pragma suppresses the warning for the intentionally unused parameter(s).
public NodeState(ShardSearchingTestBase shardSearchingTestBase, Random random, int nodeID, int numNodes)
#pragma warning restore IDE0060 // Remove unused parameter
{
    this.outerInstance = shardSearchingTestBase;
    MyNodeID = nodeID;
    Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));

    // TODO: set warmer
    MockAnalyzer mockAnalyzer = new MockAnalyzer(LuceneTestCase.Random);
    mockAnalyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);

    IndexWriterConfig writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
    writerConfig.SetOpenMode(OpenMode.CREATE);
    if (Verbose)
    {
        writerConfig.SetInfoStream(new TextWriterInfoStream(Console.Out));
    }

    Writer = new IndexWriter(Dir, writerConfig);
    Mgr = new SearcherManager(Writer, true, null);
    Searchers = new SearcherLifetimeManager();

    // Init w/ 0s... caller above will do initial
    // "broadcast" by calling initSearcher:
    currentNodeVersions = new long[numNodes];
}
// Repeatedly appends batches of randomized multi-field documents, verifying
// the per-field codec survives appends, optional force-merges, and commits.
public virtual void TestStressPerFieldCodec()
{
    Directory dir = NewDirectory(Random);
    const int docsPerRound = 97;
    int numRounds = AtLeast(1);

    for (int round = 0; round < numRounds; round++)
    {
        // Each round uses a fresh writer and a random number of fields per doc.
        int fieldCount = TestUtil.NextInt32(Random, 30, 60);
        IndexWriterConfig config = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        config.SetOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = NewWriter(dir, config);

        for (int docIndex = 0; docIndex < docsPerRound; docIndex++)
        {
            Document doc = new Document();
            for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
            {
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IsTokenized = Random.NextBoolean();
                customType.OmitNorms = Random.NextBoolean();
                doc.Add(NewField("" + fieldIndex, TestUtil.RandomRealisticUnicodeString(Random, 128), customType));
            }
            writer.AddDocument(doc);
        }

        if (Random.NextBoolean())
        {
            writer.ForceMerge(1);
        }
        writer.Commit();

        // Every round appends exactly docsPerRound more documents.
        Assert.AreEqual((round + 1) * docsPerRound, writer.MaxDoc);
        writer.Dispose();
    }
    dir.Dispose();
}
/// <summary>
/// Remove text from the existing index.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are located.</param>
/// <param name="names">An array of unique names for the text.</param>
public void RemoveText(DirectoryInfo directoryIndexInfo, string[] names)
{
    Lucene.Net.Index.IndexWriter writer = null;
    Lucene.Net.Store.Directory directory = null;

    // FIX: removed the redundant catch (Exception) { throw; } wrapper; the
    // finally block still releases resources on both success and failure.
    try
    {
        // Nothing to do when no names were supplied.
        if (names != null && names.Length > 0)
        {
            // Create the analyzer.
            SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
            StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

            // Open the existing index for appending.
            directory = FSDirectory.Open(directoryIndexInfo);
            IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
            indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
            writer = new IndexWriter(directory, indexConfig);

            // Build one delete query per name. ToLower() matches the
            // lower-casing applied to "textname" when documents were indexed.
            List<Query> queries = new List<Query>();
            foreach (string name in names)
            {
                BooleanQuery query = new BooleanQuery();
                query.Add(new TermQuery(new Term("textname", name.ToLower())), BooleanClause.Occur.MUST);
                queries.Add(query);
            }

            // Delete the matching documents and commit.
            writer.DeleteDocuments(queries.ToArray());
            writer.Commit();
        }
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
        }
        if (directory != null)
        {
            directory.Dispose();
        }
    }
}
// Rebuilds the test index and re-registers the attributes facet handler with
// MAX_FACETS_PER_KEY_PROP_NAME = "1" so the handler is capped per key.
private void ModifiedSetup()
{
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);

    IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
    writerConfig.SetOpenMode(OpenMode.CREATE);

    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1", "prop5=val1"));
    indexWriter.AddDocument(Doc("prop1=val2", "prop3=val1", "prop7=val7"));
    indexWriter.AddDocument(Doc("prop1=val2", "prop3=val2", "prop3=val3"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1", "prop4=val2", "prop4=val3"));
    indexWriter.Commit();

    IDictionary<string, string> facetProps = new Dictionary<string, string>();
    facetProps.Put(AttributesFacetHandler.MAX_FACETS_PER_KEY_PROP_NAME, "1");
    attributesFacetHandler = new AttributesFacetHandler(
        AttributeHandlerName, AttributeHandlerName, null, null, facetProps);
    facetHandlers.Add(attributesFacetHandler);

    DirectoryReader indexReader = DirectoryReader.Open(directory);
    boboReader = BoboMultiReader.GetInstance(indexReader, facetHandlers);
    foreach (BoboSegmentReader segment in boboReader.GetSubReaders())
    {
        attributesFacetHandler.LoadFacetData(segment);
    }
    browser = new BoboBrowser(boboReader);
}
// After each test: close the main writer, sanity-check the index, then
// force-merge to a single segment via a cloned APPEND config.
public override void TearDown()
{
    Iw.Dispose();
    TestUtil.CheckIndex(Dir); // for some extra coverage, checkIndex before we forceMerge

    Iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
    IndexWriter mergeWriter = new IndexWriter(Dir, (IndexWriterConfig)Iwc.Clone());
    mergeWriter.ForceMerge(1);
    mergeWriter.Dispose();

    Dir.Dispose(); // just force a checkindex for now
    base.TearDown();
}
/// <summary>
/// Remove facet documents from the existing index.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are located.</param>
/// <param name="directoryFacetInfo">The directory information where the facet files are to be placed.</param>
/// <param name="textNames">The array of names for text data.</param>
/// <param name="filePaths">The array of full paths (without root 'C:\'. e.g. 'temp/http/index.html') for file documents.</param>
public void RemoveMultiFacetDocuments(DirectoryInfo directoryIndexInfo, DirectoryInfo directoryFacetInfo, string[] textNames, string[] filePaths)
{
    Lucene.Net.Index.IndexWriter writer = null;
    DirectoryTaxonomyWriter facetWriter = null;
    Lucene.Net.Store.Directory directory = null;
    Lucene.Net.Store.Directory directoryFacet = null;

    try
    {
        // Create the analyzer.
        SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
        StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

        // Open the existing index for appending.
        directory = FSDirectory.Open(directoryIndexInfo);
        IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
        indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
        writer = new IndexWriter(directory, indexConfig);

        // Open the existing facet (taxonomy) index for appending.
        directoryFacet = FSDirectory.Open(directoryFacetInfo);
        facetWriter = new DirectoryTaxonomyWriter(directoryFacet, IndexWriterConfig.OpenMode_e.APPEND);

        // Create the delete query.
        FacetFilter filter = new FacetFilter();
        Query[] queries = filter.RemoveDocuments(textNames, filePaths);
        writer.DeleteDocuments(queries);

        // Commit the index.
        writer.Commit();
        facetWriter.Commit();
    }
    finally
    {
        // FIX: facetWriter and directoryFacet were previously never disposed,
        // leaking the taxonomy index's write lock and file handles. Also
        // removed the redundant catch (Exception) { throw; }.
        if (writer != null)
        {
            writer.Dispose();
        }
        if (facetWriter != null)
        {
            facetWriter.Dispose();
        }
        if (directory != null)
        {
            directory.Dispose();
        }
        if (directoryFacet != null)
        {
            directoryFacet.Dispose();
        }
    }
}
// Indexes ten documents dated 2020-07-16..25 as LongPoint "yyyyMMddHHmmss"
// values, then searches for text "foo" AND the date range
// [2020-07-17 00:00:00, 2020-07-19 00:00:00], which matches exactly the two
// documents stamped 07-17 08:00 and 07-18 08:00.
public void LongPointTest()
{
    var analyzer = new WhitespaceAnalyzer();
    var iwc = new IndexWriterConfig(analyzer);
    iwc.SetOpenMode(IndexWriterConfigOpenMode.CREATE);

    // --- Build the index ---------------------------------------------------
    DateTime baseDate = DateTime.Parse("2020/07/16 08:00:00");
    var ram = new RAMDirectory();
    var writer = new IndexWriter(ram, iwc);
    try
    {
        for (int i = 0; i < 10; i++)
        {
            var doc = new Document();
            doc.Add(new TextField("text", "hoge foo", FieldStore.YES));
            DateTime tmp = baseDate.AddDays(i);
            long l = long.Parse(tmp.ToString("yyyyMMddHHmmss"));
            doc.Add(new LongPoint("date", l));   // indexed for range queries
            doc.Add(new StoredField("date", l)); // stored for retrieval
            writer.AddDocument(doc);
        }
    }
    finally
    {
        writer.Close();
    }

    // --- Search --------------------------------------------------------------
    TermQuery tq = new TermQuery(new Term("text", "foo"));
    Query rq = LongPoint.NewRangeQuery("date", 20200717000000, 20200719000000);
    BooleanQueryBuilder b = new BooleanQueryBuilder();
    b.Add(tq, BooleanClauseOccur.MUST);   // AND condition
    b.Add(rq, BooleanClauseOccur.FILTER); // AND condition (does not contribute to scoring)
    Query q = b.Build();

    DirectoryReader dr = DirectoryReader.Open(ram);
    IndexSearcher searcher = new IndexSearcher(dr);
    ScoreDoc[] hits = searcher.Search(q, 100).ScoreDocs;
    for (int i = 0; i < hits.Length; i++)
    {
        var doc = searcher.Doc(hits[i].Doc);
        Debug.WriteLine(DateTime.ParseExact(doc.Get("date"), "yyyyMMddHHmmss", null));
    }

    // FIX: Assert.AreEqual takes (expected, actual); the arguments were
    // reversed, producing a misleading message on failure.
    Assert.AreEqual(2, hits.Length);
    // NOTE(review): dr and ram are never released here — confirm whether this
    // wrapper API exposes Close/Dispose on the reader/directory and add it.
}
/// <summary>
/// Override this to customize index settings, e.g. which codec to use.
/// </summary>
/// <param name="matchVersion">Lucene compatibility version for the config.</param>
/// <param name="indexAnalyzer">Analyzer used when indexing.</param>
/// <param name="openMode">How the underlying index should be opened.</param>
/// <returns>A fully configured <see cref="IndexWriterConfig"/>.</returns>
protected internal virtual IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode)
{
    IndexWriterConfig config = new IndexWriterConfig(matchVersion, indexAnalyzer);
    config.SetCodec(new Lucene46Codec());
    config.SetOpenMode(openMode);

    // This way all merged segments will be sorted at merge time, allowing
    // per-segment early termination when those segments are searched:
    config.SetMergePolicy(new SortingMergePolicy(config.MergePolicy, SORT));
    return config;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Ensures that an IndexWriter is open for each requested index type. </summary>
///
/// <remarks> Semantika d.o.o.,. </remarks>
///
/// <param name="recreateIndex"> (Optional) True to recreate index. </param>
/// <param name="indexType">     (Optional) Type of the index. </param>
////////////////////////////////////////////////////////////////////////////////////////////////////
private void EnsureOpenWriter(bool recreateIndex = false, IndexType? indexType = null)
{
    if (IsReadOnly)
    {
        throw new InvalidOperationException("The Index has been open as read only!");
    }

    // When a specific index type is requested, open only that one;
    // otherwise open every supported type.
    IEnumerable<IndexType> typesToOpen = GetIndexTypesArray(indexType);

    //lock (m_indexIOLock)
    {
        foreach (var currentIndexType in typesToOpen)
        {
            var management = m_managementObjects[currentIndexType];

            // (Re)create the writer when it is missing or already closed.
            if (management.Writer == null || management.Writer.IsClosed)
            {
                // Basic configuration: main analyzer, commit everything on close.
                IndexWriterConfig iwic = new IndexWriterConfig(
                    LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
                iwic.SetOpenMode(recreateIndex ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
                iwic.SetMaxThreadStates(12);
                management.Writer = new IndexWriter(management.Directory, iwic);
            }
        }
    }
}
/// <summary>
/// Remove documents from the existing index.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are located.</param>
/// <param name="directoryInfo">The top level relative directory information where all the files that are to be removed are located.</param>
/// <param name="files">The array of all files that are to be removed relative to the directory info.</param>
/// <param name="documents">The supported documents search filter, used to indicate what files are to be removed.</param>
public void RemoveDocuments(DirectoryInfo directoryIndexInfo, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
{
    Lucene.Net.Index.IndexWriter writer = null;
    Lucene.Net.Store.Directory directory = null;

    // FIX: removed the redundant catch (Exception) { throw; } wrapper; the
    // finally block still releases resources on both success and failure.
    try
    {
        // Nothing to do without a document filter.
        if (documents != null)
        {
            // Create the analyzer.
            SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
            StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

            // Open the existing index for appending.
            directory = FSDirectory.Open(directoryIndexInfo);
            IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
            indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
            writer = new IndexWriter(directory, indexConfig);

            // Build the delete queries from the directory filter.
            DirectoryFilter filter = new DirectoryFilter();
            Query[] queries = filter.RemoveDocuments(directoryInfo, files, documents);
            writer.DeleteDocuments(queries);

            // Commit the index.
            writer.Commit();
        }
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
        }
        if (directory != null)
        {
            directory.Dispose();
        }
    }
}
// Runs an index-writing action against a cached IndexWriter for the given
// index name, lazily creating the writer (and its directory) on first use.
// When close=true the writer is evicted from the cache and disposed afterwards.
//
// NOTE(review): GetOrAdd's value factory may run more than once under
// contention (only one result is kept), which could open two IndexWriters on
// the same directory; and the TryRemove/Dispose pair can dispose a writer
// another thread just obtained (or NRE if another thread removed it first).
// Confirm single-threaded use or add synchronization.
private void Write(string indexName, Action<IndexWriter> action, bool close = false)
{
    var writer = _writers.GetOrAdd(indexName, name =>
    {
        var directory = CreateDirectory(indexName);
        var config = new IndexWriterConfig(LuceneVersion, new StandardAnalyzer(LuceneVersion));
        config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
        return (new IndexWriter(directory, config));
    });

    // Invoke the caller's work against the (possibly shared) writer.
    action?.Invoke(writer);

    if (close)
    {
        _writers.TryRemove(indexName, out writer);
        writer.Dispose();
    }
}
// Builds a single-document in-memory index carrying two hierarchical "path"
// values and registers a PathFacetHandler over it.
public void Init()
{
    facetHandlers = new List<IFacetHandler>();
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);

    IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
    writerConfig.SetOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(directory, writerConfig);
    Document document = new Document();
    AddMetaDataField(document, PathHandlerName, new String[] { "/a/b/c", "/a/b/d" });
    writer.AddDocument(document);
    writer.Commit();

    PathFacetHandler pathFacetHandler = new PathFacetHandler("path", true);
    facetHandlers.Add(pathFacetHandler);
}
/// <summary>
/// Create a new index store within the specified directory.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are to be placed.</param>
public void CreateIndex(DirectoryInfo directoryIndexInfo)
{
    Lucene.Net.Index.IndexWriter writer = null;
    Lucene.Net.Store.Directory directory = null;

    // FIX: removed the redundant catch (Exception) { throw; } wrapper; the
    // finally block still releases resources on both success and failure.
    try
    {
        // Create the analyzer.
        SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
        StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

        // Create (or overwrite) the index.
        directory = FSDirectory.Open(directoryIndexInfo);
        IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
        indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
        writer = new IndexWriter(directory, indexConfig);

        // Commit so an empty-but-valid index exists on disk.
        writer.Commit();
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
        }
        if (directory != null)
        {
            directory.Dispose();
        }
    }
}
// Builds the per-node search state: temp FS directory, IndexWriter over a
// randomized MockAnalyzer, SearcherManager/SearcherLifetimeManager, and a
// zeroed node-version vector (filled in later via initSearcher).
public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
{
    this.OuterInstance = outerInstance;
    MyNodeID = nodeID;
    Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));

    // TODO: set warmer
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());
    mockAnalyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

    IndexWriterConfig writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
    writerConfig.SetOpenMode(OpenMode.CREATE);
    if (VERBOSE)
    {
        writerConfig.SetInfoStream(new PrintStreamInfoStream(Console.Out));
    }

    Writer = new IndexWriter(Dir, writerConfig);
    Mgr = new SearcherManager(Writer, true, null);
    Searchers = new SearcherLifetimeManager();

    // Init w/ 0s... caller above will do initial
    // "broadcast" by calling initSearcher:
    CurrentNodeVersions = new long[numNodes];
}
/// <summary>
/// Add text to the existing index.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are located.</param>
/// <param name="addTextData">The text data to add.</param>
public void AddText(DirectoryInfo directoryIndexInfo, AddTextData[] addTextData)
{
    Lucene.Net.Index.IndexWriter writer = null;
    Lucene.Net.Store.Directory directory = null;
    long totalTextLength = 0;
    // Commit threshold: flush to the index once roughly this many characters
    // of text have been added since the last commit.
    long maxTextLengthBeforeCommit = 30000000L;

    try
    {
        // If text exists.
        if (addTextData != null && addTextData.Length > 0)
        {
            // Create the analyzer.
            SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
            StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

            // Create the index writer.
            directory = FSDirectory.Open(directoryIndexInfo);
            IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
            indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);

            // Open existing or create new.
            writer = new IndexWriter(directory, indexConfig);

            // Field type for "textname": indexed, not tokenized, always stored.
            FieldType nameFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed = true,
                Tokenized = false,
                Stored = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // Field type for "textcomplete": Stored is toggled per item below
            // (shared mutable instance — see data.StoreText in the loop).
            FieldType completeFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed = true,
                Tokenized = false,
                Stored = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // Field type for the per-word "text" fields: indexed, never stored.
            FieldType textFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed = true,
                Tokenized = false,
                Stored = false,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // For each text.
            foreach (AddTextData data in addTextData)
            {
                // Should the data be stored.
                completeFieldType.Stored = data.StoreText;

                // Create the document.
                Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
                Lucene.Net.Documents.Field textName = new Field("textname", data.Name.ToLower(), nameFieldType);
                Lucene.Net.Documents.Field textComplete = new Field("textcomplete", data.Text.ToLower(), completeFieldType);
                document.Add(textName);
                document.Add(textComplete);

                // Split the white spaces from the text.
                string[] words = data.Text.Words();

                // If words exist.
                if (words != null && words.Length > 0)
                {
                    // Add one "text" field per word.
                    for (int j = 0; j < words.Length; j++)
                    {
                        // Format the word (lower-case, strip surrounding punctuation).
                        string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                        // If a word exists.
                        if (!String.IsNullOrEmpty(word))
                        {
                            Lucene.Net.Documents.Field textData = new Field("text", word, textFieldType);
                            document.Add(textData);
                        }
                    }
                }

                // Add the document.
                writer.AddDocument(document.Fields);

                // Commit once enough text has accumulated since the last commit.
                totalTextLength += (long)data.Text.Length;
                if (totalTextLength > maxTextLengthBeforeCommit)
                {
                    // Commit the index.
                    writer.Commit();
                    totalTextLength = 0;
                }
            }

            // Final commit for any remaining uncommitted documents.
            writer.Commit();
        }
    }
    catch (Exception)
    {
        throw;
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
        }
        if (directory != null)
        {
            directory.Dispose();
        }
    }
}
/// <summary>
/// Add documents to the existing index.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are located.</param>
/// <param name="directoryFacetInfo">The directory information where the facet files are to be placed.</param>
/// <param name="facetData">The complete facet information used to build the index information.</param>
public void AddMultiFacetDocuments(DirectoryInfo directoryIndexInfo, DirectoryInfo directoryFacetInfo, FacetData facetData)
{
    Lucene.Net.Index.IndexWriter writer = null;
    DirectoryTaxonomyWriter facetWriter = null;
    Lucene.Net.Store.Directory directory = null;
    Lucene.Net.Store.Directory directoryFacet = null;

    // FIX: removed the redundant catch (Exception) { throw; } wrapper; the
    // finally block still releases all resources on both success and failure.
    try
    {
        // Nothing to do without facet data.
        if (facetData != null)
        {
            // Create the analyzer.
            SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
            StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

            // Open the existing index for appending.
            directory = FSDirectory.Open(directoryIndexInfo);
            IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
            indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
            writer = new IndexWriter(directory, indexConfig);

            // Open the existing facet (taxonomy) index for appending.
            directoryFacet = FSDirectory.Open(directoryFacetInfo);
            facetWriter = new DirectoryTaxonomyWriter(directoryFacet, IndexWriterConfig.OpenMode_e.APPEND);

            // Index the facet documents.
            FacetFilter filter = new FacetFilter();
            filter.AddDocuments(writer, facetWriter, facetData);

            // Commit both indexes.
            writer.Commit();
            facetWriter.Commit();
        }
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
        }
        if (facetWriter != null)
        {
            facetWriter.Dispose();
        }
        if (directory != null)
        {
            directory.Dispose();
        }
        if (directoryFacet != null)
        {
            directoryFacet.Dispose();
        }
    }
}
// Builds the per-node search state: temp FS directory, IndexWriter over a
// randomized MockAnalyzer, SearcherManager/SearcherLifetimeManager, and a
// zeroed node-version vector (filled in later via initSearcher).
public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
{
    this.OuterInstance = outerInstance;
    MyNodeID = nodeID;
    Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));

    // TODO: set warmer
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());
    mockAnalyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

    IndexWriterConfig writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
    writerConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
    if (VERBOSE)
    {
        writerConfig.InfoStream = new PrintStreamInfoStream(Console.Out);
    }

    Writer = new IndexWriter(Dir, writerConfig);
    Mgr = new SearcherManager(Writer, true, null);
    Searchers = new SearcherLifetimeManager();

    // Init w/ 0s... caller above will do initial
    // "broadcast" by calling initSearcher:
    CurrentNodeVersions = new long[numNodes];
}
// LUCENE-5461
// Stress test for ControlledRealTimeReopenThread: index 500 docs, kicking off
// a background commit thread every 50, asserting WaitForGeneration never
// exceeds the configured staleness bound and each doc is immediately visible.
public virtual void TestCRTReopen()
{
    //test behaving badly

    //should be high enough
    int maxStaleSecs = 20;

    //build crap data just to store it.
    string s = " abcdefghijklmnopqrstuvwxyz ";
    char[] chars = s.ToCharArray();
    StringBuilder builder = new StringBuilder(2048);
    for (int i = 0; i < 2048; i++)
    {
        builder.Append(chars[Random().Next(chars.Length)]);
    }
    string content = builder.ToString();

    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new MockAnalyzer(Random()));
    config.SetIndexDeletionPolicy(sdp);
    config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(dir, config);
    SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
    TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
    ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread =
        new ControlledRealTimeReopenThread<IndexSearcher>(tiw, sm, maxStaleSecs, 0);

    controlledRealTimeReopenThread.SetDaemon(true);
    controlledRealTimeReopenThread.Start();

    IList<Thread> commitThreads = new List<Thread>();

    for (int i = 0; i < 500; i++)
    {
        if (i > 0 && i % 50 == 0)
        {
            Thread commitThread = new Thread(new RunnableAnonymousInnerClassHelper(this, sdp, dir, iw));
            commitThread.Start();
            commitThreads.Add(commitThread);
        }
        Document d = new Document();
        d.Add(new TextField("count", i + "", Field.Store.NO));
        d.Add(new TextField("content", content, Field.Store.YES));
        // FIX: DateTime.Now.Millisecond is only the 0-999 millisecond
        // component of the wall clock, not a timestamp, so the elapsed-time
        // check below was meaningless (and could even go negative). Use a
        // real millisecond timestamp instead.
        long start = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
        long l = tiw.AddDocument(d);
        controlledRealTimeReopenThread.WaitForGeneration(l);
        long wait = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond) - start;
        Assert.IsTrue(wait < (maxStaleSecs * 1000), "waited too long for generation " + wait);
        IndexSearcher searcher = sm.Acquire();
        TopDocs td = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
        sm.Release(searcher);
        Assert.AreEqual(1, td.TotalHits);
    }

    foreach (Thread commitThread in commitThreads)
    {
        commitThread.Join();
    }

    controlledRealTimeReopenThread.Dispose();
    sm.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Build the suggest index, using up to the specified
/// amount of temporary RAM while building. Note that
/// the weights for the suggestions are ignored.
/// </summary>
/// <param name="enumerator">Source of suggestions; payloads and contexts are not supported.</param>
/// <param name="ramBufferSizeMB">RAM buffer size for the temporary index writer.</param>
/// <exception cref="ArgumentException">
/// If the enumerator has payloads/contexts, if no suggestion was supplied, or
/// if a token contains the separator byte.
/// </exception>
public virtual void Build(IInputEnumerator enumerator, double ramBufferSizeMB)
{
    if (enumerator.HasPayloads)
    {
        throw new ArgumentException("this suggester doesn't support payloads");
    }
    if (enumerator.HasContexts)
    {
        throw new ArgumentException("this suggester doesn't support contexts");
    }
    string prefix = this.GetType().Name;
    var directory = OfflineSorter.DefaultTempDir();

    // Pick a unique temp directory for the throwaway index built below.
    // LUCENENET specific - using GetRandomFileName() instead of picking a random int
    DirectoryInfo tempIndexPath = null;
    while (true)
    {
        tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName())));
        tempIndexPath.Create();
        if (System.IO.Directory.Exists(tempIndexPath.FullName))
        {
            break;
        }
    }

    Directory dir = FSDirectory.Open(tempIndexPath);
    try
    {
#pragma warning disable 612, 618
        IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
        iwc.SetOpenMode(OpenMode.CREATE);
        iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
        IndexWriter writer = new IndexWriter(dir, iwc);

        var ft = new FieldType(TextField.TYPE_NOT_STORED);
        // TODO: if only we had IndexOptions.TERMS_ONLY...
        ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
        ft.OmitNorms = true;
        ft.Freeze();

        // A single reusable document/field; the field value is swapped per suggestion.
        Document doc = new Document();
        Field field = new Field("body", "", ft);
        doc.Add(field);

        totTokens = 0;
        IndexReader reader = null;

        bool success = false;
        count = 0;
        try
        {
            // Index every suggestion's surface form into the temp index.
            while (enumerator.MoveNext())
            {
                BytesRef surfaceForm = enumerator.Current;
                field.SetStringValue(surfaceForm.Utf8ToString());
                writer.AddDocument(doc);
                count++;
            }

            // Near-real-time reader over the un-committed temp index.
            reader = DirectoryReader.Open(writer, false);

            Terms terms = MultiFields.GetTerms(reader, "body");
            if (terms == null)
            {
                throw new ArgumentException("need at least one suggestion");
            }

            // Move all ngrams into an FST:
            TermsEnum termsEnum = terms.GetEnumerator(null);
            Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
            Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

            Int32sRef scratchInts = new Int32sRef();
            while (termsEnum.MoveNext())
            {
                BytesRef term = termsEnum.Term;
                int ngramCount = CountGrams(term);
                if (ngramCount > grams)
                {
                    throw new ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                }
                // Unigram frequencies accumulate into the corpus token total.
                if (ngramCount == 1)
                {
                    totTokens += termsEnum.TotalTermFreq;
                }

                builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
            }

            fst = builder.Finish();
            if (fst == null)
            {
                throw new ArgumentException("need at least one suggestion");
            }
            //System.out.println("FST: " + fst.getNodeCount() + " nodes");

            /*
             * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
             * Util.toDot(fst, pw, true, true);
             * pw.close();
             */
            success = true;
        }
        finally
        {
            // On failure, suppress secondary dispose exceptions so the
            // original error propagates.
            if (success)
            {
                IOUtils.Dispose(writer, reader);
            }
            else
            {
                IOUtils.DisposeWhileHandlingException(writer, reader);
            }
        }
    }
    finally
    {
        try
        {
            IOUtils.Dispose(dir);
        }
        finally
        {
            // LUCENENET specific - since we are removing the entire directory anyway,
            // it doesn't make sense to first do a loop in order remove the files.
            // Let the System.IO.Directory.Delete() method handle that.
            // We also need to dispose the Directory instance first before deleting from disk.
            try
            {
                System.IO.Directory.Delete(tempIndexPath.FullName, true);
            }
            catch (Exception e)
            {
                throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
            }
        }
    }
}
// Stress test for ControlledRealTimeReopenThread (LUCENE-5461): index 500
// docs, spawning a background commit thread every 50, asserting that
// WaitForGeneration never exceeds the staleness bound and that each newly
// added doc is immediately searchable.
public virtual void TestCRTReopen()
{
    //test behaving badly

    //should be high enough
    int maxStaleSecs = 20;

    //build crap data just to store it.
    string s = " abcdefghijklmnopqrstuvwxyz ";
    char[] chars = s.ToCharArray();
    StringBuilder builder = new StringBuilder(2048);
    for (int i = 0; i < 2048; i++)
    {
        builder.Append(chars[Random.Next(chars.Length)]);
    }
    string content = builder.ToString();

    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
    IndexWriterConfig config = new IndexWriterConfig(
#pragma warning disable 612, 618
        Version.LUCENE_46,
#pragma warning restore 612, 618
        new MockAnalyzer(Random));
    config.SetIndexDeletionPolicy(sdp);
    config.SetOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(dir, config);
    SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
    TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
    ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread =
        new ControlledRealTimeReopenThread<IndexSearcher>(tiw, sm, maxStaleSecs, 0);

    controlledRealTimeReopenThread.IsBackground = (true);
    controlledRealTimeReopenThread.Start();

    IList<ThreadJob> commitThreads = new JCG.List<ThreadJob>();

    for (int i = 0; i < 500; i++)
    {
        if (i > 0 && i % 50 == 0)
        {
            // Kick off a background commit+snapshot thread every 50 docs.
            ThreadJob commitThread = new RunnableAnonymousClass(this, sdp, dir, iw);
            commitThread.Start();
            commitThreads.Add(commitThread);
        }
        Document d = new Document();
        d.Add(new TextField("count", i + "", Field.Store.NO));
        d.Add(new TextField("content", content, Field.Store.YES));
        long start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        long l = tiw.AddDocument(d);
        controlledRealTimeReopenThread.WaitForGeneration(l);
        long wait = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - start; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        assertTrue("waited too long for generation " + wait, wait < (maxStaleSecs * 1000));
        IndexSearcher searcher = sm.Acquire();
        TopDocs td = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
        sm.Release(searcher);
        assertEquals(1, td.TotalHits);
    }

    foreach (ThreadJob commitThread in commitThreads)
    {
        commitThread.Join();
    }

    controlledRealTimeReopenThread.Dispose();
    sm.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes MAXDOC documents containing eight text fields that exercise every
/// IndexOptions / term-vector combination, then verifies the index both before
/// and after a forceMerge(1).
/// </summary>
public virtual void Test()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, (IndexWriterConfig)iwc.Clone());

    // Field types under test; term vectors are turned on everywhere as a cross-check.
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        IndexOptions = IndexOptions.DOCS_ONLY
    };
    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        IndexOptions = IndexOptions.DOCS_AND_FREQS
    };
    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorPositions = true,
        StoreTermVectorOffsets = true,
        StoreTermVectorPayloads = true
    };
    FieldType offsetsType = new FieldType(positionsType)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };

    // One field per combination; all eight receive the same text for each doc.
    Field[] fields =
    {
        new Field("field1docs", "", docsOnlyType),
        new Field("field2freqs", "", docsAndFreqsType),
        new Field("field3positions", "", positionsType),
        new Field("field4offsets", "", offsetsType),
        new Field("field5payloadsFixed", "", positionsType),
        new Field("field6payloadsVariable", "", positionsType),
        new Field("field7payloadsFixedOffsets", "", offsetsType),
        new Field("field8payloadsVariableOffsets", "", offsetsType)
    };
    Document doc = new Document();
    foreach (Field f in fields)
    {
        doc.Add(f);
    }

    for (int i = 0; i < MAXDOC; i++)
    {
        string stringValue = Convert.ToString(i) + " verycommon " + English.Int32ToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random);
        foreach (Field f in fields)
        {
            f.SetStringValue(stringValue);
        }
        iw.AddDocument(doc);
    }

    iw.Dispose();
    Verify(dir);
    TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge

    // Reopen in APPEND mode, collapse to a single segment, and verify again.
    iwc.SetOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
    iw2.ForceMerge(1);
    iw2.Dispose();
    Verify(dir);
    dir.Dispose();
}
/// <summary>
/// Build the suggest index, using up to the specified
/// amount of temporary RAM while building. Note that
/// the weights for the suggestions are ignored.
/// </summary>
/// <param name="iterator">Source of suggestion surface forms; payloads and contexts are not supported.</param>
/// <param name="ramBufferSizeMB">RAM budget (in MB) for the temporary <see cref="IndexWriter"/>.</param>
/// <exception cref="System.ArgumentException">
/// If the iterator has payloads/contexts, if no suggestion was supplied, or if a
/// token contains the separator byte (ngram count exceeds <c>grams</c>).
/// </exception>
public virtual void Build(IInputIterator iterator, double ramBufferSizeMB)
{
    if (iterator.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    string prefix = this.GetType().Name;
    var directory = OfflineSorter.DefaultTempDir();

    // TODO: messy ... java7 has Files.createTempDirectory
    // ... but 4.x is java6:

    // Pick a unique temp directory. BUGFIX: check for existence BEFORE creating —
    // DirectoryInfo.Create() is a no-op when the directory already exists, so the
    // old create-then-Exists() order always "succeeded" and could silently reuse
    // a leftover directory from a previous/concurrent build (Java's mkdir()
    // returns false in that case). NOTE(review): a check-then-act race between
    // processes remains, matching the original Java code's guarantees.
    DirectoryInfo tempIndexPath = null;
    Random random = new Random();
    while (true)
    {
        tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + random.Next(int.MaxValue)));
        if (!System.IO.Directory.Exists(tempIndexPath.FullName))
        {
            tempIndexPath.Create();
            break;
        }
    }

    using (Directory dir = FSDirectory.Open(tempIndexPath))
    {
#pragma warning disable 612, 618
        IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
        iwc.SetOpenMode(OpenMode.CREATE);
        iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
        IndexWriter writer = new IndexWriter(dir, iwc);

        var ft = new FieldType(TextField.TYPE_NOT_STORED);
        // TODO: if only we had IndexOptions.TERMS_ONLY...
        ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
        ft.OmitNorms = true;
        ft.Freeze();

        Document doc = new Document();
        Field field = new Field("body", "", ft);
        doc.Add(field);

        totTokens = 0;
        IndexReader reader = null;

        bool success = false;
        count = 0;
        try
        {
            // Index every surface form; the analyzer emits the ngrams we
            // aggregate into the FST below.
            while (true)
            {
                BytesRef surfaceForm = iterator.Next();
                if (surfaceForm == null)
                {
                    break;
                }
                field.SetStringValue(surfaceForm.Utf8ToString());
                writer.AddDocument(doc);
                count++;
            }
            reader = DirectoryReader.Open(writer, false);

            Terms terms = MultiFields.GetTerms(reader, "body");
            if (terms == null)
            {
                throw new System.ArgumentException("need at least one suggestion");
            }

            // Move all ngrams into an FST:
            TermsEnum termsEnum = terms.GetIterator(null);
            Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
            Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

            Int32sRef scratchInts = new Int32sRef();
            while (true)
            {
                BytesRef term = termsEnum.Next();
                if (term == null)
                {
                    break;
                }
                int ngramCount = CountGrams(term);
                if (ngramCount > grams)
                {
                    throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                }
                if (ngramCount == 1)
                {
                    // Unigrams contribute to the total token count used for smoothing.
                    totTokens += termsEnum.TotalTermFreq;
                }

                builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
            }

            fst = builder.Finish();
            if (fst == null)
            {
                throw new System.ArgumentException("need at least one suggestion");
            }
            //System.out.println("FST: " + fst.getNodeCount() + " nodes");

            /*
             * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
             * Util.toDot(fst, pw, true, true);
             * pw.close();
             */
            success = true;
        }
        finally
        {
            try
            {
                if (success)
                {
                    IOUtils.Dispose(writer, reader);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(writer, reader);
                }
            }
            finally
            {
                // Clean up the temporary index. NOTE(review): a throw from this
                // finally block masks any exception from the build itself — kept
                // as-is to preserve the original contract.
                foreach (string file in dir.ListAll())
                {
                    FileInfo path = new FileInfo(Path.Combine(tempIndexPath.FullName, file));
                    try
                    {
                        path.Delete();
                    }
                    catch (Exception e)
                    {
                        throw new InvalidOperationException("failed to remove " + path, e);
                    }
                }

                try
                {
                    tempIndexPath.Delete();
                }
                catch (Exception e)
                {
                    throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
                }
            }
        }
    }
}