/// <summary>
/// One-time fixture setup: creates the search index and the taxonomy index used by
/// this test class and records the expected facet counts while indexing.
/// </summary>
public static void BeforeClassCountingFacetsAggregatorTest()
{
    indexDir = NewDirectory();
    taxoDir = NewDirectory();

    // The index is populated in four batches, intended to end up as:
    //   1. a segment with no categories, but matching results
    //   2. a segment with categories, but no results
    //   3. a segment with categories and results
    //   4. a segment with categories, but only some results
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    // NOTE(review): the merge-policy override below is disabled, so merges are not
    // explicitly prevented and the segment layout above is not guaranteed -- confirm.
    //writerConfig.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges, so we can control the index segments

    IndexWriter docWriter = new IndexWriter(indexDir, writerConfig);
    TaxonomyWriter categoryWriter = new DirectoryTaxonomyWriter(taxoDir);

    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();

    // batch 1: no categories
    IndexDocsNoFacets(docWriter);

    // batch 2: categories, no content
    IndexDocsWithFacetsNoTerms(docWriter, categoryWriter, allExpectedCounts);

    // batch 3: categories and content
    IndexDocsWithFacetsAndTerms(docWriter, categoryWriter, allExpectedCounts);

    // batch 4: categories and some content
    IndexDocsWithFacetsAndSomeTerms(docWriter, categoryWriter, allExpectedCounts);

    IOUtils.Close(docWriter, categoryWriter);
}
/// <summary>
/// Writes one Lucene document for <paramref name="article"/> into <c>_directory</c>.
/// The writer is opened in <c>CREATE</c> mode and committed before it is disposed.
/// </summary>
/// <param name="article">Article whose Id, Title, Summary and CreateTime are stored and indexed.</param>
private void CreateIndex(EditViewArticle article)
{
    // No analyzer is configured on the writer itself; one is passed per document below.
    var writerConfig = new Lucene.Net.Index.IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, null);
    writerConfig.OpenMode = Lucene.Net.Index.OpenMode.CREATE;

    using (var indexWriter = new Lucene.Net.Index.IndexWriter(_directory, writerConfig))
    {
        var document = new Lucene.Net.Documents.Document();

        document.Add(new Lucene.Net.Documents.TextField("Id", article.Id.ToString(), Lucene.Net.Documents.Field.Store.YES));
        document.Add(new Lucene.Net.Documents.TextField("Title", article.Title, Lucene.Net.Documents.Field.Store.YES));

        // The HTML body (article.Contents) is not indexed here; the line that added a
        // "Contents" field is disabled. Only the plain-text summary is indexed.
        document.Add(new Lucene.Net.Documents.TextField("TContents", article.Summary, Lucene.Net.Documents.Field.Store.YES));
        document.Add(new Lucene.Net.Documents.TextField("CreateTime", article.CreateTime.ToString(), Lucene.Net.Documents.Field.Store.YES));

        indexWriter.AddDocument(document, this.CreateAnalyzer());
        indexWriter.Commit();
    }
}
/// <summary>
/// Per-test setup: creates a temp file-system directory, a writer config whose codec
/// always uses <c>Lucene41PostingsFormat</c>, and a <c>RandomIndexWriter</c> over a clone
/// of that config.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    Dir = NewFSDirectory(CreateTempDir("testDFBlockSize"));

    MockAnalyzer analyzer = new MockAnalyzer(Random());
    Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));

    // The writer receives a clone, so Iwc itself stays untouched by this writer.
    IndexWriterConfig writerConfig = (IndexWriterConfig)Iwc.Clone();
    Iw = new RandomIndexWriter(Random(), Dir, writerConfig);

    // Random force-merging is switched off; the tests trigger merges themselves.
    Iw.RandomForceMerge = false;
}
/// <summary>
/// Smoke test for <c>ReportingMergeScheduler</c>: the point is that the custom merge
/// scheduler compiles and can be plugged into an <c>IndexWriter</c> with no hidden
/// dependencies. For that reason no randomized test API is used here.
/// </summary>
public void TestCustomMergeScheduler()
{
    Directory ramDir = new RAMDirectory();

    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
    config.SetMergeScheduler(new ReportingMergeScheduler());

    IndexWriter indexWriter = new IndexWriter(ramDir, config);

    // Two rounds of add + commit; each commit triggers a flush.
    for (int round = 0; round < 2; round++)
    {
        indexWriter.AddDocument(new Document());
        indexWriter.Commit();
    }

    indexWriter.ForceMerge(1);

    indexWriter.Dispose();
    ramDir.Dispose();
}
} // End Function GetWrappedAnalyzer

/// <summary>
/// Builds (overwriting any existing index) a Lucene index at <paramref name="indexPath"/>
/// containing one document per entry of <paramref name="dataToIndex"/>. Each entry is
/// treated as a file path and split into untokenized, stored fields:
/// full_name, directory_name, file_name, filename_no_extension and extension.
/// </summary>
/// <param name="indexPath">File-system location of the index.</param>
/// <param name="dataToIndex">Paths to index; null entries are skipped.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="dataToIndex"/> is null.</exception>
private static void BuildIndex(string indexPath, System.Collections.Generic.IEnumerable<string> dataToIndex)
{
    // Fail before the existing index is opened in CREATE mode and wiped.
    if (dataToIndex == null)
    {
        throw new System.ArgumentNullException("dataToIndex");
    }

    Lucene.Net.Util.LuceneVersion version = Lucene.Net.Util.LuceneVersion.LUCENE_48;

    // Alternatives tried previously: WhitespaceAnalyzer, StandardAnalyzer, KeywordAnalyzer.
    Lucene.Net.Analysis.Analyzer analyzer = GetWrappedAnalyzer();

    Lucene.Net.Index.IndexWriterConfig writerConfig = new Lucene.Net.Index.IndexWriterConfig(version, analyzer);
    writerConfig.OpenMode = Lucene.Net.Index.OpenMode.CREATE; // Overwrite, if exists

    // The directory is opened here, so it is disposed here (after the writer).
    using (Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath))
    using (Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, writerConfig))
    {
        foreach (string thisValue in dataToIndex)
        {
            // A null path cannot be stored in a field; skip it instead of aborting the whole build.
            if (thisValue == null)
            {
                continue;
            }

            // GetDirectoryName returns null for root paths; fields cannot hold null values.
            string directory_name = System.IO.Path.GetDirectoryName(thisValue) ?? string.Empty;
            string file_name = System.IO.Path.GetFileName(thisValue);
            string filename_no_extension = System.IO.Path.GetFileNameWithoutExtension(thisValue);
            string extension = System.IO.Path.GetExtension(thisValue);

            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

            // StringField indexes but doesn't tokenize
            doc.Add(new Lucene.Net.Documents.StringField("full_name", thisValue, Lucene.Net.Documents.Field.Store.YES));
            doc.Add(new Lucene.Net.Documents.StringField("directory_name", directory_name, Lucene.Net.Documents.Field.Store.YES));
            doc.Add(new Lucene.Net.Documents.StringField("file_name", file_name, Lucene.Net.Documents.Field.Store.YES));
            doc.Add(new Lucene.Net.Documents.StringField("filename_no_extension", filename_no_extension, Lucene.Net.Documents.Field.Store.YES));
            doc.Add(new Lucene.Net.Documents.StringField("extension", extension, Lucene.Net.Documents.Field.Store.YES));

            writer.AddDocument(doc);
        } // Next thisValue

        writer.Flush(true, true);
    } // Dispose needs to be called, otherwise the index cannot be read ...

} // End Sub BuildIndex
/// <summary>
/// Creates the state for one node: a fresh index in a temp directory, an
/// <c>IndexWriter</c> opened in CREATE mode, a <c>SearcherManager</c> over that writer,
/// a <c>SearcherLifetimeManager</c>, and a zero-initialized version slot per node.
/// </summary>
/// <param name="outerInstance">Enclosing test instance.</param>
/// <param name="random">Not used by this constructor; randomness comes from <c>Random()</c>.</param>
/// <param name="nodeID">Identifier assigned to this node.</param>
/// <param name="numNodes">Total node count; sizes <c>CurrentNodeVersions</c>.</param>
public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
{
    this.OuterInstance = outerInstance;
    MyNodeID = nodeID;
    Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));

    // TODO: set warmer
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());
    int maxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    mockAnalyzer.MaxTokenLength = maxTokenLength;

    IndexWriterConfig writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
    writerConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
    if (VERBOSE)
    {
        writerConfig.InfoStream = new PrintStreamInfoStream(Console.Out);
    }

    Writer = new IndexWriter(Dir, writerConfig);
    Mgr = new SearcherManager(Writer, true, null);
    Searchers = new SearcherLifetimeManager();

    // Starts out as all zeros; the caller performs the initial "broadcast"
    // by calling initSearcher.
    CurrentNodeVersions = new long[numNodes];
}
/// <summary>
/// Returns the existing <c>iw</c> writer instead of opening a new one;
/// <paramref name="directory"/> and <paramref name="config"/> are ignored.
/// </summary>
protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config) { return iw; }
/// <summary>
/// Builds the base writer configuration for <paramref name="openMode"/> and then sets
/// its merge policy's merge factor to 2.
/// </summary>
/// <remarks>The base configuration's merge policy is cast to <c>LogMergePolicy</c>; the cast throws otherwise.</remarks>
protected override IndexWriterConfig CreateIndexWriterConfig(IndexWriterConfig.OpenMode_e openMode)
{
    IndexWriterConfig baseConfig = base.CreateIndexWriterConfig(openMode);
    ((LogMergePolicy)baseConfig.MergePolicy).MergeFactor = 2;
    return baseConfig;
}
/// <summary>
/// Indexes one document ("clockwork orange") and checks that a <c>BooleanQuery</c> made of
/// two SHOULD <c>SpanTermQuery</c> clauses -- one matching term, one misspelled term --
/// returns exactly one hit.
/// </summary>
public virtual void TestBooleanSpanQuery()
{
    Directory directory = NewDirectory();
    Analyzer indexerAnalyzer = new MockAnalyzer(Random());
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
    IndexWriter writer = new IndexWriter(directory, config);
    string FIELD = "content";

    Document d = new Document();
    d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
    writer.AddDocument(d);
    writer.Dispose();

    IndexReader indexReader = DirectoryReader.Open(directory);
    IndexSearcher searcher = NewSearcher(indexReader);

    BooleanQuery query = new BooleanQuery();
    SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
    SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork")); // deliberately misspelled: must not match
    query.Add(sq1, BooleanClause.Occur.SHOULD);
    query.Add(sq2, BooleanClause.Occur.SHOULD);

    TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
    searcher.Search(query, collector);

    // Per the Lucene TopDocsCollector documentation, TopDocs() may only be called once
    // per search (a second call returns no results), so fetch the hits a single time.
    ScoreDoc[] scoreDocs = collector.TopDocs().ScoreDocs;
    int hits = scoreDocs.Length;
    foreach (ScoreDoc scoreDoc in scoreDocs)
    {
        Console.WriteLine(scoreDoc.Doc);
    }
    indexReader.Dispose();

    // Expected value first, actual second, so a failure message reads correctly.
    Assert.AreEqual(1, hits, "Bug in boolean query composed of span queries");
    directory.Dispose();
}
/// <summary>
/// Creates the writer by passing <paramref name="d"/> and <paramref name="conf"/> to the base
/// constructor, and stores <paramref name="latch"/> and <paramref name="signal"/> in
/// <c>Latch</c> and <c>Signal</c>.
/// </summary>
public LatchedIndexWriter(Directory d, IndexWriterConfig conf, CountDownLatch latch, CountDownLatch signal) : base(d, conf) { this.Latch = latch; this.Signal = signal; }
// LUCENE-5461
/// <summary>
/// Adds 500 documents through a <c>TrackingIndexWriter</c> while a
/// <c>ControlledRealTimeReopenThread</c> reopens searchers, starting a background commit
/// thread every 50 documents. After each add it waits for the document's generation,
/// asserts the wait stayed below <c>maxStaleSecs</c>, and asserts the document is searchable.
/// </summary>
public virtual void TestCRTReopen()
{
    // Upper bound (seconds) on searcher staleness; meant to be high enough that a
    // well-behaved reopen thread never approaches it.
    int maxStaleSecs = 20;

    // Random filler text, only there to give each document some stored bulk.
    string s = "        abcdefghijklmnopqrstuvwxyz     ";
    char[] chars = s.ToCharArray();
    StringBuilder builder = new StringBuilder(2048);
    for (int i = 0; i < 2048; i++)
    {
        builder.Append(chars[Random().Next(chars.Length)]);
    }
    string content = builder.ToString();

    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new MockAnalyzer(Random()));
    config.SetIndexDeletionPolicy(sdp);
    config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(dir, config);
    SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
    TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
    ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(tiw, sm, maxStaleSecs, 0);

    controlledRealTimeReopenThread.SetDaemon(true);
    controlledRealTimeReopenThread.Start();

    IList<Thread> commitThreads = new List<Thread>();

    for (int i = 0; i < 500; i++)
    {
        if (i > 0 && i % 50 == 0)
        {
            Thread commitThread = new Thread(new RunnableAnonymousInnerClassHelper(this, sdp, dir, iw));
            commitThread.Start();
            commitThreads.Add(commitThread);
        }

        Document d = new Document();
        d.Add(new TextField("count", i + "", Field.Store.NO));
        d.Add(new TextField("content", content, Field.Store.YES));

        // Measure real elapsed time. DateTime.Now.Millisecond is only the 0-999
        // millisecond component of the current time, so subtracting two readings
        // never reflected the actual wait.
        System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
        long l = tiw.AddDocument(d);
        controlledRealTimeReopenThread.WaitForGeneration(l);
        long wait = stopwatch.ElapsedMilliseconds;
        Assert.IsTrue(wait < (maxStaleSecs * 1000), "waited too long for generation " + wait);

        IndexSearcher searcher = sm.Acquire();
        TopDocs td;
        try
        {
            td = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
        }
        finally
        {
            // Always hand the searcher back, even if the search throws.
            sm.Release(searcher);
        }
        Assert.AreEqual(1, td.TotalHits);
    }

    foreach (Thread commitThread in commitThreads)
    {
        commitThread.Join();
    }

    controlledRealTimeReopenThread.Dispose();
    sm.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Fills <paramref name="directory"/> with <c>NUMBER_OF_DOCUMENTS</c> documents, each holding
/// <c>NUMBER_OF_FIELDS</c> unstored string fields named "field0", "field1", ... with the value
/// <c>Text</c>, then force-merges to a single segment and disposes the writer.
/// </summary>
/// <param name="directory">Directory that receives the index.</param>
/// <param name="config">Writer configuration handed to the <c>RandomIndexWriter</c>.</param>
private static void Populate(Directory directory, IndexWriterConfig config)
{
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, config);

    for (int docIndex = 0; docIndex < NUMBER_OF_DOCUMENTS; docIndex++)
    {
        Document doc = new Document();
        for (int fieldIndex = 0; fieldIndex < NUMBER_OF_FIELDS; fieldIndex++)
        {
            string fieldName = "field" + fieldIndex;
            doc.Add(NewStringField(fieldName, Text, Field.Store.NO));
        }
        indexWriter.AddDocument(doc);
    }

    indexWriter.ForceMerge(1);
    indexWriter.Dispose();
}
/// <summary>
/// Opens an <c>IndexWriter</c> on <paramref name="dir"/> after replacing the merge policy of
/// <paramref name="conf"/> with a <c>LogDocMergePolicy</c> whose <c>NoCFSRatio</c> is 0.0.
/// </summary>
/// <param name="dir">Directory to write to.</param>
/// <param name="conf">Writer configuration; its merge policy is overwritten by this call.</param>
/// <returns>The newly opened writer.</returns>
private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
{
    // A zero CFS ratio makes sure plain (non-compound) files are used.
    LogDocMergePolicy docMergePolicy = new LogDocMergePolicy();
    docMergePolicy.NoCFSRatio = 0.0;
    conf.SetMergePolicy(docMergePolicy);

    return new IndexWriter(dir, conf);
}
/// <summary>
/// Closes this wrapper and the wrapped directory <c>@in</c>.
/// Throws if files or locks are still open. When <c>CheckIndexOnClose</c> is set and an
/// index exists, it additionally crashes the directory (corrupting unsynced files), runs
/// <c>CheckIndex</c>, and -- if <c>AssertNoUnreferencedFilesOnClose</c> is set -- asserts that
/// opening and rolling back an <c>IndexWriter</c> neither adds nor removes files, and that
/// opening/closing a writer does not change the document count.
/// </summary>
/// <exception cref="Exception">Files or locks are still open on this directory.</exception>
public override void Dispose()
{
    lock (this)
    {
        // files that we tried to delete, but couldn't because readers were open.
        // all that matters is that we tried! (they will eventually go away)
        // Guarded against null because the block below treats uninitialized state as possible.
        ISet<string> pendingDeletions = OpenFilesDeleted != null
            ? new HashSet<string>(OpenFilesDeleted)
            : new HashSet<string>();
        MaybeYield();
        if (OpenFiles == null)
        {
            OpenFiles = new Dictionary<string, int>();
            OpenFilesDeleted = new HashSet<string>();
        }
        if (OpenFiles.Count > 0)
        {
            // print the first one as its very verbose otherwise
            Exception cause = null;
            IEnumerator<Exception> stacktraces = OpenFileHandles.Values.GetEnumerator();
            if (stacktraces.MoveNext())
            {
                cause = stacktraces.Current;
            }
            // RuntimeException instead of System.IO.IOException because
            // super() does not throw System.IO.IOException currently:
            throw new Exception("MockDirectoryWrapper: cannot close: there are still open files: " + String.Join(" ,", OpenFiles.ToArray().Select(x => x.Key)), cause);
        }
        if (OpenLocks.Count > 0)
        {
            throw new Exception("MockDirectoryWrapper: cannot close: there are still open locks: " + String.Join(" ,", OpenLocks.ToArray()));
        }
        IsOpen = false;
        if (CheckIndexOnClose)
        {
            // Disable random I/O failures so the checks below run deterministically.
            RandomIOExceptionRate_Renamed = 0.0;
            RandomIOExceptionRateOnOpen_Renamed = 0.0;
            if (DirectoryReader.IndexExists(this))
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("\nNOTE: MockDirectoryWrapper: now crush");
                }
                Crash(); // corrupt any unsynced-files
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
                }
                TestUtil.CheckIndex(this, CrossCheckTermVectorsOnClose);

                // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
                if (AssertNoUnreferencedFilesOnClose)
                {
                    // now look for unreferenced files: discount ones that we tried to delete but could not
                    HashSet<string> allFiles = new HashSet<string>(Arrays.AsList(ListAll()));
                    allFiles.RemoveAll(pendingDeletions);
                    string[] startFiles = allFiles.ToArray(/*new string[0]*/);
                    IndexWriterConfig iwc = new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null);
                    iwc.SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
                    // Opening and rolling back a writer lets it clean up unreferenced files.
                    (new IndexWriter(@in, iwc)).Rollback();
                    string[] endFiles = @in.ListAll();

                    ISet<string> startSet = new SortedSet<string>(Arrays.AsList(startFiles));
                    ISet<string> endSet = new SortedSet<string>(Arrays.AsList(endFiles));

                    if (pendingDeletions.Contains("segments.gen") && endSet.Contains("segments.gen"))
                    {
                        // this is possible if we hit an exception while writing segments.gen, we try to delete it
                        // and it ends out in pendingDeletions (but IFD wont remove this).
                        startSet.Add("segments.gen");
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("MDW: Unreferenced check: Ignoring segments.gen that we could not delete.");
                        }
                    }

                    // its possible we cannot delete the segments_N on windows if someone has it open and
                    // maybe other files too, depending on timing. normally someone on windows wouldnt have
                    // an issue (IFD would nuke this stuff eventually), but we pass NoDeletionPolicy...
                    foreach (string file in pendingDeletions)
                    {
                        // File names are identifiers, so compare ordinally rather than by culture.
                        if (file.StartsWith("segments", StringComparison.Ordinal) && !file.Equals("segments.gen") && endSet.Contains(file))
                        {
                            startSet.Add(file);
                            if (LuceneTestCase.VERBOSE)
                            {
                                Console.WriteLine("MDW: Unreferenced check: Ignoring segments file: " + file + " that we could not delete.");
                            }
                            SegmentInfos sis = new SegmentInfos();
                            try
                            {
                                sis.Read(@in, file);
                            }
                            catch (System.IO.IOException)
                            {
                                // OK: likely some of the .si files were deleted
                            }

                            try
                            {
                                ISet<string> ghosts = new HashSet<string>(sis.Files(@in, false));
                                foreach (string s in ghosts)
                                {
                                    if (endSet.Contains(s) && !startSet.Contains(s))
                                    {
                                        Debug.Assert(pendingDeletions.Contains(s));
                                        if (LuceneTestCase.VERBOSE)
                                        {
                                            Console.WriteLine("MDW: Unreferenced check: Ignoring referenced file: " + s + " " + "from " + file + " that we could not delete.");
                                        }
                                        startSet.Add(s);
                                    }
                                }
                            }
                            catch (Exception t)
                            {
                                // Best effort: report and continue with the remaining files.
                                Console.Error.WriteLine("ERROR processing leftover segments file " + file + ":");
                                Console.WriteLine(t.ToString());
                                Console.Write(t.StackTrace);
                            }
                        }
                    }

                    startFiles = startSet.ToArray(/*new string[0]*/);
                    endFiles = endSet.ToArray(/*new string[0]*/);

                    if (!Arrays.Equals(startFiles, endFiles))
                    {
                        IList<string> removed = new List<string>();
                        foreach (string fileName in startFiles)
                        {
                            if (!endSet.Contains(fileName))
                            {
                                removed.Add(fileName);
                            }
                        }

                        IList<string> added = new List<string>();
                        foreach (string fileName in endFiles)
                        {
                            if (!startSet.Contains(fileName))
                            {
                                added.Add(fileName);
                            }
                        }

                        // Join the names explicitly: concatenating a .NET collection with a
                        // string prints its type name, not its contents.
                        string extras;
                        if (removed.Count != 0)
                        {
                            extras = "\n\nThese files were removed: " + String.Join(", ", removed);
                        }
                        else
                        {
                            extras = "";
                        }

                        if (added.Count != 0)
                        {
                            extras += "\n\nThese files were added (waaaaaaaaaat!): " + String.Join(", ", added);
                        }

                        if (pendingDeletions.Count != 0)
                        {
                            extras += "\n\nThese files we had previously tried to delete, but couldn't: " + String.Join(", ", pendingDeletions);
                        }

                        Debug.Assert(false, "unreferenced files: before delete:\n    " + Arrays.ToString(startFiles) + "\n  after delete:\n    " + Arrays.ToString(endFiles) + extras);
                    }

                    // Opening and closing a writer must not change the number of documents.
                    DirectoryReader ir1 = DirectoryReader.Open(this);
                    int numDocs1 = ir1.NumDocs;
                    ir1.Dispose();
                    (new IndexWriter(this, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null))).Dispose();
                    DirectoryReader ir2 = DirectoryReader.Open(this);
                    int numDocs2 = ir2.NumDocs;
                    ir2.Dispose();
                    Debug.Assert(numDocs1 == numDocs2, "numDocs changed after opening/closing IW: before=" + numDocs1 + " after=" + numDocs2);
                }
            }
        }
        @in.Dispose();
    }
}
// NOTE: this is not a test. It exists only so the code fragment shown in the
// API documentation is checked by the compiler.
public virtual void VerifyCompiles()
{
    Analyzer analyzer = null;

    DirectoryInfo indexLocation = new DirectoryInfo("/path/to/index");
    Directory fsDir = FSDirectory.Open(indexLocation);
    NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0);

    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    IndexWriter writer = new IndexWriter(cachedFSDir, conf);
}