/// <summary>
/// Builds a RAMDirectory whose index contains <paramref name="numDeletedDocs"/>
/// documents that have all been deleted, verifying the deletion state through
/// both the writer and a fresh read-only reader before returning the directory.
/// </summary>
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED);

    // Add the requested number of empty documents, then delete every one of them.
    for (int docNum = 0; docNum < numDeletedDocs; docNum++)
    {
        writer.AddDocument(new Document());
    }
    writer.Commit();
    writer.DeleteDocuments(new MatchAllDocsQuery());
    writer.Commit();

    if (0 < numDeletedDocs)
    {
        Assert.IsTrue(writer.HasDeletions(), "writer has no deletions");
    }
    Assert.AreEqual(numDeletedDocs, writer.MaxDoc(), "writer is missing some deleted docs");
    Assert.AreEqual(0, writer.NumDocs(), "writer has non-deleted docs");
    writer.Close();

    // Double-check the deleted-doc count through an independent reader.
    IndexReader reader = IndexReader.Open(dir, true);
    Assert.AreEqual(numDeletedDocs, reader.NumDeletedDocs, "reader has wrong number of deleted docs");
    reader.Close();

    return dir;
}
/// <summary>
/// Thread body: for each iteration, kicks off a non-blocking optimize, adds a
/// batch of uniquely-id'd documents, deletes a prefix of that batch by id, and
/// finishes with a blocking optimize. Any exception flags the enclosing test
/// as failed and is reported to stdout.
/// </summary>
override public void Run()
{
    try
    {
        for (int j = 0; j < Lucene.Net.Index.TestThreadedOptimize.NUM_ITER2; j++)
        {
            // Start a background (non-blocking) optimize to overlap with indexing.
            writerFinal.Optimize(false, null);

            // Add 17 * (1 + iFinal) documents with ids unique per thread/iteration.
            for (int k = 0; k < 17 * (1 + iFinal); k++)
            {
                Document d = new Document();
                d.Add(new Field("id", iterFinal + "_" + iFinal + "_" + j + "_" + k, Field.Store.YES, Field.Index.NOT_ANALYZED));
                d.Add(new Field("contents", English.IntToEnglish(iFinal + k), Field.Store.NO, Field.Index.ANALYZED));
                writerFinal.AddDocument(d, null);
            }

            // Delete the first 9 * (1 + iFinal) of the documents just added.
            for (int k = 0; k < 9 * (1 + iFinal); k++)
            {
                writerFinal.DeleteDocuments(null, new Term("id", iterFinal + "_" + iFinal + "_" + j + "_" + k));
            }

            writerFinal.Optimize(null);
        }
    }
    catch (System.Exception t)
    {
        Enclosing_Instance.setFailed();
        System.Console.Out.WriteLine(ThreadClass.Current().Name + ": hit exception");
        // Print the full exception (type + message + stack trace); printing only
        // t.StackTrace, as before, dropped the exception message entirely.
        System.Console.Out.WriteLine(t);
    }
}
/// <summary>
/// Index write worker loop: drains the delete/update/add queues into the
/// shared IndexWriter, flushing buffered docs and periodically optimizing.
/// Runs forever; intended to execute on a dedicated background thread.
/// </summary>
public void IndexWriteHandler()
{
    // Create the index writer; create a brand-new index only when no
    // segments file exists yet in the index directory.
    // NOTE(review): the path is built by plain string concatenation — this
    // assumes Directorys.IndexDirectory ends with a directory separator; confirm.
    writer = new Lucene.Net.Index.IndexWriter(Directorys.IndexDirectory, new ThesaurusAnalyzer(), !File.Exists(Directorys.IndexDirectory + "segments.gen"));
    // Cap the number of documents buffered in RAM before a segment flush.
    writer.SetMaxBufferedDocs(maxBufferLength);
    // Optimize once on startup.
    writer.Optimize();
    int count = 0;
    // Main processing loop.
    while (true)
    {
        // Drain the delete queue (bounded by maxBufferLength per cycle).
        while (deleteQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            writer.DeleteDocuments(deleteQueue.Dequeue());
        }
        // Drain the update queue: replace the document whose "id" field matches.
        while (updateQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            Lucene.Net.Documents.Document doc = updateQueue.Dequeue();
            writer.UpdateDocument(new Lucene.Net.Index.Term("id", doc.Get("id")), doc);
        }
        // Drain the add queue.
        while (addQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            writer.AddDocument(addQueue.Dequeue());
        }
        // If any documents are buffered in RAM, flush them to the directory.
        if (writer.NumRamDocs() > 0)
        {
            writer.Flush();
        }
        // Once the processed count reaches the buffer limit, optimize segments
        // and reset the counter; otherwise sleep 100 ms before polling again.
        if (count >= maxBufferLength)
        {
            writer.Optimize();
            count = 0;
        }
        else
        {
            Thread.Sleep(100);
        }
    }
}
// test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0
public void TestSparseIndex()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

    // Index ids -20..20, then delete id 0 so the index is sparse.
    for (int d = -20; d <= 20; d++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.DeleteDocuments(new Term("id", "0"));
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true);
    IndexSearcher Search = new IndexSearcher(reader);
    Assert.True(reader.HasDeletions);

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    FieldCacheRangeFilter<sbyte?> fcrf;

    // Ranges containing 0 must yield a non-cacheable DocIdSet (TermDocs path).
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(40, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    // Ranges that exclude 0 must yield a cacheable DocIdSet.
    // (Fixed: the failure messages below previously said "must be not cacheable",
    // contradicting the Assert.True being made.)
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");
}
/// <summary>
/// Verifies MatchAllDocsQuery: returns every document in index order, works
/// inside BooleanQuery conjunctions (exercising skipTo), and respects deletes
/// after the NRT reader is reopened.
/// </summary>
public virtual void TestQuery()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, Analyzer).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy()));
    AddDoc("one", writer, 1f);
    AddDoc("two", writer, 20f);
    AddDoc("three four", writer, 300f);

    IndexReader reader = DirectoryReader.Open(writer, true);
    IndexSearcher searcher = NewSearcher(reader);

    // A bare MatchAllDocsQuery must hit all three documents, in index order.
    ScoreDoc[] hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual("one", searcher.Doc(hits[0].Doc).Get("key"));
    Assert.AreEqual("two", searcher.Doc(hits[1].Doc).Get("key"));
    Assert.AreEqual("three four", searcher.Doc(hits[2].Doc).Get("key"));

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Delete a document and confirm MatchAllDocsQuery no longer sees it.
    writer.DeleteDocuments(new Term("key", "one"));
    reader.Dispose();
    reader = DirectoryReader.Open(writer, true);
    searcher = NewSearcher(reader);
    hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Deletes all index documents belonging to the given member id.
/// </summary>
/// <param name="memberId">Id of the member whose documents are removed.</param>
/// <returns>true when the deletion was committed; false on any failure.</returns>
public bool RemoveMember(int memberId)
{
    Lucene.Net.Index.IndexWriter indexWriter = null;
    try
    {
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(_indexFileLocation, false);
        indexWriter = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(), false);
        Lucene.Net.Index.Term idTerm = new Lucene.Net.Index.Term("MemberID", memberId.ToString());
        indexWriter.DeleteDocuments(idTerm);
        indexWriter.Commit();
        indexWriter.Close();
        indexWriter = null; // closed cleanly; nothing left for finally to do
    }
    catch (Exception)
    {
        // Report failure via the return value; callers only see the boolean.
        // NOTE(review): consider logging the exception instead of discarding it.
        return(false);
    }
    finally
    {
        // Previously the writer (and its index lock) leaked when DeleteDocuments
        // or Commit threw; always release it here as a safety net.
        if (indexWriter != null)
        {
            try { indexWriter.Close(); } catch { /* best effort during cleanup */ }
        }
    }
    return(true);
}
// test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0
public void TestSparseIndex()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

    // Index ids -20..20, then delete id 0 so the index is sparse.
    for (int d = -20; d <= 20; d++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.DeleteDocuments(new Term("id", "0"));
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true);
    IndexSearcher Search = new IndexSearcher(reader);
    Assert.True(reader.HasDeletions);

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    FieldCacheRangeFilter<sbyte?> fcrf;

    // Ranges containing 0 must yield a non-cacheable DocIdSet (TermDocs path).
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(40, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    // Ranges that exclude 0 must yield a cacheable DocIdSet.
    // (Fixed: the failure messages below previously said "must be not cacheable",
    // contradicting the Assert.True being made.)
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");
}
/// <summary>
/// Index write worker loop: drains the delete/update/add queues into the
/// shared IndexWriter, flushing buffered docs and periodically optimizing.
/// Runs forever; intended to execute on a dedicated background thread.
/// (Original comments were encoding-garbled; rewritten in English.)
/// </summary>
public void IndexWriteHandler()
{
    // Create the index writer; create a brand-new index only when no
    // segments file exists yet in the index directory.
    // NOTE(review): the path is built by plain string concatenation — this
    // assumes Directorys.IndexDirectory ends with a directory separator; confirm.
    writer = new Lucene.Net.Index.IndexWriter(Directorys.IndexDirectory, new ThesaurusAnalyzer(), !File.Exists(Directorys.IndexDirectory + "segments.gen"));
    // Cap the number of documents buffered in RAM before a segment flush.
    writer.SetMaxBufferedDocs(maxBufferLength);
    // Optimize once on startup.
    writer.Optimize();
    int count = 0;
    // Main processing loop.
    while (true)
    {
        // Drain the delete queue (bounded by maxBufferLength per cycle).
        while (deleteQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            writer.DeleteDocuments(deleteQueue.Dequeue());
        }
        // Drain the update queue: replace the document whose "id" field matches.
        while (updateQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            Lucene.Net.Documents.Document doc = updateQueue.Dequeue();
            writer.UpdateDocument(new Lucene.Net.Index.Term("id", doc.Get("id")), doc);
        }
        // Drain the add queue.
        while (addQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            writer.AddDocument(addQueue.Dequeue());
        }
        // If any documents are buffered in RAM, flush them to the directory.
        if (writer.NumRamDocs() > 0)
        {
            writer.Flush();
        }
        // Once the processed count reaches the buffer limit, optimize segments
        // and reset the counter; otherwise sleep 100 ms before polling again.
        if (count >= maxBufferLength)
        {
            writer.Optimize();
            count = 0;
        }
        else
        {
            Thread.Sleep(100);
        }
    }
}
/// <summary>
/// Builds a force-merged index with ids -20..20, deletes id 0, and checks that
/// FieldCacheRangeFilter.NewByteRange returns the expected hit counts over the
/// resulting sparse index.
/// </summary>
public virtual void TestSparseIndex()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    // One doc per id in -20..20; every doc carries the same "body" term.
    for (int id = -20; id <= 20; id++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("id", Convert.ToString(id), Field.Store.NO));
        doc.Add(NewStringField("body", "body", Field.Store.NO));
        writer.AddDocument(doc);
    }
    writer.ForceMerge(1);
    writer.DeleteDocuments(new Term("id", "0"));
    writer.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);
    Assert.IsTrue(reader.HasDeletions);

    Query query = new TermQuery(new Term("body", "body"));

    // [-20, 20] covers all 41 ids minus the deleted 0 → 40 hits.
    ScoreDoc[] hits = searcher.Search(query, FieldCacheRangeFilter.NewByteRange("id", (sbyte?)-20, (sbyte?)20, T, T), 100).ScoreDocs;
    Assert.AreEqual(40, hits.Length, "find all");

    hits = searcher.Search(query, FieldCacheRangeFilter.NewByteRange("id", (sbyte?)0, (sbyte?)20, T, T), 100).ScoreDocs;
    Assert.AreEqual(20, hits.Length, "find all");

    hits = searcher.Search(query, FieldCacheRangeFilter.NewByteRange("id", (sbyte?)-20, (sbyte?)0, T, T), 100).ScoreDocs;
    Assert.AreEqual(20, hits.Length, "find all");

    hits = searcher.Search(query, FieldCacheRangeFilter.NewByteRange("id", (sbyte?)10, (sbyte?)20, T, T), 100).ScoreDocs;
    Assert.AreEqual(11, hits.Length, "find all");

    hits = searcher.Search(query, FieldCacheRangeFilter.NewByteRange("id", (sbyte?)-20, (sbyte?)-10, T, T), 100).ScoreDocs;
    Assert.AreEqual(11, hits.Length, "find all");

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Creates a RAMDirectory containing an index of <paramref name="numDeletedDocs"/>
/// documents that have all been deleted, asserting the expected deletion state
/// through both the writer and a fresh read-only reader.
/// </summary>
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
{
    RAMDirectory d = new RAMDirectory();
    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED);
    for (int i = 0; i < numDeletedDocs; i++)
    {
        w.AddDocument(new Document());
    }
    w.Commit();
    w.DeleteDocuments(new MatchAllDocsQuery());
    w.Commit();

    // Braced and written with conventional operand order (was an unbraced
    // single-statement `if (0 < numDeletedDocs) ...`).
    if (numDeletedDocs > 0)
    {
        Assert.IsTrue(w.HasDeletions(), "writer has no deletions");
    }
    Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
    Assert.AreEqual(0, w.NumDocs(), "writer has non-deleted docs");
    w.Close();

    // Double-check the deleted-doc count through an independent reader.
    IndexReader r = IndexReader.Open(d, true);
    Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
    r.Close();
    return d;
}
/// <summary>
/// Deletes all index documents belonging to the given member id.
/// </summary>
/// <param name="memberId">Id of the member whose documents are removed.</param>
/// <returns>true when the deletion was committed; false on any failure.</returns>
public bool RemoveMember(int memberId)
{
    Lucene.Net.Index.IndexWriter indexWriter = null;
    try
    {
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(_indexFileLocation, false);
        indexWriter = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(), false);
        Lucene.Net.Index.Term idTerm = new Lucene.Net.Index.Term("MemberID", memberId.ToString());
        indexWriter.DeleteDocuments(idTerm);
        indexWriter.Commit();
        indexWriter.Close();
        indexWriter = null; // closed cleanly; nothing left for finally to do
    }
    catch (Exception)
    {
        // Report failure via the return value; callers only see the boolean.
        // NOTE(review): consider logging the exception instead of discarding it.
        return false;
    }
    finally
    {
        // Previously the writer (and its index lock) leaked when DeleteDocuments
        // or Commit threw; always release it here as a safety net.
        if (indexWriter != null)
        {
            try { indexWriter.Close(); } catch { /* best effort during cleanup */ }
        }
    }
    return true;
}
/// <summary>
/// Indexes NUM_DOCS documents under a custom per-field codec (standard codec
/// for field1, pulsing codec for field2), deletes two docs by id, force-merges
/// to one segment, and verifies doc counts and term hits through NRT readers
/// at each stage.
/// </summary>
public virtual void TestPerFieldCodec()
{
    int NUM_DOCS = AtLeast(173);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
    }
    using (BaseDirectoryWrapper dir = NewDirectory())
    {
        dir.CheckIndexOnClose = false; // we use a custom codec provider
        using (IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(new CustomPerFieldCodec()).SetMergePolicy(NewLogMergePolicy(3))))
        {
            Documents.Document doc = new Documents.Document();
            // uses default codec:
            doc.Add(NewTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
            // uses pulsing codec:
            Field field2 = NewTextField("field2", "this field uses the pulsing codec as the test", Field.Store.NO);
            doc.Add(field2);
            Field idField = NewStringField("id", "", Field.Store.NO);
            doc.Add(idField);
            // Reuse the same Document instance, mutating only the id field value;
            // commit every 10 adds so multiple segments get created.
            for (int i = 0; i < NUM_DOCS; i++)
            {
                idField.StringValue = "" + i;
                w.AddDocument(doc);
                if ((i + 1) % 10 == 0)
                {
                    w.Commit();
                }
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: now delete id=77");
            }
            w.DeleteDocuments(new Term("id", "77"));
            // NRT reader must see NUM_DOCS - 1 live docs in both codec fields.
            using (IndexReader r = DirectoryReader.Open(w, true))
            {
                Assert.AreEqual(NUM_DOCS - 1, r.NumDocs);
                IndexSearcher s = NewSearcher(r);
                Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
                Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
            }
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now delete 2nd doc");
            }
            w.DeleteDocuments(new Term("id", "44"));
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now force merge");
            }
            // Force-merging to one segment purges the deleted docs, so both
            // MaxDoc and NumDocs drop to NUM_DOCS - 2 below.
            w.ForceMerge(1);
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now open reader");
            }
            using (IndexReader r = DirectoryReader.Open(w, true))
            {
                Assert.AreEqual(NUM_DOCS - 2, r.MaxDoc);
                Assert.AreEqual(NUM_DOCS - 2, r.NumDocs);
                IndexSearcher s = NewSearcher(r);
                Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
                Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
                // id 76 survives; the two deleted ids must be gone.
                Assert.AreEqual(1, s.Search(new TermQuery(new Term("id", "76")), 1).TotalHits);
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "77")), 1).TotalHits);
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "44")), 1).TotalHits);
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: now close NRT reader");
                }
            }
        }
    }
}