AddIndexesNoOptimize() public method

Merges all segments from an array of indexes into this index.

This may be used to parallelize batch indexing. A large document collection can be broken into sub-collections. Each sub-collection can be indexed in parallel, on a different thread, process or machine. The complete index can then be created by merging sub-collection indexes with this method.
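
For illustration, here is a minimal sketch of that workflow, using the Lucene.Net 2.9-era API that the examples below rely on. The IndexSubCollection helper, the field name "content", and the in-memory directories are assumptions made for the sketch, not part of the API:

    using System.Threading;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    // Hypothetical helper: builds one sub-index from one sub-collection.
    static void IndexSubCollection(Directory dir, string[] docs)
    {
        IndexWriter w = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
        foreach (string text in docs)
        {
            Document d = new Document();
            d.Add(new Field("content", text, Field.Store.NO, Field.Index.ANALYZED));
            w.AddDocument(d);
        }
        w.Close(); // each input index must be quiescent before the merge
    }

    static Directory ParallelIndex(string[] docsA, string[] docsB)
    {
        Directory dirA = new RAMDirectory();
        Directory dirB = new RAMDirectory();

        // Index each sub-collection on its own thread (separate processes
        // or machines writing to separate directories work the same way).
        Thread t1 = new Thread(() => IndexSubCollection(dirA, docsA));
        Thread t2 = new Thread(() => IndexSubCollection(dirB, docsB));
        t1.Start(); t2.Start();
        t1.Join(); t2.Join();

        // Merge the finished sub-indexes into the complete index in one call.
        Directory merged = new RAMDirectory();
        IndexWriter master = new IndexWriter(merged, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
        master.AddIndexesNoOptimize(new[] { dirA, dirB });
        master.Close();
        return merged;
    }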

NOTE: the index in each Directory must not be changed (opened by a writer) while this method is running. This method does not acquire a write lock in each input Directory, so it is up to the caller to enforce this.

NOTE: while this is running, any attempts to add or delete documents (with another thread) will be paused until this method completes.

This method is transactional in how Exceptions are handled: it does not commit a new segments_N file until all indexes are added. This means if an Exception occurs (for example disk full), then either no indexes will have been added or they all will have been.
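
As a hedged illustration of these semantics (writer and the auxiliary directories auxDir1/auxDir2 are assumed to exist already):

    try
    {
        writer.AddIndexesNoOptimize(new Directory[] { auxDir1, auxDir2 });
    }
    catch (System.IO.IOException)
    {
        // e.g. disk full: no new segments_N file was committed, so the
        // target index still contains exactly what it held before the
        // call, and the operation can be retried once space is freed.
    }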

Note that this requires temporary free space in the Directory up to 2X the sum of all input indexes (including the starting index). If readers/searchers are open against the starting index, then temporary free space required will be higher by the size of the starting index (see Optimize() for details).

Once this completes, the final size of the index will be less than the sum of all input index sizes (including the starting index). It could be quite a bit smaller (if there were many pending deletes) or just slightly smaller.

This requires this index not be among those to be added.

NOTE: if this method hits an OutOfMemoryError you should immediately close the writer. See above for details.
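
A minimal sketch of that rule, assuming the .NET port surfaces the error as System.OutOfMemoryException:

    try
    {
        writer.AddIndexesNoOptimize(dirs);
    }
    catch (System.OutOfMemoryException)
    {
        // Per the note above, the writer may be in an inconsistent state:
        // close it immediately and make no further changes with it.
        writer.Close();
        throw;
    }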

public AddIndexesNoOptimize ( Directory[] dirs ) : void
dirs Directory[] the indexes to merge into this index
return void
 public virtual void  TestLucene()
 {
     
     int num = 100;
     
     Directory indexA = new MockRAMDirectory();
     Directory indexB = new MockRAMDirectory();
     
     FillIndex(indexA, 0, num);
     Assert.IsFalse(VerifyIndex(indexA, 0), "Index a is invalid");
     
     FillIndex(indexB, num, num);
     Assert.IsFalse(VerifyIndex(indexB, num), "Index b is invalid");
     
     Directory merged = new MockRAMDirectory();
     
     IndexWriter writer = new IndexWriter(merged, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
     writer.MergeFactor = 2;
     
     writer.AddIndexesNoOptimize(new []{indexA, indexB});
     writer.Optimize();
     writer.Close();
     
     var fail = VerifyIndex(merged, 0);
     merged.Close();
     
     Assert.IsFalse(fail, "The merged index is invalid");
 }
Example #2
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < docFields.Length; i++)
            {
                Document document = new Document();
                document.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document, null);
            }
            writer.Close();
            searcher = new IndexSearcher(directory, true, null);

            // Make big index
            dir2 = new MockRAMDirectory(directory);

            // First multiply small test index:
            mulFactor = 1;
            int docCount = 0;

            do
            {
                Directory   copy        = new RAMDirectory(dir2, null);
                IndexWriter indexWriter = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
                indexWriter.AddIndexesNoOptimize(null, new[] { copy });
                docCount = indexWriter.MaxDoc();
                indexWriter.Close();
                mulFactor *= 2;
            } while (docCount < 3000);

            IndexWriter w   = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
            Document    doc = new Document();

            doc.Add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc, null);
            }
            doc = new Document();
            doc.Add(new Field("field2", "big bad bug", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc, null);
            }
            // optimize to 1 segment
            w.Optimize(null);
            reader = w.GetReader(null);
            w.Close();
            bigSearcher = new IndexSearcher(reader);
        }
Example #3
        public virtual void  TestNoCopySegments()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);

            writer.SetMaxBufferedDocs(9);
            writer.MergeFactor = 4;
            AddDocs(writer, 2);

            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(1032, writer.MaxDoc());
            Assert.AreEqual(2, writer.GetSegmentCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1032);
        }
        public virtual void  TestSimpleCase()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // two auxiliary directories
            Directory aux  = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            // add 40 documents in separate files
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux2, true);
            // add 50 documents in compound files
            AddDocs2(writer, 50);
            Assert.AreEqual(50, writer.DocCount());
            writer.Close();

            // test doc count before segments are merged
            writer = NewWriter(dir, false);
            Assert.AreEqual(100, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(190, writer.DocCount());
            writer.Close();

            // make sure the old index is correct
            VerifyNumDocs(aux, 40);

            // make sure the new index is correct
            VerifyNumDocs(dir, 190);

            // now add another set in.
            Directory aux3 = new RAMDirectory();

            writer = NewWriter(aux3, true);
            // add 40 documents
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.DocCount());
            writer.Close();

            // test doc count before segments are merged/index is optimized
            writer = NewWriter(dir, false);
            Assert.AreEqual(190, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux3 });
            Assert.AreEqual(230, writer.DocCount());
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now optimize it.
            writer = NewWriter(dir, false);
            writer.Optimize();
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now add a single document
            Directory aux4 = new RAMDirectory();

            writer = NewWriter(aux4, true);
            AddDocs2(writer, 1);
            writer.Close();

            writer = NewWriter(dir, false);
            Assert.AreEqual(230, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux4 });
            Assert.AreEqual(231, writer.DocCount());
            writer.Close();

            VerifyNumDocs(dir, 231);

            VerifyTermDocs(dir, new Term("content", "bbb"), 51);
        }
		public virtual void  TestNorms()
		{
			// tmp dir
			System.String tempDir = System.IO.Path.GetTempPath();
			if (tempDir == null)
			{
				throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
			}
			
			// test with a single index: index1
			System.IO.DirectoryInfo indexDir1 = new System.IO.DirectoryInfo(System.IO.Path.Combine(tempDir, "lucenetestindex1"));
			Directory dir1 = FSDirectory.Open(indexDir1);
			IndexWriter.Unlock(dir1);
			
			norms = new System.Collections.ArrayList();
			modifiedNorms = new System.Collections.ArrayList();
			
			CreateIndex(dir1);
			DoTestNorms(dir1);
			
			// test with a single index: index2
			System.Collections.ArrayList norms1 = norms;
			System.Collections.ArrayList modifiedNorms1 = modifiedNorms;
			int numDocNorms1 = numDocNorms;
			
			norms = new System.Collections.ArrayList();
			modifiedNorms = new System.Collections.ArrayList();
			numDocNorms = 0;
			
			System.IO.DirectoryInfo indexDir2 = new System.IO.DirectoryInfo(System.IO.Path.Combine(tempDir, "lucenetestindex2"));
			Directory dir2 = FSDirectory.Open(indexDir2);
			
			CreateIndex(dir2);
			DoTestNorms(dir2);
			
			// add index1 and index2 to a third index: index3
			System.IO.DirectoryInfo indexDir3 = new System.IO.DirectoryInfo(System.IO.Path.Combine(tempDir, "lucenetestindex3"));
			Directory dir3 = FSDirectory.Open(indexDir3);
			
			CreateIndex(dir3);
			IndexWriter iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
			iw.SetMaxBufferedDocs(5);
			iw.MergeFactor = 3;
			iw.AddIndexesNoOptimize(new Directory[]{dir1, dir2});
            iw.Optimize();
			iw.Close();
			
			norms1.AddRange(norms);
			norms = norms1;
			modifiedNorms1.AddRange(modifiedNorms);
			modifiedNorms = modifiedNorms1;
			numDocNorms += numDocNorms1;
			
			// test with index3
			VerifyIndex(dir3);
			DoTestNorms(dir3);
			
			// now with optimize
			iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
			iw.SetMaxBufferedDocs(5);
			iw.MergeFactor = 3;
			iw.Optimize();
			iw.Close();
			VerifyIndex(dir3);
			
			dir1.Close();
			dir2.Close();
			dir3.Close();
		}
		public virtual void  TestAddIndexOnDiskFull()
		{
			int START_COUNT = 57;
			int NUM_DIR = 50;
			int END_COUNT = START_COUNT + NUM_DIR * 25;
			
			bool debug = false;
			
			// Build up a bunch of dirs that have indexes which we
			// will then merge together by calling addIndexes(*):
			Directory[] dirs = new Directory[NUM_DIR];
			long inputDiskUsage = 0;
			for (int i = 0; i < NUM_DIR; i++)
			{
				dirs[i] = new RAMDirectory();
				IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true);
				for (int j = 0; j < 25; j++)
				{
					AddDocWithIndex(writer, 25 * i + j);
				}
				writer.Close();
				System.String[] files = dirs[i].List();
				for (int j = 0; j < files.Length; j++)
				{
					inputDiskUsage += dirs[i].FileLength(files[j]);
				}
			}
			
			// Now, build a starting index that has START_COUNT docs.  We
			// will then try to addIndexes into a copy of this:
			RAMDirectory startDir = new RAMDirectory();
			IndexWriter writer2 = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
			for (int j = 0; j < START_COUNT; j++)
			{
				AddDocWithIndex(writer2, j);
			}
			writer2.Close();
			
			// Make sure starting index seems to be working properly:
			Term searchTerm = new Term("content", "aaa");
			IndexReader reader = IndexReader.Open(startDir);
			Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq");
			
			IndexSearcher searcher = new IndexSearcher(reader);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(57, hits.Length(), "first number of hits");
			searcher.Close();
			reader.Close();
			
			// Iterate with larger and larger amounts of free
			// disk space.  With little free disk space,
			// addIndexes will certainly run out of space &
			// fail.  Verify that when this happens, index is
			// not corrupt and index in fact has added no
			// documents.  Then, we increase disk space by 2000
			// bytes each iteration.  At some point there is
			// enough free disk space and addIndexes should
			// succeed and index should show all documents were
			// added.
			
			// String[] files = startDir.list();
			long diskUsage = startDir.SizeInBytes();
			
			long startDiskUsage = 0;
			System.String[] files2 = startDir.List();
			for (int i = 0; i < files2.Length; i++)
			{
				startDiskUsage += startDir.FileLength(files2[i]);
			}
			
			for (int iter = 0; iter < 6; iter++)
			{
				
				if (debug)
					System.Console.Out.WriteLine("TEST: iter=" + iter);
				
				// Start with 100 bytes more than we are currently using:
				long diskFree = diskUsage + 100;
				
				bool autoCommit = iter % 2 == 0;
				int method = iter / 2;
				
				bool success = false;
				bool done = false;
				
				System.String methodName;
				if (0 == method)
				{
					methodName = "addIndexes(Directory[])";
				}
				else if (1 == method)
				{
					methodName = "addIndexes(IndexReader[])";
				}
				else
				{
					methodName = "addIndexesNoOptimize(Directory[])";
				}
				
				while (!done)
				{
					
					// Make a new dir that will enforce disk usage:
					MockRAMDirectory dir = new MockRAMDirectory(startDir);
					writer2 = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
					System.IO.IOException err = null;
					
					MergeScheduler ms = writer2.GetMergeScheduler();
					for (int x = 0; x < 2; x++)
					{
						if (ms is ConcurrentMergeScheduler)
						{
							// This test intentionally produces exceptions
							// in the threads that CMS launches; we don't
							// want to pollute test output with these.
							if (0 == x)
								((ConcurrentMergeScheduler) ms).SetSuppressExceptions_ForNUnitTest();
							else
								((ConcurrentMergeScheduler) ms).ClearSuppressExceptions_ForNUnitTest();
						}
						
						// Two loops: first time, limit disk space &
						// throw random IOExceptions; second time, no
						// disk space limit:
						
						double rate = 0.05;
						double diskRatio = ((double) diskFree) / diskUsage;
						long thisDiskFree;
						
						System.String testName = null;
						
						if (0 == x)
						{
							thisDiskFree = diskFree;
							if (diskRatio >= 2.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 4.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 6.0)
							{
								rate = 0.0;
							}
							if (debug)
								testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes autoCommit=" + autoCommit;
						}
						else
						{
							thisDiskFree = 0;
							rate = 0.0;
							if (debug)
								testName = "disk full test " + methodName + " with unlimited disk space autoCommit=" + autoCommit;
						}
						
						if (debug)
							System.Console.Out.WriteLine("\ncycle: " + testName);
						
						dir.SetMaxSizeInBytes(thisDiskFree);
						dir.SetRandomIOExceptionRate(rate, diskFree);
						
						try
						{
							
							if (0 == method)
							{
								writer2.AddIndexes(dirs);
							}
							else if (1 == method)
							{
								IndexReader[] readers = new IndexReader[dirs.Length];
								for (int i = 0; i < dirs.Length; i++)
								{
									readers[i] = IndexReader.Open(dirs[i]);
								}
								try
								{
									writer2.AddIndexes(readers);
								}
								finally
								{
									for (int i = 0; i < dirs.Length; i++)
									{
										readers[i].Close();
									}
								}
							}
							else
							{
								writer2.AddIndexesNoOptimize(dirs);
							}
							
							success = true;
							if (debug)
							{
								System.Console.Out.WriteLine("  success!");
							}
							
							if (0 == x)
							{
								done = true;
							}
						}
						catch (System.IO.IOException e)
						{
							success = false;
							err = e;
							if (debug)
							{
								System.Console.Out.WriteLine("  hit IOException: " + e);
								System.Console.Out.WriteLine(e.StackTrace);
							}
							
							if (1 == x)
							{
								System.Console.Out.WriteLine(e.StackTrace);
								Assert.Fail(methodName + " hit IOException after disk space was freed up");
							}
						}
						
						// Make sure all threads from
						// ConcurrentMergeScheduler are done
						_TestUtil.SyncConcurrentMerges(writer2);
						
						if (autoCommit)
						{
							
							// Whether we succeeded or failed, check that
							// all un-referenced files were in fact
							// deleted (ie, we did not create garbage).
							// Only check this when autoCommit is true:
							// when it's false, it's expected that there
							// are unreferenced files (ie they won't be
							// referenced until the "commit on close").
							// Just create a new IndexFileDeleter, have it
							// delete unreferenced files, then verify that
							// in fact no files were deleted:
							
							System.String successStr;
							if (success)
							{
								successStr = "success";
							}
							else
							{
								successStr = "IOException";
							}
							System.String message = methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes)";
							AssertNoUnreferencedFiles(dir, message);
						}
						
						if (debug)
						{
							System.Console.Out.WriteLine("  now test readers");
						}
						
						// Finally, verify index is not corrupt, and, if
						// we succeeded, we see all docs added, and if we
						// failed, we see either all docs or no docs added
						// (transactional semantics):
						try
						{
							reader = IndexReader.Open(dir);
						}
						catch (System.IO.IOException e)
						{
							System.Console.Out.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when creating IndexReader: " + e);
						}
						int result = reader.DocFreq(searchTerm);
						if (success)
						{
							if (autoCommit && result != END_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
							}
							else if (!autoCommit && result != START_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]");
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result != START_COUNT && result != END_COUNT)
							{
								System.Console.Out.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
							}
						}
						
						searcher = new IndexSearcher(reader);
						try
						{
							hits = searcher.Search(new TermQuery(searchTerm));
						}
						catch (System.IO.IOException e)
						{
							System.Console.Out.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when searching: " + e);
						}
						int result2 = hits.Length();
						if (success)
						{
							if (result2 != result)
							{
								Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result2 != result)
							{
								System.Console.Out.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
							}
						}
						
						searcher.Close();
						reader.Close();
						if (debug)
						{
							System.Console.Out.WriteLine("  count is " + result);
						}
						
						if (done || result == END_COUNT)
						{
							break;
						}
					}
					
					if (debug)
					{
						System.Console.Out.WriteLine("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.GetMaxUsedSizeInBytes());
					}
					
					if (done)
					{
						// Javadocs state that temp free Directory space
						// required is at most 2X total input size of
						// indices so let's make sure:
						Assert.IsTrue(
							(dir.GetMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage),
							"max free Directory space required exceeded 2X the total input index sizes during " + methodName + ": max temp usage = " + (dir.GetMaxUsedSizeInBytes() - startDiskUsage) + " bytes; " + "starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes"
						);
					}
					
					writer2.Close();
					
					// Wait for all BG threads to finish else
					// dir.close() will throw IOException because
					// there are still open files
					_TestUtil.SyncConcurrentMerges(ms);
					
					dir.Close();
					
					// Try again with 2000 more bytes of free space:
					diskFree += 2000;
				}
			}
			
			startDir.Close();
		}
 public virtual void  TestAddIndexes2()
 {
     bool optimize = false;
     
     Directory dir1 = new MockRAMDirectory();
     IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
     writer.SetInfoStream(infoStream);
     
     // create a 2nd index
     Directory dir2 = new MockRAMDirectory();
     IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
     writer2.SetInfoStream(infoStream);
     CreateIndexNoClose(!optimize, "index2", writer2);
     writer2.Close();
     
     writer.AddIndexesNoOptimize(new Directory[]{dir2});
     writer.AddIndexesNoOptimize(new Directory[]{dir2});
     writer.AddIndexesNoOptimize(new Directory[]{dir2});
     writer.AddIndexesNoOptimize(new Directory[]{dir2});
     writer.AddIndexesNoOptimize(new Directory[]{dir2});
     
     IndexReader r1 = writer.GetReader();
     Assert.AreEqual(500, r1.MaxDoc);
     
     r1.Close();
     writer.Close();
     dir1.Close();
 }
 public virtual void  TestAddIndexes()
 {
     bool optimize = false;
     
     Directory dir1 = new MockRAMDirectory();
     IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
     writer.SetInfoStream(infoStream);
     // create the index
     CreateIndexNoClose(!optimize, "index1", writer);
     writer.Flush(false, true, true);
     
     // create a 2nd index
     Directory dir2 = new MockRAMDirectory();
     IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
     writer2.SetInfoStream(infoStream);
     CreateIndexNoClose(!optimize, "index2", writer2);
     writer2.Close();
     
     IndexReader r0 = writer.GetReader();
     Assert.IsTrue(r0.IsCurrent());
     writer.AddIndexesNoOptimize(new Directory[]{dir2});
     Assert.IsFalse(r0.IsCurrent());
     r0.Close();
     
     IndexReader r1 = writer.GetReader();
     Assert.IsTrue(r1.IsCurrent());
     
     writer.Commit();
     Assert.IsFalse(r1.IsCurrent());
     
     Assert.AreEqual(200, r1.MaxDoc);
     
     int index2df = r1.DocFreq(new Term("indexname", "index2"));
     
     Assert.AreEqual(100, index2df);
     
     // verify the docs are from different indexes
     Document doc5 = r1.Document(5);
     Assert.AreEqual("index1", doc5.Get("indexname"));
     Document doc150 = r1.Document(150);
     Assert.AreEqual("index2", doc150.Get("indexname"));
     r1.Close();
     writer.Close();
     dir1.Close();
 }
		public virtual void  TestHangOnClose()
		{
			
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
			writer.SetMaxBufferedDocs(5);
			writer.UseCompoundFile = false;
			writer.MergeFactor = 100;
			
			Document doc = new Document();
			doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			for (int i = 0; i < 60; i++)
				writer.AddDocument(doc);
			writer.SetMaxBufferedDocs(200);
			Document doc2 = new Document();
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			for (int i = 0; i < 10; i++)
				writer.AddDocument(doc2);
			writer.Close();
			
			Directory dir2 = new MockRAMDirectory();
			writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);
			lmp.MinMergeMB = 0.0001;
			writer.SetMergePolicy(lmp);
			writer.MergeFactor = 4;
			writer.UseCompoundFile = false;
			writer.SetMergeScheduler(new SerialMergeScheduler());
			writer.AddIndexesNoOptimize(new Directory[]{dir});
			writer.Close();
			dir.Close();
			dir2.Close();
		}
        public virtual void TestDuringAddIndexes_LuceneNet()
        {
            MockRAMDirectory dir1 = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            writer.SetInfoStream(infoStream);
            writer.SetMergeFactor(2);

            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();

            Directory[] dirs = new Directory[10];
            for (int i = 0; i < 10; i++)
            {
                dirs[i] = new MockRAMDirectory(dir1);
            }

            IndexReader r = writer.GetReader();

            int NUM_THREAD = 5;
            float SECONDS = 3;

            long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
            System.Collections.IList excs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList());
            
            System.Threading.Thread[] threads = new System.Threading.Thread[NUM_THREAD];
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i] = new System.Threading.Thread(() =>
                {
                    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
                    {
                        try
                        {
                            writer.AddIndexesNoOptimize(dirs);
                        }
                        catch (System.Exception t)
                        {
                            excs.Add(t);
                            throw new System.SystemException("", t);
                        }
                    }
                });
                threads[i].IsBackground = true;
                threads[i].Start();
            }

            int lastCount = 0;
            while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
            {
                using (IndexReader r2 = writer.GetReader())
                {
                    Query q = new TermQuery(new Term("indexname", "test"));
                    int count = new IndexSearcher(r2).Search(q, 10).TotalHits;
                    Assert.IsTrue(count >= lastCount);
                    lastCount = count;
                }
            }

            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i].Join();
            }

            Assert.AreEqual(0, excs.Count);
            r.Close();
            Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
            writer.Close();

            _TestUtil.CheckIndex(dir1);

            dir1.Close();
        }
		public virtual void  TestDuringAddIndexes()
		{
            MockRAMDirectory dir1 = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.SetInfoStream(infoStream);
			writer.SetMergeFactor(2);
			
			// create the index
			CreateIndexNoClose(false, "test", writer);
			writer.Commit();
			
			Directory[] dirs = new Directory[10];
			for (int i = 0; i < 10; i++)
			{
				dirs[i] = new MockRAMDirectory(dir1);
			}
			
			IndexReader r = writer.GetReader();
			
			int NUM_THREAD = 5;
			float SECONDS = 3;
			
			long endTime = (long) ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
			System.Collections.IList excs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList());

            System.Threading.Thread[] threads = new System.Threading.Thread[NUM_THREAD];
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i] = new System.Threading.Thread(() =>
                {
                    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
                    {
                        try
                        {
                            writer.AddIndexesNoOptimize(dirs);
                        }
                        catch (System.Exception t)
                        {
                            excs.Add(t);
                            throw new System.SystemException("", t);
                        }
                    }
                });
                threads[i].IsBackground = true;
                threads[i].Start();
            }
			
			int lastCount = 0;
			while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
			{
                IndexReader r2 = r.Reopen();
                if (r2 != r)
                {
                    r.Close();
                    r = r2;
                }
                Query q = new TermQuery(new Term("indexname", "test"));
                int count = new IndexSearcher(r).Search(q, 10).TotalHits;
                Assert.IsTrue(count >= lastCount);
                lastCount = count;
			}
			
			for (int i = 0; i < NUM_THREAD; i++)
			{
				threads[i].Join();
			}
			
			Assert.AreEqual(0, excs.Count);
            r.Close();
            try
            {
                Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
            }
            catch
            {
                //DIGY:
                //I think this is expected behaviour.
                //There aren't any pending files to be deleted after "writer.Close()".
                //But, since lucene.java's test case is designed that way
                //and I might be wrong, I will add a warning
                Assert.Inconclusive("Is this really a bug?", 0, dir1.GetOpenDeletedFiles().Count);
            }
			writer.Close();
			
			_TestUtil.CheckIndex(dir1);
			
			dir1.Close();
		}
        public void TestHangOnClose()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
            writer.SetMergePolicy(new LogByteSizeMergePolicy());
            writer.SetMaxBufferedDocs(5);
            writer.SetUseCompoundFile(false);
            writer.SetMergeFactor(100);

            Document doc = new Document();
            doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            for (int i = 0; i < 60; i++)
                writer.AddDocument(doc);
            writer.SetMaxBufferedDocs(200);
            Document doc2 = new Document();
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            for (int i = 0; i < 10; i++)
                writer.AddDocument(doc2);
            writer.Close();

            Directory dir2 = new MockRAMDirectory();
            writer = new IndexWriter(dir2, false, new WhitespaceAnalyzer(), true);
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
            lmp.SetMinMergeMB(0.0001);
            writer.SetMergePolicy(lmp);
            writer.SetMergeFactor(4);
            writer.SetUseCompoundFile(false);
            writer.SetMergeScheduler(new SerialMergeScheduler());
            writer.AddIndexesNoOptimize(new Directory[] { dir });
            writer.Close();

            dir.Close();
            dir2.Close();
        }
Example #14
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            for (int i = 0; i < docFields.Length; i++)
            {
                Document document = new Document();
                document.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document);
            }
            writer.Close();
            searcher = new IndexSearcher(directory, true);

            // Make big index
            dir2 = new MockRAMDirectory(directory);

            // First multiply small test index:
            mulFactor = 1;
            int docCount = 0;
            do
            {
                Directory copy = new RAMDirectory(dir2);
                IndexWriter indexWriter = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
                indexWriter.AddIndexesNoOptimize(new[] {copy});
                docCount = indexWriter.MaxDoc();
                indexWriter.Close();
                mulFactor *= 2;
            } while (docCount < 3000);

            IndexWriter w = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            Document doc = new Document();
            doc.Add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc);
            }
            doc = new Document();
            doc.Add(new Field("field2", "big bad bug", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc);
            }
            // optimize to 1 segment
            w.Optimize();
            reader = w.GetReader();
            w.Close();
            bigSearcher = new IndexSearcher(reader);
        }
        private static void MoveIndexFiles(string impDomain, string impUser, string impPass, string destIndexPath, string tempIndexPath, Analyzer analyzer)
        {
            Directory[] readers = new Directory[1];
            IndexWriter writer = null;

            Directory finalIndexDir = FSDirectory.Open(new System.IO.DirectoryInfo(destIndexPath));
            System.IO.DirectoryInfo tempIndexDir = new System.IO.DirectoryInfo(tempIndexPath);
            try
            {
                if (IndexWriter.IsLocked(finalIndexDir)) IndexWriter.Unlock(finalIndexDir);
                // re-generate the index
                writer = new IndexWriter(finalIndexDir, analyzer, true, new IndexWriter.MaxFieldLength(2500000));
                readers[0] = FSDirectory.Open(tempIndexDir);
                writer.AddIndexesNoOptimize(readers);

                // optimize and close
                if (writer != null)
                {
                    try
                    {
                        writer.Optimize();
                    }
                    catch { }
                }
                if (writer != null)
                {
                    try
                    {
                        writer.Close();
                    }
                    catch { }
                }

            }
            catch (Exception ex)
            {
                if (writer != null)
                {
                    writer.Optimize();
                    writer.Commit();
                    writer.Close();
                }
                throw; // rethrow, preserving the original stack trace
            }
        }
Example #16
        /// <summary>
        /// Creates the index for the bz2 file on a separate thread.
        /// </summary>
        private void CreateIndexAsync()
        {
            bool failed = false;
            startTime = DateTime.Now;

            try
            {
                // Close any searchers

                if (searcher != null)
                {
                    searcher.Close();

                    searcher = null;
                }

                indexExists = false;

                // Create the index writer
                FSDirectory idxDir = FSDirectory.Open(new DirectoryInfo(indexPath));
                indexer = new IndexWriter(idxDir, textAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
                memoryIndexer = new IndexWriter(new RAMDirectory(), textAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

                memoryIndexer.SetMaxBufferedDocs(1000);
                memoryIndexer.SetMergeFactor(100);

                indexer.SetMaxBufferedDocs(1000);
                indexer.SetMergeFactor(100);

                // Locate the bzip2 blocks in the file

                LocateBlocks();

                // Twice the byte size of the first block (the recorded block
                // boundaries are bit offsets, hence the /8), plus 100 bytes
                // of headroom

                long bufSize = ((ends[0] - beginnings[0]) / 8) * 2 + 100;

                // Buffers for the current and next block

                blockBuf = new byte[bufSize];
                charBuf = new char[bufSize];

                // Whether there was a Wiki topic carryover from current block to the next one

                char[] charCarryOver = new char[0];

                // The length of the currently loaded data

                long loadedLength = 0;

                StringBuilder sb = new StringBuilder();

                // Starting indexing

                startTime = DateTime.Now;
                elapsed = new TimeSpan(0);
                ReportProgress(0, IndexingProgress.State.Running, Properties.Resources.ProgressIndexing);
                for (long i = 0; i < totalBlocks && !abortIndexing; i++)
                {
                    ReportProgress((int)((double)(i * 100) / (double)totalBlocks), IndexingProgress.State.Running, String.Empty);

                    #region Indexing logic

                    loadedLength = LoadBlock(beginnings[i], ends[i], ref blockBuf);

                    if (charBuf.Length < blockBuf.Length)
                    {
                        charBuf = new char[blockBuf.Length];
                    }

                    int bytesUsed = 0;
                    int charsUsed = 0;
                    bool completed = false;

                    // Decode the UTF-8 block bytes into characters

                    utf8.Convert(blockBuf, 0, (int)loadedLength, charBuf, 0, charBuf.Length, i == totalBlocks - 1, out bytesUsed, out charsUsed, out completed);

                    if (!completed)
                    {
                        throw new Exception(Properties.Resources.UTFDecoderError);
                    }

                    // Construct a current string

                    sb.Length = 0;

                    if (charCarryOver.Length > 0)
                    {
                        sb.Append(charCarryOver);
                    }

                    sb.Append(charBuf, 0, charsUsed);

                    int carryOverLength = charCarryOver.Length;

                    int charsMatched = IndexString(sb.ToString(), beginnings[i], ends[i], carryOverLength, i == totalBlocks - 1);

                    // There's a Wiki topic carryover, let's store the characters which need to be carried over

                    if (charsMatched > 0)
                    {
                        charCarryOver = new char[charsMatched];

                        sb.CopyTo(charsUsed + carryOverLength - charsMatched, charCarryOver, 0, charsMatched);
                    }
                    else
                    {
                        charCarryOver = new char[0];
                    }

                    #endregion
                }

                // Wait till all the threads finish
                while (activeThreads != 0)
                {
                    ReportProgress(0, IndexingProgress.State.Running, String.Format(Properties.Resources.WaitingForTokenizers, activeThreads));

                    Thread.Sleep(TimeSpan.FromSeconds(5));
                }
                ReportProgress(0, IndexingProgress.State.Running, Properties.Resources.FlushingDocumentsToDisk);

                Lucene.Net.Store.Directory dir = memoryIndexer.GetDirectory();

                memoryIndexer.Close();

                indexer.AddIndexesNoOptimize(new Lucene.Net.Store.Directory[] { dir });

                memoryIndexer = null;
                ReportProgress(0, IndexingProgress.State.Running, Properties.Resources.OptimizingIndex);

                indexer.Optimize();

                indexExists = true;
            }
            catch (Exception ex)
            {
                ReportProgress(0, IndexingProgress.State.Failure, ex.ToString());

                failed = true;
            }

            // Try to release some memory

            if (indexer != null)
            {
                indexer.Close();

                indexer = null;
            }

            if (failed ||
                abortIndexing)
            {
                Directory.Delete(indexPath, true);

                indexExists = false;
            }
            else
            {
                if (indexExists)
                {
                    FSDirectory idxDir = FSDirectory.Open(new DirectoryInfo(indexPath));
                    searcher = new IndexSearcher(idxDir, true);
                }
            }
            ReportProgress(0, IndexingProgress.State.Finished, String.Empty);
        }