private void  SetUpDirs(Directory dir, Directory aux)
        {
            // Build the main index: 1000 docs flushed as a single segment.
            IndexWriter w = NewWriter(dir, true);
            w.SetMaxBufferedDocs(1000);
            AddDocs(w, 1000);
            Assert.AreEqual(1000, w.DocCount());
            Assert.AreEqual(1, w.GetSegmentCount());
            w.Close();

            // Build the auxiliary index: 30 docs spread over 3 segments,
            // written without compound files. The writer is closed and
            // re-opened after each batch so every batch lands in its own
            // segment.
            w = NewWriter(aux, true);
            w.SetUseCompoundFile(false);             // use one without a compound file
            w.SetMaxBufferedDocs(100);
            w.SetMergeFactor(10);
            for (int seg = 0; seg < 3; seg++)
            {
                AddDocs(w, 10);
                w.Close();
                w = NewWriter(aux, false);
                w.SetUseCompoundFile(false);         // use one without a compound file
                w.SetMaxBufferedDocs(100);
                w.SetMergeFactor(10);
            }
            Assert.AreEqual(30, w.DocCount());
            Assert.AreEqual(3, w.GetSegmentCount());
            w.Close();
        }
        public virtual void  TestMoreMerges()
        {
            // Verifies that AddIndexesNoOptimize copies only live documents
            // from partially-deleted source indexes and leaves the expected
            // segment/document counts in the target.

            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux  = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            SetUpDirs(dir, aux);

            // Copy aux (30 docs / 3 segments) into aux2 without merging.
            IndexWriter writer = NewWriter(aux2, true);

            writer.SetMaxBufferedDocs(100);
            writer.SetMergeFactor(10);
            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(30, writer.DocCount());
            Assert.AreEqual(3, writer.GetSegmentCount());
            writer.Close();

            // Delete 27 of the 30 docs in aux, leaving 3 live docs.
            IndexReader reader = IndexReader.Open(aux);

            for (int i = 0; i < 27; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(3, reader.NumDocs());
            reader.Close();

            // Delete 8 of the 30 docs in aux2, leaving 22 live docs.
            reader = IndexReader.Open(aux2);
            for (int i = 0; i < 8; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(22, reader.NumDocs());
            reader.Close();

            writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(6);
            writer.SetMergeFactor(4);

            // Expected: 1000 (dir) + 3 (aux live) + 22 (aux2 live) = 1025.
            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(1025, writer.DocCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1025);
        }
        public virtual void  TestMergeAfterCopy()
        {
            // Verifies that copying an index with deletions via
            // AddIndexesNoOptimize carries over only the live documents.
            Directory mainDir = new RAMDirectory();   // main index: 1000 docs
            Directory auxDir  = new RAMDirectory();   // aux index: 30 docs / 3 segments

            SetUpDirs(mainDir, auxDir);

            // Delete the first 20 docs of aux so only 10 remain live.
            IndexReader auxReader = IndexReader.Open(auxDir);
            for (int docId = 0; docId < 20; docId++)
                auxReader.DeleteDocument(docId);
            Assert.AreEqual(10, auxReader.NumDocs());
            auxReader.Close();

            // Add aux plus a RAM copy of aux; each contributes its 10 live docs.
            IndexWriter mainWriter = NewWriter(mainDir, false);
            mainWriter.SetMaxBufferedDocs(4);
            mainWriter.SetMergeFactor(4);
            mainWriter.AddIndexesNoOptimize(new Directory[] { auxDir, new RAMDirectory(auxDir) });
            Assert.AreEqual(1020, mainWriter.DocCount());
            Assert.AreEqual(1000, mainWriter.GetDocCount(0));
            mainWriter.Close();

            // make sure the index is correct
            VerifyNumDocs(mainDir, 1020);
        }
        public virtual void  TestAddSelf()
        {
            // Verifies that AddIndexesNoOptimize rejects the writer's own
            // directory with an ArgumentException and leaves the target index
            // untouched afterwards.

            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            IndexWriter writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            writer.SetMaxBufferedDocs(1000);
            AddDocs(writer, 40);
            writer.Close();
            // NOTE(review): re-creating aux here (create == true) discards the
            // 40 docs just added, so aux ends up with 100 docs, not 140 —
            // presumably intentional to leave stale files behind; confirm
            // against the upstream Lucene test.
            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            writer.SetMaxBufferedDocs(1000);
            AddDocs(writer, 100);
            writer.Close();

            writer = NewWriter(dir, false);
            try
            {
                // cannot add self
                writer.AddIndexesNoOptimize(new Directory[] { aux, dir });
                Assert.Fail("adding a writer's own directory should have thrown");
            }
            catch (System.ArgumentException)
            {
                // expected; the target index must be unchanged
                Assert.AreEqual(100, writer.DocCount());
            }
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 100);
        }
// ---- Beispiel #5 (scraped example separator; score 0) ----
		public virtual void  TestDocCount()
		{
			// Checks DocCount()/MaxDoc()/NumDocs() before and after deletions,
			// and after Optimize() reclaims the deleted documents.
			Directory dir = new RAMDirectory();
			
			IndexWriter writer = null;
			IndexReader reader = null;
			int i;
			
			try
			{
				writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
				
				// add 100 documents
				for (i = 0; i < 100; i++)
				{
					AddDoc(writer);
				}
				Assert.AreEqual(100, writer.DocCount());
				writer.Close();
				
				// delete 40 documents
				reader = IndexReader.Open(dir);
				for (i = 0; i < 40; i++)
				{
					reader.Delete(i);
				}
				reader.Close();
				
				// test doc count before segments are merged/index is optimized:
				// the writer still reports 100 because deletes live in .del files
				writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
				Assert.AreEqual(100, writer.DocCount());
				writer.Close();
				
				reader = IndexReader.Open(dir);
				Assert.AreEqual(100, reader.MaxDoc());
				Assert.AreEqual(60, reader.NumDocs());
				reader.Close();
				
				// optimize the index and check that the new doc count is correct
				writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
				writer.Optimize();
				Assert.AreEqual(60, writer.DocCount());
				writer.Close();
				
				// check that the index reader gives the same numbers.
				reader = IndexReader.Open(dir);
				Assert.AreEqual(60, reader.MaxDoc());
				Assert.AreEqual(60, reader.NumDocs());
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
				// An IOException is a test failure; previously it was only
				// logged, letting the test pass silently on I/O errors.
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.Fail("unexpected IOException: " + e.Message);
			}
		}
// ---- Beispiel #6 (scraped example separator; score 0) ----
        public virtual void  TestDocCount()
        {
            // Checks DocCount()/MaxDoc()/NumDocs() before and after deletions,
            // and after Optimize() reclaims the deleted documents.
            Directory dir = new RAMDirectory();

            IndexWriter writer = null;
            IndexReader reader = null;
            int         i;

            try
            {
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

                // add 100 documents
                for (i = 0; i < 100; i++)
                {
                    AddDoc(writer);
                }
                Assert.AreEqual(100, writer.DocCount());
                writer.Close();

                // delete 40 documents
                reader = IndexReader.Open(dir);
                for (i = 0; i < 40; i++)
                {
                    reader.Delete(i);
                }
                reader.Close();

                // test doc count before segments are merged/index is optimized:
                // the writer still reports 100 because deletes live in .del files
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
                Assert.AreEqual(100, writer.DocCount());
                writer.Close();

                reader = IndexReader.Open(dir);
                Assert.AreEqual(100, reader.MaxDoc());
                Assert.AreEqual(60, reader.NumDocs());
                reader.Close();

                // optimize the index and check that the new doc count is correct
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
                writer.Optimize();
                Assert.AreEqual(60, writer.DocCount());
                writer.Close();

                // check that the index reader gives the same numbers.
                reader = IndexReader.Open(dir);
                Assert.AreEqual(60, reader.MaxDoc());
                Assert.AreEqual(60, reader.NumDocs());
                reader.Close();
            }
            catch (System.IO.IOException e)
            {
                // An IOException is a test failure; previously it was only
                // logged, letting the test pass silently on I/O errors.
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.Fail("unexpected IOException: " + e.Message);
            }
        }
// ---- Beispiel #7 (scraped example separator; score 0) ----
 /// <summary> Returns the number of documents currently in this index.</summary>
 /// <seealso cref="IndexWriter#DocCount()">
 /// </seealso>
 /// <seealso cref="IndexReader#NumDocs()">
 /// </seealso>
 /// <throws>  IllegalStateException if the index is closed </throws>
 public virtual int DocCount()
 {
     lock (directory)
     {
         AssureOpen();
         // Delegate to whichever handle is currently open: the writer while
         // in write mode, otherwise the reader.
         return indexWriter != null
             ? indexWriter.DocCount()
             : indexReader.NumDocs();
     }
 }
        public virtual void  TestNoMergeAfterCopy()
        {
            // Copies aux twice into the main index with settings chosen so the
            // copy itself does not trigger a merge; all documents must survive.
            Directory mainDir = new RAMDirectory();   // main index
            Directory auxDir  = new RAMDirectory();   // auxiliary index

            SetUpDirs(mainDir, auxDir);

            IndexWriter w = NewWriter(mainDir, false);
            w.SetMaxBufferedDocs(10);
            w.SetMergeFactor(4);

            // Expected: 1000 (main) + 30 (aux) + 30 (RAM copy of aux) = 1060.
            w.AddIndexesNoOptimize(new Directory[] { auxDir, new RAMDirectory(auxDir) });
            Assert.AreEqual(1060, w.DocCount());
            Assert.AreEqual(1000, w.GetDocCount(0));
            w.Close();

            // make sure the index is correct
            VerifyNumDocs(mainDir, 1060);
        }
// ---- Beispiel #9 (scraped example separator; score 0) ----
        public virtual void  TestCrashAfterReopen()
        {
            // Simulates a crash after an index has been closed and re-opened:
            // the re-opened writer must see all 314 docs, and after Crash() a
            // reader must still open a consistent commit with at least 157 docs.
            IndexWriter      writer = InitIndex();
            MockRAMDirectory dir    = (MockRAMDirectory)writer.GetDirectory();

            writer.Close();
            writer = InitIndex(dir);
            Assert.AreEqual(314, writer.DocCount());
            Crash(writer);

            /*
             * System.out.println("\n\nTEST: open reader");
             * String[] l = dir.list();
             * Arrays.sort(l);
             * for(int i=0;i<l.length;i++)
             * System.out.println("file " + i + " = " + l[i] + " " +
             * dir.fileLength(l[i]) + " bytes");
             */

            // Whatever was durably committed before the crash must be readable.
            IndexReader reader = IndexReader.Open(dir);

            Assert.IsTrue(reader.NumDocs() >= 157);
        }
        public virtual void  CreateIndex(System.String dirName, bool doCFS)
        {
            // Builds the backwards-compatibility test index at dirName: 35 docs
            // (compound files per doCFS), one extra segment without prox data,
            // one deleted doc (producing a .del file) and one modified norm
            // (producing a separate-norm .sN file).
            RmDir(dirName);

            dirName = FullDir(dirName);

            Directory   dir    = FSDirectory.Open(new System.IO.FileInfo(dirName));
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(doCFS);
            writer.SetMaxBufferedDocs(10);

            for (int i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
            writer.Close();

            // open fresh writer so we get no prx file in the added segment
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            writer.SetUseCompoundFile(doCFS);
            writer.SetMaxBufferedDocs(10);
            AddNoProxDoc(writer);
            writer.Close();

            // Delete one doc so we get a .del file:
            IndexReader reader     = IndexReader.Open(dir);
            Term        searchTerm = new Term("id", "7");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

            // Set one norm so we get a .s0 file:
            reader.SetNorm(21, "content", (float)1.5);
            reader.Close();
        }
        public virtual void  TestMergeDocCount0()
        {
            // Exercises LogDocMergePolicy with segments that drop to zero live
            // docs: after deleting every earlier doc and adding 10 new ones,
            // the writer must report exactly 10 and merge invariants must hold.
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true);

            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(100);

            for (int i = 0; i < 250; i++)
            {
                AddDoc(writer);
                CheckInvariants(writer);
            }
            writer.Close();

            // Delete every document (they all contain "aaa" in content).
            IndexReader reader = IndexReader.Open(dir);

            reader.DeleteDocuments(new Term("content", "aaa"));
            reader.Close();

            writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false);
            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(5);

            // merge factor is changed, so check invariants after all adds
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
            }
            CheckInvariants(writer);
            Assert.AreEqual(10, writer.DocCount());

            writer.Close();
        }
// ---- Beispiel #12 (scraped example separator; score 0) ----
        static void Main(string[] args)
        {
            // Console smoke test for AzureDirectory: indexes 10,000 generated
            // docs into blob-storage catalog "TestCatalog6", then runs a few
            // phrase searches against the result.

            // get settings from azure settings or app.config
            CloudStorageAccount.SetConfigurationSettingPublisher((configName, configSetter) =>
            {
                try
                {
                    configSetter(RoleEnvironment.GetConfigurationSettingValue(configName));
                }
                catch (Exception)
                {
                    // for a console app, reading from App.config
                    configSetter(System.Configuration.ConfigurationManager.AppSettings[configName]);
                }
            });

            // default AzureDirectory stores cache in local temp folder
            AzureDirectory azureDirectory = new AzureDirectory(CloudStorageAccount.FromConfigurationSetting("blobStorage"), "TestCatalog6");

            // Keep retrying until the write lock is obtained; offer to clear a
            // stale lock left behind by a crashed process.
            IndexWriter indexWriter = null;
            while (indexWriter == null)
            {
                try
                {
                    indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(), !IndexReader.IndexExists(azureDirectory));
                }
                catch (LockObtainFailedException)
                {
                    Console.WriteLine("Lock is taken, Hit 'Y' to clear the lock, or anything else to try again");
                    // ReadLine() returns null at end-of-input; guard before use.
                    string answer = Console.ReadLine();
                    if (answer != null && answer.Trim().ToLower() == "y")
                        azureDirectory.ClearLock("write.lock");
                }
            }
            Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
            indexWriter.SetRAMBufferSizeMB(10.0);
            indexWriter.SetUseCompoundFile(false);
            indexWriter.SetMaxMergeDocs(10000);
            indexWriter.SetMergeFactor(100);

            for (int iDoc = 0; iDoc < 10000; iDoc++)
            {
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);   // progress marker every 10 docs
                Document doc = new Document();
                doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                indexWriter.AddDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.DocCount());
            indexWriter.Close();

            IndexSearcher searcher;
            using (new AutoStopWatch("Creating searcher"))
            {
                searcher = new IndexSearcher(azureDirectory);
            }
            SearchForPhrase(searcher, "dog");
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            SearchForPhrase(searcher, _random.Next(32768).ToString());
        }
        public virtual void  TestExactFileNames()
        {
            // Verifies the exact set of file names produced by indexing 35
            // docs, deleting one, and setting one norm. Runs twice: pass 0
            // with autoCommit on, pass 1 with it off.
            for (int pass = 0; pass < 2; pass++)
            {
                System.String outputDir = "lucene.backwardscompat0.index";
                RmDir(outputDir);

                try
                {
                    Directory dir = FSDirectory.Open(new System.IO.FileInfo(FullDir(outputDir)));

                    bool autoCommit = 0 == pass;

                    IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                    writer.SetRAMBufferSizeMB(16.0);
                    for (int i = 0; i < 35; i++)
                    {
                        AddDoc(writer, i);
                    }
                    Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
                    writer.Close();

                    // Delete one doc so we get a .del file:
                    IndexReader reader     = IndexReader.Open(dir);
                    Term        searchTerm = new Term("id", "7");
                    int         delCount   = reader.DeleteDocuments(searchTerm);
                    Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

                    // Set one norm so we get a .s0 file:
                    reader.SetNorm(21, "content", (float)1.5);
                    reader.Close();

                    // The numbering of fields can vary depending on which
                    // JRE is in use.  On some JREs we see content bound to
                    // field 0; on others, field 1.  So, here we have to
                    // figure out which field number corresponds to
                    // "content", and then set our expected file names below
                    // accordingly:
                    CompoundFileReader cfsReader  = new CompoundFileReader(dir, "_0.cfs");
                    FieldInfos         fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
                    int contentFieldIndex         = -1;
                    for (int i = 0; i < fieldInfos.Size(); i++)
                    {
                        FieldInfo fi = fieldInfos.FieldInfo(i);
                        if (fi.name_ForNUnit.Equals("content"))
                        {
                            contentFieldIndex = i;
                            break;
                        }
                    }
                    cfsReader.Close();
                    Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment");

                    // Now verify file names:
                    System.String[] expected;
                    expected = new System.String[] { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" };

                    // Sort both sides so the comparison is order-independent.
                    System.String[] actual = dir.ListAll();
                    System.Array.Sort(expected);
                    System.Array.Sort(actual);
                    if (!SupportClass.CollectionsHelper.Equals(expected, actual))
                    {
                        Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) + "\n  actual:\n    " + AsString(actual));
                    }
                    dir.Close();
                }
                finally
                {
                    RmDir(outputDir);
                }
            }
        }
        /* Open pre-lockless index, add docs, do a delete &
         * setNorm, and search */
        public virtual void  ChangeIndexWithAdds(System.String dirName, bool autoCommit)
        {
            // Exercises adding documents, deleting, and setting norms on an
            // index created by an older Lucene version, checking hit counts
            // and stored fields at each step.
            System.String origDirName = dirName;
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName));

            // open writer
            IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);

            // add 10 docs
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, 35 + i);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expected;

            // Indexes older than version "24" contain one fewer document.
            if (Compare(origDirName, "24") < 0)
            {
                expected = 45;
            }
            else
            {
                expected = 46;
            }
            Assert.AreEqual(expected, writer.DocCount(), "wrong doc count");
            writer.Close();

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document   d    = searcher.Doc(hits[0].doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            TestHits(hits, 44, searcher.GetIndexReader());
            searcher.Close();

            // make sure we can do delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took":
            searcher = new IndexSearcher(dir);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 43, searcher.GetIndexReader());
            searcher.Close();

            // optimize
            writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
            writer.Optimize();
            writer.Close();

            // after optimize the same 43 hits must still be present
            searcher = new IndexSearcher(dir);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].doc);
            TestHits(hits, 43, searcher.GetIndexReader());
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            searcher.Close();

            dir.Close();
        }
		public virtual void  runTest(Directory directory, bool autoCommit, MergeScheduler merger)
		{
			// Stress-tests concurrent AddDocument calls against one writer:
			// per iteration, 200 docs are added single-threaded, then
			// NUM_THREADS worker threads add more; afterwards the doc count
			// must match the closed-form expectation and the index must be
			// optimized.
			
			IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true);
			writer.SetMaxBufferedDocs(2);
			if (merger != null)
				writer.SetMergeScheduler(merger);
			
			for (int iter = 0; iter < NUM_ITER; iter++)
			{
				int iterFinal = iter;
				
				// high merge factor: avoid merges during the sequential adds
				writer.SetMergeFactor(1000);
				
				for (int i = 0; i < 200; i++)
				{
					Document d = new Document();
					d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
					d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
					writer.AddDocument(d);
				}
				
				// low merge factor: force merges while the threads run
				writer.SetMergeFactor(4);
				//writer.setInfoStream(System.out);
				
				// NOTE(review): docCount is never used below — looks like a
				// leftover from debugging.
				int docCount = writer.DocCount();
				
				SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREADS];
				
				for (int i = 0; i < NUM_THREADS; i++)
				{
					int iFinal = i;
					IndexWriter writerFinal = writer;
					threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this);
				}
				
				for (int i = 0; i < NUM_THREADS; i++)
					threads[i].Start();
				
				for (int i = 0; i < NUM_THREADS; i++)
					threads[i].Join();
				
				// failed is set by the worker threads on any exception
				Assert.IsTrue(!failed);
				
				int expectedDocCount = (int) ((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));
				
				// System.out.println("TEST: now index=" + writer.segString());
				
				Assert.AreEqual(expectedDocCount, writer.DocCount());
				
				if (!autoCommit)
				{
					// without autoCommit, close/reopen to make changes visible
					writer.Close();
					writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
					writer.SetMaxBufferedDocs(2);
				}
				
				IndexReader reader = IndexReader.Open(directory);
				Assert.IsTrue(reader.IsOptimized());
				Assert.AreEqual(expectedDocCount, reader.NumDocs());
				reader.Close();
			}
			writer.Close();
		}
		public virtual void  CreateIndex(System.String dirName, bool doCFS)
		{
			// Build a fresh 35-doc index at dirName (compound files per
			// doCFS), then delete one doc and set one norm so the index
			// contains a .del file and a separate-norm (.sN) file.
			RmDir(dirName);
			dirName = FullDir(dirName);

			Directory dir = FSDirectory.GetDirectory(dirName);
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetUseCompoundFile(doCFS);

			int docId = 0;
			while (docId < 35)
			{
				AddDoc(writer, docId);
				docId++;
			}
			Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
			writer.Close();

			// Delete one doc so we get a .del file:
			IndexReader reader = IndexReader.Open(dir);
			int delCount = reader.DeleteDocuments(new Term("id", "7"));
			Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

			// Set one norm so we get a .s0 file:
			reader.SetNorm(21, "content", (float) 1.5);
			reader.Close();
		}
		public virtual void  TestCreateWithReader2()
		{
			// Re-creating an index while a reader holds the previous
			// generation must not disturb that reader's point-in-time view.
			System.String tempDir = System.IO.Path.GetTempPath();
			if (tempDir == null)
				throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
			// Path.Combine instead of a hard-coded "\\" so the test also runs
			// on platforms with a different directory separator.
			System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindexwriter"));
			try
			{
				// add one document & close writer
				IndexWriter writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true);
				AddDoc(writer);
				writer.Close();
				
				// now open reader:
				IndexReader reader = IndexReader.Open(indexDir);
				Assert.AreEqual(1, reader.NumDocs(), "should be one document");
				
				// now open index for create:
				writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true);
				Assert.AreEqual(0, writer.DocCount(), "should be zero documents");
				AddDoc(writer);
				writer.Close();
				
				// the already-open reader still sees the old one-doc index,
				// and a fresh reader sees the re-created one-doc index
				Assert.AreEqual(1, reader.NumDocs(), "should be one document");
				IndexReader reader2 = IndexReader.Open(indexDir);
				Assert.AreEqual(1, reader2.NumDocs(), "should be one document");
				reader.Close();
				reader2.Close();
			}
			finally
			{
				RmDir(indexDir);
			}
		}
		/* Open pre-lockless index, add docs, do a delete &
		* setNorm, and search */
		public virtual void  ChangeIndexWithAdds(System.String dirName, bool autoCommit)
		{
			dirName = FullDir(dirName);
			Directory dir = FSDirectory.GetDirectory(dirName);

			// open writer and add 10 docs (ids 35..44)
			IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
			for (int n = 35; n < 45; n++)
			{
				AddDoc(writer, n);
			}

			// make sure writer sees right total -- writer seems not to know about deletes in .del?
			Assert.AreEqual(45, writer.DocCount(), "wrong doc count");
			writer.Close();

			// make sure searching sees right # hits
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(new Term("content", "aaa")));
			Assert.AreEqual(44, hits.Length(), "wrong number of hits");
			Assert.AreEqual("21", hits.Doc(0).Get("id"), "wrong first document");
			searcher.Close();

			// make sure we can do delete & setNorm against this
			// pre-lockless segment:
			IndexReader reader = IndexReader.Open(dir);
			int delCount = reader.DeleteDocuments(new Term("id", "6"));
			Assert.AreEqual(1, delCount, "wrong delete count");
			reader.SetNorm(22, "content", (float) 2.0);
			reader.Close();

			// make sure they "took":
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")));
			Assert.AreEqual(43, hits.Length(), "wrong number of hits");
			Assert.AreEqual("22", hits.Doc(0).Get("id"), "wrong first document");
			searcher.Close();

			// optimize
			writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
			writer.Optimize();
			writer.Close();

			// after optimize the same 43 hits must still be present
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")));
			Assert.AreEqual(43, hits.Length(), "wrong number of hits");
			Assert.AreEqual("22", hits.Doc(0).Get("id"), "wrong first document");
			searcher.Close();

			dir.Close();
		}
// ---- Beispiel #19 (scraped example separator; score 0) ----
		public virtual void  TestCreateWithReader3()
		{
			// A reader opened on a one-doc index must keep seeing that doc
			// even after the index is re-created (create == true) under it.
			System.IO.FileInfo dirName = _TestUtil.GetTempDir("lucenetestindexwriter");
			try
			{
				// add one document & close writer
				IndexWriter writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
				AddDoc(writer);
				writer.Close();
				
				// now open reader:
				IndexReader reader = IndexReader.Open(dirName);
				Assert.AreEqual(1, reader.NumDocs(), "should be one document");
				
				// now open index for create:
				writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
				Assert.AreEqual(0, writer.DocCount(), "should be zero documents");
				AddDoc(writer);
				writer.Close();
				
				// old reader is unchanged; a new reader sees the new index
				Assert.AreEqual(1, reader.NumDocs(), "should be one document");
				IndexReader reader2 = IndexReader.Open(dirName);
				Assert.AreEqual(1, reader2.NumDocs(), "should be one document");
				reader.Close();
				reader2.Close();
			}
			finally
			{
				RmDir(dirName);
			}
		}
		public virtual void  TestExactFileNames()
		{
			// Verifies the exact set of file names produced by indexing 35
			// docs, deleting one, and setting one norm. Runs twice: pass 0
			// with autoCommit on, pass 1 with it off.
			
			for (int pass = 0; pass < 2; pass++)
			{
				
				System.String outputDir = "lucene.backwardscompat0.index";
				RmDir(outputDir);
				
				try
				{
					Directory dir = FSDirectory.Open(new System.IO.FileInfo(FullDir(outputDir)));
					
					bool autoCommit = 0 == pass;
					
					IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
					writer.SetRAMBufferSizeMB(16.0);
					for (int i = 0; i < 35; i++)
					{
						AddDoc(writer, i);
					}
					Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
					writer.Close();
					
					// Delete one doc so we get a .del file:
					IndexReader reader = IndexReader.Open(dir);
					Term searchTerm = new Term("id", "7");
					int delCount = reader.DeleteDocuments(searchTerm);
					Assert.AreEqual(1, delCount, "didn't delete the right number of documents");
					
					// Set one norm so we get a .s0 file:
					reader.SetNorm(21, "content", (float) 1.5);
					reader.Close();
					
					// The numbering of fields can vary depending on which
					// JRE is in use.  On some JREs we see content bound to
					// field 0; on others, field 1.  So, here we have to
					// figure out which field number corresponds to
					// "content", and then set our expected file names below
					// accordingly:
					CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
					FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
					int contentFieldIndex = - 1;
					for (int i = 0; i < fieldInfos.Size(); i++)
					{
						FieldInfo fi = fieldInfos.FieldInfo(i);
						if (fi.name_ForNUnit.Equals("content"))
						{
							contentFieldIndex = i;
							break;
						}
					}
					cfsReader.Close();
					Assert.IsTrue(contentFieldIndex != - 1, "could not locate the 'content' field number in the _2.cfs segment");
					
					// Now verify file names:
					System.String[] expected;
					expected = new System.String[]{"_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen"};
					
					// Sort both sides so the comparison is order-independent.
					System.String[] actual = dir.ListAll();
					System.Array.Sort(expected);
					System.Array.Sort(actual);
					if (!SupportClass.CollectionsHelper.Equals(expected, actual))
					{
						Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) + "\n  actual:\n    " + AsString(actual));
					}
					dir.Close();
				}
				finally
				{
					RmDir(outputDir);
				}
			}
		}
		/* Builds a small test index at dirName: 35 docs plus one prox-less doc,
		* then deletes one document and sets one norm so the segment gains
		* .del and .sN files.  doCFS selects compound-file format. */
		public virtual void  CreateIndex(System.String dirName, bool doCFS)
		{
			
			RmDir(dirName);
			
			System.String fullPath = FullDir(dirName);
			
			Directory dir = FSDirectory.Open(new System.IO.FileInfo(fullPath));
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetUseCompoundFile(doCFS);
			writer.SetMaxBufferedDocs(10);
			
			// Populate the index with 35 documents.
			for (int docNum = 0; docNum < 35; docNum++)
			{
				AddDoc(writer, docNum);
			}
			Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
			writer.Close();
			
			// Re-open a fresh writer so the extra document lands in its own
			// segment and therefore produces no prx file.
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.SetUseCompoundFile(doCFS);
			writer.SetMaxBufferedDocs(10);
			AddNoProxDoc(writer);
			writer.Close();
			
			// Deleting a document forces a .del file into the index.
			IndexReader reader = IndexReader.Open(dir);
			int delCount = reader.DeleteDocuments(new Term("id", "7"));
			Assert.AreEqual(1, delCount, "didn't delete the right number of documents");
			
			// Touching a norm forces a separate-norms (.sN) file.
			reader.SetNorm(21, "content", (float) 1.5);
			reader.Close();
		}
		/* Open a pre-lockless (legacy-format) index, add docs, do a delete &
		* setNorm against a pre-lockless segment, optimize, and verify search
		* results after each step.  origDirName encodes the index's Lucene
		* version (e.g. "24" for 2.4) and drives the expected doc counts. */
		public virtual void  ChangeIndexWithAdds(System.String dirName, bool autoCommit)
		{
			System.String origDirName = dirName;
			dirName = FullDir(dirName);
			
			Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName));
			
			// open writer against the existing (pre-built) index
			IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
			
			// add 10 docs
			for (int i = 0; i < 10; i++)
			{
				AddDoc(writer, 35 + i);
			}
			
			// make sure writer sees right total -- writer seems not to know about deletes in .del?
			// The expected count differs by index version: indexes built before
			// 2.4 report 45 here, later ones 46 (presumably an extra doc in the
			// newer fixture -- confirm against CreateIndex).
			int expected;
			if (Compare(origDirName, "24") < 0)
			{
				expected = 45;
			}
			else
			{
				expected = 46;
			}
			Assert.AreEqual(expected, writer.DocCount(), "wrong doc count");
			writer.Close();
			
			// make sure searching sees right # hits
			IndexSearcher searcher = new IndexSearcher(dir);
			ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Document d = searcher.Doc(hits[0].doc);
			Assert.AreEqual("21", d.Get("id"), "wrong first document");
			TestHits(hits, 44, searcher.GetIndexReader());
			searcher.Close();
			
			// make sure we can do delete & setNorm against this
			// pre-lockless segment:
			IndexReader reader = IndexReader.Open(dir);
			Term searchTerm = new Term("id", "6");
			int delCount = reader.DeleteDocuments(searchTerm);
			Assert.AreEqual(1, delCount, "wrong delete count");
			reader.SetNorm(22, "content", (float) 2.0);
			reader.Close();
			
			// make sure they "took": one fewer hit, and the boosted doc 22 first
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Assert.AreEqual(43, hits.Length, "wrong number of hits");
			d = searcher.Doc(hits[0].doc);
			Assert.AreEqual("22", d.Get("id"), "wrong first document");
			TestHits(hits, 43, searcher.GetIndexReader());
			searcher.Close();
			
			// optimize down to a single (modern-format) segment
			writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
			writer.Optimize();
			writer.Close();
			
			// verify the delete and norm survived the optimize
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Assert.AreEqual(43, hits.Length, "wrong number of hits");
			d = searcher.Doc(hits[0].doc);
			TestHits(hits, 43, searcher.GetIndexReader());
			Assert.AreEqual("22", d.Get("id"), "wrong first document");
			searcher.Close();
			
			dir.Close();
		}
		/* Verifies that re-opening an index for create while a reader still has
		* the old generation open works: the reader keeps seeing its snapshot
		* while the writer starts from zero documents. */
		public virtual void  TestCreateWithReader3()
		{
			System.String tempDir = SupportClass.AppSettings.Get("tempDir", "");
			// Get() returns the "" default when the setting is absent, so the
			// original 'tempDir == null' guard could never fire and the test
			// would silently build its index under the filesystem root.
			if (System.String.IsNullOrEmpty(tempDir))
				throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
			
			System.String dirName = tempDir + "/lucenetestindexwriter";
			try
			{
				
				// add one document & close writer
				IndexWriter writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true);
				AddDoc(writer);
				writer.Close();
				
				// now open reader:
				IndexReader reader = IndexReader.Open(dirName);
				Assert.AreEqual(reader.NumDocs(), 1, "should be one document");
				
				// now open index for create: the open reader must not be disturbed
				writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true);
				Assert.AreEqual(writer.DocCount(), 0, "should be zero documents");
				AddDoc(writer);
				writer.Close();
				
				// old reader still sees its one-doc snapshot; a fresh reader
				// sees the one doc in the recreated index
				Assert.AreEqual(reader.NumDocs(), 1, "should be one document");
				IndexReader reader2 = IndexReader.Open(dirName);
				Assert.AreEqual(reader2.NumDocs(), 1, "should be one document");
				reader.Close();
				reader2.Close();
			}
			finally
			{
				RmDir(new System.IO.FileInfo(dirName));
			}
		}
        /* Exercises AddIndexesNoOptimize: merges several auxiliary RAMDirectory
         * indexes into a main index, verifying doc counts and term frequencies
         * before and after each add and after an optimize. */
        public virtual void  TestSimpleCase()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // two auxiliary directories
            Directory aux  = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            // add 40 documents in separate files
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux2, true);
            // add 50 documents in compound files
            AddDocs2(writer, 50);
            Assert.AreEqual(50, writer.DocCount());
            writer.Close();

            // test doc count before segments are merged
            writer = NewWriter(dir, false);
            Assert.AreEqual(100, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(190, writer.DocCount());
            writer.Close();

            // make sure the old index is correct (adding must not mutate the source)
            VerifyNumDocs(aux, 40);

            // make sure the new index is correct
            VerifyNumDocs(dir, 190);

            // now add another set in.
            Directory aux3 = new RAMDirectory();

            writer = NewWriter(aux3, true);
            // add 40 documents
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.DocCount());
            writer.Close();

            // test doc count before segments are merged/index is optimized
            writer = NewWriter(dir, false);
            Assert.AreEqual(190, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux3 });
            Assert.AreEqual(230, writer.DocCount());
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            // 100 + 40 + 40 docs from AddDocs carry "aaa" content
            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            // 50 docs from AddDocs2 carry "bbb" content
            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now optimize it.
            writer = NewWriter(dir, false);
            writer.Optimize();
            writer.Close();

            // make sure the new index is correct after optimizing
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now add a single document
            Directory aux4 = new RAMDirectory();

            writer = NewWriter(aux4, true);
            AddDocs2(writer, 1);
            writer.Close();

            writer = NewWriter(dir, false);
            Assert.AreEqual(230, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux4 });
            Assert.AreEqual(231, writer.DocCount());
            writer.Close();

            VerifyNumDocs(dir, 231);

            VerifyTermDocs(dir, new Term("content", "bbb"), 51);
        }
		/* Checks IndexWriter.DocCount() semantics: it ignores deletes recorded
		* in .del files until an optimize merges them away, while IndexReader
		* reports MaxDoc vs. NumDocs correctly throughout.  Also round-trips
		* the static default write-lock timeout. */
		public virtual void  TestDocCount()
		{
			Directory dir = new RAMDirectory();
			
			IndexWriter writer = null;
			IndexReader reader = null;
			
			// The static default timeout must round-trip...
			IndexWriter.SetDefaultWriteLockTimeout(2000);
			Assert.AreEqual(2000, IndexWriter.GetDefaultWriteLockTimeout());
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer());
			
			// ...and changing it afterwards must not disturb the open writer.
			IndexWriter.SetDefaultWriteLockTimeout(1000);
			
			// add 100 documents
			for (int docNum = 0; docNum < 100; docNum++)
			{
				AddDoc(writer);
			}
			Assert.AreEqual(100, writer.DocCount());
			writer.Close();
			
			// delete 40 documents
			reader = IndexReader.Open(dir);
			for (int docNum = 0; docNum < 40; docNum++)
			{
				reader.DeleteDocument(docNum);
			}
			reader.Close();
			
			// before any merge/optimize the writer still counts deleted docs
			writer = new IndexWriter(dir, new WhitespaceAnalyzer());
			Assert.AreEqual(100, writer.DocCount());
			writer.Close();
			
			// the reader, however, distinguishes max from live doc counts
			reader = IndexReader.Open(dir);
			Assert.AreEqual(100, reader.MaxDoc());
			Assert.AreEqual(60, reader.NumDocs());
			reader.Close();
			
			// optimize the index and check that the new doc count is correct
			writer = new IndexWriter(dir, true, new WhitespaceAnalyzer());
			writer.Optimize();
			Assert.AreEqual(60, writer.DocCount());
			writer.Close();
			
			// check that the index reader gives the same numbers.
			reader = IndexReader.Open(dir);
			Assert.AreEqual(60, reader.MaxDoc());
			Assert.AreEqual(60, reader.NumDocs());
			reader.Close();
			
			// make sure opening a new index for create over
			// this existing one works correctly:
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			Assert.AreEqual(0, writer.DocCount());
			writer.Close();
		}
// Example #26
// 0
        /* Concurrency stress test: repeatedly bulk-adds 200 docs, then lets
         * NUM_THREADS worker threads add more concurrently, asserting the
         * cumulative doc count after every iteration and that the index ends
         * each iteration optimized.  'merger', when non-null, supplies the
         * MergeScheduler under test. */
        public virtual void  runTest(Directory directory, bool autoCommit, MergeScheduler merger)
        {
            IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true);

            // tiny buffer forces frequent segment flushes, stressing merging
            writer.SetMaxBufferedDocs(2);
            if (merger != null)
            {
                writer.SetMergeScheduler(merger);
            }

            for (int iter = 0; iter < NUM_ITER; iter++)
            {
                int iterFinal = iter;

                // huge merge factor: defer merging while the 200 docs go in
                writer.SetMergeFactor(1000);

                for (int i = 0; i < 200; i++)
                {
                    Document d = new Document();
                    d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
                    writer.AddDocument(d);
                }

                // small merge factor: aggressive merging during threaded adds
                writer.SetMergeFactor(4);
                //writer.setInfoStream(System.out);

                int docCount = writer.DocCount();

                SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    int         iFinal      = i;
                    IndexWriter writerFinal = writer;
                    threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                // wait for all workers before asserting totals
                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                }

                // 'failed' is set by a worker thread on any unexpected exception
                Assert.IsTrue(!failed);

                // cumulative total: per iteration, 200 docs from the loop above
                // plus the docs the worker threads add (formula presumably
                // mirrors AnonymousClassThread's add pattern -- verify there)
                int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));

                // System.out.println("TEST: now index=" + writer.segString());

                Assert.AreEqual(expectedDocCount, writer.DocCount());

                // without autoCommit, reopening is needed so a reader can see
                // the changes committed by Close()
                if (!autoCommit)
                {
                    writer.Close();
                    writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
                    writer.SetMaxBufferedDocs(2);
                }

                IndexReader reader = IndexReader.Open(directory);
                Assert.IsTrue(reader.IsOptimized());
                Assert.AreEqual(expectedDocCount, reader.NumDocs());
                reader.Close();
            }
            writer.Close();
        }
// Example #27
// 0
        /* Sample console app: indexes 100 generated documents into an
         * Azure-blob-backed Lucene directory, then runs a few searches. */
        static void Main(string[] args)
        {
            // get settings from azure settings or app.config
            CloudStorageAccount.SetConfigurationSettingPublisher((configName, configSetter) =>
            {
                try
                {
                    configSetter(RoleEnvironment.GetConfigurationSettingValue(configName));
                }
                catch (Exception)
                {
                    // for a console app, reading from App.config
                    configSetter(System.Configuration.ConfigurationManager.AppSettings[configName]);
                }
            });

            // default AzureDirectory stores cache in local temp folder
            AzureDirectory azureDirectory = new AzureDirectory(CloudStorageAccount.FromConfigurationSetting("blobStorage"), "TestCatalog");
            bool findexExists = false;
            try
            {
                findexExists = IndexReader.IndexExists(azureDirectory);
                // clear a stale write lock left behind by a crashed run
                if ((findexExists) && IndexReader.IsLocked(azureDirectory))
                    azureDirectory.ClearLock("write.lock");
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
                return;
            }
            // create the index when absent, otherwise append to the existing one
            IndexWriter indexWriter = new IndexWriter(azureDirectory, new Lucene.Net.Analysis.Standard.StandardAnalyzer(), !findexExists);
            indexWriter.SetRAMBufferSizeMB(10.0);
            indexWriter.SetUseCompoundFile(false);
            indexWriter.SetMaxMergeDocs(10000);
            indexWriter.SetMergeFactor(100);
            // BUGFIX: fExit must start out false.  It was initialized to true,
            // which made the loop below break on its very first iteration, so
            // no documents were ever indexed.  (fExit is presumably flipped to
            // true by a cancel handler elsewhere in the class.)
            fExit = false;
            for (int iDoc = 0; iDoc < 100; iDoc++)
            {
                // honor an external request to stop indexing early
                if (fExit)
                    break;
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);
                Document doc = new Document();
                doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                indexWriter.AddDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.DocCount());
            indexWriter.Close();

            IndexSearcher searcher;
            using (new AutoStopWatch("Creating searcher"))
            {
                searcher = new IndexSearcher(azureDirectory); // IndexReader.Open(
            }
            SearchForPhrase(searcher, "dog");
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            SearchForPhrase(searcher, _random.Next(32768).ToString());
        }
		/* Verifies merge-policy invariants still hold when segments end up
		* with zero live documents: all 250 docs are deleted, then more adds
		* trigger merges over the all-deleted segments. */
		public virtual void  TestMergeDocCount0()
		{
			Directory dir = new RAMDirectory();
			
			IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true);
			writer.SetMergePolicy(new LogDocMergePolicy(writer));
			writer.SetMaxBufferedDocs(10);
			writer.SetMergeFactor(100);
			
			// With the merge factor fixed at 100, the invariants must hold
			// after every single add.
			for (int docNum = 0; docNum < 250; docNum++)
			{
				AddDoc(writer);
				CheckInvariants(writer);
			}
			writer.Close();
			
			// Delete every document ("aaa" matches them all) so subsequent
			// merges encounter segments whose live doc count is zero.
			IndexReader reader = IndexReader.Open(dir);
			reader.DeleteDocuments(new Term("content", "aaa"));
			reader.Close();
			
			writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false);
			writer.SetMergePolicy(new LogDocMergePolicy(writer));
			writer.SetMaxBufferedDocs(10);
			writer.SetMergeFactor(5);
			
			// The merge factor changed, so only check invariants once all
			// the new adds are in.
			for (int docNum = 0; docNum < 10; docNum++)
			{
				AddDoc(writer);
			}
			CheckInvariants(writer);
			Assert.AreEqual(10, writer.DocCount());
			
			writer.Close();
		}
// Example #29
// 0
        /// <summary>
        /// This method indexes the content that is sent across to it. Each piece of content (or "document")
        /// that is indexed has to have a unique identifier (so that the caller can take action based on the
        /// document id). Therefore, this method accepts key-value pairs in the form of a dictionary. The key
        /// is a ulong which uniquely identifies the string to be indexed. The string itself is the value
        /// within the dictionary for that key. Be aware that stop words (like the, this, at, etc.) are _not_
        /// indexed.
        /// </summary>
        /// <param name="txtIdPairToBeIndexed">A dictionary of key-value pairs that are sent by the caller
        /// to uniquely identify each string that is to be indexed.</param>
        /// <param name="indexContentType">Names the field each string is stored under (its ToString()
        /// becomes the field name), letting different content kinds live in distinct fields.</param>
        /// <returns>The number of documents indexed.</returns>
        public int Index(Dictionary<ulong, string> txtIdPairToBeIndexed, IndexContentType indexContentType)
        {
            // 'true' recreates the index, discarding anything previously indexed here.
            IndexWriter indexWriter = new IndexWriter (_indexDir, new StandardAnalyzer (), true);
            try {
                indexWriter.SetUseCompoundFile (false);

                foreach (KeyValuePair<ulong, string> pair in txtIdPairToBeIndexed) {
                    Document document = new Document ();
                    Field bodyField = new Field (indexContentType.ToString (), pair.Value, Field.Store.YES, Field.Index.TOKENIZED);
                    document.Add (bodyField);
                    Field idField = new Field (DOC_ID_FIELD_NAME, pair.Key.ToString (), Field.Store.YES, Field.Index.TOKENIZED);
                    document.Add (idField);
                    indexWriter.AddDocument (document);
                }

                int numIndexed = indexWriter.DocCount ();
                indexWriter.Optimize ();
                return numIndexed;
            } finally {
                // BUGFIX: Close() previously ran only on the success path, so any
                // exception during AddDocument/Optimize leaked the writer and left
                // the index write lock held.  Always release it.
                indexWriter.Close ();
            }
        }