void Index()
        {
            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            Lucene.Net.Documents.Document doc = null;
            Lucene.Net.Documents.Field f = null;

            doc = new Lucene.Net.Documents.Document();
            f = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);
            
            doc = new Lucene.Net.Documents.Document();
            f = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);
            
            wr.Close();
        }
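A minimal follow-up sketch (not part of the original snippet) showing how the four documents indexed above could be searched; it assumes the same dir Directory field and a Lucene.Net 2.9/3.0-era IndexSearcher API:

        void Search()
        {
            // Open a searcher over the directory populated by Index().
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
            Lucene.Net.Search.TermQuery query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "a"));
            Lucene.Net.Search.TopDocs hits = searcher.Search(query, 10);
            // "a b c d", "a b a d" and "a b e f" contain the term, so 3 hits are expected.
            System.Console.WriteLine(hits.TotalHits);
            searcher.Close();
        }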
		protected internal static Document Doc(Field[] fields)
		{
			Document doc = new Document();
			for (int i = 0; i < fields.Length; i++)
			{
				doc.Add(fields[i]);
			}
			return doc;
		}
		// Forces an IOException during flush on every iteration and verifies the index is still intact afterwards.
		public virtual void  TestFlushExceptions()
		{
			
			MockRAMDirectory directory = new MockRAMDirectory();
			FailOnlyOnFlush failure = new FailOnlyOnFlush();
			directory.FailOn(failure);
			
			IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
			ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
			writer.SetMergeScheduler(cms);
			writer.SetMaxBufferedDocs(2);
			Document doc = new Document();
			Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
			doc.Add(idField);
			for (int i = 0; i < 10; i++)
			{
				for (int j = 0; j < 20; j++)
				{
					idField.SetValue(System.Convert.ToString(i * 20 + j));
					writer.AddDocument(doc);
				}
				
				writer.AddDocument(doc);
				
				failure.SetDoFail();
				try
				{
					writer.Flush();
					Assert.Fail("failed to hit IOException");
				}
				catch (System.IO.IOException ioe)
				{
					failure.ClearDoFail();
				}
			}
			
			writer.Close();
			IndexReader reader = IndexReader.Open(directory);
			Assert.AreEqual(200, reader.NumDocs());
			reader.Close();
			directory.Close();
		}
        private static void AssignFieldValues(SearchResult result, Field uriField, List<SitecoreItem> items)
        {
            var itemInfo = new SitecoreItem(new ItemUri(uriField.StringValue()));
            foreach (Field field in result.Document.GetFields())
            {
                itemInfo.Fields[field.Name()] = field.StringValue();
            }

            items.Add(itemInfo);
        }
		private static Document MakeDocument(System.String docText)
		{
			Document doc = new Document();
			Field f = new Field("f", docText, Field.Store.NO, Field.Index.ANALYZED);
			f.SetOmitNorms(true);
			doc.Add(f);
			return doc;
		}
		public override void  SetUp()
		{
			base.SetUp();
			RAMDirectory directory = new RAMDirectory();
			PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
			IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetSimilarity(similarity);
			//writer.infoStream = System.out;
			for (int i = 0; i < 1000; i++)
			{
				Document doc = new Document();
				Field noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
				//noPayloadField.setBoost(0);
				doc.Add(noPayloadField);
				doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
				doc.Add(new Field("multiField", English.IntToEnglish(i) + "  " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
				writer.AddDocument(doc);
			}
			writer.Optimize();
			writer.Close();
			
			searcher = new IndexSearcher(directory);
			searcher.SetSimilarity(similarity);
		}
		private void  AddNoProxDoc(IndexWriter writer)
		{
			Document doc = new Document();
			Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
			f.SetOmitTf(true);
			doc.Add(f);
			f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
			f.SetOmitTf(true);
			doc.Add(f);
			writer.AddDocument(doc);
		}
 // Tests that once a field omits term freqs/positions in some documents, the setting sticks
 // for that field while other fields keep theirs.
 public virtual void  TestMixedRAM()
 {
     Directory ram = new MockRAMDirectory();
     Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
     IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     writer.SetMaxBufferedDocs(10);
     writer.MergeFactor = 2;
     Document d = new Document();
     
     // this field will have Tf
     Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
     d.Add(f1);
     
     // this field will NOT have Tf
     Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
     d.Add(f2);
     
     for (int i = 0; i < 5; i++)
         writer.AddDocument(d);
     
     f2.OmitTermFreqAndPositions = true;
     
     for (int i = 0; i < 20; i++)
         writer.AddDocument(d);
     
     // force merge
     writer.Optimize();
     
     // flush
     writer.Close();
     
     _TestUtil.CheckIndex(ram);
     
     SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
     FieldInfos fi = reader.FieldInfos();
     Assert.IsTrue(!fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should not be set.");
     Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
     
     reader.Close();
     ram.Close();
 }
		private void  AddDoc(IndexWriter writer, System.String text)
		{
			Document d = new Document();
			Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.TOKENIZED);
			d.Add(f);
			writer.AddDocument(d);
		}
		// Verifies that term vectors remain readable after the index is reopened, more documents are added, and segments are merged.
		public virtual void  TestTermVectorCorruption3()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer());
			writer.SetMaxBufferedDocs(2);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			writer.SetMergeScheduler(new SerialMergeScheduler());
			writer.SetMergePolicy(new LogDocMergePolicy());
			
			Document document = new Document();
			Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
			document.Add(storedField);
			Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
			document.Add(termVectorField);
			for (int i = 0; i < 10; i++)
				writer.AddDocument(document);
			writer.Close();
			
			writer = new IndexWriter(dir, false, new StandardAnalyzer());
			writer.SetMaxBufferedDocs(2);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			writer.SetMergeScheduler(new SerialMergeScheduler());
			writer.SetMergePolicy(new LogDocMergePolicy());
			for (int i = 0; i < 6; i++)
				writer.AddDocument(document);
			
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			for (int i = 0; i < 10; i++)
			{
				reader.GetTermFreqVectors(i);
				reader.Document(i);
			}
			reader.Close();
			dir.Close();
		}
		// Verifies that Close(false) aborts in-progress merges without losing committed documents.
		public virtual void  TestNoWaitClose()
		{
			RAMDirectory directory = new MockRAMDirectory();
			
			Document doc = new Document();
			Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
			doc.Add(idField);
			
			for (int pass = 0; pass < 2; pass++)
			{
				bool autoCommit = pass == 0;
				IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true);
				
				for (int iter = 0; iter < 10; iter++)
				{
					ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
					writer.SetMergeScheduler(cms);
					writer.SetMaxBufferedDocs(2);
					writer.SetMergeFactor(100);
					
					for (int j = 0; j < 201; j++)
					{
						idField.SetValue(System.Convert.ToString(iter * 201 + j));
						writer.AddDocument(doc);
					}
					
					int delID = iter * 201;
					for (int j = 0; j < 20; j++)
					{
						writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID)));
						delID += 5;
					}
					
					// Force a bunch of merge threads to kick off so we
					// stress out aborting them on close:
					writer.SetMergeFactor(3);
					writer.AddDocument(doc);
					writer.Flush();
					
					writer.Close(false);
					
					IndexReader reader = IndexReader.Open(directory);
					Assert.AreEqual((1 + iter) * 182, reader.NumDocs());
					reader.Close();
					
					// Reopen
					writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
				}
				writer.Close();
			}
			
			directory.Close();
		}
		// Enables norms for only one document per batch (before and after a flush) and verifies the index stays searchable.
		public virtual void  TestEnablingNorms()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			// Enable norms for only 1 doc, pre flush
			for (int j = 0; j < 10; j++)
			{
				Document doc = new Document();
				Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
				if (j != 8)
				{
					f.SetOmitNorms(true);
				}
				doc.Add(f);
				writer.AddDocument(doc);
			}
			writer.Close();
			
			Term searchTerm = new Term("field", "aaa");
			
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(10, hits.Length());
			searcher.Close();
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			// Enable norms for only 1 doc, post flush
			for (int j = 0; j < 27; j++)
			{
				Document doc = new Document();
				Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
				if (j != 26)
				{
					f.SetOmitNorms(true);
				}
				doc.Add(f);
				writer.AddDocument(doc);
			}
			writer.Close();
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(27, hits.Length());
			searcher.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			reader.Close();
			
			dir.Close();
		}
		public virtual void  TestTermVectorCorruption2()
		{
			Directory dir = new MockRAMDirectory();
			for (int iter = 0; iter < 4; iter++)
			{
				bool autoCommit = 1 == iter / 2;
				IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
				writer.SetMaxBufferedDocs(2);
				writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
				writer.SetMergeScheduler(new SerialMergeScheduler());
				writer.SetMergePolicy(new LogDocMergePolicy());
				
				Document document = new Document();
				
				Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
				document.Add(storedField);
				writer.AddDocument(document);
				writer.AddDocument(document);
				
				document = new Document();
				document.Add(storedField);
				Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
				document.Add(termVectorField);
				writer.AddDocument(document);
				writer.Optimize();
				writer.Close();
				
				IndexReader reader = IndexReader.Open(dir);
				Assert.IsTrue(reader.GetTermFreqVectors(0) == null);
				Assert.IsTrue(reader.GetTermFreqVectors(1) == null);
				Assert.IsTrue(reader.GetTermFreqVectors(2) != null);
				reader.Close();
			}
			dir.Close();
		}
        public void TestDeletesNumDocs()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(),
                                                       IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();
            doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
            Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
            doc.Add(id);
            id.SetValue("0");
            w.AddDocument(doc);
            id.SetValue("1");
            w.AddDocument(doc);
            IndexReader r = w.GetReader();
            Assert.AreEqual(2, r.NumDocs());
            r.Close();

            w.DeleteDocuments(new Term("id", "0"));
            r = w.GetReader();
            Assert.AreEqual(1, r.NumDocs());
            r.Close();

            w.DeleteDocuments(new Term("id", "1"));
            r = w.GetReader();
            Assert.AreEqual(0, r.NumDocs());
            r.Close();

            w.Close();
            dir.Close();
        }
 public virtual void  TestBasic()
 {
     Directory dir = new MockRAMDirectory();
     Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
     IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     writer.MergeFactor = 2;
     writer.SetMaxBufferedDocs(2);
     writer.SetSimilarity(new SimpleSimilarity());
     
     
     System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
     System.String term = "term";
     for (int i = 0; i < 30; i++)
     {
         Document d = new Document();
         sb.Append(term).Append(" ");
         System.String content = sb.ToString();
         Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
         noTf.OmitTermFreqAndPositions = true;
         d.Add(noTf);
         
         Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
         d.Add(tf);
         
         writer.AddDocument(d);
         //System.out.println(d);
     }
     
     writer.Optimize();
     // flush
     writer.Close();
     _TestUtil.CheckIndex(dir);
     
     /*
     * Verify the index
     */
     Searcher searcher = new IndexSearcher(dir, true);
     searcher.Similarity = new SimpleSimilarity();
     
     Term a = new Term("noTf", term);
     Term b = new Term("tf", term);
     Term c = new Term("noTf", "notf");
     Term d2 = new Term("tf", "tf");
     TermQuery q1 = new TermQuery(a);
     TermQuery q2 = new TermQuery(b);
     TermQuery q3 = new TermQuery(c);
     TermQuery q4 = new TermQuery(d2);
     
     
     searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
     //System.out.println(CountingHitCollector.getCount());
     
     
     searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
     //System.out.println(CountingHitCollector.getCount());
     
     
     
     
     
     searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
     //System.out.println(CountingHitCollector.getCount());
     
     
     searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
     //System.out.println(CountingHitCollector.getCount());
     
     
     
     BooleanQuery bq = new BooleanQuery();
     bq.Add(q1, Occur.MUST);
     bq.Add(q4, Occur.MUST);
     
     searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
     Assert.IsTrue(15 == CountingHitCollector.GetCount());
     
     searcher.Close();
     dir.Close();
 }
 // Verifies that no positions (.prx) data is written when the only indexed field omits term freqs and positions.
 public virtual void  TestNoPrxFile()
 {
     Directory ram = new MockRAMDirectory();
     Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
     IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     writer.SetMaxBufferedDocs(3);
     writer.MergeFactor = 2;
     writer.UseCompoundFile = false;
     Document d = new Document();
     
     Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
     f1.OmitTermFreqAndPositions = true;
     d.Add(f1);
     
     for (int i = 0; i < 30; i++)
         writer.AddDocument(d);
     
     writer.Commit();
     
     AssertNoPrx(ram);
     
     // force merge
     writer.Optimize();
     // flush
     writer.Close();
     
     AssertNoPrx(ram);
     _TestUtil.CheckIndex(ram);
     ram.Close();
 }
		private void  Create()
		{
			
			// NOTE: put seed in here to make failures
			// deterministic, but do not commit with a seed (to
			// better test):
			dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMaxBufferedDocs(17);
			
			Document doc = new Document();
			Document doc2 = new Document();
			
			Field id = new Field("id", "", Field.Store.YES, Field.Index.NO);
			doc.Add(id);
			doc2.Add(id);
			
			Field contents = new Field("contents", "", Field.Store.NO, Field.Index.ANALYZED);
			doc.Add(contents);
			doc2.Add(contents);
			
			Field byteField = new Field("byte", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(byteField);
			doc2.Add(byteField);
			
			Field shortField = new Field("short", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(shortField);
			doc2.Add(shortField);
			
			Field intField = new Field("int", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(intField);
			doc2.Add(intField);
			
			Field longField = new Field("long", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(longField);
			doc2.Add(longField);
			
			Field floatField = new Field("float", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(floatField);
			doc2.Add(floatField);
			
			Field doubleField = new Field("double", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(doubleField);
			doc2.Add(doubleField);
			
			// we use two different string fields so our FieldCache usage
			// is less suspicious to cache inspection
			Field stringField = new Field("string", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(stringField);
			Field stringFieldIdx = new Field("stringIdx", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(stringFieldIdx);
			// doc2 doesn't have stringField or stringFieldIdx, so we get nulls
			
			for (int i = 0; i < NUM_DOCS; i++)
			{
				id.SetValue("" + i);
				if (i % 1000 == 0)
				{
					contents.SetValue("a b c z");
				}
				else if (i % 100 == 0)
				{
					contents.SetValue("a b c y");
				}
				else if (i % 10 == 0)
				{
					contents.SetValue("a b c x");
				}
				else
				{
					contents.SetValue("a b c");
				}
				byteField.SetValue("" + NextInt((sbyte) System.SByte.MinValue, (sbyte) System.SByte.MaxValue));
				if (NextInt(10) == 3)
				{
					shortField.SetValue("" + System.Int16.MinValue);
				}
				else if (NextInt(10) == 7)
				{
					shortField.SetValue("" + System.Int16.MaxValue);
				}
				else
				{
					shortField.SetValue("" + NextInt(System.Int16.MinValue, System.Int16.MaxValue));
				}
				
				if (NextInt(10) == 3)
				{
					intField.SetValue("" + System.Int32.MinValue);
				}
				else if (NextInt(10) == 7)
				{
					intField.SetValue("" + System.Int32.MaxValue);
				}
				else
				{
					intField.SetValue("" + this.r.Next());
				}
				
				if (NextInt(10) == 3)
				{
					longField.SetValue("" + System.Int64.MinValue);
				}
				else if (NextInt(10) == 7)
				{
					longField.SetValue("" + System.Int64.MaxValue);
				}
				else
				{
					longField.SetValue("" + this.r.Next(System.Int32.MaxValue));
				}
				floatField.SetValue("" + (float) this.r.NextDouble());
				doubleField.SetValue("" + this.r.NextDouble());
				if (i % 197 == 0)
				{
					writer.AddDocument(doc2);
				}
				else
				{
					System.String r = RandomString(NextInt(20));
					stringField.SetValue(r);
					stringFieldIdx.SetValue(r);
					writer.AddDocument(doc);
				}
			}
			writer.Close();
			searcherMultiSegment = new IndexSearcher(dir);
			searcherMultiSegment.SetDefaultFieldSortScoring(true, true);
			
			dir2 = new MockRAMDirectory(dir);
			writer = new IndexWriter(dir2, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.Optimize();
			writer.Close();
			searcherSingleSegment = new IndexSearcher(dir2);
			searcherSingleSegment.SetDefaultFieldSortScoring(true, true);
			dir3 = new MockRAMDirectory(dir);
			writer = new IndexWriter(dir3, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.Optimize(3);
			writer.Close();
			searcherFewSegment = new IndexSearcher(dir3);
			searcherFewSegment.SetDefaultFieldSortScoring(true, true);
		}
		private void  AddDoc(System.String text, IndexWriter iw, float boost)
		{
			Document doc = new Document();
			Field f = new Field("key", text, Field.Store.YES, Field.Index.ANALYZED);
			f.SetBoost(boost);
			doc.Add(f);
			iw.AddDocument(doc);
		}
		public virtual void  TestDeleteMerging()
		{
			
			RAMDirectory directory = new MockRAMDirectory();
			
			IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
			ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
			writer.SetMergeScheduler(cms);
			
			LogDocMergePolicy mp = new LogDocMergePolicy(writer);
			writer.SetMergePolicy(mp);
			
			// Force degenerate merging so we can get a mix of
			// merging of segments with and without deletes at the
			// start:
			mp.SetMinMergeDocs(1000);
			
			Document doc = new Document();
			Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
			doc.Add(idField);
			for (int i = 0; i < 10; i++)
			{
				for (int j = 0; j < 100; j++)
				{
					idField.SetValue(System.Convert.ToString(i * 100 + j));
					writer.AddDocument(doc);
				}
				
				int delID = i;
				while (delID < 100 * (1 + i))
				{
					writer.DeleteDocuments(new Term("id", "" + delID));
					delID += 10;
				}
				
				writer.Flush();
			}
			
			writer.Close();
			IndexReader reader = IndexReader.Open(directory);
			// Verify that we did not lose any deletes...
			Assert.AreEqual(450, reader.NumDocs());
			reader.Close();
			directory.Close();
		}
		// Verifies that a ConcurrentMergeScheduler subclass has its merge thread, merge, and exception hooks invoked.
		public virtual void  TestSubclassConcurrentMergeScheduler()
		{
			MockRAMDirectory dir = new MockRAMDirectory();
			dir.FailOn(new FailOnlyOnMerge());
			
			Document doc = new Document();
			Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
			doc.Add(idField);
			
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			MyMergeScheduler ms = new MyMergeScheduler(this);
			writer.SetMergeScheduler(ms);
			writer.SetMaxBufferedDocs(2);
			writer.SetRAMBufferSizeMB(Lucene.Net.Index.IndexWriter.DISABLE_AUTO_FLUSH);
			for (int i = 0; i < 20; i++)
				writer.AddDocument(doc);
			
			ms.Sync();
			writer.Close();
			
			Assert.IsTrue(mergeThreadCreated);
			Assert.IsTrue(mergeCalled);
			Assert.IsTrue(excCalled);
			dir.Close();
			Assert.IsTrue(ConcurrentMergeScheduler.AnyUnhandledExceptions());
		}
		public virtual void  TestOptimizeOverMerge()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer());
			writer.SetMaxBufferedDocs(2);
			writer.SetMergeFactor(100);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			
			Document document = new Document();
			Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
			document.Add(storedField);
			Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
			document.Add(termVectorField);
			for (int i = 0; i < 170; i++)
				writer.AddDocument(document);
			
			writer.Close();
			MyIndexWriter myWriter = new MyIndexWriter(this, dir);
			myWriter.Optimize();
			Assert.AreEqual(10, myWriter.mergeCount);
		}
 // Tests whether the DocumentWriter correctly enables the
 // omitTermFreqAndPositions bit in the FieldInfo
 public virtual void  TestOmitTermFreqAndPositions()
 {
     Directory ram = new MockRAMDirectory();
     Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
     IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     Document d = new Document();
     
     // this field will have Tf
     Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
     d.Add(f1);
     
     // this field will NOT have Tf
     Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
     f2.OmitTermFreqAndPositions = true;
     d.Add(f2);
     
     writer.AddDocument(d);
     writer.Optimize();
     // now add another document which has term freqs for field f2 and not for f1,
     // and verify that the SegmentMerger keeps things consistent
     d = new Document();
     
     // Reverse the omitTermFreqAndPositions settings relative to the first document
     f1.OmitTermFreqAndPositions = true;
     d.Add(f1);
     
     f2.OmitTermFreqAndPositions = false;
     d.Add(f2);
     
     writer.AddDocument(d);
     // force merge
     writer.Optimize();
     // flush
     writer.Close();
     _TestUtil.CheckIndex(ram);
     
     SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
     FieldInfos fi = reader.FieldInfos();
     Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
     Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
     
     reader.Close();
     ram.Close();
 }
		// create the next document
		private Document NewDoc()
		{
			Document d = new Document();
			float boost = NextNorm();
			for (int i = 0; i < 10; i++)
			{
				Field f = new Field("f" + i, "v" + i, Field.Store.NO, Field.Index.NOT_ANALYZED);
				f.SetBoost(boost);
				d.Add(f);
			}
			return d;
		}
		static DocHelper()
		{
			textField1 = Field.Text(TEXT_FIELD_1_KEY, FIELD_1_TEXT, false);
			textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true);
			keyField = Field.Keyword(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
			unIndField = Field.UnIndexed(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
			unStoredField1 = Field.UnStored(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, false);
			unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true);
			{
				
				nameValues = new System.Collections.Hashtable();
				nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
				nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
				nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
				nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
				nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
				nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
			}
		}
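The Field.Text / Field.Keyword / Field.UnIndexed / Field.UnStored calls above are the legacy 1.x-style factory helpers; the following is a rough sketch of equivalent constructions with the explicit Field constructor, based on their documented behavior and not part of the original code:

			// Approximate equivalents of the legacy factories used in the static constructor:
			Field textField1Eq = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);          // Field.Text(key, text, false)
			Field keyFieldEq = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.UN_TOKENIZED);                             // Field.Keyword(key, text)
			Field unIndFieldEq = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);                           // Field.UnIndexed(key, text)
			Field unStoredField2Eq = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES); // Field.UnStored(key, text, true)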
		public virtual void  TestMultiSearcher()
		{
			//setup index 1
			RAMDirectory ramDir1 = new RAMDirectory();
			IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
			Document d = new Document();
			Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
			d.Add(f);
			writer1.AddDocument(d);
			writer1.Optimize();
			writer1.Close();
			IndexReader reader1 = IndexReader.Open(ramDir1);
			
			//setup index 2
			RAMDirectory ramDir2 = new RAMDirectory();
			IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
			d = new Document();
			f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
			d.Add(f);
			writer2.AddDocument(d);
			writer2.Optimize();
			writer2.Close();
			IndexReader reader2 = IndexReader.Open(ramDir2);
			
			
			
			IndexSearcher[] searchers = new IndexSearcher[2];
			searchers[0] = new IndexSearcher(ramDir1);
			searchers[1] = new IndexSearcher(ramDir2);
			MultiSearcher multiSearcher = new MultiSearcher(searchers);
			QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
            parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
			query = parser.Parse("multi*");
			System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
			//at this point the multisearcher calls combine(query[])
			hits = multiSearcher.Search(query);
			
			//query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
			Query[] expandedQueries = new Query[2];
			expandedQueries[0] = query.Rewrite(reader1);
			expandedQueries[1] = query.Rewrite(reader2);
			query = query.Combine(expandedQueries);
			
			
			//create an instance of the highlighter with the tags used to surround highlighted text
			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
			
			for (int i = 0; i < hits.Length(); i++)
			{
				System.String text = hits.Doc(i).Get(FIELD_NAME);
				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
				System.String highlightedText = highlighter.GetBestFragment(tokenStream, text);
				System.Console.Out.WriteLine(highlightedText);
			}
			Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
		}
		/// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector)"/>
		/// <deprecated>Replaced by <see cref="GetFieldNames(IndexReader.FieldOption)"/>.</deprecated>
		public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
		{
			bool storedTermVector;
			bool storePositionWithTermVector;
			bool storeOffsetWithTermVector;
			
			if (tvSpec == Field.TermVector.NO)
			{
				storedTermVector = false;
				storePositionWithTermVector = false;
				storeOffsetWithTermVector = false;
			}
			else if (tvSpec == Field.TermVector.YES)
			{
				storedTermVector = true;
				storePositionWithTermVector = false;
				storeOffsetWithTermVector = false;
			}
			else if (tvSpec == Field.TermVector.WITH_POSITIONS)
			{
				storedTermVector = true;
				storePositionWithTermVector = true;
				storeOffsetWithTermVector = false;
			}
			else if (tvSpec == Field.TermVector.WITH_OFFSETS)
			{
				storedTermVector = true;
				storePositionWithTermVector = false;
				storeOffsetWithTermVector = true;
			}
			else if (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS)
			{
				storedTermVector = true;
				storePositionWithTermVector = true;
				storeOffsetWithTermVector = true;
			}
			else
			{
				throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
			}
			
			// maintain a unique set of field names
			System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
			for (int i = 0; i < fieldInfos.Size(); i++)
			{
				FieldInfo fi = fieldInfos.FieldInfo(i);
				if (fi.isIndexed && fi.storeTermVector == storedTermVector && fi.storePositionWithTermVector == storePositionWithTermVector && fi.storeOffsetWithTermVector == storeOffsetWithTermVector)
				{
					fieldSet.Add(fi.name, fi.name);
				}
			}
			return fieldSet;
		}
        /*internal*/
        public Document Doc(int n)
        {
            indexStream.Seek(n * 8L);
            long position = indexStream.ReadLong();
            fieldsStream.Seek(position);

            Document doc = new Document();
            int numFields = fieldsStream.ReadVInt();
            for (int i = 0; i < numFields; i++)
            {
                int fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);

                byte bits = fieldsStream.ReadByte();

                // Each stored field value is preceded by a flags byte that says whether it
                // is compressed, tokenized, and/or binary.
                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

                if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
                {
                    byte[] b = new byte[fieldsStream.ReadVInt()];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    if (compressed)
                        doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                    else
                        doc.Add(new Field(fi.name, b, Field.Store.YES));
                }
                else
                {
                    Field.Index index;
                    Field.Store store = Field.Store.YES;

                    if (fi.isIndexed && tokenize)
                        index = Field.Index.TOKENIZED;
                    else if (fi.isIndexed && !tokenize)
                        index = Field.Index.UN_TOKENIZED;
                    else
                        index = Field.Index.NO;

                    Field.TermVector termVector = null;
                    if (fi.storeTermVector)
                    {
                        if (fi.storeOffsetWithTermVector)
                        {
                            if (fi.storePositionWithTermVector)
                            {
                                termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                            }
                            else
                            {
                                termVector = Field.TermVector.WITH_OFFSETS;
                            }
                        }
                        else if (fi.storePositionWithTermVector)
                        {
                            termVector = Field.TermVector.WITH_POSITIONS;
                        }
                        else
                        {
                            termVector = Field.TermVector.YES;
                        }
                    }
                    else
                    {
                        termVector = Field.TermVector.NO;
                    }

                    if (compressed)
                    {
                        store = Field.Store.COMPRESS;
                        byte[] b = new byte[fieldsStream.ReadVInt()];
                        fieldsStream.ReadBytes(b, 0, b.Length);
                        Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                    else
                    {
                        Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                }
            }

            return doc;
        }
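For orientation, the stored-fields layout this method walks, inferred from the reads above rather than from a format specification:

            // Inferred from the code above (not an authoritative file-format description):
            //   index stream (.fdx)  : one 8-byte pointer per document, hence Seek(n * 8L) + ReadLong()
            //   fields stream (.fdt) : per document -> VInt numFields, then for each field:
            //       VInt fieldNumber, byte bits (compressed / tokenized / binary flags),
            //       followed by VInt length + raw bytes for binary or compressed values,
            //       or a length-prefixed string for plain text values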
		private static Document CreateDocument(System.String text, long time)
		{
			Document document = new Document();
			
			// Add the text field.
			Field textField = new Field(TEXT_FIELD, text, Field.Store.YES, Field.Index.ANALYZED);
			document.Add(textField);
			
			// Add the date/time field.
			System.String dateTimeString = DateTools.TimeToString(time, DateTools.Resolution.SECOND);
			Field dateTimeField = new Field(DATE_TIME_FIELD, dateTimeString, Field.Store.YES, Field.Index.NOT_ANALYZED);
			document.Add(dateTimeField);
			
			return document;
		}
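Because DateTools strings at a fixed resolution sort lexicographically, documents built by CreateDocument can be filtered by time with an ordinary term range query; a hedged sketch assuming the same DATE_TIME_FIELD constant and the Lucene.Net 2.9-era TermRangeQuery:

		private static Query CreateDateTimeRangeQuery(long startTime, long endTime)
		{
			// Both bounds are encoded exactly like the value indexed by CreateDocument.
			System.String lower = DateTools.TimeToString(startTime, DateTools.Resolution.SECOND);
			System.String upper = DateTools.TimeToString(endTime, DateTools.Resolution.SECOND);
			return new TermRangeQuery(DATE_TIME_FIELD, lower, upper, true, true);
		}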
		public virtual void  TestLUCENE_1590()
		{
			Document doc = new Document();
			// f1 has no norms
			doc.Add(new Field("f1", "v1", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
			doc.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NO));
			// f2 has no TF
			Field f = new Field("f2", "v1", Field.Store.NO, Field.Index.ANALYZED);
			f.SetOmitTermFreqAndPositions(true);
			doc.Add(f);
			doc.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NO));
			
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(doc);
			writer.Optimize(); // be sure to have a single segment
			writer.Close();
			
			_TestUtil.CheckIndex(dir);
			
			SegmentReader reader = SegmentReader.GetOnlySegmentReader(dir);
			FieldInfos fi = reader.FieldInfos();
			// f1
			Assert.IsFalse(reader.HasNorms("f1"), "f1 should have no norms");
			Assert.IsFalse(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should not be set for f1");
			// f2
			Assert.IsTrue(reader.HasNorms("f2"), "f2 should have norms");
			Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should be set for f2");
		}
		public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
		{
			return in_Renamed.GetIndexedFieldNames(tvSpec);
		}