Example #1
        public virtual void TestEncodeDecode()
        {
            int iterations = RandomInts.NextIntBetween(Random(), 1, 1000);
            float acceptableOverheadRatio = (float)Random().NextDouble();
            int[] values = new int[(iterations - 1) * Lucene41PostingsFormat.BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
            for (int i = 0; i < iterations; ++i)
            {
                int bpv = Random().Next(32);
                if (bpv == 0)
                {
                    int value = RandomInts.NextIntBetween(Random(), 0, int.MaxValue);
                    for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
                    {
                        values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = value;
                    }
                }
                else
                {
                    for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
                    {
                        values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = RandomInts.NextIntBetween(Random(), 0, (int)PackedInts.MaxValue(bpv));
                    }
                }
            }

            Directory d = new RAMDirectory();
            long endPointer;

            {
                // encode
                IndexOutput @out = d.CreateOutput("test.bin", IOContext.DEFAULT);
                ForUtil forUtil = new ForUtil(acceptableOverheadRatio, @out);

                for (int i = 0; i < iterations; ++i)
                {
                    forUtil.WriteBlock(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, values.Length), new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], @out);
                }
                endPointer = @out.FilePointer;
                @out.Dispose();
            }

            {
                // decode
                IndexInput @in = d.OpenInput("test.bin", IOContext.READONCE);
                ForUtil forUtil = new ForUtil(@in);
                for (int i = 0; i < iterations; ++i)
                {
                    if (Random().NextBoolean())
                    {
                        forUtil.SkipBlock(@in);
                        continue;
                    }
                    int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
                    forUtil.ReadBlock(@in, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);
                    Assert.AreEqual(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, (i + 1) * Lucene41PostingsFormat.BLOCK_SIZE), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
                }
                Assert.AreEqual(endPointer, @in.FilePointer);
                @in.Dispose();
            }
        }
Example #2
		public virtual void  TestBasic()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			writer.Close();
			
			TermQuery termQuery = new TermQuery(new Term("field", "value"));
			
			// should not throw exception with primitive query
			QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);
			
			IndexSearcher searcher = new IndexSearcher(dir, true);
			TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
			Assert.AreEqual(1, hits.totalHits);
			
			// should not throw exception with complex primitive query
			BooleanQuery booleanQuery = new BooleanQuery();
			booleanQuery.Add(termQuery, Occur.MUST);
			booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
			qwf = new QueryWrapperFilter(booleanQuery);
			
			hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
			Assert.AreEqual(1, hits.totalHits);
			
			// should not throw exception with non primitive Query (doesn't implement
			// Query#createWeight)
			qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));
			
			hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
			Assert.AreEqual(1, hits.totalHits);
		}
Example #3
		private Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = NewRandom();
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Document doc = new Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("", e);
			}
			return dir;
		}
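MakeIndex() relies on fixture members that are not shown in the snippet (data, NUM_DOCS, NUM_FIELDS, and LuceneTestCase's NewRandom()). A minimal sketch of what those declarations could look like; the names come from the snippet, the sizes and contents are purely illustrative:

		// Hypothetical fixture state assumed by MakeIndex(); values are illustrative only.
		private const int NUM_DOCS = 500;
		private const int NUM_FIELDS = 100;
		// each field value mixes a fixed entry with a randomly chosen one
		private static readonly System.String[] data = new System.String[]{"one", "two", "three", "four", "five"};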
Example #4
		public virtual void  SetUp()
		{
			directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			
			Document doc = new Document();
			doc.Add(Field.Text("Field", "one two three four five"));
			doc.Add(Field.Text("sorter", "b"));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(Field.Text("Field", "one two three four"));
			doc.Add(Field.Text("sorter", "d"));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(Field.Text("Field", "one two three y"));
			doc.Add(Field.Text("sorter", "a"));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(Field.Text("Field", "one two x"));
			doc.Add(Field.Text("sorter", "c"));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();
			
			searcher = new IndexSearcher(directory);
			query = new TermQuery(new Term("Field", "three"));
			filter = new AnonymousClassFilter(this);
		}
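Example #5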
		public virtual void  TestCachingWorks()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			
			MockFilter filter = new MockFilter();
			CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
			
			// first time, nested filter is called
			cacher.GetDocIdSet(reader);
			Assert.IsTrue(filter.WasCalled(), "first time");
			
			// make sure no exception if cache is holding the wrong bitset
			cacher.Bits(reader);
			cacher.GetDocIdSet(reader);
			
			// second time, nested filter should not be called
			filter.Clear();
			cacher.GetDocIdSet(reader);
			Assert.IsFalse(filter.WasCalled(), "second time");
			
			reader.Close();
		}
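TestCachingWorks depends on a MockFilter test class that is not part of the snippet. A minimal stand-in, assuming the 2.9-era Filter API used above (that API also still carries the deprecated Bits entry point; only GetDocIdSet is sketched here):

		class MockFilter : Filter
		{
			private bool wasCalled;
			public override DocIdSet GetDocIdSet(IndexReader reader)
			{
				wasCalled = true; // record the call so the test can assert on it
				return new OpenBitSet(); // empty set; the contents don't matter for this test
			}
			public bool WasCalled() { return wasCalled; }
			public void Clear() { wasCalled = false; }
		}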
Example #6
 /*public TestCustomSearcherSort(System.String name):base(name)
 {
 }*/
 
 /*[STAThread]
 public static void  Main(System.String[] argv)
 {
     // TestRunner.run(suite()); // {{Aroush-2.9}} how is this done in NUnit?
 }*/
 
 /*public static Test suite()
 {
     return new TestSuite(typeof(TestCustomSearcherSort));
 }*/
 
 
 // create an index for testing
 private Directory GetIndex()
 {
     RAMDirectory indexStore = new RAMDirectory();
     IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
     RandomGen random = new RandomGen(this, NewRandom());
     for (int i = 0; i < INDEX_SIZE; ++i)
     {
         // don't decrease; if too low the problem doesn't show up
         Document doc = new Document();
         if ((i % 5) != 0)
         {
             // some documents must not have an entry in the first sort field
             doc.Add(new Field("publicationDate_", random.GetLuceneDate(), Field.Store.YES, Field.Index.NOT_ANALYZED));
         }
         if ((i % 7) == 0)
         {
             // some documents to match the query (see below) 
             doc.Add(new Field("content", "test", Field.Store.YES, Field.Index.ANALYZED));
         }
         // every document has a defined 'mandant' field
         doc.Add(new Field("mandant", System.Convert.ToString(i % 3), Field.Store.YES, Field.Index.NOT_ANALYZED));
         writer.AddDocument(doc);
     }
     writer.Optimize();
     writer.Close();
     return indexStore;
 }
Example #7
		public override void  SetUp()
		{
			base.SetUp();
			Document doc;
			
			RAMDirectory rd1 = new RAMDirectory();
			IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			doc = new Document();
			doc.Add(new Field("field1", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("field2", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("field4", "", Field.Store.NO, Field.Index.ANALYZED));
			iw1.AddDocument(doc);
			
			iw1.Close();
			RAMDirectory rd2 = new RAMDirectory();
			IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			doc = new Document();
			doc.Add(new Field("field0", "", Field.Store.NO, Field.Index.ANALYZED));
			doc.Add(new Field("field1", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("field3", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
			iw2.AddDocument(doc);
			
			iw2.Close();
			
			this.ir1 = IndexReader.Open(rd1, true);
			this.ir2 = IndexReader.Open(rd2, true);
		}
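Example #8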
		public virtual void  TestQuery()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
			AddDoc("one", iw);
			AddDoc("two", iw);
			AddDoc("three four", iw);
			iw.Close();
			
			IndexSearcher is_Renamed = new IndexSearcher(dir);
			Hits hits = is_Renamed.Search(new MatchAllDocsQuery());
			Assert.AreEqual(3, hits.Length());
			
			// some artificial queries to trigger the use of skipTo():
			
			BooleanQuery bq = new BooleanQuery();
			bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
			bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
			hits = is_Renamed.Search(bq);
			Assert.AreEqual(3, hits.Length());
			
			bq = new BooleanQuery();
			bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
			bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
			hits = is_Renamed.Search(bq);
			Assert.AreEqual(1, hits.Length());
			
			// delete a document:
			is_Renamed.GetIndexReader().DeleteDocument(0);
			hits = is_Renamed.Search(new MatchAllDocsQuery());
			Assert.AreEqual(2, hits.Length());
			
			is_Renamed.Close();
		}
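Example #9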
        public virtual void TestAllSegmentsSmall()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();
            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
        }
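NewWriterConfig() and AddDocs() are fixture helpers the snippet does not include. One plausible shape for them, assuming the 4.x test-framework conventions already visible above (TEST_VERSION_CURRENT, MockAnalyzer, Random()); the merge policy choice is an assumption, not taken from the original test:

            private IndexWriterConfig NewWriterConfig()
            {
                IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
                conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES); // keep each AddDocs batch as its own segment
                return conf;
            }

            private void AddDocs(IndexWriter writer, int numDocs)
            {
                for (int i = 0; i < numDocs; i++)
                {
                    writer.AddDocument(new Document());
                }
                writer.Commit(); // flush the batch into a segment
            }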
Example #10
		public virtual void  TestDemo_Renamed_Method()
		{
			
			Analyzer analyzer = new StandardAnalyzer();
			
			// Store the index in memory:
			Directory directory = new RAMDirectory();
			// To store an index on disk, use this instead (note that the 
			// parameter true will overwrite the index in that directory
			// if one exists):
			//Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
			IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
			iwriter.SetMaxFieldLength(25000);
			Document doc = new Document();
			System.String text = "This is the text to be indexed.";
			doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
			iwriter.AddDocument(doc);
			iwriter.Close();
			
			// Now search the index:
			IndexSearcher isearcher = new IndexSearcher(directory);
			// Parse a simple query that searches for "text":
			Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer);
			Query query = parser.Parse("text");
			Hits hits = isearcher.Search(query);
			Assert.AreEqual(1, hits.Length());
			// Iterate through the results:
			for (int i = 0; i < hits.Length(); i++)
			{
				Document hitDoc = hits.Doc(i);
				Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
			}
			isearcher.Close();
			directory.Close();
		}
Example #11
		public override void  SetUp()
		{
			base.SetUp();
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			long theLong = System.Int64.MaxValue;
			double theDouble = System.Double.MaxValue;
			sbyte theByte = (sbyte) System.SByte.MaxValue;
			short theShort = System.Int16.MaxValue;
			int theInt = System.Int32.MaxValue;
			float theFloat = System.Single.MaxValue;
			for (int i = 0; i < NUM_DOCS; i++)
			{
				Document doc = new Document();
				doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
				doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
				doc.Add(new Field("theByte", System.Convert.ToString((sbyte) theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
				doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
				doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
				doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
				writer.AddDocument(doc);
			}
			writer.Close();
			reader = IndexReader.Open(directory);
		}
Example #12
		public virtual void  TestMethod()
		{
			RAMDirectory directory = new RAMDirectory();
			
			System.String[] values = new System.String[]{"1", "2", "3", "4"};
			
			try
			{
				IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
				for (int i = 0; i < values.Length; i++)
				{
					Document doc = new Document();
					doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
					writer.AddDocument(doc);
				}
				writer.Close();
				
				BooleanQuery booleanQuery1 = new BooleanQuery();
				booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), Occur.SHOULD);
				booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), Occur.SHOULD);
				
				BooleanQuery query = new BooleanQuery();
				query.Add(booleanQuery1, Occur.MUST);
				query.Add(new TermQuery(new Term(FIELD, "9")), Occur.MUST_NOT);
				
				IndexSearcher indexSearcher = new IndexSearcher(directory, true);
				ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs;
				Assert.AreEqual(2, hits.Length, "Number of matched documents");
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail(e.Message);
			}
		}
Example #13
		private static Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = new System.Random((System.Int32) (BASE_SEED + 42));
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("", e);
			}
			return dir;
		}
Example #14
		public override void SetUp()
		{
			directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			
			Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
			doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.TOKENIZED));
			doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.TOKENIZED));
			doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.TOKENIZED));
			doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();
			
			searcher = new IndexSearcher(directory);
			query = new TermQuery(new Term("field", "three"));
			filter = new AnonymousClassFilter();
		}
Example #15
		public virtual void  TestDemo_Renamed()
		{
			
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
			
			// Store the index in memory:
			Directory directory = new RAMDirectory();
			// To store an index on disk, use this instead:
			//Directory directory = FSDirectory.open("/tmp/testindex");
			IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
			Document doc = new Document();
			System.String text = "This is the text to be indexed.";
			doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
			iwriter.AddDocument(doc);
			iwriter.Close();
			
			// Now search the index:
			IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
			// Parse a simple query that searches for "text":
			QueryParser parser = new QueryParser("fieldname", analyzer);
			Query query = parser.Parse("text");
			ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(1, hits.Length);
			// Iterate through the results:
			for (int i = 0; i < hits.Length; i++)
			{
				Document hitDoc = isearcher.Doc(hits[i].doc);
				Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
			}
			isearcher.Close();
			directory.Close();
		}
Example #16
		public virtual void  TestSimpleSkip()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Term term = new Term("test", "a");
			for (int i = 0; i < 5000; i++)
			{
				Document d1 = new Document();
				d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d1);
			}
			writer.Flush();
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
			SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
			tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);
			
			for (int i = 0; i < 2; i++)
			{
				counter = 0;
				tp.Seek(term);
				
				CheckSkipTo(tp, 14, 185); // no skips
				CheckSkipTo(tp, 17, 190); // one skip on level 0
				CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
				
				// this test would fail if we had only one skip level,
				// because then more bytes would be read from the freqStream
				CheckSkipTo(tp, 4800, 250); // one skip on level 2
			}
		}
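Example #17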
        public virtual void TestDateCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            const long @base = 13; // prime
            long day = 1000L * 60 * 60 * 24;

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            for (int i = 0; i < 50; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new longs cost less than if they had simply been packed
            Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
        }
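DirSize() is not shown; a straightforward assumed version, using only the Directory API that appears elsewhere in these examples:

            private static long DirSize(Directory d)
            {
                long size = 0;
                foreach (string file in d.ListAll())
                {
                    size += d.FileLength(file); // sum the length of every file in the directory
                }
                return size;
            }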
Example #18
		public virtual void  TestFilterWorks()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			for (int i = 0; i < 500; i++)
			{
				Document document = new Document();
				document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(document);
			}
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir, true);
			
			SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
			SpanQueryFilter filter = new SpanQueryFilter(query);
			SpanFilterResult result = filter.BitSpans(reader);
			DocIdSet docIdSet = result.DocIdSet;
			Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
			AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
			var spans = result.Positions;
			Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
			int size = GetDocIdSetSize(docIdSet);
			Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
			for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); )
			{
				SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
				Assert.IsTrue(info != null, "info is null and it shouldn't be");
				//The doc should indicate the bit is on
				AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
				//There should be two positions in each
				Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
			}
			reader.Close();
		}
Example #19
		public virtual void  TestMethod()
		{
			RAMDirectory directory = new RAMDirectory();
			
			System.String[] values = new System.String[]{"1", "2", "3", "4"};
			
			try
			{
				IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
				for (int i = 0; i < values.Length; i++)
				{
					Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
					doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.UN_TOKENIZED));
					writer.AddDocument(doc);
				}
				writer.Close();
				
				BooleanQuery booleanQuery1 = new BooleanQuery();
				booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
				booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);
				
				BooleanQuery query = new BooleanQuery();
				query.Add(booleanQuery1, BooleanClause.Occur.MUST);
				query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);
				
				IndexSearcher indexSearcher = new IndexSearcher(directory);
				Hits hits = indexSearcher.Search(query);
				Assert.AreEqual(2, hits.Length(), "Number of matched documents");
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail(e.Message);
			}
		}
Example #20
		public override void  SetUp()
		{
			base.SetUp();
			directory = new RAMDirectory();
			Analyzer analyzer = new AnonymousClassAnalyzer(this);
			IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			
			Document doc = new Document();
			doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
			IFieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
			doc.Add(repeatedField);
			doc.Add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();

		    searcher = new IndexSearcher(directory, true);
			query = new PhraseQuery();
		}
Example #21
		public override void  SetUp()
		{
			base.SetUp();
			directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			Document doc = new Document();
			doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();
			
			searcher = new IndexSearcher(directory);
			query = new TermQuery(new Term("field", "three"));
			filter = NewStaticFilterB();
		}
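Example #22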
		public override void SetUp()
		{
			base.SetUp();
			Lucene.Net.Documents.Document doc;
			
			RAMDirectory rd1 = new RAMDirectory();
			IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field1", "the quick brown fox jumps", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
			doc.Add(new Field("field2", "the quick brown fox jumps", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
			doc.Add(new Field("field4", "", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.TOKENIZED));
			iw1.AddDocument(doc);
			
			iw1.Close();
			RAMDirectory rd2 = new RAMDirectory();
			IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field0", "", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.TOKENIZED));
			doc.Add(new Field("field1", "the fox jumps over the lazy dog", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
			doc.Add(new Field("field3", "the fox jumps over the lazy dog", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
			iw2.AddDocument(doc);
			
			iw2.Close();
			
			this.ir1 = IndexReader.Open(rd1);
			this.ir2 = IndexReader.Open(rd2);
		}
Example #23
		public virtual void  CreateDummySearcher()
		{
			// Create a dummy index with nothing in it.
			// This could possibly fail if Lucene starts checking for docid ranges...
			RAMDirectory rd = new RAMDirectory();
			IndexWriter iw = new IndexWriter(rd, new WhitespaceAnalyzer(), true);
			iw.Close();
			s = new IndexSearcher(rd);
		}
Example #24
		public virtual void  TestGetFieldNames()
		{
			RAMDirectory d = new RAMDirectory();
			// set up writer
			IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
			AddDocumentWithFields(writer);
			writer.Close();
			// set up reader
			IndexReader reader = IndexReader.Open(d);
            System.Collections.Hashtable fieldNames = (System.Collections.Hashtable) reader.GetFieldNames();
			Assert.IsTrue(fieldNames.Contains("keyword"));
			Assert.IsTrue(fieldNames.Contains("text"));
			Assert.IsTrue(fieldNames.Contains("unindexed"));
			Assert.IsTrue(fieldNames.Contains("unstored"));
			// add more documents
			writer = new IndexWriter(d, new StandardAnalyzer(), false);
			// want to get some more segments here
			for (int i = 0; i < 5 * writer.mergeFactor; i++)
			{
				AddDocumentWithFields(writer);
			}
			// new fields are in some different segments (we hope)
			for (int i = 0; i < 5 * writer.mergeFactor; i++)
			{
				AddDocumentWithDifferentFields(writer);
			}
			writer.Close();
			// verify fields again
			reader = IndexReader.Open(d);
			fieldNames = (System.Collections.Hashtable) reader.GetFieldNames();
            Assert.AreEqual(9, fieldNames.Count); // the following fields + an empty one (bug?!)
			Assert.IsTrue(fieldNames.Contains("keyword"));
			Assert.IsTrue(fieldNames.Contains("text"));
			Assert.IsTrue(fieldNames.Contains("unindexed"));
			Assert.IsTrue(fieldNames.Contains("unstored"));
			Assert.IsTrue(fieldNames.Contains("keyword2"));
			Assert.IsTrue(fieldNames.Contains("text2"));
			Assert.IsTrue(fieldNames.Contains("unindexed2"));
			Assert.IsTrue(fieldNames.Contains("unstored2"));
			
			// verify that only indexed fields were returned
			System.Collections.Hashtable indexedFieldNames = (System.Collections.Hashtable) reader.GetFieldNames(true);
			Assert.AreEqual(6, indexedFieldNames.Count);
			Assert.IsTrue(indexedFieldNames.Contains("keyword"));
			Assert.IsTrue(indexedFieldNames.Contains("text"));
			Assert.IsTrue(indexedFieldNames.Contains("unstored"));
			Assert.IsTrue(indexedFieldNames.Contains("keyword2"));
			Assert.IsTrue(indexedFieldNames.Contains("text2"));
			Assert.IsTrue(indexedFieldNames.Contains("unstored2"));
			
			// verify that only unindexed fields were returned
			System.Collections.Hashtable unindexedFieldNames = (System.Collections.Hashtable) reader.GetFieldNames(false);
			Assert.AreEqual(3, unindexedFieldNames.Count); // the following fields + an empty one
			Assert.IsTrue(unindexedFieldNames.Contains("unindexed"));
			Assert.IsTrue(unindexedFieldNames.Contains("unindexed2"));
		}
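The two helpers this test calls are not included in the snippet. Sketches consistent with the 1.x-era Field factory methods used in Example #4; the field values are arbitrary, only the names and the indexed/stored flags matter:

		private void  AddDocumentWithFields(IndexWriter writer)
		{
			Document doc = new Document();
			doc.Add(Field.Keyword("keyword", "test1"));     // indexed, not tokenized
			doc.Add(Field.Text("text", "test1"));           // indexed, tokenized, stored
			doc.Add(Field.UnIndexed("unindexed", "test1")); // stored only
			doc.Add(Field.UnStored("unstored", "test1"));   // indexed only
			writer.AddDocument(doc);
		}
		
		private void  AddDocumentWithDifferentFields(IndexWriter writer)
		{
			Document doc = new Document();
			doc.Add(Field.Keyword("keyword2", "test1"));
			doc.Add(Field.Text("text2", "test1"));
			doc.Add(Field.UnIndexed("unindexed2", "test1"));
			doc.Add(Field.UnStored("unstored2", "test1"));
			writer.AddDocument(doc);
		}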
Example #25
		public virtual void  TestPhrasePrefix()
		{
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc1 = new Document();
			Document doc2 = new Document();
			Document doc3 = new Document();
			Document doc4 = new Document();
			Document doc5 = new Document();
			doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
			doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
			doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
			doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
			doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc1);
			writer.AddDocument(doc2);
			writer.AddDocument(doc3);
			writer.AddDocument(doc4);
			writer.AddDocument(doc5);
			writer.Optimize();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(indexStore);
			
			//PhrasePrefixQuery query1 = new PhrasePrefixQuery();
			MultiPhraseQuery query1 = new MultiPhraseQuery();
			//PhrasePrefixQuery query2 = new PhrasePrefixQuery();
			MultiPhraseQuery query2 = new MultiPhraseQuery();
			query1.Add(new Term("body", "blueberry"));
			query2.Add(new Term("body", "strawberry"));
			
			System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
			IndexReader ir = IndexReader.Open(indexStore);
			
			// this TermEnum gives "piccadilly", "pie" and "pizza".
			System.String prefix = "pi";
			TermEnum te = ir.Terms(new Term("body", prefix + "*"));
			do 
			{
				if (te.Term().Text().StartsWith(prefix))
				{
					termsWithPrefix.Add(te.Term());
				}
			}
			while (te.Next());
			
			query1.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
			query2.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
			
			ScoreDoc[] result;
			result = searcher.Search(query1, null, 1000).scoreDocs;
			Assert.AreEqual(2, result.Length);
			
			result = searcher.Search(query2, null, 1000).scoreDocs;
			Assert.AreEqual(0, result.Length);
		}
Example #26
		public virtual void  TestDocCount()
		{
			Directory dir = new RAMDirectory();
			
			IndexWriter writer = null;
			IndexReader reader = null;
			int i;
			
			try
			{
				writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
				
				// add 100 documents
				for (i = 0; i < 100; i++)
				{
					AddDoc(writer);
				}
				Assert.AreEqual(100, writer.DocCount());
				writer.Close();
				
				// delete 40 documents
				reader = IndexReader.Open(dir);
				for (i = 0; i < 40; i++)
				{
					reader.Delete(i);
				}
				reader.Close();
				
				// test doc count before segments are merged/index is optimized
				writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
				Assert.AreEqual(100, writer.DocCount());
				writer.Close();
				
				reader = IndexReader.Open(dir);
				Assert.AreEqual(100, reader.MaxDoc());
				Assert.AreEqual(60, reader.NumDocs());
				reader.Close();
				
				// optimize the index and check that the new doc count is correct
				writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
				writer.Optimize();
				Assert.AreEqual(60, writer.DocCount());
				writer.Close();
				
				// check that the index reader gives the same numbers.
				reader = IndexReader.Open(dir);
				Assert.AreEqual(60, reader.MaxDoc());
				Assert.AreEqual(60, reader.NumDocs());
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
                System.Console.Error.WriteLine(e.StackTrace);
			}
		}
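AddDoc() is a fixture helper not shown in the snippet; the test only counts documents, so any single-field document works. A minimal assumed version:

		private void  AddDoc(IndexWriter writer)
		{
			Document doc = new Document();
			doc.Add(Field.UnStored("content", "aaa"));
			writer.AddDocument(doc);
		}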
Example #27
		public virtual void  TestBefore()
		{
			// create an index
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
			
			long now = (long) (DateTime.UtcNow - new DateTime(1970, 1, 1)).TotalMilliseconds;
			
			Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
			// add time that is in the past
			doc.Add(new Field("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 1000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));
			doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			writer.Optimize();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(indexStore);
			
			// filter that should preserve matches
			//DateFilter df1 = DateFilter.Before("datefield", now);
			RangeFilter df1 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), false, true);
			// filter that should discard matches
			//DateFilter df2 = DateFilter.Before("datefield", now - 999999);
			RangeFilter df2 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(0, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), true, false);
			
			// search something that doesn't exist with DateFilter
			Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
			
			// search for something that does exist
			Query query2 = new TermQuery(new Term("body", "sunny"));
			
			Hits result;
			
			// ensure that queries return expected results without DateFilter first
			result = searcher.Search(query1);
			Assert.AreEqual(0, result.Length());
			
			result = searcher.Search(query2);
			Assert.AreEqual(1, result.Length());
			
			
			// run queries with DateFilter
			result = searcher.Search(query1, df1);
			Assert.AreEqual(0, result.Length());
			
			result = searcher.Search(query1, df2);
			Assert.AreEqual(0, result.Length());
			
			result = searcher.Search(query2, df1);
			Assert.AreEqual(1, result.Length());
			
			result = searcher.Search(query2, df2);
			Assert.AreEqual(0, result.Length());
		}
Example #28
		public virtual void  TestOutOfOrderCollection()
		{
			
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED);
			for (int i = 0; i < 10; i++)
			{
				writer.AddDocument(new Document());
			}
			writer.Commit();
			writer.Close();
			
			bool[] inOrder = new bool[]{false, true};
			System.String[] actualTSDCClass = new System.String[]{"OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector"};
			
			// Save the original value to set later.
			bool origVal = BooleanQuery.GetAllowDocsOutOfOrder();
			
			BooleanQuery.SetAllowDocsOutOfOrder(true);
			
			BooleanQuery bq = new BooleanQuery();
			// Add a Query with SHOULD, since bq.scorer() returns BooleanScorer2
			// which delegates to BS if there are no mandatory clauses.
			bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
			// Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
			// the clause instead of BQ.
			bq.SetMinimumNumberShouldMatch(1);
			try
			{
				
				IndexSearcher searcher = new IndexSearcher(dir);
				for (int i = 0; i < inOrder.Length; i++)
				{
					TopDocsCollector tdc = TopScoreDocCollector.create(3, inOrder[i]);
					Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + actualTSDCClass[i], tdc.GetType().FullName);
					
					searcher.Search(new MatchAllDocsQuery(), tdc);
					
					ScoreDoc[] sd = tdc.TopDocs().scoreDocs;
					Assert.AreEqual(3, sd.Length);
					for (int j = 0; j < sd.Length; j++)
					{
						Assert.AreEqual(j, sd[j].doc, "expected doc Id " + j + " found " + sd[j].doc);
					}
				}
			}
			finally
			{
				// Whatever happens, reset BooleanQuery.allowDocsOutOfOrder to the
				// original value. Don't set it to false in case the implementation in BQ
				// changes some day.
				BooleanQuery.SetAllowDocsOutOfOrder(origVal);
			}
		}
Example #29
        public virtual void  TestBefore()
        {
            // create an index
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            
            long now = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            
            Document doc = new Document();
            // add time that is in the past
            doc.Add(new Field("datefield", DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true);
            
            // filter that should preserve matches
            //DateFilter df1 = DateFilter.Before("datefield", now);
            TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), false, true);
            // filter that should discard matches
            //DateFilter df2 = DateFilter.Before("datefield", now - 999999);
            TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false);
            
            // search something that doesn't exist with DateFilter
            Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
            
            // search for something that does exist
            Query query2 = new TermQuery(new Term("body", "sunny"));
            
            ScoreDoc[] result;
            
            // ensure that queries return expected results without DateFilter first
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            
            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(1, result.Length);
            
            
            // run queries with DateFilter
            result = searcher.Search(query1, df1, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            
            result = searcher.Search(query1, df2, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            
            result = searcher.Search(query2, df1, 1000).ScoreDocs;
            Assert.AreEqual(1, result.Length);
            
            result = searcher.Search(query2, df2, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
        }
Example #30
		public virtual void  TestBefore()
		{
			// create an index
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
			
			long now = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; // milliseconds since the Unix epoch
			
			Document doc = new Document();
			// add time that is in the past
			doc.Add(Field.Keyword("datefield", DateField.TimeToString(now - 1000)));
			doc.Add(Field.Text("body", "Today is a very sunny day in New York City"));
			writer.AddDocument(doc);
			writer.Optimize();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(indexStore);
			
			// filter that should preserve matches
			DateFilter df1 = DateFilter.Before("datefield", now);
			
			// filter that should discard matches
			DateFilter df2 = DateFilter.Before("datefield", now - 999999);
			
			// search something that doesn't exist with DateFilter
			Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
			
			// search for something that does exist
			Query query2 = new TermQuery(new Term("body", "sunny"));
			
			Hits result;
			
			// ensure that queries return expected results without DateFilter first
			result = searcher.Search(query1);
			Assert.AreEqual(0, result.Length());
			
			result = searcher.Search(query2);
			Assert.AreEqual(1, result.Length());
			
			
			// run queries with DateFilter
			result = searcher.Search(query1, df1);
			Assert.AreEqual(0, result.Length());
			
			result = searcher.Search(query1, df2);
			Assert.AreEqual(0, result.Length());
			
			result = searcher.Search(query2, df1);
			Assert.AreEqual(1, result.Length());
			
			result = searcher.Search(query2, df2);
			Assert.AreEqual(0, result.Length());
		}
Example #31
        public virtual void  TestStopWordSearching()
        {
            Analyzer  analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
            Directory ramDir   = new RAMDirectory();
            var       iw       = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            var       doc      = new Document();

            doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
            iw.AddDocument(doc);
            iw.Close();

            var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer);

            mfqp.DefaultOperator = QueryParser.Operator.AND;
            var q          = mfqp.Parse("the footest");
            var is_Renamed = new IndexSearcher(ramDir, true);
            var hits       = is_Renamed.Search(q, null, 1000).ScoreDocs;

            Assert.AreEqual(1, hits.Length);
            is_Renamed.Close();
        }
Example #32
        public virtual void  TestNot_Renamed()
        {
            RAMDirectory store  = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            Document d1 = new Document();

            d1.Add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(d1, null);
            writer.Optimize(null);
            writer.Close();

            Searcher    searcher = new IndexSearcher(store, true, null);
            QueryParser parser   = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
            Query       query    = parser.Parse("a NOT b");

            //System.out.println(query);
            ScoreDoc[] hits = searcher.Search(query, null, 1000, null).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
        }
Example #33
        public virtual void  TestMergeDocCount0()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);

            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.MergeFactor = 100;

            for (int i = 0; i < 250; i++)
            {
                AddDoc(writer);
                CheckInvariants(writer);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, false, null);

            reader.DeleteDocuments(new Term("content", "aaa"), null);
            reader.Close();

            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED, null);
            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.MergeFactor = 5;

            // merge factor is changed, so check invariants after all adds
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
            }
            writer.Commit(null);
            ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync();
            writer.Commit(null);
            CheckInvariants(writer);
            Assert.AreEqual(10, writer.MaxDoc());

            writer.Close();
        }
Example #34
        public virtual void  TestWithPendingDeletes3()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);
            IndexWriter writer = NewWriter(dir, false);

            // Adds 10 docs, then replaces them with another 10
            // docs, so 10 pending deletes:
            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
                writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);
            }

            // Deletes one of the 10 added docs, leaving 9:
            PhraseQuery q = new PhraseQuery();

            q.Add(new Term("content", "bbb"));
            q.Add(new Term("content", "14"));
            writer.DeleteDocuments(q);

            writer.AddIndexesNoOptimize(new Directory[] { aux });

            writer.Optimize();
            writer.Commit();

            VerifyNumDocs(dir, 1039);
            VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
            VerifyTermDocs(dir, new Term("content", "bbb"), 9);

            writer.Close();
            dir.Close();
            aux.Close();
        }
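SetUpDirs, NewWriter, VerifyNumDocs and VerifyTermDocs are fixture helpers that are not part of the snippet. As one example, NewWriter could plausibly look like this (the merge policy mirrors Example #33; treat the details as assumptions):

            private IndexWriter NewWriter(Directory dir, bool create)
            {
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.SetMergePolicy(new LogDocMergePolicy(writer));
                return writer;
            }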
Example #35
        public void Test_Store_RAMDirectory()
        {
            Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

            //Index 1 Doc
            Lucene.Net.Index.IndexWriter  wr  = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true);
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
            wr.AddDocument(doc);
            wr.Close();

            //now serialize it
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
            serializer.Serialize(memoryStream, ramDIR);

            //Close DIR
            ramDIR.Close();
            ramDIR = null;

            //now deserialize
            memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
            Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);

            //Add 1 more doc
            wr  = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false);
            doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
            wr.AddDocument(doc);
            wr.Close();

            //Search
            Lucene.Net.Search.IndexSearcher     s       = new Lucene.Net.Search.IndexSearcher(ramDIR2);
            Lucene.Net.QueryParsers.QueryParser qp      = new Lucene.Net.QueryParsers.QueryParser("field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
            Lucene.Net.Search.Query             q       = qp.Parse("value1");
            Lucene.Net.Search.TopDocs           topDocs = s.Search(q, 100);
            s.Close();

            Assert.AreEqual(2, topDocs.totalHits, "See the issue: LUCENENET-174");
        }
Example #36
        public virtual void  Test()
        {
            //Positive test of FieldInfos
            Assert.IsTrue(testDoc != null);
            FieldInfos fieldInfos = new FieldInfos();

            fieldInfos.Add(testDoc);
            //Since the complement is stored as well in the fields map
            Assert.IsTrue(fieldInfos.Size() == 7);             //this is 7 b/c we are using the no-arg constructor
            RAMDirectory dir = new RAMDirectory();

            System.String name   = "testFile";
            OutputStream  output = dir.CreateFile(name);

            Assert.IsTrue(output != null);
            //Use a RAMOutputStream

            try
            {
                fieldInfos.Write(output);
                output.Close();
                Assert.IsTrue(output.Length() > 0);
                FieldInfos readIn = new FieldInfos(dir, name);
                Assert.IsTrue(fieldInfos.Size() == readIn.Size());
                FieldInfo info = readIn.FieldInfo("textField1");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector == false);

                info = readIn.FieldInfo("textField2");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector == true);

                dir.Close();
            }
            catch (System.IO.IOException e)
            {
                Assert.Fail(e.Message);
            }
        }
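Example #37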
        public virtual void  TestNoMergeAfterCopy()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);

            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(4);

            writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
            Assert.AreEqual(1060, writer.DocCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1060);
        }
Example #38
        public virtual void  TestFilterWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < 500; i++)
            {
                Document document = new Document();
                document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document, null);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, true, null);

            SpanTermQuery    query    = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
            SpanQueryFilter  filter   = new SpanQueryFilter(query);
            SpanFilterResult result   = filter.BitSpans(reader, null);
            DocIdSet         docIdSet = result.DocIdSet;

            Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
            AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
            var spans = result.Positions;

            Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
            int size = GetDocIdSetSize(docIdSet);

            Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
            for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();)
            {
                SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current;
                Assert.IsTrue(info != null, "info is null and it shouldn't be");
                //The doc should indicate the bit is on
                AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
                //There should be two positions in each
                Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
            }
            reader.Close();
        }
Example #39
        public virtual void  TestShrinkToAfterShortestMatch()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new TestPayloadAnalyzer(this), IndexWriter.MaxFieldLength.LIMITED);
            Document     doc       = new Document();

            doc.Add(new Field("content", new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("a b c d e f g h i j a k")))));
            writer.AddDocument(doc);
            writer.Close();

            IndexSearcher is_Renamed = new IndexSearcher(directory, true);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs   = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq   = new SpanNearQuery(sqs, 1, true);
            Spans         spans = snq.GetSpans(is_Renamed.IndexReader);

            TopDocs topDocs = is_Renamed.Search(snq, 1);

            System.Collections.Hashtable payloadSet = new System.Collections.Hashtable();
            for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
            {
                while (spans.Next())
                {
                    System.Collections.Generic.ICollection<byte[]> payloads = spans.GetPayload();

                    for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();)
                    {
                        CollectionsHelper.AddIfNotContains(payloadSet, new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[])it.Current)));
                    }
                }
            }
            Assert.AreEqual(2, payloadSet.Count);
            Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
            Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
        }
Example #40
        public virtual void  TestBasicDelete()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer     = null;
            IndexReader reader     = null;
            Term        searchTerm = new Term("content", "aaa");

            //  add 100 documents with term : aaa
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
            for (int i = 0; i < 100; i++)
            {
                AddDoc(writer, searchTerm.Text());
            }
            writer.Close();

            // OPEN READER AT THIS POINT - this should fix the view of the
            // index at the point of having 100 "aaa" documents and 0 "bbb"
            reader = IndexReader.Open(dir);
            Assert.AreEqual(100, reader.DocFreq(searchTerm), "first docFreq");
            AssertTermDocsCount("first reader", reader, searchTerm, 100);

            // DELETE DOCUMENTS CONTAINING TERM: aaa
            int deleted = 0;

            reader  = IndexReader.Open(dir);
            deleted = reader.Delete(searchTerm);
            Assert.AreEqual(100, deleted, "deleted count");
            Assert.AreEqual(100, reader.DocFreq(searchTerm), "deleted docFreq");
            AssertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
            reader.Close();

            // CREATE A NEW READER and re-test
            reader = IndexReader.Open(dir);
            Assert.AreEqual(100, reader.DocFreq(searchTerm), "deleted docFreq");
            AssertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
            reader.Close();
        }
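        // Note: AddDoc and AssertTermDocsCount are helpers defined elsewhere in the test
        // class; the sketch below is an assumption, with the "content" field name taken
        // from the searchTerm above.
        private void AddDoc(IndexWriter writer, System.String value)
        {
            Document doc = new Document();
            doc.Add(new Field("content", value, Field.Store.NO, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }

        private void AssertTermDocsCount(System.String msg, IndexReader reader, Term term, int expected)
        {
            TermDocs tdocs = null;
            try
            {
                tdocs = reader.TermDocs(term);
                Assert.IsNotNull(tdocs, msg + ", null TermDocs");
                int count = 0;
                while (tdocs.Next())
                {
                    count++;
                }
                Assert.AreEqual(expected, count, msg + ", count mismatch");
            }
            finally
            {
                if (tdocs != null)
                {
                    tdocs.Close();
                }
            }
        }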
Example #41
        public virtual void  TestSetNorm_Renamed()
        {
            RAMDirectory store  = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // add the same document four times
            IFieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);
            Document   d1 = new Document();

            d1.Add(f1);
            writer.AddDocument(d1);
            writer.AddDocument(d1);
            writer.AddDocument(d1);
            writer.AddDocument(d1);
            writer.Close();

            // reset the boost of each instance of this document
            IndexReader reader = IndexReader.Open(store, false);

            reader.SetNorm(0, "field", 1.0f);
            reader.SetNorm(1, "field", 2.0f);
            reader.SetNorm(2, "field", 4.0f);
            reader.SetNorm(3, "field", 16.0f);
            reader.Close();

            // check that searches are ordered by this boost
            float[] scores = new float[4];

            new IndexSearcher(store, true).Search(new TermQuery(new Term("field", "word")), new AnonymousClassCollector(scores, this));

            float lastScore = 0.0f;

            for (int i = 0; i < 4; i++)
            {
                Assert.IsTrue(scores[i] > lastScore);
                lastScore = scores[i];
            }
        }
Example #42
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory    directory = new RAMDirectory();
            PayloadAnalyzer analyzer  = new PayloadAnalyzer(this);
            IndexWriter     writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetSimilarity(similarity);
            //writer.infoStream = System.out;
            for (int i = 0; i < 1000; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                System.String txt = English.IntToEnglish(i) + ' ' + English.IntToEnglish(i + 1);
                doc.Add(new Field("field2", txt, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            writer.Optimize();
            writer.Close();

            searcher            = new IndexSearcher(directory, true);
            searcher.Similarity = similarity;
        }
Example #43
        public virtual void  TestCachingWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);

            writer.Close();

            IndexReader reader = IndexReader.Open(dir);

            MockFilter           filter = new MockFilter();
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // first time, nested filter is called
            cacher.Bits(reader);
            Assert.IsTrue(filter.WasCalled(), "first time");

            // second time, nested filter should not be called
            filter.Clear();
            cacher.Bits(reader);
            Assert.IsFalse(filter.WasCalled(), "second time");

            reader.Close();
        }
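        // Note: MockFilter is not part of this listing. Since the test drives the
        // deprecated Filter.Bits(IndexReader) API, a minimal sketch could be:
        public class MockFilter : Filter
        {
            private bool wasCalled;

            public override System.Collections.BitArray Bits(IndexReader reader)
            {
                wasCalled = true;
                // an empty bit set is fine: the caching, not the contents, is under test
                return new System.Collections.BitArray(reader.MaxDoc());
            }

            public void Clear()
            {
                wasCalled = false;
            }

            public bool WasCalled()
            {
                return wasCalled;
            }
        }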
Example #44
        public override void  SetUp()
        {
            base.SetUp();

            RAMDirectory rd = new RAMDirectory();

            IndexWriter writer = new IndexWriter(rd, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

            Document d = new Document();

            d.Add(new Field(FIELD_T, "Optimize not deleting all files", Field.Store.YES, Field.Index.ANALYZED));
            d.Add(new Field(FIELD_C, "Deleted When I run an optimize in our production environment.", Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(d);
            writer.Close();

            searcher = new IndexSearcher(rd, true);
        }
Example #45
        public virtual void  TestNullDocIdSet()
        {
            // Tests that if a Filter produces a null DocIdSet, which is given to
            // IndexSearcher, everything works fine. This came up in LUCENE-1754.
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), MaxFieldLength.UNLIMITED);
            Document    doc    = new Document();

            doc.Add(new Field("c", "val", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
            writer.AddDocument(doc);
            writer.Close();

            // First verify the document is searchable.
            IndexSearcher searcher = new IndexSearcher(dir, true);

            Assert.AreEqual(1, searcher.Search(new MatchAllDocsQuery(), 10).TotalHits);

            // Now search w/ a Filter which returns a null DocIdSet
            Filter f = new AnonymousClassFilter(this);

            Assert.AreEqual(0, searcher.Search(new MatchAllDocsQuery(), f, 10).TotalHits);
            searcher.Close();
        }
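        // Note: AnonymousClassFilter stands in for the Java anonymous class from the
        // LUCENE-1754 test; a minimal sketch (the enclosing-instance constructor
        // argument is dropped for brevity):
        private class AnonymousClassFilter : Filter
        {
            public override DocIdSet GetDocIdSet(IndexReader reader)
            {
                // deliberately return null so IndexSearcher must handle it
                return null;
            }
        }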
Example #46
        public virtual void  TestKeepNoneOnInitDeletionPolicy()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(this);

                Directory dir = new RAMDirectory();

                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.SetMaxBufferedDocs(10);
                writer.UseCompoundFile = useCompoundFile;
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                }
                writer.Close();

                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.UseCompoundFile = useCompoundFile;
                writer.Optimize();
                writer.Close();

                Assert.AreEqual(2, policy.numOnInit);
                // There should be exactly 2 commits (one per close above):
                Assert.AreEqual(2, policy.numOnCommit);

                // Simplistic check: just verify the index is in fact
                // readable:
                IndexReader reader = IndexReader.Open(dir, true);
                reader.Close();

                dir.Close();
            }
        }
Example #47
        public virtual void  TestDanish()
        {
            /* build an index */
            RAMDirectory danishIndex = new RAMDirectory();
            IndexWriter  writer      = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

            // Danish collation orders the words below in the given order
            // (example taken from TestSort.testInternationalSort() ).
            System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
            for (int docnum = 0; docnum < words.Length; ++docnum)
            {
                Document doc = new Document();
                doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);
            }
            writer.Optimize();
            writer.Close();

            IndexReader   reader = IndexReader.Open(danishIndex, true);
            IndexSearcher search = new IndexSearcher(reader);
            Query         q      = new TermQuery(new Term("body", "body"));

            System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("da-DK").CompareInfo;
            Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);

            // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
            // but Danish collation does.
            int numHits =
                search.Search(q, new TermRangeFilter("content", "H\u00D8T", "MAND", F, F, collator), 1000).TotalHits;

            Assert.AreEqual(1, numHits, "The index Term should be included.");

            numHits = search.Search(q, new TermRangeFilter("content", "H\u00C5T", "MAND", F, F, collator), 1000).TotalHits;
            Assert.AreEqual(0, numHits, "The index Term should not be included.");
            search.Close();
        }
Example #48
        public virtual void  TestMergeDocCount0()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true);

            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(100);

            for (int i = 0; i < 250; i++)
            {
                AddDoc(writer);
                CheckInvariants(writer);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir);

            reader.DeleteDocuments(new Term("content", "aaa"));
            reader.Close();

            writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false);
            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(5);

            // merge factor is changed, so check invariants after all adds
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
            }
            CheckInvariants(writer);
            Assert.AreEqual(10, writer.DocCount());

            writer.Close();
        }
Example #49
        public virtual void  TestBasic()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();

            doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Close();

            TermQuery termQuery = new TermQuery(new Term("field", "value"));

            // should not throw exception with primitive query
            QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);

            IndexSearcher searcher = new IndexSearcher(dir, true);
            TopDocs       hits     = searcher.Search(new MatchAllDocsQuery(), qwf, 10);

            Assert.AreEqual(1, hits.totalHits);

            // should not throw exception with complex primitive query
            BooleanQuery booleanQuery = new BooleanQuery();

            booleanQuery.Add(termQuery, Occur.MUST);
            booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
            qwf = new QueryWrapperFilter(booleanQuery);

            hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
            Assert.AreEqual(1, hits.totalHits);

            // should not throw exception with non primitive Query (doesn't implement
            // Query#createWeight)
            qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));

            hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
            Assert.AreEqual(1, hits.totalHits);
        }
Example #50
        public virtual void  TestMethod()
        {
            RAMDirectory directory = new RAMDirectory();

            System.String[] categories = new System.String[] { "food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink" };

            Query       rw1    = null;
            Query       rw2    = null;
            IndexReader reader = null;

            try
            {
                IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                for (int i = 0; i < categories.Length; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                    writer.AddDocument(doc);
                }
                writer.Close();

                reader = IndexReader.Open(directory);
                PrefixQuery query = new PrefixQuery(new Term("category", "foo"));
                rw1 = query.Rewrite(reader);

                BooleanQuery bq = new BooleanQuery();
                bq.Add(query, BooleanClause.Occur.MUST);

                rw2 = bq.Rewrite(reader);
            }
            catch (System.IO.IOException e)
            {
                Assert.Fail(e.Message);
            }

            Assert.AreEqual(GetCount(reader, rw1), GetCount(reader, rw2), "Number of Clauses Mismatch");
        }
Example #51
        public virtual void  TestMultiTermDocs()
        {
            RAMDirectory ramDir1 = new RAMDirectory();

            AddDoc(ramDir1, "test foo", true);
            RAMDirectory ramDir2 = new RAMDirectory();

            AddDoc(ramDir2, "test blah", true);
            RAMDirectory ramDir3 = new RAMDirectory();

            AddDoc(ramDir3, "test wow", true);

            IndexReader[] readers1 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir3) };
            IndexReader[] readers2 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir2), IndexReader.Open(ramDir3) };
            MultiReader   mr2      = new MultiReader(readers1);
            MultiReader   mr3      = new MultiReader(readers2);

            // test mixing up TermDocs and TermEnums from different readers.
            TermDocs td2 = mr2.TermDocs();
            TermEnum te3 = mr3.Terms(new Term("body", "wow"));

            td2.Seek(te3);
            int ret = 0;

            // This should blow up if we forget to check that the TermEnum is from the same
            // reader as the TermDocs.
            while (td2.Next())
            {
                ret += td2.Doc();
            }
            td2.Close();
            te3.Close();

            // really a dummy assert to ensure that we got some docs and to ensure that
            // nothing is optimized out.
            Assert.IsTrue(ret > 0);
        }
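        // Note: the AddDoc overload used above is defined elsewhere; a sketch, assuming
        // the "body" field name implied by the TermEnum lookup:
        private void AddDoc(RAMDirectory ramDir, System.String s, bool create)
        {
            IndexWriter iw = new IndexWriter(ramDir, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();
            doc.Add(new Field("body", s, Field.Store.YES, Field.Index.ANALYZED));
            iw.AddDocument(doc);
            iw.Close();
        }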
Example #52
        /// <summary> Sets up a RAMDirectory and adds documents (using English.IntToEnglish()) with three
        /// fields (FIELD, MULTI_FIELD and NO_PAYLOAD_FIELD), analyzing them with the PayloadAnalyzer.
        /// </summary>
        /// <param name="similarity">The Similarity class to use in the Searcher
        /// </param>
        /// <param name="numDocs">The number of docs to add
        /// </param>
        /// <returns> An IndexSearcher
        /// </returns>
        /// <throws>  IOException </throws>
        public virtual IndexSearcher SetUp(Similarity similarity, int numDocs)
        {
            RAMDirectory    directory = new RAMDirectory();
            PayloadAnalyzer analyzer  = new PayloadAnalyzer(this);
            IndexWriter     writer    = new IndexWriter(directory, analyzer, true);

            writer.SetSimilarity(similarity);
            //writer.infoStream = System.out;
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field(FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(MULTI_FIELD, English.IntToEnglish(i) + "  " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            //writer.optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory);

            searcher.SetSimilarity(similarity);
            return(searcher);
        }
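        // Note: a hypothetical caller would pass a Similarity and a document count and
        // then search one of the fields, for example:
        //
        //     IndexSearcher searcher = SetUp(new DefaultSimilarity(), 100);
        //     TopDocs hits = searcher.Search(new TermQuery(new Term(FIELD, "one")), null, 10);
        //     Assert.IsTrue(hits.TotalHits > 0);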
Example #53
        private float CheckPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
        {
            query.SetSlop(slop);

            RAMDirectory       ramDir   = new RAMDirectory();
            WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
            IndexWriter        writer   = new IndexWriter(ramDir, analyzer, MaxFieldLength.UNLIMITED);

            writer.AddDocument(doc);
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(ramDir);
            TopDocs       td       = searcher.Search(query, null, 10);

            //System.out.println("slop: "+slop+"  query: "+query+"  doc: "+doc+"  Expecting number of hits: "+expectedNumResults+" maxScore="+td.getMaxScore());
            Assert.AreEqual(expectedNumResults, td.TotalHits, "slop: " + slop + "  query: " + query + "  doc: " + doc + "  Wrong number of hits");

            //QueryUtils.check(query,searcher);

            searcher.Close();
            ramDir.Close();

            return(td.GetMaxScore());
        }
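        // Note: a hedged usage example; the two phrase terms sit four positions apart,
        // so slop 3 is the smallest slop that matches (the "field" name is an assumption):
        //
        //     Document doc = new Document();
        //     doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
        //     PhraseQuery query = new PhraseQuery();
        //     query.Add(new Term("field", "one"));
        //     query.Add(new Term("field", "five"));
        //     CheckPhraseQuery(doc, query, 2, 0);  // slop 2: no match
        //     CheckPhraseQuery(doc, query, 3, 1);  // slop 3: exactly one hit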
Example #54
        public virtual void  TestNoCopySegments()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);

            writer.SetMaxBufferedDocs(9);
            writer.MergeFactor = 4;
            AddDocs(writer, 2);

            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(1032, writer.MaxDoc());
            Assert.AreEqual(2, writer.GetSegmentCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1032);
        }
Example #55
        public override void  SetUp()
        {
            base.SetUp();
            dir = new RAMDirectory();
        }
Example #56
        public virtual void  TestKnownSetOfDocuments()
        {
            System.String[] termArray = new System.String[] { "eating", "chocolate", "in", "a", "computer", "lab", "grows", "old", "colored", "with", "an" };
            System.String   test1     = "eating chocolate in a computer lab";                                             //6 terms
            System.String   test2     = "computer in a computer lab";                                                     //5 terms
            System.String   test3     = "a chocolate lab grows old";                                                      //5 terms
            System.String   test4     = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["computer"]  = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();

            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();

            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();

            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();

            SetupDoc(testDoc4, test4);

            Directory dir = new RAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1);
                writer.AddDocument(testDoc2);
                writer.AddDocument(testDoc3);
                writer.AddDocument(testDoc4);
                writer.Close();
                IndexSearcher knownSearcher = new IndexSearcher(dir);
                TermEnum      termEnum      = knownSearcher.reader.Terms();
                TermDocs      termDocs      = knownSearcher.reader.TermDocs();
                //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);

                Similarity sim = knownSearcher.GetSimilarity();
                while (termEnum.Next() == true)
                {
                    Term term = termEnum.Term();
                    //System.out.println("Term: " + term);
                    termDocs.Seek(term);
                    while (termDocs.Next())
                    {
                        int docId = termDocs.Doc();
                        int freq  = termDocs.Freq();
                        //System.out.println("Doc Id: " + docId + " freq " + freq);
                        TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                        float          tf     = sim.Tf(freq);
                        float          idf    = sim.Idf(term, knownSearcher);
                        //float qNorm = sim.queryNorm()
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);
                        //float coord = sim.coord()
                        //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
                        Assert.IsTrue(vector != null);
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                        {
                            if (term.Text().Equals(vTerms[i]) == true)
                            {
                                Assert.IsTrue(freqs[i] == freq);
                            }
                        }
                    }
                    //System.out.println("--------");
                }
                Query query = new TermQuery(new Term("Field", "chocolate"));
                Hits  hits  = knownSearcher.Search(query);
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length() == 3);
                float score = hits.Score(0);

                /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
                 * System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
                 * System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " +  hits.doc(2).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
                Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
                Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
                Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));
                TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
                Assert.IsTrue(vector2 != null);
                //System.out.println("Vector: " + vector);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    //System.out.println("Term: " + term);
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);
                    System.Object tmpFreqInt = test4Map[term];
                    Assert.IsTrue(tmpFreqInt != null);
                    // unbox only after the null check; a missing term would otherwise throw
                    System.Int32 freqInt = (System.Int32)tmpFreqInt;
                    Assert.IsTrue(freqInt == freq);
                }
                knownSearcher.Close();
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.Fail(e.Message);
            }
        }
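        // Note: SetupDoc is defined elsewhere. The assertions read term vectors back via
        // GetTermFreqVector, so the field must be indexed with term vectors; a minimal
        // sketch (the "Field" name is taken from the queries above, and the exact
        // Field.Index constant varies across Lucene.Net releases):
        private void SetupDoc(Document doc, System.String text)
        {
            doc.Add(new Field("Field", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        }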
Example #57
        public virtual void  TestExpirationTimeDeletionPolicy()
        {
            double SECONDS = 2.0;

            bool autoCommit      = false;
            bool useCompoundFile = true;

            Directory dir = new RAMDirectory();
            ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS);
            IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);

            writer.SetUseCompoundFile(useCompoundFile);
            writer.Close();

            long lastDeleteTime = 0;

            for (int i = 0; i < 7; i++)
            {
                // Record last time when writer performed deletes of
                // past commits
                lastDeleteTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                writer         = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                writer.SetUseCompoundFile(useCompoundFile);
                for (int j = 0; j < 17; j++)
                {
                    AddDoc(writer);
                }
                writer.Close();

                // Make sure to sleep long enough so that some commit
                // points will be deleted:
                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * (int)(1000.0 * (SECONDS / 5.0))));
            }

            // First, make sure the policy in fact deleted something:
            Assert.IsTrue(policy.numDelete > 0, "no commits were deleted");

            // Then simplistic check: just verify that the
            // segments_N's that still exist are in fact within SECONDS
            // seconds of the last one's mod time, and, that I can
            // open a reader on each:
            long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

            System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
            dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
            while (gen > 0)
            {
                try
                {
                    IndexReader reader = IndexReader.Open(dir);
                    reader.Close();
                    fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
                    long modTime = dir.FileModified(fileName);
                    Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000), "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted");
                }
                catch (System.IO.IOException e)
                {
                    // OK
                    break;
                }

                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                gen--;
            }

            dir.Close();
        }
Example #58
        public virtual void  TestKeepAllDeletionPolicy()
        {
            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                // Never deletes a commit
                KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

                Directory dir = new RAMDirectory();
                policy.dir = dir;

                IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                writer.SetMaxBufferedDocs(10);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.SetMergeScheduler(new SerialMergeScheduler());
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                    if (autoCommit && i % 10 == 0)
                    {
                        writer.Commit();
                    }
                }
                writer.Close();

                writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.Optimize();
                writer.Close();

                Assert.AreEqual(2, policy.numOnInit);
                if (!autoCommit)
                {
                    // If we are not auto committing then there should
                    // be exactly 2 commits (one per close above):
                    Assert.AreEqual(2, policy.numOnCommit);
                }

                // Test listCommits
                System.Collections.ICollection commits = IndexReader.ListCommits(dir);
                if (!autoCommit)
                {
                    // 1 from opening writer + 2 from closing writer
                    Assert.AreEqual(3, commits.Count);
                }
                else
                {
                    // 1 from opening writer + 2 from closing writer +
                    // 11 from calling writer.Commit() explicitly above
                    Assert.AreEqual(14, commits.Count);
                }

                System.Collections.IEnumerator it = commits.GetEnumerator();
                // Make sure we can open a reader on each commit:
                while (it.MoveNext())
                {
                    IndexCommit commit = (IndexCommit)it.Current;
                    IndexReader r      = IndexReader.Open(commit, null);
                    r.Close();
                }

                // Simplistic check: just verify all segments_N's still
                // exist, and, I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
                while (gen > 0)
                {
                    IndexReader reader = IndexReader.Open(dir);
                    reader.Close();
                    dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    gen--;

                    if (gen > 0)
                    {
                        // Removing a commit point should have orphaned at
                        // least one index file.  Open & close a writer and
                        // assert that it actually removed something:
                        int preCount = dir.ListAll().Length;
                        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED);
                        writer.Close();
                        int postCount = dir.ListAll().Length;
                        Assert.IsTrue(postCount < preCount);
                    }
                }

                dir.Close();
            }
        }
Example #59
        public virtual void  TestKeepLastNDeletionPolicyWithCreates()
        {
            int N = 10;

            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                Directory   dir    = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                writer.SetMaxBufferedDocs(10);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.Close();
                Term  searchTerm = new Term("content", "aaa");
                Query query      = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                    writer.SetMaxBufferedDocs(10);
                    writer.SetUseCompoundFile(useCompoundFile);
                    for (int j = 0; j < 17; j++)
                    {
                        AddDoc(writer);
                    }
                    // this is a commit when autoCommit=false:
                    writer.Close();
                    IndexReader reader = IndexReader.Open(dir, policy);
                    reader.DeleteDocument(3);
                    reader.SetNorm(5, "content", 2.0F);
                    IndexSearcher searcher = new IndexSearcher(reader);
                    ScoreDoc[]    hits     = searcher.Search(query, null, 1000).scoreDocs;
                    Assert.AreEqual(16, hits.Length);
                    // this is a commit when autoCommit=false:
                    reader.Close();
                    searcher.Close();

                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                    // This will not commit: there are no changes
                    // pending because we opened for "create":
                    writer.Close();
                }

                Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
                if (!autoCommit)
                {
                    Assert.AreEqual(3 * (N + 1), policy.numOnCommit);
                }

                IndexSearcher searcher2 = new IndexSearcher(dir);
                ScoreDoc[]    hits2     = searcher2.Search(query, null, 1000).scoreDocs;
                Assert.AreEqual(0, hits2.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 0;

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir);

                        // Work backwards in commits on what the expected
                        // count should be.  Only check this in the
                        // autoCommit false case:
                        if (!autoCommit)
                        {
                            searcher2 = new IndexSearcher(reader);
                            hits2     = searcher2.Search(query, null, 1000).scoreDocs;
                            Assert.AreEqual(expectedCount, hits2.Length);
                            searcher2.Close();
                            if (expectedCount == 0)
                            {
                                expectedCount = 16;
                            }
                            else if (expectedCount == 16)
                            {
                                expectedCount = 17;
                            }
                            else if (expectedCount == 17)
                            {
                                expectedCount = 0;
                            }
                        }
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last " + N);
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        if (i != N)
                        {
                            throw e;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
Example #60
        public virtual void  TestKeepLastNDeletionPolicy()
        {
            int N = 5;

            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                Directory dir = new RAMDirectory();

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                for (int j = 0; j < N + 1; j++)
                {
                    IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                    writer.SetMaxBufferedDocs(10);
                    writer.SetUseCompoundFile(useCompoundFile);
                    for (int i = 0; i < 17; i++)
                    {
                        AddDoc(writer);
                    }
                    writer.Optimize();
                    writer.Close();
                }

                Assert.IsTrue(policy.numDelete > 0);
                Assert.AreEqual(N + 1, policy.numOnInit);
                if (autoCommit)
                {
                    Assert.IsTrue(policy.numOnCommit > 1);
                }
                else
                {
                    Assert.AreEqual(N + 1, policy.numOnCommit);
                }

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir);
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits prior to last " + N);
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        if (i != N)
                        {
                            throw e;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
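        // Note: KeepLastNDeletionPolicy is defined elsewhere in the test suite. A minimal
        // sketch of the idea, assuming the non-generic IndexDeletionPolicy signature of
        // this Lucene.Net line (the enclosing-instance constructor argument is dropped,
        // and the real class tracks its counters slightly differently):
        private class KeepLastNDeletionPolicy : IndexDeletionPolicy
        {
            internal int numOnInit;
            internal int numOnCommit;
            internal int numDelete;
            private readonly int numToKeep;

            public KeepLastNDeletionPolicy(int numToKeep)
            {
                this.numToKeep = numToKeep;
            }

            public void OnInit(System.Collections.IList commits)
            {
                numOnInit++;
                DoDeletes(commits);
            }

            public void OnCommit(System.Collections.IList commits)
            {
                numOnCommit++;
                DoDeletes(commits);
            }

            private void DoDeletes(System.Collections.IList commits)
            {
                // commits arrive oldest first; delete everything but the newest numToKeep
                for (int i = 0; i < commits.Count - numToKeep; i++)
                {
                    ((IndexCommit) commits[i]).Delete();
                    numDelete++;
                }
            }
        }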