public virtual void TestEncodeDecode() {
    int iterations = RandomInts.NextIntBetween(Random(), 1, 1000);
    float acceptableOverheadRatio = (float)Random().NextDouble();
    int[] values = new int[(iterations - 1) * Lucene41PostingsFormat.BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
    for (int i = 0; i < iterations; ++i) {
        int bpv = Random().Next(32);
        if (bpv == 0) {
            int value = RandomInts.NextIntBetween(Random(), 0, int.MaxValue);
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j) {
                values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = value;
            }
        } else {
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j) {
                values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = RandomInts.NextIntBetween(Random(), 0, (int)PackedInts.MaxValue(bpv));
            }
        }
    }

    Directory d = new RAMDirectory();
    long endPointer;

    {
        // encode
        IndexOutput @out = d.CreateOutput("test.bin", IOContext.DEFAULT);
        ForUtil forUtil = new ForUtil(acceptableOverheadRatio, @out);
        for (int i = 0; i < iterations; ++i) {
            forUtil.WriteBlock(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, values.Length), new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], @out);
        }
        endPointer = @out.FilePointer;
        @out.Dispose();
    }

    {
        // decode
        IndexInput @in = d.OpenInput("test.bin", IOContext.READONCE);
        ForUtil forUtil = new ForUtil(@in);
        for (int i = 0; i < iterations; ++i) {
            if (Random().NextBoolean()) {
                forUtil.SkipBlock(@in);
                continue;
            }
            int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
            forUtil.ReadBlock(@in, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);
            Assert.AreEqual(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, (i + 1) * Lucene41PostingsFormat.BLOCK_SIZE), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
        }
        Assert.AreEqual(endPointer, @in.FilePointer);
        @in.Dispose();
    }
}
public virtual void TestBasic() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    TermQuery termQuery = new TermQuery(new Term("field", "value"));

    // should not throw exception with primitive query
    QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // should not throw exception with complex primitive query
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
    qwf = new QueryWrapperFilter(booleanQuery);
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // should not throw exception with non primitive Query (doesn't implement
    // Query#createWeight)
    qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);
}
private Directory MakeIndex() {
    Directory dir = new RAMDirectory();
    try {
        System.Random r = NewRandom();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        writer.SetUseCompoundFile(false);
        for (int d = 1; d <= NUM_DOCS; d++) {
            Document doc = new Document();
            for (int f = 1; f <= NUM_FIELDS; f++) {
                doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
            }
            writer.AddDocument(doc);
        }
        writer.Close();
    } catch (System.Exception e) {
        throw new System.SystemException("", e);
    }
    return dir;
}
public virtual void SetUp() {
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    Document doc = new Document();
    doc.Add(Field.Text("Field", "one two three four five"));
    doc.Add(Field.Text("sorter", "b"));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(Field.Text("Field", "one two three four"));
    doc.Add(Field.Text("sorter", "d"));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(Field.Text("Field", "one two three y"));
    doc.Add(Field.Text("sorter", "a"));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(Field.Text("Field", "one two x"));
    doc.Add(Field.Text("sorter", "c"));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("Field", "three"));
    filter = new AnonymousClassFilter(this);
}
public virtual void TestCachingWorks() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    MockFilter filter = new MockFilter();
    CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

    // first time, nested filter is called
    cacher.GetDocIdSet(reader);
    Assert.IsTrue(filter.WasCalled(), "first time");

    // make sure no exception if cache is holding the wrong bitset
    cacher.Bits(reader);
    cacher.GetDocIdSet(reader);

    // second time, nested filter should not be called
    filter.Clear();
    cacher.GetDocIdSet(reader);
    Assert.IsFalse(filter.WasCalled(), "second time");

    reader.Close();
}
// create an index for testing
private Directory GetIndex() {
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    RandomGen random = new RandomGen(this, NewRandom());
    for (int i = 0; i < INDEX_SIZE; ++i) { // don't decrease; if too low the problem doesn't show up
        Document doc = new Document();
        if ((i % 5) != 0) {
            // some documents must not have an entry in the first sort field
            doc.Add(new Field("publicationDate_", random.GetLuceneDate(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
        if ((i % 7) == 0) {
            // some documents to match the query (see below)
            doc.Add(new Field("content", "test", Field.Store.YES, Field.Index.ANALYZED));
        }
        // every document has a defined 'mandant' field
        doc.Add(new Field("mandant", System.Convert.ToString(i % 3), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();
    return indexStore;
}
public override void SetUp() {
    base.SetUp();
    Document doc;

    RAMDirectory rd1 = new RAMDirectory();
    IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("field1", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field2", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field4", "", Field.Store.NO, Field.Index.ANALYZED));
    iw1.AddDocument(doc);
    iw1.Close();

    RAMDirectory rd2 = new RAMDirectory();
    IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("field0", "", Field.Store.NO, Field.Index.ANALYZED));
    doc.Add(new Field("field1", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field3", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
    iw2.AddDocument(doc);
    iw2.Close();

    this.ir1 = IndexReader.Open(rd1, true);
    this.ir2 = IndexReader.Open(rd2, true);
}
public virtual void TestQuery() {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
    AddDoc("one", iw);
    AddDoc("two", iw);
    AddDoc("three four", iw);
    iw.Close();

    IndexSearcher is_Renamed = new IndexSearcher(dir);
    Hits hits = is_Renamed.Search(new MatchAllDocsQuery());
    Assert.AreEqual(3, hits.Length());

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq);
    Assert.AreEqual(3, hits.Length());

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq);
    Assert.AreEqual(1, hits.Length());

    // delete a document:
    is_Renamed.GetIndexReader().DeleteDocument(0);
    hits = is_Renamed.Search(new MatchAllDocsQuery());
    Assert.AreEqual(2, hits.Length());

    is_Renamed.Close();
}
public virtual void TestAllSegmentsSmall() {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);

    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    writer.Dispose();

    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 3;
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(1, sis.Size());
}
public virtual void TestDemo_Renamed_Method() {
    Analyzer analyzer = new StandardAnalyzer();

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead (note that the
    // parameter true will overwrite the index in that directory
    // if one exists):
    //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
    IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
    iwriter.SetMaxFieldLength(25000);
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
    iwriter.AddDocument(doc);
    iwriter.Close();

    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory);
    // Parse a simple query that searches for "text":
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    Hits hits = isearcher.Search(query);
    Assert.AreEqual(1, hits.Length());
    // Iterate through the results:
    for (int i = 0; i < hits.Length(); i++) {
        Document hitDoc = hits.Doc(i);
        Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
    }
    isearcher.Close();
    directory.Close();
}
public override void SetUp() {
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    long theLong = System.Int64.MaxValue;
    double theDouble = System.Double.MaxValue;
    sbyte theByte = System.SByte.MaxValue;
    short theShort = System.Int16.MaxValue;
    int theInt = System.Int32.MaxValue;
    float theFloat = System.Single.MaxValue;
    for (int i = 0; i < NUM_DOCS; i++) {
        Document doc = new Document();
        doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theByte", System.Convert.ToString(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    reader = IndexReader.Open(directory);
}
public virtual void TestMethod() {
    RAMDirectory directory = new RAMDirectory();
    System.String[] values = new System.String[] { "1", "2", "3", "4" };
    try {
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        for (int i = 0; i < values.Length; i++) {
            Document doc = new Document();
            doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }
        writer.Close();

        BooleanQuery booleanQuery1 = new BooleanQuery();
        booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), Occur.SHOULD);
        booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), Occur.SHOULD);

        BooleanQuery query = new BooleanQuery();
        query.Add(booleanQuery1, Occur.MUST);
        query.Add(new TermQuery(new Term(FIELD, "9")), Occur.MUST_NOT);

        IndexSearcher indexSearcher = new IndexSearcher(directory, true);
        ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(2, hits.Length, "Number of matched documents");
    } catch (System.IO.IOException e) {
        Assert.Fail(e.Message);
    }
}
private static Directory MakeIndex() {
    Directory dir = new RAMDirectory();
    try {
        System.Random r = new System.Random((System.Int32)(BASE_SEED + 42));
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(dir, analyzer, true);
        writer.SetUseCompoundFile(false);
        for (int d = 1; d <= NUM_DOCS; d++) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            for (int f = 1; f <= NUM_FIELDS; f++) {
                doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED));
            }
            writer.AddDocument(doc);
        }
        writer.Close();
    } catch (System.Exception e) {
        throw new System.SystemException("", e);
    }
    return dir;
}
public override void SetUp() {
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("field", "three"));
    filter = new AnonymousClassFilter();
}
public virtual void TestDemo_Renamed() {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.AddDocument(doc);
    iwriter.Close();

    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    // Iterate through the results:
    for (int i = 0; i < hits.Length; i++) {
        Document hitDoc = isearcher.Doc(hits[i].doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }
    isearcher.Close();
    directory.Close();
}
public virtual void TestSimpleSkip() {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++) {
        Document d1 = new Document();
        d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(d1);
    }
    writer.Flush();
    writer.Optimize();
    writer.Close();

    IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
    SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
    tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);

    for (int i = 0; i < 2; i++) {
        counter = 0;
        tp.Seek(term);

        CheckSkipTo(tp, 14, 185);   // no skips
        CheckSkipTo(tp, 17, 190);   // one skip on level 0
        CheckSkipTo(tp, 287, 200);  // one skip on level 1, two on level 0

        // this test would fail if we had only one skip level,
        // because then more bytes would be read from the freqStream
        CheckSkipTo(tp, 4800, 250); // one skip on level 2
    }
}
public virtual void TestDateCompression() {
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter iwriter = new IndexWriter(dir, iwc);

    const long @base = 13; // prime
    long day = 1000L * 60 * 60 * 24;

    Document doc = new Document();
    NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.Add(dvf);
    for (int i = 0; i < 300; ++i) {
        dvf.LongValue = @base + Random().Next(1000) * day;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size1 = DirSize(dir);

    for (int i = 0; i < 50; ++i) {
        dvf.LongValue = @base + Random().Next(1000) * day;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size2 = DirSize(dir);

    // make sure the new longs cost less than if they had only been packed
    Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
}
public virtual void TestFilterWorks() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < 500; i++) {
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true);

    SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.BitSpans(reader);
    DocIdSet docIdSet = result.DocIdSet;
    Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
    AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
    var spans = result.Positions;
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    int size = GetDocIdSetSize(docIdSet);
    Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
    for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); ) {
        SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
        Assert.IsTrue(info != null, "info is null and it shouldn't be");
        // The doc should indicate the bit is on
        AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
        // There should be two positions in each
        Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
    }
    reader.Close();
}
public virtual void TestMethod() {
    RAMDirectory directory = new RAMDirectory();
    System.String[] values = new System.String[] { "1", "2", "3", "4" };
    try {
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
        for (int i = 0; i < values.Length; i++) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.UN_TOKENIZED));
            writer.AddDocument(doc);
        }
        writer.Close();

        BooleanQuery booleanQuery1 = new BooleanQuery();
        booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
        booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

        BooleanQuery query = new BooleanQuery();
        query.Add(booleanQuery1, BooleanClause.Occur.MUST);
        query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

        IndexSearcher indexSearcher = new IndexSearcher(directory);
        Hits hits = indexSearcher.Search(query);
        Assert.AreEqual(2, hits.Length(), "Number of matched documents");
    } catch (System.IO.IOException e) {
        Assert.Fail(e.Message);
    }
}
public override void SetUp() {
    base.SetUp();
    directory = new RAMDirectory();
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    Document doc = new Document();
    doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
    IFieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
    doc.Add(repeatedField);
    doc.Add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory, true);
    query = new PhraseQuery();
}
public override void SetUp() {
    base.SetUp();
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Document doc = new Document();
    doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("field", "three"));
    filter = NewStaticFilterB();
}
public override void SetUp() {
    base.SetUp();
    Lucene.Net.Documents.Document doc;

    RAMDirectory rd1 = new RAMDirectory();
    IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field1", "the quick brown fox jumps", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    doc.Add(new Field("field2", "the quick brown fox jumps", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    doc.Add(new Field("field4", "", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.TOKENIZED));
    iw1.AddDocument(doc);
    iw1.Close();

    RAMDirectory rd2 = new RAMDirectory();
    IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field0", "", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.TOKENIZED));
    doc.Add(new Field("field1", "the fox jumps over the lazy dog", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    doc.Add(new Field("field3", "the fox jumps over the lazy dog", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    iw2.AddDocument(doc);
    iw2.Close();

    this.ir1 = IndexReader.Open(rd1);
    this.ir2 = IndexReader.Open(rd2);
}
public virtual void CreateDummySearcher() {
    // Create a dummy index with nothing in it.
    // This could possibly fail if Lucene starts checking for docid ranges...
    RAMDirectory rd = new RAMDirectory();
    IndexWriter iw = new IndexWriter(rd, new WhitespaceAnalyzer(), true);
    iw.Close();
    s = new IndexSearcher(rd);
}
public virtual void TestGetFieldNames() {
    RAMDirectory d = new RAMDirectory();
    // set up writer
    IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
    AddDocumentWithFields(writer);
    writer.Close();

    // set up reader
    IndexReader reader = IndexReader.Open(d);
    System.Collections.Hashtable fieldNames = (System.Collections.Hashtable) reader.GetFieldNames();
    Assert.IsTrue(fieldNames.Contains("keyword"));
    Assert.IsTrue(fieldNames.Contains("text"));
    Assert.IsTrue(fieldNames.Contains("unindexed"));
    Assert.IsTrue(fieldNames.Contains("unstored"));

    // add more documents
    writer = new IndexWriter(d, new StandardAnalyzer(), false);
    // want to get some more segments here
    for (int i = 0; i < 5 * writer.mergeFactor; i++) {
        AddDocumentWithFields(writer);
    }
    // new fields are in some different segments (we hope)
    for (int i = 0; i < 5 * writer.mergeFactor; i++) {
        AddDocumentWithDifferentFields(writer);
    }
    writer.Close();

    // verify fields again
    reader = IndexReader.Open(d);
    fieldNames = (System.Collections.Hashtable) reader.GetFieldNames();
    Assert.AreEqual(9, fieldNames.Count); // the following fields + an empty one (bug?!)
    Assert.IsTrue(fieldNames.Contains("keyword"));
    Assert.IsTrue(fieldNames.Contains("text"));
    Assert.IsTrue(fieldNames.Contains("unindexed"));
    Assert.IsTrue(fieldNames.Contains("unstored"));
    Assert.IsTrue(fieldNames.Contains("keyword2"));
    Assert.IsTrue(fieldNames.Contains("text2"));
    Assert.IsTrue(fieldNames.Contains("unindexed2"));
    Assert.IsTrue(fieldNames.Contains("unstored2"));

    // verify that only indexed fields were returned
    System.Collections.ICollection indexedFieldNames = reader.GetFieldNames(true);
    Assert.AreEqual(6, indexedFieldNames.Count);
    Assert.IsTrue(fieldNames.Contains("keyword"));
    Assert.IsTrue(fieldNames.Contains("text"));
    Assert.IsTrue(fieldNames.Contains("unstored"));
    Assert.IsTrue(fieldNames.Contains("keyword2"));
    Assert.IsTrue(fieldNames.Contains("text2"));
    Assert.IsTrue(fieldNames.Contains("unstored2"));

    // verify that only unindexed fields were returned
    System.Collections.ICollection unindexedFieldNames = reader.GetFieldNames(false);
    Assert.AreEqual(3, unindexedFieldNames.Count); // the following fields + an empty one
    Assert.IsTrue(fieldNames.Contains("unindexed"));
    Assert.IsTrue(fieldNames.Contains("unindexed2"));
}
public virtual void TestPhrasePrefix() {
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();
    Document doc4 = new Document();
    Document doc5 = new Document();
    doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
    doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
    doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
    doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
    doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc1);
    writer.AddDocument(doc2);
    writer.AddDocument(doc3);
    writer.AddDocument(doc4);
    writer.AddDocument(doc5);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    IndexReader ir = IndexReader.Open(indexStore);

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    System.String prefix = "pi";
    TermEnum te = ir.Terms(new Term("body", prefix + "*"));
    do {
        if (te.Term().Text().StartsWith(prefix)) {
            termsWithPrefix.Add(te.Term());
        }
    } while (te.Next());

    query1.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
    query2.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000).scoreDocs;
    Assert.AreEqual(2, result.Length);

    result = searcher.Search(query2, null, 1000).scoreDocs;
    Assert.AreEqual(0, result.Length);
}
public virtual void TestDocCount() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = null;
    IndexReader reader = null;
    int i;

    try {
        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

        // add 100 documents
        for (i = 0; i < 100; i++) {
            AddDoc(writer);
        }
        Assert.AreEqual(100, writer.DocCount());
        writer.Close();

        // delete 40 documents
        reader = IndexReader.Open(dir);
        for (i = 0; i < 40; i++) {
            reader.Delete(i);
        }
        reader.Close();

        // test doc count before segments are merged/index is optimized
        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        Assert.AreEqual(100, writer.DocCount());
        writer.Close();

        reader = IndexReader.Open(dir);
        Assert.AreEqual(100, reader.MaxDoc());
        Assert.AreEqual(60, reader.NumDocs());
        reader.Close();

        // optimize the index and check that the new doc count is correct
        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        writer.Optimize();
        Assert.AreEqual(60, writer.DocCount());
        writer.Close();

        // check that the index reader gives the same numbers.
        reader = IndexReader.Open(dir);
        Assert.AreEqual(60, reader.MaxDoc());
        Assert.AreEqual(60, reader.NumDocs());
        reader.Close();
    } catch (System.IO.IOException e) {
        System.Console.Error.WriteLine(e.StackTrace);
    }
}
public virtual void TestBefore() {
    // create an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);

    long now = (long) (DateTime.UtcNow - new DateTime(1970, 1, 1)).TotalMilliseconds;

    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    // add time that is in the past
    doc.Add(new Field("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 1000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // filter that should preserve matches
    //DateFilter df1 = DateFilter.Before("datefield", now);
    RangeFilter df1 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), false, true);
    // filter that should discard matches
    //DateFilter df2 = DateFilter.Before("datefield", now - 999999);
    RangeFilter df2 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(0, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), true, false);

    // search something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
    // search for something that does exist
    Query query2 = new TermQuery(new Term("body", "sunny"));

    Hits result;

    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(query1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2);
    Assert.AreEqual(1, result.Length());

    // run queries with DateFilter
    result = searcher.Search(query1, df1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query1, df2);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2, df1);
    Assert.AreEqual(1, result.Length());
    result = searcher.Search(query2, df2);
    Assert.AreEqual(0, result.Length());
}
public virtual void TestOutOfOrderCollection() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 10; i++) {
        writer.AddDocument(new Document());
    }
    writer.Commit();
    writer.Close();

    bool[] inOrder = new bool[] { false, true };
    System.String[] actualTSDCClass = new System.String[] { "OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector" };

    // Save the original value to set later.
    bool origVal = BooleanQuery.GetAllowDocsOutOfOrder();

    BooleanQuery.SetAllowDocsOutOfOrder(true);

    BooleanQuery bq = new BooleanQuery();
    // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
    // which delegates to BS if there are no mandatory clauses.
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
    // the clause instead of BQ.
    bq.SetMinimumNumberShouldMatch(1);
    try {
        IndexSearcher searcher = new IndexSearcher(dir);
        for (int i = 0; i < inOrder.Length; i++) {
            TopDocsCollector tdc = TopScoreDocCollector.create(3, inOrder[i]);
            Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + actualTSDCClass[i], tdc.GetType().FullName);

            searcher.Search(new MatchAllDocsQuery(), tdc);

            ScoreDoc[] sd = tdc.TopDocs().scoreDocs;
            Assert.AreEqual(3, sd.Length);
            for (int j = 0; j < sd.Length; j++) {
                Assert.AreEqual(j, sd[j].doc, "expected doc Id " + j + " found " + sd[j].doc);
            }
        }
    } finally {
        // Whatever happens, reset BooleanQuery.allowDocsOutOfOrder to the
        // original value. Don't set it to false in case the implementation in BQ
        // will change some day.
        BooleanQuery.SetAllowDocsOutOfOrder(origVal);
    }
}
public virtual void TestBefore() {
    // create an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    long now = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);

    Document doc = new Document();
    // add time that is in the past
    doc.Add(new Field("datefield", DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true);

    // filter that should preserve matches
    //DateFilter df1 = DateFilter.Before("datefield", now);
    TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), false, true);
    // filter that should discard matches
    //DateFilter df2 = DateFilter.Before("datefield", now - 999999);
    TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false);

    // search something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
    // search for something that does exist
    Query query2 = new TermQuery(new Term("body", "sunny"));

    ScoreDoc[] result;

    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(query1, null, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(query2, null, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);

    // run queries with DateFilter
    result = searcher.Search(query1, df1, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(query1, df2, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(query2, df1, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);
    result = searcher.Search(query2, df2, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
}
public virtual void TestBefore() {
    // create an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);

    long now = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;

    Document doc = new Document();
    // add time that is in the past
    doc.Add(Field.Keyword("datefield", DateField.TimeToString(now - 1000)));
    doc.Add(Field.Text("body", "Today is a very sunny day in New York City"));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // filter that should preserve matches
    DateFilter df1 = DateFilter.Before("datefield", now);
    // filter that should discard matches
    DateFilter df2 = DateFilter.Before("datefield", now - 999999);

    // search something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
    // search for something that does exist
    Query query2 = new TermQuery(new Term("body", "sunny"));

    Hits result;

    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(query1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2);
    Assert.AreEqual(1, result.Length());

    // run queries with DateFilter
    result = searcher.Search(query1, df1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query1, df2);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2, df1);
    Assert.AreEqual(1, result.Length());
    result = searcher.Search(query2, df2);
    Assert.AreEqual(0, result.Length());
}
public virtual void TestStopWordSearching() {
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    Directory ramDir = new RAMDirectory();
    var iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    var doc = new Document();
    doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
    iw.AddDocument(doc);
    iw.Close();

    var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer);
    mfqp.DefaultOperator = QueryParser.Operator.AND;
    var q = mfqp.Parse("the footest");
    var is_Renamed = new IndexSearcher(ramDir, true);
    var hits = is_Renamed.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    is_Renamed.Close();
}
public virtual void TestNot_Renamed() {
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

    Document d1 = new Document();
    d1.Add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));

    writer.AddDocument(d1, null);
    writer.Optimize(null);
    writer.Close();

    Searcher searcher = new IndexSearcher(store, true, null);
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
    Query query = parser.Parse("a NOT b");
    //System.out.println(query);
    ScoreDoc[] hits = searcher.Search(query, null, 1000, null).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
}
public virtual void TestMergeDocCount0() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 100;

    for (int i = 0; i < 250; i++) {
        AddDoc(writer);
        CheckInvariants(writer);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, false, null);
    reader.DeleteDocuments(new Term("content", "aaa"), null);
    reader.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED, null);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 5;

    // merge factor is changed, so check invariants after all adds
    for (int i = 0; i < 10; i++) {
        AddDoc(writer);
    }
    writer.Commit(null);
    ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync();
    writer.Commit(null);
    CheckInvariants(writer);
    Assert.AreEqual(10, writer.MaxDoc());

    writer.Close();
}
public virtual void TestWithPendingDeletes3() {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    SetUpDirs(dir, aux);
    IndexWriter writer = NewWriter(dir, false);

    // Adds 10 docs, then replaces them with another 10
    // docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        doc.Add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
        writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);
    }

    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery();
    q.Add(new Term("content", "bbb"));
    q.Add(new Term("content", "14"));
    writer.DeleteDocuments(q);

    writer.AddIndexesNoOptimize(new Directory[] { aux });
    writer.Optimize();
    writer.Commit();

    VerifyNumDocs(dir, 1039);
    VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
    VerifyTermDocs(dir, new Term("content", "bbb"), 9);

    writer.Close();
    dir.Close();
    aux.Close();
}
public void Test_Store_RAMDirectory() {
    Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

    // Index 1 doc
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();

    // now serialize it
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
    serializer.Serialize(memoryStream, ramDIR);

    // close the directory
    ramDIR.Close();
    ramDIR = null;

    // now deserialize
    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory) serializer.Deserialize(memoryStream);

    // add 1 more doc
    wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();

    // search
    Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2);
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
    Lucene.Net.Search.Query q = qp.Parse("value1");
    Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100);
    s.Close();

    Assert.AreEqual(2, topDocs.totalHits, "See the issue: LUCENENET-174");
}
public virtual void Test() {
    // Positive test of FieldInfos
    Assert.IsTrue(testDoc != null);
    FieldInfos fieldInfos = new FieldInfos();
    fieldInfos.Add(testDoc);
    // Since the complement is stored as well in the fields map
    Assert.IsTrue(fieldInfos.Size() == 7); // this is 7 b/c we are using the no-arg constructor

    RAMDirectory dir = new RAMDirectory();
    System.String name = "testFile";
    OutputStream output = dir.CreateFile(name);
    Assert.IsTrue(output != null);

    // Use a RAMOutputStream
    try {
        fieldInfos.Write(output);
        output.Close();
        Assert.IsTrue(output.Length() > 0);

        FieldInfos readIn = new FieldInfos(dir, name);
        Assert.IsTrue(fieldInfos.Size() == readIn.Size());

        FieldInfo info = readIn.FieldInfo("textField1");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector == false);

        info = readIn.FieldInfo("textField2");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector == true);

        dir.Close();
    } catch (System.IO.IOException e) {
        Assert.Fail(e.Message);
    }
}
public virtual void TestNoMergeAfterCopy() {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    SetUpDirs(dir, aux);

    IndexWriter writer = NewWriter(dir, false);
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(4);

    writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
    Assert.AreEqual(1060, writer.DocCount());
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Close();

    // make sure the index is correct
    VerifyNumDocs(dir, 1060);
}
public virtual void TestFilterWorks() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < 500; i++) {
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document, null);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);

    SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.BitSpans(reader, null);
    DocIdSet docIdSet = result.DocIdSet;
    Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
    AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
    var spans = result.Positions;
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    int size = GetDocIdSetSize(docIdSet);
    Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
    for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); ) {
        SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
        Assert.IsTrue(info != null, "info is null and it shouldn't be");
        // The doc should indicate the bit is on
        AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
        // There should be two positions in each
        Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
    }
    reader.Close();
}
public virtual void TestShrinkToAfterShortestMatch() {
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(this), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("content", new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("a b c d e f g h i j a k")))));
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher is_Renamed = new IndexSearcher(directory, true);

    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
    Spans spans = snq.GetSpans(is_Renamed.IndexReader);

    TopDocs topDocs = is_Renamed.Search(snq, 1);
    System.Collections.Hashtable payloadSet = new System.Collections.Hashtable();
    for (int i = 0; i < topDocs.ScoreDocs.Length; i++) {
        while (spans.Next()) {
            System.Collections.Generic.ICollection<byte[]> payloads = spans.GetPayload();
            for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext(); ) {
                CollectionsHelper.AddIfNotContains(payloadSet, new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[]) it.Current)));
            }
        }
    }
    Assert.AreEqual(2, payloadSet.Count);
    Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
    Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
}
public virtual void TestBasicDelete() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = null;
    IndexReader reader = null;
    Term searchTerm = new Term("content", "aaa");

    // add 100 documents with term : aaa
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < 100; i++) {
        AddDoc(writer, searchTerm.Text());
    }
    writer.Close();

    // OPEN READER AT THIS POINT - this should fix the view of the
    // index at the point of having 100 "aaa" documents and 0 "bbb"
    reader = IndexReader.Open(dir);
    Assert.AreEqual(100, reader.DocFreq(searchTerm), "first docFreq");
    AssertTermDocsCount("first reader", reader, searchTerm, 100);

    // DELETE DOCUMENTS CONTAINING TERM: aaa
    int deleted = 0;
    reader = IndexReader.Open(dir);
    deleted = reader.Delete(searchTerm);
    Assert.AreEqual(100, deleted, "deleted count");
    Assert.AreEqual(100, reader.DocFreq(searchTerm), "deleted docFreq");
    AssertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
    reader.Close();

    // CREATE A NEW READER and re-test
    reader = IndexReader.Open(dir);
    Assert.AreEqual(100, reader.DocFreq(searchTerm), "deleted docFreq");
    AssertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
    reader.Close();
}
public virtual void TestSetNorm_Renamed() {
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // add the same document four times
    IFieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);
    Document d1 = new Document();
    d1.Add(f1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.Close();

    // reset the boost of each instance of this document
    IndexReader reader = IndexReader.Open(store, false);
    reader.SetNorm(0, "field", 1.0f);
    reader.SetNorm(1, "field", 2.0f);
    reader.SetNorm(2, "field", 4.0f);
    reader.SetNorm(3, "field", 16.0f);
    reader.Close();

    // check that searches are ordered by this boost
    float[] scores = new float[4];
    new IndexSearcher(store, true).Search(new TermQuery(new Term("field", "word")), new AnonymousClassCollector(scores, this));

    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++) {
        Assert.IsTrue(scores[i] > lastScore);
        lastScore = scores[i];
    }
}
public override void SetUp() {
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
        Document doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        System.String txt = English.IntToEnglish(i) + ' ' + English.IntToEnglish(i + 1);
        doc.Add(new Field("field2", txt, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory, true);
    searcher.Similarity = similarity;
}
public virtual void TestCachingWorks() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    MockFilter filter = new MockFilter();
    CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

    // first time, nested filter is called
    cacher.Bits(reader);
    Assert.IsTrue(filter.WasCalled(), "first time");

    // second time, nested filter should not be called
    filter.Clear();
    cacher.Bits(reader);
    Assert.IsFalse(filter.WasCalled(), "second time");

    reader.Close();
}
public override void SetUp() {
    base.SetUp();

    RAMDirectory rd = new RAMDirectory();
    IndexWriter writer = new IndexWriter(rd, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

    Document d = new Document();
    d.Add(new Field(FIELD_T, "Optimize not deleting all files", Field.Store.YES, Field.Index.ANALYZED));
    d.Add(new Field(FIELD_C, "Deleted When I run an optimize in our production environment.", Field.Store.YES, Field.Index.ANALYZED));

    writer.AddDocument(d);
    writer.Close();

    searcher = new IndexSearcher(rd, true);
}
public virtual void TestNullDocIdSet() {
    // Tests that if a Filter produces a null DocIdSet, which is given to
    // IndexSearcher, everything works fine. This came up in LUCENE-1754.
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("c", "val", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
    writer.AddDocument(doc);
    writer.Close();

    // First verify the document is searchable.
    IndexSearcher searcher = new IndexSearcher(dir, true);
    Assert.AreEqual(1, searcher.Search(new MatchAllDocsQuery(), 10).TotalHits);

    // Now search w/ a Filter which returns a null DocIdSet
    Filter f = new AnonymousClassFilter(this);
    Assert.AreEqual(0, searcher.Search(new MatchAllDocsQuery(), f, 10).TotalHits);
    searcher.Close();
}
public virtual void TestKeepNoneOnInitDeletionPolicy() {
    for (int pass = 0; pass < 2; pass++) {
        bool useCompoundFile = (pass % 2) != 0;

        KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(this);

        Directory dir = new RAMDirectory();

        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.SetMaxBufferedDocs(10);
        writer.UseCompoundFile = useCompoundFile;
        for (int i = 0; i < 107; i++) {
            AddDoc(writer);
        }
        writer.Close();

        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.UseCompoundFile = useCompoundFile;
        writer.Optimize();
        writer.Close();

        Assert.AreEqual(2, policy.numOnInit);
        // If we are not auto committing then there should
        // be exactly 2 commits (one per close above):
        Assert.AreEqual(2, policy.numOnCommit);

        // Simplistic check: just verify the index is in fact readable:
        IndexReader reader = IndexReader.Open(dir, true);
        reader.Close();

        dir.Close();
    }
}
public virtual void TestDanish() {
    /* build an index */
    RAMDirectory danishIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
    for (int docnum = 0; docnum < words.Length; ++docnum) {
        Document doc = new Document();
        doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();

    IndexReader reader = IndexReader.Open(danishIndex, true);
    IndexSearcher search = new IndexSearcher(reader);

    Query q = new TermQuery(new Term("body", "body"));

    System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("da-DK").CompareInfo;
    Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    int numHits = search.Search(q, new TermRangeFilter("content", "H\u00D8T", "MAND", F, F, collator), 1000).TotalHits;
    Assert.AreEqual(1, numHits, "The index Term should be included.");

    numHits = search.Search(q, new TermRangeFilter("content", "H\u00C5T", "MAND", F, F, collator), 1000).TotalHits;
    Assert.AreEqual(0, numHits, "The index Term should not be included.");
    search.Close();
}
public virtual void TestMergeDocCount0() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(100);

    for (int i = 0; i < 250; i++) {
        AddDoc(writer);
        CheckInvariants(writer);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    reader.DeleteDocuments(new Term("content", "aaa"));
    reader.Close();

    writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(5);

    // merge factor is changed, so check invariants after all adds
    for (int i = 0; i < 10; i++) {
        AddDoc(writer);
    }
    CheckInvariants(writer);
    Assert.AreEqual(10, writer.DocCount());

    writer.Close();
}
public virtual void TestMethod() { RAMDirectory directory = new RAMDirectory(); System.String[] categories = new System.String[] { "food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink" }; Query rw1 = null; Query rw2 = null; IndexReader reader = null; try { IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } writer.Close(); reader = IndexReader.Open(directory); PrefixQuery query = new PrefixQuery(new Term("category", "foo")); rw1 = query.Rewrite(reader); BooleanQuery bq = new BooleanQuery(); bq.Add(query, BooleanClause.Occur.MUST); rw2 = bq.Rewrite(reader); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } Assert.AreEqual(GetCount(reader, rw1), GetCount(reader, rw2), "Number of Clauses Mismatch"); }
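// GetCount is a helper defined elsewhere. Given the "Number of Clauses Mismatch" message, it is
// assumed to count the clauses of a rewritten query; a plausible sketch (the failure branch for
// unexpected rewrite shapes is an assumption):
private static int GetCount(IndexReader r, Query q)
{
    if (q is BooleanQuery)
    {
        return ((BooleanQuery) q).GetClauses().Length;
    }
    Assert.Fail("unexpected rewritten query: " + q);
    return -1; // unreachable
}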
public virtual void TestMultiTermDocs() { RAMDirectory ramDir1 = new RAMDirectory(); AddDoc(ramDir1, "test foo", true); RAMDirectory ramDir2 = new RAMDirectory(); AddDoc(ramDir2, "test blah", true); RAMDirectory ramDir3 = new RAMDirectory(); AddDoc(ramDir3, "test wow", true); IndexReader[] readers1 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir3) }; IndexReader[] readers2 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir2), IndexReader.Open(ramDir3) }; MultiReader mr2 = new MultiReader(readers1); MultiReader mr3 = new MultiReader(readers2); // test mixing up TermDocs and TermEnums from different readers. TermDocs td2 = mr2.TermDocs(); TermEnum te3 = mr3.Terms(new Term("body", "wow")); td2.Seek(te3); int ret = 0; // This should blow up if we forget to check that the TermEnum is from the same // reader as the TermDocs. while (td2.Next()) { ret += td2.Doc(); } td2.Close(); te3.Close(); // really a dummy assert to ensure that we got some docs and to ensure that // nothing is optimized out. Assert.IsTrue(ret > 0); }
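// The AddDoc overload used above is assumed to index the text into a "body" field (the
// TermEnum lookup targets Term("body", ...)), with the bool taken to be the IndexWriter
// "create" flag; a minimal sketch:
private void AddDoc(RAMDirectory ramDir, System.String s, bool create)
{
    IndexWriter iw = new IndexWriter(ramDir, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("body", s, Field.Store.YES, Field.Index.ANALYZED));
    iw.AddDocument(doc);
    iw.Close();
}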
/// <summary> Sets up a RAMDirectory, and adds documents (using English.intToEnglish()) with three fields: field, multiField and noPayloadField, /// and analyzes them using the PayloadAnalyzer /// </summary> /// <param name="similarity">The Similarity class to use in the Searcher /// </param> /// <param name="numDocs">The number of docs to add /// </param> /// <returns> An IndexSearcher /// </returns> /// <throws> IOException </throws> public virtual IndexSearcher SetUp(Similarity similarity, int numDocs) { RAMDirectory directory = new RAMDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(this); IndexWriter writer = new IndexWriter(directory, analyzer, true); writer.SetSimilarity(similarity); //writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new Field(FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field(MULTI_FIELD, English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field(NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } //writer.optimize(); writer.Close(); IndexSearcher searcher = new IndexSearcher(directory); searcher.SetSimilarity(similarity); return(searcher); }
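// A hypothetical usage sketch of SetUp (DefaultSimilarity and the query term are assumptions):
// IndexSearcher searcher = SetUp(new DefaultSimilarity(), 100);
// TopDocs td = searcher.Search(new TermQuery(new Term(FIELD, "one")), null, 10);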
private float CheckPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults) { query.SetSlop(slop); RAMDirectory ramDir = new RAMDirectory(); WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); IndexWriter writer = new IndexWriter(ramDir, analyzer, MaxFieldLength.UNLIMITED); writer.AddDocument(doc); writer.Close(); IndexSearcher searcher = new IndexSearcher(ramDir); TopDocs td = searcher.Search(query, null, 10); //System.out.println("slop: "+slop+" query: "+query+" doc: "+doc+" Expecting number of hits: "+expectedNumResults+" maxScore="+td.getMaxScore()); Assert.AreEqual(expectedNumResults, td.TotalHits, "slop: " + slop + " query: " + query + " doc: " + doc + " Wrong number of hits"); //QueryUtils.check(query,searcher); searcher.Close(); ramDir.Close(); return(td.GetMaxScore()); }
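// A hypothetical usage sketch of CheckPhraseQuery: index one document and expect a single
// sloppy-phrase hit (the "field" name and contents are assumptions):
// Document doc = new Document();
// doc.Add(new Field("field", "one two three", Field.Store.YES, Field.Index.ANALYZED));
// PhraseQuery query = new PhraseQuery();
// query.Add(new Term("field", "one"));
// query.Add(new Term("field", "three"));
// float maxScore = CheckPhraseQuery(doc, query, 1, 1); // slop 1 bridges the one-position gap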
public virtual void TestNoCopySegments() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); SetUpDirs(dir, aux); IndexWriter writer = NewWriter(dir, false); writer.SetMaxBufferedDocs(9); writer.MergeFactor = 4; AddDocs(writer, 2); writer.AddIndexesNoOptimize(new Directory[] { aux }); Assert.AreEqual(1032, writer.MaxDoc()); Assert.AreEqual(2, writer.GetSegmentCount()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 1032); }
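// VerifyNumDocs is a helper defined elsewhere; a minimal sketch of what it is assumed to do:
private void VerifyNumDocs(Directory dir, int numDocs)
{
    IndexReader reader = IndexReader.Open(dir);
    Assert.AreEqual(numDocs, reader.MaxDoc());
    Assert.AreEqual(numDocs, reader.NumDocs());
    reader.Close();
}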
public override void SetUp() { base.SetUp(); dir = new RAMDirectory(); }
public virtual void TestKnownSetOfDocuments() { System.String[] termArray = new System.String[] { "eating", "chocolate", "in", "a", "computer", "lab", "grows", "old", "colored", "with", "an" }; System.String test1 = "eating chocolate in a computer lab"; //6 terms System.String test2 = "computer in a computer lab"; //5 terms System.String test3 = "a chocolate lab grows old"; //5 terms System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms System.Collections.IDictionary test4Map = new System.Collections.Hashtable(); test4Map["chocolate"] = 3; test4Map["lab"] = 2; test4Map["eating"] = 1; test4Map["computer"] = 1; test4Map["with"] = 1; test4Map["a"] = 1; test4Map["colored"] = 1; test4Map["in"] = 1; test4Map["an"] = 1; test4Map["old"] = 1; Document testDoc1 = new Document(); SetupDoc(testDoc1, test1); Document testDoc2 = new Document(); SetupDoc(testDoc2, test2); Document testDoc3 = new Document(); SetupDoc(testDoc3, test3); Document testDoc4 = new Document(); SetupDoc(testDoc4, test4); Directory dir = new RAMDirectory(); try { IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true); Assert.IsTrue(writer != null); writer.AddDocument(testDoc1); writer.AddDocument(testDoc2); writer.AddDocument(testDoc3); writer.AddDocument(testDoc4); writer.Close(); IndexSearcher knownSearcher = new IndexSearcher(dir); TermEnum termEnum = knownSearcher.reader.Terms(); TermDocs termDocs = knownSearcher.reader.TermDocs(); //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length); Similarity sim = knownSearcher.GetSimilarity(); while (termEnum.Next() == true) { Term term = termEnum.Term(); //System.out.println("Term: " + term); termDocs.Seek(term); while (termDocs.Next()) { int docId = termDocs.Doc(); int freq = termDocs.Freq(); //System.out.println("Doc Id: " + docId + " freq " + freq); TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field"); Assert.IsTrue(vector != null); float tf = sim.Tf(freq); float idf = sim.Idf(term, knownSearcher); //float qNorm = sim.queryNorm() //This is fine since we don't have stop words float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length); //float coord = sim.coord() //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm); System.String[] vTerms = vector.GetTerms(); int[] freqs = vector.GetTermFrequencies(); for (int i = 0; i < vTerms.Length; i++) { if (term.Text().Equals(vTerms[i]) == true) { Assert.IsTrue(freqs[i] == freq); } } } //System.out.println("--------"); } Query query = new TermQuery(new Term("Field", "chocolate")); Hits hits = knownSearcher.Search(query); //doc 3 should be the first hit b/c it is the shortest match Assert.IsTrue(hits.Length() == 3); float score = hits.Score(0); /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString()); * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0))); * System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString()); * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1))); * System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString()); * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/ Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString())); Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString())); Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString())); TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field"); Assert.IsTrue(vector2 != null); //System.out.println("Vector: " + vector); System.String[] terms = vector2.GetTerms(); int[] freqs2 = vector2.GetTermFrequencies(); Assert.IsTrue(terms != null && terms.Length == 10); for (int i = 0; i < terms.Length; i++) { System.String term = terms[i]; //System.out.println("Term: " + term); int freq = freqs2[i]; Assert.IsTrue(test4.IndexOf(term) != -1); System.Object tmpFreqInt = test4Map[term]; Assert.IsTrue(tmpFreqInt != null); System.Int32 freqInt = (System.Int32)tmpFreqInt; Assert.IsTrue(freqInt == freq); } knownSearcher.Close(); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(e.Message); } }
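// SetupDoc is assumed to add the text under a "Field" field with term vectors enabled, since
// the test reads GetTermFreqVector(docId, "Field"); a minimal sketch:
private void SetupDoc(Document doc, System.String text)
{
    doc.Add(new Field("Field", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
}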
public virtual void TestExpirationTimeDeletionPolicy() { double SECONDS = 2.0; bool autoCommit = false; bool useCompoundFile = true; Directory dir = new RAMDirectory(); ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); long lastDeleteTime = 0; for (int i = 0; i < 7; i++) { // Record last time when writer performed deletes of // past commits lastDeleteTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } writer.Close(); // Make sure to sleep long enough so that some commit // points will be deleted: System.Threading.Thread.Sleep(System.TimeSpan.FromSeconds(SECONDS / 5.0)); } // First, make sure the policy in fact deleted something: Assert.IsTrue(policy.numDelete > 0, "no commits were deleted"); // Then simplistic check: just verify that the // segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); while (gen > 0) { try { IndexReader reader = IndexReader.Open(dir); reader.Close(); fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); long modTime = dir.FileModified(fileName); Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000), "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted"); } catch (System.IO.IOException) { // OK break; } dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } dir.Close(); }
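// ExpirationTimeDeletionPolicy is an inner class of this test; a minimal sketch, assuming it
// deletes every commit point whose segments file is older than the configured number of
// seconds (relative to the newest commit) and tallies deletions in numDelete:
class ExpirationTimeDeletionPolicy : IndexDeletionPolicy
{
    internal int numDelete;
    private Directory dir;
    private double expirationTimeSeconds;

    public ExpirationTimeDeletionPolicy(object enclosingTest, Directory dir, double seconds)
    {
        // enclosing-instance plumbing omitted
        this.dir = dir;
        this.expirationTimeSeconds = seconds;
    }

    public void OnInit(System.Collections.IList commits) { OnCommit(commits); }

    public void OnCommit(System.Collections.IList commits)
    {
        IndexCommit lastCommit = (IndexCommit) commits[commits.Count - 1];
        long newestModTime = dir.FileModified(lastCommit.GetSegmentsFileName());
        for (int i = 0; i < commits.Count - 1; i++)
        {
            IndexCommit commit = (IndexCommit) commits[i];
            long modTime = dir.FileModified(commit.GetSegmentsFileName());
            if (newestModTime - modTime > expirationTimeSeconds * 1000)
            {
                commit.Delete();
                numDelete++;
            }
        }
    }
}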
public virtual void TestKeepAllDeletionPolicy() { for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; // Never deletes a commit KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this); Directory dir = new RAMDirectory(); policy.dir = dir; IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.SetMergeScheduler(new SerialMergeScheduler()); for (int i = 0; i < 107; i++) { AddDoc(writer); if (autoCommit && i % 10 == 0) { writer.Commit(); } } writer.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetUseCompoundFile(useCompoundFile); writer.Optimize(); writer.Close(); Assert.AreEqual(2, policy.numOnInit); if (!autoCommit) { // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(2, policy.numOnCommit); } // Test listCommits System.Collections.ICollection commits = IndexReader.ListCommits(dir); if (!autoCommit) { // 1 from opening writer + 2 from closing writer Assert.AreEqual(3, commits.Count); } else { // 1 from opening writer + 2 from closing writer + // 11 from calling writer.commit() explicitly above Assert.AreEqual(14, commits.Count); } System.Collections.IEnumerator it = commits.GetEnumerator(); // Make sure we can open a reader on each commit: while (it.MoveNext()) { IndexCommit commit = (IndexCommit)it.Current; IndexReader r = IndexReader.Open(commit, null); r.Close(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); while (gen > 0) { IndexReader reader = IndexReader.Open(dir); reader.Close(); dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // We've now removed a commit point, which should have orphaned // at least one index file. Open & close a writer and assert that // it actually removed something: int preCount = dir.ListAll().Length; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED); writer.Close(); int postCount = dir.ListAll().Length; Assert.IsTrue(postCount < preCount); } } dir.Close(); } }
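// KeepAllDeletionPolicy is an inner class of this test; a minimal sketch, assuming it only
// counts invocations and never deletes a commit (the numOnInit/numOnCommit fields are read by
// the assertions above; enclosing-instance plumbing omitted):
class KeepAllDeletionPolicy : IndexDeletionPolicy
{
    internal int numOnInit;
    internal int numOnCommit;
    internal Directory dir;
    public KeepAllDeletionPolicy(object enclosingTest) { }
    public void OnInit(System.Collections.IList commits) { numOnInit++; }
    public void OnCommit(System.Collections.IList commits) { numOnCommit++; }
}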
public virtual void TestKeepLastNDeletionPolicyWithCreates() { int N = 10; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } // this is a commit when autoCommit=false: writer.Close(); IndexReader reader = IndexReader.Open(dir, policy); reader.DeleteDocument(3); reader.SetNorm(5, "content", 2.0F); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(16, hits.Length); // this is a commit when autoCommit=false: reader.Close(); searcher.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); // This will not commit: there are no changes // pending because we opened for "create": writer.Close(); } Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit); if (!autoCommit) { Assert.AreEqual(3 * (N + 1), policy.numOnCommit); } IndexSearcher searcher2 = new IndexSearcher(dir); ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(0, hits2.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); // Work backwards through the commits to determine what the // expected hit count should be; only check this in the // autoCommit=false case: if (!autoCommit) { searcher2 = new IndexSearcher(reader); hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(expectedCount, hits2.Length); searcher2.Close(); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } } reader.Close(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (System.IO.IOException) { if (i != N) { throw; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
public virtual void TestKeepLastNDeletionPolicy() { int N = 5; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; Directory dir = new RAMDirectory(); KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); for (int j = 0; j < N + 1; j++) { IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int i = 0; i < 17; i++) { AddDoc(writer); } writer.Optimize(); writer.Close(); } Assert.IsTrue(policy.numDelete > 0); Assert.AreEqual(N + 1, policy.numOnInit); if (autoCommit) { Assert.IsTrue(policy.numOnCommit > 1); } else { Assert.AreEqual(N + 1, policy.numOnCommit); } // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); reader.Close(); if (i == N) { Assert.Fail("should have failed on commits prior to last " + N); } } catch (System.IO.IOException) { if (i != N) { throw; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
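// KeepLastNDeletionPolicy is an inner class of this test; a minimal sketch, assuming it keeps
// only the newest N commit points and tallies the counters the assertions read (numOnInit,
// numOnCommit, numDelete; enclosing-instance plumbing omitted):
class KeepLastNDeletionPolicy : IndexDeletionPolicy
{
    internal int numOnInit;
    internal int numOnCommit;
    internal int numDelete;
    private int numToKeep;

    public KeepLastNDeletionPolicy(object enclosingTest, int numToKeep) { this.numToKeep = numToKeep; }

    public void OnInit(System.Collections.IList commits) { numOnInit++; DoDeletes(commits); }

    public void OnCommit(System.Collections.IList commits) { numOnCommit++; DoDeletes(commits); }

    private void DoDeletes(System.Collections.IList commits)
    {
        // Delete all but the newest numToKeep commit points.
        for (int i = 0; i < commits.Count - numToKeep; i++)
        {
            ((IndexCommit) commits[i]).Delete();
            numDelete++;
        }
    }
}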