public virtual void TestByte()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(new NumericDocValuesField("value", 23));
    doc.Add(NewStringField("value", "23", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new NumericDocValuesField("value", -1));
    doc.Add(NewStringField("value", "-1", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new NumericDocValuesField("value", 4));
    doc.Add(NewStringField("value", "4", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    Sort sort = new Sort(new SortField("value", SortField.Type_e.BYTE));

    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(3, td.TotalHits);
    // numeric order
    Assert.AreEqual("-1", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("4", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));
    Assert.AreEqual("23", searcher.Doc(td.ScoreDocs[2].Doc).Get("value"));
    AssertNoFieldCaches();

    ir.Dispose();
    dir.Dispose();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory);
    Document doc = new Document();
    Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
    doc.Add(field);

    for (int i = 0; i < 5137; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);

    field.StringValue = "meaninglessnames";
    for (int i = 5138; i < 11377; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);

    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
public virtual void TestConstantScoreQueryAndFilter()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    doc.Add(NewStringField("field", "a", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("field", "b", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.Reader;
    w.Dispose();

    Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))));
    Query query = new ConstantScoreQuery(filterB);
    IndexSearcher s = NewSearcher(r);
    Assert.AreEqual(1, s.Search(query, filterB, 1).TotalHits); // Query for field:b, Filter field:b

    Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))));
    query = new ConstantScoreQuery(filterA);
    Assert.AreEqual(0, s.Search(query, filterB, 1).TotalHits); // Query field:a, Filter field:b

    r.Dispose();
    d.Dispose();
}
public virtual void TestString()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(new SortedDocValuesField("value", new BytesRef("foo")));
    doc.Add(NewStringField("value", "foo", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new SortedDocValuesField("value", new BytesRef("bar")));
    doc.Add(NewStringField("value", "bar", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING));

    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(2, td.TotalHits);
    // 'bar' comes before 'foo'
    Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));
    AssertNoFieldCaches();

    ir.Dispose();
    dir.Dispose();
}
public virtual void TestStartPositions()
{
    Directory dir = NewDirectory();

    // mimic StopAnalyzer
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, analyzer);
    Document doc = new Document();
    doc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
    writer.AddDocument(doc);
    Document doc2 = new Document();
    doc2.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
    writer.AddDocument(doc2);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // user queries on "starts-with quick"
    SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
    Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

    // user queries on "starts-with the quick"
    SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
    sfq = new SpanNotQuery(include, sfq);
    Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Analyzer = new MockAnalyzer(Random());
    Dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, Analyzer);
    config.SetMergePolicy(NewLogMergePolicy()); // we will use docids to validate
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, config);
    writer.AddDocument(Doc("lucene", "lucene is a very popular search engine library"));
    writer.AddDocument(Doc("solr", "solr is a very popular search server and is using lucene"));
    writer.AddDocument(Doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
    Reader = writer.Reader;
    writer.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
    Directory dir = NewDirectory();

    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("a", true);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

    Document doc = new Document();
    doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
    doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
    writer.AddDocument(config.Build(doc));
    if (Random().NextBoolean())
    {
        writer.Commit();
    }

    doc = new Document();
    doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    writer.AddDocument(config.Build(doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);

    // Per-top-reader state:
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.GetTopChildren(10, "b").ToString());

    // DrillDown:
    DrillDownQuery q = new DrillDownQuery(config);
    q.Add("a", "foo");
    q.Add("b", "baz");
    TopDocs hits = searcher.Search(q, 1);
    Assert.AreEqual(1, hits.TotalHits);

    IOUtils.Close(writer, searcher.IndexReader, dir);
}
public virtual void TestPrefixQuery_Mem()
{
    Directory directory = NewDirectory();

    string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" };
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
    for (int i = 0; i < categories.Length; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("category", categories[i], Field.Store.YES));
        writer.AddDocument(doc);
    }
    IndexReader reader = writer.Reader;

    PrefixQuery query = new PrefixQuery(new Term("category", "/Computers"));
    IndexSearcher searcher = NewSearcher(reader);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below");

    query = new PrefixQuery(new Term("category", "/Computers/Mac"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "One in /Computers/Mac");

    query = new PrefixQuery(new Term("category", ""));
    Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category");
    Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "everything");

    writer.Dispose();
    reader.Dispose();
    directory.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    fieldName = Random.NextBoolean() ? "field" : ""; // sometimes use an empty string as field name
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
            .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));
    Document doc = new Document();
    Field field = NewStringField(fieldName, "", Field.Store.NO);
    doc.Add(field);
    List<string> terms = new List<string>();
    int num = AtLeast(200);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomUnicodeString(Random);
        field.SetStringValue(s);
        terms.Add(s);
        writer.AddDocument(doc);
    }

    if (Verbose)
    {
        // utf16 order
        terms.Sort();
        Console.WriteLine("UTF16 order:");
        foreach (string s in terms)
        {
            Console.WriteLine(" " + UnicodeUtil.ToHexString(s));
        }
    }

    reader = writer.GetReader();
    searcher1 = NewSearcher(reader);
    searcher2 = NewSearcher(reader);
    writer.Dispose();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    writer.AddDocument(Doc(new Field[]
    {
        GetField("id", "0"),
        GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones")
    }));
    writer.AddDocument(Doc(new Field[]
    {
        GetField("id", "1"),
        GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"),
        GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones")
    }));
    writer.AddDocument(Doc(new Field[]
    {
        GetField("id", "2"),
        GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"),
        GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"),
        GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones")
    }));
    writer.AddDocument(Doc(new Field[]
    {
        GetField("id", "3"),
        GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"),
        GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas")
    }));
    writer.AddDocument(Doc(new Field[]
    {
        GetField("id", "4"),
        GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"),
        GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"),
        GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones")
    }));

    Reader = writer.Reader;
    writer.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestSimilarity_Mem()
{
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), store,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(new SimpleSimilarity()));
    Document d1 = new Document();
    d1.Add(NewTextField("field", "a c", Field.Store.YES));
    Document d2 = new Document();
    d2.Add(NewTextField("field", "a b c", Field.Store.YES));
    writer.AddDocument(d1);
    writer.AddDocument(d2);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("field", "a");
    Term b = new Term("field", "b");
    Term c = new Term("field", "c");

    searcher.Search(new TermQuery(b), new CollectorAnonymousInnerClassHelper(this));

    BooleanQuery bq = new BooleanQuery();
    bq.Add(new TermQuery(a), BooleanClause.Occur.SHOULD);
    bq.Add(new TermQuery(b), BooleanClause.Occur.SHOULD);
    //System.out.println(bq.toString("field"));
    searcher.Search(bq, new CollectorAnonymousInnerClassHelper2(this));

    PhraseQuery pq = new PhraseQuery();
    pq.Add(a);
    pq.Add(c);
    //System.out.println(pq.toString("field"));
    searcher.Search(pq, new CollectorAnonymousInnerClassHelper3(this));

    pq.Slop = 2;
    //System.out.println(pq.toString("field"));
    searcher.Search(pq, new CollectorAnonymousInnerClassHelper4(this));

    reader.Dispose();
    store.Dispose();
}
public virtual void TestDeletePartiallyWrittenFilesIfAbort()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    iwConf.SetCodec(CompressingCodec.RandomInstance(Random()));
    // disable CFS because this test checks file names
    iwConf.SetMergePolicy(NewLogMergePolicy(false));
    iwConf.SetUseCompoundFile(false);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

    Document validDoc = new Document();
    validDoc.Add(new IntField("id", 0, Field.Store.YES));
    iw.AddDocument(validDoc);
    iw.Commit();

    // make sure that #writeField will fail, to trigger an abort
    Document invalidDoc = new Document();
    FieldType fieldType = new FieldType();
    fieldType.Stored = true;
    invalidDoc.Add(new FieldAnonymousInnerClassHelper(this, fieldType));

    try
    {
        iw.AddDocument(invalidDoc);
        iw.Commit();
    }
    finally
    {
        int counter = 0;
        foreach (string fileName in dir.ListAll())
        {
            if (fileName.EndsWith(".fdt") || fileName.EndsWith(".fdx"))
            {
                counter++;
            }
        }
        // Only one .fdt and one .fdx file must have been found
        Assert.AreEqual(2, counter);
        iw.Dispose();
        dir.Dispose();
    }
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory);
    Document doc = new Document();
    Field field = NewTextField("field", "", Field.Store.NO);
    doc.Add(field);

    field.StringValue = "quick brown fox";
    iw.AddDocument(doc);
    field.StringValue = "jumps over lazy broun dog";
    iw.AddDocument(doc);
    field.StringValue = "jumps over extremely very lazy broxn dog";
    iw.AddDocument(doc);

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestAfter()
{
    // create an index
    Directory indexStore = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore);

    long now = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;

    Document doc = new Document();
    // add time that is in the future
    doc.Add(NewStringField("datefield", DateTools.TimeToString(now + 888888, DateTools.Resolution.MILLISECOND), Field.Store.YES));
    doc.Add(NewTextField("body", "Today is a very sunny day in New York City", Field.Store.YES));
    writer.AddDocument(doc);

    IndexReader reader = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);

    // filter that should preserve matches
    // DateFilter df1 = DateFilter.After("datefield", now);
    TermRangeFilter df1 = TermRangeFilter.NewStringRange("datefield",
        DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND),
        DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND), true, false);

    // filter that should discard matches
    // DateFilter df2 = DateFilter.After("datefield", now + 999999);
    TermRangeFilter df2 = TermRangeFilter.NewStringRange("datefield",
        DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND),
        DateTools.TimeToString(now + 999999999, DateTools.Resolution.MILLISECOND), false, true);

    // search something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForthis"));

    // search for something that does exist
    Query query2 = new TermQuery(new Term("body", "sunny"));

    ScoreDoc[] result;

    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(query1, null, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);

    result = searcher.Search(query2, null, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);

    // run queries with DateFilter
    result = searcher.Search(query1, df1, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);

    result = searcher.Search(query1, df2, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);

    result = searcher.Search(query2, df1, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);

    result = searcher.Search(query2, df2, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Populates a writer with random stuff. This must be fully reproducible with
/// the seed!
/// </summary>
public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, Random random)
{
    LineFileDocs lineFileDocs = new LineFileDocs(random);
    for (int i = 0; i < numdocs; i++)
    {
        writer.AddDocument(lineFileDocs.NextDoc());
    }
    lineFileDocs.Dispose();
}
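// A minimal usage sketch (hypothetical: this test name, the fixed seed, and the
// doc count are illustrative assumptions, not part of the original suite). It
// exercises the contract stated above: given the same seed, CreateRandomIndex
// feeds the writer the same LineFileDocs sequence, so a failing run can be
// replayed exactly.
public virtual void TestCreateRandomIndexUsageSketch()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    // Pass an explicitly seeded Random; reusing the seed replays the exact docs.
    CreateRandomIndex(AtLeast(50), writer, new Random(42));
    IndexReader reader = writer.Reader;
    writer.Dispose();
    Assert.IsTrue(reader.NumDocs > 0);
    reader.Dispose();
    dir.Dispose();
}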
public virtual void TestDocBoost_Mem()
{
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), store,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    Field f1 = NewTextField("field", "word", Field.Store.YES);
    Field f2 = NewTextField("field", "word", Field.Store.YES);
    f2.Boost = 2.0f;

    Documents.Document d1 = new Documents.Document();
    Documents.Document d2 = new Documents.Document();

    d1.Add(f1); // boost = 1
    d2.Add(f2); // boost = 2

    writer.AddDocument(d1);
    writer.AddDocument(d2);

    IndexReader reader = writer.Reader;
    writer.Dispose();

    float[] scores = new float[4];

    IndexSearcher searcher = NewSearcher(reader);
    searcher.Search(new TermQuery(new Term("field", "word")), new CollectorAnonymousInnerClassHelper(this, scores));

    float lastScore = 0.0f;

    for (int i = 0; i < 2; i++)
    {
        if (VERBOSE)
        {
            Console.WriteLine(searcher.Explain(new TermQuery(new Term("field", "word")), i));
        }
        Assert.IsTrue(scores[i] > lastScore, "score: " + scores[i] + " should be > lastScore: " + lastScore);
        lastScore = scores[i];
    }

    reader.Dispose();
    store.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory);
    Document doc = new Document();
    doc.Add(NewTextField(FN, "the quick brown fox jumps over the lazy ??? dog 493432 49344", Field.Store.NO));
    writer.AddDocument(doc);
    Reader = writer.Reader;
    writer.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestBasic()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(NewTextField("field", "value", Field.Store.NO));
    writer.AddDocument(doc);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    TermQuery termQuery = new TermQuery(new Term("field", "value"));

    // should not throw exception with primitive query
    QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);

    IndexSearcher searcher = NewSearcher(reader);
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(1, hits.TotalHits);

    // should not throw exception with complex primitive query
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
    qwf = new QueryWrapperFilter(booleanQuery);

    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(1, hits.TotalHits);

    // should not throw exception with non primitive Query (doesn't implement
    // Query#createWeight)
    qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));

    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(1, hits.TotalHits);

    // test a query with no hits
    termQuery = new TermQuery(new Term("field", "not_exist"));
    qwf = new QueryWrapperFilter(termQuery);

    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(0, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(0, hits.TotalHits);

    reader.Dispose();
    dir.Dispose();
}
internal virtual void MakeIndex()
{
    // we use RAMDirectory here, because we don't want to leave open files on Windows:
    d = new RAMDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    doc.Add(NewField("ints", "1", StringField.TYPE_NOT_STORED));
    w.AddDocument(doc);
    w.ForceMerge(1);
    r = w.Reader;
    w.Dispose();
    SubR = (AtomicReader)(r.Leaves()[0]).Reader();
}
public override void SetUp()
{
    base.SetUp();
    // Create an index writer.
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory);

    // oldest doc:
    // Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007
    writer.AddDocument(CreateDocument("Document 1", 1192001122000L));
    // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007
    writer.AddDocument(CreateDocument("Document 2", 1192001126000L));
    // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007
    writer.AddDocument(CreateDocument("Document 3", 1192101133000L));
    // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007
    writer.AddDocument(CreateDocument("Document 4", 1192104129000L));
    // latest doc:
    // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007
    writer.AddDocument(CreateDocument("Document 5", 1192209943000L));

    Reader = writer.Reader;
    writer.Dispose();
}
public static Directory GetDirectory(Analyzer analyzer, string[] vals)
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetMaxBufferedDocs(TestUtil.NextInt(Random(), 100, 1000))
            .SetMergePolicy(NewLogMergePolicy()));

    foreach (string s in vals)
    {
        Document d = new Document();
        d.Add(NewTextField(FIELD, s, Field.Store.YES));
        writer.AddDocument(d);
    }
    writer.Dispose();
    return directory;
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
    for (int i = 0; i < 100; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("field", Convert.ToString(i), Field.Store.NO));
        doc.Add(NewStringField("field2", Convert.ToString(i % 2 == 0), Field.Store.NO));
        iw.AddDocument(doc);
    }
    Reader = iw.Reader;
    iw.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    Writer = new RandomIndexWriter(Random(), Dir, Similarity, TimeZone);
    int numDocs = AtLeast(100);
    for (int i = 0; i < numDocs; i++)
    {
        Writer.AddDocument(new Document());
        if (Rarely())
        {
            Writer.Commit();
        }
    }
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
    int numDocs = AtLeast(100);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        iw.AddDocument(doc);
    }
    Ir = iw.Reader;
    iw.Dispose();
    @is = NewSearcher(Ir);
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    PerFieldSimilarityWrapper sim = new ExampleSimilarityProvider(this);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(sim);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, iwc);
    Document doc = new Document();
    Field field = NewTextField("foo", "", Field.Store.NO);
    doc.Add(field);
    Field field2 = NewTextField("bar", "", Field.Store.NO);
    doc.Add(field2);

    field.StringValue = "quick brown fox";
    field2.StringValue = "quick brown fox";
    iw.AddDocument(doc);
    field.StringValue = "jumps over lazy brown dog";
    field2.StringValue = "jumps over lazy brown dog";
    iw.AddDocument(doc);

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
    Searcher.Similarity = sim;
}
public virtual void TestBinaryFieldInIndex()
{
    FieldType ft = new FieldType();
    ft.Stored = true;
    IndexableField binaryFldStored = new StoredField("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(BinaryValStored));
    IndexableField stringFldStored = new Field("stringStored", BinaryValStored, ft);

    Documents.Document doc = new Documents.Document();
    doc.Add(binaryFldStored);
    doc.Add(stringFldStored);

    // test for field count
    Assert.AreEqual(2, doc.Fields.Count);

    // add the doc to a ram index
    Directory dir = NewDirectory();
    Random r = Random();
    RandomIndexWriter writer = new RandomIndexWriter(r, dir);
    writer.AddDocument(doc);

    // open a reader and fetch the document
    IndexReader reader = writer.Reader;
    Documents.Document docFromReader = reader.Document(0);
    Assert.IsTrue(docFromReader != null);

    // fetch the binary stored field and compare its content with the original one
    BytesRef bytes = docFromReader.GetBinaryValue("binaryStored");
    Assert.IsNotNull(bytes);
    // decode exactly the stored byte range (equivalent of Java's new String(bytes, offset, length, UTF_8))
    string binaryFldStoredTest = Encoding.UTF8.GetString(bytes.Bytes, bytes.Offset, bytes.Length);
    Assert.IsTrue(binaryFldStoredTest.Equals(BinaryValStored));

    // fetch the string field and compare its content with the original one
    string stringFldStoredTest = docFromReader.Get("stringStored");
    Assert.IsTrue(stringFldStoredTest.Equals(BinaryValStored));

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    Writer = new RandomIndexWriter(Random, Dir, Similarity, TimeZone);
    int numDocs = AtLeast(100);
    for (int i = 0; i < numDocs; i++)
    {
        Writer.AddDocument(new Document());
        if (Rarely())
        {
            Writer.Commit();
        }
    }
}
public override void SetUp()
{
    base.SetUp();

    // populate an index with 30 documents, this should be enough for the test.
    // The documents have no content - the test uses MatchAllDocsQuery().
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, Similarity, TimeZone);
    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(new Document());
    }
    Reader = writer.Reader;
    writer.Dispose();
}
public virtual void TestOutOfOrderCollection()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    for (int i = 0; i < 10; i++)
    {
        writer.AddDocument(new Document());
    }

    bool[] inOrder = new bool[] { false, true };
    string[] actualTSDCClass = new string[] { "OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector" };

    BooleanQuery bq = new BooleanQuery();
    // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2
    // which delegates to BS if there are no mandatory clauses.
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
    // the clause instead of BQ.
    bq.MinimumNumberShouldMatch = 1;

    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    for (int i = 0; i < inOrder.Length; i++)
    {
        TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.Create(3, inOrder[i]);
        Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + actualTSDCClass[i], tdc.GetType().FullName);

        searcher.Search(new MatchAllDocsQuery(), tdc);

        ScoreDoc[] sd = tdc.GetTopDocs().ScoreDocs;
        Assert.AreEqual(3, sd.Length);
        for (int j = 0; j < sd.Length; j++)
        {
            Assert.AreEqual(j, sd[j].Doc, "expected doc Id " + j + " found " + sd[j].Doc);
        }
    }
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestCaching()
{
    Directory dir = new RAMDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    TokenStream stream = new TokenStreamAnonymousInnerClassHelper(this);
    stream = new CachingTokenFilter(stream);
    doc.Add(new TextField("preanalyzed", stream));

    // 1) we consume all tokens twice before we add the doc to the index
    CheckTokens(stream);
    stream.Reset();
    CheckTokens(stream);

    // 2) now add the document to the index and verify that all tokens are indexed
    //    don't reset the stream here, the DocumentWriter should do that implicitly
    writer.AddDocument(doc);

    IndexReader reader = writer.Reader;
    DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term1"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(0, termPositions.NextPosition());

    termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term2"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(2, termPositions.Freq());
    Assert.AreEqual(1, termPositions.NextPosition());
    Assert.AreEqual(3, termPositions.NextPosition());

    termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term3"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(2, termPositions.NextPosition());
    reader.Dispose();
    writer.Dispose();

    // 3) reset stream and consume tokens again
    stream.Reset();
    CheckTokens(stream);
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    Writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, Dir);
    int numDocs = AtLeast(100);
    for (int i = 0; i < numDocs; i++)
    {
        Writer.AddDocument(new Document());
        if (Rarely())
        {
            Writer.Commit();
        }
    }
}
public virtual void TestMultiValuedNRQ()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

    const string format = "D11";

    int num = AtLeast(500);
    for (int l = 0; l < num; l++)
    {
        Document doc = new Document();
        for (int m = 0, c = Random().Next(10); m <= c; m++)
        {
            int value = Random().Next(int.MaxValue);
            doc.Add(NewStringField("asc", value.ToString(format), Field.Store.NO));
            doc.Add(new IntField("trie", value, Field.Store.NO));
        }
        writer.AddDocument(doc);
    }
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    num = AtLeast(50);
    for (int i = 0; i < num; i++)
    {
        int lower = Random().Next(int.MaxValue);
        int upper = Random().Next(int.MaxValue);
        if (lower > upper)
        {
            int a = lower;
            lower = upper;
            upper = a;
        }
        TermRangeQuery cq = TermRangeQuery.NewStringRange("asc", lower.ToString(format), upper.ToString(format), true, true);
        NumericRangeQuery<int> tq = NumericRangeQuery.NewIntRange("trie", lower, upper, true, true);
        TopDocs trTopDocs = searcher.Search(cq, 1);
        TopDocs nrTopDocs = searcher.Search(tq, 1);
        Assert.AreEqual(trTopDocs.TotalHits, nrTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
    }
    reader.Dispose();
    directory.Dispose();
}
public override void SetUp()
{
    base.SetUp();

    // populate an index with 30 documents, this should be enough for the test.
    // The documents have no content - the test uses MatchAllDocsQuery().
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, Dir);
    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(new Document());
    }
    Reader = writer.GetReader();
    writer.Dispose();
}
public virtual void TestBasics()
{
    Directory indexStore = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore);
    for (int i = 0; i < 5; i++)
    {
        Document doc = new Document();
        doc.Add(new StringField("string", "a" + i, Field.Store.NO));
        doc.Add(new StringField("string", "b" + i, Field.Store.NO));
        writer.AddDocument(doc);
    }
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    TotalHitCountCollector c = new TotalHitCountCollector();
    searcher.Search(new MatchAllDocsQuery(), null, c);
    Assert.AreEqual(5, c.TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true))
            .SetMergePolicy(NewLogMergePolicy()));
    //writer.setNoCFSRatio(1.0);
    //writer.infoStream = System.out;

    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        int mod3 = i % 3;
        int mod2 = i % 2;
        if (mod2 == 0 && mod3 == 0)
        {
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
            ft.StoreTermVectorPositions = true;
        }
        else if (mod2 == 0)
        {
            ft.StoreTermVectors = true;
            ft.StoreTermVectorPositions = true;
        }
        else if (mod3 == 0)
        {
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
        }
        else
        {
            ft.StoreTermVectors = true;
        }
        doc.Add(new Field("field", English.IntToEnglish(i), ft));

        // test no term vectors too
        doc.Add(new TextField("noTV", English.IntToEnglish(i), Field.Store.YES));

        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    writer.Dispose();
}
public virtual void TestMissingTerms()
{
    string fieldName = "field1";
    Directory rd = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), rd);
    for (int i = 0; i < 100; i++)
    {
        Document doc = new Document();
        int term = i * 10; // terms are units of 10
        doc.Add(NewStringField(fieldName, "" + term, Field.Store.YES));
        w.AddDocument(doc);
    }
    IndexReader reader = w.Reader;
    w.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    int numDocs = reader.NumDocs;
    ScoreDoc[] results;
    MatchAllDocsQuery q = new MatchAllDocsQuery();

    List<string> terms = new List<string>();
    terms.Add("5");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, terms.ToArray()), numDocs).ScoreDocs;
    Assert.AreEqual(0, results.Length, "Must match nothing");

    terms = new List<string>();
    terms.Add("10");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, terms.ToArray()), numDocs).ScoreDocs;
    Assert.AreEqual(1, results.Length, "Must match 1");

    terms = new List<string>();
    terms.Add("10");
    terms.Add("20");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, terms.ToArray()), numDocs).ScoreDocs;
    Assert.AreEqual(2, results.Length, "Must match 2");

    reader.Dispose();
    rd.Dispose();
}
public virtual void TestNoOrds()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir);
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.StoreTermVectors = true;
    doc.Add(new Field("foo", "this is a test", ft));
    iw.AddDocument(doc);
    AtomicReader ir = GetOnlySegmentReader(iw.GetReader());
    Terms terms = ir.GetTermVector(0, "foo");
    Assert.IsNotNull(terms);
    TermsEnum termsEnum = terms.GetEnumerator();
    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this")));

    try
    {
        var _ = termsEnum.Ord;
        Assert.Fail();
    }
    catch (Exception expected) when (expected.IsUnsupportedOperationException())
    {
        // expected exception
    }

    try
    {
        termsEnum.SeekExact(0);
        Assert.Fail();
    }
    catch (Exception expected) when (expected.IsUnsupportedOperationException())
    {
        // expected exception
    }

    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}