public override void SetUp()
{
    base.SetUp();
    INDEX_SIZE = AtLeast(2000);
    Index = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Index);
    RandomGen random = new RandomGen(this, Random());
    // don't decrease INDEX_SIZE; if it is too low the problem doesn't show up
    for (int i = 0; i < INDEX_SIZE; ++i)
    {
        Document doc = new Document();
        if ((i % 5) != 0) // some documents must not have an entry in the first sort field
        {
            doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES));
        }
        if ((i % 7) == 0) // some documents to match the query (see below)
        {
            doc.Add(NewTextField("content", "test", Field.Store.YES));
        }
        // every document has a defined 'mandant' field
        doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES));
        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    writer.Dispose();
    Query = new TermQuery(new Term("content", "test"));
}
private void CreateRandomIndexes(int maxSegments) { dir = NewDirectory(); numDocs = AtLeast(150); int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5); ISet<string> randomTerms = new HashSet<string>(); while (randomTerms.size() < numTerms) { randomTerms.add(TestUtil.RandomSimpleString(Random())); } terms = new List<string>(randomTerms); int seed = Random().Next(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed))); iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort)); iw = new RandomIndexWriter(new Random(seed), dir, iwc); for (int i = 0; i < numDocs; ++i) { Document doc = RandomDocument(); iw.AddDocument(doc); if (i == numDocs / 2 || (i != numDocs - 1 && Random().nextInt(8) == 0)) { iw.Commit(); } if (Random().nextInt(15) == 0) { string term = RandomInts.RandomFrom(Random(), terms); iw.DeleteDocuments(new Term("s", term)); } } reader = iw.Reader; }
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    FieldName = Random().NextBoolean() ? "field" : ""; // sometimes use an empty string as field name
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
    Document doc = new Document();
    Field field = NewStringField(FieldName, "", Field.Store.NO);
    doc.Add(field);
    List<string> terms = new List<string>();
    int num = AtLeast(200);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomUnicodeString(Random());
        field.StringValue = s;
        terms.Add(s);
        writer.AddDocument(doc);
    }

    if (VERBOSE)
    {
        // utf16 order
        terms.Sort();
        Console.WriteLine("UTF16 order:");
        foreach (string s in terms)
        {
            Console.WriteLine(" " + UnicodeUtil.ToHexString(s));
        }
    }

    Reader = writer.Reader;
    Searcher1 = NewSearcher(Reader);
    Searcher2 = NewSearcher(Reader);
    writer.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    int numDocs = TestUtil.NextInt(Random(), 2049, 4000);
    for (int i = 0; i < numDocs; i++)
    {
        var document = new Document
        {
            NewTextField("english", English.IntToEnglish(i), Field.Store.NO),
            NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO),
            NewStringField("byte", string.Empty + (unchecked((byte)Random().Next())), Field.Store.NO),
            NewStringField("short", string.Empty + ((short)Random().Next()), Field.Store.NO),
            new IntField("int", Random().Next(), Field.Store.NO),
            new LongField("long", Random().NextLong(), Field.Store.NO),
            new FloatField("float", Random().NextFloat(), Field.Store.NO),
            new DoubleField("double", Random().NextDouble(), Field.Store.NO),
            new NumericDocValuesField("intdocvalues", Random().Next()),
            new FloatDocValuesField("floatdocvalues", Random().NextFloat())
        };
        iw.AddDocument(document);
    }
    reader = iw.Reader;
    iw.Dispose();
    searcher = NewSearcher(reader);
}
public void TestMax()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
    doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
    doc.Add(NewStringField("id", "2", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();

    // slow wrapper does not support random access ordinals (there is no need for that!)
    IndexSearcher searcher = NewSearcher(ir, false);

    Sort sort = new Sort(new SortedSetSortField("value", false, Selector.MAX));

    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    assertEquals(2, td.TotalHits);
    // 'baz' comes before 'foo'
    assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
    assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
    assertNoFieldCaches();

    ir.Dispose();
    dir.Dispose();
}
public virtual void Test()
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    conf.SetCodec(new Lucene46Codec());
    RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);
    Document doc = new Document();
    // these fields should sometimes get term vectors, etc
    Field idField = NewStringField("id", "", Field.Store.NO);
    Field bodyField = NewTextField("body", "", Field.Store.NO);
    Field dvField = new NumericDocValuesField("dv", 5);
    doc.Add(idField);
    doc.Add(bodyField);
    doc.Add(dvField);
    for (int i = 0; i < 100; i++)
    {
        idField.StringValue = Convert.ToString(i);
        bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
        riw.AddDocument(doc);
        if (Random().Next(7) == 0)
        {
            riw.Commit();
        }
        // TODO: we should make a new format with a clean header...
        // if (Random().nextInt(20) == 0) {
        //     riw.DeleteDocuments(new Term("id", Integer.toString(i)));
        // }
    }
    riw.Dispose();
    CheckHeaders(dir);
    dir.Dispose();
}
public virtual void TestIndexing()
{
    DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete");
    BaseDirectoryWrapper d = NewFSDirectory(tmpDir);

    // We want to "see" files removed if Lucene removed them. This is still
    // worth running on Windows, since the IR opens and closes some files.
    if (d is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)d).NoDeleteOpenFile = false;
    }

    RandomIndexWriter w = new RandomIndexWriter(Random(), d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
    w.w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 5, 30));

    w.Commit();
    ThreadClass[] indexThreads = new ThreadClass[Random().Next(4)];
    long stopTime = Environment.TickCount + AtLeast(1000);
    for (int x = 0; x < indexThreads.Length; x++)
    {
        indexThreads[x] = new ThreadAnonymousInnerClassHelper(w, stopTime);
        indexThreads[x].Name = "Thread " + x;
        indexThreads[x].Start();
    }

    HashSet<string> allFiles = new HashSet<string>();

    DirectoryReader r = DirectoryReader.Open(d);
    while (Environment.TickCount < stopTime)
    {
        IndexCommit ic = r.IndexCommit;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: check files: " + ic.FileNames);
        }
        allFiles.AddAll(ic.FileNames);
        // Make sure no old files were removed
        foreach (string fileName in allFiles)
        {
            Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist");
        }
        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
        if (r2 != null)
        {
            r.Dispose();
            r = r2;
        }
        Thread.Sleep(1);
    }
    r.Dispose();

    foreach (ThreadClass t in indexThreads)
    {
        t.Join();
    }
    w.Dispose();
    d.Dispose();

    System.IO.Directory.Delete(tmpDir.FullName, true);
}
public override void SetUp()
{
    base.SetUp();
    // we generate awful regexps: good for testing.
    // but for the preflex codec the test can be very slow, so use fewer iterations.
    NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
    Document doc = new Document();
    Field field = NewStringField("field", "", Field.Store.YES);
    doc.Add(field);
    Terms = new SortedSet<BytesRef>();
    int num = AtLeast(200);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomUnicodeString(Random());
        field.StringValue = s;
        Terms.Add(new BytesRef(s));
        writer.AddDocument(doc);
    }

    TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
public virtual void TestPrefixQuery_Mem() { Directory directory = NewDirectory(); string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("category", categories[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader reader = writer.Reader; PrefixQuery query = new PrefixQuery(new Term("category", "/Computers")); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below"); query = new PrefixQuery(new Term("category", "/Computers/Mac")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "One in /Computers/Mac"); query = new PrefixQuery(new Term("category", "")); Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category"); Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "everything"); writer.Dispose(); reader.Dispose(); directory.Dispose(); }
public void BeforeClass() { Dir = NewDirectory(); Sdir1 = NewDirectory(); Sdir2 = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO)); writer.AddDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc); } writer.ForceMerge(1); swriter1.ForceMerge(1); swriter2.ForceMerge(1); writer.Dispose(); swriter1.Dispose(); swriter2.Dispose(); Reader = DirectoryReader.Open(Dir); Searcher = NewSearcher(Reader); MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true); MultiSearcher = NewSearcher(MultiReader); MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true); MultiSearcherDupls = NewSearcher(MultiReaderDupls); }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); var doc = new Document { NewStringField("id", "1", Field.Store.YES), NewTextField("body", "some contents and more contents", Field.Store.NO), new NumericDocValuesField("popularity", 5) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "2", Field.Store.YES), NewTextField("body", "another document with different contents", Field.Store .NO), new NumericDocValuesField("popularity", 20) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "3", Field.Store.YES), NewTextField("body", "crappy contents", Field.Store.NO), new NumericDocValuesField("popularity", 2) }; iw.AddDocument(doc); reader = iw.Reader; searcher = new IndexSearcher(reader); iw.Dispose(); }
public virtual void TestMethod() { Directory directory = NewDirectory(); string[] values = new string[] { "1", "2", "3", "4" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); for (int i = 0; i < values.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(FIELD, values[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader ir = writer.Reader; writer.Dispose(); BooleanQuery booleanQuery1 = new BooleanQuery(); booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD); booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD); BooleanQuery query = new BooleanQuery(); query.Add(booleanQuery1, BooleanClause.Occur.MUST); query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT); IndexSearcher indexSearcher = NewSearcher(ir); ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length, "Number of matched documents"); ir.Dispose(); directory.Dispose(); }
public virtual void TestReuseDocsEnumNoReuse() { Directory dir = NewDirectory(); Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat()); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp)); int numdocs = AtLeast(20); CreateRandomIndex(numdocs, writer, Random()); writer.Commit(); DirectoryReader open = DirectoryReader.Open(dir); foreach (AtomicReaderContext ctx in open.Leaves()) { AtomicReader indexReader = (AtomicReader)ctx.Reader(); Terms terms = indexReader.Terms("body"); TermsEnum iterator = terms.Iterator(null); IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>(); MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc()); while ((iterator.Next()) != null) { DocsEnum docs = iterator.Docs(Random().NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc()), null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE); enums[docs] = true; } Assert.AreEqual(terms.Size(), enums.Count); } IOUtils.Close(writer, open, dir); }
public override void SetUp() { base.SetUp(); _dir = NewDirectory(); _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = true; ft.StoreTermVectorPositions = true; Analyzer analyzer = new MockAnalyzer(Random()); Document doc; for (int i = 0; i < 100; i++) { doc = new Document(); doc.Add(new Field(_idFieldName, Random().toString(), ft)); doc.Add(new Field(_textFieldName, new StringBuilder(Random().toString()).append(Random().toString()).append( Random().toString()).toString(), ft)); doc.Add(new Field(_classFieldName, Random().toString(), ft)); _indexWriter.AddDocument(doc, analyzer); } _indexWriter.Commit(); _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader); }
public virtual void TestRollbackIntegrityWithBufferFlush()
{
    Directory dir = NewDirectory();
    RandomIndexWriter rw = new RandomIndexWriter(Random(), dir);
    for (int i = 0; i < 5; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES));
        rw.AddDocument(doc);
    }
    rw.Dispose();

    // If buffer size is small enough to cause a flush, errors ensue...
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND));

    for (int i = 0; i < 3; i++)
    {
        Document doc = new Document();
        string value = Convert.ToString(i);
        doc.Add(NewStringField("pk", value, Field.Store.YES));
        doc.Add(NewStringField("text", "foo", Field.Store.YES));
        w.UpdateDocument(new Term("pk", value), doc);
    }
    w.Rollback();

    IndexReader r = DirectoryReader.Open(dir);
    Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
    r.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.OmitNorms = true;
    Field field = NewField("field", "", customType);
    doc.Add(field);
    NumberFormatInfo df = new NumberFormatInfo();
    df.NumberDecimalDigits = 0;
    //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++)
    {
        field.StringValue = i.ToString(df);
        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    writer.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; Field f = NewField("foo", "this is a test test", ft); doc.Add(f); for (int i = 0; i < 100; i++) { w.AddDocument(doc); } IndexReader reader = w.Reader; w.Dispose(); Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test"))); DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS); while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { Assert.AreEqual(2, de.Freq()); } reader.Dispose(); dir.Dispose(); }
public virtual void TestString()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(NewStringField("value", "foo", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("value", "bar", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING));

    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(2, td.TotalHits);
    // 'bar' comes before 'foo'
    Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Populates a writer with random stuff. This must be fully reproducible with the seed!
/// </summary>
public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, long seed)
{
    Random random = new Random((int)seed);
    // primary source for our data is from linefiledocs, it's realistic.
    LineFileDocs lineFileDocs = new LineFileDocs(random);

    // TODO: we should add other fields that use things like docs&freqs but omit positions,
    // because linefiledocs doesn't cover all the possibilities.
    for (int i = 0; i < numdocs; i++)
    {
        Document document = lineFileDocs.NextDoc();
        // grab the title and add some SortedSet instances for fun
        string title = document.Get("titleTokenized");
        string[] split = title.Split("\\s+".ToCharArray());
        foreach (string trash in split)
        {
            document.Add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
        }
        // add a numeric dv field sometimes
        document.RemoveFields("sparsenumeric");
        if (random.Next(4) == 2)
        {
            document.Add(new NumericDocValuesField("sparsenumeric", random.Next()));
        }
        writer.AddDocument(document);
    }
    lineFileDocs.Dispose();
}
public void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO); doc.Add(field); for (int i = 0; i < 5137; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); field.StringValue = "meaninglessnames"; for (int i = 5138; i < 11377; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public virtual void TestDefault()
{
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();

    // create and open an index writer
    var iw = new RandomIndexWriter(Random(), indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
    // create and open a taxonomy writer
    var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    var config = Config;

    seedIndex(tw, iw, config);

    IndexReader ir = iw.Reader;
    tw.Commit();

    // prepare index reader and taxonomy.
    var tr = new DirectoryTaxonomyReader(taxoDir);

    // prepare searcher to search against
    IndexSearcher searcher = NewSearcher(ir);

    FacetsCollector sfc = PerformSearch(tr, ir, searcher);

    // Obtain facets results and hand-test them
    AssertCorrectResults(GetTaxonomyFacetCounts(tr, config, sfc));

    assertOrdinalsExist("$facets", ir);

    IOUtils.Close(tr, ir, iw, tw, indexDir, taxoDir);
}
public override void SetUp() { base.SetUp(); dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer (Random())); iwc.SetMergePolicy(NewLogMergePolicy()); var iw = new RandomIndexWriter(Random(), dir, iwc); var doc = new Document { NewStringField("id", "1", Field.Store.YES), NewTextField("body", "some contents and more contents", Field.Store.NO), new NumericDocValuesField("popularity", 5) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "2", Field.Store.YES), NewTextField("body", "another document with different contents", Field.Store .NO), new NumericDocValuesField("popularity", 20) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "3", Field.Store.YES), NewTextField("body", "crappy contents", Field.Store.NO), new NumericDocValuesField("popularity", 2) }; iw.AddDocument(doc); iw.ForceMerge(1); reader = iw.Reader; iw.Dispose(); }
public void TestFieldNotPresent() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int num = AtLeast(3); int skip = Random().Next(num); var terms = new List<Term>(); for (int i = 0; i < num; i++) { terms.Add(new Term("field" + i, "content1")); Document doc = new Document(); if (skip == i) { continue; } doc.Add(NewStringField("field" + i, "content1", Field.Store.YES)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader reader = w.Reader; w.Dispose(); assertEquals(1, reader.Leaves.size()); AtomicReaderContext context = reader.Leaves.First(); TermsFilter tf = new TermsFilter(terms); FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality()); reader.Dispose(); dir.Dispose(); }
public virtual void TestEmptyBucketWithMoreDocs()
{
    // this test checks the logic of nextDoc() when all sub scorers have docs
    // beyond the first bucket (for example). Currently, the code relies on the
    // 'more' variable to work properly, and this test ensures that if the logic
    // changes, we have a test to back it up.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    writer.Commit();
    IndexReader ir = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(ir);
    BooleanWeight weight = (BooleanWeight)(new BooleanQuery()).CreateWeight(searcher);

    BulkScorer[] scorers = new BulkScorer[] { new BulkScorerAnonymousInnerClassHelper() };

    BooleanScorer bs = new BooleanScorer(weight, false, 1, Arrays.AsList(scorers), new List<BulkScorer>(), scorers.Length);

    IList<int> hits = new List<int>();
    bs.Score(new CollectorAnonymousInnerClassHelper(this, hits));

    Assert.AreEqual(1, hits.Count, "should have only 1 hit");
    Assert.AreEqual(3000, (int)hits[0], "hit should have been docID=3000");
    ir.Dispose();
    directory.Dispose();
}
public virtual void TestDocsAndPositionsEnumStart()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(NewTextField("foo", "bar", Field.Store.NO));
    writer.AddDocument(doc);
    DirectoryReader reader = writer.Reader;
    AtomicReader r = GetOnlySegmentReader(reader);
    DocsAndPositionsEnum disi = r.TermPositionsEnum(new Term("foo", "bar"));
    int docid = disi.DocID();
    Assert.AreEqual(-1, docid);
    Assert.IsTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // now reuse and check again
    TermsEnum te = r.Terms("foo").Iterator(null);
    Assert.IsTrue(te.SeekExact(new BytesRef("bar")));
    disi = te.DocsAndPositions(null, disi);
    docid = disi.DocID();
    Assert.AreEqual(-1, docid);
    Assert.IsTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    writer.Dispose();
    r.Dispose();
    dir.Dispose();
}
public void TestReverse()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewStringField("value", "foo", Field.Store.NO));
    doc.Add(NewStringField("value", "bar", Field.Store.NO));
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("value", "baz", Field.Store.NO));
    doc.Add(NewStringField("id", "2", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    Sort sort = new Sort(new SortedSetSortField("value", true));

    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    assertEquals(2, td.TotalHits);
    // 'bar' comes before 'baz', so the reversed sort returns doc 2 first
    assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
    assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));

    ir.Dispose();
    dir.Dispose();
}
public static void BeforeClass() { Directory = NewDirectory(); Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, analyzer); Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("field", "one two three four five", Field.Store.YES)); doc.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES)); IndexableField repeatedField = NewTextField("repeated", "second part of a repeated field", Field.Store.YES); doc.Add(repeatedField); doc.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES)); writer.AddDocument(doc); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public static void BeforeClass() { Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir); int numDocs = AtLeast(300); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddSome(doc, AlwaysTerms); if (Random().Next(100) < 90) { AddSome(doc, CommonTerms); } if (Random().Next(100) < 50) { AddSome(doc, MediumTerms); } if (Random().Next(100) < 10) { AddSome(doc, RareTerms); } iw.AddDocument(doc); } iw.ForceMerge(1); iw.Dispose(); r = DirectoryReader.Open(Dir); atomicReader = GetOnlySegmentReader(r); Searcher = new IndexSearcher(atomicReader); Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(); }
// TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs

public virtual void BuildIndex(Directory dir)
{
    Random random = Random();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        int boost = Random().Next(255);
        Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES);
        f.Boost = boost;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(ByteTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();
    docs.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
    Document doc = new Document();
    Field field = NewStringField("field", "", Field.Store.NO);
    doc.Add(field);
    NumberFormatInfo df = new NumberFormatInfo();
    df.NumberDecimalDigits = 0;
    //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++)
    {
        field.StringValue = i.ToString(df);
        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
    if (VERBOSE)
    {
        Console.WriteLine("TEST: setUp searcher=" + Searcher);
    }
}
public virtual void TestRandom()
{
    string[] tokens = GetRandomTokens(10);
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();
    int numDocs = AtLeast(1000);
    int numDims = TestUtil.NextInt(Random(), 1, 7);
    IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);
    foreach (TestDoc testDoc in testDocs)
    {
        Document doc = new Document();
        doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
        testDoc.value = Random().NextFloat();
        doc.Add(new SingleDocValuesField("value", testDoc.value));
        for (int j = 0; j < numDims; j++)
        {
            if (testDoc.dims[j] != null)
            {
                doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
            }
        }
        w.AddDocument(config.Build(tw, doc));
    }

    // NRT open
    IndexSearcher searcher = NewSearcher(w.Reader);

    // NRT open
    var tr = new DirectoryTaxonomyReader(tw);

    ValueSource values = new SingleFieldSource("value");

    int iters = AtLeast(100);
    for (int iter = 0; iter < iters; iter++)
    {
        string searchToken = tokens[Random().Next(tokens.Length)];
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter content=" + searchToken);
        }
        FacetsCollector fc = new FacetsCollector();
        FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
        Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values);

        // Slow, yet hopefully bug-free, faceting:
        var expectedValues = new List<Dictionary<string, float?>>(numDims);
        for (int i = 0; i < numDims; i++)
        {
            expectedValues.Add(new Dictionary<string, float?>());
        }

        foreach (TestDoc doc in testDocs)
        {
            if (doc.content.Equals(searchToken, StringComparison.Ordinal))
            {
                for (int j = 0; j < numDims; j++)
                {
                    if (doc.dims[j] != null)
                    {
                        float? v = expectedValues[j].ContainsKey(doc.dims[j]) ? expectedValues[j][doc.dims[j]] : null;
                        if (v == null)
                        {
                            expectedValues[j][doc.dims[j]] = doc.value;
                        }
                        else
                        {
                            expectedValues[j][doc.dims[j]] = (float)v + doc.value;
                        }
                    }
                }
            }
        }

        List<FacetResult> expected = new List<FacetResult>();
        for (int i = 0; i < numDims; i++)
        {
            List<LabelAndValue> labelValues = new List<LabelAndValue>();
            float totValue = 0;
            foreach (KeyValuePair<string, float?> ent in expectedValues[i])
            {
                labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                totValue += ent.Value.Value;
            }
            SortLabelValues(labelValues);
            if (totValue > 0)
            {
                expected.Add(new FacetResult("dim" + i, new string[0], totValue, labelValues.ToArray(), labelValues.Count));
            }
        }

        // Sort by highest value, tie break by value:
        SortFacetResults(expected);

        IList<FacetResult> actual = facets.GetAllDims(10);

        // Messy: fixup ties
        SortTies(actual);

        if (VERBOSE)
        {
            Console.WriteLine("expected=\n" + expected.ToString());
            Console.WriteLine("actual=\n" + actual.ToString());
        }

        AssertFloatValuesEquals(expected, actual);
    }

    IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
}
public virtual void TestSparseFacets()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the
    // main index:
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    FacetsConfig config = new FacetsConfig();

    Document doc = new Document();
    doc.Add(new Int32Field("num", 10, Field.Store.NO));
    doc.Add(new FacetField("a", "foo1"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    if (Random().NextBoolean())
    {
        writer.Commit();
    }

    doc = new Document();
    doc.Add(new Int32Field("num", 20, Field.Store.NO));
    doc.Add(new FacetField("a", "foo2"));
    doc.Add(new FacetField("b", "bar1"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    if (Random().NextBoolean())
    {
        writer.Commit();
    }

    doc = new Document();
    doc.Add(new Int32Field("num", 30, Field.Store.NO));
    doc.Add(new FacetField("a", "foo3"));
    doc.Add(new FacetField("b", "bar2"));
    doc.Add(new FacetField("c", "baz1"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num"));

    // Ask for top 10 labels for any dims that have counts:
    IList<FacetResult> results = facets.GetAllDims(10);

    Assert.AreEqual(3, results.Count);
    Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n", results[0].ToString());
    Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n", results[1].ToString());
    Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results[2].ToString());

    IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
}
public virtual void TestBasic()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    FacetsConfig config = new FacetsConfig();

    // Reused across documents, to add the necessary facet
    // fields:
    Document doc = new Document();
    doc.Add(new Int32Field("num", 10, Field.Store.NO));
    doc.Add(new FacetField("Author", "Bob"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new Int32Field("num", 20, Field.Store.NO));
    doc.Add(new FacetField("Author", "Lisa"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new Int32Field("num", 30, Field.Store.NO));
    doc.Add(new FacetField("Author", "Lisa"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new Int32Field("num", 40, Field.Store.NO));
    doc.Add(new FacetField("Author", "Susan"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new Int32Field("num", 45, Field.Store.NO));
    doc.Add(new FacetField("Author", "Frank"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    // Aggregate the facet counts:
    FacetsCollector c = new FacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query and one of the
    // Facets.search utility methods:
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num"));

    // Retrieve & verify results:
    Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString());

    taxoReader.Dispose();
    searcher.IndexReader.Dispose();
    dir.Dispose();
    taxoDir.Dispose();
}
public void TestSimpleExamples()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    spellChecker.MinQueryLength = (0);
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone);
    for (int i = 0; i < 20; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
        writer.AddDocument(doc);
    }

    IndexReader ir = writer.Reader;

    SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("numbers", "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0].String);

    similar = spellChecker.SuggestSimilar(new Term("numbers", "five"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    if (similar.Length > 0)
    {
        assertFalse(similar[0].String.equals("five")); // don't suggest a word for itself
    }

    similar = spellChecker.SuggestSimilar(new Term("numbers", "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0].String);

    similar = spellChecker.SuggestSimilar(new Term("numbers", "fiv"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0].String);

    similar = spellChecker.SuggestSimilar(new Term("numbers", "fives"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0].String);

    similar = spellChecker.SuggestSimilar(new Term("numbers", "fie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0].String);

    // add some more documents
    for (int i = 1000; i < 1100; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
        writer.AddDocument(doc);
    }

    ir.Dispose();
    ir = writer.Reader;

    // look ma, no spellcheck index rebuild
    similar = spellChecker.SuggestSimilar(new Term("numbers", "tousand"), 10, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertTrue(similar.Length > 0);
    assertEquals("thousand", similar[0].String);

    ir.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestWrongIndexFieldName()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the
    // main index:
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetIndexFieldName("a", "$facets2");

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(new Int32Field("num", 10, Field.Store.NO));
    doc.Add(new FacetField("a", "foo1"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num"));

    // Ask for top 10 labels for any dims that have counts:
    IList<FacetResult> results = facets.GetAllDims(10);
    Assert.True(results.Count == 0);

    try
    {
        facets.GetSpecificValue("a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    try
    {
        facets.GetTopChildren(10, "a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
}
public void TestBasics() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random()); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone); var docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 2); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "restaurant")); query.Add(new Term("field", "universe")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"3", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } r.Dispose(); w.Dispose(); dir.Dispose(); }
public virtual void TestPhrasePrefix()
{
    Directory indexStore = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexStore);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();
    Document doc4 = new Document();
    Document doc5 = new Document();
    doc1.Add(NewTextField("body", "blueberry pie", Field.Store.YES));
    doc2.Add(NewTextField("body", "blueberry strudel", Field.Store.YES));
    doc3.Add(NewTextField("body", "blueberry pizza", Field.Store.YES));
    doc4.Add(NewTextField("body", "blueberry chewing gum", Field.Store.YES));
    doc5.Add(NewTextField("body", "piccadilly circus", Field.Store.YES));
    writer.AddDocument(doc1);
    writer.AddDocument(doc2);
    writer.AddDocument(doc3);
    writer.AddDocument(doc4);
    writer.AddDocument(doc5);
    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    // PhrasePrefixQuery query1 = new PhrasePrefixQuery();
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    // PhrasePrefixQuery query2 = new PhrasePrefixQuery();
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    LinkedList<Term> termsWithPrefix = new LinkedList<Term>();

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    string prefix = "pi";
    TermsEnum te = MultiFields.GetFields(reader).GetTerms("body").GetIterator(null);
    te.SeekCeil(new BytesRef(prefix));
    do
    {
        string s = te.Term.Utf8ToString();
        if (s.StartsWith(prefix, StringComparison.Ordinal))
        {
            termsWithPrefix.AddLast(new Term("body", s));
        }
        else
        {
            break;
        }
    } while (te.Next() != null);

    query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
    query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000).ScoreDocs;
    Assert.AreEqual(2, result.Length);
    result = searcher.Search(query2, null, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    reader.Dispose();
    indexStore.Dispose();
}
public void TestWithDeletions() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc); IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100)); Random rand = Random; List <string> termsToDel = new List <string>(); foreach (Document doc in docs.Values) { if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1) { termsToDel.Add(doc.Get(FIELD_NAME)); } writer.AddDocument(doc); } writer.Commit(); Term[] delTerms = new Term[termsToDel.size()]; for (int i = 0; i < termsToDel.size(); i++) { delTerms[i] = new Term(FIELD_NAME, termsToDel[i]); } foreach (Term delTerm in delTerms) { writer.DeleteDocuments(delTerm); } writer.Commit(); writer.Dispose(); foreach (string termToDel in termsToDel) { var toDel = docs[termToDel]; docs.Remove(termToDel); assertTrue(null != toDel); } IndexReader ir = DirectoryReader.Open(dir); assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0); assertEquals(ir.NumDocs, docs.size()); ValueSource[] toAdd = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) }; IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME); IInputIterator inputIterator = dictionary.GetEntryIterator(); BytesRef f; while ((f = inputIterator.Next()) != null) { string field = f.Utf8ToString(); Document doc = docs[field]; docs.Remove(field); long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault(); long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault(); assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME)))); assertEquals(inputIterator.Weight, w2 + w1); assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue())); } assertTrue(!docs.Any()); ir.Dispose(); dir.Dispose(); }
public void TestOptions()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewTextField("text", "foobar", Field.Store.NO));
    writer.AddDocument(doc);
    doc.Add(NewTextField("text", "foobar", Field.Store.NO));
    writer.AddDocument(doc);
    doc.Add(NewTextField("text", "foobaz", Field.Store.NO));
    writer.AddDocument(doc);
    doc.Add(NewTextField("text", "fobar", Field.Store.NO));
    writer.AddDocument(doc);

    IndexReader ir = writer.Reader;

    DirectSpellChecker spellChecker = new DirectSpellChecker();
    spellChecker.MaxQueryFrequency = (0F);
    SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("text", "fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
    assertEquals(0, similar.Length);

    spellChecker = new DirectSpellChecker(); // reset defaults
    spellChecker.MinQueryLength = (5);
    similar = spellChecker.SuggestSimilar(new Term("text", "foba"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
    assertEquals(0, similar.Length);

    spellChecker = new DirectSpellChecker(); // reset defaults
    spellChecker.MaxEdits = (1);
    similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
    assertEquals(0, similar.Length);

    spellChecker = new DirectSpellChecker(); // reset defaults
    spellChecker.Accuracy = (0.9F);
    similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
    assertEquals(0, similar.Length);

    spellChecker = new DirectSpellChecker(); // reset defaults
    spellChecker.MinPrefix = (0);
    similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
    assertEquals(1, similar.Length);
    similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);

    spellChecker = new DirectSpellChecker(); // reset defaults
    spellChecker.MinPrefix = (1);
    similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
    assertEquals(0, similar.Length);

    spellChecker = new DirectSpellChecker(); // reset defaults
    spellChecker.MaxEdits = (2);
    similar = spellChecker.SuggestSimilar(new Term("text", "fobar"), 2, ir, SuggestMode.SUGGEST_ALWAYS);
    assertEquals(2, similar.Length);

    ir.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public virtual void Test()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    long startTime = Environment.TickCount;

    // TODO: replace w/ the @nightly test data; make this
    // into an optional @nightly stress test
    Document doc = new Document();
    Field body = NewTextField("body", "", Field.Store.NO);
    doc.Add(body);
    StringBuilder sb = new StringBuilder();
    for (int docCount = 0; docCount < NUM_DOCS; docCount++)
    {
        int numTerms = Random().Next(10);
        for (int termCount = 0; termCount < numTerms; termCount++)
        {
            sb.Append(Random().NextBoolean() ? "aaa" : "bbb");
            sb.Append(' ');
        }
        body.StringValue = sb.ToString();
        w.AddDocument(doc);
        sb.Remove(0, sb.Length);
    }
    IndexReader r = w.Reader;
    w.Dispose();

    long endTime = Environment.TickCount;
    if (VERBOSE)
    {
        Console.WriteLine("BUILD took " + (endTime - startTime));
    }

    IndexSearcher s = NewSearcher(r);

    AtomicBoolean failed = new AtomicBoolean();
    AtomicLong netSearch = new AtomicLong();

    ThreadClass[] threads = new ThreadClass[NUM_SEARCH_THREADS];
    for (int threadID = 0; threadID < NUM_SEARCH_THREADS; threadID++)
    {
        threads[threadID] = new ThreadAnonymousInnerClassHelper(this, s, failed, netSearch);
        threads[threadID].SetDaemon(true);
    }

    foreach (ThreadClass t in threads)
    {
        t.Start();
    }

    foreach (ThreadClass t in threads)
    {
        t.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine(NUM_SEARCH_THREADS + " threads did " + netSearch.Get() + " searches");
    }

    r.Dispose();
    dir.Dispose();
}
public void BeforeClassDrillDownQueryTest()
{
    dir = NewDirectory();
    Random r = Random();
    RandomIndexWriter writer = new RandomIndexWriter(r, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(r, MockTokenizer.KEYWORD, false)));

    taxoDir = NewDirectory();
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    config = new FacetsConfig();

    // Randomize the per-dim config:
    config.SetHierarchical("a", Random().NextBoolean());
    config.SetMultiValued("a", Random().NextBoolean());
    if (Random().NextBoolean())
    {
        config.SetIndexFieldName("a", "$a");
    }
    config.SetRequireDimCount("a", true);

    config.SetHierarchical("b", Random().NextBoolean());
    config.SetMultiValued("b", Random().NextBoolean());
    if (Random().NextBoolean())
    {
        config.SetIndexFieldName("b", "$b");
    }
    config.SetRequireDimCount("b", true);

    for (int i = 0; i < 100; i++)
    {
        Document doc = new Document();
        if (i % 2 == 0) // 50
        {
            doc.Add(new TextField("content", "foo", Field.Store.NO));
        }
        if (i % 3 == 0) // 33
        {
            doc.Add(new TextField("content", "bar", Field.Store.NO));
        }
        if (i % 4 == 0) // 25
        {
            if (r.NextBoolean())
            {
                doc.Add(new FacetField("a", "1"));
            }
            else
            {
                doc.Add(new FacetField("a", "2"));
            }
        }
        if (i % 5 == 0) // 20
        {
            doc.Add(new FacetField("b", "1"));
        }
        writer.AddDocument(config.Build(taxoWriter, doc));
    }

    taxoWriter.Dispose();
    reader = writer.Reader;
    writer.Dispose();
    taxo = new DirectoryTaxonomyReader(taxoDir);
}