public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    var doc = new Document
    {
        NewStringField("id", "1", Field.Store.YES),
        NewTextField("body", "some contents and more contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 5)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "2", Field.Store.YES),
        NewTextField("body", "another document with different contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 20)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "3", Field.Store.YES),
        NewTextField("body", "crappy contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 2)
    };
    iw.AddDocument(doc);
    reader = iw.Reader;
    searcher = new IndexSearcher(reader);
    iw.Dispose();
}
public void TestReverse()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewStringField("value", "foo", Field.Store.NO));
    doc.Add(NewStringField("value", "bar", Field.Store.NO));
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("value", "baz", Field.Store.NO));
    doc.Add(NewStringField("id", "2", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(ir);
    Sort sort = new Sort(new SortedSetSortField("value", true));
    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    assertEquals(2, td.TotalHits);
    // 'bar' comes before 'baz'
    assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
    assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
    ir.Dispose();
    dir.Dispose();
}
public void TestMax()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
    doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
    doc.Add(NewStringField("id", "2", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();
    // slow wrapper does not support random access ordinals (there is no need for that!)
    IndexSearcher searcher = NewSearcher(ir, false);
    Sort sort = new Sort(new SortedSetSortField("value", false, Selector.MAX));
    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    assertEquals(2, td.TotalHits);
    // 'baz' comes before 'foo'
    assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
    assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
    assertNoFieldCaches();
    ir.Dispose();
    dir.Dispose();
}
public virtual void TestString()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(NewStringField("value", "foo", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("value", "bar", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader ir = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(ir);
    Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING));
    TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(2, td.TotalHits);
    // 'bar' comes before 'foo'
    Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));
    ir.Dispose();
    dir.Dispose();
}
public void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
    Document doc = new Document();
    Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
    doc.Add(field);
    for (int i = 0; i < 5137; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);
    field.StringValue = "meaninglessnames";
    for (int i = 5138; i < 11377; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);
    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    var iw = new RandomIndexWriter(Random(), dir, iwc);
    var doc = new Document
    {
        NewStringField("id", "1", Field.Store.YES),
        NewTextField("body", "some contents and more contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 5)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "2", Field.Store.YES),
        NewTextField("body", "another document with different contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 20)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "3", Field.Store.YES),
        NewTextField("body", "crappy contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 2)
    };
    iw.AddDocument(doc);
    iw.ForceMerge(1);
    reader = iw.Reader;
    iw.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true), Similarity, TimeZone);
    for (int i = 900; i < 1112; i++)
    {
        Document doc = new Document();
        string num = Regex.Replace(Regex.Replace(English.IntToEnglish(i), "[-]", " "), "[,]", "");
        doc.Add(NewTextField("numbers", num, Field.Store.NO));
        writer.AddDocument(doc);
    }
    {
        Document doc = new Document();
        doc.Add(NewTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO));
        writer.AddDocument(doc);
    }
    {
        Document doc = new Document();
        doc.Add(NewTextField("numbers", "hundredeight eightyeight yeight", Field.Store.NO));
        writer.AddDocument(doc);
    }
    {
        Document doc = new Document();
        doc.Add(NewTextField("numbers", "tres y cinco", Field.Store.NO));
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();
}
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
    Directory dir = NewDirectory();
    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("a", true);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
    doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
    writer.AddDocument(config.Build(doc));
    if (Random().NextBoolean())
    {
        writer.Commit();
    }
    doc = new Document();
    doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    writer.AddDocument(config.Build(doc));
    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    // Per-top-reader state:
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);
    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);
    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.GetTopChildren(10, "b").ToString());
    // DrillDown:
    DrillDownQuery q = new DrillDownQuery(config);
    q.Add("a", "foo");
    q.Add("b", "baz");
    TopDocs hits = searcher.Search(q, 1);
    Assert.AreEqual(1, hits.TotalHits);
    IOUtils.Close(writer, searcher.IndexReader, dir);
}
public virtual void TestBasic()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
    Field f = NewField("foo", "this is a test test", ft);
    doc.Add(f);
    for (int i = 0; i < 100; i++)
    {
        w.AddDocument(doc);
    }
    IndexReader reader = w.Reader;
    w.Dispose();
    Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test")));
    DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
    while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        Assert.AreEqual(2, de.Freq());
    }
    reader.Dispose();
    dir.Dispose();
}
public void TestFieldNotPresent()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    int num = AtLeast(3);
    int skip = Random().Next(num);
    var terms = new List<Term>();
    for (int i = 0; i < num; i++)
    {
        terms.Add(new Term("field" + i, "content1"));
        Document doc = new Document();
        if (skip == i)
        {
            continue;
        }
        doc.Add(NewStringField("field" + i, "content1", Field.Store.YES));
        w.AddDocument(doc);
    }
    w.ForceMerge(1);
    IndexReader reader = w.Reader;
    w.Dispose();
    assertEquals(1, reader.Leaves.size());
    AtomicReaderContext context = reader.Leaves.First();
    TermsFilter tf = new TermsFilter(terms);
    FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);
    assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality());
    reader.Dispose();
    dir.Dispose();
}
private void CreateRandomIndexes(int maxSegments) { dir = NewDirectory(); numDocs = AtLeast(150); int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5); ISet<string> randomTerms = new HashSet<string>(); while (randomTerms.size() < numTerms) { randomTerms.add(TestUtil.RandomSimpleString(Random())); } terms = new List<string>(randomTerms); int seed = Random().Next(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed))); iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort)); iw = new RandomIndexWriter(new Random(seed), dir, iwc); for (int i = 0; i < numDocs; ++i) { Document doc = RandomDocument(); iw.AddDocument(doc); if (i == numDocs / 2 || (i != numDocs - 1 && Random().nextInt(8) == 0)) { iw.Commit(); } if (Random().nextInt(15) == 0) { string term = RandomInts.RandomFrom(Random(), terms); iw.DeleteDocuments(new Term("s", term)); } } reader = iw.Reader; }
public virtual void TestPrefixQuery_Mem() { Directory directory = NewDirectory(); string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("category", categories[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader reader = writer.Reader; PrefixQuery query = new PrefixQuery(new Term("category", "/Computers")); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below"); query = new PrefixQuery(new Term("category", "/Computers/Mac")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "One in /Computers/Mac"); query = new PrefixQuery(new Term("category", "")); Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category"); Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "everything"); writer.Dispose(); reader.Dispose(); directory.Dispose(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); FieldName = Random().NextBoolean() ? "field" : ""; // sometimes use an empty string as field name RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField(FieldName, "", Field.Store.NO); doc.Add(field); List<string> terms = new List<string>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s = TestUtil.RandomUnicodeString(Random()); field.StringValue = s; terms.Add(s); writer.AddDocument(doc); } if (VERBOSE) { // utf16 order terms.Sort(); Console.WriteLine("UTF16 order:"); foreach (string s in terms) { Console.WriteLine(" " + UnicodeUtil.ToHexString(s)); } } Reader = writer.Reader; Searcher1 = NewSearcher(Reader); Searcher2 = NewSearcher(Reader); writer.Dispose(); }
public virtual void Test() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); conf.SetCodec(new Lucene46Codec()); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf); Document doc = new Document(); // these fields should sometimes get term vectors, etc Field idField = NewStringField("id", "", Field.Store.NO); Field bodyField = NewTextField("body", "", Field.Store.NO); Field dvField = new NumericDocValuesField("dv", 5); doc.Add(idField); doc.Add(bodyField); doc.Add(dvField); for (int i = 0; i < 100; i++) { idField.StringValue = Convert.ToString(i); bodyField.StringValue = TestUtil.RandomUnicodeString(Random()); riw.AddDocument(doc); if (Random().Next(7) == 0) { riw.Commit(); } // TODO: we should make a new format with a clean header... // if (Random().nextInt(20) == 0) { // riw.DeleteDocuments(new Term("id", Integer.toString(i))); // } } riw.Dispose(); CheckHeaders(dir); dir.Dispose(); }
public void BeforeClass() { Dir = NewDirectory(); Sdir1 = NewDirectory(); Sdir2 = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO)); writer.AddDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc); } writer.ForceMerge(1); swriter1.ForceMerge(1); swriter2.ForceMerge(1); writer.Dispose(); swriter1.Dispose(); swriter2.Dispose(); Reader = DirectoryReader.Open(Dir); Searcher = NewSearcher(Reader); MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true); MultiSearcher = NewSearcher(MultiReader); MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true); MultiSearcherDupls = NewSearcher(MultiReaderDupls); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.OmitNorms = true; Field field = NewField("field", "", customType); doc.Add(field); NumberFormatInfo df = new NumberFormatInfo(); df.NumberDecimalDigits = 0; //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.StringValue = i.ToString(df); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int numDocs = TestUtil.NextInt(Random(), 2049, 4000); for (int i = 0; i < numDocs; i++) { var document = new Document { NewTextField("english", English.IntToEnglish(i), Field.Store.NO), NewTextField("oddeven", (i%2 == 0) ? "even" : "odd", Field.Store.NO ), NewStringField("byte", string.Empty + (unchecked((byte) Random().Next ())), Field.Store.NO), NewStringField("short", string.Empty + ((short) Random().Next()), Field.Store .NO), new IntField("int", Random().Next(), Field.Store.NO), new LongField("long", Random().NextLong(), Field.Store.NO), new FloatField("float", Random().NextFloat(), Field.Store.NO), new DoubleField("double", Random().NextDouble(), Field.Store.NO), new NumericDocValuesField("intdocvalues", Random().Next()), new FloatDocValuesField("floatdocvalues", Random().NextFloat()) }; iw.AddDocument(document); } reader = iw.Reader; iw.Dispose(); searcher = NewSearcher(reader); }
public override void SetUp()
{
    base.SetUp();
    INDEX_SIZE = AtLeast(2000);
    Index = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Index);
    RandomGen random = new RandomGen(this, Random());
    // don't decrease INDEX_SIZE; if it is too low the problem doesn't show up
    for (int i = 0; i < INDEX_SIZE; ++i)
    {
        Document doc = new Document();
        // some documents must not have an entry in the first sort field
        if ((i % 5) != 0)
        {
            doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES));
        }
        // some documents to match the query (see below)
        if ((i % 7) == 0)
        {
            doc.Add(NewTextField("content", "test", Field.Store.YES));
        }
        // every document has a defined 'mandant' field
        doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES));
        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    writer.Dispose();
    Query = new TermQuery(new Term("content", "test"));
}
public virtual void TestMethod()
{
    Directory directory = NewDirectory();
    string[] values = new string[] { "1", "2", "3", "4" };
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    for (int i = 0; i < values.Length; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField(FIELD, values[i], Field.Store.YES));
        writer.AddDocument(doc);
    }
    IndexReader ir = writer.Reader;
    writer.Dispose();
    BooleanQuery booleanQuery1 = new BooleanQuery();
    booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
    booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = new BooleanQuery();
    query.Add(booleanQuery1, BooleanClause.Occur.MUST);
    query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);
    IndexSearcher indexSearcher = NewSearcher(ir);
    ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "Number of matched documents");
    ir.Dispose();
    directory.Dispose();
}
public override void SetUp() { base.SetUp(); _dir = NewDirectory(); _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = true; ft.StoreTermVectorPositions = true; Analyzer analyzer = new MockAnalyzer(Random()); Document doc; for (int i = 0; i < 100; i++) { doc = new Document(); doc.Add(new Field(_idFieldName, Random().toString(), ft)); doc.Add(new Field(_textFieldName, new StringBuilder(Random().toString()).append(Random().toString()).append( Random().toString()).toString(), ft)); doc.Add(new Field(_classFieldName, Random().toString(), ft)); _indexWriter.AddDocument(doc, analyzer); } _indexWriter.Commit(); _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader); }
public virtual void TestRollbackIntegrityWithBufferFlush()
{
    Directory dir = NewDirectory();
    RandomIndexWriter rw = new RandomIndexWriter(Random(), dir);
    for (int i = 0; i < 5; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES));
        rw.AddDocument(doc);
    }
    rw.Dispose();
    // If buffer size is small enough to cause a flush, errors ensue...
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND));
    for (int i = 0; i < 3; i++)
    {
        Document doc = new Document();
        string value = Convert.ToString(i);
        doc.Add(NewStringField("pk", value, Field.Store.YES));
        doc.Add(NewStringField("text", "foo", Field.Store.YES));
        w.UpdateDocument(new Term("pk", value), doc);
    }
    w.Rollback();
    IndexReader r = DirectoryReader.Open(dir);
    Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
    r.Dispose();
    dir.Dispose();
}
public static void BeforeClass() { Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir); int numDocs = AtLeast(300); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddSome(doc, AlwaysTerms); if (Random().Next(100) < 90) { AddSome(doc, CommonTerms); } if (Random().Next(100) < 50) { AddSome(doc, MediumTerms); } if (Random().Next(100) < 10) { AddSome(doc, RareTerms); } iw.AddDocument(doc); } iw.ForceMerge(1); iw.Dispose(); r = DirectoryReader.Open(Dir); atomicReader = GetOnlySegmentReader(r); Searcher = new IndexSearcher(atomicReader); Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField("field", "", Field.Store.NO); doc.Add(field); NumberFormatInfo df = new NumberFormatInfo(); df.NumberDecimalDigits = 0; //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.StringValue = i.ToString(df); writer.AddDocument(doc); } Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: setUp searcher=" + Searcher); } }
public void TestInternalLevenshteinDistance()
{
    DirectSpellChecker spellchecker = new DirectSpellChecker();
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, true), Similarity, TimeZone);
    string[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoið‘" };
    for (int i = 0; i < termsToAdd.Length; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("repentance", termsToAdd[i], Field.Store.NO));
        writer.AddDocument(doc);
    }
    IndexReader ir = writer.Reader;
    string misspelled = "metanoix";
    SuggestWord[] similar = spellchecker.SuggestSimilar(new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertTrue(similar.Length == 4);
    IStringDistance sd = spellchecker.Distance;
    assertTrue(sd is LuceneLevenshteinDistance);
    foreach (SuggestWord word in similar)
    {
        assertTrue(word.Score == sd.GetDistance(word.String, misspelled));
        assertTrue(word.Score == sd.GetDistance(misspelled, word.String)); // LUCENENET TODO: Perhaps change this to word.ToString()?
    }
    ir.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    // we generate awful regexps: good for testing.
    // but for the preflex codec the test can be very slow, so use fewer iterations.
    NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
    Document doc = new Document();
    Field field = NewStringField("field", "", Field.Store.YES);
    doc.Add(field);
    Terms = new SortedSet<BytesRef>();
    int num = AtLeast(200);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomUnicodeString(Random());
        field.StringValue = s;
        Terms.Add(new BytesRef(s));
        writer.AddDocument(doc);
    }
    TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);
    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
public void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); writer.AddDocument(Doc(new Field[] { GetField("id", "0"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "1"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "2"), GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "3"), GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"), GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas") })); writer.AddDocument(Doc(new Field[] { GetField("id", "4"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"), GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones") })); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public static void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(field, DocFields[i], Field.Store.NO)); writer.AddDocument(doc); } writer.Dispose(); LittleReader = DirectoryReader.Open(Directory); Searcher = NewSearcher(LittleReader); // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one) Searcher.Similarity = new DefaultSimilarity(); // Make big index Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT)); // First multiply small test index: MulFactor = 1; int docCount = 0; if (VERBOSE) { Console.WriteLine("\nTEST: now copy index..."); } do { if (VERBOSE) { Console.WriteLine("\nTEST: cycle..."); } Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT)); RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2); w.AddIndexes(copy); docCount = w.MaxDoc(); w.Dispose(); MulFactor *= 2; } while (docCount < 3000); RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc_ = new Document(); doc_.Add(NewTextField("field2", "xxx", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } doc_ = new Document(); doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } Reader = riw.Reader; BigSearcher = NewSearcher(Reader); riw.Dispose(); }
public virtual void TestMixupDocs() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorPayloads = true; customType.StoreTermVectorOffsets = Random().NextBoolean(); Field field = new Field("field", "", customType); TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true); Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>()); field.TokenStream = ts; doc.Add(field); writer.AddDocument(doc); Token withPayload = new Token("withPayload", 0, 11); withPayload.Payload = new BytesRef("test"); ts = new CannedTokenStream(withPayload); Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>()); field.TokenStream = ts; writer.AddDocument(doc); ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true); Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>()); field.TokenStream = ts; writer.AddDocument(doc); DirectoryReader reader = writer.Reader; Terms terms = reader.GetTermVector(1, "field"); Debug.Assert(terms != null); TermsEnum termsEnum = terms.Iterator(null); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload"))); DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null); Assert.AreEqual(0, de.NextDoc()); Assert.AreEqual(0, de.NextPosition()); Assert.AreEqual(new BytesRef("test"), de.Payload); writer.Dispose(); reader.Dispose(); dir.Dispose(); }
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
    Document doc = new Document();
    Field field = NewTextField("field", "", Field.Store.NO);
    doc.Add(field);
    field.StringValue = "quick brown fox";
    iw.AddDocument(doc);
    field.StringValue = "jumps over lazy broun dog";
    iw.AddDocument(doc);
    field.StringValue = "jumps over extremely very lazy broxn dog";
    iw.AddDocument(doc);
    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void Test()
{
    Directory dir = NewDirectory();
    RandomIndexWriter riw = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(new TextField("eng", new BugReproTokenStream()));
    riw.AddDocument(doc);
    riw.Dispose();
    dir.Dispose();
}
/// <summary> /// LUCENENET specific /// Passed in because NewStringField and NewIndexWriterConfig are no /// longer static. /// </summary> private IndexReader Build(Random random, TestIndex index) { /* build an index */ Document doc = new Document(); Field idField = NewStringField(random, "id", "", Field.Store.YES); Field randField = NewStringField(random, "rand", "", Field.Store.YES); Field bodyField = NewStringField(random, "body", "", Field.Store.NO); doc.Add(idField); doc.Add(randField); doc.Add(bodyField); RandomIndexWriter writer = new RandomIndexWriter(random, index.Index, NewIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(TestUtil.NextInt32(random, 50, 1000)).SetMergePolicy(NewLogMergePolicy())); TestUtil.ReduceOpenFiles(writer.IndexWriter); while (true) { int minCount = 0; int maxCount = 0; for (int d = MinId; d <= MaxId; d++) { idField.SetStringValue(Pad(d)); int r = index.AllowNegativeRandomInts ? random.Next() : random.Next(int.MaxValue); if (index.MaxR < r) { index.MaxR = r; maxCount = 1; } else if (index.MaxR == r) { maxCount++; } if (r < index.MinR) { index.MinR = r; minCount = 1; } else if (r == index.MinR) { minCount++; } randField.SetStringValue(Pad(r)); bodyField.SetStringValue("body"); writer.AddDocument(doc); } if (minCount == 1 && maxCount == 1) { // our subclasses rely on only 1 doc having the min or // max, so, we loop until we satisfy that. it should be // exceedingly rare (Yonik calculates 1 in ~429,000) // times) that this loop requires more than one try: IndexReader ir = writer.GetReader(); writer.Dispose(); return(ir); } // try again writer.DeleteAll(); } }
public virtual void TestNonIndexedFields() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(new StoredField("bogusbytes", "bogus")); doc.Add(new StoredField("bogusshorts", "bogus")); doc.Add(new StoredField("bogusints", "bogus")); doc.Add(new StoredField("boguslongs", "bogus")); doc.Add(new StoredField("bogusfloats", "bogus")); doc.Add(new StoredField("bogusdoubles", "bogus")); doc.Add(new StoredField("bogusterms", "bogus")); doc.Add(new StoredField("bogustermsindex", "bogus")); doc.Add(new StoredField("bogusmultivalued", "bogus")); doc.Add(new StoredField("bogusbits", "bogus")); iw.AddDocument(doc); DirectoryReader ir = iw.GetReader(); iw.Dispose(); AtomicReader ar = GetOnlySegmentReader(ir); IFieldCache cache = FieldCache.DEFAULT; cache.PurgeAllCaches(); Assert.AreEqual(0, cache.GetCacheEntries().Length); #pragma warning disable 612, 618 Bytes bytes = cache.GetBytes(ar, "bogusbytes", true); Assert.AreEqual((byte)0, bytes.Get(0)); Int16s shorts = cache.GetInt16s(ar, "bogusshorts", true); Assert.AreEqual(0, shorts.Get(0)); #pragma warning restore 612, 618 Int32s ints = cache.GetInt32s(ar, "bogusints", true); Assert.AreEqual(0, ints.Get(0)); Int64s longs = cache.GetInt64s(ar, "boguslongs", true); Assert.AreEqual(0, longs.Get(0)); Singles floats = cache.GetSingles(ar, "bogusfloats", true); Assert.AreEqual(0, floats.Get(0), 0.0f); Doubles doubles = cache.GetDoubles(ar, "bogusdoubles", true); Assert.AreEqual(0, doubles.Get(0), 0.0D); BytesRef scratch = new BytesRef(); BinaryDocValues binaries = cache.GetTerms(ar, "bogusterms", true); binaries.Get(0, scratch); Assert.AreEqual(0, scratch.Length); SortedDocValues sorted = cache.GetTermsIndex(ar, "bogustermsindex"); Assert.AreEqual(-1, sorted.GetOrd(0)); sorted.Get(0, scratch); Assert.AreEqual(0, scratch.Length); SortedSetDocValues sortedSet = cache.GetDocTermOrds(ar, "bogusmultivalued"); sortedSet.SetDocument(0); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); IBits bits = cache.GetDocsWithField(ar, "bogusbits"); Assert.IsFalse(bits.Get(0)); // check that we cached nothing Assert.AreEqual(0, cache.GetCacheEntries().Length); ir.Dispose(); dir.Dispose(); }
private void PopulateSampleIndex(Analyzer analyzer) { indexWriter.DeleteAll(); indexWriter.Commit(); String text; Document doc = new Document(); text = "The traveling press secretary for Mitt Romney lost his cool and cursed at reporters " + "who attempted to ask questions of the Republican presidential candidate in a public plaza near the Tomb of " + "the Unknown Soldier in Warsaw Tuesday."; doc.Add(new Field(textFieldName, text, ft)); doc.Add(new Field(categoryFieldName, "politics", ft)); doc.Add(new Field(booleanFieldName, "true", ft)); indexWriter.AddDocument(doc, analyzer); doc = new Document(); text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" + " States, that he will be tougher against Iran's nuclear ambitions than President Barack Obama."; doc.Add(new Field(textFieldName, text, ft)); doc.Add(new Field(categoryFieldName, "politics", ft)); doc.Add(new Field(booleanFieldName, "true", ft)); indexWriter.AddDocument(doc, analyzer); doc = new Document(); text = "And there's a threshold question that he has to answer for the American people and " + "that's whether he is prepared to be commander-in-chief,\" she continued. \"As we look to the past events, we " + "know that this raises some questions about his preparedness and we'll see how the rest of his trip goes.\""; doc.Add(new Field(textFieldName, text, ft)); doc.Add(new Field(categoryFieldName, "politics", ft)); doc.Add(new Field(booleanFieldName, "true", ft)); indexWriter.AddDocument(doc, analyzer); doc = new Document(); text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " + "keep quiet and not go there,\" said Alan Lizotte, dean and professor at the State University of New York at " + "Albany's School of Criminal Justice."; doc.Add(new Field(textFieldName, text, ft)); doc.Add(new Field(categoryFieldName, "politics", ft)); doc.Add(new Field(booleanFieldName, "true", ft)); indexWriter.AddDocument(doc, analyzer); doc = new Document(); text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " + "technology at the University of Wisconsin-La Crosse, documented the historic moment and shared it with the " + "world through the Internet."; doc.Add(new Field(textFieldName, text, ft)); doc.Add(new Field(categoryFieldName, "technology", ft)); doc.Add(new Field(booleanFieldName, "false", ft)); indexWriter.AddDocument(doc, analyzer); doc = new Document(); text = "So, about all those experts and analysts who've spent the past year or so saying " + "Facebook was going to make a phone. A new expert has stepped forward to say it's not going to happen."; doc.Add(new Field(textFieldName, text, ft)); doc.Add(new Field(categoryFieldName, "technology", ft)); doc.Add(new Field(booleanFieldName, "false", ft)); indexWriter.AddDocument(doc, analyzer); doc = new Document(); text = "More than 400 million people trust Google with their e-mail, and 50 million store files" + " in the cloud using the Dropbox service. People manage their bank accounts, pay bills, trade stocks and " + "generally transfer or store huge volumes of personal data online."; doc.Add(new Field(textFieldName, text, ft)); doc.Add(new Field(categoryFieldName, "technology", ft)); doc.Add(new Field(booleanFieldName, "false", ft)); indexWriter.AddDocument(doc, analyzer); doc = new Document(); text = "unlabeled doc"; doc.Add(new Field(textFieldName, text, ft)); indexWriter.AddDocument(doc, analyzer); indexWriter.Commit(); }
public override void BeforeClass() { base.BeforeClass(); NUM_DOCS = AtLeast(500); NUM_ORDS = AtLeast(2); directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); long theLong = long.MaxValue; double theDouble = double.MaxValue; sbyte theByte = sbyte.MaxValue; short theShort = short.MaxValue; int theInt = int.MaxValue; float theFloat = float.MaxValue; unicodeStrings = new string[NUM_DOCS]; //MultiValued = new BytesRef[NUM_DOCS, NUM_ORDS]; multiValued = RectangularArrays.ReturnRectangularArray <BytesRef>(NUM_DOCS, NUM_ORDS); if (Verbose) { Console.WriteLine("TEST: setUp"); } for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); doc.Add(NewStringField("theLong", (theLong--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theDouble", (theDouble--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theByte", (theByte--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theShort", (theShort--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theInt", (theInt--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theFloat", (theFloat--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO)); if (i % 2 == 0) { doc.Add(NewStringField("sparse", (i).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); } if (i % 2 == 0) { doc.Add(new Int32Field("numInt", i, Field.Store.NO)); } // sometimes skip the field: if (Random.Next(40) != 17) { unicodeStrings[i] = GenerateString(i); doc.Add(NewStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES)); } // sometimes skip the field: if (Random.Next(10) != 8) { for (int j = 0; j < NUM_ORDS; j++) { string newValue = GenerateString(i); multiValued[i][j] = new BytesRef(newValue); doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES)); } Array.Sort(multiValued[i]); } writer.AddDocument(doc); } IndexReader r = writer.GetReader(); reader = SlowCompositeReaderWrapper.Wrap(r); writer.Dispose(); }
public void TestSimple() { string groupField = "hotel"; FieldType customType = new FieldType(); customType.IsStored = true; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); bool useDv = canUseDV && Random.nextBoolean(); // 0 Document doc = new Document(); AddField(doc, groupField, "a", useDv); AddField(doc, "airport", "ams", useDv); AddField(doc, "duration", "5", useDv); w.AddDocument(doc); // 1 doc = new Document(); AddField(doc, groupField, "a", useDv); AddField(doc, "airport", "dus", useDv); AddField(doc, "duration", "10", useDv); w.AddDocument(doc); // 2 doc = new Document(); AddField(doc, groupField, "b", useDv); AddField(doc, "airport", "ams", useDv); AddField(doc, "duration", "10", useDv); w.AddDocument(doc); w.Commit(); // To ensure a second segment // 3 doc = new Document(); AddField(doc, groupField, "b", useDv); AddField(doc, "airport", "ams", useDv); AddField(doc, "duration", "5", useDv); w.AddDocument(doc); // 4 doc = new Document(); AddField(doc, groupField, "b", useDv); AddField(doc, "airport", "ams", useDv); AddField(doc, "duration", "5", useDv); w.AddDocument(doc); IndexSearcher indexSearcher = NewSearcher(w.GetReader()); IList <TermGroupFacetCollector.FacetEntry> entries = null; AbstractGroupFacetCollector groupedAirportFacetCollector = null; TermGroupFacetCollector.GroupedFacetResult airportResult = null; foreach (int limit in new int[] { 2, 10, 100, int.MaxValue }) { // any of these limits is plenty for the data we have groupedAirportFacetCollector = CreateRandomCollector (useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false); indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector); int maxOffset = 5; airportResult = groupedAirportFacetCollector.MergeSegmentResults (int.MaxValue == limit ? limit : maxOffset + limit, 0, false); assertEquals(3, airportResult.TotalCount); assertEquals(0, airportResult.TotalMissingCount); entries = airportResult.GetFacetEntries(maxOffset, limit); assertEquals(0, entries.size()); entries = airportResult.GetFacetEntries(0, limit); assertEquals(2, entries.size()); assertEquals("ams", entries[0].Value.Utf8ToString()); assertEquals(2, entries[0].Count); assertEquals("dus", entries[1].Value.Utf8ToString()); assertEquals(1, entries[1].Count); entries = airportResult.GetFacetEntries(1, limit); assertEquals(1, entries.size()); assertEquals("dus", entries[0].Value.Utf8ToString()); assertEquals(1, entries[0].Count); } AbstractGroupFacetCollector groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? 
"duration_dv" : "duration", null, false); indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector); TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 0, false); assertEquals(4, durationResult.TotalCount); assertEquals(0, durationResult.TotalMissingCount); entries = durationResult.GetFacetEntries(0, 10); assertEquals(2, entries.size()); assertEquals("10", entries[0].Value.Utf8ToString()); assertEquals(2, entries[0].Count); assertEquals("5", entries[1].Value.Utf8ToString()); assertEquals(2, entries[1].Count); // 5 doc = new Document(); AddField(doc, groupField, "b", useDv); // missing airport if (useDv) { AddField(doc, "airport", "", useDv); } AddField(doc, "duration", "5", useDv); w.AddDocument(doc); // 6 doc = new Document(); AddField(doc, groupField, "b", useDv); AddField(doc, "airport", "bru", useDv); AddField(doc, "duration", "10", useDv); w.AddDocument(doc); // 7 doc = new Document(); AddField(doc, groupField, "b", useDv); AddField(doc, "airport", "bru", useDv); AddField(doc, "duration", "15", useDv); w.AddDocument(doc); // 8 doc = new Document(); AddField(doc, groupField, "a", useDv); AddField(doc, "airport", "bru", useDv); AddField(doc, "duration", "10", useDv); w.AddDocument(doc); indexSearcher.IndexReader.Dispose(); indexSearcher = NewSearcher(w.GetReader()); groupedAirportFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv); indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector); airportResult = groupedAirportFacetCollector.MergeSegmentResults(3, 0, true); entries = airportResult.GetFacetEntries(1, 2); assertEquals(2, entries.size()); if (useDv) { assertEquals(6, airportResult.TotalCount); assertEquals(0, airportResult.TotalMissingCount); assertEquals("bru", entries[0].Value.Utf8ToString()); assertEquals(2, entries[0].Count); assertEquals("", entries[1].Value.Utf8ToString()); assertEquals(1, entries[1].Count); } else { assertEquals(5, airportResult.TotalCount); assertEquals(1, airportResult.TotalMissingCount); assertEquals("bru", entries[0].Value.Utf8ToString()); assertEquals(2, entries[0].Count); assertEquals("dus", entries[1].Value.Utf8ToString()); assertEquals(1, entries[1].Count); } groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false); indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector); durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 2, true); assertEquals(5, durationResult.TotalCount); assertEquals(0, durationResult.TotalMissingCount); entries = durationResult.GetFacetEntries(1, 1); assertEquals(1, entries.size()); assertEquals("5", entries[0].Value.Utf8ToString()); assertEquals(2, entries[0].Count); // 9 doc = new Document(); AddField(doc, groupField, "c", useDv); AddField(doc, "airport", "bru", useDv); AddField(doc, "duration", "15", useDv); w.AddDocument(doc); // 10 doc = new Document(); AddField(doc, groupField, "c", useDv); AddField(doc, "airport", "dus", useDv); AddField(doc, "duration", "10", useDv); w.AddDocument(doc); indexSearcher.IndexReader.Dispose(); indexSearcher = NewSearcher(w.GetReader()); groupedAirportFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? 
"airport_dv" : "airport", null, false); indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector); airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false); entries = airportResult.GetFacetEntries(0, 10); if (useDv) { assertEquals(8, airportResult.TotalCount); assertEquals(0, airportResult.TotalMissingCount); assertEquals(4, entries.size()); assertEquals("", entries[0].Value.Utf8ToString()); assertEquals(1, entries[0].Count); assertEquals("ams", entries[1].Value.Utf8ToString()); assertEquals(2, entries[1].Count); assertEquals("bru", entries[2].Value.Utf8ToString()); assertEquals(3, entries[2].Count); assertEquals("dus", entries[3].Value.Utf8ToString()); assertEquals(2, entries[3].Count); } else { assertEquals(7, airportResult.TotalCount); assertEquals(1, airportResult.TotalMissingCount); assertEquals(3, entries.size()); assertEquals("ams", entries[0].Value.Utf8ToString()); assertEquals(2, entries[0].Count); assertEquals("bru", entries[1].Value.Utf8ToString()); assertEquals(3, entries[1].Count); assertEquals("dus", entries[2].Value.Utf8ToString()); assertEquals(2, entries[2].Count); } groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false); indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector); durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 0, true); assertEquals(5, durationResult.TotalCount); assertEquals(0, durationResult.TotalMissingCount); entries = durationResult.GetFacetEntries(0, 10); assertEquals(2, entries.size()); assertEquals("10", entries[0].Value.Utf8ToString()); assertEquals(3, entries[0].Count); assertEquals("15", entries[1].Value.Utf8ToString()); assertEquals(2, entries[1].Count); w.Dispose(); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestRandom() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); string[] tokens = GetRandomTokens(10); Directory indexDir = NewDirectory(); Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt(Random(), 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.Reader); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random().Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = new SortedSetDocValuesFacetCounts(state, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List <Dictionary <string, int?> >(); for (int i = 0; i < numDims; i++) { expectedCounts.Add(new Dictionary <string, int?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { int?v; if (!expectedCounts[j].TryGetValue(doc.dims[j], out v)) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); int totCount = 0; foreach (KeyValuePair <string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties //sortTies(actual); CollectionAssert.AreEqual(expected, actual); } IOUtils.Dispose(w, searcher.IndexReader, indexDir, taxoDir); }
public virtual void TestSimple() { Random random = Random; DocValuesType[] dvTypes = new DocValuesType[] { DocValuesType.NUMERIC, DocValuesType.BINARY, DocValuesType.SORTED, }; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())); bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE; Document doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "random text", Field.Store.NO)); doc.Add(new StringField("id", "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "some more random text blob", Field.Store.NO)); doc.Add(new StringField("id", "2", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "2", dvType); doc.Add(new TextField("content", "some more random textual data", Field.Store.NO)); doc.Add(new StringField("id", "3", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // To ensure a second segment // 3 doc = new Document(); AddField(doc, groupField, "2", dvType); doc.Add(new TextField("content", "some random text", Field.Store.NO)); doc.Add(new StringField("id", "4", Field.Store.NO)); w.AddDocument(doc); // 4 doc = new Document(); AddField(doc, groupField, "3", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "some more random text", Field.Store.NO)); doc.Add(new StringField("id", "5", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); AddField(doc, groupField, "3", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "random blob", Field.Store.NO)); doc.Add(new StringField("id", "6", Field.Store.NO)); w.AddDocument(doc); // 6 -- no author field doc = new Document(); doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES)); AddField(doc, countField, "1", dvType); doc.Add(new StringField("id", "6", Field.Store.NO)); w.AddDocument(doc); IndexSearcher indexSearcher = NewSearcher(w.GetReader()); w.Dispose(); var cmp = Comparer <AbstractDistinctValuesCollector.IGroupCount <IComparable> > .Create((groupCount1, groupCount2) => { if (groupCount1.GroupValue == null) { if (groupCount2.GroupValue == null) { return(0); } return(-1); } else if (groupCount2.GroupValue == null) { return(1); } else { return(groupCount1.GroupValue.CompareTo(groupCount2.GroupValue)); } }); // === Search for content:random IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector); IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector); //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; // LUCENENET TODO: Try to work out how to do this without an O(n) operation var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> 
>(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(4, gcs.Count); CompareNull(gcs[0].GroupValue); List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); Compare("1", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); countValues.Sort(nullComparer); assertEquals(2, countValues.size()); Compare("1", countValues[0]); Compare("2", countValues[1]); Compare("2", gcs[2].GroupValue); countValues = new List <IComparable>(gcs[2].UniqueValues); assertEquals(1, countValues.size()); CompareNull(countValues[0]); Compare("3", gcs[3].GroupValue); countValues = new List <IComparable>(gcs[3].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); // === Search for content:some firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector); distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector); // LUCENENET TODO: Try to work out how to do this without an O(n) operation //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(3, gcs.Count); Compare("1", gcs[0].GroupValue); countValues = new List <IComparable>(gcs[0].UniqueValues); assertEquals(2, countValues.size()); countValues.Sort(nullComparer); Compare("1", countValues[0]); Compare("2", countValues[1]); Compare("2", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); assertEquals(1, countValues.size()); CompareNull(countValues[0]); Compare("3", gcs[2].GroupValue); countValues = new List <IComparable>(gcs[2].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); // === Search for content:blob firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector); distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector); // LUCENENET TODO: Try to work out how to do this without an O(n) operation //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(2, gcs.Count); Compare("1", gcs[0].GroupValue); countValues = new List <IComparable>(gcs[0].UniqueValues); // B/c the only one document matched with blob inside the author 1 group assertEquals(1, countValues.Count); Compare("1", countValues[0]); Compare("3", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); assertEquals(1, countValues.Count); Compare("1", countValues[0]); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public void TestMinShouldMatch() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter(Random, dir, analyzer); string[] docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.5f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.49f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 
2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 4.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0.0f); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); // doc 2 and 3 only get a score from low freq terms assertEquals( new JCG.HashSet <string> { @"2", @"3" }, new JCG.HashSet <string> { r.Document(search.ScoreDocs[1].Doc).Get(@"id"), r.Document(search.ScoreDocs[2].Doc).Get(@"id") }, aggressive: false); } { // only high freq terms around - check that min should match is applied CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 4); } { // only high freq terms around - check that min should match is applied CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 2); assertEquals( new JCG.HashSet <string> { @"0", @"2" }, new JCG.HashSet <string> { r.Document(search.ScoreDocs[0].Doc).Get(@"id"), r.Document(search.ScoreDocs[1].Doc).Get(@"id") }, aggressive: false); } r.Dispose(); w.Dispose(); dir.Dispose(); }
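// Illustration, not library code: the fractional LowFreqMinimumNumberShouldMatch values used
// above are resolved against the number of optional low-frequency clauses. The helper below
// sketches the usual interpretation (values >= 1 act as an absolute clause count, values below
// 1 as a ratio that gets rounded). In this index the low-frequency clauses are world, universe
// and right, so under that assumption 0.5 resolves to 2 required clauses and 0.49 to 1, which
// is consistent with the hit counts asserted above.
private static int ResolveMinShouldMatch(float minNrShouldMatch, int numOptionalClauses)
{
    if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f)
    {
        return (int)minNrShouldMatch;                              // absolute count
    }
    return (int)Math.Round(minNrShouldMatch * numOptionalClauses); // ratio of the optional clauses
}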
public void GRandom() { int numDocs = TestUtil.NextInt(Random(), (10 * RANDOM_MULTIPLIER), (100 * RANDOM_MULTIPLIER)); Directory dir = null; RandomIndexWriter writer = null; IndexReader ir = null; try { dir = NewDirectory(); writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), Similarity, TimeZone); int maxLength = TestUtil.NextInt(Random(), 5, 50); List <string> originals = new List <string>(numDocs); List <string[]> breaks = new List <string[]>(numDocs); for (int i = 0; i < numDocs; i++) { string orig = ""; if (Random().nextBoolean()) { while (!GoodTestString(orig)) { orig = TestUtil.RandomSimpleString(Random(), maxLength); } } else { while (!GoodTestString(orig)) { orig = TestUtil.RandomUnicodeString(Random(), maxLength); } } originals.Add(orig); int totalLength = orig.CodePointCount(0, orig.Length); int breakAt = orig.OffsetByCodePoints(0, TestUtil.NextInt(Random(), 1, totalLength - 1)); string[] broken = new string[2]; broken[0] = orig.Substring(0, breakAt - 0); broken[1] = orig.Substring(breakAt); breaks.Add(broken); Document doc = new Document(); doc.Add(NewTextField("random_break", broken[0] + " " + broken[1], Field.Store.NO)); doc.Add(NewTextField("random_combine", orig, Field.Store.NO)); writer.AddDocument(doc); } writer.Commit(); writer.Dispose(); ir = DirectoryReader.Open(dir); WordBreakSpellChecker wbsp = new WordBreakSpellChecker(); wbsp.MaxChanges = (1); wbsp.MinBreakWordLength = (1); wbsp.MinSuggestionFrequency = (1); wbsp.MaxCombineWordLength = (maxLength); for (int i = 0; i < originals.size(); i++) { string orig = originals[i]; string left = breaks[i][0]; string right = breaks[i][1]; { Term term = new Term("random_break", orig); SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); bool failed = true; foreach (SuggestWord[] sw1 in sw) { assertTrue(sw1.Length == 2); if (sw1[0].String.equals(left) && sw1[1].String.equals(right)) { failed = false; } } assertFalse("Failed getting break suggestions\n >Original: " + orig + "\n >Left: " + left + "\n >Right: " + right, failed); } { Term[] terms = { new Term("random_combine", left), new Term("random_combine", right) }; CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS); bool failed = true; foreach (CombineSuggestion cs1 in cs) { assertTrue(cs1.OriginalTermIndexes.Length == 2); if (cs1.Suggestion.String.equals(left + right)) { failed = false; } } assertFalse("Failed getting combine suggestions\n >Original: " + orig + "\n >Left: " + left + "\n >Right: " + right, failed); } } } catch (Exception e) { throw e; } finally { try { ir.Dispose(); } catch (Exception /*e1*/) { } try { writer.Dispose(); } catch (Exception /*e1*/) { } try { dir.Dispose(); } catch (Exception /*e1*/) { } } }
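// Sketch: GoodTestString is a private helper of the test class and is not shown in this
// excerpt; its exact body is an assumption. From the way GRandom uses it, a candidate string
// must contain at least two code points (so a break position exists) and should survive the
// whitespace tokenizer as a single term, so a plausible version looks like this:
private static bool GoodTestStringSketch(string s)
{
    if (s.CodePointCount(0, s.Length) < 2)
    {
        return false; // need at least two code points so a break point can be picked
    }
    foreach (char c in s)
    {
        if (char.IsWhiteSpace(c))
        {
            return false; // whitespace would split the original term during analysis
        }
    }
    return true;
}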
public virtual void TestParsingAndSearching() { string field = "content"; string[] docs = new string[] { "\\ abcdefg1", "\\x00079 hijklmn1", "\\\\ opqrstu1" }; // queries that should find all docs Query[] matchAll = new Query[] { new WildcardQuery(new Term(field, "*")), new WildcardQuery(new Term(field, "*1")), new WildcardQuery(new Term(field, "**1")), new WildcardQuery(new Term(field, "*?")), new WildcardQuery(new Term(field, "*?1")), new WildcardQuery(new Term(field, "?*1")), new WildcardQuery(new Term(field, "**")), new WildcardQuery(new Term(field, "***")), new WildcardQuery(new Term(field, "\\\\*")) }; // queries that should find no docs Query[] matchNone = new Query[] { new WildcardQuery(new Term(field, "a*h")), new WildcardQuery(new Term(field, "a?h")), new WildcardQuery(new Term(field, "*a*h")), new WildcardQuery(new Term(field, "?a")), new WildcardQuery(new Term(field, "a?")) }; PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][] { new PrefixQuery[] { new PrefixQuery(new Term(field, "a")), new PrefixQuery(new Term(field, "ab")), new PrefixQuery(new Term(field, "abc")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "h")), new PrefixQuery(new Term(field, "hi")), new PrefixQuery(new Term(field, "hij")), new PrefixQuery(new Term(field, "\\x0007")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "o")), new PrefixQuery(new Term(field, "op")), new PrefixQuery(new Term(field, "opq")), new PrefixQuery(new Term(field, "\\\\")) } }; WildcardQuery[][] matchOneDocWild = new WildcardQuery[][] { new WildcardQuery[] { new WildcardQuery(new Term(field, "*a*")), new WildcardQuery(new Term(field, "*ab*")), new WildcardQuery(new Term(field, "*abc**")), new WildcardQuery(new Term(field, "ab*e*")), new WildcardQuery(new Term(field, "*g?")), new WildcardQuery(new Term(field, "*f?1")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*h*")), new WildcardQuery(new Term(field, "*hi*")), new WildcardQuery(new Term(field, "*hij**")), new WildcardQuery(new Term(field, "hi*k*")), new WildcardQuery(new Term(field, "*n?")), new WildcardQuery(new Term(field, "*m?1")), new WildcardQuery(new Term(field, "hij**")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*o*")), new WildcardQuery(new Term(field, "*op*")), new WildcardQuery(new Term(field, "*opq**")), new WildcardQuery(new Term(field, "op*q*")), new WildcardQuery(new Term(field, "*u?")), new WildcardQuery(new Term(field, "*t?1")), new WildcardQuery(new Term(field, "opq**")) } }; // prepare the index Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(field, docs[i], Field.Store.NO)); iw.AddDocument(doc); } iw.Dispose(); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); // test queries that must find all foreach (Query q in matchAll) { if (VERBOSE) { Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(docs.Length, hits.Length); } // test queries that must find none foreach (Query q in matchNone) { if (VERBOSE) { Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); } // test the prefix queries find only one doc for (int i = 0; i <
matchOneDocPrefix.Length; i++) { for (int j = 0; j < matchOneDocPrefix[i].Length; j++) { Query q = matchOneDocPrefix[i][j]; if (VERBOSE) { Console.WriteLine("match 1 prefix: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(i, hits[0].Doc); } } // test the wildcard queries find only one doc for (int i = 0; i < matchOneDocWild.Length; i++) { for (int j = 0; j < matchOneDocWild[i].Length; j++) { Query q = matchOneDocWild[i][j]; if (VERBOSE) { Console.WriteLine("match 1 wild: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(i, hits[0].Doc); } } reader.Dispose(); dir.Dispose(); }
public virtual void TestSetPosition() { Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this); Directory store = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), store, analyzer); Document d = new Document(); d.Add(NewTextField("field", "bogus", Field.Store.YES)); writer.AddDocument(d); IndexReader reader = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(reader); DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1")); pos.NextDoc(); // first token should be at position 0 Assert.AreEqual(0, pos.NextPosition()); pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2")); pos.NextDoc(); // second token should be at position 2 Assert.AreEqual(2, pos.NextPosition()); PhraseQuery q; ScoreDoc[] hits; q = new PhraseQuery(); q.Add(new Term("field", "1")); q.Add(new Term("field", "2")); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); // same as previous, just specify positions explicitly. q = new PhraseQuery(); q.Add(new Term("field", "1"), 0); q.Add(new Term("field", "2"), 1); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); // specifying correct positions should find the phrase. q = new PhraseQuery(); q.Add(new Term("field", "1"), 0); q.Add(new Term("field", "2"), 2); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); q = new PhraseQuery(); q.Add(new Term("field", "2")); q.Add(new Term("field", "3")); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); q = new PhraseQuery(); q.Add(new Term("field", "3")); q.Add(new Term("field", "4")); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); // phrase query would find it when correct positions are specified. q = new PhraseQuery(); q.Add(new Term("field", "3"), 0); q.Add(new Term("field", "4"), 0); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); // phrase query should fail for a non-existing searched term // even if another searched term exists in the same searched position. q = new PhraseQuery(); q.Add(new Term("field", "3"), 0); q.Add(new Term("field", "9"), 0); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); // multi-phrase query should succeed for a non-existing searched term // because another searched term exists in the same searched position. MultiPhraseQuery mq = new MultiPhraseQuery(); mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0); hits = searcher.Search(mq, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); q = new PhraseQuery(); q.Add(new Term("field", "2")); q.Add(new Term("field", "4")); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); q = new PhraseQuery(); q.Add(new Term("field", "3")); q.Add(new Term("field", "5")); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); q = new PhraseQuery(); q.Add(new Term("field", "4")); q.Add(new Term("field", "5")); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); q = new PhraseQuery(); q.Add(new Term("field", "2")); q.Add(new Term("field", "5")); hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); reader.Dispose(); store.Dispose(); }
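// Sketch: the AnalyzerAnonymousInnerClassHelper used by TestSetPosition is defined elsewhere
// and not shown in this excerpt. The test only relies on it emitting tokens with preset
// position increments ("1" at position 0, "2" at position 2, "3" and "4" sharing a position),
// so a minimal tokenizer consistent with those assertions -- the token texts and increments
// below are assumptions chosen to reproduce them -- could look like this:
private sealed class FixedIncrementTokenizer : Tokenizer
{
    private static readonly string[] Tokens = { "1", "2", "3", "4", "5" };
    private static readonly int[] Increments = { 1, 2, 1, 0, 1 }; // positions 0, 2, 3, 3, 4
    private readonly ICharTermAttribute termAtt;
    private readonly IPositionIncrementAttribute posIncrAtt;
    private int index;

    public FixedIncrementTokenizer(TextReader input)
        : base(input)
    {
        termAtt = AddAttribute<ICharTermAttribute>();
        posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    }

    public override bool IncrementToken()
    {
        if (index == Tokens.Length)
        {
            return false;
        }
        ClearAttributes();
        termAtt.SetEmpty().Append(Tokens[index]);
        posIncrAtt.PositionIncrement = Increments[index];
        index++;
        return true;
    }

    public override void Reset()
    {
        base.Reset();
        index = 0;
    }
}
// An Analyzer would hand this tokenizer out from CreateComponents, e.g.
// protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
//     => new TokenStreamComponents(new FixedIncrementTokenizer(reader));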
public void TestExtend() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter(Random, dir, analyzer); var docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { // this one boosts the termQuery("field" "universe") by 10x CommonTermsQuery query = new ExtendedCommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"2", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"0", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } r.Dispose(); w.Dispose(); dir.Dispose(); }
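// Sketch: ExtendedCommonTermsQuery is defined elsewhere in the test class. Per the comment in
// TestExtend it boosts the term query for "universe"; assuming it does so through the
// NewTermQuery hook exposed by CommonTermsQuery, a plausible shape is the following (the class
// name and the exact boost are illustrative):
private sealed class ExtendedCommonTermsQuerySketch : CommonTermsQuery
{
    public ExtendedCommonTermsQuerySketch(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency)
        : base(highFreqOccur, lowFreqOccur, maxTermFrequency)
    {
    }

    protected override Query NewTermQuery(Term term, TermContext context)
    {
        Query query = base.NewTermQuery(term, context);
        if (term.Equals(new Term("field", "universe")))
        {
            query.Boost = 10f; // the "10x" mentioned in the comment above
        }
        return query;
    }
}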
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewStringField("id", "0", Field.Store.YES)); doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); // Do ordinary BooleanQuery: BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = GetSearcher(r); searcher.Similarity = new DefaultSimilarity(); TopDocs hits = searcher.Search(bq, 10); Assert.AreEqual(2, hits.TotalHits); Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id")); // Now, resort using PhraseQuery: PhraseQuery pq = new PhraseQuery(); pq.Slop = 5; pq.Add(new Term("field", "wizard")); pq.Add(new Term("field", "oz")); TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits2.TotalHits); Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id")); // Resort using SpanNearQuery: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard")); SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz")); SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true); TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits3.TotalHits); Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id")); r.Dispose(); dir.Dispose(); }
public override void BeforeClass() // LUCENENET specific - renamed from BeforeClassDrillDownQueryTest() to ensure calling order { base.BeforeClass(); dir = NewDirectory(); Random r = Random; RandomIndexWriter writer = new RandomIndexWriter(r, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(r, MockTokenizer.KEYWORD, false))); taxoDir = NewDirectory(); ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); config = new FacetsConfig(); // Randomize the per-dim config: config.SetHierarchical("a", Random.NextBoolean()); config.SetMultiValued("a", Random.NextBoolean()); if (Random.NextBoolean()) { config.SetIndexFieldName("a", "$a"); } config.SetRequireDimCount("a", true); config.SetHierarchical("b", Random.NextBoolean()); config.SetMultiValued("b", Random.NextBoolean()); if (Random.NextBoolean()) { config.SetIndexFieldName("b", "$b"); } config.SetRequireDimCount("b", true); for (int i = 0; i < 100; i++) { Document doc = new Document(); if (i % 2 == 0) // 50 { doc.Add(new TextField("content", "foo", Field.Store.NO)); } if (i % 3 == 0) // 33 { doc.Add(new TextField("content", "bar", Field.Store.NO)); } if (i % 4 == 0) // 25 { if (r.NextBoolean()) { doc.Add(new FacetField("a", "1")); } else { doc.Add(new FacetField("a", "2")); } } if (i % 5 == 0) // 20 { doc.Add(new FacetField("b", "1")); } writer.AddDocument(config.Build(taxoWriter, doc)); } taxoWriter.Dispose(); reader = writer.GetReader(); writer.Dispose(); taxo = new DirectoryTaxonomyReader(taxoDir); }
public virtual void TestDocValuesIntegration() { AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value"))); doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value"))); doc.Add(new NumericDocValuesField("numeric", 42)); if (DefaultCodecSupportsSortedSet) { doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1"))); doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2"))); } iw.AddDocument(doc); DirectoryReader ir = iw.GetReader(); iw.Dispose(); AtomicReader ar = GetOnlySegmentReader(ir); BytesRef scratch = new BytesRef(); // Binary type: can be retrieved via getTerms() try { FieldCache.DEFAULT.GetInt32s(ar, "binary", false); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true); binary.Get(0, scratch); Assert.AreEqual("binary value", scratch.Utf8ToString()); try { FieldCache.DEFAULT.GetTermsIndex(ar, "binary"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetDocTermOrds(ar, "binary"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "binary"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary"); Assert.IsTrue(bits.Get(0)); // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds() try { FieldCache.DEFAULT.GetInt32s(ar, "sorted", false); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "sorted"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true); binary.Get(0, scratch); Assert.AreEqual("sorted value", scratch.Utf8ToString()); SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted"); Assert.AreEqual(0, sorted.GetOrd(0)); Assert.AreEqual(1, sorted.ValueCount); sorted.Get(0, scratch); Assert.AreEqual("sorted value", scratch.Utf8ToString()); SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted"); sortedSet.SetDocument(0); Assert.AreEqual(0, sortedSet.NextOrd()); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); Assert.AreEqual(1, sortedSet.ValueCount); bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted"); Assert.IsTrue(bits.Get(0)); // Numeric type: can be retrieved via getInts() and so on Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false); Assert.AreEqual(42, numeric.Get(0)); try { FieldCache.DEFAULT.GetTerms(ar, "numeric", true); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetTermsIndex(ar, "numeric"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { 
FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "numeric"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric"); Assert.IsTrue(bits.Get(0)); // SortedSet type: can be retrieved via getDocTermOrds() if (DefaultCodecSupportsSortedSet) { try { FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetTerms(ar, "sortedset", true); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } try { new DocTermOrds(ar, null, "sortedset"); Assert.Fail(); } #pragma warning disable 168 catch (InvalidOperationException expected) #pragma warning restore 168 { } sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset"); sortedSet.SetDocument(0); Assert.AreEqual(0, sortedSet.NextOrd()); Assert.AreEqual(1, sortedSet.NextOrd()); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); Assert.AreEqual(2, sortedSet.ValueCount); bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset"); Assert.IsTrue(bits.Get(0)); } ir.Dispose(); dir.Dispose(); }
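// The repeated try / Assert.Fail / empty-catch blocks in TestDocValuesIntegration could be
// collapsed with a small helper; this is a refactoring sketch, not part of the test class.
private static void AssertInvalidOperation(Action action)
{
    try
    {
        action();
        Assert.Fail("expected InvalidOperationException");
    }
    catch (InvalidOperationException)
    {
        // expected: FieldCache refuses to load the field as a mismatched type
    }
}
// usage: AssertInvalidOperation(() => FieldCache.DEFAULT.GetInt32s(ar, "binary", false));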
public virtual void TestTransitionAPI() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Documents.Document doc = new Documents.Document(); doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("tokenized_reader", new StringReader("abc xyz"))); doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); doc.Add(new Field("binary", new byte[10])); doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); doc = r.Document(0); // 4 stored fields Assert.AreEqual(4, doc.Fields.Count); Assert.AreEqual("abc", doc.Get("stored")); Assert.AreEqual("abc xyz", doc.Get("stored_indexed")); Assert.AreEqual("abc xyz", doc.Get("stored_tokenized")); BytesRef br = doc.GetBinaryValue("binary"); Assert.IsNotNull(br); Assert.AreEqual(10, br.Length); IndexSearcher s = new IndexSearcher(r); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits); foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" }) { Fields tvFields = r.GetTermVectors(0); Terms tvs = tvFields.Terms(field); Assert.IsNotNull(tvs); Assert.AreEqual(2, tvs.Size()); TermsEnum tvsEnum = tvs.Iterator(null); Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next()); DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null); if (field.Equals("tv")) { Assert.IsNull(dpEnum); } else { Assert.IsNotNull(dpEnum); } Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next()); Assert.IsNull(tvsEnum.Next()); } r.Dispose(); dir.Dispose(); }
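// For reference, a sketch of roughly equivalent 4.x field classes for the legacy Field
// constructors exercised by TestTransitionAPI (illustrative only; the term-vector options
// need an explicit FieldType):
var modernDoc = new Document
{
    new StoredField("stored", "abc"),                               // Store.YES, Index.NO
    new StringField("stored_indexed", "abc xyz", Field.Store.YES),  // NOT_ANALYZED
    new TextField("stored_tokenized", "abc xyz", Field.Store.YES),  // ANALYZED
    new StringField("indexed", "abc xyz", Field.Store.NO),
    new TextField("tokenized", "abc xyz", Field.Store.NO),
    new StoredField("binary", new byte[10]),
};
var tvType = new FieldType(TextField.TYPE_NOT_STORED)
{
    StoreTermVectors = true,
    StoreTermVectorPositions = true,
    StoreTermVectorOffsets = true,
};
modernDoc.Add(new Field("tv_pos_off", "abc xyz", tvType));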
private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument) { Random random = Random; int numDocs = TestUtil.NextInt32(random, 138, 1145) * RANDOM_MULTIPLIER; int numGroups = TestUtil.NextInt32(random, 1, numDocs / 4); int numFacets = TestUtil.NextInt32(random, 1, numDocs / 6); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups); } List <string> groups = new List <string>(); for (int i = 0; i < numGroups; i++) { groups.Add(GenerateRandomNonEmptyString()); } List <string> facetValues = new List <string>(); for (int i = 0; i < numFacets; i++) { facetValues.Add(GenerateRandomNonEmptyString()); } string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)]; if (VERBOSE) { Console.WriteLine("TEST: create fake content"); } for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++) { contentBrs[contentIDX] = GenerateRandomNonEmptyString(); if (VERBOSE) { Console.WriteLine(" content=" + contentBrs[contentIDX]); } } Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( random, dir, NewIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random) ) ); bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); bool useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean(); Document doc = new Document(); Document docNoGroup = new Document(); Document docNoFacet = new Document(); Document docNoGroupNoFacet = new Document(); Field group = NewStringField("group", "", Field.Store.NO); Field groupDc = new SortedDocValuesField("group_dv", new BytesRef()); if (useDv) { doc.Add(groupDc); docNoFacet.Add(groupDc); } doc.Add(group); docNoFacet.Add(group); Field[] facetFields; if (useDv) { Debug.Assert(!multipleFacetValuesPerDocument); facetFields = new Field[2]; facetFields[0] = NewStringField("facet", "", Field.Store.NO); doc.Add(facetFields[0]); docNoGroup.Add(facetFields[0]); facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef()); doc.Add(facetFields[1]); docNoGroup.Add(facetFields[1]); } else { facetFields = multipleFacetValuesPerDocument ? 
new Field[2 + random.nextInt(6)] : new Field[1]; for (int i = 0; i < facetFields.Length; i++) { facetFields[i] = NewStringField("facet", "", Field.Store.NO); doc.Add(facetFields[i]); docNoGroup.Add(facetFields[i]); } } Field content = NewStringField("content", "", Field.Store.NO); doc.Add(content); docNoGroup.Add(content); docNoFacet.Add(content); docNoGroupNoFacet.Add(content); ISet <string> uniqueFacetValues = new JCG.SortedSet <string>(Comparer <string> .Create((a, b) => { if (a == b) { return(0); } else if (a == null) { return(-1); } else if (b == null) { return(1); } else { return(a.CompareToOrdinal(b)); } })); // LUCENENET NOTE: Need JCG.Dictionary here because of null keys IDictionary <string, JCG.Dictionary <string, ISet <string> > > searchTermToFacetToGroups = new Dictionary <string, JCG.Dictionary <string, ISet <string> > >(); int facetWithMostGroups = 0; for (int i = 0; i < numDocs; i++) { string groupValue; if (random.nextInt(24) == 17) { // So we test the "doc doesn't have the group'd // field" case: if (useDv) { groupValue = ""; } else { groupValue = null; } } else { groupValue = groups[random.nextInt(groups.size())]; } string contentStr = contentBrs[random.nextInt(contentBrs.Length)]; if (!searchTermToFacetToGroups.TryGetValue(contentStr, out JCG.Dictionary <string, ISet <string> > facetToGroups)) { searchTermToFacetToGroups[contentStr] = facetToGroups = new JCG.Dictionary <string, ISet <string> >(); } List <string> facetVals = new List <string>(); if (useDv || random.nextInt(24) != 18) { if (useDv) { string facetValue = facetValues[random.nextInt(facetValues.size())]; uniqueFacetValues.Add(facetValue); if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet)) { facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>(); } groupsInFacet.add(groupValue); if (groupsInFacet.size() > facetWithMostGroups) { facetWithMostGroups = groupsInFacet.size(); } facetFields[0].SetStringValue(facetValue); facetFields[1].SetBytesValue(new BytesRef(facetValue)); facetVals.Add(facetValue); } else { foreach (Field facetField in facetFields) { string facetValue = facetValues[random.nextInt(facetValues.size())]; uniqueFacetValues.Add(facetValue); if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet)) { facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>(); } groupsInFacet.add(groupValue); if (groupsInFacet.size() > facetWithMostGroups) { facetWithMostGroups = groupsInFacet.size(); } facetField.SetStringValue(facetValue); facetVals.Add(facetValue); } } } else { uniqueFacetValues.Add(null); if (!facetToGroups.TryGetValue(null, out ISet <string> groupsInFacet)) { facetToGroups[null] = groupsInFacet = new JCG.HashSet <string>(); } groupsInFacet.add(groupValue); if (groupsInFacet.size() > facetWithMostGroups) { facetWithMostGroups = groupsInFacet.size(); } } if (VERBOSE) { Console.WriteLine(" doc content=" + contentStr + " group=" + (groupValue == null ? 
"null" : groupValue) + " facetVals=" + Collections.ToString(facetVals)); } if (groupValue != null) { if (useDv) { groupDc.SetBytesValue(new BytesRef(groupValue)); } group.SetStringValue(groupValue); } else if (useDv) { // DV cannot have missing values: groupDc.SetBytesValue(new BytesRef()); } content.SetStringValue(contentStr); if (groupValue == null && !facetVals.Any()) { writer.AddDocument(docNoGroupNoFacet); } else if (!facetVals.Any()) { writer.AddDocument(docNoFacet); } else if (groupValue == null) { writer.AddDocument(docNoGroup); } else { writer.AddDocument(doc); } } DirectoryReader reader = writer.GetReader(); writer.Dispose(); return(new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv)); }
public virtual void TestRandomSampling() { Directory dir = NewDirectory(); Directory taxoDir = NewDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(10000); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO)); doc.Add(new FacetField("iMod10", Convert.ToString(i % 10))); writer.AddDocument(config.Build(taxoWriter, doc)); } Random random = Random(); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); var taxoReader = new DirectoryTaxonomyReader(taxoWriter); IOUtils.Close(writer, taxoWriter); // Test empty results RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // There should be no divisions by zero searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults); // There should be no divisions by zero and no null result Assert.NotNull(collectRandomZeroResults.GetMatchingDocs); // There should be no results at all foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs) { Assert.AreEqual(0, doc.totalHits); } // Now start searching and retrieve results. // Use a query to select half of the documents. TermQuery query = new TermQuery(new Term("EvenOdd", "even")); // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i % // 10) are hits. // there is a REAL small chance that one of the 5 values will be missed when // sampling. // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be // missing) ~ 10^-193 // so that is probably not going to happen. int maxNumChildren = 5; RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits FacetsCollector fc = new FacetsCollector(); searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent)); FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent); FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent); FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc); FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher); FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10"); FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10"); Assert.AreEqual(random100Result, exactResult); // we should have five children, but there is a small chance we have less. // (see above). Assert.True(random10Result.ChildCount <= maxNumChildren); // there should be one child at least. Assert.True(random10Result.ChildCount >= 1); // now calculate some statistics to determine if the sampled result is 'ok'. // because random sampling is used, the results will vary each time. 
int sum = 0; foreach (LabelAndValue lav in random10Result.LabelValues) { sum += (int)lav.value; } float mu = (float)sum / (float)maxNumChildren; float variance = 0; foreach (LabelAndValue lav in random10Result.LabelValues) { variance += (float)Math.Pow((mu - (int)lav.value), 2); } variance = variance / maxNumChildren; float sigma = (float)Math.Sqrt(variance); // we query only half the documents and have 5 categories. The average // number of docs in a category will thus be the total divided by 5*2 float targetMu = numDocs / (5.0f * 2.0f); // the average should be in the range and the standard deviation should not // be too great Assert.True(sigma < 200); Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma); IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }
public void TestMVGroupedFacetingWithDeletes() { string groupField = "hotel"; FieldType customType = new FieldType(); customType.IsStored = (true); Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); bool useDv = false; // Cannot assert this since we use NoMergePolicy: w.DoRandomForceMergeAssert = (false); // 0 Document doc = new Document(); doc.Add(new StringField("x", "x", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); AddField(doc, groupField, "a", useDv); doc.Add(new StringField("airport", "ams", Field.Store.NO)); w.AddDocument(doc); w.Commit(); w.DeleteDocuments(new TermQuery(new Term("airport", "ams"))); // 2 doc = new Document(); AddField(doc, groupField, "a", useDv); doc.Add(new StringField("airport", "ams", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); AddField(doc, groupField, "a", useDv); doc.Add(new StringField("airport", "dus", Field.Store.NO)); w.AddDocument(doc); // 4 doc = new Document(); AddField(doc, groupField, "b", useDv); doc.Add(new StringField("airport", "ams", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); AddField(doc, groupField, "b", useDv); doc.Add(new StringField("airport", "ams", Field.Store.NO)); w.AddDocument(doc); // 6 doc = new Document(); AddField(doc, groupField, "b", useDv); doc.Add(new StringField("airport", "ams", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 7 doc = new Document(); doc.Add(new StringField("x", "x", Field.Store.NO)); w.AddDocument(doc); w.Commit(); w.Dispose(); IndexSearcher indexSearcher = NewSearcher(DirectoryReader.Open(dir)); AbstractGroupFacetCollector groupedAirportFacetCollector = CreateRandomCollector(groupField, "airport", null, true); indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector); TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false); assertEquals(3, airportResult.TotalCount); assertEquals(1, airportResult.TotalMissingCount); IList <TermGroupFacetCollector.FacetEntry> entries = airportResult.GetFacetEntries(0, 10); assertEquals(2, entries.size()); assertEquals("ams", entries[0].Value.Utf8ToString()); assertEquals(2, entries[0].Count); assertEquals("dus", entries[1].Value.Utf8ToString()); assertEquals(1, entries[1].Count); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestRandomStringSort() { Random random = new Random(Random().Next()); int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, Similarity, TimeZone); bool allowDups = random.NextBoolean(); HashSet <string> seen = new HashSet <string>(); int maxLength = TestUtil.NextInt(random, 5, 100); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups); } int numDocs = 0; IList <BytesRef> docValues = new List <BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { Document doc = new Document(); // 10% of the time, the document is missing the value: BytesRef br; if (Random().Next(10) != 7) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random, maxLength); } else { s = TestUtil.RandomUnicodeString(random, maxLength); } if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (VERBOSE) { Console.WriteLine(" " + numDocs + ": s=" + s); } br = new BytesRef(s); if (DefaultCodecSupportsDocValues()) { doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); } else { doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO)); } doc.Add(NewStringField("string", s, Field.Store.NO)); docValues.Add(br); } else { br = null; if (VERBOSE) { Console.WriteLine(" " + numDocs + ": <missing>"); } docValues.Add(null); if (DefaultCodecSupportsDocValues()) { doc.Add(new NumericDocValuesField("id", numDocs)); } else { doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO)); } } doc.Add(new StoredField("id", numDocs)); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.Reader.Dispose(); } } IndexReader r = writer.Reader; writer.Dispose(); if (VERBOSE) { Console.WriteLine(" reader=" + r); } IndexSearcher idxS = NewSearcher(r, false, Similarity); int ITERS = AtLeast(100); for (int iter = 0; iter < ITERS; iter++) { bool reverse = random.NextBoolean(); TopFieldDocs hits; SortField sf; bool sortMissingLast; bool missingIsNull; if (DefaultCodecSupportsDocValues() && random.NextBoolean()) { sf = new SortField("stringdv", SortFieldType.STRING, reverse); // Can only use sort missing if the DVFormat // supports docsWithField: sortMissingLast = DefaultCodecSupportsDocsWithField() && Random().NextBoolean(); missingIsNull = DefaultCodecSupportsDocsWithField(); } else { sf = new SortField("string", SortFieldType.STRING, reverse); sortMissingLast = Random().NextBoolean(); missingIsNull = true; } if (sortMissingLast) { sf.MissingValue = SortField.STRING_LAST; } Sort sort; if (random.NextBoolean()) { sort = new Sort(sf); } else { sort = new Sort(sf, SortField.FIELD_DOC); } int hitCount = TestUtil.NextInt(random, 1, r.MaxDoc + 20); RandomFilter f = new RandomFilter(random, (float)random.NextDouble(), docValues); int queryType = random.Next(3); if (queryType == 0) { // force out of order BooleanQuery bq = new BooleanQuery(); // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2 // which delegates to BS if there are no mandatory clauses. bq.Add(new MatchAllDocsQuery(), Occur.SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return // the clause instead of BQ. 
bq.MinimumNumberShouldMatch = 1; hits = idxS.Search(bq, f, hitCount, sort, random.NextBoolean(), random.NextBoolean()); } else if (queryType == 1) { hits = idxS.Search(new ConstantScoreQuery(f), null, hitCount, sort, random.NextBoolean(), random.NextBoolean()); } else { hits = idxS.Search(new MatchAllDocsQuery(), f, hitCount, sort, random.NextBoolean(), random.NextBoolean()); } if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter + " " + hits.TotalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort); } // Compute expected results: var expected = f.MatchValues.ToList(); expected.Sort(new ComparerAnonymousInnerClassHelper(this, sortMissingLast)); if (reverse) { expected.Reverse(); } if (VERBOSE) { Console.WriteLine(" expected:"); for (int idx = 0; idx < expected.Count; idx++) { BytesRef br = expected[idx]; if (br == null && missingIsNull == false) { br = new BytesRef(); } Console.WriteLine(" " + idx + ": " + (br == null ? "<missing>" : br.Utf8ToString())); if (idx == hitCount - 1) { break; } } } if (VERBOSE) { Console.WriteLine(" actual:"); for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++) { FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX]; BytesRef br = (BytesRef)fd.Fields[0]; Console.WriteLine(" " + hitIDX + ": " + (br == null ? "<missing>" : br.Utf8ToString()) + " id=" + idxS.Doc(fd.Doc).Get("id")); } } for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++) { FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX]; BytesRef br = expected[hitIDX]; if (br == null && missingIsNull == false) { br = new BytesRef(); } // Normally, the old codecs (that don't support // docsWithField via doc values) will always return // an empty BytesRef for the missing case; however, // if all docs in a given segment were missing, in // that case it will return null! So we must map // null here, too: BytesRef br2 = (BytesRef)fd.Fields[0]; if (br2 == null && missingIsNull == false) { br2 = new BytesRef(); } Assert.AreEqual(br, br2, "hit=" + hitIDX + " has wrong sort value"); } } r.Dispose(); dir.Dispose(); }
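// Sketch: the ComparerAnonymousInnerClassHelper used above to build the expected ordering is
// not shown in this excerpt. From how it is used, it orders BytesRef values and places missing
// (null) values first by default or last when sortMissingLast is set; a comparer with that
// behavior (illustrative only) might look like this:
private sealed class MissingAwareBytesRefComparer : IComparer<BytesRef>
{
    private readonly bool sortMissingLast;

    public MissingAwareBytesRefComparer(bool sortMissingLast)
    {
        this.sortMissingLast = sortMissingLast;
    }

    public int Compare(BytesRef a, BytesRef b)
    {
        if (a == b) return 0;                           // also covers both-null
        if (a == null) return sortMissingLast ? 1 : -1;
        if (b == null) return sortMissingLast ? -1 : 1;
        return a.CompareTo(b);                          // unsigned byte order
    }
}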
public void TestBasic() { string groupField = "author"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); bool canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); DocValuesType valueType = vts[Random.nextInt(vts.Length)]; // 0 Document doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV, valueType); doc.Add(NewTextField("content", "random text", Field.Store.NO)); doc.Add(NewStringField("id_1", "1", Field.Store.NO)); doc.Add(NewStringField("id_2", "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV, valueType); doc.Add(NewTextField("content", "some more random text blob", Field.Store.NO)); doc.Add(NewStringField("id_1", "2", Field.Store.NO)); doc.Add(NewStringField("id_2", "2", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV, valueType); doc.Add(NewTextField("content", "some more random textual data", Field.Store.NO)); doc.Add(NewStringField("id_1", "3", Field.Store.NO)); doc.Add(NewStringField("id_2", "3", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // To ensure a second segment // 3 doc = new Document(); AddGroupField(doc, groupField, "author2", canUseIDV, valueType); doc.Add(NewTextField("content", "some random text", Field.Store.NO)); doc.Add(NewStringField("id_1", "4", Field.Store.NO)); doc.Add(NewStringField("id_2", "4", Field.Store.NO)); w.AddDocument(doc); // 4 doc = new Document(); AddGroupField(doc, groupField, "author3", canUseIDV, valueType); doc.Add(NewTextField("content", "some more random text", Field.Store.NO)); doc.Add(NewStringField("id_1", "5", Field.Store.NO)); doc.Add(NewStringField("id_2", "5", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); AddGroupField(doc, groupField, "author3", canUseIDV, valueType); doc.Add(NewTextField("content", "random blob", Field.Store.NO)); doc.Add(NewStringField("id_1", "6", Field.Store.NO)); doc.Add(NewStringField("id_2", "6", Field.Store.NO)); w.AddDocument(doc); // 6 -- no author field doc = new Document(); doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO)); doc.Add(NewStringField("id_1", "6", Field.Store.NO)); doc.Add(NewStringField("id_2", "6", Field.Store.NO)); w.AddDocument(doc); // 7 -- no author field doc = new Document(); doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO)); doc.Add(NewStringField("id_1", "7", Field.Store.NO)); doc.Add(NewStringField("id_2", "7", Field.Store.NO)); w.AddDocument(doc); IndexReader reader = w.GetReader(); IndexSearcher indexSearcher = NewSearcher(reader); w.Dispose(); int maxDoc = reader.MaxDoc; Sort sortWithinGroup = new Sort(new SortField("id_1", SortFieldType.INT32, true)); var allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "some")), 
allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); // STRING sort type triggers different implementation Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortFieldType.STRING, true)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup2, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortFieldType.STRING, false)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup3, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector); // 7 b/c higher doc id wins, even if order of field is not in reverse. assertTrue(ArrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
private void CreateRandomIndexes() { dir1 = NewDirectory(); dir2 = NewDirectory(); int numDocs = AtLeast(150); int numTerms = TestUtil.NextInt32(Random, 1, numDocs / 5); ISet <string> randomTerms = new JCG.HashSet <string>(); while (randomTerms.size() < numTerms) { randomTerms.add(TestUtil.RandomSimpleString(Random)); } terms = new JCG.List <string>(randomTerms); long seed = Random.NextInt64(); IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new J2N.Randomizer(seed))); IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new J2N.Randomizer(seed))); iwc2.SetMergePolicy(NewSortingMergePolicy(sort)); RandomIndexWriter iw1 = new RandomIndexWriter(new J2N.Randomizer(seed), dir1, iwc1); RandomIndexWriter iw2 = new RandomIndexWriter(new J2N.Randomizer(seed), dir2, iwc2); for (int i = 0; i < numDocs; ++i) { if (Random.nextInt(5) == 0 && i != numDocs - 1) { string term = RandomPicks.RandomFrom(Random, terms); iw1.DeleteDocuments(new Term("s", term)); iw2.DeleteDocuments(new Term("s", term)); } Document doc = randomDocument(); iw1.AddDocument(doc); iw2.AddDocument(doc); if (Random.nextInt(8) == 0) { iw1.Commit(); iw2.Commit(); } } // Make sure we have something to merge iw1.Commit(); iw2.Commit(); Document doc2 = randomDocument(); // NOTE: don't use RIW.addDocument directly, since it sometimes commits // which may trigger a merge, in which case forceMerge may not do anything. // With field updates this is a problem, since the updates can go into the // single segment in the index, and therefore the index won't be sorted. // This hurts the assumption of the test later on, that the index is sorted // by SortingMP. iw1.IndexWriter.AddDocument(doc2); iw2.IndexWriter.AddDocument(doc2); if (DefaultCodecSupportsFieldUpdates) { // update NDV of docs belonging to one term (covers many documents) long value = Random.NextInt64(); string term = RandomPicks.RandomFrom(Random, terms); iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value); iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value); } iw1.ForceMerge(1); iw2.ForceMerge(1); iw1.Dispose(); iw2.Dispose(); reader = DirectoryReader.Open(dir1); sortedReader = DirectoryReader.Open(dir2); }
public virtual void TestRandom() { Directory dir = NewDirectory(); int numDocs = AtLeast(1000); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int[] idToNum = new int[numDocs]; int maxValue = TestUtil.NextInt(Random(), 10, 1000000); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewStringField("id", "" + i, Field.Store.YES)); int numTokens = TestUtil.NextInt(Random(), 1, 10); StringBuilder b = new StringBuilder(); for (int j = 0; j < numTokens; j++) { b.Append("a "); } doc.Add(NewTextField("field", b.ToString(), Field.Store.NO)); idToNum[i] = Random().Next(maxValue); doc.Add(new NumericDocValuesField("num", idToNum[i])); w.AddDocument(doc); } IndexReader r = w.Reader; w.Dispose(); IndexSearcher s = NewSearcher(r); int numHits = TestUtil.NextInt(Random(), 1, numDocs); bool reverse = Random().NextBoolean(); //System.out.println("numHits=" + numHits + " reverse=" + reverse); TopDocs hits = s.Search(new TermQuery(new Term("field", "a")), numHits); TopDocs hits2 = new QueryRescorerAnonymousInnerClassHelper3(this, new FixedScoreQuery(idToNum, reverse)) .Rescore(s, hits, numHits); int[] expected = new int[numHits]; for (int i = 0; i < numHits; i++) { expected[i] = hits.ScoreDocs[i].Doc; } int reverseInt = reverse ? -1 : 1; Array.Sort(expected, new ComparatorAnonymousInnerClassHelper(this, idToNum, r, reverseInt)); bool fail = false; for (int i = 0; i < numHits; i++) { //System.out.println("expected=" + expected[i] + " vs " + hits2.ScoreDocs[i].Doc + " v=" + idToNum[Integer.parseInt(r.Document(expected[i]).Get("id"))]); if ((int)expected[i] != hits2.ScoreDocs[i].Doc) { //System.out.println(" diff!"); fail = true; } } Assert.IsFalse(fail); r.Dispose(); dir.Dispose(); }
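// Sketch: QueryRescorerAnonymousInnerClassHelper3 and FixedScoreQuery are defined elsewhere in
// the test class. Assuming the rescorer simply adopts the second-pass score (which is what the
// subsequent sort-by-idToNum comparison relies on), its Combine override would look roughly
// like this (class name illustrative):
private sealed class SecondPassOnlyRescorer : QueryRescorer
{
    public SecondPassOnlyRescorer(Query query)
        : base(query)
    {
    }

    protected override float Combine(float firstPassScore, bool secondPassMatches, float secondPassScore)
    {
        // ignore the first-pass score entirely; hits the second query misses keep a score of 0
        return secondPassMatches ? secondPassScore : 0.0f;
    }
}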
public override void BeforeClass() { base.BeforeClass(); ANALYZER = new MockAnalyzer(Random); qp = new StandardQueryParser(ANALYZER); IDictionary <string, /*Number*/ object> randomNumberMap = new JCG.Dictionary <string, object>(); /*SimpleDateFormat*/ string dateFormat; long randomDate; bool dateFormatSanityCheckPass; int count = 0; do { if (count > 100) { fail("This test could not find a sane random DateFormat/NumberFormat. Stopped trying after 100 iterations."); } dateFormatSanityCheckPass = true; LOCALE = RandomCulture(Random); TIMEZONE = RandomTimeZone(Random); DATE_STYLE = randomDateStyle(Random); TIME_STYLE = randomDateStyle(Random); //// assumes localized date pattern will have at least year, month, day, //// hour, minute //dateFormat = (SimpleDateFormat)DateFormat.getDateTimeInstance( // DATE_STYLE, TIME_STYLE, LOCALE); //// not all date patterns includes era, full year, timezone and second, //// so we add them here //dateFormat.applyPattern(dateFormat.toPattern() + " G s Z yyyy"); //dateFormat.setTimeZone(TIMEZONE); // assumes localized date pattern will have at least year, month, day, // hour, minute DATE_FORMAT = new NumberDateFormat(DATE_STYLE, TIME_STYLE, LOCALE) { TimeZone = TIMEZONE }; // not all date patterns include era, full year, timezone and second, // so we add them here DATE_FORMAT.SetDateFormat(DATE_FORMAT.GetDateFormat() + " g s z yyyy"); dateFormat = DATE_FORMAT.GetDateFormat(); do { randomDate = Random.nextLong(); // prune date value so it doesn't pass in insane values to some // calendars. randomDate = randomDate % 3400000000000L; // truncate to second randomDate = (randomDate / 1000L) * 1000L; // only positive values randomDate = Math.Abs(randomDate); } while (randomDate == 0L); dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, randomDate); dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, 0); dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, -randomDate); count++; } while (!dateFormatSanityCheckPass); //NUMBER_FORMAT = NumberFormat.getNumberInstance(LOCALE); //NUMBER_FORMAT.setMaximumFractionDigits((Random().nextInt() & 20) + 1); //NUMBER_FORMAT.setMinimumFractionDigits((Random().nextInt() & 20) + 1); //NUMBER_FORMAT.setMaximumIntegerDigits((Random().nextInt() & 20) + 1); //NUMBER_FORMAT.setMinimumIntegerDigits((Random().nextInt() & 20) + 1); NUMBER_FORMAT = new NumberFormat(LOCALE); double randomDouble; long randomLong; int randomInt; float randomFloat; while ((randomLong = Convert.ToInt64(NormalizeNumber(Math.Abs(Random.nextLong())) )) == 0L) { ; } while ((randomDouble = Convert.ToDouble(NormalizeNumber(Math.Abs(Random.NextDouble())) )) == 0.0) { ; } while ((randomFloat = Convert.ToSingle(NormalizeNumber(Math.Abs(Random.nextFloat())) )) == 0.0f) { ; } while ((randomInt = Convert.ToInt32(NormalizeNumber(Math.Abs(Random.nextInt())))) == 0) { ; } randomNumberMap.Put(NumericType.INT64.ToString(), randomLong); randomNumberMap.Put(NumericType.INT32.ToString(), randomInt); randomNumberMap.Put(NumericType.SINGLE.ToString(), randomFloat); randomNumberMap.Put(NumericType.DOUBLE.ToString(), randomDouble); randomNumberMap.Put(DATE_FIELD_NAME, randomDate); RANDOM_NUMBER_MAP = randomNumberMap.AsReadOnly(); directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)) .SetMergePolicy(NewLogMergePolicy())); Document doc = new Document(); IDictionary <String,
NumericConfig> numericConfigMap = new JCG.Dictionary <String, NumericConfig>(); IDictionary <String, Field> numericFieldMap = new JCG.Dictionary <String, Field>(); qp.NumericConfigMap = (numericConfigMap); foreach (NumericType type in Enum.GetValues(typeof(NumericType))) { if (type == NumericType.NONE) { continue; } numericConfigMap.Put(type.ToString(), new NumericConfig(PRECISION_STEP, NUMBER_FORMAT, type)); FieldType ft2 = new FieldType(Int32Field.TYPE_NOT_STORED); ft2.NumericType = (type); ft2.IsStored = (true); ft2.NumericPrecisionStep = (PRECISION_STEP); ft2.Freeze(); Field field; switch (type) { case NumericType.INT32: field = new Int32Field(type.ToString(), 0, ft2); break; case NumericType.SINGLE: field = new SingleField(type.ToString(), 0.0f, ft2); break; case NumericType.INT64: field = new Int64Field(type.ToString(), 0L, ft2); break; case NumericType.DOUBLE: field = new DoubleField(type.ToString(), 0.0, ft2); break; default: fail(); field = null; break; } numericFieldMap.Put(type.ToString(), field); doc.Add(field); } numericConfigMap.Put(DATE_FIELD_NAME, new NumericConfig(PRECISION_STEP, DATE_FORMAT, NumericType.INT64)); FieldType ft = new FieldType(Int64Field.TYPE_NOT_STORED); ft.IsStored = (true); ft.NumericPrecisionStep = (PRECISION_STEP); Int64Field dateField = new Int64Field(DATE_FIELD_NAME, 0L, ft); numericFieldMap.Put(DATE_FIELD_NAME, dateField); doc.Add(dateField); foreach (NumberType numberType in Enum.GetValues(typeof(NumberType))) { setFieldValues(numberType, numericFieldMap); if (VERBOSE) { Console.WriteLine("Indexing document: " + doc); } writer.AddDocument(doc); } reader = writer.GetReader(); searcher = NewSearcher(reader); writer.Dispose(); }
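// Hedged, purely illustrative stand-in (assumption): checkDateFormatSanity is defined elsewhere in
// this test class and works against the NumberDateFormat configured above. Its role is a
// format/parse round trip that rejects locale/pattern combinations which cannot reproduce the same
// instant. The sketch below shows that idea with plain .NET APIs; the name, signature and use of
// DateTime here are illustrative only.
private static bool CheckDateFormatSanitySketch(string pattern, long millisSinceEpoch, CultureInfo locale)
{
    try
    {
        DateTime value = DateTimeOffset.FromUnixTimeMilliseconds(millisSinceEpoch).UtcDateTime;
        string formatted = value.ToString(pattern, locale);
        DateTime roundTripped = DateTime.ParseExact(formatted, pattern, locale,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        return value == roundTripped;
    }
    catch (FormatException)
    {
        // The random locale/pattern combination cannot round-trip; report it as insane.
        return false;
    }
}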
public virtual void TestCustomDoublesValueSource() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); writer.AddDocument(doc); writer.AddDocument(doc); writer.AddDocument(doc); // Test wants 3 docs in one segment: writer.ForceMerge(1); var vs = new ValueSourceAnonymousInnerClassHelper(this, doc); FacetsConfig config = new FacetsConfig(); FacetsCollector fc = new FacetsCollector(); IndexReader r = writer.GetReader(); IndexSearcher s = NewSearcher(r); s.Search(new MatchAllDocsQuery(), fc); DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) }; Filter fastMatchFilter; AtomicBoolean filterWasUsed = new AtomicBoolean(); if (Random.NextBoolean()) { // Sort of silly: fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed); } else { fastMatchFilter = null; } if (VERBOSE) { Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter); } Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.GetTopChildren(10, "field").ToString()); Assert.True(fastMatchFilter == null || filterWasUsed); DrillDownQuery ddq = new DrillDownQuery(config); ddq.Add("field", ranges[1].GetFilter(fastMatchFilter, vs)); // Test simple drill-down: Assert.AreEqual(1, s.Search(ddq, 10).TotalHits); // Test drill-sideways after drill-down DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter); DrillSidewaysResult dsr = ds.Search(ddq, 10); Assert.AreEqual(1, dsr.Hits.TotalHits); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Dispose(r, writer, dir); }
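// Hedged sketch (assumption): CachingWrapperFilterAnonymousInnerClassHelper above is defined
// elsewhere; its only job in this test is to flip filterWasUsed when the fast-match filter is
// actually consulted. A hypothetical wrapper with the same observable effect might look like this
// (class name and AtomicBoolean.Value usage are assumptions):
private sealed class TrackingFilterSketch : Filter
{
    private readonly Filter innerFilter;
    private readonly AtomicBoolean wasUsed;

    public TrackingFilterSketch(Filter innerFilter, AtomicBoolean wasUsed)
    {
        this.innerFilter = innerFilter;
        this.wasUsed = wasUsed;
    }

    public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
    {
        wasUsed.Value = true; // record that the fast-match path was exercised
        return innerFilter.GetDocIdSet(context, acceptDocs);
    }
}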
public void TestRandom() { int numberOfRuns = TestUtil.NextInt32(Random, 3, 6); for (int iter = 0; iter < numberOfRuns; iter++) { if (Verbose) { Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns)); } int numDocs = TestUtil.NextInt32(Random, 100, 1000) * RandomMultiplier; int numGroups = TestUtil.NextInt32(Random, 1, numDocs); if (Verbose) { Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups); } JCG.List <BytesRef> groups = new JCG.List <BytesRef>(); for (int i = 0; i < numGroups; i++) { string randomValue; do { // B/c of DV based impl we can't see the difference between an empty string and a null value. // For that reason we don't generate empty string groups. randomValue = TestUtil.RandomRealisticUnicodeString(Random); } while ("".Equals(randomValue, StringComparison.Ordinal)); groups.Add(new BytesRef(randomValue)); } string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)]; if (Verbose) { Console.WriteLine("TEST: create fake content"); } for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++) { StringBuilder sb = new StringBuilder(); sb.append("real").append(Random.nextInt(3)).append(' '); int fakeCount = Random.nextInt(10); for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) { sb.append("fake "); } contentStrings[contentIDX] = sb.toString(); if (Verbose) { Console.WriteLine(" content=" + sb.toString()); } } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); bool preFlex = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); bool canUseIDV = !preFlex; DocValuesType valueType = vts[Random.nextInt(vts.Length)]; Document doc = new Document(); Document docNoGroup = new Document(); Field group = NewStringField("group", "", Field.Store.NO); doc.Add(group); Field valuesField = null; if (canUseIDV) { switch (valueType) { case DocValuesType.BINARY: valuesField = new BinaryDocValuesField("group_dv", new BytesRef()); break; case DocValuesType.SORTED: valuesField = new SortedDocValuesField("group_dv", new BytesRef()); break; default: fail("unhandled type"); break; } doc.Add(valuesField); } Field sort1 = NewStringField("sort1", "", Field.Store.NO); doc.Add(sort1); docNoGroup.Add(sort1); Field sort2 = NewStringField("sort2", "", Field.Store.NO); doc.Add(sort2); docNoGroup.Add(sort2); Field sort3 = NewStringField("sort3", "", Field.Store.NO); doc.Add(sort3); docNoGroup.Add(sort3); Field content = NewTextField("content", "", Field.Store.NO); doc.Add(content); docNoGroup.Add(content); Int32Field id = new Int32Field("id", 0, Field.Store.NO); doc.Add(id); docNoGroup.Add(id); GroupDoc[] groupDocs = new GroupDoc[numDocs]; for (int i = 0; i < numDocs; i++) { BytesRef groupValue; if (Random.nextInt(24) == 17) { // So we test the "doc doesn't have the group'd // field" case: groupValue = null; } else { groupValue = groups[Random.nextInt(groups.size())]; } GroupDoc groupDoc = new GroupDoc( i, groupValue, groups[Random.nextInt(groups.size())], groups[Random.nextInt(groups.size())], new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)), contentStrings[Random.nextInt(contentStrings.Length)] ); if (Verbose) { Console.WriteLine(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group is null ? 
"null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString()); } groupDocs[i] = groupDoc; if (groupDoc.group != null) { group.SetStringValue(groupDoc.group.Utf8ToString()); if (canUseIDV) { valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString())); } } sort1.SetStringValue(groupDoc.sort1.Utf8ToString()); sort2.SetStringValue(groupDoc.sort2.Utf8ToString()); sort3.SetStringValue(groupDoc.sort3.Utf8ToString()); content.SetStringValue(groupDoc.content); id.SetInt32Value(groupDoc.id); if (groupDoc.group is null) { w.AddDocument(docNoGroup); } else { w.AddDocument(doc); } } DirectoryReader r = w.GetReader(); w.Dispose(); // NOTE: intentional but temporary field cache insanity! FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false); int[] fieldIdToDocID = new int[numDocs]; for (int i = 0; i < numDocs; i++) { int fieldId = docIdToFieldId.Get(i); fieldIdToDocID[fieldId] = i; } try { IndexSearcher s = NewSearcher(r); if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(s.IndexReader.GetType())) { canUseIDV = false; } else { canUseIDV = !preFlex; } for (int contentID = 0; contentID < 3; contentID++) { ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs; foreach (ScoreDoc hit in hits) { GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)]; assertTrue(gd.score == 0.0); gd.score = hit.Score; int docId = gd.id; assertEquals(docId, docIdToFieldId.Get(hit.Doc)); } } foreach (GroupDoc gd in groupDocs) { assertTrue(gd.score != 0.0); } for (int searchIter = 0; searchIter < 100; searchIter++) { if (Verbose) { Console.WriteLine("TEST: searchIter=" + searchIter); } string searchTerm = "real" + Random.nextInt(3); bool sortByScoreOnly = Random.nextBoolean(); Sort sortWithinGroup = GetRandomSort(sortByScoreOnly); AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType); s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector); int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID); int[] actualGroupHeads = allGroupHeadsCollector.RetrieveGroupHeads(); // The actual group heads contains Lucene ids. Need to change them into our id value. for (int i = 0; i < actualGroupHeads.Length; i++) { actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]); } // Allows us the easily iterate and assert the actual and expected results. Array.Sort(expectedGroupHeads); Array.Sort(actualGroupHeads); if (Verbose) { Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name); Console.WriteLine("Sort within group: " + sortWithinGroup); Console.WriteLine("Num group: " + numGroups); Console.WriteLine("Num doc: " + numDocs); Console.WriteLine("\n=== Expected: \n"); foreach (int expectedDocId in expectedGroupHeads) { GroupDoc expectedGroupDoc = groupDocs[expectedDocId]; string expectedGroup = expectedGroupDoc.group is null ? 
null : expectedGroupDoc.group.Utf8ToString(); Console.WriteLine( string.Format(CultureInfo.InvariantCulture, "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}", expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(), expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId) ); } Console.WriteLine("\n=== Actual: \n"); foreach (int actualDocId in actualGroupHeads) { GroupDoc actualGroupDoc = groupDocs[actualDocId]; string actualGroup = actualGroupDoc.group is null ? null : actualGroupDoc.group.Utf8ToString(); Console.WriteLine( string.Format(CultureInfo.InvariantCulture, "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}", actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(), actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId) ); } Console.WriteLine("\n==================================================================================="); } assertArrayEquals(expectedGroupHeads, actualGroupHeads); } } finally { QueryUtils.PurgeFieldCache(r); } r.Dispose(); dir.Dispose(); } }
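// Hedged sketch (assumption): GroupDoc is a plain holder class defined elsewhere in this test; the
// fields and constructor below follow directly from how it is constructed and read in TestRandom
// above (score is mutable because it is filled in later from the collected hits).
internal sealed class GroupDoc
{
    internal readonly int id;
    internal readonly BytesRef group;   // null when the doc has no group field
    internal readonly BytesRef sort1;
    internal readonly BytesRef sort2;
    internal readonly BytesRef sort3;
    internal readonly string content;   // "realN fake fake ..."
    internal float score;               // assigned from the search hits

    internal GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, BytesRef sort3, string content)
    {
        this.id = id;
        this.group = group;
        this.sort1 = sort1;
        this.sort2 = sort2;
        this.sort3 = sort3;
        this.content = content;
    }
}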
public virtual void TestMixedRangeAndNonRangeTaxonomy() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); Directory td = NewDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); for (long l = 0; l < 100; l++) { Document doc = new Document(); // For computing range facet counts: doc.Add(new NumericDocValuesField("field", l)); // For drill down by numeric range: doc.Add(new Int64Field("field", l, Field.Store.NO)); if ((l & 3) == 0) { doc.Add(new FacetField("dim", "a")); } else { doc.Add(new FacetField("dim", "b")); } w.AddDocument(config.Build(tw, doc)); } IndexReader r = w.GetReader(); var tr = new DirectoryTaxonomyReader(tw); IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + s); } DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr); // First search, no drill downs: DrillDownQuery ddq = new DrillDownQuery(config); DrillSidewaysResult dsr = ds.Search(null, ddq, 10); Assert.AreEqual(100, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Second search, drill down on dim=b: ddq = new DrillDownQuery(config); ddq.Add("dim", "b"); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(75, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Third search, drill down on "less than or equal to 10": ddq = new DrillDownQuery(config); ddq.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true)); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(11, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Dispose(tw, tr, td, w, r, d); }
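// Hedged sketch (assumption): the DrillSidewaysAnonymousInnerClassHelper used above counts the
// "field" dimension against a fixed set of numeric ranges defined elsewhere. Ranges consistent with
// the expected labels and counts over the indexed values 0..99 would be along these lines (the
// array name is illustrative):
private static readonly Int64Range[] FIELD_RANGES_SKETCH = new Int64Range[]
{
    new Int64Range("less than 10", 0L, true, 10L, false),
    new Int64Range("less than or equal to 10", 0L, true, 10L, true),
    new Int64Range("over 90", 90L, false, 100L, false),
    new Int64Range("90 or above", 90L, true, 100L, false),
    new Int64Range("over 1000", 1000L, false, long.MaxValue, true)
};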
public virtual void TestRandomDoubles() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int numDocs = AtLeast(1000); double[] values = new double[numDocs]; double minValue = double.PositiveInfinity; double maxValue = double.NegativeInfinity; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); double v = Random.NextDouble(); values[i] = v; doc.Add(new DoubleDocValuesField("field", v)); doc.Add(new DoubleField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt32(Random, 1, 5); DoubleRange[] ranges = new DoubleRange[numRange]; int[] expectedCounts = new int[numRange]; double minAcceptedValue = double.PositiveInfinity; double maxAcceptedValue = double.NegativeInfinity; for (int rangeID = 0; rangeID < numRange; rangeID++) { double min; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { min = prevRange.Min; } else { min = prevRange.Max; } } else { min = Random.NextDouble(); } double max; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { max = prevRange.Min; } else { max = prevRange.Max; } } else { max = Random.NextDouble(); } if (min > max) { double x = min; min = max; max = x; } bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random.NextBoolean(); maxIncl = Random.NextBoolean(); } ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random.NextBoolean()) { if (Random.NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new DoubleFieldSource("field"); Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.Label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.Value); DoubleRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random.NextBoolean()) { 
if (Random.NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Dispose(w, r, dir); }
public virtual void TestPayloadsPos0() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockPayloadAnalyzer()); Document doc = new Document(); doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k"))); writer.AddDocument(doc); IndexReader readerFromWriter = writer.Reader; AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter); DocsAndPositionsEnum tp = r.TermPositionsEnum(new Term("content", "a")); int count = 0; Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); // "a" occurs 4 times Assert.AreEqual(4, tp.Freq()); Assert.AreEqual(0, tp.NextPosition()); Assert.AreEqual(1, tp.NextPosition()); Assert.AreEqual(3, tp.NextPosition()); Assert.AreEqual(6, tp.NextPosition()); // only one doc has "a" Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc()); IndexSearcher @is = NewSearcher(readerFromWriter); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 30, false); count = 0; bool sawZero = false; if (VERBOSE) { Console.WriteLine("\ngetPayloadSpans test"); } Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq); while (pspans.Next()) { if (VERBOSE) { Console.WriteLine("doc " + pspans.Doc() + ": span " + pspans.Start() + " to " + pspans.End()); } var payloads = pspans.Payload; sawZero |= pspans.Start() == 0; foreach (var bytes in payloads) { count++; if (VERBOSE) { Console.WriteLine(" payload: " + Encoding.UTF8.GetString((byte[])(Array)bytes)); } } } Assert.IsTrue(sawZero); Assert.AreEqual(5, count); // System.out.println("\ngetSpans test"); Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq); count = 0; sawZero = false; while (spans.Next()) { count++; sawZero |= spans.Start() == 0; // System.out.println(spans.Doc() + " - " + spans.Start() + " - " + // spans.End()); } Assert.AreEqual(4, count); Assert.IsTrue(sawZero); // System.out.println("\nPayloadSpanUtil test"); sawZero = false; PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext); var pls = psu.GetPayloadsForQuery(snq); count = pls.Count; foreach (var bytes in pls) { string s = Encoding.UTF8.GetString(bytes); //System.out.println(s); sawZero |= s.Equals("pos: 0"); } Assert.AreEqual(5, count); Assert.IsTrue(sawZero); writer.Dispose(); @is.IndexReader.Dispose(); dir.Dispose(); }
internal virtual void TestSort(bool useFrom, bool VERBOSE) { IndexReader reader = null; Directory dir = null; if (!VERBOSE) { Console.WriteLine("Verbosity disabled. Enable manually if needed."); } int numDocs = VERBOSE ? AtLeast(50) : AtLeast(1000); //final int numDocs = AtLeast(50); string[] tokens = new string[] { "a", "b", "c", "d", "e" }; if (VERBOSE) { Console.WriteLine("TEST: make index"); } { dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); // w.setDoRandomForceMerge(false); // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100)); string[] content = new string[AtLeast(20)]; for (int contentIDX = 0; contentIDX < content.Length; contentIDX++) { StringBuilder sb = new StringBuilder(); int numTokens = TestUtil.NextInt32(Random, 1, 10); for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) { sb.Append(tokens[Random.Next(tokens.Length)]).Append(' '); } content[contentIDX] = sb.ToString(); } for (int docIDX = 0; docIDX < numDocs; docIDX++) { Document doc = new Document(); doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO)); doc.Add(NewTextField("text", content[Random.Next(content.Length)], Field.Store.NO)); doc.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO)); int intValue; if (Random.Next(100) == 17) { intValue = int.MinValue; } else if (Random.Next(100) == 17) { intValue = int.MaxValue; } else { intValue = Random.Next(); } doc.Add(new Int32Field("int", intValue, Field.Store.NO)); if (VERBOSE) { Console.WriteLine(" doc=" + doc); } w.AddDocument(doc); } reader = w.GetReader(); w.Dispose(); } // NOTE: sometimes reader has just one segment, which is // important to test IndexSearcher searcher = NewSearcher(reader); IndexReaderContext ctx = searcher.TopReaderContext; ShardSearcher[] subSearchers; int[] docStarts; if (ctx is AtomicReaderContext) { subSearchers = new ShardSearcher[1]; docStarts = new int[1]; subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx); docStarts[0] = 0; } else { CompositeReaderContext compCTX = (CompositeReaderContext)ctx; int size = compCTX.Leaves.Count; subSearchers = new ShardSearcher[size]; docStarts = new int[size]; int docBase = 0; for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++) { AtomicReaderContext leave = compCTX.Leaves[searcherIDX]; subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX); docStarts[searcherIDX] = docBase; docBase += leave.Reader.MaxDoc; } } IList <SortField> sortFields = new List <SortField>(); sortFields.Add(new SortField("string", SortFieldType.STRING, true)); sortFields.Add(new SortField("string", SortFieldType.STRING, false)); sortFields.Add(new SortField("int", SortFieldType.INT32, true)); sortFields.Add(new SortField("int", SortFieldType.INT32, false)); sortFields.Add(new SortField("float", SortFieldType.SINGLE, true)); sortFields.Add(new SortField("float", SortFieldType.SINGLE, false)); sortFields.Add(new SortField(null, SortFieldType.SCORE, true)); sortFields.Add(new SortField(null, SortFieldType.SCORE, false)); sortFields.Add(new SortField(null, SortFieldType.DOC, true)); sortFields.Add(new SortField(null, SortFieldType.DOC, false)); for (int iter = 0; iter < 1000 * RANDOM_MULTIPLIER; iter++) { // TODO: custom FieldComp... 
Query query = new TermQuery(new Term("text", tokens[Random.Next(tokens.Length)])); Sort sort; if (Random.Next(10) == 4) { // Sort by score sort = null; } else { SortField[] randomSortFields = new SortField[TestUtil.NextInt32(Random, 1, 3)]; for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++) { randomSortFields[sortIDX] = sortFields[Random.Next(sortFields.Count)]; } sort = new Sort(randomSortFields); } int numHits = TestUtil.NextInt32(Random, 1, numDocs + 5); //final int numHits = 5; if (VERBOSE) { Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits); } int from = -1; int size = -1; // First search on whole index: TopDocs topHits; if (sort == null) { if (useFrom) { TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random.NextBoolean()); searcher.Search(query, c); from = TestUtil.NextInt32(Random, 0, numHits - 1); size = numHits - from; TopDocs tempTopHits = c.GetTopDocs(); if (from < tempTopHits.ScoreDocs.Length) { // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount // than TopDocs#merge currently has ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)]; Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length); tempTopHits.ScoreDocs = newScoreDocs; topHits = tempTopHits; } else { topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore); } } else { topHits = searcher.Search(query, numHits); } } else { TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean()); searcher.Search(query, c); if (useFrom) { from = TestUtil.NextInt32(Random, 0, numHits - 1); size = numHits - from; TopDocs tempTopHits = c.GetTopDocs(); if (from < tempTopHits.ScoreDocs.Length) { // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount // than TopDocs#merge currently has ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)]; Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length); tempTopHits.ScoreDocs = newScoreDocs; topHits = tempTopHits; } else { topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore); } } else { topHits = c.GetTopDocs(0, numHits); } } if (VERBOSE) { if (useFrom) { Console.WriteLine("from=" + from + " size=" + size); } Console.WriteLine(" top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore)); if (topHits.ScoreDocs != null) { for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++) { ScoreDoc sd = topHits.ScoreDocs[hitIDX]; Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score); } } } // ... then all shards: Weight w = searcher.CreateNormalizedWeight(query); TopDocs[] shardHits = new TopDocs[subSearchers.Length]; for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++) { TopDocs subHits; ShardSearcher subSearcher = subSearchers[shardIDX]; if (sort == null) { subHits = subSearcher.Search(w, numHits); } else { TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean()); subSearcher.Search(w, c); subHits = c.GetTopDocs(0, numHits); } shardHits[shardIDX] = subHits; if (VERBOSE) { Console.WriteLine(" shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? 
"null" : subHits.ScoreDocs.Length.ToString())); if (subHits.ScoreDocs != null) { foreach (ScoreDoc sd in subHits.ScoreDocs) { Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score); } } } } // Merge: TopDocs mergedHits; if (useFrom) { mergedHits = TopDocs.Merge(sort, from, size, shardHits); } else { mergedHits = TopDocs.Merge(sort, numHits, shardHits); } if (mergedHits.ScoreDocs != null) { // Make sure the returned shards are correct: for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++) { ScoreDoc sd = mergedHits.ScoreDocs[hitIDX]; Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard"); } } TestUtil.AssertEquals(topHits, mergedHits); } reader.Dispose(); dir.Dispose(); }