public override void SetUp() { base.SetUp(); dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergePolicy(NewLogMergePolicy()); var iw = new RandomIndexWriter(Random(), dir, iwc); var doc = new Document { NewStringField("id", "1", Field.Store.YES), NewTextField("body", "some contents and more contents", Field.Store.NO), new NumericDocValuesField("popularity", 5) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "2", Field.Store.YES), NewTextField("body", "another document with different contents", Field.Store.NO), new NumericDocValuesField("popularity", 20) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "3", Field.Store.YES), NewTextField("body", "crappy contents", Field.Store.NO), new NumericDocValuesField("popularity", 2) }; iw.AddDocument(doc); iw.ForceMerge(1); reader = iw.Reader; iw.Dispose(); }
public void TestFieldNotPresent() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int num = AtLeast(3); int skip = Random().Next(num); var terms = new List<Term>(); for (int i = 0; i < num; i++) { terms.Add(new Term("field" + i, "content1")); Document doc = new Document(); if (skip == i) { continue; } doc.Add(NewStringField("field" + i, "content1", Field.Store.YES)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader reader = w.Reader; w.Dispose(); assertEquals(1, reader.Leaves.size()); AtomicReaderContext context = reader.Leaves.First(); TermsFilter tf = new TermsFilter(terms); FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality()); reader.Dispose(); dir.Dispose(); }
public virtual void Test() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); conf.SetCodec(new Lucene46Codec()); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf); Document doc = new Document(); // these fields should sometimes get term vectors, etc Field idField = NewStringField("id", "", Field.Store.NO); Field bodyField = NewTextField("body", "", Field.Store.NO); Field dvField = new NumericDocValuesField("dv", 5); doc.Add(idField); doc.Add(bodyField); doc.Add(dvField); for (int i = 0; i < 100; i++) { idField.StringValue = Convert.ToString(i); bodyField.StringValue = TestUtil.RandomUnicodeString(Random()); riw.AddDocument(doc); if (Random().Next(7) == 0) { riw.Commit(); } // TODO: we should make a new format with a clean header... // if (Random().nextInt(20) == 0) { // riw.DeleteDocuments(new Term("id", Integer.toString(i))); // } } riw.Dispose(); CheckHeaders(dir); dir.Dispose(); }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true), Similarity, TimeZone); for (int i = 900; i < 1112; i++) { Document doc = new Document(); string num = Regex.Replace(Regex.Replace(English.IntToEnglish(i), "[-]", " "), "[,]", ""); doc.Add(NewTextField("numbers", num, Field.Store.NO)); writer.AddDocument(doc); } { Document doc = new Document(); doc.Add(NewTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO)); writer.AddDocument(doc); } { Document doc = new Document(); doc.Add(NewTextField("numbers", "hundredeight eightyeight yeight", Field.Store.NO)); writer.AddDocument(doc); } { Document doc = new Document(); doc.Add(NewTextField("numbers", "tres y cinco", Field.Store.NO)); writer.AddDocument(doc); } writer.Commit(); writer.Dispose(); }
public void BeforeClass() { Dir = NewDirectory(); Sdir1 = NewDirectory(); Sdir2 = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO)); writer.AddDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc); } writer.ForceMerge(1); swriter1.ForceMerge(1); swriter2.ForceMerge(1); writer.Dispose(); swriter1.Dispose(); swriter2.Dispose(); Reader = DirectoryReader.Open(Dir); Searcher = NewSearcher(Reader); MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true); MultiSearcher = NewSearcher(MultiReader); MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true); MultiSearcherDupls = NewSearcher(MultiReaderDupls); }
public override void SetUp() { base.SetUp(); // we generate awful regexps: good for testing. // but for preflex codec, the test can be very slow, so use fewer iterations. NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField("field", "", Field.Store.YES); doc.Add(field); Terms = new SortedSet<BytesRef>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s = TestUtil.RandomUnicodeString(Random()); field.StringValue = s; Terms.Add(new BytesRef(s)); writer.AddDocument(doc); } TermsAutomaton = BasicAutomata.MakeStringUnion(Terms); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.OmitNorms = true; Field field = NewField("field", "", customType); doc.Add(field); NumberFormatInfo df = new NumberFormatInfo(); df.NumberDecimalDigits = 0; //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.StringValue = i.ToString(df); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; Field f = NewField("foo", "this is a test test", ft); doc.Add(f); for (int i = 0; i < 100; i++) { w.AddDocument(doc); } IndexReader reader = w.Reader; w.Dispose(); Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test"))); DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS); while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { Assert.AreEqual(2, de.Freq()); } reader.Dispose(); dir.Dispose(); }
public void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO); doc.Add(field); for (int i = 0; i < 5137; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); field.StringValue = "meaninglessnames"; for (int i = 5138; i < 11377; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int numDocs = TestUtil.NextInt(Random(), 2049, 4000); for (int i = 0; i < numDocs; i++) { var document = new Document { NewTextField("english", English.IntToEnglish(i), Field.Store.NO), NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO), NewStringField("byte", string.Empty + (unchecked((byte)Random().Next())), Field.Store.NO), NewStringField("short", string.Empty + ((short)Random().Next()), Field.Store.NO), new IntField("int", Random().Next(), Field.Store.NO), new LongField("long", Random().NextLong(), Field.Store.NO), new FloatField("float", Random().NextFloat(), Field.Store.NO), new DoubleField("double", Random().NextDouble(), Field.Store.NO), new NumericDocValuesField("intdocvalues", Random().Next()), new FloatDocValuesField("floatdocvalues", Random().NextFloat()) }; iw.AddDocument(document); } reader = iw.Reader; iw.Dispose(); searcher = NewSearcher(reader); }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); var doc = new Document { NewStringField("id", "1", Field.Store.YES), NewTextField("body", "some contents and more contents", Field.Store.NO), new NumericDocValuesField("popularity", 5) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "2", Field.Store.YES), NewTextField("body", "another document with different contents", Field.Store.NO), new NumericDocValuesField("popularity", 20) }; iw.AddDocument(doc); doc = new Document { NewStringField("id", "3", Field.Store.YES), NewTextField("body", "crappy contents", Field.Store.NO), new NumericDocValuesField("popularity", 2) }; iw.AddDocument(doc); reader = iw.Reader; searcher = new IndexSearcher(reader); iw.Dispose(); }
public virtual void TestPrefixQuery_Mem() { Directory directory = NewDirectory(); string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("category", categories[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader reader = writer.Reader; PrefixQuery query = new PrefixQuery(new Term("category", "/Computers")); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below"); query = new PrefixQuery(new Term("category", "/Computers/Mac")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "One in /Computers/Mac"); query = new PrefixQuery(new Term("category", "")); Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category"); Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "everything"); writer.Dispose(); reader.Dispose(); directory.Dispose(); }
public void TestReverse() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewStringField("value", "foo", Field.Store.NO)); doc.Add(NewStringField("value", "bar", Field.Store.NO)); doc.Add(NewStringField("id", "1", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("value", "baz", Field.Store.NO)); doc.Add(NewStringField("id", "2", Field.Store.YES)); writer.AddDocument(doc); IndexReader ir = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(ir); Sort sort = new Sort(new SortedSetSortField("value", true)); TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort); assertEquals(2, td.TotalHits); // 'bar' comes before 'baz' assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id")); assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id")); ir.Dispose(); dir.Dispose(); }
public virtual void TestMethod() { Directory directory = NewDirectory(); string[] values = new string[] { "1", "2", "3", "4" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); for (int i = 0; i < values.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(FIELD, values[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader ir = writer.Reader; writer.Dispose(); BooleanQuery booleanQuery1 = new BooleanQuery(); booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD); booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD); BooleanQuery query = new BooleanQuery(); query.Add(booleanQuery1, BooleanClause.Occur.MUST); query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT); IndexSearcher indexSearcher = NewSearcher(ir); ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length, "Number of matched documents"); ir.Dispose(); directory.Dispose(); }
public void TestInternalLevenshteinDistance() { DirectSpellChecker spellchecker = new DirectSpellChecker(); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, true), Similarity, TimeZone); string[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoið‘" }; for (int i = 0; i < termsToAdd.Length; i++) { Document doc = new Document(); doc.Add(NewTextField("repentance", termsToAdd[i], Field.Store.NO)); writer.AddDocument(doc); } IndexReader ir = writer.Reader; string misspelled = "metanoix"; SuggestWord[] similar = spellchecker.SuggestSimilar(new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertTrue(similar.Length == 4); IStringDistance sd = spellchecker.Distance; assertTrue(sd is LuceneLevenshteinDistance); foreach (SuggestWord word in similar) { assertTrue(word.Score == sd.GetDistance(word.String, misspelled)); assertTrue(word.Score == sd.GetDistance(misspelled, word.String)); // LUCENENET TODO: Perhaps change this to word.ToString()? } ir.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void TestString() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(NewStringField("value", "foo", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("value", "bar", Field.Store.YES)); writer.AddDocument(doc); IndexReader ir = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(ir); Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING)); TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort); Assert.AreEqual(2, td.TotalHits); // 'bar' comes before 'foo' Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[0].Doc).Get("value")); Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[1].Doc).Get("value")); ir.Dispose(); dir.Dispose(); }
public virtual void TestRollbackIntegrityWithBufferFlush() { Directory dir = NewDirectory(); RandomIndexWriter rw = new RandomIndexWriter(Random(), dir); for (int i = 0; i < 5; i++) { Document doc = new Document(); doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES)); rw.AddDocument(doc); } rw.Dispose(); // If buffer size is small enough to cause a flush, errors ensue... IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND)); for (int i = 0; i < 3; i++) { Document doc = new Document(); string value = Convert.ToString(i); doc.Add(NewStringField("pk", value, Field.Store.YES)); doc.Add(NewStringField("text", "foo", Field.Store.YES)); w.UpdateDocument(new Term("pk", value), doc); } w.Rollback(); IndexReader r = DirectoryReader.Open(dir); Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback"); r.Dispose(); dir.Dispose(); }
public override void SetUp() { base.SetUp(); INDEX_SIZE = AtLeast(2000); Index = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Index); RandomGen random = new RandomGen(this, Random()); for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if too low the { // problem doesn't show up Document doc = new Document(); if ((i % 5) != 0) // some documents must not have an entry in the first { // sort field doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES)); } if ((i % 7) == 0) // some documents to match the query (see below) { doc.Add(NewTextField("content", "test", Field.Store.YES)); } // every document has a defined 'mandant' field doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Query = new TermQuery(new Term("content", "test")); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField("field", "", Field.Store.NO); doc.Add(field); NumberFormatInfo df = new NumberFormatInfo(); df.NumberDecimalDigits = 0; //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.StringValue = i.ToString(df); writer.AddDocument(doc); } Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: setUp searcher=" + Searcher); } }
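// BeforeClass: builds a small index over DocFields, then repeatedly copies it into Dir2 until the copy holds at least 3000 docs and appends NUM_EXTRA_DOCS "field2" docs, so BigSearcher over the large index can be compared against Searcher over the small one.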
public static void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(field, DocFields[i], Field.Store.NO)); writer.AddDocument(doc); } writer.Dispose(); LittleReader = DirectoryReader.Open(Directory); Searcher = NewSearcher(LittleReader); // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one) Searcher.Similarity = new DefaultSimilarity(); // Make big index Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT)); // First multiply small test index: MulFactor = 1; int docCount = 0; if (VERBOSE) { Console.WriteLine("\nTEST: now copy index..."); } do { if (VERBOSE) { Console.WriteLine("\nTEST: cycle..."); } Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT)); RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2); w.AddIndexes(copy); docCount = w.MaxDoc(); w.Dispose(); MulFactor *= 2; } while (docCount < 3000); RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc_ = new Document(); doc_.Add(NewTextField("field2", "xxx", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } doc_ = new Document(); doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } Reader = riw.Reader; BigSearcher = NewSearcher(Reader); riw.Dispose(); }
public virtual void Test() { Directory dir = NewDirectory(); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new TextField("eng", new BugReproTokenStream())); riw.AddDocument(doc); riw.Dispose(); dir.Dispose(); }
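// TestBasic: indexes one document from a CannedTokenStream with fixed positions and offsets, then checks that DocsAndPositionsEnum reports the expected freq, positions, and start/end offsets for the terms a, b, and c.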
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorPositions = Random().NextBoolean(); ft.StoreTermVectorOffsets = Random().NextBoolean(); } Token[] tokens = new Token[] { MakeToken("a", 1, 0, 6), MakeToken("b", 1, 8, 9), MakeToken("a", 1, 9, 17), MakeToken("c", 1, 19, 50) }; doc.Add(new Field("content", new CannedTokenStream(tokens), ft)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("a")); Assert.IsNotNull(dp); Assert.AreEqual(0, dp.NextDoc()); Assert.AreEqual(2, dp.Freq()); Assert.AreEqual(0, dp.NextPosition()); Assert.AreEqual(0, dp.StartOffset()); Assert.AreEqual(6, dp.EndOffset()); Assert.AreEqual(2, dp.NextPosition()); Assert.AreEqual(9, dp.StartOffset()); Assert.AreEqual(17, dp.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("b")); Assert.IsNotNull(dp); Assert.AreEqual(0, dp.NextDoc()); Assert.AreEqual(1, dp.Freq()); Assert.AreEqual(1, dp.NextPosition()); Assert.AreEqual(8, dp.StartOffset()); Assert.AreEqual(9, dp.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("c")); Assert.IsNotNull(dp); Assert.AreEqual(0, dp.NextDoc()); Assert.AreEqual(1, dp.Freq()); Assert.AreEqual(3, dp.NextPosition()); Assert.AreEqual(19, dp.StartOffset()); Assert.AreEqual(50, dp.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); r.Dispose(); dir.Dispose(); }
public virtual void TestBefore() { // create an index Directory indexStore = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore); long now = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond; Document doc = new Document(); // add time that is in the past doc.Add(NewStringField("datefield", DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND), Field.Store.YES)); doc.Add(NewTextField("body", "Today is a very sunny day in New York City", Field.Store.YES)); writer.AddDocument(doc); IndexReader reader = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(reader); // filter that should preserve matches // DateFilter df1 = DateFilter.Before("datefield", now); TermRangeFilter df1 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), false, true); // filter that should discard matches // DateFilter df2 = DateFilter.Before("datefield", now - 999999); TermRangeFilter df2 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false); // search something that doesn't exist with DateFilter Query query1 = new TermQuery(new Term("body", "NoMatchForthis")); // search for something that does exist Query query2 = new TermQuery(new Term("body", "sunny")); ScoreDoc[] result; // ensure that queries return expected results without DateFilter first result = searcher.Search(query1, null, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, null, 1000).ScoreDocs; Assert.AreEqual(1, result.Length); // run queries with DateFilter result = searcher.Search(query1, df1, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query1, df2, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, df1, 1000).ScoreDocs; Assert.AreEqual(1, result.Length); result = searcher.Search(query2, df2, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); reader.Dispose(); indexStore.Dispose(); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewTextField(FN, "the quick brown fox jumps over the lazy ??? dog 493432 49344", Field.Store.NO)); writer.AddDocument(doc); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), Similarity, TimeZone); AddDoc(writer, @"admin guest", @"010", @"20040101", @"Y"); AddDoc(writer, @"guest", @"020", @"20040101", @"Y"); AddDoc(writer, @"guest", @"020", @"20050101", @"Y"); AddDoc(writer, @"admin", @"020", @"20050101", @"Maybe"); AddDoc(writer, @"admin guest", @"030", @"20050101", @"N"); reader = SlowCompositeReaderWrapper.Wrap(writer.Reader); writer.Dispose(); }
public override void SetUp() { base.SetUp(); directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); // Add series of docs with specific information for MoreLikeThis AddDoc(writer, "lucene"); AddDoc(writer, "lucene release"); reader = writer.Reader; writer.Dispose(); searcher = NewSearcher(reader); }
public override void SetUp() { base.SetUp(); // create test index MDirectory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), MDirectory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity())); AddDocument(writer, "1", "I think it should work."); AddDocument(writer, "2", "I think it should work."); AddDocument(writer, "3", "I think it should work."); AddDocument(writer, "4", "I think it should work."); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); Searcher.Similarity = new DefaultSimilarity(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir, Similarity, TimeZone); for (int i = 0; i < 100; i++) { Document doc = new Document(); doc.Add(NewStringField("field", Convert.ToString(i), Field.Store.NO)); doc.Add(NewStringField("field2", Convert.ToString(i % 2 == 0), Field.Store.NO)); iw.AddDocument(doc); } Reader = iw.Reader; iw.Dispose(); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(field, DocFields[i], Field.Store.YES)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConfig.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConfig); Document document = new Document(); Field idField = new StringField("id", "", Field.Store.NO); document.Add(idField); iw.AddDocument(document); ir = iw.Reader; @is = NewSearcher(ir); iw.Dispose(); }
public override void BeforeClass() { base.BeforeClass(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(field, DocFields[i], Field.Store.NO)); writer.AddDocument(doc); } writer.Dispose(); LittleReader = DirectoryReader.Open(Directory); Searcher = NewSearcher(LittleReader); // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one) Searcher.Similarity = new DefaultSimilarity(); // Make big index Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT)); // First multiply small test index: MulFactor = 1; int docCount = 0; if (VERBOSE) { Console.WriteLine("\nTEST: now copy index..."); } do { if (VERBOSE) { Console.WriteLine("\nTEST: cycle..."); } Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT)); RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2, Similarity, TimeZone); w.AddIndexes(copy); docCount = w.MaxDoc; w.Dispose(); MulFactor *= 2; } while (docCount < 3000); RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc_ = new Document(); doc_.Add(NewTextField("field2", "xxx", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } doc_ = new Document(); doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } Reader = riw.Reader; BigSearcher = NewSearcher(Reader); riw.Dispose(); }
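// TestSimpleWithScoring: indexes movie docs and subtitle docs linked through the movieId field, joins subtitle matches back to movies with JoinUtil.CreateJoinQuery, and checks the hit order under ScoreMode.Max, Total, and Avg.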
public void TestSimpleWithScoring() { const string idField = "id"; const string toField = "movieId"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMergePolicy(NewLogMergePolicy())); // 0 Document doc = new Document(); doc.Add(new TextField("description", "A random movie", Field.Store.NO)); doc.Add(new TextField("name", "Movie 1", Field.Store.NO)); doc.Add(new TextField(idField, "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO)); doc.Add(new TextField(idField, "2", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO)); doc.Add(new TextField(idField, "3", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); doc.Add(new TextField("description", "A second random movie", Field.Store.NO)); doc.Add(new TextField("name", "Movie 2", Field.Store.NO)); doc.Add(new TextField(idField, "4", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 4 doc = new Document(); doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO)); doc.Add(new TextField(idField, "5", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO)); doc.Add(new TextField(idField, "6", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.GetReader()); w.Dispose(); // Search for movie via subtitle Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max); TopDocs result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(0, result.ScoreDocs[0].Doc); assertEquals(3, result.ScoreDocs[1].Doc); // Score mode max. joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Max); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(3, result.ScoreDocs[0].Doc); assertEquals(0, result.ScoreDocs[1].Doc); // Score mode total joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Total); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(0, result.ScoreDocs[0].Doc); assertEquals(3, result.ScoreDocs[1].Doc); //Score mode avg joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Avg); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(3, result.ScoreDocs[0].Doc); assertEquals(0, result.ScoreDocs[1].Doc); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new IntField("num", 20, Field.Store.NO)); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new IntField("num", 30, Field.Store.NO)); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results[2].ToString()); IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); // Reused across documents, to add the necessary facet // fields: Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("Author", "Bob")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 20, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 30, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 40, Field.Store.NO)); doc.Add(new FacetField("Author", "Susan")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 45, Field.Store.NO)); doc.Add(new FacetField("Author", "Frank")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query and one of the // Facets.search utility methods: searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num")); // Retrieve & verify results: Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString()); taxoReader.Dispose(); searcher.IndexReader.Dispose(); dir.Dispose(); taxoDir.Dispose(); }
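// TestSimple (continued on the following line): indexes docs with group and count fields under a randomly chosen DocValues type, then runs a first-pass grouping collector followed by a distinct-values collector and verifies the unique count values per group for the content:random, content:some, and content:blob queries.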
public virtual void TestSimple() { Random random = Random(); DocValuesType[] dvTypes = new DocValuesType[] { DocValuesType.NUMERIC, DocValuesType.BINARY, DocValuesType.SORTED, }; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())); bool canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal); DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE; Document doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "random text", Field.Store.NO)); doc.Add(new StringField("id", "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "some more random text blob", Field.Store.NO)); doc.Add(new StringField("id", "2", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "2", dvType); doc.Add(new TextField("content", "some more random textual data", Field.Store.NO)); doc.Add(new StringField("id", "3", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // To ensure a second segment // 3 doc = new Document(); AddField(doc, groupField, "2", dvType); doc.Add(new TextField("content", "some random text", Field.Store.NO)); doc.Add(new StringField("id", "4", Field.Store.NO)); w.AddDocument(doc); // 4 doc = new Document(); AddField(doc, groupField, "3", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "some more random text", Field.Store.NO)); doc.Add(new StringField("id", "5", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); AddField(doc, groupField, "3", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "random blob", Field.Store.NO)); doc.Add(new StringField("id", "6", Field.Store.NO)); w.AddDocument(doc); // 6 -- no author field doc = new Document(); doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES)); AddField(doc, countField, "1", dvType); doc.Add(new StringField("id", "6", Field.Store.NO)); w.AddDocument(doc); IndexSearcher indexSearcher = NewSearcher(w.Reader); w.Dispose(); var cmp = new ComparerAnonymousHelper1(this); // === Search for content:random IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector); IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector); //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; // LUCENENET TODO: Try to work out how to do this without an O(n) operation var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(4, gcs.Count); CompareNull(gcs[0].GroupValue); List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); Compare("1", gcs[1].GroupValue); countValues = new List 
<IComparable>(gcs[1].UniqueValues); countValues.Sort(nullComparer); assertEquals(2, countValues.size()); Compare("1", countValues[0]); Compare("2", countValues[1]); Compare("2", gcs[2].GroupValue); countValues = new List <IComparable>(gcs[2].UniqueValues); assertEquals(1, countValues.size()); CompareNull(countValues[0]); Compare("3", gcs[3].GroupValue); countValues = new List <IComparable>(gcs[3].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); // === Search for content:some firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector); distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector); // LUCENENET TODO: Try to work out how to do this without an O(n) operation //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(3, gcs.Count); Compare("1", gcs[0].GroupValue); countValues = new List <IComparable>(gcs[0].UniqueValues); assertEquals(2, countValues.size()); countValues.Sort(nullComparer); Compare("1", countValues[0]); Compare("2", countValues[1]); Compare("2", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); assertEquals(1, countValues.size()); CompareNull(countValues[0]); Compare("3", gcs[2].GroupValue); countValues = new List <IComparable>(gcs[2].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); // === Search for content:blob firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector); distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector); // LUCENENET TODO: Try to work out how to do this without an O(n) operation //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(2, gcs.Count); Compare("1", gcs[0].GroupValue); countValues = new List <IComparable>(gcs[0].UniqueValues); // B/c the only one document matched with blob inside the author 1 group assertEquals(1, countValues.Count); Compare("1", countValues[0]); Compare("3", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); assertEquals(1, countValues.Count); Compare("1", countValues[0]); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public override void SetUp() { base.SetUp(); AllSortFields = new List <SortField>(Arrays.AsList(new SortField[] { new SortField("byte", SortField.Type_e.BYTE, false), new SortField("short", SortField.Type_e.SHORT, false), new SortField("int", SortField.Type_e.INT, false), new SortField("long", SortField.Type_e.LONG, false), new SortField("float", SortField.Type_e.FLOAT, false), new SortField("double", SortField.Type_e.DOUBLE, false), new SortField("bytes", SortField.Type_e.STRING, false), new SortField("bytesval", SortField.Type_e.STRING_VAL, false), new SortField("byte", SortField.Type_e.BYTE, true), new SortField("short", SortField.Type_e.SHORT, true), new SortField("int", SortField.Type_e.INT, true), new SortField("long", SortField.Type_e.LONG, true), new SortField("float", SortField.Type_e.FLOAT, true), new SortField("double", SortField.Type_e.DOUBLE, true), new SortField("bytes", SortField.Type_e.STRING, true), new SortField("bytesval", SortField.Type_e.STRING_VAL, true), SortField.FIELD_SCORE, SortField.FIELD_DOC })); if (SupportsDocValues) { AllSortFields.AddRange(Arrays.AsList(new SortField[] { new SortField("intdocvalues", SortField.Type_e.INT, false), new SortField("floatdocvalues", SortField.Type_e.FLOAT, false), new SortField("sortedbytesdocvalues", SortField.Type_e.STRING, false), new SortField("sortedbytesdocvaluesval", SortField.Type_e.STRING_VAL, false), new SortField("straightbytesdocvalues", SortField.Type_e.STRING_VAL, false), new SortField("intdocvalues", SortField.Type_e.INT, true), new SortField("floatdocvalues", SortField.Type_e.FLOAT, true), new SortField("sortedbytesdocvalues", SortField.Type_e.STRING, true), new SortField("sortedbytesdocvaluesval", SortField.Type_e.STRING_VAL, true), new SortField("straightbytesdocvalues", SortField.Type_e.STRING_VAL, true) })); } // Also test missing first / last for the "string" sorts: foreach (string field in new string[] { "bytes", "sortedbytesdocvalues" }) { for (int rev = 0; rev < 2; rev++) { bool reversed = rev == 0; SortField sf = new SortField(field, SortField.Type_e.STRING, reversed); sf.MissingValue = SortField.STRING_FIRST; AllSortFields.Add(sf); sf = new SortField(field, SortField.Type_e.STRING, reversed); sf.MissingValue = SortField.STRING_LAST; AllSortFields.Add(sf); } } int limit = AllSortFields.Count; for (int i = 0; i < limit; i++) { SortField sf = AllSortFields[i]; if (sf.Type == SortField.Type_e.INT) { SortField sf2 = new SortField(sf.Field, SortField.Type_e.INT, sf.Reverse); sf2.MissingValue = Random().Next(); AllSortFields.Add(sf2); } else if (sf.Type == SortField.Type_e.LONG) { SortField sf2 = new SortField(sf.Field, SortField.Type_e.LONG, sf.Reverse); sf2.MissingValue = Random().NextLong(); AllSortFields.Add(sf2); } else if (sf.Type == SortField.Type_e.FLOAT) { SortField sf2 = new SortField(sf.Field, SortField.Type_e.FLOAT, sf.Reverse); sf2.MissingValue = (float)Random().NextDouble(); AllSortFields.Add(sf2); } else if (sf.Type == SortField.Type_e.DOUBLE) { SortField sf2 = new SortField(sf.Field, SortField.Type_e.DOUBLE, sf.Reverse); sf2.MissingValue = Random().NextDouble(); AllSortFields.Add(sf2); } } Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir); int numDocs = AtLeast(200); for (int i = 0; i < numDocs; i++) { IList <Field> fields = new List <Field>(); fields.Add(NewTextField("english", English.IntToEnglish(i), Field.Store.NO)); fields.Add(NewTextField("oddeven", (i % 2 == 0) ? 
"even" : "odd", Field.Store.NO)); fields.Add(NewStringField("byte", "" + ((sbyte)Random().Next()), Field.Store.NO)); fields.Add(NewStringField("short", "" + ((short)Random().Next()), Field.Store.NO)); fields.Add(new IntField("int", Random().Next(), Field.Store.NO)); fields.Add(new LongField("long", Random().NextLong(), Field.Store.NO)); fields.Add(new FloatField("float", (float)Random().NextDouble(), Field.Store.NO)); fields.Add(new DoubleField("double", Random().NextDouble(), Field.Store.NO)); fields.Add(NewStringField("bytes", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO)); fields.Add(NewStringField("bytesval", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO)); fields.Add(new DoubleField("double", Random().NextDouble(), Field.Store.NO)); if (SupportsDocValues) { fields.Add(new NumericDocValuesField("intdocvalues", Random().Next())); fields.Add(new FloatDocValuesField("floatdocvalues", (float)Random().NextDouble())); fields.Add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())))); fields.Add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())))); fields.Add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())))); } Document document = new Document(); document.Add(new StoredField("id", "" + i)); if (VERBOSE) { Console.WriteLine(" add doc id=" + i); } foreach (Field field in fields) { // So we are sometimes missing that field: if (Random().Next(5) != 4) { document.Add(field); if (VERBOSE) { Console.WriteLine(" " + field); } } } iw.AddDocument(document); if (Random().Next(50) == 17) { iw.Commit(); } } Reader = iw.Reader; iw.Dispose(); Searcher = NewSearcher(Reader); if (VERBOSE) { Console.WriteLine(" searcher=" + Searcher); } }
public virtual void TestPostings() { Directory dir = NewFSDirectory(CreateTempDir("postings")); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetCodec(Codec.ForName("Lucene40")); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); Document doc = new Document(); // id field FieldType idType = new FieldType(StringField.TYPE_NOT_STORED); idType.StoreTermVectors = true; Field idField = new Field("id", "", idType); doc.Add(idField); // title field: short text field FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED); titleType.StoreTermVectors = true; titleType.StoreTermVectorPositions = true; titleType.StoreTermVectorOffsets = true; titleType.IndexOptions = IndexOptions(); Field titleField = new Field("title", "", titleType); doc.Add(titleField); // body field: long text field FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED); bodyType.StoreTermVectors = true; bodyType.StoreTermVectorPositions = true; bodyType.StoreTermVectorOffsets = true; bodyType.IndexOptions = IndexOptions(); Field bodyField = new Field("body", "", bodyType); doc.Add(bodyField); int numDocs = AtLeast(1000); for (int i = 0; i < numDocs; i++) { idField.SetStringValue(Convert.ToString(i)); titleField.SetStringValue(FieldValue(1)); bodyField.SetStringValue(FieldValue(3)); iw.AddDocument(doc); if (Random().Next(20) == 0) { iw.DeleteDocuments(new Term("id", Convert.ToString(i))); } } if (Random().NextBoolean()) { // delete 1-100% of docs iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)])); } iw.Dispose(); dir.Dispose(); // checkindex }
public virtual void TestDeMorgan() { Directory dir1 = NewDirectory(); RandomIndexWriter iw1 = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir1); Document doc1 = new Document(); doc1.Add(NewTextField("field", "foo bar", Field.Store.NO)); iw1.AddDocument(doc1); IndexReader reader1 = iw1.GetReader(); iw1.Dispose(); Directory dir2 = NewDirectory(); RandomIndexWriter iw2 = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir2); Document doc2 = new Document(); doc2.Add(NewTextField("field", "foo baz", Field.Store.NO)); iw2.AddDocument(doc2); IndexReader reader2 = iw2.GetReader(); iw2.Dispose(); BooleanQuery query = new BooleanQuery(); // Query: +foo -ba* query.Add(new TermQuery(new Term("field", "foo")), Occur.MUST); WildcardQuery wildcardQuery = new WildcardQuery(new Term("field", "ba*")); wildcardQuery.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query.Add(wildcardQuery, Occur.MUST_NOT); MultiReader multireader = new MultiReader(reader1, reader2); IndexSearcher searcher = NewSearcher(multireader); Assert.AreEqual(0, searcher.Search(query, 10).TotalHits); Task foo = new Task(TestDeMorgan); TaskScheduler es = TaskScheduler.Default; searcher = new IndexSearcher(multireader, es); if (VERBOSE) { Console.WriteLine("rewritten form: " + searcher.Rewrite(query)); } Assert.AreEqual(0, searcher.Search(query, 10).TotalHits); multireader.Dispose(); reader1.Dispose(); reader2.Dispose(); dir1.Dispose(); dir2.Dispose(); }
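// TestBS2DisjunctionNextVsAdvance: indexes docs whose terms a..f occur with decreasing probability, then for random disjunctions checks that the scorer returns the same docs and scores whether it is driven purely by NextDoc() or by a random mix of NextDoc() and Advance().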
public virtual void TestBS2DisjunctionNextVsAdvance() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); int numDocs = AtLeast(300); for (int docUpto = 0; docUpto < numDocs; docUpto++) { string contents = "a"; if (Random.Next(20) <= 16) { contents += " b"; } if (Random.Next(20) <= 8) { contents += " c"; } if (Random.Next(20) <= 4) { contents += " d"; } if (Random.Next(20) <= 2) { contents += " e"; } if (Random.Next(20) <= 1) { contents += " f"; } Document doc = new Document(); doc.Add(new TextField("field", contents, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); w.Dispose(); for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++) { if (VERBOSE) { Console.WriteLine("iter=" + iter); } IList <string> terms = new List <string>(Arrays.AsList("a", "b", "c", "d", "e", "f")); int numTerms = TestUtil.NextInt32(Random, 1, terms.Count); while (terms.Count > numTerms) { terms.RemoveAt(Random.Next(terms.Count)); } if (VERBOSE) { Console.WriteLine(" terms=" + terms); } BooleanQuery q = new BooleanQuery(); foreach (string term in terms) { q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD)); } Weight weight = s.CreateNormalizedWeight(q); Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null); // First pass: just use .NextDoc() to gather all hits IList <ScoreDoc> hits = new List <ScoreDoc>(); while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore())); } if (VERBOSE) { Console.WriteLine(" " + hits.Count + " hits"); } // Now, randomly next/advance through the list and // verify exact match: for (int iter2 = 0; iter2 < 10; iter2++) { weight = s.CreateNormalizedWeight(q); scorer = weight.GetScorer(s.m_leafContexts[0], null); if (VERBOSE) { Console.WriteLine(" iter2=" + iter2); } int upto = -1; while (upto < hits.Count) { int nextUpto; int nextDoc; int left = hits.Count - upto; if (left == 1 || Random.nextBoolean()) { // next nextUpto = 1 + upto; nextDoc = scorer.NextDoc(); } else { // advance int inc = TestUtil.NextInt32(Random, 1, left - 1); nextUpto = inc + upto; nextDoc = scorer.Advance(hits[nextUpto].Doc); } if (nextUpto == hits.Count) { Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc); } else { ScoreDoc hit = hits[nextUpto]; Assert.AreEqual(hit.Doc, nextDoc); // Test for precise float equality: Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore()); } upto = nextUpto; } } } r.Dispose(); d.Dispose(); }
public virtual void TestNullOrSubScorer() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(NewTextField("field", "a b c d", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); // this test relies upon coord being the default implementation, // otherwise scores are different! s.Similarity = new DefaultSimilarity(); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor float score = s.Search(q, 10).MaxScore; Query subQuery = new TermQuery(new Term("field", "not_in_index")); subQuery.Boost = 0; q.Add(subQuery, Occur.SHOULD); float score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * .5F, score2, 1e-6); // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor BooleanQuery qq = (BooleanQuery)q.Clone(); PhraseQuery phrase = new PhraseQuery(); phrase.Add(new Term("field", "not_in_index")); phrase.Add(new Term("field", "another_not_in_index")); phrase.Boost = 0; qq.Add(phrase, Occur.SHOULD); score2 = s.Search(qq, 10).MaxScore; Assert.AreEqual(score * (1 / 3F), score2, 1e-6); // now test BooleanScorer2 subQuery = new TermQuery(new Term("field", "b")); subQuery.Boost = 0; q.Add(subQuery, Occur.MUST); score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * (2 / 3F), score2, 1e-6); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).TotalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); q.Add(pq, Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).TotalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); r.Dispose(); w.Dispose(); dir.Dispose(); }
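// TestFuzziness (continued on the following line): runs SlowFuzzyQuery with defaultMinSimilarity over simple five-letter terms, checking hit counts and ranking as the prefix length and maxExpansions vary, including terms too dissimilar to match and a term identical to an indexed one.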
public void TestFuzziness() { //every test with SlowFuzzyQuery.defaultMinSimilarity //is exercising the Automaton, not the brute force linear method Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, directory, Similarity, TimeZone); addDoc("aaaaa", writer); addDoc("aaaab", writer); addDoc("aaabb", writer); addDoc("aabbb", writer); addDoc("abbbb", writer); addDoc("bbbbb", writer); addDoc("ddddd", writer); IndexReader reader = writer.GetReader(); IndexSearcher searcher = NewSearcher(reader); writer.Dispose(); SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); // same with prefix query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(2, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); // test scoring query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals("3 documents should match", 3, hits.Length); List <String> order = Arrays.AsList("bbbbb", "abbbb", "aabbb"); for (int i = 0; i < hits.Length; i++) { string term = searcher.Doc(hits[i].Doc).Get("field"); //System.out.println(hits[i].score); assertEquals(order[i], term); } // test pq size by supplying maxExpansions=2 // This query would normally return 3 documents, because 3 terms match (see above): query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals("only 2 documents should match", 2, hits.Length); order = Arrays.AsList("bbbbb", "abbbb"); for (int i = 0; i < hits.Length; i++) { string term = searcher.Doc(hits[i].Doc).Get("field"); //System.out.println(hits[i].score); assertEquals(order[i], term); } // not similar enough: query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3 hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // query identical to a word in the index: query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); 
// default allows for up to two edits: assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); // query similar to a word in the index: query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); // now with prefix query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(3, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(2, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); // now with prefix query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5); hits = 
searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // different field = no match: query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); reader.Dispose(); directory.Dispose(); }
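// The two SlowFuzzyQuery tests above and below call an addDoc(text, writer) helper that is not
// shown in this excerpt. A minimal sketch, assuming the single indexed field is named "field"
// and is stored (the assertions read it back via searcher.Doc(...).Get("field")):
private void addDoc(string text, RandomIndexWriter writer)
{
    Document doc = new Document();
    doc.Add(NewTextField("field", text, Field.Store.YES));
    writer.AddDocument(doc);
}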
public void TestFuzzinessLong() { Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, directory, Similarity, TimeZone); addDoc("aaaaaaa", writer); addDoc("segment", writer); IndexReader reader = writer.GetReader(); IndexSearcher searcher = NewSearcher(reader); writer.Dispose(); SlowFuzzyQuery query; // not similar enough: query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // edit distance to "aaaaaaa" = 3, this matches because the string is longer than // in testDefaultFuzziness so a bigger difference is allowed: query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa")); // now with prefix query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa")); query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa")); query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // no match, more than half of the characters is wrong: query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // now with prefix query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // "student" and "stellent" are indeed similar to "segment" by default: query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); // now with prefix query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(1, hits.Length); query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); // "student" doesn't match anymore thanks to increased minimum similarity: query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; assertEquals(0, hits.Length); try { query = new SlowFuzzyQuery(new Term("field", "student"), 1.1f); fail("Expected IllegalArgumentException"); } #pragma warning disable 168 catch (ArgumentException e) #pragma warning restore 168 { // expecting exception } try { query = new SlowFuzzyQuery(new Term("field", "student"), -0.1f); fail("Expected IllegalArgumentException"); } #pragma warning disable 168 catch (ArgumentException e) #pragma warning restore 168 { // expecting exception } reader.Dispose(); 
directory.Dispose(); }
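// The following Test() indexes NUM_DOCS short documents built from random "aaa"/"bbb" terms, then starts NUM_SEARCH_THREADS background search threads against the reader and, when verbose, reports how many searches they completed.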
public virtual void Test() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); long startTime = Environment.TickCount; // TODO: replace w/ the @nightly test data; make this // into an optional @nightly stress test Document doc = new Document(); Field body = NewTextField("body", "", Field.Store.NO); doc.Add(body); StringBuilder sb = new StringBuilder(); for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int numTerms = Random.Next(10); for (int termCount = 0; termCount < numTerms; termCount++) { sb.Append(Random.NextBoolean() ? "aaa" : "bbb"); sb.Append(' '); } body.SetStringValue(sb.ToString()); w.AddDocument(doc); sb.Remove(0, sb.Length); } IndexReader r = w.GetReader(); w.Dispose(); long endTime = Environment.TickCount; if (Verbose) { Console.WriteLine("BUILD took " + (endTime - startTime)); } IndexSearcher s = NewSearcher(r); AtomicBoolean failed = new AtomicBoolean(); AtomicInt64 netSearch = new AtomicInt64(); ThreadJob[] threads = new ThreadJob[NUM_SEARCH_THREADS]; for (int threadID = 0; threadID < NUM_SEARCH_THREADS; threadID++) { threads[threadID] = new ThreadAnonymousInnerClassHelper(this, s, failed, netSearch); threads[threadID].IsBackground = (true); } foreach (ThreadJob t in threads) { t.Start(); } foreach (ThreadJob t in threads) { t.Join(); } if (Verbose) { Console.WriteLine(NUM_SEARCH_THREADS + " threads did " + netSearch + " searches"); } r.Dispose(); dir.Dispose(); }
public virtual void TestTransitionAPI() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, Similarity, TimeZone); Documents.Document doc = new Documents.Document(); #pragma warning disable 612, 618 doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("tokenized_reader", new StringReader("abc xyz"))); doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); doc.Add(new Field("binary", new byte[10])); doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); #pragma warning restore 612, 618 w.AddDocument(doc); IndexReader r = w.GetReader(); w.Dispose(); doc = r.Document(0); // 4 stored fields Assert.AreEqual(4, doc.Fields.Count); Assert.AreEqual("abc", doc.Get("stored")); Assert.AreEqual("abc xyz", doc.Get("stored_indexed")); Assert.AreEqual("abc xyz", doc.Get("stored_tokenized")); BytesRef br = doc.GetBinaryValue("binary"); Assert.IsNotNull(br); Assert.AreEqual(10, br.Length); IndexSearcher s = new IndexSearcher(r); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits); foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" }) { Fields tvFields = r.GetTermVectors(0); Terms tvs = tvFields.GetTerms(field); Assert.IsNotNull(tvs); Assert.AreEqual(2, tvs.Count); TermsEnum tvsEnum = tvs.GetIterator(null); Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next()); DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null); if (field.Equals("tv", StringComparison.Ordinal)) { Assert.IsNull(dpEnum); } else { Assert.IsNotNull(dpEnum); } Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next()); Assert.IsNull(tvsEnum.Next()); } r.Dispose(); dir.Dispose(); }
private IndexContext CreateIndexContext() { Random random = Random(); DocValuesType[] dvTypes = new DocValuesType[] { DocValuesType.BINARY, DocValuesType.SORTED }; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()) ); bool canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal); DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE; int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER; string[] groupValues = new string[numDocs / 5]; string[] countValues = new string[numDocs / 10]; for (int i = 0; i < groupValues.Length; i++) { groupValues[i] = GenerateRandomNonEmptyString(); } for (int i = 0; i < countValues.Length; i++) { countValues[i] = GenerateRandomNonEmptyString(); } List <string> contentStrings = new List <string>(); IDictionary <string, IDictionary <string, ISet <string> > > searchTermToGroupCounts = new HashMap <string, IDictionary <string, ISet <string> > >(); for (int i = 1; i <= numDocs; i++) { string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)]; string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)]; string content = "random" + random.nextInt(numDocs / 20); IDictionary <string, ISet <string> > groupToCounts; if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts)) { // Groups sort always DOCID asc... searchTermToGroupCounts.Add(content, groupToCounts = new LinkedHashMap <string, ISet <string> >()); contentStrings.Add(content); } ISet <string> countsVals; if (!groupToCounts.TryGetValue(groupValue, out countsVals)) { groupToCounts.Add(groupValue, countsVals = new HashSet <string>()); } countsVals.Add(countValue); Document doc = new Document(); doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES)); if (groupValue != null) { AddField(doc, groupField, groupValue, dvType); } if (countValue != null) { AddField(doc, countField, countValue, dvType); } doc.Add(new TextField("content", content, Field.Store.YES)); w.AddDocument(doc); } DirectoryReader reader = w.Reader; if (VERBOSE) { for (int docID = 0; docID < reader.MaxDoc; docID++) { Document doc = reader.Document(docID); Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher")); } } w.Dispose(); return(new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/))); }
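// CreateIndexContext above relies on several helpers (GenerateRandomNonEmptyString, AddField) that are
// not shown in this excerpt. A minimal sketch of the first, assuming it simply retries until TestUtil
// yields a non-empty string (an empty value would be indistinguishable from null in the doc-values
// based counting):
private string GenerateRandomNonEmptyString()
{
    string randomValue;
    do
    {
        randomValue = TestUtil.RandomRealisticUnicodeString(Random());
    } while (randomValue.Length == 0);
    return randomValue;
}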
public void assertFromTestData(int[] codePointTable) { if (VERBOSE) { Console.WriteLine("TEST: codePointTable=" + codePointTable); } Stream stream = GetType().getResourceAsStream("fuzzyTestData.txt"); TextReader reader = new StreamReader(stream, Encoding.UTF8); int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture); int terms = (int)Math.Pow(2, bits); Store.Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy())); Document doc = new Document(); Field field = NewTextField("field", "", Field.Store.NO); doc.Add(field); for (int i = 0; i < terms; i++) { field.SetStringValue(MapInt(codePointTable, i)); writer.AddDocument(doc); } IndexReader r = writer.GetReader(); IndexSearcher searcher = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + searcher); } // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation, // otherwise scores are different! searcher.Similarity = (new DefaultSimilarity()); writer.Dispose(); String line; int lineNum = 0; while ((line = reader.ReadLine()) != null) { lineNum++; String[] @params = line.Split(',').TrimEnd(); String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture)); int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture); int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture); float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture); #pragma warning disable 612, 618 SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix); #pragma warning restore 612, 618 q.MultiTermRewriteMethod = new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize); int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture); TopDocs docs = searcher.Search(q, expectedResults); assertEquals(expectedResults, docs.TotalHits); for (int i = 0; i < expectedResults; i++) { String[] scoreDoc = reader.ReadLine().Split(',').TrimEnd(); assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc); assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon); } } r.Dispose(); dir.Dispose(); }
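// assertFromTestData above builds its index terms and query strings with MapInt(codePointTable, i),
// which is not shown in this excerpt. A plausible sketch, assuming each binary digit of i selects one
// of two code points from the table, so that 2^bits distinct terms are generated to match the term
// count read from the test data file:
private static string MapInt(int[] codePointTable, int i)
{
    var sb = new StringBuilder();
    string binary = Convert.ToString(i, 2); // base-2 representation of i
    for (int j = 0; j < binary.Length; j++)
    {
        sb.Append(char.ConvertFromUtf32(codePointTable[binary[j] - '0']));
    }
    return sb.ToString();
}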
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new IntFieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }
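// The following Test() indexes at least 100,000 characters of LineFileDocs text, records baseline TopDocs for a random sample of "body" terms, and then hands those answers to several search threads for re-checking.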
public virtual void Test() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); int charsToIndex = AtLeast(100000); int charsIndexed = 0; //System.out.println("bytesToIndex=" + charsToIndex); while (charsIndexed < charsToIndex) { Document doc = docs.NextDoc(); charsIndexed += doc.Get("body").Length; w.AddDocument(doc); //System.out.println(" bytes=" + charsIndexed + " add: " + doc); } IndexReader r = w.GetReader(); //System.out.println("numDocs=" + r.NumDocs); w.Dispose(); IndexSearcher s = NewSearcher(r); Terms terms = MultiFields.GetFields(r).GetTerms("body"); int termCount = 0; TermsEnum termsEnum = terms.GetIterator(null); while (termsEnum.Next() != null) { termCount++; } Assert.IsTrue(termCount > 0); // Target ~10 terms to search: double chance = 10.0 / termCount; termsEnum = terms.GetIterator(termsEnum); IDictionary <BytesRef, TopDocs> answers = new Dictionary <BytesRef, TopDocs>(); while (termsEnum.Next() != null) { if (Random.NextDouble() <= chance) { BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term); answers[term] = s.Search(new TermQuery(new Term("body", term)), 100); } } if (answers.Count > 0) { CountdownEvent startingGun = new CountdownEvent(1); int numThreads = TestUtil.NextInt32(Random, 2, 5); ThreadClass[] threads = new ThreadClass[numThreads]; for (int threadID = 0; threadID < numThreads; threadID++) { ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, s, answers, startingGun); threads[threadID] = thread; thread.Start(); } startingGun.Signal(); foreach (ThreadClass thread in threads) { thread.Join(); } } r.Dispose(); dir.Dispose(); }
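// ExecuteRandomJoin builds a random from/to index per iteration, runs JoinUtil.CreateJoinQuery under a random ScoreMode, and compares the matching documents (and, in the continuation below, the TopDocs) against the expected result computed from the index context.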
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) { for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) { if (Verbose) { Console.WriteLine("indexIter=" + indexIter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)) .SetMergePolicy(NewLogMergePolicy())); bool scoreDocsInOrder = TestJoinUtil.Random.NextBoolean(); IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder); IndexReader topLevelReader = w.GetReader(); w.Dispose(); for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) { if (Verbose) { Console.WriteLine("searchIter=" + searchIter); } IndexSearcher indexSearcher = NewSearcher(topLevelReader); int r = Random.Next(context.RandomUniqueValues.Length); bool from = context.RandomFrom[r]; string randomValue = context.RandomUniqueValues[r]; FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context); Query actualQuery = new TermQuery(new Term("value", randomValue)); if (Verbose) { Console.WriteLine("actualQuery=" + actualQuery); } var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length; ScoreMode scoreMode = (ScoreMode)Random.Next(scoreModeLength); if (Verbose) { Console.WriteLine("scoreMode=" + scoreMode); } Query joinQuery; if (from) { joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode); } else { joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode); } if (Verbose) { Console.WriteLine("joinQuery=" + joinQuery); } // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector... FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc); TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false); indexSearcher.Search(joinQuery, new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult, topScoreDocCollector)); // Asserting bit set... if (Verbose) { Console.WriteLine("expected cardinality:" + expectedResult.Cardinality()); DocIdSetIterator iterator = expectedResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } Console.WriteLine("actual cardinality:" + actualResult.Cardinality()); iterator = actualResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } } assertEquals(expectedResult, actualResult); // Asserting TopDocs... 
TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context); TopDocs actualTopDocs = topScoreDocCollector.GetTopDocs(); assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits); assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length); if (scoreMode == ScoreMode.None) { continue; } assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f); for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++) { if (Verbose) { Console.Write(string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc)); Console.Write(string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score)); } assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f); Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f); } } topLevelReader.Dispose(); dir.Dispose(); } }
public void TestMinShouldMatch() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random()); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone); string[] docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.5F; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 2F; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.49F; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1F; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 
2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1F; query.HighFreqMinimumNumberShouldMatch = 4F; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0F); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(new HashSet <string>(Arrays.AsList(@"2", @"3")), new HashSet <string>(Arrays.AsList(r.Document(search.ScoreDocs[1].Doc).Get(@"id"), r.Document(search.ScoreDocs[2].Doc).Get(@"id")))); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1F; query.HighFreqMinimumNumberShouldMatch = 2F; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 4); } { CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1F; query.HighFreqMinimumNumberShouldMatch = 2F; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 2); assertEquals(new HashSet <string>(Arrays.AsList(@"0", @"2")), new HashSet <string>(Arrays.AsList(r.Document(search.ScoreDocs[0].Doc).Get(@"id"), r.Document(search.ScoreDocs[1].Doc).Get(@"id")))); } r.Dispose(); w.Dispose(); dir.Dispose(); }
public void TestSimple() { const string idField = "id"; const string toField = "productId"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMergePolicy(NewLogMergePolicy())); // 0 Document doc = new Document(); doc.Add(new TextField("description", "random text", Field.Store.NO)); doc.Add(new TextField("name", "name1", Field.Store.NO)); doc.Add(new TextField(idField, "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "2", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "3", Field.Store.NO)); doc.Add(new TextField(toField, "1", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); doc.Add(new TextField("description", "more random text", Field.Store.NO)); doc.Add(new TextField("name", "name2", Field.Store.NO)); doc.Add(new TextField(idField, "4", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 4 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "5", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "6", Field.Store.NO)); doc.Add(new TextField(toField, "4", Field.Store.NO)); w.AddDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.GetReader()); w.Dispose(); // Search for product Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None); TopDocs result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(4, result.ScoreDocs[0].Doc); assertEquals(5, result.ScoreDocs[1].Doc); joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher, ScoreMode.None); result = indexSearcher.Search(joinQuery, 10); assertEquals(2, result.TotalHits); assertEquals(1, result.ScoreDocs[0].Doc); assertEquals(2, result.ScoreDocs[1].Doc); // Search for offer joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None); result = indexSearcher.Search(joinQuery, 10); assertEquals(1, result.TotalHits); assertEquals(3, result.ScoreDocs[0].Doc); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestRandomPhrases() { Directory dir = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random()); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy())); IList <IList <string> > docs = new List <IList <string> >(); Documents.Document d = new Documents.Document(); Field f = NewTextField("f", "", Field.Store.NO); d.Add(f); Random r = Random(); int NUM_DOCS = AtLeast(10); for (int i = 0; i < NUM_DOCS; i++) { // must be > 4096 so it spans multiple chunks int termCount = TestUtil.NextInt(Random(), 4097, 8200); IList <string> doc = new List <string>(); StringBuilder sb = new StringBuilder(); while (doc.Count < termCount) { if (r.Next(5) == 1 || docs.Count == 0) { // make new non-empty-string term string term; while (true) { term = TestUtil.RandomUnicodeString(r); if (term.Length > 0) { break; } } IOException priorException = null; TokenStream ts = analyzer.GetTokenStream("ignore", new StringReader(term)); try { ICharTermAttribute termAttr = ts.AddAttribute <ICharTermAttribute>(); ts.Reset(); while (ts.IncrementToken()) { string text = termAttr.ToString(); doc.Add(text); sb.Append(text).Append(' '); } ts.End(); } catch (IOException e) { priorException = e; } finally { IOUtils.CloseWhileHandlingException(priorException, ts); } } else { // pick existing sub-phrase IList <string> lastDoc = docs[r.Next(docs.Count)]; int len = TestUtil.NextInt(r, 1, 10); int start = r.Next(lastDoc.Count - len); for (int k = start; k < start + len; k++) { string t = lastDoc[k]; doc.Add(t); sb.Append(t).Append(' '); } } } docs.Add(doc); f.SetStringValue(sb.ToString()); w.AddDocument(d); } IndexReader reader = w.Reader; IndexSearcher s = NewSearcher(reader); w.Dispose(); // now search int num = AtLeast(10); for (int i = 0; i < num; i++) { int docID = r.Next(docs.Count); IList <string> doc = docs[docID]; int numTerm = TestUtil.NextInt(r, 2, 20); int start = r.Next(doc.Count - numTerm); PhraseQuery pq = new PhraseQuery(); StringBuilder sb = new StringBuilder(); for (int t = start; t < start + numTerm; t++) { pq.Add(new Term("f", doc[t])); sb.Append(doc[t]).Append(' '); } TopDocs hits = s.Search(pq, NUM_DOCS); bool found = false; for (int j = 0; j < hits.ScoreDocs.Length; j++) { if (hits.ScoreDocs[j].Doc == docID) { found = true; break; } } Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start); } reader.Dispose(); dir.Dispose(); }
public virtual void TestPhraseQueryInConjunctionScorer() { Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("source", "marketing info", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "foobar", Field.Store.YES)); doc.Add(NewTextField("source", "marketing info", Field.Store.YES)); writer.AddDocument(doc); IndexReader reader = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(reader); PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("source", "marketing")); phraseQuery.Add(new Term("source", "info")); ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check(Random(), phraseQuery, searcher, Similarity); TermQuery termQuery = new TermQuery(new Term("contents", "foobar")); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); QueryUtils.Check(Random(), termQuery, searcher, Similarity); reader.Dispose(); writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE)); doc = new Documents.Document(); doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES)); writer.AddDocument(doc); reader = writer.Reader; writer.Dispose(); searcher = NewSearcher(reader); termQuery = new TermQuery(new Term("contents", "woo")); phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("contents", "map")); phraseQuery.Add(new Term("contents", "entry")); hits = searcher.Search(termQuery, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(phraseQuery, Occur.MUST); booleanQuery.Add(termQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check(Random(), booleanQuery, searcher, Similarity); reader.Dispose(); directory.Dispose(); }
public virtual void Test10kPulsed() { // we always run this test with pulsing codec. Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1)); DirectoryInfo f = CreateTempDir("10kpulsed"); BaseDirectoryWrapper dir = NewFSDirectory(f); dir.CheckIndexOnDispose = false; // we do this ourselves explicitly RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp)); Document document = new Document(); FieldType ft = new FieldType(TextField.TYPE_STORED); switch (TestUtil.NextInt32(Random, 0, 2)) { case 0: ft.IndexOptions = IndexOptions.DOCS_ONLY; break; case 1: ft.IndexOptions = IndexOptions.DOCS_AND_FREQS; break; default: ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; break; } Field field = NewField("field", "", ft); document.Add(field); //NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 10050; i++) { //field.StringValue = df.format(i); field.SetStringValue(i.ToString("00000", CultureInfo.InvariantCulture)); iw.AddDocument(document); } IndexReader ir = iw.GetReader(); iw.Dispose(); TermsEnum te = MultiFields.GetTerms(ir, "field").GetIterator(null); DocsEnum de = null; for (int i = 0; i < 10050; i++) { //string expected = df.format(i); string expected = i.ToString("00000", CultureInfo.InvariantCulture); assertEquals(expected, te.Next().Utf8ToString()); de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE); assertTrue(de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.NextDoc()); } ir.Dispose(); TestUtil.CheckIndex(dir); dir.Dispose(); }
public void TestInsideBooleanQuery() { const string idField = "id"; const string toField = "productId"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMergePolicy(NewLogMergePolicy())); // 0 Document doc = new Document(); doc.Add(new TextField("description", "random text", Field.Store.NO)); doc.Add(new TextField("name", "name1", Field.Store.NO)); doc.Add(new TextField(idField, "7", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "2", Field.Store.NO)); doc.Add(new TextField(toField, "7", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "3", Field.Store.NO)); doc.Add(new TextField(toField, "7", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); doc.Add(new TextField("description", "more random text", Field.Store.NO)); doc.Add(new TextField("name", "name2", Field.Store.NO)); doc.Add(new TextField(idField, "0", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 4 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "5", Field.Store.NO)); doc.Add(new TextField(toField, "0", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "6", Field.Store.NO)); doc.Add(new TextField(toField, "0", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); IndexSearcher indexSearcher = new IndexSearcher(w.GetReader()); w.Dispose(); // Search for product Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg); BooleanQuery bq = new BooleanQuery(); bq.Add(joinQuery, Occur.SHOULD); bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD); indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper(this)); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestFuzziness() { Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, directory); AddDoc("aaaaa", writer); AddDoc("aaaab", writer); AddDoc("aaabb", writer); AddDoc("aabbb", writer); AddDoc("abbbb", writer); AddDoc("bbbbb", writer); AddDoc("ddddd", writer); IndexReader reader = writer.GetReader(); IndexSearcher searcher = NewSearcher(reader); writer.Dispose(); FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); // same with prefix query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 6); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); // test scoring query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "3 documents should match"); IList <string> order = Arrays.AsList("bbbbb", "abbbb", "aabbb"); for (int i = 0; i < hits.Length; i++) { string term = searcher.Doc(hits[i].Doc).Get("field"); //System.out.println(hits[i].Score); Assert.AreEqual(order[i], term); } // test pq size by supplying maxExpansions=2 // this query would normally return 3 documents, because 3 terms match (see above): query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length, "only 2 documents should match"); order = Arrays.AsList("bbbbb", "abbbb"); for (int i = 0; i < hits.Length; i++) { string term = searcher.Doc(hits[i].Doc).Get("field"); //System.out.println(hits[i].Score); Assert.AreEqual(order[i], term); } // not similar enough: query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0); // edit distance to "aaaaa" = 3 hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); // query identical to a word in the index: query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); // default allows for up to two edits: Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); // 
query similar to a word in the index: query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); // now with prefix query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb")); query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa")); Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab")); query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); // now with prefix query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 1); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 2); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 3); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 4); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd")); query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 5); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); // different field = no match: query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0); hits = searcher.Search(query, null, 
1000).ScoreDocs; Assert.AreEqual(0, hits.Length); reader.Dispose(); directory.Dispose(); }
public void TestBasics() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); var docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 2); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "restaurant")); query.Add(new Term("field", "universe")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"3", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } r.Dispose(); w.Dispose(); dir.Dispose(); }
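// DoTest below indexes random BINARY, SORTED, or NUMERIC doc values and verifies that the matching ValueSource (BytesRefFieldSource or Int64FieldSource) reproduces the original values through FunctionValues.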
private void DoTest(DocValuesType type) { Directory d = NewDirectory(); IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); int nDocs = AtLeast(50); Field id = new NumericDocValuesField("id", 0); Field f; switch (type) { case DocValuesType.BINARY: f = new BinaryDocValuesField("dv", new BytesRef()); break; case DocValuesType.SORTED: f = new SortedDocValuesField("dv", new BytesRef()); break; case DocValuesType.NUMERIC: f = new NumericDocValuesField("dv", 0); break; default: throw new InvalidOperationException(); } Document document = new Document(); document.Add(id); document.Add(f); object[] vals = new object[nDocs]; RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig); for (int i = 0; i < nDocs; ++i) { id.SetInt64Value(i); switch (type) { case DocValuesType.SORTED: case DocValuesType.BINARY: do { vals[i] = TestUtil.RandomSimpleString(Random, 20); } while (((string)vals[i]).Length == 0); f.SetBytesValue(new BytesRef((string)vals[i])); break; case DocValuesType.NUMERIC: int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue)); f.SetInt64Value((long)vals[i]); break; } iw.AddDocument(document); if (Random.NextBoolean() && i % 10 == 9) { iw.Commit(); } } iw.Dispose(); DirectoryReader rd = DirectoryReader.Open(d); foreach (AtomicReaderContext leave in rd.Leaves) { FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave); ValueSource vs; switch (type) { case DocValuesType.BINARY: case DocValuesType.SORTED: vs = new BytesRefFieldSource("dv"); break; case DocValuesType.NUMERIC: vs = new Int64FieldSource("dv"); break; default: throw new InvalidOperationException(); } FunctionValues values = vs.GetValues(null, leave); BytesRef bytes = new BytesRef(); for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i) { assertTrue(values.Exists(i)); if (vs is BytesRefFieldSource) { assertTrue(values.ObjectVal(i) is string); } else if (vs is Int64FieldSource) { assertTrue(values.ObjectVal(i) is long?); assertTrue(values.BytesVal(i, bytes)); } else { throw new InvalidOperationException(); } object expected = vals[ids.Int32Val(i)]; switch (type) { case DocValuesType.SORTED: values.OrdVal(i); // no exception assertTrue(values.NumOrd >= 1); goto case DocValuesType.BINARY; case DocValuesType.BINARY: assertEquals(expected, values.ObjectVal(i)); assertEquals(expected, values.StrVal(i)); assertEquals(expected, values.ObjectVal(i)); assertEquals(expected, values.StrVal(i)); assertTrue(values.BytesVal(i, bytes)); assertEquals(new BytesRef((string)expected), bytes); break; case DocValuesType.NUMERIC: assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i)); break; } } } rd.Dispose(); d.Dispose(); }
public virtual void TestNRTAndCommit() { Directory dir = NewDirectory(); NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); RandomIndexWriter w = new RandomIndexWriter(Random, cachedDir, conf); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); int numDocs = TestUtil.NextInt32(Random, 100, 400); if (Verbose) { Console.WriteLine("TEST: numDocs=" + numDocs); } IList <BytesRef> ids = new List <BytesRef>(); DirectoryReader r = null; for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = docs.NextDoc(); ids.Add(new BytesRef(doc.Get("docid"))); w.AddDocument(doc); if (Random.Next(20) == 17) { if (r == null) { r = DirectoryReader.Open(w.IndexWriter, false); } else { DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } } Assert.AreEqual(1 + docCount, r.NumDocs); IndexSearcher s = NewSearcher(r); // Just make sure search can run; we can't assert // totHits since it could be 0 TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10); // System.out.println("tot hits " + hits.totalHits); } } if (r != null) { r.Dispose(); } // Close should force cache to clear since all files are sync'd w.Dispose(); string[] cachedFiles = cachedDir.ListCachedFiles(); foreach (string file in cachedFiles) { Console.WriteLine("FAIL: cached file " + file + " remains after sync"); } Assert.AreEqual(0, cachedFiles.Length); r = DirectoryReader.Open(dir); foreach (BytesRef id in ids) { Assert.AreEqual(1, r.DocFreq(new Term("docid", id))); } r.Dispose(); cachedDir.Dispose(); docs.Dispose(); }
public void TestRandomIndex() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); CreateRandomIndex(AtLeast(50), w, Random.NextInt64()); DirectoryReader reader = w.GetReader(); AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader); string field = @"body"; Terms terms = wrapper.GetTerms(field); var lowFreqQueue = new AnonymousPriorityQueue(this, 5); Util.PriorityQueue <TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5); try { TermsEnum iterator = terms.GetEnumerator(); while (iterator.MoveNext()) { if (highFreqQueue.Count < 5) { highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); } else { if (highFreqQueue.Top.freq < iterator.DocFreq) { highFreqQueue.Top.freq = iterator.DocFreq; highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); highFreqQueue.UpdateTop(); } if (lowFreqQueue.Top.freq > iterator.DocFreq) { lowFreqQueue.Top.freq = iterator.DocFreq; lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); lowFreqQueue.UpdateTop(); } } } int lowFreq = lowFreqQueue.Top.freq; int highFreq = highFreqQueue.Top.freq; AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq); List <TermAndFreq> highTerms = QueueToList(highFreqQueue); List <TermAndFreq> lowTerms = QueueToList(lowFreqQueue); IndexSearcher searcher = NewSearcher(reader); Occur lowFreqOccur = RandomOccur(Random); BooleanQuery verifyQuery = new BooleanQuery(); CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean()); foreach (TermAndFreq termAndFreq in lowTerms) { cq.Add(new Term(field, termAndFreq.term)); verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur)); } foreach (TermAndFreq termAndFreq in highTerms) { cq.Add(new Term(field, termAndFreq.term)); } TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc); TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc); assertEquals(verifySearch.TotalHits, cqSearch.TotalHits); var hits = new JCG.HashSet <int>(); foreach (ScoreDoc doc in verifySearch.ScoreDocs) { hits.Add(doc.Doc); } foreach (ScoreDoc doc in cqSearch.ScoreDocs) { assertTrue(hits.Remove(doc.Doc)); } assertTrue(hits.Count == 0); w.ForceMerge(1); DirectoryReader reader2 = w.GetReader(); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, cq, NewSearcher(reader2)); reader2.Dispose(); } finally { reader.Dispose(); wrapper.Dispose(); w.Dispose(); dir.Dispose(); } }
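// TestRandomIndex above uses a TermAndFreq holder and a RandomOccur(...) helper, neither of which is
// shown in this excerpt. A minimal sketch, assuming TermAndFreq is a mutable (term, freq) pair ordered
// by the surrounding priority queues, and RandomOccur picks between MUST and SHOULD:
private class TermAndFreq
{
    public BytesRef term;
    public int freq;

    public TermAndFreq(BytesRef term, int freq)
    {
        this.term = term;
        this.freq = freq;
    }
}

private static Occur RandomOccur(Random random)
{
    return random.NextBoolean() ? Occur.MUST : Occur.SHOULD;
}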