public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(FIELD, DocFields[i], Field.Store.NO)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity())); for (int i = 0; i < Values.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(FIELD, Values[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader = SlowCompositeReaderWrapper.Wrap(writer.Reader); writer.Dispose(); IndexSearcher = NewSearcher(IndexReader); IndexSearcher.Similarity = new DefaultSimilarity(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); Iw = new RandomIndexWriter(Random(), Dir); Document doc = new Document(); Field idField = new StringField("id", "", Field.Store.NO); doc.Add(idField); // add 500 docs with id 0..499 for (int i = 0; i < 500; i++) { idField.StringValue = Convert.ToString(i); Iw.AddDocument(doc); } // delete 20 of them for (int i = 0; i < 20; i++) { Iw.DeleteDocuments(new Term("id", Convert.ToString(Random().Next(Iw.MaxDoc())))); } Ir = Iw.Reader; @is = NewSearcher(Ir); }
public virtual void TestChangeGaps() { // LUCENE-5324: check that it is possible to change the wrapper's gaps int positionGap = Random().Next(1000); int offsetGap = Random().Next(1000); Analyzer @delegate = new MockAnalyzer(Random()); Analyzer a = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate); RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory()); Document doc = new Document(); FieldType ft = new FieldType(); ft.Indexed = true; ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; ft.Tokenized = true; ft.StoreTermVectors = true; ft.StoreTermVectorPositions = true; ft.StoreTermVectorOffsets = true; doc.Add(new Field("f", "a", ft)); doc.Add(new Field("f", "a", ft)); writer.AddDocument(doc, a); AtomicReader reader = GetOnlySegmentReader(writer.Reader); Fields fields = reader.GetTermVectors(0); Terms terms = fields.Terms("f"); TermsEnum te = terms.Iterator(null); Assert.AreEqual(new BytesRef("a"), te.Next()); DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null); Assert.AreEqual(0, dpe.NextDoc()); Assert.AreEqual(2, dpe.Freq()); Assert.AreEqual(0, dpe.NextPosition()); Assert.AreEqual(0, dpe.StartOffset()); int endOffset = dpe.EndOffset(); Assert.AreEqual(1 + positionGap, dpe.NextPosition()); Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset()); Assert.AreEqual(null, te.Next()); reader.Dispose(); writer.Dispose(); writer.w.Directory.Dispose(); }
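// Hypothetical sketch (an assumption, not the original source) of the AnalyzerWrapperAnonymousInnerClassHelper2
// used in TestChangeGaps above: an AnalyzerWrapper that leaves analysis to the wrapped MockAnalyzer but
// overrides the position-increment and offset gaps, which the test then verifies through the term vectors.
// The outer-instance parameter type and the exact Lucene.NET 4.x member names (GetWrappedAnalyzer,
// GetPositionIncrementGap, GetOffsetGap, the ReuseStrategy base constructor) are assumptions.
private sealed class AnalyzerWrapperAnonymousInnerClassHelper2 : AnalyzerWrapper
{
    private readonly int PositionGap;
    private readonly int OffsetGap;
    private readonly Analyzer DelegateAnalyzer;

    public AnalyzerWrapperAnonymousInnerClassHelper2(object outerInstance, ReuseStrategy strategy, int positionGap, int offsetGap, Analyzer @delegate)
        : base(strategy)
    {
        this.PositionGap = positionGap;
        this.OffsetGap = offsetGap;
        this.DelegateAnalyzer = @delegate;
    }

    protected override Analyzer GetWrappedAnalyzer(string fieldName)
    {
        return DelegateAnalyzer; // tokenization itself is unchanged
    }

    public override int GetPositionIncrementGap(string fieldName)
    {
        return PositionGap; // asserted via the second NextPosition() above
    }

    public override int GetOffsetGap(string fieldName)
    {
        return OffsetGap; // asserted via the second EndOffset() above
    }
}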
public virtual void TestSimple() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); Field field = NewTextField("foo", "", Field.Store.NO); doc.Add(field); Field dvField = new FloatDocValuesField("foo_boost", 0.0F); doc.Add(dvField); Field field2 = NewTextField("bar", "", Field.Store.NO); doc.Add(field2); field.StringValue = "quick brown fox"; field2.StringValue = "quick brown fox"; dvField.FloatValue = 2f; // boost x2 iw.AddDocument(doc); field.StringValue = "jumps over lazy brown dog"; field2.StringValue = "jumps over lazy brown dog"; dvField.FloatValue = 4f; // boost x4 iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); // no boosting IndexSearcher searcher1 = NewSearcher(ir, false); Similarity @base = searcher1.Similarity; // boosting IndexSearcher searcher2 = NewSearcher(ir, false); searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base); // in this case, we searched on field "foo". first document should have 2x the score. TermQuery tq = new TermQuery(new Term("foo", "quick")); QueryUtils.Check(Random(), tq, searcher1); QueryUtils.Check(Random(), tq, searcher2); TopDocs noboost = searcher1.Search(tq, 10); TopDocs boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc)); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON); // this query matches only the second document, which should have 4x the score. tq = new TermQuery(new Term("foo", "jumps")); QueryUtils.Check(Random(), tq, searcher1); QueryUtils.Check(Random(), tq, searcher2); noboost = searcher1.Search(tq, 10); boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON); // search on field bar just for kicks, nothing should happen, since we set up // our sim provider to only use foo_boost for field foo. tq = new TermQuery(new Term("bar", "quick")); QueryUtils.Check(Random(), tq, searcher1); QueryUtils.Check(Random(), tq, searcher2); noboost = searcher1.Search(tq, 10); boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON); ir.Dispose(); dir.Dispose(); }
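// Hypothetical sketch (an assumption) of the PerFieldSimilarityWrapperAnonymousInnerClassHelper assigned to
// searcher2 above. The real anonymous helper is constructed as (this, field, @base) and internally builds a
// Similarity that multiplies scores for field "foo" by the "foo_boost" doc values; that boosting Similarity is
// not sketched here, so this version simply takes it as a constructor argument. The Get(string) member follows
// the Lucene.NET 4.x PerFieldSimilarityWrapper API and is an assumption.
private sealed class PerFieldSimilarityWrapperAnonymousInnerClassHelper : PerFieldSimilarityWrapper
{
    private readonly Similarity BaseSim;
    private readonly Similarity FooSim;

    public PerFieldSimilarityWrapperAnonymousInnerClassHelper(Similarity baseSim, Similarity fooSim)
    {
        this.BaseSim = baseSim;
        this.FooSim = fooSim; // assumed: a doc-values ("foo_boost") boosting wrapper around baseSim
    }

    public override Similarity Get(string field)
    {
        // only "foo" is boosted; "bar" keeps the unmodified base similarity, which is what the
        // final assertions of TestSimple rely on
        return "foo".Equals(field) ? FooSim : BaseSim;
    }
}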
private static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw) { LineFileDocs docs = new LineFileDocs(random); Document doc = null; Field field = null, currentField = null; StringReader bogus = new StringReader(""); if (iw != null) { doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); if (random.NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = random.NextBoolean(); ft.StoreTermVectorPositions = random.NextBoolean(); if (ft.StoreTermVectorPositions && !OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { ft.StoreTermVectorPayloads = random.NextBoolean(); } } if (random.NextBoolean()) { ft.OmitNorms = true; } string pf = TestUtil.GetPostingsFormat("dummy"); bool supportsOffsets = !DoesntSupportOffsets.Contains(pf); switch (random.Next(4)) { case 0: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; break; case 1: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; break; case 2: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; break; default: if (supportsOffsets && offsetsAreCorrect) { ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } break; } currentField = field = new Field("dummy", bogus, ft); doc.Add(currentField); } try { for (int i = 0; i < iterations; i++) { string text; if (random.Next(10) == 7) { // real data from linedocs text = docs.NextDoc().Get("body"); if (text.Length > maxWordLength) { // Take a random slice from the text...: int startPos = random.Next(text.Length - maxWordLength); if (startPos > 0 && char.IsLowSurrogate(text[startPos])) { // Take care not to split up a surrogate pair: startPos--; Assert.True(char.IsHighSurrogate(text[startPos])); } int endPos = startPos + maxWordLength - 1; if (char.IsHighSurrogate(text[endPos])) { // Take care not to split up a surrogate pair: endPos--; } text = text.Substring(startPos, 1 + endPos - startPos); } } else { // synthetic text = TestUtil.RandomAnalysisString(random, maxWordLength, simple); } try { CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField); if (iw != null) { if (random.Next(7) == 0) { // pile up a multivalued field var ft = (FieldType)field.FieldType(); currentField = new Field("dummy", bogus, ft); doc.Add(currentField); } else { iw.AddDocument(doc); if (doc.Fields.Count > 1) { // back to 1 field currentField = field; doc.RemoveFields("dummy"); doc.Add(currentField); } } } } catch (Exception t) { // TODO: really we should pass a random seed to // checkAnalysisConsistency then print it here too: Console.Error.WriteLine("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + Escape(text) + "'"); throw; } } } finally { IOUtils.CloseWhileHandlingException(docs); } }
public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple, bool offsetsAreCorrect) { CheckResetException(a, "best effort"); long seed = random.Next(); bool useCharFilter = random.NextBoolean(); Directory dir = null; RandomIndexWriter iw = null; string postingsFormat = TestUtil.GetPostingsFormat("dummy"); bool codecOk = iterations * maxWordLength < 100000 || !(postingsFormat.Equals("Memory") || postingsFormat.Equals("SimpleText")); if (Rarely(random) && codecOk) { dir = NewFSDirectory(CreateTempDir("bttc")); iw = new RandomIndexWriter(new Random((int)seed), dir, a); } bool success = false; try { CheckRandomData(new Random((int)seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw); // now test with multiple threads: note we do the EXACT same thing we did before in each thread, // so this should only really fail from another thread if it's an actual thread problem int numThreads = TestUtil.NextInt(random, 2, 4); var startingGun = new CountDownLatch(1); var threads = new AnalysisThread[numThreads]; for (int i = 0; i < threads.Length; i++) { threads[i] = new AnalysisThread(seed, /*startingGun,*/ a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw); } Array.ForEach(threads, thread => thread.Start()); startingGun.countDown(); foreach (var t in threads) { try { t.Join(); } catch (ThreadInterruptedException e) { Fail("Thread interrupted"); } } if (threads.Any(x => x.Failed)) Fail("At least one analysis thread failed"); success = true; } finally { if (success) { IOUtils.Close(iw, dir); } else { IOUtils.CloseWhileHandlingException(iw, dir); // checkindex } } }
internal AnalysisThread(long seed, /*CountDownLatch latch,*/ Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw) { this.Seed = seed; this.a = a; this.Iterations = iterations; this.MaxWordLength = maxWordLength; this.UseCharFilter = useCharFilter; this.Simple = simple; this.OffsetsAreCorrect = offsetsAreCorrect; this.Iw = iw; this._latch = null; }
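// Hypothetical sketch (an assumption) of the remaining members of AnalysisThread: each thread replays exactly
// the same seeded CheckRandomData run as the single-threaded pass, so any difference can only come from a
// thread-safety problem in the analyzer. The ThreadClass-style override and the Failed flag (read by the caller
// after Join()) are taken from the surrounding code; the Run() body itself is assumed.
public bool Failed;

public override void Run()
{
    bool success = false;
    try
    {
        // same seed as the single-threaded pass in CheckRandomData above
        CheckRandomData(new Random((int)Seed), a, Iterations, MaxWordLength, UseCharFilter, Simple, OffsetsAreCorrect, Iw);
        success = true;
    }
    finally
    {
        Failed = !success;
    }
}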
public virtual void Test2() { Random random = Random(); int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); bool allowDups = random.NextBoolean(); HashSet<string> seen = new HashSet<string>(); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); } int numDocs = 0; IList<BytesRef> docValues = new List<BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random); } else { s = TestUtil.RandomUnicodeString(random); } BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (VERBOSE) { Console.WriteLine(" " + numDocs + ": s=" + s); } Document doc = new Document(); doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); docValues.Add(br); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.Reader.Dispose(); } } writer.ForceMerge(1); DirectoryReader r = writer.Reader; writer.Dispose(); AtomicReader sr = GetOnlySegmentReader(r); long END_TIME = (long)(DateTime.UtcNow - new DateTime(1970, 1, 1)).TotalMilliseconds + (TEST_NIGHTLY ? 30 : 1) * 1000; int NUM_THREADS = TestUtil.NextInt(Random(), 1, 10); ThreadClass[] threads = new ThreadClass[NUM_THREADS]; for (int thread = 0; thread < NUM_THREADS; thread++) { threads[thread] = new ThreadAnonymousInnerClassHelper2(this, random, docValues, sr, END_TIME); threads[thread].Start(); } foreach (ThreadClass thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
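// Hypothetical sketch (an assumption) of ThreadAnonymousInnerClassHelper2 used in Test2 above: each thread
// hammers the single-segment reader until END_TIME, reading random documents' sorted doc values and checking
// them against the expected values captured while indexing. The outer-instance constructor parameter is
// omitted, and the doc values accessors (GetSortedDocValues, GetNumericDocValues, Get) follow the Lucene.NET
// 4.x API but are assumptions.
private sealed class ThreadAnonymousInnerClassHelper2 : ThreadClass
{
    private readonly Random Rnd;
    private readonly IList<BytesRef> ExpectedValues;
    private readonly AtomicReader Reader;
    private readonly long EndTime;

    public ThreadAnonymousInnerClassHelper2(Random random, IList<BytesRef> expectedValues, AtomicReader reader, long endTime)
    {
        this.Rnd = random;
        this.ExpectedValues = expectedValues;
        this.Reader = reader;
        this.EndTime = endTime;
    }

    public override void Run()
    {
        SortedDocValues stringDv = Reader.GetSortedDocValues("stringdv");
        NumericDocValues docIdToId = Reader.GetNumericDocValues("id");
        BytesRef scratch = new BytesRef();
        // EndTime is an epoch-milliseconds deadline, matching the END_TIME computed in Test2
        while ((long)(DateTime.UtcNow - new DateTime(1970, 1, 1)).TotalMilliseconds < EndTime)
        {
            int docId = Rnd.Next(ExpectedValues.Count); // no deletions, so maxDoc equals the number of values
            stringDv.Get(docId, scratch);
            Assert.AreEqual(ExpectedValues[(int)docIdToId.Get(docId)], scratch);
        }
    }
}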
public virtual void TestCachingWorks() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); writer.Dispose(); IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir)); AtomicReaderContext context = (AtomicReaderContext)reader.Context; MockFilter filter = new MockFilter(); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // first time, nested filter is called DocIdSet strongRef = cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs); Assert.IsTrue(filter.WasCalled(), "first time"); // make sure no exception if cache is holding the wrong docIdSet cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs); // second time, nested filter should not be called filter.Clear(); cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs); Assert.IsFalse(filter.WasCalled(), "second time"); reader.Dispose(); dir.Dispose(); }
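// Hypothetical sketch (an assumption) of the MockFilter used above: a Filter that records whether GetDocIdSet
// was invoked, so the test can distinguish a cache miss (inner filter called) from a cache hit (inner filter
// skipped). The acceptDocs parameter type (Bits) and MaxDoc() naming follow the Lucene.NET 4.x API and are
// assumptions.
private class MockFilter : Filter
{
    private bool _wasCalled;

    public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
    {
        _wasCalled = true;
        return new FixedBitSet(context.AtomicReader.MaxDoc()); // an empty set is enough for this test
    }

    public bool WasCalled()
    {
        return _wasCalled;
    }

    public void Clear()
    {
        _wasCalled = false;
    }
}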
public static void BeforeClass() { Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir); int numDocs = AtLeast(300); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddSome(doc, AlwaysTerms); if (Random().Next(100) < 90) { AddSome(doc, CommonTerms); } if (Random().Next(100) < 50) { AddSome(doc, MediumTerms); } if (Random().Next(100) < 10) { AddSome(doc, RareTerms); } iw.AddDocument(doc); } iw.ForceMerge(1); iw.Dispose(); r = DirectoryReader.Open(Dir); atomicReader = GetOnlySegmentReader(r); Searcher = new IndexSearcher(atomicReader); Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(); }
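// Hypothetical sketch (an assumption) of DefaultSimilarityAnonymousInnerClassHelper assigned to Searcher above:
// a DefaultSimilarity that disables the query norm so scores stay simple to reason about in the assertions.
// Which member the real anonymous class overrides is not shown in this file, so this is only a plausible
// reconstruction.
private sealed class DefaultSimilarityAnonymousInnerClassHelper : DefaultSimilarity
{
    public override float QueryNorm(float sumOfSquaredWeights)
    {
        return 1f; // no query normalization
    }
}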
public virtual void TestEmptyTerm() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(NewTextField("foo", "bar", Field.Store.NO)); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; BooleanQuery query = new BooleanQuery(true); query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); query.Add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD); Assert.AreEqual(1, @is.Search(query, 10).TotalHits); } ir.Dispose(); dir.Dispose(); }
public virtual void TestEmptyIndex() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; Assert.AreEqual(0, @is.Search(new TermQuery(new Term("foo", "bar")), 10).TotalHits); } ir.Dispose(); dir.Dispose(); }
public virtual void TestNullDocIdSetIterator() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); writer.Dispose(); IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir)); AtomicReaderContext context = (AtomicReaderContext)reader.Context; Filter filter = new FilterAnonymousInnerClassHelper2(this, context); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // the caching filter should return the empty set constant Assert.IsNull(cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs)); reader.Dispose(); dir.Dispose(); }
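// Hypothetical sketch (an assumption) of FilterAnonymousInnerClassHelper2 used above: a Filter whose DocIdSet
// returns a null iterator, which CachingWrapperFilter is expected to normalize to a null DocIdSet. The real
// helper is constructed as (this, context); that is omitted here, and the iterator member name (GetIterator)
// is an assumption about this port's DocIdSet API.
private sealed class FilterAnonymousInnerClassHelper2 : Filter
{
    public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
    {
        return new NullIteratorDocIdSet();
    }

    private sealed class NullIteratorDocIdSet : DocIdSet
    {
        public override DocIdSetIterator GetIterator()
        {
            return null; // "no documents"; the caching wrapper should collapse this to null
        }
    }
}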
public virtual void TestIsCacheAble() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); writer.AddDocument(new Document()); writer.Dispose(); IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir)); // not cacheable: AssertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test", "value"))), false); // returns default empty docidset, always cacheable: AssertDocIdSetCacheable(reader, NumericRangeFilter.NewIntRange("test", Convert.ToInt32(10000), Convert.ToInt32(-10000), true, true), true); // is cacheable: AssertDocIdSetCacheable(reader, FieldCacheRangeFilter.NewIntRange("test", Convert.ToInt32(10), Convert.ToInt32(20), true, true), true); // a fixedbitset filter is always cacheable AssertDocIdSetCacheable(reader, new FilterAnonymousInnerClassHelper3(this), true); reader.Dispose(); dir.Dispose(); }
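// Hypothetical sketch (an assumption) of FilterAnonymousInnerClassHelper3 used above: a Filter backed directly
// by a FixedBitSet, whose DocIdSet reports itself as cacheable. Parameter and member naming (Bits, MaxDoc())
// follow the Lucene.NET 4.x API and are assumptions.
private sealed class FilterAnonymousInnerClassHelper3 : Filter
{
    public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
    {
        return new FixedBitSet(context.AtomicReader.MaxDoc()); // FixedBitSet is always cacheable
    }
}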
public virtual void TestEnforceDeletions() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10))); // asserts below require no unexpected merges: // NOTE: cannot use writer.getReader because RIW (on // flipping a coin) may give us a newly opened reader, // but we use .reopen on this reader below and expect to // (must) get an NRT reader: DirectoryReader reader = DirectoryReader.Open(writer.w, true); // same reason we don't wrap? IndexSearcher searcher = NewSearcher(reader, false); // add a doc, refresh the reader, and check that it's there Document doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); writer.AddDocument(doc); reader = RefreshReader(reader); searcher = NewSearcher(reader, false); TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1); Assert.AreEqual(1, docs.TotalHits, "Should find a hit..."); Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1"))); CachingWrapperFilter filter = new CachingWrapperFilter(startFilter); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.IsTrue(filter.SizeInBytes() > 0); Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit..."); Query constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); // make sure we get a cache hit when we reopen reader // that had no change to deletions // fake delete (deletes nothing): writer.DeleteDocuments(new Term("foo", "bar")); IndexReader oldReader = reader; reader = RefreshReader(reader); Assert.IsTrue(reader == oldReader); int missCount = filter.MissCount; docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); // cache hit: Assert.AreEqual(missCount, filter.MissCount); // now delete the doc, refresh the reader, and see that it's not there writer.DeleteDocuments(new Term("id", "1")); // NOTE: important to hold ref here so GC doesn't clear // the cache entry! Else the assert below may sometimes // fail: oldReader = reader; reader = RefreshReader(reader); searcher = NewSearcher(reader, false); missCount = filter.MissCount; docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit..."); // cache hit Assert.AreEqual(missCount, filter.MissCount); docs = searcher.Search(constantScore, 1); Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit..."); // apply deletes dynamically: filter = new CachingWrapperFilter(startFilter); writer.AddDocument(doc); reader = RefreshReader(reader); searcher = NewSearcher(reader, false); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit..."); missCount = filter.MissCount; Assert.IsTrue(missCount > 0); constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); Assert.AreEqual(missCount, filter.MissCount); writer.AddDocument(doc); // NOTE: important to hold ref here so GC doesn't clear // the cache entry! Else the assert below may sometimes // fail: oldReader = reader; reader = RefreshReader(reader); searcher = NewSearcher(reader, false); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits..."); Assert.IsTrue(filter.MissCount > missCount); missCount = filter.MissCount; constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit..."); Assert.AreEqual(missCount, filter.MissCount); // now delete the doc, refresh the reader, and see that it's not there writer.DeleteDocuments(new Term("id", "1")); reader = RefreshReader(reader); searcher = NewSearcher(reader, false); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit..."); // CWF reused the same entry (it dynamically applied the deletes): Assert.AreEqual(missCount, filter.MissCount); docs = searcher.Search(constantScore, 1); Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit..."); // CWF reused the same entry (it dynamically applied the deletes): Assert.AreEqual(missCount, filter.MissCount); // NOTE: silliness to make sure the runtime does not eliminate // our holding onto oldReader to prevent // CachingWrapperFilter's WeakHashMap from dropping the // entry: Assert.IsTrue(oldReader != null); reader.Dispose(); writer.Dispose(); dir.Dispose(); }
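// Hypothetical sketch (an assumption) of the RefreshReader helper used throughout TestEnforceDeletions above:
// reopen the NRT reader if anything changed, disposing the old one, and otherwise hand back the same instance
// (the test asserts reference equality in the "fake delete" case).
private static DirectoryReader RefreshReader(DirectoryReader reader)
{
    DirectoryReader oldReader = reader;
    DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
    if (newReader != null)
    {
        oldReader.Dispose();
        return newReader;
    }
    return oldReader; // nothing changed
}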
public static void BeforeClass() { NoDocs = AtLeast(4096); Distance = (1L << 60) / NoDocs; Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 100, 1000)).SetMergePolicy(NewLogMergePolicy())); FieldType storedLong = new FieldType(LongField.TYPE_NOT_STORED); storedLong.Stored = true; storedLong.Freeze(); FieldType storedLong8 = new FieldType(storedLong); storedLong8.NumericPrecisionStep = 8; FieldType storedLong4 = new FieldType(storedLong); storedLong4.NumericPrecisionStep = 4; FieldType storedLong6 = new FieldType(storedLong); storedLong6.NumericPrecisionStep = 6; FieldType storedLong2 = new FieldType(storedLong); storedLong2.NumericPrecisionStep = 2; FieldType storedLongNone = new FieldType(storedLong); storedLongNone.NumericPrecisionStep = int.MaxValue; FieldType unstoredLong = LongField.TYPE_NOT_STORED; FieldType unstoredLong8 = new FieldType(unstoredLong); unstoredLong8.NumericPrecisionStep = 8; FieldType unstoredLong6 = new FieldType(unstoredLong); unstoredLong6.NumericPrecisionStep = 6; FieldType unstoredLong4 = new FieldType(unstoredLong); unstoredLong4.NumericPrecisionStep = 4; FieldType unstoredLong2 = new FieldType(unstoredLong); unstoredLong2.NumericPrecisionStep = 2; LongField field8 = new LongField("field8", 0L, storedLong8), field6 = new LongField("field6", 0L, storedLong6), field4 = new LongField("field4", 0L, storedLong4), field2 = new LongField("field2", 0L, storedLong2), fieldNoTrie = new LongField("field" + int.MaxValue, 0L, storedLongNone), ascfield8 = new LongField("ascfield8", 0L, unstoredLong8), ascfield6 = new LongField("ascfield6", 0L, unstoredLong6), ascfield4 = new LongField("ascfield4", 0L, unstoredLong4), ascfield2 = new LongField("ascfield2", 0L, unstoredLong2); Document doc = new Document(); // add fields, that have a distance to test general functionality doc.Add(field8); doc.Add(field6); doc.Add(field4); doc.Add(field2); doc.Add(fieldNoTrie); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive doc.Add(ascfield8); doc.Add(ascfield6); doc.Add(ascfield4); doc.Add(ascfield2); // Add a series of noDocs docs with increasing long values, by updating the fields for (int l = 0; l < NoDocs; l++) { long val = Distance * l + StartOffset; field8.LongValue = val; field6.LongValue = val; field4.LongValue = val; field2.LongValue = val; fieldNoTrie.LongValue = val; val = l - (NoDocs / 2); ascfield8.LongValue = val; ascfield6.LongValue = val; ascfield4.LongValue = val; ascfield2.LongValue = val; writer.AddDocument(doc); } Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public virtual void TestInfiniteValues() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Document doc = new Document(); doc.Add(new DoubleField("double", double.NegativeInfinity, Field.Store.NO)); doc.Add(new LongField("long", long.MinValue, Field.Store.NO)); writer.AddDocument(doc); doc = new Document(); doc.Add(new DoubleField("double", double.PositiveInfinity, Field.Store.NO)); doc.Add(new LongField("long", long.MaxValue, Field.Store.NO)); writer.AddDocument(doc); doc = new Document(); doc.Add(new DoubleField("double", 0.0, Field.Store.NO)); doc.Add(new LongField("long", 0L, Field.Store.NO)); writer.AddDocument(doc); foreach (double d in TestNumericUtils.DOUBLE_NANs) { doc = new Document(); doc.Add(new DoubleField("double", d, Field.Store.NO)); writer.AddDocument(doc); } writer.Dispose(); IndexReader r = DirectoryReader.Open(dir); IndexSearcher s = NewSearcher(r); Query q = NumericRangeQuery.NewLongRange("long", null, null, true, true); TopDocs topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewLongRange("long", null, null, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", null, null, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", null, null, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", double.NaN, double.NaN, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(TestNumericUtils.DOUBLE_NANs.Length, topDocs.ScoreDocs.Length, "Score doc count"); r.Dispose(); dir.Dispose(); }
public virtual void TestPostings() { Directory dir = NewFSDirectory(CreateTempDir("postings")); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetCodec(Codec.ForName("Lucene40")); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); Document doc = new Document(); // id field FieldType idType = new FieldType(StringField.TYPE_NOT_STORED); idType.StoreTermVectors = true; Field idField = new Field("id", "", idType); doc.Add(idField); // title field: short text field FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED); titleType.StoreTermVectors = true; titleType.StoreTermVectorPositions = true; titleType.StoreTermVectorOffsets = true; titleType.IndexOptions = IndexOptions(); Field titleField = new Field("title", "", titleType); doc.Add(titleField); // body field: long text field FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED); bodyType.StoreTermVectors = true; bodyType.StoreTermVectorPositions = true; bodyType.StoreTermVectorOffsets = true; bodyType.IndexOptions = IndexOptions(); Field bodyField = new Field("body", "", bodyType); doc.Add(bodyField); int numDocs = AtLeast(1000); for (int i = 0; i < numDocs; i++) { idField.StringValue = Convert.ToString(i); titleField.StringValue = FieldValue(1); bodyField.StringValue = FieldValue(3); iw.AddDocument(doc); if (Random().Next(20) == 0) { iw.DeleteDocuments(new Term("id", Convert.ToString(i))); } } if (Random().NextBoolean()) { // delete 1-100% of docs iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)])); } iw.Dispose(); dir.Dispose(); // checkindex }
public virtual void TestOmitTFAndNorms() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; ft.OmitNorms = true; ft.Freeze(); Field f = NewField("foo", "bar", ft); doc.Add(f); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; BooleanQuery query = new BooleanQuery(true); query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); Assert.AreEqual(1, @is.Search(query, 10).TotalHits); } ir.Dispose(); dir.Dispose(); }
public virtual void TestTermUTF16SortOrder() { Random rnd = Random; Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif rnd, dir); Document d = new Document(); // Single segment Field f = NewStringField("f", "", Field.Store.NO); d.Add(f); char[] chars = new char[2]; ISet <string> allTerms = new JCG.HashSet <string>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s; if (rnd.NextBoolean()) { // Single char if (rnd.NextBoolean()) { // Above surrogates chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff); } else { // Below surrogates chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1); } s = new string(chars, 0, 1); } else { // Surrogate pair chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END); Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END); chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END); s = new string(chars, 0, 2); } allTerms.Add(s); f.SetStringValue(s); writer.AddDocument(d); if ((1 + i) % 42 == 0) { writer.Commit(); } } IndexReader r = writer.GetReader(); // Test each sub-segment foreach (AtomicReaderContext ctx in r.Leaves) { CheckTermsOrder(ctx.Reader, allTerms, false); } CheckTermsOrder(r, allTerms, true); // Test multi segment r.Dispose(); writer.ForceMerge(1); // Test single segment r = writer.GetReader(); CheckTermsOrder(r, allTerms, true); r.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void Test() { Directory dir = NewDirectory(); Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())); // TODO we could actually add more fields implemented with different PFs // or, just put this test into the usual rotation? RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone()); Document doc = new Document(); FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsOnlyType.StoreTermVectors = true; docsOnlyType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsAndFreqsType.StoreTermVectors = true; docsAndFreqsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED); // turn these on for a cross-check positionsType.StoreTermVectors = true; positionsType.StoreTermVectorPositions = true; positionsType.StoreTermVectorOffsets = true; positionsType.StoreTermVectorPayloads = true; FieldType offsetsType = new FieldType(positionsType); offsetsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; Field field1 = new Field("field1docs", "", docsOnlyType); Field field2 = new Field("field2freqs", "", docsAndFreqsType); Field field3 = new Field("field3positions", "", positionsType); Field field4 = new Field("field4offsets", "", offsetsType); Field field5 = new Field("field5payloadsFixed", "", positionsType); Field field6 = new Field("field6payloadsVariable", "", positionsType); Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType); Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType); doc.Add(field1); doc.Add(field2); doc.Add(field3); doc.Add(field4); doc.Add(field5); doc.Add(field6); doc.Add(field7); doc.Add(field8); for (int i = 0; i < MAXDOC; i++) { string stringValue = Convert.ToString(i) + " verycommon " + English.IntToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random()); field1.StringValue = stringValue; field2.StringValue = stringValue; field3.StringValue = stringValue; field4.StringValue = stringValue; field5.StringValue = stringValue; field6.StringValue = stringValue; field7.StringValue = stringValue; field8.StringValue = stringValue; iw.AddDocument(doc); } iw.Dispose(); Verify(dir); TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge iwc.SetOpenMode(OpenMode_e.APPEND); IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone()); iw2.ForceMerge(1); iw2.Dispose(); Verify(dir); dir.Dispose(); }
public virtual void TestCrazySpans() { // The problem: "normal" lucene queries create scorers, returning null if terms don't exist // this means they never score a term that does not exist. // however with spans, there is only one scorer for the whole hierarchy: // inner queries are not real queries, their boosts are ignored, etc. Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); doc.Add(NewField("foo", "bar", ft)); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar")); SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz")); Query query = new SpanOrQuery(s1, s2); TopDocs td = @is.Search(query, 10); Assert.AreEqual(1, td.TotalHits); float score = td.ScoreDocs[0].Score; Assert.IsTrue(score >= 0.0f); Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim); } ir.Dispose(); dir.Dispose(); }
public override void SetUp() { base.SetUp(); Index = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Index, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(Sim).SetMergePolicy(NewLogMergePolicy())); // hed is the most important field, dek is secondary // d1 is an "ok" match for: albino elephant { Document d1 = new Document(); d1.Add(NewField("id", "d1", NonAnalyzedType)); // Field.Keyword("id", // "d1")); d1.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant")); d1.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant")); writer.AddDocument(d1); } // d2 is a "good" match for: albino elephant { Document d2 = new Document(); d2.Add(NewField("id", "d2", NonAnalyzedType)); // Field.Keyword("id", // "d2")); d2.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant")); d2.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek", // "albino")); d2.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant")); writer.AddDocument(d2); } // d3 is a "better" match for: albino elephant { Document d3 = new Document(); d3.Add(NewField("id", "d3", NonAnalyzedType)); // Field.Keyword("id", // "d3")); d3.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed", // "albino")); d3.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant")); writer.AddDocument(d3); } // d4 is the "best" match for: albino elephant { Document d4 = new Document(); d4.Add(NewField("id", "d4", NonAnalyzedType)); // Field.Keyword("id", // "d4")); d4.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed", // "albino")); d4.Add(NewField("hed", "elephant", NonAnalyzedType)); // Field.Text("hed", "elephant")); d4.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek", // "albino")); writer.AddDocument(d4); } r = SlowCompositeReaderWrapper.Wrap(writer.Reader); writer.Dispose(); s = NewSearcher(r); s.Similarity = Sim; }
public IterableAnonymousInnerClassHelper2(RandomIndexWriter outerInstance, IEnumerable<IndexableField> doc) { this.OuterInstance = outerInstance; this.Doc = doc; }
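// Hypothetical sketch (an assumption) of the remaining members of IterableAnonymousInnerClassHelper2: it
// exposes the single wrapped document as a one-element sequence, so RandomIndexWriter can hand it to the
// IndexWriter.AddDocuments/UpdateDocuments overloads that expect a collection of documents.
public IEnumerator<IEnumerable<IndexableField>> GetEnumerator()
{
    yield return Doc; // exactly one document
}

System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
    return GetEnumerator();
}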