public virtual void TestString()
{
    // Index two docs whose "value" field exists both as sorted doc values
    // (for sorting) and as a stored string (for verification).
    Directory indexDir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), indexDir);

    Document document = new Document();
    document.Add(new SortedDocValuesField("value", new BytesRef("foo")));
    document.Add(NewStringField("value", "foo", Field.Store.YES));
    iw.AddDocument(document);

    document = new Document();
    document.Add(new SortedDocValuesField("value", new BytesRef("bar")));
    document.Add(NewStringField("value", "bar", Field.Store.YES));
    iw.AddDocument(document);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    // Ascending sort on the STRING doc values.
    IndexSearcher indexSearcher = NewSearcher(reader);
    Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING));

    TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(2, topDocs.TotalHits);
    // 'bar' comes before 'foo'
    Assert.AreEqual("bar", indexSearcher.Doc(topDocs.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("foo", indexSearcher.Doc(topDocs.ScoreDocs[1].Doc).Get("value"));
    // Sorting must have used doc values, not the field cache.
    AssertNoFieldCaches();

    reader.Dispose();
    indexDir.Dispose();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory);

    // One reusable document; mutating the field's StringValue between
    // AddDocument calls avoids re-allocating document objects.
    Document doc = new Document();
    Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
    doc.Add(field);

    // Docs 0..5136: filler term.
    for (int i = 0; i < 5137; ++i)
    {
        iw.AddDocument(doc);
    }

    // Doc 5137: first occurrence of the interesting term.
    field.StringValue = "tangfulin";
    iw.AddDocument(doc);

    // Docs 5138..11376: more filler.
    field.StringValue = "meaninglessnames";
    for (int i = 5138; i < 11377; ++i)
    {
        iw.AddDocument(doc);
    }

    // Doc 11377: second occurrence of the interesting term.
    field.StringValue = "tangfulin";
    iw.AddDocument(doc);

    Reader = iw.Reader;
    Searcher = NewSearcher(Reader);
    iw.Dispose();
}
public virtual void TestOmitNorms_Mem()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    // First doc: f1 carries norms, f2 explicitly omits them.
    Document doc = new Document();
    Field normsField = NewTextField("f1", "this field has norms", Field.Store.NO);
    doc.Add(normsField);
    FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED);
    noNormsType.OmitNorms = true;
    Field noNormsField = NewField("f2", "this field has NO norms in all docs", noNormsType);
    doc.Add(noNormsField);
    iw.AddDocument(doc);
    iw.ForceMerge(1);

    // Second doc swaps the roles (f1 omits norms, f2 has them) so the
    // SegmentMerger must reconcile the conflicting field infos; the test
    // asserts below that the omitNorms bit ends up set for both fields.
    doc = new Document();
    doc.Add(NewField("f1", "this field has norms", noNormsType));
    doc.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));
    iw.AddDocument(doc);

    // Force merge, then flush via Dispose.
    iw.ForceMerge(1);
    iw.Dispose();

    SegmentReader segmentReader = GetOnlySegmentReader(DirectoryReader.Open(dir));
    FieldInfos fieldInfos = segmentReader.FieldInfos;
    Assert.IsTrue(fieldInfos.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
    Assert.IsTrue(fieldInfos.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

    segmentReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds the shared fixture: every document has a constant "title" and
/// "footer" field plus one value under FN. The same Document/Field objects
/// are reused, mutating StringValue before each AddDocument call. The FN
/// values mix surrogate pairs (e.g. \uD866\uDF05) and special code points;
/// as the inline comments note, several of these sort differently in
/// UTF-8/UTF-32 than in UTF-16 — presumably this fixture backs a term-order
/// test; confirm against the test methods consuming Reader/Searcher.
/// </summary>
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory); Document doc = new Document(); Field titleField = NewTextField("title", "some title", Field.Store.NO); Field field = NewTextField(FN, "", Field.Store.NO); Field footerField = NewTextField("footer", "a footer", Field.Store.NO); doc.Add(titleField); doc.Add(field); doc.Add(footerField); field.StringValue = "\uD866\uDF05abcdef"; writer.AddDocument(doc); field.StringValue = "\uD866\uDF06ghijkl"; writer.AddDocument(doc); // this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!! field.StringValue = "\uFB94mnopqr"; writer.AddDocument(doc); field.StringValue = "\uFB95stuvwx"; // this one too. writer.AddDocument(doc); field.StringValue = "a\uFFFCbc"; writer.AddDocument(doc); field.StringValue = "a\uFFFDbc"; writer.AddDocument(doc); field.StringValue = "a\uFFFEbc"; writer.AddDocument(doc); field.StringValue = "a\uFB94bc"; writer.AddDocument(doc); field.StringValue = "bacadaba"; writer.AddDocument(doc); field.StringValue = "\uFFFD"; writer.AddDocument(doc); field.StringValue = "\uFFFD\uD866\uDF05"; writer.AddDocument(doc); field.StringValue = "\uFFFD\uFFFD"; writer.AddDocument(doc); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true)).SetMergePolicy(NewLogMergePolicy()));

    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();

        // All docs store term vectors; positions are stored for even i,
        // offsets for i divisible by 3 (equivalent to the original
        // mod2/mod3 branch ladder, which covered all four combinations).
        FieldType vectorType = new FieldType(TextField.TYPE_STORED);
        vectorType.StoreTermVectors = true;
        vectorType.StoreTermVectorPositions = (i % 2 == 0);
        vectorType.StoreTermVectorOffsets = (i % 3 == 0);

        doc.Add(new Field("field", English.IntToEnglish(i), vectorType));
        // Also index a field without term vectors.
        doc.Add(new TextField("noTV", English.IntToEnglish(i), Field.Store.YES));
        iw.AddDocument(doc);
    }

    Reader = iw.Reader;
    iw.Dispose();
}
internal virtual void MakeIndex()
{
    // We use RAMDirectory here because we don't want to leave open files
    // behind on Windows.
    d = new RAMDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    // BUGFIX: was lowercase 'newField'; the LuceneTestCase helper used
    // everywhere else in this file is 'NewField'.
    doc.Add(NewField("ints", "1", StringField.TYPE_NOT_STORED));
    w.AddDocument(doc);
    // Collapse to a single segment so Leaves()[0] is the whole index.
    w.ForceMerge(1);
    r = w.Reader;
    w.Dispose();
    SubR = (AtomicReader)(r.Leaves()[0]).Reader();
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // One document per entry in DocFields.
    foreach (string contents in DocFields)
    {
        Document doc = new Document();
        doc.Add(NewTextField(FIELD, contents, Field.Store.NO));
        iw.AddDocument(doc);
    }

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();

    // The index and the searcher must share the same per-field similarity.
    PerFieldSimilarityWrapper similarity = new ExampleSimilarityProvider(this);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(similarity);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, config);

    Document doc = new Document();
    Field fooField = NewTextField("foo", "", Field.Store.NO);
    doc.Add(fooField);
    Field barField = NewTextField("bar", "", Field.Store.NO);
    doc.Add(barField);

    // The same text goes into both fields for each document.
    fooField.StringValue = "quick brown fox";
    barField.StringValue = "quick brown fox";
    iw.AddDocument(doc);

    fooField.StringValue = "jumps over lazy brown dog";
    barField.StringValue = "jumps over lazy brown dog";
    iw.AddDocument(doc);

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
    Searcher.Similarity = similarity;
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

    Document doc = new Document();
    Field keywordField = NewStringField("field", "", Field.Store.NO);
    doc.Add(keywordField);

    // We generate awful prefixes: good for testing. But for the preflex
    // (Lucene3x) codec the test can be very slow, so use fewer iterations.
    string codecName = Codec.Default.Name;
    int iterations = codecName.Equals("Lucene3x") ? 200 * RANDOM_MULTIPLIER : AtLeast(1000);
    for (int i = 0; i < iterations; i++)
    {
        keywordField.StringValue = TestUtil.RandomUnicodeString(Random(), 10);
        iw.AddDocument(doc);
    }

    Reader = iw.Reader;
    Searcher = NewSearcher(Reader);
    iw.Dispose();
}
public virtual void TestNot_Mem()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);

    Document doc = new Document();
    doc.Add(NewTextField("field", "a b", Field.Store.YES));
    iw.AddDocument(doc);

    IndexReader reader = iw.Reader;
    IndexSearcher indexSearcher = NewSearcher(reader);

    // MUST_NOT "b" excludes the only document even though SHOULD "a" matches.
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    booleanQuery.Add(new TermQuery(new Term("field", "b")), BooleanClause.Occur.MUST_NOT);

    ScoreDoc[] results = indexSearcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(0, results.Length);

    iw.Dispose();
    reader.Dispose();
    dir.Dispose();
}
private Document CreateDocument(string text, long time)
{
    // Builds a document holding the free text plus a second-resolution
    // string encoding of the timestamp; both fields are stored.
    Document document = new Document();
    document.Add(NewTextField(TEXT_FIELD, text, Field.Store.YES));
    string encodedTime = DateTools.TimeToString(time, DateTools.Resolution.SECOND);
    document.Add(NewStringField(DATE_TIME_FIELD, encodedTime, Field.Store.YES));
    return document;
}
public virtual void TestDoubleMissingLast()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);

    // First document has no "value" field at all.
    iw.AddDocument(new Document());

    Document doc = new Document();
    doc.Add(new DoubleDocValuesField("value", -1.3));
    doc.Add(NewStringField("value", "-1.3", Field.Store.YES));
    iw.AddDocument(doc);

    doc = new Document();
    doc.Add(new DoubleDocValuesField("value", 4.2333333333333));
    doc.Add(NewStringField("value", "4.2333333333333", Field.Store.YES));
    iw.AddDocument(doc);

    doc = new Document();
    doc.Add(new DoubleDocValuesField("value", 4.2333333333332));
    doc.Add(NewStringField("value", "4.2333333333332", Field.Store.YES));
    iw.AddDocument(doc);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    // A missing value sorts as double.MaxValue, i.e. last in ascending order.
    IndexSearcher indexSearcher = NewSearcher(reader);
    SortField sortField = new SortField("value", SortField.Type_e.DOUBLE);
    sortField.MissingValue = double.MaxValue;
    Sort sort = new Sort(sortField);

    TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(4, topDocs.TotalHits);
    Assert.AreEqual("-1.3", indexSearcher.Doc(topDocs.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("4.2333333333332", indexSearcher.Doc(topDocs.ScoreDocs[1].Doc).Get("value"));
    Assert.AreEqual("4.2333333333333", indexSearcher.Doc(topDocs.ScoreDocs[2].Doc).Get("value"));
    Assert.IsNull(indexSearcher.Doc(topDocs.ScoreDocs[3].Doc).Get("value"));

    reader.Dispose();
    dir.Dispose();
}
private void AddDoc(string text, IndexWriter iw, float boost)
{
    // Index a single stored text field under "key", carrying the given boost.
    Document document = new Document();
    Field keyField = NewTextField("key", text, Field.Store.YES);
    keyField.Boost = boost;
    document.Add(keyField);
    iw.AddDocument(document);
}
/// <summary>
/// Indexes at least 1 document with f1, and at least 1 document with f2,
/// plus a random mix (1..1000 extra docs) of the two, then returns the norms
/// for "field" computed over the multi-segment reader. Before returning, it
/// force-merges to one segment and cross-checks the multi-segment norms
/// against the single-segment norms value-by-value.
/// NOTE(review): norms1 is obtained from ir1, which is disposed before this
/// method returns — callers presumably only inspect the returned object for
/// null-ness or cached values; confirm it is not dereferenced after disposal.
/// </summary>
internal virtual NumericDocValues GetNorms(string field, Field f1, Field f2) { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc); // add f1 Document d = new Document(); d.Add(f1); riw.AddDocument(d); // add f2 d = new Document(); d.Add(f2); riw.AddDocument(d); // add a mix of f1's and f2's int numExtraDocs = TestUtil.NextInt(Random(), 1, 1000); for (int i = 0; i < numExtraDocs; i++) { d = new Document(); d.Add(Random().NextBoolean() ? f1 : f2); riw.AddDocument(d); } IndexReader ir1 = riw.Reader; // todo: generalize NumericDocValues norms1 = MultiDocValues.GetNormValues(ir1, field); // fully merge and validate MultiNorms against single segment. riw.ForceMerge(1); DirectoryReader ir2 = riw.Reader; NumericDocValues norms2 = GetOnlySegmentReader(ir2).GetNormValues(field); if (norms1 == null) { Assert.IsNull(norms2); } else { for (int docID = 0; docID < ir1.MaxDoc(); docID++) { Assert.AreEqual(norms1.Get(docID), norms2.Get(docID)); } } ir1.Dispose(); ir2.Dispose(); riw.Dispose(); dir.Dispose(); return norms1; }
public virtual void TestNoNrmFile()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
    LogMergePolicy mergePolicy = (LogMergePolicy)iw.Config.MergePolicy;
    mergePolicy.MergeFactor = 2;
    mergePolicy.NoCFSRatio = 0.0;

    // Every document contains only a norms-omitting field, so no norms
    // (.nrm) file should ever be written.
    Document doc = new Document();
    FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED);
    noNormsType.OmitNorms = true;
    Field noNormsField = NewField("f1", "this field has no norms", noNormsType);
    doc.Add(noNormsField);

    for (int i = 0; i < 30; i++)
    {
        iw.AddDocument(doc);
    }

    iw.Commit();
    AssertNoNrm(dir);

    // Also verify after a force merge; Dispose flushes.
    iw.ForceMerge(1);
    iw.Dispose();
    AssertNoNrm(dir);

    dir.Dispose();
}
private void AddDoc(IndexWriter writer, string value)
{
    // Index the value as an unstored text field named "content".
    Document document = new Document();
    document.Add(NewTextField("content", value, Field.Store.NO));
    writer.AddDocument(document);
}
/// <summary>
/// Verifies field-cache behavior for stored-only (non-indexed) fields: every
/// accessor (GetBytes/GetShorts/GetInts/GetLongs/GetFloats/GetDoubles/
/// GetTerms/GetTermsIndex/GetDocTermOrds/GetDocsWithField) must return an
/// "empty" default (0, empty BytesRef, ord -1, NO_MORE_ORDS, false bit) and
/// — asserted at the end via CacheEntries.Length — must not populate the
/// cache at all.
/// </summary>
public virtual void TestNonIndexedFields() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new StoredField("bogusbytes", "bogus")); doc.Add(new StoredField("bogusshorts", "bogus")); doc.Add(new StoredField("bogusints", "bogus")); doc.Add(new StoredField("boguslongs", "bogus")); doc.Add(new StoredField("bogusfloats", "bogus")); doc.Add(new StoredField("bogusdoubles", "bogus")); doc.Add(new StoredField("bogusterms", "bogus")); doc.Add(new StoredField("bogustermsindex", "bogus")); doc.Add(new StoredField("bogusmultivalued", "bogus")); doc.Add(new StoredField("bogusbits", "bogus")); iw.AddDocument(doc); DirectoryReader ir = iw.Reader; iw.Dispose(); AtomicReader ar = GetOnlySegmentReader(ir); FieldCache cache = FieldCache_Fields.DEFAULT; cache.PurgeAllCaches(); Assert.AreEqual(0, cache.CacheEntries.Length); Bytes bytes = cache.GetBytes(ar, "bogusbytes", true); Assert.AreEqual(0, bytes.Get(0)); Shorts shorts = cache.GetShorts(ar, "bogusshorts", true); Assert.AreEqual(0, shorts.Get(0)); Ints ints = cache.GetInts(ar, "bogusints", true); Assert.AreEqual(0, ints.Get(0)); Longs longs = cache.GetLongs(ar, "boguslongs", true); Assert.AreEqual(0, longs.Get(0)); Floats floats = cache.GetFloats(ar, "bogusfloats", true); Assert.AreEqual(0, floats.Get(0), 0.0f); Doubles doubles = cache.GetDoubles(ar, "bogusdoubles", true); Assert.AreEqual(0, doubles.Get(0), 0.0D); BytesRef scratch = new BytesRef(); BinaryDocValues binaries = cache.GetTerms(ar, "bogusterms", true); binaries.Get(0, scratch); Assert.AreEqual(0, scratch.Length); SortedDocValues sorted = cache.GetTermsIndex(ar, "bogustermsindex"); Assert.AreEqual(-1, sorted.GetOrd(0)); sorted.Get(0, scratch); Assert.AreEqual(0, scratch.Length); SortedSetDocValues sortedSet = cache.GetDocTermOrds(ar, "bogusmultivalued"); sortedSet.Document = 0; Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); Bits bits = cache.GetDocsWithField(ar, 
"bogusbits"); Assert.IsFalse(bits.Get(0)); // check that we cached nothing Assert.AreEqual(0, cache.CacheEntries.Length); ir.Dispose(); dir.Dispose(); }
/// <summary>
/// Verifies the field cache's doc-values integration: for each doc-values
/// type (BINARY, SORTED, NUMERIC, SORTED_SET) only the matching accessors
/// succeed, while every mismatched accessor (e.g. GetInts on a binary field,
/// GetTermsIndex on numeric, DocTermOrds construction on non-sorted-set)
/// must throw InvalidOperationException. GetDocsWithField must report the
/// doc as present for every type. The empty catch blocks are intentional:
/// the thrown exception IS the expected outcome.
/// </summary>
public virtual void TestDocValuesIntegration() { AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues()); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); Document doc = new Document(); doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value"))); doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value"))); doc.Add(new NumericDocValuesField("numeric", 42)); if (DefaultCodecSupportsSortedSet()) { doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1"))); doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2"))); } iw.AddDocument(doc); DirectoryReader ir = iw.Reader; iw.Dispose(); AtomicReader ar = GetOnlySegmentReader(ir); BytesRef scratch = new BytesRef(); // Binary type: can be retrieved via getTerms() try { FieldCache_Fields.DEFAULT.GetInts(ar, "binary", false); Assert.Fail(); } catch (InvalidOperationException expected) { } BinaryDocValues binary = FieldCache_Fields.DEFAULT.GetTerms(ar, "binary", true); binary.Get(0, scratch); Assert.AreEqual("binary value", scratch.Utf8ToString()); try { FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "binary"); Assert.Fail(); } catch (InvalidOperationException expected) { } try { FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "binary"); Assert.Fail(); } catch (InvalidOperationException expected) { } try { new DocTermOrds(ar, null, "binary"); Assert.Fail(); } catch (InvalidOperationException expected) { } Bits bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "binary"); Assert.IsTrue(bits.Get(0)); // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds() try { FieldCache_Fields.DEFAULT.GetInts(ar, "sorted", false); Assert.Fail(); } catch (InvalidOperationException expected) { } try { new DocTermOrds(ar, null, "sorted"); Assert.Fail(); } catch (InvalidOperationException expected) { } 
binary = FieldCache_Fields.DEFAULT.GetTerms(ar, "sorted", true); binary.Get(0, scratch); Assert.AreEqual("sorted value", scratch.Utf8ToString()); SortedDocValues sorted = FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "sorted"); Assert.AreEqual(0, sorted.GetOrd(0)); Assert.AreEqual(1, sorted.ValueCount); sorted.Get(0, scratch); Assert.AreEqual("sorted value", scratch.Utf8ToString()); SortedSetDocValues sortedSet = FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "sorted"); sortedSet.Document = 0; Assert.AreEqual(0, sortedSet.NextOrd()); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); Assert.AreEqual(1, sortedSet.ValueCount); bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "sorted"); Assert.IsTrue(bits.Get(0)); // Numeric type: can be retrieved via getInts() and so on Ints numeric = FieldCache_Fields.DEFAULT.GetInts(ar, "numeric", false); Assert.AreEqual(42, numeric.Get(0)); try { FieldCache_Fields.DEFAULT.GetTerms(ar, "numeric", true); Assert.Fail(); } catch (InvalidOperationException expected) { } try { FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "numeric"); Assert.Fail(); } catch (InvalidOperationException expected) { } try { FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "numeric"); Assert.Fail(); } catch (InvalidOperationException expected) { } try { new DocTermOrds(ar, null, "numeric"); Assert.Fail(); } catch (InvalidOperationException expected) { } bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "numeric"); Assert.IsTrue(bits.Get(0)); // SortedSet type: can be retrieved via getDocTermOrds() if (DefaultCodecSupportsSortedSet()) { try { FieldCache_Fields.DEFAULT.GetInts(ar, "sortedset", false); Assert.Fail(); } catch (InvalidOperationException expected) { } try { FieldCache_Fields.DEFAULT.GetTerms(ar, "sortedset", true); Assert.Fail(); } catch (InvalidOperationException expected) { } try { FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "sortedset"); Assert.Fail(); } catch (InvalidOperationException expected) { } try { new DocTermOrds(ar, 
null, "sortedset"); Assert.Fail(); } catch (InvalidOperationException expected) { } sortedSet = FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "sortedset"); sortedSet.Document = 0; Assert.AreEqual(0, sortedSet.NextOrd()); Assert.AreEqual(1, sortedSet.NextOrd()); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd()); Assert.AreEqual(2, sortedSet.ValueCount); bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "sortedset"); Assert.IsTrue(bits.Get(0)); } ir.Dispose(); dir.Dispose(); }
public virtual void TestDoubleReverse()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);

    // Each doc stores the double both as doc values (for sorting) and as a
    // stored string (for verification).
    Document doc = new Document();
    doc.Add(new DoubleDocValuesField("value", 30.1));
    doc.Add(NewStringField("value", "30.1", Field.Store.YES));
    iw.AddDocument(doc);

    doc = new Document();
    doc.Add(new DoubleDocValuesField("value", -1.3));
    doc.Add(NewStringField("value", "-1.3", Field.Store.YES));
    iw.AddDocument(doc);

    doc = new Document();
    doc.Add(new DoubleDocValuesField("value", 4.2333333333333));
    doc.Add(NewStringField("value", "4.2333333333333", Field.Store.YES));
    iw.AddDocument(doc);

    doc = new Document();
    doc.Add(new DoubleDocValuesField("value", 4.2333333333332));
    doc.Add(NewStringField("value", "4.2333333333332", Field.Store.YES));
    iw.AddDocument(doc);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher indexSearcher = NewSearcher(reader);
    // reverse = true: expect descending numeric order.
    Sort sort = new Sort(new SortField("value", SortField.Type_e.DOUBLE, true));

    TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(4, topDocs.TotalHits);
    Assert.AreEqual("30.1", indexSearcher.Doc(topDocs.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("4.2333333333333", indexSearcher.Doc(topDocs.ScoreDocs[1].Doc).Get("value"));
    Assert.AreEqual("4.2333333333332", indexSearcher.Doc(topDocs.ScoreDocs[2].Doc).Get("value"));
    Assert.AreEqual("-1.3", indexSearcher.Doc(topDocs.ScoreDocs[3].Doc).Get("value"));
    // Sorting must have used doc values, not the field cache.
    AssertNoFieldCaches();

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// LUCENE-3528: NRTManager hangs in certain situations. Reproduces the
/// starvation scenario: the writer is latched mid-update while a background
/// thread keeps triggering reopens; a waiter thread then blocks on
/// WaitForGeneration for the latched update's generation. If the waiter has
/// not finished within 1s it is interrupted and the test fails with a
/// deadlock diagnosis. Statement order here is load-bearing (latch setup,
/// update, refresh, join) — do not reorder.
/// </summary>
public virtual void TestThreadStarvationNoDeleteNRTReader() { IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); conf.SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES); Directory d = NewDirectory(); CountDownLatch latch = new CountDownLatch(1); CountDownLatch signal = new CountDownLatch(1); LatchedIndexWriter _writer = new LatchedIndexWriter(d, conf, latch, signal); TrackingIndexWriter writer = new TrackingIndexWriter(_writer); SearcherManager manager = new SearcherManager(_writer, false, null); Document doc = new Document(); doc.Add(NewTextField("test", "test", Field.Store.YES)); writer.AddDocument(doc); manager.MaybeRefresh(); ThreadClass t = new ThreadAnonymousInnerClassHelper(this, latch, signal, writer, manager); t.Start(); _writer.WaitAfterUpdate = true; // wait in addDocument to let some reopens go through long lastGen = writer.UpdateDocument(new Term("foo", "bar"), doc); // once this returns the doc is already reflected in the last reopen Assert.IsFalse(manager.SearcherCurrent); // false since there is a delete in the queue IndexSearcher searcher = manager.Acquire(); try { Assert.AreEqual(2, searcher.IndexReader.NumDocs()); } finally { manager.Release(searcher); } ControlledRealTimeReopenThread<IndexSearcher> thread = new ControlledRealTimeReopenThread<IndexSearcher>(writer, manager, 0.01, 0.01); thread.Start(); // start reopening if (VERBOSE) { Console.WriteLine("waiting now for generation " + lastGen); } AtomicBoolean finished = new AtomicBoolean(false); ThreadClass waiter = new ThreadAnonymousInnerClassHelper2(this, lastGen, thread, finished); waiter.Start(); manager.MaybeRefresh(); waiter.Join(1000); if (!finished.Get()) { waiter.Interrupt(); Assert.Fail("thread deadlocked on waitForGeneration"); } thread.Dispose(); thread.Join(); IOUtils.Close(manager, _writer, d); }
// LUCENE-5461: ControlledRealTimeReopenThread behaving badly.
public virtual void TestCRTReopen()
{
    // Should be high enough that a reopen always happens within the bound.
    int maxStaleSecs = 20;

    // Build arbitrary content just to have something to store.
    string alphabet = " abcdefghijklmnopqrstuvwxyz ";
    char[] chars = alphabet.ToCharArray();
    StringBuilder builder = new StringBuilder(2048);
    for (int i = 0; i < 2048; i++)
    {
        builder.Append(chars[Random().Next(chars.Length)]);
    }
    string content = builder.ToString();

    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new MockAnalyzer(Random()));
    config.SetIndexDeletionPolicy(sdp);
    config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(dir, config);
    SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
    TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
    ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(tiw, sm, maxStaleSecs, 0);
    controlledRealTimeReopenThread.SetDaemon(true);
    controlledRealTimeReopenThread.Start();

    IList<Thread> commitThreads = new List<Thread>();
    for (int i = 0; i < 500; i++)
    {
        // Kick off a background commit every 50 documents.
        if (i > 0 && i % 50 == 0)
        {
            Thread commitThread = new Thread(new RunnableAnonymousInnerClassHelper(this, sdp, dir, iw));
            commitThread.Start();
            commitThreads.Add(commitThread);
        }

        Document d = new Document();
        d.Add(new TextField("count", i + "", Field.Store.NO));
        d.Add(new TextField("content", content, Field.Store.YES));

        // BUGFIX: the original measured time with DateTime.Now.Millisecond,
        // which is only the 0-999 millisecond *component* of the wall clock
        // (a Java System.currentTimeMillis() port error), so "wait" was
        // meaningless and frequently negative. Use a Stopwatch instead.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        long l = tiw.AddDocument(d);
        controlledRealTimeReopenThread.WaitForGeneration(l);
        long wait = stopwatch.ElapsedMilliseconds;
        Assert.IsTrue(wait < (maxStaleSecs * 1000), "waited too long for generation " + wait);

        // The freshly added document must be visible to the new searcher.
        IndexSearcher searcher = sm.Acquire();
        TopDocs td = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
        sm.Release(searcher);
        Assert.AreEqual(1, td.TotalHits);
    }

    foreach (Thread commitThread in commitThreads)
    {
        commitThread.Join();
    }

    controlledRealTimeReopenThread.Dispose();
    sm.Dispose();
    iw.Dispose();
    dir.Dispose();
}
public static Directory GetDirectory(Analyzer analyzer, string[] vals)
{
    // Builds a directory containing one stored text document per value.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 100, 1000)).SetMergePolicy(NewLogMergePolicy()));
    foreach (string value in vals)
    {
        Document document = new Document();
        document.Add(NewTextField(FIELD, value, Field.Store.YES));
        writer.AddDocument(document);
    }
    writer.Dispose();
    return directory;
}
private void AddNoProxDoc(IndexWriter writer)
{
    Document doc = new Document();

    // Indexed + stored text field recording only doc IDs (no freqs/positions).
    FieldType docsOnlyStored = new FieldType(TextField.TYPE_STORED);
    docsOnlyStored.IndexOptionsValue = IndexOptions.DOCS_ONLY;
    doc.Add(new Field("content3", "aaa", docsOnlyStored));

    // Variant built from a blank FieldType: stored, same DOCS_ONLY options.
    FieldType blankStoredDocsOnly = new FieldType();
    blankStoredDocsOnly.Stored = true;
    blankStoredDocsOnly.IndexOptionsValue = IndexOptions.DOCS_ONLY;
    doc.Add(new Field("content4", "aaa", blankStoredDocsOnly));

    writer.AddDocument(doc);
}
/// <summary>
/// Adds one document exercising the full catalog of field flavors for the
/// back-compat index: plain text, a stored string id, term-vector fields
/// with offsets/positions (including non-ASCII field names and values with
/// surrogate pairs to check encoding preservation), trie-encoded numeric
/// fields, every doc-values kind (numeric, binary fixed/var, sorted
/// fixed/var, double/float), a field with both offsets-in-postings and term
/// vectors (content5), and a field that stores freqs but omits positions
/// (content6).
/// </summary>
private void AddDoc(IndexWriter writer, int id) { Document doc = new Document(); doc.Add(new TextField("content", "aaa", Field.Store.NO)); doc.Add(new StringField("id", Convert.ToString(id), Field.Store.YES)); FieldType customType2 = new FieldType(TextField.TYPE_STORED); customType2.StoreTermVectors = true; customType2.StoreTermVectorPositions = true; customType2.StoreTermVectorOffsets = true; doc.Add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2)); doc.Add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2)); doc.Add(new Field("content2", "here is more content with aaa aaa aaa", customType2)); doc.Add(new Field("fie\u2C77ld", "field with non-ascii name", customType2)); // add numeric fields, to test if flex preserves encoding doc.Add(new IntField("trieInt", id, Field.Store.NO)); doc.Add(new LongField("trieLong", (long)id, Field.Store.NO)); // add docvalues fields doc.Add(new NumericDocValuesField("dvByte", (sbyte)id)); sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id }; BytesRef @ref = new BytesRef(bytes); doc.Add(new BinaryDocValuesField("dvBytesDerefFixed", @ref)); doc.Add(new BinaryDocValuesField("dvBytesDerefVar", @ref)); doc.Add(new SortedDocValuesField("dvBytesSortedFixed", @ref)); doc.Add(new SortedDocValuesField("dvBytesSortedVar", @ref)); doc.Add(new BinaryDocValuesField("dvBytesStraightFixed", @ref)); doc.Add(new BinaryDocValuesField("dvBytesStraightVar", @ref)); doc.Add(new DoubleDocValuesField("dvDouble", (double)id)); doc.Add(new FloatDocValuesField("dvFloat", (float)id)); doc.Add(new NumericDocValuesField("dvInt", id)); doc.Add(new NumericDocValuesField("dvLong", id)); doc.Add(new NumericDocValuesField("dvPacked", id)); doc.Add(new NumericDocValuesField("dvShort", (short)id)); // a field with both offsets and term vectors for a cross-check FieldType customType3 = new 
FieldType(TextField.TYPE_STORED); customType3.StoreTermVectors = true; customType3.StoreTermVectorPositions = true; customType3.StoreTermVectorOffsets = true; customType3.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; doc.Add(new Field("content5", "here is more content with aaa aaa aaa", customType3)); // a field that omits only positions FieldType customType4 = new FieldType(TextField.TYPE_STORED); customType4.StoreTermVectors = true; customType4.StoreTermVectorPositions = false; customType4.StoreTermVectorOffsets = true; customType4.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS; doc.Add(new Field("content6", "here is more content with aaa aaa aaa", customType4)); // TODO: // index different norms types via similarity (we use a random one currently?!) // remove any analyzer randomness, explicitly add payloads for certain fields. writer.AddDocument(doc); }
public static void BeforeClass()
{
    NUM_DOCS = AtLeast(500);
    NUM_ORDS = AtLeast(2);
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // Counters start at each type's max value and count down, one per doc.
    long theLong = long.MaxValue;
    double theDouble = double.MaxValue;
    sbyte theByte = sbyte.MaxValue;
    short theShort = short.MaxValue;
    int theInt = int.MaxValue;
    float theFloat = float.MaxValue;

    UnicodeStrings = new string[NUM_DOCS];
    MultiValued = new BytesRef[NUM_DOCS, NUM_ORDS];
    if (VERBOSE)
    {
        Console.WriteLine("TEST: setUp");
    }
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("theLong", Convert.ToString(theLong--), Field.Store.NO));
        doc.Add(NewStringField("theDouble", Convert.ToString(theDouble--), Field.Store.NO));
        doc.Add(NewStringField("theByte", Convert.ToString(theByte--), Field.Store.NO));
        doc.Add(NewStringField("theShort", Convert.ToString(theShort--), Field.Store.NO));
        doc.Add(NewStringField("theInt", Convert.ToString(theInt--), Field.Store.NO));
        doc.Add(NewStringField("theFloat", Convert.ToString(theFloat--), Field.Store.NO));
        // Even docs get the sparse fields.
        if (i % 2 == 0)
        {
            doc.Add(NewStringField("sparse", Convert.ToString(i), Field.Store.NO));
        }
        if (i % 2 == 0)
        {
            doc.Add(new IntField("numInt", i, Field.Store.NO));
        }
        // sometimes skip the field:
        if (Random().Next(40) != 17)
        {
            UnicodeStrings[i] = GenerateString(i);
            doc.Add(NewStringField("theRandomUnicodeString", UnicodeStrings[i], Field.Store.YES));
        }
        // sometimes skip the field:
        if (Random().Next(10) != 8)
        {
            for (int j = 0; j < NUM_ORDS; j++)
            {
                string newValue = GenerateString(i);
                MultiValued[i, j] = new BytesRef(newValue);
                doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
            }
            // BUGFIX: MultiValued is a rectangular 2-D array (allocated above
            // as new BytesRef[NUM_DOCS, NUM_ORDS]), so Array.Sort(MultiValued[i])
            // does not compile in C#. Copy the row out, sort it, and write it
            // back — same effect as the Java original's per-row sort.
            BytesRef[] row = new BytesRef[NUM_ORDS];
            for (int j = 0; j < NUM_ORDS; j++)
            {
                row[j] = MultiValued[i, j];
            }
            Array.Sort(row);
            for (int j = 0; j < NUM_ORDS; j++)
            {
                MultiValued[i, j] = row[j];
            }
        }
        writer.AddDocument(doc);
    }
    IndexReader r = writer.Reader;
    // Tests in this fixture expect a single atomic reader view.
    Reader = SlowCompositeReaderWrapper.Wrap(r);
    writer.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    // NOTE(review): in the collapsed single-line source, the stray "//"
    // markers (originally empty comment lines between statements) commented
    // out the entire body after base.SetUp(), leaving Reader/Searcher
    // uninitialized. Restored as active code, which the field assignments
    // below clearly require.
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir);
    Document d = new Document();
    d.Add(NewField(FIELD_T, "Optimize not deleting all files", TextField.TYPE_STORED));
    d.Add(NewField(FIELD_C, "Deleted When I run an optimize in our production environment.", TextField.TYPE_STORED));
    writer.AddDocument(d);
    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
public virtual void TestLongFieldCache()
{
    Directory dir = NewDirectory();
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    cfg.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, cfg);

    Document doc = new Document();
    LongField longField = new LongField("f", 0L, Field.Store.YES);
    doc.Add(longField);

    // Random values; the extremes (min / 0 / max) each get 10% probability.
    long[] expectedValues = new long[TestUtil.NextInt(Random(), 1, 10)];
    for (int i = 0; i < expectedValues.Length; ++i)
    {
        long value;
        switch (Random().Next(10))
        {
            case 0:
                value = long.MinValue;
                break;
            case 1:
                value = 0;
                break;
            case 2:
                value = long.MaxValue;
                break;
            default:
                value = TestUtil.NextLong(Random(), -10, 10);
                break;
        }
        expectedValues[i] = value;
        if (value == 0 && Random().NextBoolean())
        {
            // Sometimes leave the field out entirely; the cache reads 0
            // for a missing value, matching the expected 0.
            iw.AddDocument(new Document());
        }
        else
        {
            longField.LongValue = value;
            iw.AddDocument(doc);
        }
    }

    iw.ForceMerge(1);
    DirectoryReader reader = iw.Reader;
    Longs cachedLongs = FieldCache_Fields.DEFAULT.GetLongs(GetOnlySegmentReader(reader), "f", false);
    for (int i = 0; i < expectedValues.Length; ++i)
    {
        Assert.AreEqual(expectedValues[i], cachedLongs.Get(i));
    }

    reader.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Drives a BulkScorer over a single-segment index in random-sized score
/// windows and checks that every document is collected exactly once.
/// </summary>
public virtual void TestBooleanScorerMax()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    int numDocs = AtLeast(10000);
    for (int i = 0; i < numDocs; i++)
    {
        Document document = new Document();
        document.Add(NewField("field", "a", TextField.TYPE_NOT_STORED));
        writer.AddDocument(document);
    }
    writer.ForceMerge(1);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    // Two identical SHOULD clauses: every document matches both.
    BooleanQuery query = new BooleanQuery();
    query.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    query.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);

    Weight weight = searcher.CreateNormalizedWeight(query);
    Assert.AreEqual(1, searcher.IndexReader.Leaves().Count);
    BulkScorer scorer = weight.BulkScorer(searcher.IndexReader.Leaves()[0], false, null);

    FixedBitSet collected = new FixedBitSet(numDocs);
    AtomicInteger upTo = new AtomicInteger();
    Collector collector = new CollectorAnonymousInnerClassHelper(this, scorer, collected, upTo);

    // Score in random-sized chunks until the whole doc range is covered.
    while (upTo.Get() < numDocs)
    {
        int step = TestUtil.NextInt(Random(), 1, 1000);
        upTo.AddAndGet(step);
        scorer.Score(collector, upTo.Get());
    }
    // Each doc must have been seen exactly once across all windows.
    Assert.AreEqual(numDocs, collected.Cardinality());

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes two small documents under SimpleSimilarity and runs a term query, a
/// boolean query, and phrase queries (exact and sloppy) through collectors that
/// verify the expected scores.
/// </summary>
public virtual void TestSimilarity_Mem()
{
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), store, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(new SimpleSimilarity()));
    Document docOne = new Document();
    docOne.Add(NewTextField("field", "a c", Field.Store.YES));
    Document docTwo = new Document();
    docTwo.Add(NewTextField("field", "a b c", Field.Store.YES));
    writer.AddDocument(docOne);
    writer.AddDocument(docTwo);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    // The searcher must use the same simplified similarity as the writer.
    searcher.Similarity = new SimpleSimilarity();

    Term termA = new Term("field", "a");
    Term termB = new Term("field", "b");
    Term termC = new Term("field", "c");

    // Single term: only the second doc contains "b".
    searcher.Search(new TermQuery(termB), new CollectorAnonymousInnerClassHelper(this));

    // Boolean OR of "a" and "b".
    BooleanQuery boolQuery = new BooleanQuery();
    boolQuery.Add(new TermQuery(termA), BooleanClause.Occur.SHOULD);
    boolQuery.Add(new TermQuery(termB), BooleanClause.Occur.SHOULD);
    //System.out.println(bq.toString("field"));
    searcher.Search(boolQuery, new CollectorAnonymousInnerClassHelper2(this));

    // Exact phrase "a c", then the same phrase with slop 2.
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(termA);
    phrase.Add(termC);
    //System.out.println(pq.toString("field"));
    searcher.Search(phrase, new CollectorAnonymousInnerClassHelper3(this));

    phrase.Slop = 2;
    //System.out.println(pq.toString("field"));
    searcher.Search(phrase, new CollectorAnonymousInnerClassHelper4(this));

    reader.Dispose();
    store.Dispose();
}
/// <summary>
/// Adds the same document — one field with norms (f1) and one with norms
/// omitted (f2) — many times across several flushes, force-merges, and asserts
/// the merged segment keeps norms for f1 but not for f2.
/// </summary>
public virtual void TestMixedRAM()
{
    Directory ram = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(2)));

    Document document = new Document();
    // f1 keeps norms in every doc.
    Field withNorms = NewTextField("f1", "this field has norms", Field.Store.NO);
    document.Add(withNorms);
    // f2 omits norms in every doc.
    FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED);
    noNormsType.OmitNorms = true;
    Field withoutNorms = NewField("f2", "this field has NO norms in all docs", noNormsType);
    document.Add(withoutNorms);

    // 25 copies of the identical document (spans multiple flushes with
    // MaxBufferedDocs=10, exercising in-RAM segment merging).
    for (int count = 0; count < 25; count++)
    {
        writer.AddDocument(document);
    }

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
    FieldInfos fieldInfos = reader.FieldInfos;
    Assert.IsTrue(!fieldInfos.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should not be set.");
    Assert.IsTrue(fieldInfos.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");
    reader.Dispose();
    ram.Dispose();
}