public virtual void TestRollbackIntegrityWithBufferFlush() { Directory dir = NewDirectory(); RandomIndexWriter rw = new RandomIndexWriter(Random(), dir); for (int i = 0; i < 5; i++) { Document doc = new Document(); doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES)); rw.AddDocument(doc); } rw.Dispose(); // If buffer size is small enough to cause a flush, errors ensue... IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND)); for (int i = 0; i < 3; i++) { Document doc = new Document(); string value = Convert.ToString(i); doc.Add(NewStringField("pk", value, Field.Store.YES)); doc.Add(NewStringField("text", "foo", Field.Store.YES)); w.UpdateDocument(new Term("pk", value), doc); } w.Rollback(); IndexReader r = DirectoryReader.Open(dir); Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback"); r.Dispose(); dir.Dispose(); }
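The test above relies on the rollback contract: IndexWriter.Rollback() discards every change made after the last commit (here, the state left behind by the first, disposed writer) and closes the writer. A minimal sketch of that contract, using only APIs already exercised in these tests; the field name is illustrative:

// Sketch, assuming Rollback() drops all uncommitted changes and closes the writer.
Directory dir = NewDirectory();
IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
Document committed = new Document();
committed.Add(NewStringField("pk", "0", Field.Store.YES));
w.AddDocument(committed);
w.Commit(); // one document is now durable
Document uncommitted = new Document();
uncommitted.Add(NewStringField("pk", "1", Field.Store.YES));
w.AddDocument(uncommitted);
w.Rollback(); // discards the uncommitted document and releases the writer
IndexReader r = DirectoryReader.Open(dir);
Assert.AreEqual(1, r.NumDocs); // only the committed document survives
r.Dispose();
dir.Dispose();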
public virtual void Test() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); conf.SetCodec(new Lucene46Codec()); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf); Document doc = new Document(); // these fields should sometimes get term vectors, etc Field idField = NewStringField("id", "", Field.Store.NO); Field bodyField = NewTextField("body", "", Field.Store.NO); Field dvField = new NumericDocValuesField("dv", 5); doc.Add(idField); doc.Add(bodyField); doc.Add(dvField); for (int i = 0; i < 100; i++) { idField.StringValue = Convert.ToString(i); bodyField.StringValue = TestUtil.RandomUnicodeString(Random()); riw.AddDocument(doc); if (Random().Next(7) == 0) { riw.Commit(); } // TODO: we should make a new format with a clean header... // if (Random().nextInt(20) == 0) { // riw.DeleteDocuments(new Term("id", Integer.toString(i))); // } } riw.Dispose(); CheckHeaders(dir); dir.Dispose(); }
public virtual void TestAddBinaryTwice() { Analyzer analyzer = new MockAnalyzer(Random()); Directory directory = NewDirectory(); // we don't use RandomIndexWriter because it might add more docvalues than we expect! IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); IndexWriter iwriter = new IndexWriter(directory, iwc); Document doc = new Document(); doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!"))); doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!"))); try { iwriter.AddDocument(doc); Assert.Fail("didn't hit expected exception"); } catch (System.ArgumentException expected) { // expected } iwriter.Dispose(); directory.Dispose(); }
public static void BeforeClass() { Directory = NewDirectory(); Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, analyzer); Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("field", "one two three four five", Field.Store.YES)); doc.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES)); IndexableField repeatedField = NewTextField("repeated", "second part of a repeated field", Field.Store.YES); doc.Add(repeatedField); doc.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES)); writer.AddDocument(doc); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); INDEX_SIZE = AtLeast(2000); Index = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Index); RandomGen random = new RandomGen(this, Random()); for (int i = 0; i < INDEX_SIZE; ++i) { // don't decrease; if too low the problem doesn't show up Document doc = new Document(); if ((i % 5) != 0) { // some documents must not have an entry in the first sort field doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES)); } if ((i % 7) == 0) { // some documents to match the query (see below) doc.Add(NewTextField("content", "test", Field.Store.YES)); } // every document has a defined 'mandant' field doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Query = new TermQuery(new Term("content", "test")); }
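RandomGen.LuceneDate is not defined in this excerpt; it is assumed to produce DateTools-encoded date strings so that the publicationDate_ terms sort chronologically as plain strings. A hypothetical shape of that helper:

// Hypothetical sketch: a random date encoded with DateTools, whose string
// form sorts in chronological order.
private class RandomGen
{
    private readonly Random random;
    public RandomGen(object outerInstance, Random random) { this.random = random; } // outer ref unused in this sketch
    public string LuceneDate
    {
        get
        {
            long baseMs = new DateTime(1980, 1, 1, 0, 0, 0, DateTimeKind.Utc).Ticks / TimeSpan.TicksPerMillisecond;
            return DateTools.TimeToString(baseMs + random.Next(int.MaxValue), DateTools.Resolution.DAY);
        }
    }
}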
public override void SetUp() { base.SetUp(); Document doc; Rd1 = NewDirectory(); IndexWriter iw1 = new IndexWriter(Rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); doc = new Document(); doc.Add(NewTextField("field1", "the quick brown fox jumps", Field.Store.YES)); doc.Add(NewTextField("field2", "the quick brown fox jumps", Field.Store.YES)); iw1.AddDocument(doc); iw1.Dispose(); Rd2 = NewDirectory(); IndexWriter iw2 = new IndexWriter(Rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); doc = new Document(); doc.Add(NewTextField("field1", "the fox jumps over the lazy dog", Field.Store.YES)); doc.Add(NewTextField("field3", "the fox jumps over the lazy dog", Field.Store.YES)); iw2.AddDocument(doc); iw2.Dispose(); this.Ir1 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd1)); this.Ir2 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd2)); }
public virtual void TestByte() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new NumericDocValuesField("value", 23)); doc.Add(NewStringField("value", "23", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(new NumericDocValuesField("value", -1)); doc.Add(NewStringField("value", "-1", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(new NumericDocValuesField("value", 4)); doc.Add(NewStringField("value", "4", Field.Store.YES)); writer.AddDocument(doc); IndexReader ir = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(ir); Sort sort = new Sort(new SortField("value", SortField.Type_e.BYTE)); TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort); Assert.AreEqual(3, td.TotalHits); // numeric order Assert.AreEqual("-1", searcher.Doc(td.ScoreDocs[0].Doc).Get("value")); Assert.AreEqual("4", searcher.Doc(td.ScoreDocs[1].Doc).Get("value")); Assert.AreEqual("23", searcher.Doc(td.ScoreDocs[2].Doc).Get("value")); AssertNoFieldCaches(); ir.Dispose(); dir.Dispose(); }
public virtual void TestDoubleOffsetCounting() { Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Document doc = new Document(); FieldType customType = new FieldType(StringField.TYPE_NOT_STORED); customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorOffsets = true; Field f = NewField("field", "abcd", customType); doc.Add(f); doc.Add(f); Field f2 = NewField("field", "", customType); doc.Add(f2); doc.Add(f); w.AddDocument(doc); w.Dispose(); IndexReader r = DirectoryReader.Open(dir); Terms vector = r.GetTermVectors(0).Terms("field"); Assert.IsNotNull(vector); TermsEnum termsEnum = vector.Iterator(null); Assert.IsNotNull(termsEnum.Next()); Assert.AreEqual("", termsEnum.Term().Utf8ToString()); // Token "" occurred once Assert.AreEqual(1, termsEnum.TotalTermFreq()); DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); dpEnum.NextPosition(); Assert.AreEqual(8, dpEnum.StartOffset()); Assert.AreEqual(8, dpEnum.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc()); // Token "abcd" occurred three times Assert.AreEqual(new BytesRef("abcd"), termsEnum.Next()); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.AreEqual(3, termsEnum.TotalTermFreq()); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); dpEnum.NextPosition(); Assert.AreEqual(0, dpEnum.StartOffset()); Assert.AreEqual(4, dpEnum.EndOffset()); dpEnum.NextPosition(); Assert.AreEqual(4, dpEnum.StartOffset()); Assert.AreEqual(8, dpEnum.EndOffset()); dpEnum.NextPosition(); Assert.AreEqual(8, dpEnum.StartOffset()); Assert.AreEqual(12, dpEnum.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc()); Assert.IsNull(termsEnum.Next()); r.Dispose(); dir.Dispose(); }
/* Run one indexer and 2 searchers against single index as stress test. */ public virtual void RunTest(Directory directory) { TimedThread[] threads = new TimedThread[4]; IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7); ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3; IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random()); // Establish a base index of 100 docs: for (int i = 0; i < 100; i++) { Documents.Document d = new Documents.Document(); d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES)); d.Add(NewTextField("contents", English.IntToEnglish(i), Field.Store.NO)); if ((i - 1) % 7 == 0) { writer.Commit(); } writer.AddDocument(d); } writer.Commit(); IndexReader r = DirectoryReader.Open(directory); Assert.AreEqual(100, r.NumDocs); r.Dispose(); IndexerThread indexerThread = new IndexerThread(writer, threads); threads[0] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(writer, threads); threads[1] = indexerThread2; indexerThread2.Start(); SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[2] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[3] = searcherThread2; searcherThread2.Start(); indexerThread.Join(); indexerThread2.Join(); searcherThread1.Join(); searcherThread2.Join(); writer.Dispose(); Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer"); Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2"); Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1"); Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2"); //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
public virtual void TestForceMergeDeletes() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)); Document document = new Document(); FieldType customType = new FieldType(); customType.Stored = true; FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED); customType1.Tokenized = false; customType1.StoreTermVectors = true; customType1.StoreTermVectorPositions = true; customType1.StoreTermVectorOffsets = true; Field idField = NewStringField("id", "", Field.Store.NO); document.Add(idField); Field storedField = NewField("stored", "stored", customType); document.Add(storedField); Field termVectorField = NewField("termVector", "termVector", customType1); document.Add(termVectorField); for (int i = 0; i < 10; i++) { idField.StringValue = "" + i; writer.AddDocument(document); } writer.Dispose(); IndexReader ir = DirectoryReader.Open(dir); Assert.AreEqual(10, ir.MaxDoc); Assert.AreEqual(10, ir.NumDocs); ir.Dispose(); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, dontMergeConfig); writer.DeleteDocuments(new Term("id", "0")); writer.DeleteDocuments(new Term("id", "7")); writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(8, ir.NumDocs); ir.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); Assert.AreEqual(8, writer.NumDocs()); Assert.AreEqual(10, writer.MaxDoc); writer.ForceMergeDeletes(); Assert.AreEqual(8, writer.NumDocs()); writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(8, ir.MaxDoc); Assert.AreEqual(8, ir.NumDocs); ir.Dispose(); dir.Dispose(); }
public virtual void TestAfter() { // create an index Directory indexStore = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore); long now = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; Document doc = new Document(); // add time that is in the future doc.Add(NewStringField("datefield", DateTools.TimeToString(now + 888888, DateTools.Resolution.MILLISECOND), Field.Store.YES)); doc.Add(NewTextField("body", "Today is a very sunny day in New York City", Field.Store.YES)); writer.AddDocument(doc); IndexReader reader = writer.Reader; writer.Dispose(); IndexSearcher searcher = NewSearcher(reader); // filter that should preserve matches // DateFilter df1 = DateFilter.After("datefield", now); TermRangeFilter df1 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND), true, false); // filter that should discard matches // DateFilter df2 = DateFilter.After("datefield", now + 999999); TermRangeFilter df2 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now + 999999999, DateTools.Resolution.MILLISECOND), false, true); // search something that doesn't exist with DateFilter Query query1 = new TermQuery(new Term("body", "NoMatchForthis")); // search for something that does exists Query query2 = new TermQuery(new Term("body", "sunny")); ScoreDoc[] result; // ensure that queries return expected results without DateFilter first result = searcher.Search(query1, null, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, null, 1000).ScoreDocs; Assert.AreEqual(1, result.Length); // run queries with DateFilter result = searcher.Search(query1, df1, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query1, df2, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, df1, 1000).ScoreDocs; Assert.AreEqual(1, result.Length); result = searcher.Search(query2, df2, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); reader.Dispose(); indexStore.Dispose(); }
public virtual void Test() { Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); IList<long?> numbers = new List<long?>(); IList<BytesRef> binary = new List<BytesRef>(); IList<BytesRef> sorted = new List<BytesRef>(); int numDocs = AtLeast(100); for (int i = 0; i < numDocs; i++) { Document d = new Document(); long number = Random().NextLong(); d.Add(new NumericDocValuesField("number", number)); BytesRef bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())); d.Add(new BinaryDocValuesField("bytes", bytes)); binary.Add(bytes); bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())); d.Add(new SortedDocValuesField("sorted", bytes)); sorted.Add(bytes); w.AddDocument(d); numbers.Add(number); } w.ForceMerge(1); IndexReader r = w.Reader; w.Dispose(); Assert.AreEqual(1, r.Leaves.Count); AtomicReader ar = (AtomicReader)r.Leaves[0].Reader; int numThreads = TestUtil.NextInt(Random(), 2, 5); IList<ThreadClass> threads = new List<ThreadClass>(); CountDownLatch startingGun = new CountDownLatch(1); for (int t = 0; t < numThreads; t++) { Random threadRandom = new Random(Random().Next()); ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, numbers, binary, sorted, numDocs, ar, startingGun, threadRandom); thread.Start(); threads.Add(thread); } startingGun.countDown(); foreach (ThreadClass thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
public override void SetUp() { base.SetUp(); DirA = NewDirectory(); DirB = NewDirectory(); IndexWriter wA = new IndexWriter(DirA, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); IndexWriter wB = new IndexWriter(DirB, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); long theLong = long.MaxValue; double theDouble = double.MaxValue; sbyte theByte = sbyte.MaxValue; short theShort = short.MaxValue; int theInt = int.MaxValue; float theFloat = float.MaxValue; for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); doc.Add(NewStringField("theLong", Convert.ToString(theLong--), Field.Store.NO)); doc.Add(NewStringField("theDouble", theDouble.ToString("R"), Field.Store.NO)); theDouble--; doc.Add(NewStringField("theByte", Convert.ToString(theByte--), Field.Store.NO)); doc.Add(NewStringField("theShort", Convert.ToString(theShort--), Field.Store.NO)); doc.Add(NewStringField("theInt", Convert.ToString(theInt--), Field.Store.NO)); doc.Add(NewStringField("theFloat", Convert.ToString(theFloat--), Field.Store.NO)); if (0 == i % 3) { wA.AddDocument(doc); } else { wB.AddDocument(doc); } } wA.Dispose(); wB.Dispose(); DirectoryReader rA = DirectoryReader.Open(DirA); ReaderA = SlowCompositeReaderWrapper.Wrap(rA); ReaderAclone = SlowCompositeReaderWrapper.Wrap(rA); ReaderA = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(DirA)); ReaderB = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(DirB)); ReaderX = SlowCompositeReaderWrapper.Wrap(new MultiReader(ReaderA, ReaderB)); // LUCENENET specific. Ensure we have an InfoStream attached to the default FieldCache // when running the tests. In Java, this was done in the Core.Search.TestFieldCache.TestInfoStream() // method (which polluted the state of these tests), but we need to make the tests self-contained // so they can be run correctly regardless of order. Not setting the InfoStream skips an execution // path within these tests, so we should do it to make sure we test all of the code. FieldCache.DEFAULT.InfoStream = new StringWriter(); }
public virtual void TestMixedMerge() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random()); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy(2))); Document d = new Document(); // this field will have norms Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO); d.Add(f1); // this field will NOT have norms FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; Field f2 = NewField("f2", "this field has NO norms in all docs", customType); d.Add(f2); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } // now we add another document which has norms for field f2 and not for f1 and verify that the SegmentMerger keeps things constant d = new Document(); // Reverse d.Add(NewField("f1", "this field has norms", customType)); d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO)); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set."); Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set."); reader.Dispose(); ram.Dispose(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir); for (int i = 0; i < 100; i++) { Document doc = new Document(); doc.Add(NewStringField("field", Convert.ToString(i), Field.Store.NO)); doc.Add(NewStringField("field2", Convert.ToString(i % 2 == 0), Field.Store.NO)); iw.AddDocument(doc); } Reader = iw.Reader; iw.Dispose(); }
public virtual void TestBinaryFieldInIndex() { FieldType ft = new FieldType(); ft.Stored = true; IndexableField binaryFldStored = new StoredField("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(BinaryValStored)); IndexableField stringFldStored = new Field("stringStored", BinaryValStored, ft); Documents.Document doc = new Documents.Document(); doc.Add(binaryFldStored); doc.Add(stringFldStored); // test for field count Assert.AreEqual(2, doc.Fields.Count); // add the doc to a RAM index Directory dir = NewDirectory(); Random r = Random(); RandomIndexWriter writer = new RandomIndexWriter(r, dir); writer.AddDocument(doc); // open a reader and fetch the document IndexReader reader = writer.Reader; Documents.Document docFromReader = reader.Document(0); Assert.IsTrue(docFromReader != null); // fetch the binary stored field and compare its content with the original one BytesRef bytes = docFromReader.GetBinaryValue("binaryStored"); Assert.IsNotNull(bytes); string binaryFldStoredTest = Encoding.UTF8.GetString((byte[])(Array)bytes.Bytes).Substring(bytes.Offset, bytes.Length); //new string(bytes.Bytes, bytes.Offset, bytes.Length, IOUtils.CHARSET_UTF_8); Assert.IsTrue(binaryFldStoredTest.Equals(BinaryValStored)); // fetch the string field and compare its content with the original one string stringFldStoredTest = docFromReader.Get("stringStored"); Assert.IsTrue(stringFldStoredTest.Equals(BinaryValStored)); writer.Dispose(); reader.Dispose(); dir.Dispose(); }
public virtual void TestInfoStreamGetsFieldName() { Directory dir = NewDirectory(); IndexWriter writer; IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer()); ByteArrayOutputStream infoBytes = new ByteArrayOutputStream(); StreamWriter infoPrintStream = new StreamWriter(infoBytes, Encoding.UTF8); PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream); c.InfoStream = printStreamInfoStream; writer = new IndexWriter(dir, c); Document doc = new Document(); doc.Add(NewField("distinctiveFieldName", "aaa ", StoredTextType)); try { writer.AddDocument(doc); Assert.Fail("Failed to fail."); } catch (BadNews badNews) { infoPrintStream.Flush(); string infoStream = Encoding.UTF8.GetString(infoBytes.GetBuffer()); Assert.IsTrue(infoStream.Contains("distinctiveFieldName")); } writer.Dispose(); dir.Dispose(); }
public virtual void TestStartPositions() { Directory dir = NewDirectory(); // mimic StopAnalyzer CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton()); Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, analyzer); Document doc = new Document(); doc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO)); writer.AddDocument(doc); Document doc2 = new Document(); doc2.Add(NewTextField("field", "quick brown fox", Field.Store.NO)); writer.AddDocument(doc2); IndexReader reader = writer.Reader; IndexSearcher searcher = NewSearcher(reader); // user queries on "starts-with quick" SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1); Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits); // user queries on "starts-with the quick" SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2); sfq = new SpanNotQuery(include, sfq); Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits); writer.Dispose(); reader.Dispose(); dir.Dispose(); }
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; Field f = NewField("foo", "this is a test test", ft); doc.Add(f); for (int i = 0; i < 100; i++) { w.AddDocument(doc); } IndexReader reader = w.Reader; w.Dispose(); Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test"))); DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS); while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { Assert.AreEqual(2, de.Freq()); } reader.Dispose(); dir.Dispose(); }
public virtual void TestBackToTheFuture() { Directory dir = NewDirectory(); IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); Document doc = new Document(); doc.Add(NewStringField("foo", "bar", Field.Store.NO)); iw.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("foo", "baz", Field.Store.NO)); iw.AddDocument(doc); DirectoryReader r1 = DirectoryReader.Open(iw, true); iw.DeleteDocuments(new Term("foo", "baz")); DirectoryReader r2 = DirectoryReader.Open(iw, true); FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r2), "foo"); SortedSetDocValues v = FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r1), "foo"); Assert.AreEqual(2, v.ValueCount); v.Document = 1; Assert.AreEqual(1, v.NextOrd()); iw.Dispose(); r1.Dispose(); r2.Dispose(); dir.Dispose(); }
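For reference, a hedged sketch of how the ord-based SortedSetDocValues API seen above is normally drained for a single document (LookupOrd resolving an ord back to its term bytes is the assumed counterpart of NextOrd):

// Assumed iteration pattern for GetDocTermOrds results.
SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r1), "foo");
dv.Document = 0; // position on the first document
BytesRef scratch = new BytesRef();
long ord;
while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
{
    dv.LookupOrd(ord, scratch); // resolve the ord to its term bytes
}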
public virtual void TestDateCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); const long @base = 13; // prime long day = 1000L * 60 * 60 * 24; Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 300; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); for (int i = 0; i < 50; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new longs cost less than if they had only been packed Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8); iwriter.Dispose(); dir.Dispose(); }
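DirSize is referenced but not defined in this excerpt; it is assumed to total the on-disk size of the index, along these lines:

// Assumed helper: sum the length of every file in the Directory.
private static long DirSize(Directory d)
{
    long size = 0;
    foreach (string file in d.ListAll())
    {
        size += d.FileLength(file);
    }
    return size;
}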
public virtual void TestPrefixQuery_Mem() { Directory directory = NewDirectory(); string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("category", categories[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader reader = writer.Reader; PrefixQuery query = new PrefixQuery(new Term("category", "/Computers")); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below"); query = new PrefixQuery(new Term("category", "/Computers/Mac")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "One in /Computers/Mac"); query = new PrefixQuery(new Term("category", "")); Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category"); Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "everything"); writer.Dispose(); reader.Dispose(); directory.Dispose(); }
public static void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory); Document doc = new Document(); Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO); doc.Add(field); for (int i = 0; i < 5137; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); field.StringValue = "meaninglessnames"; for (int i = 5138; i < 11377; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public virtual void TestConstantScoreQueryAndFilter() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d); Document doc = new Document(); doc.Add(NewStringField("field", "a", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "b", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b")))); Query query = new ConstantScoreQuery(filterB); IndexSearcher s = NewSearcher(r); Assert.AreEqual(1, s.Search(query, filterB, 1).TotalHits); // Query for field:b, Filter field:b Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a")))); query = new ConstantScoreQuery(filterA); Assert.AreEqual(0, s.Search(query, filterB, 1).TotalHits); // Query field:b, Filter field:a r.Dispose(); d.Dispose(); }
public override void SetUp() { base.SetUp(); // we generate awful regexps: good for testing, but for the preflex codec the test can be very slow, so use fewer iterations. NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField("field", "", Field.Store.YES); doc.Add(field); Terms = new SortedSet<BytesRef>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s = TestUtil.RandomUnicodeString(Random()); field.StringValue = s; Terms.Add(new BytesRef(s)); writer.AddDocument(doc); } TermsAutomaton = BasicAutomata.MakeStringUnion(Terms); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public virtual void TestAddIndexes() { Directory dir1 = NewDirectory(); Directory dir2 = NewDirectory(); IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); Document d1 = new Document(); d1.Add(new TextField("f1", "first field", Field.Store.YES)); d1.Add(new TextField("f2", "second field", Field.Store.YES)); writer.AddDocument(d1); writer.Dispose(); writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); Document d2 = new Document(); FieldType customType2 = new FieldType(TextField.TYPE_STORED); customType2.StoreTermVectors = true; d2.Add(new TextField("f2", "second field", Field.Store.YES)); d2.Add(new Field("f1", "first field", customType2)); d2.Add(new TextField("f3", "third field", Field.Store.YES)); d2.Add(new TextField("f4", "fourth field", Field.Store.YES)); writer.AddDocument(d2); writer.Dispose(); writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); writer.AddIndexes(dir2); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir1); Assert.AreEqual(2, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); // make sure the ordering of the "external" segment is preserved Assert.AreEqual("f2", fis2.FieldInfo(0).Name); Assert.AreEqual("f1", fis2.FieldInfo(1).Name); Assert.AreEqual("f3", fis2.FieldInfo(2).Name); Assert.AreEqual("f4", fis2.FieldInfo(3).Name); dir1.Dispose(); dir2.Dispose(); }
public virtual void TestBinaryField() { Documents.Document doc = new Documents.Document(); FieldType ft = new FieldType(); ft.Stored = true; IndexableField stringFld = new Field("string", BinaryVal, ft); IndexableField binaryFld = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8)); IndexableField binaryFld2 = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8)); doc.Add(stringFld); doc.Add(binaryFld); Assert.AreEqual(2, doc.Fields.Count); Assert.IsTrue(binaryFld.BinaryValue != null); Assert.IsTrue(binaryFld.FieldType.Stored); Assert.IsFalse(binaryFld.FieldType.Indexed); string binaryTest = doc.GetBinaryValue("binary").Utf8ToString(); Assert.IsTrue(binaryTest.Equals(BinaryVal)); string stringTest = doc.Get("string"); Assert.IsTrue(binaryTest.Equals(stringTest)); doc.Add(binaryFld2); Assert.AreEqual(3, doc.Fields.Count); BytesRef[] binaryTests = doc.GetBinaryValues("binary"); Assert.AreEqual(2, binaryTests.Length); binaryTest = binaryTests[0].Utf8ToString(); string binaryTest2 = binaryTests[1].Utf8ToString(); Assert.IsFalse(binaryTest.Equals(binaryTest2)); Assert.IsTrue(binaryTest.Equals(BinaryVal)); Assert.IsTrue(binaryTest2.Equals(BinaryVal2)); doc.RemoveField("string"); Assert.AreEqual(2, doc.Fields.Count); doc.RemoveFields("binary"); Assert.AreEqual(0, doc.Fields.Count); }
public virtual void TestFixedBinary() { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); Document doc = new Document(); var bytes = new byte[4]; BytesRef data = new BytesRef(bytes); BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { bytes[0] = (byte)(i >> 24); bytes[1] = (byte)(i >> 16); bytes[2] = (byte)(i >> 8); bytes[3] = (byte)i; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(); BinaryDocValues dv = reader.GetBinaryDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { bytes[0] = (byte)(expectedValue >> 24); bytes[1] = (byte)(expectedValue >> 16); bytes[2] = (byte)(expectedValue >> 8); bytes[3] = (byte)expectedValue; dv.Get(i, scratch); Assert.AreEqual(data, scratch); expectedValue++; } } r.Dispose(); dir.Dispose(); }
private Document Adoc(string[] vals) { Document doc = new Document(); for (int i = 0; i < vals.Length - 2; i += 2) { doc.Add(NewTextField(vals[i], vals[i + 1], Field.Store.YES)); } return doc; }
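Note the loop bound of vals.Length - 2: the last field/value pair in the array is never indexed, so callers are presumably expected to reserve it. A hypothetical call:

// Hypothetical usage: only ("author", "bob") is added to the document;
// ("id", "1") falls outside the Length - 2 bound and is skipped.
Document d = Adoc(new string[] { "author", "bob", "id", "1" });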
public virtual void TestSumDocFreq_Mem() { int numDocs = AtLeast(500); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); Field id = NewStringField("id", "", Field.Store.NO); Field field1 = NewTextField("foo", "", Field.Store.NO); Field field2 = NewTextField("bar", "", Field.Store.NO); doc.Add(id); doc.Add(field1); doc.Add(field2); for (int i = 0; i < numDocs; i++) { id.StringValue = "" + i; char ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z'); char ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z'); field1.StringValue = "" + ch1 + " " + ch2; ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z'); ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z'); field2.StringValue = "" + ch1 + " " + ch2; writer.AddDocument(doc); } IndexReader ir = writer.Reader; AssertSumDocFreq(ir); ir.Dispose(); int numDeletions = AtLeast(20); for (int i = 0; i < numDeletions; i++) { writer.DeleteDocuments(new Term("id", "" + Random().Next(numDocs))); } writer.ForceMerge(1); writer.Dispose(); ir = DirectoryReader.Open(dir); AssertSumDocFreq(ir); ir.Dispose(); dir.Dispose(); }
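AssertSumDocFreq is not shown; it presumably recomputes each field's sum of per-term document frequencies and compares it against the Terms.SumDocFreq statistic. A sketch under that assumption, using the same enumeration style as the surrounding tests:

// Assumed helper: recompute sum(docFreq) per field and check the stored statistic.
private void AssertSumDocFreq(IndexReader ir)
{
    foreach (string field in new string[] { "foo", "bar" })
    {
        Terms terms = MultiFields.GetTerms(ir, field);
        TermsEnum termsEnum = terms.Iterator(null);
        long computed = 0;
        while (termsEnum.Next() != null)
        {
            computed += termsEnum.DocFreq();
        }
        Assert.AreEqual(terms.SumDocFreq, computed);
    }
}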
private void CreateIndex(int numHits) { int numDocs = 500; Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this); Directory directory = new SeekCountingDirectory(this, new RAMDirectory()); // note: test explicitly disables payloads IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(false))); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); string content; if (i % (numDocs / numHits) == 0) { // add a document that matches the query "term1 term2" content = this.Term1 + " " + this.Term2; } else if (i % 15 == 0) { // add a document that only contains term1 content = this.Term1 + " " + this.Term1; } else { // add a document that contains term2 but not term1 content = this.Term3 + " " + this.Term2; } doc.Add(NewTextField(this.Field, content, Documents.Field.Store.YES)); writer.AddDocument(doc); } // make sure the index has only a single segment writer.ForceMerge(1); writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(directory)); this.Searcher = NewSearcher(reader); }
public static void MakeIndex() { Dir = new RAMDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); // make sure we have more than one segment occasionally int num = AtLeast(31); for (int i = 0; i < num; i++) { Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("f", "a b c d b c d c d d", Field.Store.NO)); w.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("f", "a b c d", Field.Store.NO)); w.AddDocument(doc); } s = NewSearcher(w.Reader); w.Dispose(); }
public virtual void TestTermDocsEnum() { Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); Documents.Document d = new Documents.Document(); d.Add(NewStringField("f", "j", Field.Store.NO)); w.AddDocument(d); w.Commit(); w.AddDocument(d); IndexReader r = w.GetReader(); w.Dispose(); DocsEnum de = MultiFields.GetTermDocsEnum(r, null, "f", new BytesRef("j")); Assert.AreEqual(0, de.NextDoc()); Assert.AreEqual(1, de.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, de.NextDoc()); r.Dispose(); dir.Dispose(); }
public virtual void TestSeparateEnums() { Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); Documents.Document d = new Documents.Document(); d.Add(NewStringField("f", "j", Field.Store.NO)); w.AddDocument(d); w.Commit(); w.AddDocument(d); IndexReader r = w.GetReader(); w.Dispose(); DocsEnum d1 = TestUtil.Docs(Random, r, "f", new BytesRef("j"), null, null, DocsFlags.NONE); DocsEnum d2 = TestUtil.Docs(Random, r, "f", new BytesRef("j"), null, null, DocsFlags.NONE); Assert.AreEqual(0, d1.NextDoc()); Assert.AreEqual(0, d2.NextDoc()); r.Dispose(); dir.Dispose(); }
public virtual void TestMaxMergeCount() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); int maxMergeCount = TestUtil.NextInt32(Random, 1, 5); int maxMergeThreads = TestUtil.NextInt32(Random, 1, maxMergeCount); CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount); AtomicInt32 runningMergeCount = new AtomicInt32(0); AtomicBoolean failed = new AtomicBoolean(); if (VERBOSE) { Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads); } ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed); cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads); iwc.SetMergeScheduler(cms); iwc.SetMaxBufferedDocs(2); TieredMergePolicy tmp = new TieredMergePolicy(); iwc.SetMergePolicy(tmp); tmp.MaxMergeAtOnce = 2; tmp.SegmentsPerTier = 2; IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED)); while (enoughMergesWaiting.CurrentCount != 0 && !failed) { for (int i = 0; i < 10; i++) { w.AddDocument(doc); } } w.Dispose(false); dir.Dispose(); }
public override void BeforeClass() // LUCENENET specific - renamed from MakeIndex() to ensure calling order { base.BeforeClass(); Dir = new RAMDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); // make sure we have more than one segment occasionally int num = AtLeast(31); for (int i = 0; i < num; i++) { Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("f", "a b c d b c d c d d", Field.Store.NO)); w.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("f", "a b c d", Field.Store.NO)); w.AddDocument(doc); } s = NewSearcher(w.Reader); w.Dispose(); }
public virtual void TestNoOrds() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random, dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.StoreTermVectors = true; doc.Add(new Field("foo", "this is a test", ft)); iw.AddDocument(doc); AtomicReader ir = GetOnlySegmentReader(iw.GetReader()); Terms terms = ir.GetTermVector(0, "foo"); Assert.IsNotNull(terms); TermsEnum termsEnum = terms.GetEnumerator(); Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this"))); try { var _ = termsEnum.Ord; Assert.Fail(); } catch (Exception expected) when (expected.IsUnsupportedOperationException()) { // expected exception } try { termsEnum.SeekExact(0); Assert.Fail(); } catch (Exception expected) when (expected.IsUnsupportedOperationException()) { // expected exception } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestTransitionAPI() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Documents.Document doc = new Documents.Document(); doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("tokenized_reader", new StringReader("abc xyz"))); doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); doc.Add(new Field("binary", new byte[10])); doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); doc = r.Document(0); // 4 stored fields Assert.AreEqual(4, doc.Fields.Count); Assert.AreEqual("abc", doc.Get("stored")); Assert.AreEqual("abc xyz", doc.Get("stored_indexed")); Assert.AreEqual("abc xyz", doc.Get("stored_tokenized")); BytesRef br = doc.GetBinaryValue("binary"); Assert.IsNotNull(br); Assert.AreEqual(10, br.Length); IndexSearcher s = new IndexSearcher(r); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits); foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" }) { Fields tvFields = r.GetTermVectors(0); Terms tvs = tvFields.Terms(field); Assert.IsNotNull(tvs); Assert.AreEqual(2, tvs.Size()); TermsEnum tvsEnum = tvs.Iterator(null); Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next()); DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null); if (field.Equals("tv")) { Assert.IsNull(dpEnum); } else { Assert.IsNotNull(dpEnum); } Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next()); Assert.IsNull(tvsEnum.Next()); } r.Dispose(); dir.Dispose(); }
public virtual void TestTransitionAPI() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Documents.Document doc = new Documents.Document(); #pragma warning disable 612, 618 doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("tokenized_reader", new StringReader("abc xyz"))); doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); doc.Add(new Field("binary", new byte[10])); doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); #pragma warning restore 612, 618 w.AddDocument(doc); IndexReader r = w.GetReader(); w.Dispose(); doc = r.Document(0); // 4 stored fields Assert.AreEqual(4, doc.Fields.Count); Assert.AreEqual("abc", doc.Get("stored")); Assert.AreEqual("abc xyz", doc.Get("stored_indexed")); Assert.AreEqual("abc xyz", doc.Get("stored_tokenized")); BytesRef br = doc.GetBinaryValue("binary"); Assert.IsNotNull(br); Assert.AreEqual(10, br.Length); IndexSearcher s = new IndexSearcher(r); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits); foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" }) { Fields tvFields = r.GetTermVectors(0); Terms tvs = tvFields.GetTerms(field); Assert.IsNotNull(tvs); Assert.AreEqual(2, tvs.Count); TermsEnum tvsEnum = tvs.GetEnumerator(); Assert.IsTrue(tvsEnum.MoveNext()); Assert.AreEqual(new BytesRef("abc"), tvsEnum.Term); DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null); if (field.Equals("tv", StringComparison.Ordinal)) { Assert.IsNull(dpEnum); } else { Assert.IsNotNull(dpEnum); } Assert.IsTrue(tvsEnum.MoveNext()); Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Term); Assert.IsFalse(tvsEnum.MoveNext()); } r.Dispose(); dir.Dispose(); }
public virtual void TestRandomPhrases() { Directory dir = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy())); IList<IList<string>> docs = new List<IList<string>>(); Documents.Document d = new Documents.Document(); Field f = NewTextField("f", "", Field.Store.NO); d.Add(f); Random r = Random; int NUM_DOCS = AtLeast(10); for (int i = 0; i < NUM_DOCS; i++) { // must be > 4096 so it spans multiple chunks int termCount = TestUtil.NextInt32(Random, 4097, 8200); IList<string> doc = new List<string>(); StringBuilder sb = new StringBuilder(); while (doc.Count < termCount) { if (r.Next(5) == 1 || docs.Count == 0) { // make new non-empty-string term string term; while (true) { term = TestUtil.RandomUnicodeString(r); if (term.Length > 0) { break; } } Exception priorException = null; // LUCENENET: No need to cast to IOException TokenStream ts = analyzer.GetTokenStream("ignore", new StringReader(term)); try { ICharTermAttribute termAttr = ts.AddAttribute<ICharTermAttribute>(); ts.Reset(); while (ts.IncrementToken()) { string text = termAttr.ToString(); doc.Add(text); sb.Append(text).Append(' '); } ts.End(); } catch (Exception e) when (e.IsIOException()) { priorException = e; } finally { IOUtils.DisposeWhileHandlingException(priorException, ts); } } else { // pick existing sub-phrase IList<string> lastDoc = docs[r.Next(docs.Count)]; int len = TestUtil.NextInt32(r, 1, 10); int start = r.Next(lastDoc.Count - len); for (int k = start; k < start + len; k++) { string t = lastDoc[k]; doc.Add(t); sb.Append(t).Append(' '); } } } docs.Add(doc); f.SetStringValue(sb.ToString()); w.AddDocument(d); } IndexReader reader = w.GetReader(); IndexSearcher s = NewSearcher(reader); w.Dispose(); // now search int num = AtLeast(10); for (int i = 0; i < num; i++) { int docID = r.Next(docs.Count); IList<string> doc = docs[docID]; int numTerm = TestUtil.NextInt32(r, 2, 20); int start = r.Next(doc.Count - numTerm); PhraseQuery pq = new PhraseQuery(); StringBuilder sb = new StringBuilder(); for (int t = start; t < start + numTerm; t++) { pq.Add(new Term("f", doc[t])); sb.Append(doc[t]).Append(' '); } TopDocs hits = s.Search(pq, NUM_DOCS); bool found = false; for (int j = 0; j < hits.ScoreDocs.Length; j++) { if (hits.ScoreDocs[j].Doc == docID) { found = true; break; } } Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start); } reader.Dispose(); dir.Dispose(); }
public virtual void TestPhraseQueryInConjunctionScorer() { Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, directory); Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("source", "marketing info", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "foobar", Field.Store.YES)); doc.Add(NewTextField("source", "marketing info", Field.Store.YES)); writer.AddDocument(doc); IndexReader reader = writer.GetReader(); writer.Dispose(); IndexSearcher searcher = NewSearcher(reader); PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("source", "marketing")); phraseQuery.Add(new Term("source", "info")); ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, phraseQuery, searcher); TermQuery termQuery = new TermQuery(new Term("contents", "foobar")); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, termQuery, searcher); reader.Dispose(); writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE)); doc = new Documents.Document(); doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES)); writer.AddDocument(doc); doc = new Documents.Document(); doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES)); writer.AddDocument(doc); reader = writer.GetReader(); writer.Dispose(); searcher = NewSearcher(reader); termQuery = new TermQuery(new Term("contents", "woo")); phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("contents", "map")); phraseQuery.Add(new Term("contents", "entry")); hits = searcher.Search(termQuery, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(phraseQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); booleanQuery = new BooleanQuery(); booleanQuery.Add(phraseQuery, Occur.MUST); booleanQuery.Add(termQuery, Occur.MUST); hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, booleanQuery, searcher); reader.Dispose(); directory.Dispose(); }
public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) { if ("Lucene3x".Equals(Codec.Default.Name)) { throw new Exception("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000); IList<BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } dir.CheckIndexOnClose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random(), TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = Environment.TickCount; w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec"); } savedTerms = ts.SavedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms == null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList<BytesRef> bigOrdTerms = new List<BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms)); Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
public virtual void Test2BTerms_Mem() { if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) { throw RuntimeException.Create("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000); IList<BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER; } dir.CheckIndexOnDispose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(new ConcurrentMergeScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random, TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } savedTerms = ts.savedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms is null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList<BytesRef> bigOrdTerms = new JCG.List<BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
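// NOTE: MyTokenStream is constructed above but not shown in this excerpt. The class
// below is a hypothetical minimal sketch of such a stream: it emits a fixed number of
// random single tokens per document and occasionally records one in savedTerms so the
// test can verify them after indexing. The real implementation may differ (e.g., it may
// generate raw term bytes rather than strings); names here are assumptions.
private sealed class MyTokenStreamSketch : TokenStream
{
    private readonly ICharTermAttribute termAtt;
    private readonly Random random;
    private readonly int tokensPerDoc;
    private int tokenCount;
    public readonly IList<BytesRef> savedTerms = new JCG.List<BytesRef>();

    public MyTokenStreamSketch(Random random, int tokensPerDoc)
    {
        this.random = random;
        this.tokensPerDoc = tokensPerDoc;
        termAtt = AddAttribute<ICharTermAttribute>();
    }

    public override bool IncrementToken()
    {
        if (tokenCount >= tokensPerDoc)
        {
            return false; // this document is done
        }
        ClearAttributes();
        string term = TestUtil.RandomSimpleString(random, 10);
        termAtt.SetEmpty().Append(term);
        if (random.Next(100000) == 17) // rarely remember a term for later verification
        {
            savedTerms.Add(new BytesRef(term));
        }
        tokenCount++;
        return true;
    }

    public override void Reset()
    {
        base.Reset();
        tokenCount = 0; // the writer reuses this stream for every document
    }
}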
public virtual void TestStressAdvance_Mem() { for (int iter = 0; iter < 3; iter++) { if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir); HashSet<int> aDocs = new HashSet<int>(); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int num = AtLeast(4097); if (VERBOSE) { Console.WriteLine("\nTEST: numDocs=" + num); } for (int id = 0; id < num; id++) { if (Random().Next(4) == 3) { f.StringValue = "a"; aDocs.Add(id); } else { f.StringValue = "b"; } idField.StringValue = "" + id; w.AddDocument(doc); if (VERBOSE) { Console.WriteLine("\nTEST: doc upto " + id); } } w.ForceMerge(1); IList<int> aDocIDs = new List<int>(); IList<int> bDocIDs = new List<int>(); DirectoryReader r = w.Reader; int[] idToDocID = new int[r.MaxDoc]; for (int docID = 0; docID < idToDocID.Length; docID++) { int id = Convert.ToInt32(r.Document(docID).Get("id")); if (aDocs.Contains(id)) { aDocIDs.Add(docID); } else { bDocIDs.Add(docID); } } TermsEnum te = GetOnlySegmentReader(r).Fields.Terms("field").Iterator(null); DocsEnum de = null; for (int iter2 = 0; iter2 < 10; iter2++) { if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2); } Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a"))); de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE); TestOne(de, aDocIDs); Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b"))); de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE); TestOne(de, bDocIDs); } w.Dispose(); r.Dispose(); dir.Dispose(); } }
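// NOTE: TestOne is called above but not shown here. A plausible minimal sketch: walk
// the expected doc IDs for a term, randomly mixing NextDoc() with Advance() so both
// code paths of the enum's skipping logic get exercised. The name TestOneSketch and
// the exact mixing ratio are assumptions for illustration.
private void TestOneSketch(DocsEnum docs, IList<int> expected)
{
    int upto = -1;
    while (upto < expected.Count)
    {
        int docID;
        if (Random().Next(4) == 1 || upto == expected.Count - 1)
        {
            // exercise NextDoc(): step to the next expected document
            docID = docs.NextDoc();
            upto++;
        }
        else
        {
            // exercise Advance(): jump forward a random number of expected docs
            int inc = TestUtil.NextInt(Random(), 1, expected.Count - 1 - upto);
            upto += inc;
            docID = docs.Advance(expected[upto]);
        }
        if (upto == expected.Count)
        {
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docID);
        }
        else
        {
            Assert.AreEqual(expected[upto], docID);
        }
    }
}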
/* Run one indexer and 2 searchers against a single index as a stress test. */ public virtual void RunTest(Directory directory) { TimedThread[] threads = new TimedThread[4]; IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7); ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3; IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random()); // Establish a base index of 100 docs: for (int i = 0; i < 100; i++) { Documents.Document d = new Documents.Document(); d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES)); d.Add(NewTextField("contents", English.IntToEnglish(i), Field.Store.NO)); if ((i - 1) % 7 == 0) { writer.Commit(); } writer.AddDocument(d); } writer.Commit(); IndexReader r = DirectoryReader.Open(directory); Assert.AreEqual(100, r.NumDocs); r.Dispose(); IndexerThread indexerThread = new IndexerThread(writer, threads); threads[0] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(writer, threads); threads[1] = indexerThread2; indexerThread2.Start(); SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[2] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[3] = searcherThread2; searcherThread2.Start(); indexerThread.Join(); indexerThread2.Join(); searcherThread1.Join(); searcherThread2.Join(); writer.Dispose(); Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer"); Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2"); Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1"); Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2"); //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
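// NOTE: TimedThread, IndexerThread, and SearcherThread are used above but not shown.
// Below is a hypothetical minimal sketch of the shared base: each worker repeats
// DoWork() for a fixed wall-clock budget, stops early if any sibling already failed,
// and records its own failure instead of tearing down the test run. The class name,
// the fixed 500 ms budget, and the use of System.Threading.Thread are assumptions.
private abstract class TimedThreadSketch
{
    public volatile bool Failed;
    public int Count;
    private readonly TimedThreadSketch[] allThreads;
    private readonly Thread thread;
    private const int RunTimeMsec = 500; // assumption; the real budget may be randomized

    protected TimedThreadSketch(TimedThreadSketch[] threads)
    {
        allThreads = threads;
        thread = new Thread(Run);
    }

    public abstract void DoWork();

    public void Start() => thread.Start();
    public void Join() => thread.Join();

    private void Run()
    {
        long stopTime = Environment.TickCount + RunTimeMsec;
        try
        {
            do
            {
                if (AnyErrors()) break; // bail out once any sibling thread failed
                DoWork();
                Count++;
            } while (Environment.TickCount < stopTime);
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
            Failed = true; // remember the failure for the Assert.IsTrue checks above
        }
    }

    private bool AnyErrors()
    {
        foreach (TimedThreadSketch t in allThreads)
        {
            if (t != null && t.Failed) return true; // slots are filled one at a time
        }
        return false;
    }
}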
public virtual void TestSurrogatesOrder() { Directory dir = NewDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); config.Codec = new PreFlexRWCodec(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, config); int numField = TestUtil.NextInt32(Random, 2, 5); int uniqueTermCount = 0; int tc = 0; var fieldTerms = new JCG.List<Term>(); for (int f = 0; f < numField; f++) { string field = "f" + f; int numTerms = AtLeast(200); ISet<string> uniqueTerms = new JCG.HashSet<string>(); for (int i = 0; i < numTerms; i++) { string term = GetRandomString(Random) + "_" + (tc++); uniqueTerms.Add(term); fieldTerms.Add(new Term(field, term)); Documents.Document doc = new Documents.Document(); doc.Add(NewStringField(field, term, Field.Store.NO)); w.AddDocument(doc); } uniqueTermCount += uniqueTerms.Count; } IndexReader reader = w.GetReader(); if (Verbose) { fieldTerms.Sort(termAsUTF16Comparer); Console.WriteLine("\nTEST: UTF16 order"); foreach (Term t in fieldTerms) { Console.WriteLine(" " + ToHexString(t)); } } // sorts in code point order: fieldTerms.Sort(); if (Verbose) { Console.WriteLine("\nTEST: codepoint order"); foreach (Term t in fieldTerms) { Console.WriteLine(" " + ToHexString(t)); } } Term[] fieldTermsArray = fieldTerms.ToArray(); //SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms); //FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1)); //Assert.IsNotNull(fields); DoTestStraightEnum(fieldTerms, reader, uniqueTermCount); DoTestSeekExists(Random, fieldTerms, reader); DoTestSeekDoesNotExist(Random, numField, fieldTerms, fieldTermsArray, reader); reader.Dispose(); w.Dispose(); dir.Dispose(); }
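// The two sorts above exist because UTF-16 code-unit order and Unicode code point
// order disagree for supplementary characters: surrogates occupy 0xD800-0xDFFF, which
// sits below the top of the BMP. A self-contained demonstration of the mismatch the
// test must handle (the example characters are chosen here purely for illustration):
private static void DemoSurrogateOrdering()
{
    string bmpChar = "\uFB01";      // U+FB01, a BMP character
    string suppChar = "\U00010400"; // U+10400, stored as the surrogate pair 0xD801 0xDC00
    // By UTF-16 code units the supplementary character sorts FIRST (0xD801 < 0xFB01)...
    Console.WriteLine(string.CompareOrdinal(suppChar, bmpChar) < 0);                       // True
    // ...but by code point it sorts LAST (0x10400 > 0xFB01), the order the enum must surface.
    Console.WriteLine(char.ConvertToUtf32(suppChar, 0) > char.ConvertToUtf32(bmpChar, 0)); // True
}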
public virtual void TestRandom() { int num = AtLeast(2); for (int iter = 0; iter < num; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); // we can do this because we use NoMergePolicy (and don't merge to "nothing") w.KeepFullyDeletedSegments = true; IDictionary<BytesRef, IList<int?>> docs = new Dictionary<BytesRef, IList<int?>>(); ISet<int?> deleted = new JCG.HashSet<int?>(); IList<BytesRef> terms = new List<BytesRef>(); int numDocs = TestUtil.NextInt32(Random, 1, 100 * RANDOM_MULTIPLIER); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field id = NewStringField("id", "", Field.Store.NO); doc.Add(id); bool onlyUniqueTerms = Random.NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs); } ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>(); for (int i = 0; i < numDocs; i++) { if (!onlyUniqueTerms && Random.NextBoolean() && terms.Count > 0) { // re-use existing term BytesRef term = terms[Random.Next(terms.Count)]; docs[term].Add(i); f.SetStringValue(term.Utf8ToString()); } else { string s = TestUtil.RandomUnicodeString(Random, 10); BytesRef term = new BytesRef(s); if (!docs.TryGetValue(term, out IList<int?> docsTerm)) { docs[term] = docsTerm = new List<int?>(); } docsTerm.Add(i); terms.Add(term); uniqueTerms.Add(term); f.SetStringValue(s); } id.SetStringValue("" + i); w.AddDocument(doc); if (Random.Next(4) == 1) { w.Commit(); } if (i > 0 && Random.Next(20) == 1) { int delID = Random.Next(i); deleted.Add(delID); w.DeleteDocuments(new Term("id", "" + delID)); if (VERBOSE) { Console.WriteLine("TEST: delete " + delID); } } } if (VERBOSE) { List<BytesRef> termsList = new List<BytesRef>(uniqueTerms); #pragma warning disable 612, 618 termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer); #pragma warning restore 612, 618 Console.WriteLine("TEST: terms in UTF16 order:"); foreach (BytesRef b in termsList) { Console.WriteLine(" " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b); foreach (int docID in docs[b]) { if (deleted.Contains(docID)) { Console.WriteLine(" " + docID + " (deleted)"); } else { Console.WriteLine(" " + docID); } } } } IndexReader reader = w.GetReader(); w.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: reader=" + reader); } IBits liveDocs = MultiFields.GetLiveDocs(reader); foreach (int delDoc in deleted) { Assert.IsFalse(liveDocs.Get(delDoc)); } for (int i = 0; i < 100; i++) { BytesRef term = terms[Random.Next(terms.Count)]; if (VERBOSE) { Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term); } DocsEnum docsEnum = TestUtil.Docs(Random, reader, "field", term, liveDocs, null, DocsFlags.NONE); Assert.IsNotNull(docsEnum); foreach (int docID in docs[term]) { if (!deleted.Contains(docID)) { Assert.AreEqual(docID, docsEnum.NextDoc()); } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); } reader.Dispose(); dir.Dispose(); } }
public virtual void TestFlushExceptions() { #if NETCOREAPP2_0 fail("LUCENENET TODO: Causing fatal crashes intermittently on NETCOREAPP2.0"); #endif MockDirectoryWrapper directory = NewMockDirectory(); FailOnlyOnFlush failure = new FailOnlyOnFlush(this); directory.FailOn(failure); IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2)); Document doc = new Document(); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int extraCount = 0; for (int i = 0; i < 10; i++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + i); } for (int j = 0; j < 20; j++) { idField.SetStringValue(Convert.ToString(i * 20 + j)); writer.AddDocument(doc); } // must cycle here because sometimes the merge flushes // the doc we just added and so there's nothing to // flush, and we don't hit the exception while (true) { writer.AddDocument(doc); failure.SetDoFail(); try { writer.Flush(true, true); if (failure.hitExc) { Assert.Fail("failed to hit IOException"); } extraCount++; } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine(ioe.StackTrace); } failure.ClearDoFail(); break; } } Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(directory); Assert.AreEqual(200 + extraCount, reader.NumDocs); reader.Dispose(); directory.Dispose(); }
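// NOTE: FailOnlyOnFlush is used above but not shown in this excerpt. Below is a
// hypothetical minimal sketch: a MockDirectoryWrapper.Failure that, once armed via
// SetDoFail(), throws an IOException whenever the current stack indicates a flush is
// in progress, and records in hitExc that it actually fired. The stack-trace scan and
// the assumption that SetDoFail()/ClearDoFail() are overridable are both illustrative;
// the real helper may detect the flush differently.
private class FailOnlyOnFlushSketch : MockDirectoryWrapper.Failure
{
    internal bool hitExc;
    private volatile bool doFail;

    public override void SetDoFail() { doFail = true; hitExc = false; }
    public override void ClearDoFail() { doFail = false; }

    public override void Eval(MockDirectoryWrapper dir)
    {
        // Assumption: detect a flush by scanning the managed stack for a method
        // named "Flush" (requires System.Diagnostics and System.IO).
        if (doFail && new StackTrace().ToString().Contains("Flush"))
        {
            hitExc = true; // the test asserts this flag to prove the failure fired
            throw new IOException("now failing during flush");
        }
    }
}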
public override void BinaryField(FieldInfo fieldInfo, byte[] value) { doc.Add(new StoredField(fieldInfo.Name, value)); }