public override PostingsFormat GetPostingsFormatForField(string name) { PostingsFormat codec; if (!previousMappings.TryGetValue(name, out codec) || codec == null) { codec = formats[Math.Abs(perFieldSeed ^ name.GetHashCode()) % formats.Count]; if (codec is SimpleTextPostingsFormat && perFieldSeed % 5 != 0) { // make simpletext rarer, choose again codec = formats[Math.Abs(perFieldSeed ^ name.ToUpperInvariant().GetHashCode()) % formats.Count]; } previousMappings[name] = codec; Debug.Assert(previousMappings.Count < 10000, "test went insane"); // Safety } if (LuceneTestCase.VERBOSE) { Console.WriteLine("RandomCodec.GetPostingsFormatForField(\"" + name + "\") returned '" + codec.Name + "' with underlying type '" + codec.GetType().ToString() + "'."); } return(codec); }
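The lookup above derives a stable per-field index via Math.Abs(seed ^ hash) % count. Below is a minimal standalone sketch of that pattern (PerFieldPick is a hypothetical helper, not part of RandomCodec), with two caveats worth knowing: Math.Abs(int.MinValue) throws OverflowException, so masking the sign bit is the safer way to get a non-negative index, and string.GetHashCode is not stable across processes on modern .NET, so seed-based reproduction would also need a stable hash.

using System;
using System.Collections.Generic;

static class PerFieldPick
{
    // Deterministically pick one option per field name from a seeded pool.
    public static T Pick<T>(IList<T> options, int seed, string fieldName)
    {
        // Mask the sign bit instead of calling Math.Abs: Math.Abs(int.MinValue) throws.
        int h = (seed ^ fieldName.GetHashCode()) & 0x7FFFFFFF;
        return options[h % options.Count];
    }
}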
public void TestRandomChains_() { int numIterations = AtLeast(20); Random random = Random; for (int i = 0; i < numIterations; i++) { MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next()); if (VERBOSE) { Console.WriteLine("Creating random analyzer:" + a); } try { CheckRandomData(random, a, 500 * RANDOM_MULTIPLIER, 20, false, false /* We already validate our own offsets... */); } catch (Exception /*e*/) { Console.WriteLine("Exception from random analyzer: " + a); throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } }
public virtual void TestBooleanSpanQuery() { int hits = 0; Directory directory = NewDirectory(); Analyzer indexerAnalyzer = new MockAnalyzer(Random()); IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer); IndexWriter writer = new IndexWriter(directory, config); string FIELD = "content"; Document d = new Document(); d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES)); writer.AddDocument(d); writer.Dispose(); IndexReader indexReader = DirectoryReader.Open(directory); IndexSearcher searcher = NewSearcher(indexReader); DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f); SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork")); SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork")); // intentionally misspelled term; matches nothing query.Add(sq1); query.Add(sq2); TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true); searcher.Search(query, collector); hits = collector.GetTopDocs().ScoreDocs.Length; foreach (ScoreDoc scoreDoc in collector.GetTopDocs().ScoreDocs) { Console.WriteLine(scoreDoc.Doc); } indexReader.Dispose(); Assert.AreEqual(1, hits); // NUnit convention: expected value first directory.Dispose(); }
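A note on why tieBreakerMultiplier is 1.0f here: DisjunctionMaxQuery scores a document as the maximum sub-score plus tieBreakerMultiplier times the sum of the other matching sub-scores, so a multiplier of 1.0 degenerates to a plain sum, much like a BooleanQuery of SHOULD clauses. A minimal sketch of that combination (DisMaxScore is an illustrative helper, not a Lucene API):

using System;

static class DisMaxDemo
{
    // Combine sub-scores the way DisjunctionMaxQuery documents:
    //   max(subScores) + tie * (sum of the remaining subScores)
    // Assumes at least one matching subquery contributed a score.
    static float DisMaxScore(float[] subScores, float tie)
    {
        float max = float.MinValue, sum = 0;
        foreach (float s in subScores) { sum += s; if (s > max) max = s; }
        return max + tie * (sum - max);
    }

    static void Main()
    {
        float[] scores = { 0.4f, 0.7f };
        Console.WriteLine(DisMaxScore(scores, 0.0f)); // 0.7 (pure max)
        Console.WriteLine(DisMaxScore(scores, 1.0f)); // ~1.1 (plain sum, as in the test)
    }
}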
public override bool IncrementToken() { ClearAttributes(); if (TokenCount >= TokensPerDoc) { return(false); } int shift = 32; for (int i = 0; i < 5; i++) { Bytes.Bytes[i] = unchecked ((byte)((TermCounter >> shift) & 0xFF)); shift -= 8; } TermCounter++; TokenCount++; if (--NextSave == 0) { SavedTerms.Add(BytesRef.DeepCopyOf(Bytes)); Console.WriteLine("TEST: save term=" + Bytes); NextSave = TestUtil.NextInt(Random, 500000, 1000000); } return(true); }
public void TestRandomChainsWithLargeStrings() { int numIterations = AtLeast(20); Random random = Random(); for (int i = 0; i < numIterations; i++) { MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next()); if (VERBOSE) { Console.WriteLine("Creating random analyzer:" + a); } try { CheckRandomData(random, a, 50 * RANDOM_MULTIPLIER, 128, false, false /* We already validate our own offsets... */); } catch (Exception /*e*/) { Console.WriteLine("Exception from random analyzer: " + a); throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } }
public override void Warm(AtomicReader reader) { if (VERBOSE) { Console.WriteLine("TEST: now warm merged reader=" + reader); } #if FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE outerInstance.warmed.AddOrUpdate(((SegmentReader)reader).core, true); #else outerInstance.warmed[((SegmentReader)reader).core] = true; #endif int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; int sum = 0; int inc = Math.Max(1, maxDoc / 50); for (int docID = 0; docID < maxDoc; docID += inc) { if (liveDocs == null || liveDocs.Get(docID)) { Document doc = reader.Document(docID); sum += doc.Fields.Count; } } IndexSearcher searcher = #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION outerInstance. #endif NewSearcher(reader); sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits; if (VERBOSE) { Console.WriteLine("TEST: warm visited " + sum + " fields"); } }
public override void Run() { for (int iter = 0; iter < Iters && !Failed.Get(); iter++) { //final int x = Random().nextInt(5); int x = Random.Next(3); try { switch (x) { case 0: RollbackLock.@Lock(); if (VERBOSE) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now rollback"); } try { WriterRef.Value.Rollback(); if (VERBOSE) { Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": rollback done; now open new writer"); } WriterRef.Value = new IndexWriter(d, OuterInstance.NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); } finally { RollbackLock.Unlock(); } break; case 1: CommitLock.@Lock(); if (VERBOSE) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now commit"); } try { if (Random.NextBoolean()) { WriterRef.Value.PrepareCommit(); } WriterRef.Value.Commit(); } catch (ObjectDisposedException) { // ok } catch (NullReferenceException) { // ok } finally { CommitLock.Unlock(); } break; case 2: if (VERBOSE) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now add"); } try { WriterRef.Value.AddDocument(Docs.NextDoc()); } catch (ObjectDisposedException) { // ok } catch (System.NullReferenceException) { // ok } catch (InvalidOperationException) { // ok } break; } } catch (Exception t) { Failed.Set(true); throw new Exception(t.Message, t); } } }
public virtual void TestCloseWithThreads([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { int NUM_THREADS = 3; int numIterations = TEST_NIGHTLY ? 7 : 3; for (int iter = 0; iter < numIterations; iter++) { if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter); } Directory dir = NewDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(10) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(4)); IndexWriter writer = new IndexWriter(dir, config); var scheduler = config.mergeScheduler as IConcurrentMergeScheduler; if (scheduler != null) { scheduler.SetSuppressExceptions(); } IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, false, NewField) // LUCENENET NOTE - ConcurrentMergeScheduler // used to take too long for this test to index a single document // so, increased the time from 200 to 300 ms. // But it has now been restored to 200 ms like Lucene. { TimeToRunInMilliseconds = 200 }; } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } bool done = false; while (!done) { Thread.Sleep(100); for (int i = 0; i < NUM_THREADS; i++) // only stop when at least one thread has added a doc { if (threads[i].AddCount > 0) { done = true; break; } else if (!threads[i].IsAlive) { Assert.Fail("thread failed before indexing a single document"); } } } if (VERBOSE) { Console.WriteLine("\nTEST: now close"); } writer.Dispose(false); // Make sure threads that are adding docs are not hung: for (int i = 0; i < NUM_THREADS; i++) { // Without fix for LUCENE-1130: one of the // threads will hang threads[i].Join(); if (threads[i].IsAlive) { Assert.Fail("thread seems to be hung"); } } // Quick test to make sure index is not corrupt: IndexReader reader = DirectoryReader.Open(dir); DocsEnum tdocs = TestUtil.Docs(Random, reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0); int count = 0; while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { count++; } Assert.IsTrue(count > 0); reader.Dispose(); dir.Dispose(); } }
public virtual void TestParsingAndSearching() { string field = "content"; string[] docs = new string[] { "\\ abcdefg1", "\\x00079 hijklmn1", "\\\\ opqrstu1" }; // queries that should find all docs Query[] matchAll = new Query[] { new WildcardQuery(new Term(field, "*")), new WildcardQuery(new Term(field, "*1")), new WildcardQuery(new Term(field, "**1")), new WildcardQuery(new Term(field, "*?")), new WildcardQuery(new Term(field, "*?1")), new WildcardQuery(new Term(field, "?*1")), new WildcardQuery(new Term(field, "**")), new WildcardQuery(new Term(field, "***")), new WildcardQuery(new Term(field, "\\\\*")) }; // queries that should find no docs Query[] matchNone = new Query[] { new WildcardQuery(new Term(field, "a*h")), new WildcardQuery(new Term(field, "a?h")), new WildcardQuery(new Term(field, "*a*h")), new WildcardQuery(new Term(field, "?a")), new WildcardQuery(new Term(field, "a?")) }; PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][] { new PrefixQuery[] { new PrefixQuery(new Term(field, "a")), new PrefixQuery(new Term(field, "ab")), new PrefixQuery(new Term(field, "abc")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "h")), new PrefixQuery(new Term(field, "hi")), new PrefixQuery(new Term(field, "hij")), new PrefixQuery(new Term(field, "\\x0007")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "o")), new PrefixQuery(new Term(field, "op")), new PrefixQuery(new Term(field, "opq")), new PrefixQuery(new Term(field, "\\\\")) } }; WildcardQuery[][] matchOneDocWild = new WildcardQuery[][] { new WildcardQuery[] { new WildcardQuery(new Term(field, "*a*")), new WildcardQuery(new Term(field, "*ab*")), new WildcardQuery(new Term(field, "*abc**")), new WildcardQuery(new Term(field, "ab*e*")), new WildcardQuery(new Term(field, "*g?")), new WildcardQuery(new Term(field, "*f?1")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*h*")), new WildcardQuery(new Term(field, "*hi*")), new WildcardQuery(new Term(field, "*hij**")), new WildcardQuery(new Term(field, "hi*k*")), new WildcardQuery(new Term(field, "*n?")), new WildcardQuery(new Term(field, "*m?1")), new WildcardQuery(new Term(field, "hij**")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*o*")), new WildcardQuery(new Term(field, "*op*")), new WildcardQuery(new Term(field, "*opq**")), new WildcardQuery(new Term(field, "op*q*")), new WildcardQuery(new Term(field, "*u?")), new WildcardQuery(new Term(field, "*t?1")), new WildcardQuery(new Term(field, "opq**")) } }; // prepare the index Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(field, docs[i], Field.Store.NO)); iw.AddDocument(doc); } iw.Dispose(); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); // test queries that must find all foreach (Query q in matchAll) { if (VERBOSE) { Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(docs.Length, hits.Length); } // test queries that must find none foreach (Query q in matchNone) { if (VERBOSE) { Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); } // test the prefix queries find only one doc for (int i = 0; i <
matchOneDocPrefix.Length; i++) { for (int j = 0; j < matchOneDocPrefix[i].Length; j++) { Query q = matchOneDocPrefix[i][j]; if (VERBOSE) { Console.WriteLine("match 1 prefix: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(i, hits[0].Doc); } } // test the wildcard queries find only one doc for (int i = 0; i < matchOneDocWild.Length; i++) { for (int j = 0; j < matchOneDocWild[i].Length; j++) { Query q = matchOneDocWild[i][j]; if (VERBOSE) { Console.WriteLine("match 1 wild: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(i, hits[0].Doc); } } reader.Dispose(); dir.Dispose(); }
public virtual void Test() { Random random = new Random(Random.Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues); Directory d = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random); analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif LuceneTestCase.Random, d, analyzer); int numDocs = AtLeast(10); for (int docCount = 0; docCount < numDocs; docCount++) { w.AddDocument(docs.NextDoc()); } IndexReader r = w.GetReader(); w.Dispose(); List <BytesRef> terms = new List <BytesRef>(); TermsEnum termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null); BytesRef term; while ((term = termsEnum.Next()) != null) { terms.Add(BytesRef.DeepCopyOf(term)); } if (VERBOSE) { Console.WriteLine("TEST: " + terms.Count + " terms"); } int upto = -1; int iters = AtLeast(200); for (int iter = 0; iter < iters; iter++) { bool isEnd; if (upto != -1 && LuceneTestCase.Random.NextBoolean()) { // next if (VERBOSE) { Console.WriteLine("TEST: iter next"); } isEnd = termsEnum.Next() == null; upto++; if (isEnd) { if (VERBOSE) { Console.WriteLine(" end"); } Assert.AreEqual(upto, terms.Count); upto = -1; } else { if (VERBOSE) { Console.WriteLine(" got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString()); } Assert.IsTrue(upto < terms.Count); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { BytesRef target; string exists; if (LuceneTestCase.Random.NextBoolean()) { // likely fake term if (LuceneTestCase.Random.NextBoolean()) { target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random)); } else { target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random)); } exists = "likely not"; } else { // real term target = terms[LuceneTestCase.Random.Next(terms.Count)]; exists = "yes"; } upto = terms.BinarySearch(target); if (LuceneTestCase.Random.NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists); } // seekCeil TermsEnum.SeekStatus status = termsEnum.SeekCeil(target); if (VERBOSE) { Console.WriteLine(" got " + status); } if (upto < 0) { upto = -(upto + 1); if (upto >= terms.Count) { Assert.AreEqual(TermsEnum.SeekStatus.END, status); upto = -1; } else { Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { if (VERBOSE) { Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists); } // seekExact bool result = termsEnum.SeekExact(target); if (VERBOSE) { Console.WriteLine(" got " + result); } if (upto < 0) { Assert.IsFalse(result); upto = -1; } else { Assert.IsTrue(result); Assert.AreEqual(target, termsEnum.Term); } } } } r.Dispose(); d.Dispose(); docs.Dispose(); }
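The assertions above lean on List<T>.BinarySearch's documented contract: when the target is absent, it returns the bitwise complement of the insertion point, which the test decodes with upto = -(upto + 1). A tiny standalone illustration:

using System;
using System.Collections.Generic;

class BinarySearchDemo
{
    static void Main()
    {
        var terms = new List<string> { "apple", "cherry", "melon" }; // must be sorted
        int pos = terms.BinarySearch("banana");   // absent
        Console.WriteLine(pos);                   // -2: bitwise complement of insertion point 1
        Console.WriteLine(-(pos + 1));            // 1: the insertion point, as decoded in the test
    }
}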
public override SeekStatus SeekCeil(BytesRef term) { if (DEBUG_SURROGATES) { Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString())); } skipNext = false; TermInfosReader tis = outerInstance.TermsDict; Term t0 = new Term(fieldInfo.Name, term); Debug.Assert(termEnum != null); tis.SeekEnum(termEnum, t0, false); Term t = termEnum.Term(); if (t != null && t.Field == internedFieldName && term.BytesEquals(t.Bytes)) { // If we found an exact match, no need to do the // surrogate dance if (DEBUG_SURROGATES) { Console.WriteLine(" seek exact match"); } current = t.Bytes; return(SeekStatus.FOUND); } else if (t == null || t.Field != internedFieldName) { // TODO: maybe we can handle this like the next() // into null? set term as prevTerm then dance? if (DEBUG_SURROGATES) { Console.WriteLine(" seek hit EOF"); } // We hit EOF; try end-case surrogate dance: if we // find an E, try swapping in S, backwards: scratchTerm.CopyBytes(term); Debug.Assert(scratchTerm.Offset == 0); for (int i = scratchTerm.Length - 1; i >= 0; i--) { if (IsHighBMPChar(scratchTerm.Bytes, i)) { if (DEBUG_SURROGATES) { Console.WriteLine(" found E pos=" + i + "; try seek"); } if (SeekToNonBMP(seekTermEnum, scratchTerm, i)) { scratchTerm.CopyBytes(seekTermEnum.Term().Bytes); outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false); newSuffixStart = 1 + i; DoPushes(); // Found a match // TODO: faster seek? current = termEnum.Term().Bytes; return(SeekStatus.NOT_FOUND); } } } if (DEBUG_SURROGATES) { Console.WriteLine(" seek END"); } current = null; return(SeekStatus.END); } else { // We found a non-exact but non-null term; this one // is fun -- just treat it like next, by pretending // requested term was prev: prevTerm.CopyBytes(term); if (DEBUG_SURROGATES) { Console.WriteLine(" seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text())); } BytesRef br = t.Bytes; Debug.Assert(br.Offset == 0); SetNewSuffixStart(term, br); SurrogateDance(); Term t2 = termEnum.Term(); if (t2 == null || t2.Field != internedFieldName) { // PreFlex codec interns field names; verify: Debug.Assert(t2 == null || !t2.Field.Equals(internedFieldName, StringComparison.Ordinal)); current = null; return(SeekStatus.END); } else { current = t2.Bytes; Debug.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString())); return(SeekStatus.NOT_FOUND); } } }
// Pre-flex indices store terms in UTF16 sort order, but // certain queries require Unicode codepoint order; this // method carefully seeks around surrogates to handle // this impedance mismatch private void SurrogateDance() { if (!unicodeSortOrder) { return; } // We are invoked after TIS.next() (by UTF16 order) to // possibly seek to a different "next" (by unicode // order) term. // We scan only the "delta" from the last term to the // current term, in UTF8 bytes. We look at 1) the bytes // stripped from the prior term, and then 2) the bytes // appended to that prior term's prefix. // We don't care about specific UTF8 sequences, just // the "category" of the UTF16 character. Category S // is a high/low surrogate pair (it is non-BMP). // Category E is any BMP char > UNI_SUR_LOW_END (and < // U+FFFF). Category A is the rest (any unicode char // <= UNI_SUR_HIGH_START). // The core issue is that pre-flex indices sort the // characters as ASE, while flex must sort as AES. So // when scanning, when we hit S, we must 1) seek // forward to E and enum the terms there, then 2) seek // back to S and enum all terms there, then 3) seek to // after E. Three different seek points (1, 2, 3). // We can easily detect S in UTF8: if a byte has // prefix 11110 (0xf0), then that byte and the // following 3 bytes encode a single unicode codepoint // in S. Similarly, we can detect E: if a byte has // prefix 1110111 (0xee), then that byte and the // following 2 bytes encode a single unicode codepoint // in E. // Note that this is really a recursive process -- // maybe the char at pos 2 needs to dance, but at any // point in its dance, suddenly pos 4 needs to dance // so you must finish pos 4 before returning to pos // 2. But then during pos 4's dance maybe pos 7 needs // to dance, etc. However, despite being recursive, // we don't need to hold any state because the state // can always be derived by looking at prior term & // current term. // TODO: can we avoid this copy? if (termEnum.Term() == null || termEnum.Term().Field != internedFieldName) { scratchTerm.Length = 0; } else { scratchTerm.CopyBytes(termEnum.Term().Bytes); } if (DEBUG_SURROGATES) { Console.WriteLine(" dance"); Console.WriteLine(" prev=" + UnicodeUtil.ToHexString(prevTerm.Utf8ToString())); Console.WriteLine(" " + prevTerm.ToString()); Console.WriteLine(" term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString())); Console.WriteLine(" " + scratchTerm.ToString()); } // this code assumes TermInfosReader/SegmentTermEnum // always use BytesRef.offset == 0 Debug.Assert(prevTerm.Offset == 0); Debug.Assert(scratchTerm.Offset == 0); // Need to loop here because we may need to do multiple // pops, and possibly a continue in the end, ie: // // cont // pop, cont // pop, pop, cont // <nothing> // while (true) { if (DoContinue()) { break; } else { if (!DoPop()) { break; } } } if (DEBUG_SURROGATES) { Console.WriteLine(" finish bmp ends"); } DoPushes(); }
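A self-contained demo of the mismatch SurrogateDance works around, using the categories the comment defines: a category E char (high BMP) and a category S char (a surrogate pair, non-BMP) sort in opposite orders under UTF-16 code-unit comparison versus codepoint/UTF-8 comparison:

using System;
using System.Text;

class SurrogateOrderDemo
{
    static void Main()
    {
        string e = "\uFFFD";        // U+FFFD: BMP char above the surrogate range (category E)
        string s = "\uD800\uDC00";  // U+10000: smallest non-BMP char, as a surrogate pair (category S)

        // UTF-16 code-unit order (pre-flex): 0xFFFD > 0xD800, so E sorts AFTER S
        Console.WriteLine(string.CompareOrdinal(e, s) > 0);  // True

        // UTF-8 byte order == codepoint order (flex): EF BF BD < F0 90 80 80, so E sorts BEFORE S
        byte[] be = Encoding.UTF8.GetBytes(e);
        byte[] bs = Encoding.UTF8.GetBytes(s);
        Console.WriteLine(be[0] < bs[0]);                    // True
    }
}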
// FST is complete private void VerifyUnPruned(int inputMode, FST <T> fst) { FST <long?> fstLong; ISet <long?> validOutputs; long minLong = long.MaxValue; long maxLong = long.MinValue; if (doReverseLookup) { FST <long?> fstLong0 = fst as FST <long?>; fstLong = fstLong0; validOutputs = new HashSet <long?>(); foreach (InputOutput <T> pair in pairs) { long?output = pair.Output as long?; maxLong = Math.Max(maxLong, output.Value); minLong = Math.Min(minLong, output.Value); validOutputs.Add(output.Value); } } else { fstLong = null; validOutputs = null; } if (pairs.Count == 0) { Assert.IsNull(fst); return; } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now verify " + pairs.Count + " terms"); foreach (InputOutput <T> pair in pairs) { Assert.IsNotNull(pair); Assert.IsNotNull(pair.Input); Assert.IsNotNull(pair.Output); Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output)); } } Assert.IsNotNull(fst); // visit valid pairs in order -- make sure all words // are accepted, and FSTEnum's next() steps through // them correctly if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check valid terms/next()"); } { Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst); foreach (InputOutput <T> pair in pairs) { Int32sRef term = pair.Input; if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output)); } T output = Run(fst, term, null); Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted"); Assert.IsTrue(OutputsEqual(pair.Output, output)); // verify enum's next Int32sRefFSTEnum.InputOutput <T> t = fstEnum.Next(); Assert.IsNotNull(t); Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input)); Assert.IsTrue(OutputsEqual(pair.Output, t.Output)); } Assert.IsNull(fstEnum.Next()); } IDictionary <Int32sRef, T> termsMap = new Dictionary <Int32sRef, T>(); foreach (InputOutput <T> pair in pairs) { termsMap[pair.Input] = pair.Output; } if (doReverseLookup && maxLong > minLong) { // Do random lookups so we test null (output doesn't // exist) case: Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7)); Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7)); int num = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num; iter++) { long v = TestUtil.NextInt64(random, minLong, maxLong); Int32sRef input = Util.GetByOutput(fstLong, v); Assert.IsTrue(validOutputs.Contains(v) || input == null); } } // find random matching word and make sure it's valid if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify random accepted terms"); } Int32sRef scratch = new Int32sRef(10); int num_ = LuceneTestCase.AtLeast(random, 500); for (int iter = 0; iter < num_; iter++) { T output = RandomAcceptedWord(fst, scratch); Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid"); Assert.IsTrue(OutputsEqual(termsMap[scratch], output)); if (doReverseLookup) { //System.out.println("lookup output=" + output + " outs=" + fst.Outputs); Int32sRef input = Util.GetByOutput(fstLong, (output as long?).Value); Assert.IsNotNull(input); //System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString()); Assert.AreEqual(scratch, input); } } // test IntsRefFSTEnum.Seek: if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify seek"); } Int32sRefFSTEnum <T> fstEnum_ = new 
Int32sRefFSTEnum <T>(fst); num_ = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num_; iter++) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" iter=" + iter); } if (random.NextBoolean()) { // seek to term that doesn't exist: while (true) { Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode); int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T))); if (pos < 0) { pos = -(pos + 1); // ok doesn't exist //System.out.println(" seek " + inputToString(inputMode, term)); Int32sRefFSTEnum.InputOutput <T> seekResult; if (random.Next(3) == 0) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekExact term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekExact(term); pos = -1; } else if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekFloor term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekFloor(term); pos--; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekCeil term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekCeil(term); } if (pos != -1 && pos < pairs.Count) { //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output)); Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, pairs[pos].Input)); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" got " + InputToString(inputMode, seekResult.Input)); } Assert.AreEqual(pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input)); Assert.IsTrue(OutputsEqual(pairs[pos].Output, seekResult.Output)); } else { // seeked before start or beyond end //System.out.println("seek=" + seekTerm); Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? 
"null" : InputToString(inputMode, seekResult.Input))); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" got null"); } } break; } } } else { // seek to term that does exist: InputOutput <T> pair = pairs[random.Next(pairs.Count)]; Int32sRefFSTEnum.InputOutput <T> seekResult; if (random.Next(3) == 2) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekExact term=" + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekExact(pair.Input); } else if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekFloor " + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekFloor(pair.Input); } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekCeil " + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekCeil(pair.Input); } Assert.IsNotNull(seekResult); Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input)); Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output)); } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: mixed next/seek"); } // test mixed next/seek num_ = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num_; iter++) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: iter " + iter); } // reset: fstEnum_ = new Int32sRefFSTEnum <T>(fst); int upto = -1; while (true) { bool isDone = false; if (upto == pairs.Count - 1 || random.NextBoolean()) { // next upto++; if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do next"); } isDone = fstEnum_.Next() == null; } else if (upto != -1 && upto < 0.75 * pairs.Count && random.NextBoolean()) { int attempt = 0; for (; attempt < 10; attempt++) { Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode); if (!termsMap.ContainsKey(term) && term.CompareTo(pairs[upto].Input) > 0) { int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T))); Debug.Assert(pos < 0); upto = -(pos + 1); if (random.NextBoolean()) { upto--; Assert.IsTrue(upto != -1); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekFloor(" + InputToString(inputMode, term) + ")"); } isDone = fstEnum_.SeekFloor(term) == null; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekCeil(" + InputToString(inputMode, term) + ")"); } isDone = fstEnum_.SeekCeil(term) == null; } break; } } if (attempt == 10) { continue; } } else { int inc = random.Next(pairs.Count - upto - 1); upto += inc; if (upto == -1) { upto = 0; } if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do seekCeil(" + InputToString(inputMode, pairs[upto].Input) + ")"); } isDone = fstEnum_.SeekCeil(pairs[upto].Input) == null; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do seekFloor(" + InputToString(inputMode, pairs[upto].Input) + ")"); } isDone = fstEnum_.SeekFloor(pairs[upto].Input) == null; } } if (LuceneTestCase.VERBOSE) { if (!isDone) { Console.WriteLine(" got " + InputToString(inputMode, fstEnum_.Current.Input)); } else { Console.WriteLine(" got null"); } } if (upto == pairs.Count) { Assert.IsTrue(isDone); break; } else { Assert.IsFalse(isDone); Assert.AreEqual(pairs[upto].Input, fstEnum_.Current.Input); Assert.IsTrue(OutputsEqual(pairs[upto].Output, fstEnum_.Current.Output)); /* * if (upto < pairs.size()-1) { * int tryCount = 0; * while(tryCount < 10) { * final IntsRef t = toIntsRef(getRandomString(), inputMode); * if (pairs.get(upto).input.compareTo(t) < 0) { * final boolean expected = 
t.compareTo(pairs.get(upto+1).input) < 0; * if (LuceneTestCase.VERBOSE) { * System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected); * } * Assert.AreEqual(expected, fstEnum.beforeNext(t)); * break; * } * tryCount++; * } * } */ } } } }
internal virtual FST <T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2); } bool willRewrite = random.NextBoolean(); Builder <T> builder = new Builder <T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, prune1, prune2, prune1 == 0 && prune2 == 0, allowRandomSuffixSharing ? random.NextBoolean() : true, allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue, outputs, null, willRewrite, PackedInt32s.DEFAULT, true, 15); if (LuceneTestCase.VERBOSE) { if (willRewrite) { Console.WriteLine("TEST: packed FST"); } else { Console.WriteLine("TEST: non-packed FST"); } } foreach (InputOutput <T> pair in pairs) { if (pair.Output is IEnumerable) { Builder <object> builderObject = builder as Builder <object>; var values = pair.Output as IEnumerable; foreach (object value in values) { builderObject.Add(pair.Input, value); } } else { builder.Add(pair.Input, pair.Output); } } FST <T> fst = builder.Finish(); if (random.NextBoolean() && fst != null && !willRewrite) { IOContext context = LuceneTestCase.NewIOContext(random); using (IndexOutput @out = dir.CreateOutput("fst.bin", context)) { fst.Save(@out); } IndexInput @in = dir.OpenInput("fst.bin", context); try { fst = new FST <T>(@in, outputs); } finally { @in.Dispose(); dir.DeleteFile("fst.bin"); } } if (LuceneTestCase.VERBOSE && pairs.Count <= 20 && fst != null) { using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8)) { Util.ToDot(fst, w, false, false); } Console.WriteLine("SAVED out.dot"); } if (LuceneTestCase.VERBOSE) { if (fst == null) { Console.WriteLine(" fst has 0 nodes (fully pruned)"); } else { Console.WriteLine(" fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs"); } } if (prune1 == 0 && prune2 == 0) { VerifyUnPruned(inputMode, fst); } else { VerifyPruned(inputMode, fst, prune1, prune2); } return(fst); }
public override void Run() { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread"); } while (Environment.TickCount < stopTimeMS) { try { IndexSearcher s = outerInstance.GetCurrentSearcher(); try { // Verify 1) IW is correctly setting // diagnostics, and 2) segment warming for // merged segments is actually happening: foreach (AtomicReaderContext sub in s.IndexReader.Leaves) { SegmentReader segReader = (SegmentReader)sub.Reader; IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics; assertNotNull(diagnostics); string source; diagnostics.TryGetValue("source", out source); assertNotNull(source); if (source.Equals("merge", StringComparison.Ordinal)) { assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo, !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.ContainsKey(segReader.core)); } } if (s.IndexReader.NumDocs > 0) { outerInstance.SmokeTestSearcher(s); Fields fields = MultiFields.GetFields(s.IndexReader); if (fields == null) { continue; } Terms terms = fields.GetTerms("body"); if (terms == null) { continue; } TermsEnum termsEnum = terms.GetIterator(null); int seenTermCount = 0; int shift; int trigger; if (totTermCount.Get() < 30) { shift = 0; trigger = 1; } else { trigger = totTermCount.Get() / 30; shift = Random.Next(trigger); } while (Environment.TickCount < stopTimeMS) { BytesRef term = termsEnum.Next(); if (term == null) { totTermCount.Set(seenTermCount); break; } seenTermCount++; // search 30 terms if ((seenTermCount + shift) % trigger == 0) { //if (VERBOSE) { //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString()); //} totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", term)))); } } //if (VERBOSE) { //System.out.println(Thread.currentThread().getName() + ": search done"); //} } } finally { outerInstance.ReleaseSearcher(s); } } catch (Exception t) { Console.WriteLine(Thread.CurrentThread.Name + ": hit exc"); outerInstance.m_failed.Set(true); Console.WriteLine(t.ToString()); throw new Exception(t.ToString(), t); } } }
// Swap in S, in place of E: private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos) { int savLength = term.Length; Debug.Assert(term.Offset == 0); // The 3 bytes starting at pos make up 1 // unicode character: Debug.Assert(IsHighBMPChar(term.Bytes, pos)); // NOTE: we cannot make this assert, because // AutomatonQuery legitimately sends us malformed UTF8 // (eg the UTF8 bytes with just 0xee) // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString(); // Save the bytes && length, since we need to // restore this if seek "back" finds no matching // terms if (term.Bytes.Length < 4 + pos) { term.Grow(4 + pos); } scratch[0] = (sbyte)term.Bytes[pos]; scratch[1] = (sbyte)term.Bytes[pos + 1]; scratch[2] = (sbyte)term.Bytes[pos + 2]; term.Bytes[pos] = 0xf0; term.Bytes[pos + 1] = 0x90; term.Bytes[pos + 2] = 0x80; term.Bytes[pos + 3] = 0x80; term.Length = 4 + pos; if (DEBUG_SURROGATES) { Console.WriteLine(" try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString())); } // Seek "back": outerInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true); // Test if the term we seek'd to in fact found a // surrogate pair at the same position as the E: Term t2 = te.Term(); // Cannot be null (or move to next field) because at // "worst" it'd seek to the same term we are on now, // unless we are being called from seek if (t2 == null || t2.Field != internedFieldName) { return(false); } if (DEBUG_SURROGATES) { Console.WriteLine(" got term=" + UnicodeUtil.ToHexString(t2.Text())); } // Now test if prefix is identical and we found // a non-BMP char at the same position: BytesRef b2 = t2.Bytes; Debug.Assert(b2.Offset == 0); bool matches; if (b2.Length >= term.Length && IsNonBMPChar(b2.Bytes, pos)) { matches = true; for (int i = 0; i < pos; i++) { if (term.Bytes[i] != b2.Bytes[i]) { matches = false; break; } } } else { matches = false; } // Restore term: term.Length = savLength; term.Bytes[pos] = (byte)scratch[0]; term.Bytes[pos + 1] = (byte)scratch[1]; term.Bytes[pos + 2] = (byte)scratch[2]; return(matches); }
// Look for seek type 3 ("pop"): if the delta from // prev -> current was replacing an S with an E, // we must now seek to beyond that E. this seek // "finishes" the dance at this character // position. private bool DoPop() { if (DEBUG_SURROGATES) { Console.WriteLine(" try pop"); } Debug.Assert(newSuffixStart <= prevTerm.Length); Debug.Assert(newSuffixStart < scratchTerm.Length || newSuffixStart == 0); if (prevTerm.Length > newSuffixStart && IsNonBMPChar(prevTerm.Bytes, newSuffixStart) && IsHighBMPChar(scratchTerm.Bytes, newSuffixStart)) { // Seek type 2 -- put 0xFF at this position: scratchTerm.Bytes[newSuffixStart] = 0xff; scratchTerm.Length = newSuffixStart + 1; if (DEBUG_SURROGATES) { Console.WriteLine(" seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString()); } // TODO: more efficient seek? can we simply swap // the enums? outerInstance.TermsDict.SeekEnum(termEnum, new Term(fieldInfo.Name, scratchTerm), true); Term t2 = termEnum.Term(); // We could hit EOF or different field since this // was a seek "forward": if (t2 != null && t2.Field == internedFieldName) { if (DEBUG_SURROGATES) { Console.WriteLine(" got term=" + UnicodeUtil.ToHexString(t2.Text()) + " " + t2.Bytes); } BytesRef b2 = t2.Bytes; Debug.Assert(b2.Offset == 0); // Set newSuffixStart -- we can't use // termEnum's since the above seek may have // done no scanning (eg, term was precisely // an index term, or, was in the term seek // cache): scratchTerm.CopyBytes(b2); SetNewSuffixStart(prevTerm, scratchTerm); return(true); } else if (newSuffixStart != 0 || scratchTerm.Length != 0) { if (DEBUG_SURROGATES) { Console.WriteLine(" got term=null (or next field)"); } newSuffixStart = 0; scratchTerm.Length = 0; return(true); } } return(false); }
// FST is pruned private void VerifyPruned(int inputMode, FST <T> fst, int prune1, int prune2) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now verify pruned " + pairs.Count + " terms; outputs=" + outputs); foreach (InputOutput <T> pair in pairs) { Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output)); } } // To validate the FST, we brute-force compute all prefixes // in the terms, matched to their "common" outputs, prune that // set according to the prune thresholds, then assert the FST // matches that same set. // NOTE: Crazy RAM intensive!! //System.out.println("TEST: tally prefixes"); // build all prefixes IDictionary <Int32sRef, CountMinOutput <T> > prefixes = new HashMap <Int32sRef, CountMinOutput <T> >(); Int32sRef scratch = new Int32sRef(10); foreach (InputOutput <T> pair in pairs) { scratch.CopyInt32s(pair.Input); for (int idx = 0; idx <= pair.Input.Length; idx++) { scratch.Length = idx; CountMinOutput <T> cmo = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null; if (cmo == null) { cmo = new CountMinOutput <T>(); cmo.Count = 1; cmo.Output = pair.Output; prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo; } else { cmo.Count++; T output1 = cmo.Output; if (output1.Equals(outputs.NoOutput)) { output1 = outputs.NoOutput; } T output2 = pair.Output; if (output2.Equals(outputs.NoOutput)) { output2 = outputs.NoOutput; } cmo.Output = outputs.Common(output1, output2); } if (idx == pair.Input.Length) { cmo.IsFinal = true; cmo.FinalOutput = cmo.Output; } } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now prune"); } // prune 'em // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since // in .NET you cannot delete records while enumerating forward through a dictionary. for (int i = prefixes.Count - 1; i >= 0; i--) { KeyValuePair <Int32sRef, CountMinOutput <T> > ent = prefixes.ElementAt(i); Int32sRef prefix = ent.Key; CountMinOutput <T> cmo = ent.Value; if (LuceneTestCase.VERBOSE) { Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal); } bool keep; if (prune1 > 0) { keep = cmo.Count >= prune1; } else { Debug.Assert(prune2 > 0); if (prune2 > 1 && cmo.Count >= prune2) { keep = true; } else if (prefix.Length > 0) { // consult our parent scratch.Length = prefix.Length - 1; Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length); CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null; //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count)); keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1))); } else if (cmo.Count >= prune2) { keep = true; } else { keep = false; } } if (!keep) { prefixes.Remove(prefix); //System.out.println(" remove"); } else { // clear isLeaf for all ancestors //System.out.println(" keep"); scratch.CopyInt32s(prefix); scratch.Length--; while (scratch.Length >= 0) { CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? 
prefixes[scratch] : null; if (cmo2 != null) { //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch)); cmo2.IsLeaf = false; } scratch.Length--; } } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: after prune"); foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes) { Console.WriteLine(" " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal); if (ent.Value.IsFinal) { Console.WriteLine(" finalOutput=" + outputs.OutputToString(ent.Value.FinalOutput)); } } } if (prefixes.Count <= 1) { Assert.IsNull(fst); return; } Assert.IsNotNull(fst); // make sure FST only enums valid prefixes if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check pruned enum"); } Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst); Int32sRefFSTEnum.InputOutput <T> current; while ((current = fstEnum.Next()) != null) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + outputs.OutputToString(current.Output)); } CountMinOutput <T> cmo = prefixes.ContainsKey(current.Input) ? prefixes[current.Input] : null; Assert.IsNotNull(cmo); Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal); //if (cmo.isFinal && !cmo.isLeaf) { if (cmo.IsFinal) { Assert.AreEqual(cmo.FinalOutput, current.Output); } else { Assert.AreEqual(cmo.Output, current.Output); } } // make sure all non-pruned prefixes are present in the FST if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify all prefixes"); } int[] stopNode = new int[1]; foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes) { if (ent.Key.Length > 0) { CountMinOutput <T> cmo = ent.Value; T output = Run(fst, ent.Key, stopNode); if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + outputs.OutputToString(cmo.Output)); } // if (cmo.isFinal && !cmo.isLeaf) { if (cmo.IsFinal) { Assert.AreEqual(cmo.FinalOutput, output); } else { Assert.AreEqual(cmo.Output, output); } Assert.AreEqual(ent.Key.Length, stopNode[0]); } } }
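For orientation, a much-simplified sketch of the brute-force reference model VerifyPruned builds before pruning: tally every prefix of every input. (The real code keys on Int32sRef, merges outputs via Outputs.Common, and tracks final states; this sketch keeps only string keys and counts.)

using System.Collections.Generic;

static class PrefixTally
{
    // Count how many inputs share each prefix, including the empty prefix.
    // A prefix then survives a prune1 threshold iff counts[prefix] >= prune1.
    public static Dictionary<string, int> TallyPrefixes(IEnumerable<string> inputs)
    {
        var counts = new Dictionary<string, int>();
        foreach (string input in inputs)
        {
            for (int len = 0; len <= input.Length; len++)
            {
                string prefix = input.Substring(0, len);
                counts.TryGetValue(prefix, out int c);
                counts[prefix] = c + 1;
            }
        }
        return counts;
    }
}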
// Look for seek type 1 ("push"): if the newly added // suffix contains any S, we must try to seek to the // corresponding E. If we find a match, we go there; // else we keep looking for additional S's in the new // suffix. this "starts" the dance, at this character // position: private void DoPushes() { int upTo = newSuffixStart; if (DEBUG_SURROGATES) { Console.WriteLine(" try push newSuffixStart=" + newSuffixStart + " scratchLen=" + scratchTerm.Length); } while (upTo < scratchTerm.Length) { if (IsNonBMPChar(scratchTerm.Bytes, upTo) && (upTo > newSuffixStart || (upTo >= prevTerm.Length || (!IsNonBMPChar(prevTerm.Bytes, upTo) && !IsHighBMPChar(prevTerm.Bytes, upTo))))) { // A non-BMP char (4 bytes UTF8) starts here: Debug.Assert(scratchTerm.Length >= upTo + 4); int savLength = scratchTerm.Length; scratch[0] = (sbyte)scratchTerm.Bytes[upTo]; scratch[1] = (sbyte)scratchTerm.Bytes[upTo + 1]; scratch[2] = (sbyte)scratchTerm.Bytes[upTo + 2]; scratchTerm.Bytes[upTo] = (byte)UTF8_HIGH_BMP_LEAD; scratchTerm.Bytes[upTo + 1] = 0x80; scratchTerm.Bytes[upTo + 2] = 0x80; scratchTerm.Length = upTo + 3; if (DEBUG_SURROGATES) { Console.WriteLine(" try seek 1 pos=" + upTo + " term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString() + " len=" + scratchTerm.Length); } // Seek "forward": // TODO: more efficient seek? outerInstance.TermsDict.SeekEnum(seekTermEnum, new Term(fieldInfo.Name, scratchTerm), true); scratchTerm.Bytes[upTo] = (byte)scratch[0]; scratchTerm.Bytes[upTo + 1] = (byte)scratch[1]; scratchTerm.Bytes[upTo + 2] = (byte)scratch[2]; scratchTerm.Length = savLength; // Did we find a match? Term t2 = seekTermEnum.Term(); if (DEBUG_SURROGATES) { if (t2 == null) { Console.WriteLine(" hit term=null"); } else { Console.WriteLine(" hit term=" + UnicodeUtil.ToHexString(t2.Text()) + " " + (t2 == null ? null : t2.Bytes)); } } // Since this was a seek "forward", we could hit // EOF or a different field: bool matches; if (t2 != null && t2.Field == internedFieldName) { BytesRef b2 = t2.Bytes; Debug.Assert(b2.Offset == 0); if (b2.Length >= upTo + 3 && IsHighBMPChar(b2.Bytes, upTo)) { matches = true; for (int i = 0; i < upTo; i++) { if (scratchTerm.Bytes[i] != b2.Bytes[i]) { matches = false; break; } } } else { matches = false; } } else { matches = false; } if (matches) { if (DEBUG_SURROGATES) { Console.WriteLine(" matches!"); } // OK seek "back" // TODO: more efficient seek? outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), true); scratchTerm.CopyBytes(seekTermEnum.Term().Bytes); // +3 because we don't need to check the char // at upTo: we know it's > BMP upTo += 3; // NOTE: we keep iterating, now, since this // can easily "recurse". Ie, after seeking // forward at a certain char position, we may // find another surrogate in our [new] suffix // and must then do another seek (recurse) } else { upTo++; } } else { upTo++; } } }
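The dance's byte tests follow from standard UTF-8 facts; the exact masks inside IsHighBMPChar/IsNonBMPChar are internal to the enum, so the predicates below are an assumed equivalent rather than the actual implementation: a 4-byte sequence (category S, U+10000..U+10FFFF) leads with 0xF0..0xF4, and a BMP char at or above U+E000 (category E) leads with 0xEE or 0xEF.

// Hypothetical stand-ins for IsNonBMPChar / IsHighBMPChar, from UTF-8 lead-byte ranges:
static bool LeadsNonBmp(byte b) => b >= 0xF0 && b <= 0xF4;   // starts a 4-byte sequence (non-BMP)
static bool LeadsHighBmp(byte b) => b == 0xEE || b == 0xEF;  // starts U+E000..U+FFFF (3-byte sequence)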
public virtual void TestCommitOnCloseForceMerge() { Directory dir = NewDirectory(); // Must disable throwing exc on double-write: this // test uses IW.rollback which easily results in // writing to same file more than once if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).PreventDoubleWrite = false; } IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(10))); for (int j = 0; j < 17; j++) { AddDocWithIndex(writer, j); } writer.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND)); writer.ForceMerge(1); // Open a reader before closing (committing) the writer: DirectoryReader reader = DirectoryReader.Open(dir); // Reader should see index as multi-seg at this // point: Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment"); reader.Dispose(); // Abort the writer: writer.Rollback(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge"); // Open a reader after aborting writer: reader = DirectoryReader.Open(dir); // Reader should still see index as multi-segment Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment"); reader.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: do real full merge"); } writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND)); writer.ForceMerge(1); writer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: writer closed"); } TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge"); // Open a reader after the writer has fully merged and closed: reader = DirectoryReader.Open(dir); // Reader should see index as one segment Assert.AreEqual(1, reader.Leaves.Count, "Reader incorrectly sees more than one segment"); reader.Dispose(); dir.Dispose(); }
public virtual void TestIntersectRandom() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int numTerms = AtLeast(300); //final int numTerms = 50; HashSet <string> terms = new HashSet <string>(); ICollection <string> pendingTerms = new List <string>(); IDictionary <BytesRef, int?> termToID = new Dictionary <BytesRef, int?>(); int id = 0; while (terms.Count != numTerms) { string s = RandomString; if (!terms.Contains(s)) { terms.Add(s); pendingTerms.Add(s); if (Random.Next(20) == 7) { AddDoc(w, pendingTerms, termToID, id++); } } } AddDoc(w, pendingTerms, termToID, id++); BytesRef[] termsArray = new BytesRef[terms.Count]; HashSet <BytesRef> termsSet = new HashSet <BytesRef>(); { int upto = 0; foreach (string s in terms) { BytesRef b = new BytesRef(s); termsArray[upto++] = b; termsSet.Add(b); } Array.Sort(termsArray); } if (VERBOSE) { Console.WriteLine("\nTEST: indexed terms (unicode order):"); foreach (BytesRef t in termsArray) { Console.WriteLine(" " + t.Utf8ToString() + " -> id:" + termToID[t]); } } IndexReader r = w.GetReader(); w.Dispose(); // NOTE: intentional insanity!! FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false); for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++) { // TODO: can we also test infinite As here...? // From the random terms, pick some ratio and compile an // automaton: HashSet <string> acceptTerms = new HashSet <string>(); SortedSet <BytesRef> sortedAcceptTerms = new SortedSet <BytesRef>(); double keepPct = Random.NextDouble(); Automaton a; if (iter == 0) { if (VERBOSE) { Console.WriteLine("\nTEST: empty automaton"); } a = BasicAutomata.MakeEmpty(); } else { if (VERBOSE) { Console.WriteLine("\nTEST: keepPct=" + keepPct); } foreach (string s in terms) { string s2; if (Random.NextDouble() <= keepPct) { s2 = s; } else { s2 = RandomString; } acceptTerms.Add(s2); sortedAcceptTerms.Add(new BytesRef(s2)); } a = BasicAutomata.MakeStringUnion(sortedAcceptTerms); } if (Random.NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: reduce the automaton"); } a.Reduce(); } CompiledAutomaton c = new CompiledAutomaton(a, true, false); BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.Count]; HashSet <BytesRef> acceptTermsSet = new HashSet <BytesRef>(); int upto = 0; foreach (string s in acceptTerms) { BytesRef b = new BytesRef(s); acceptTermsArray[upto++] = b; acceptTermsSet.Add(b); Assert.IsTrue(Accepts(c, b)); } Array.Sort(acceptTermsArray); if (VERBOSE) { Console.WriteLine("\nTEST: accept terms (unicode order):"); foreach (BytesRef t in acceptTermsArray) { Console.WriteLine(" " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : "")); } Console.WriteLine(a.ToDot()); } for (int iter2 = 0; iter2 < 100; iter2++) { BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? 
"<null>" : startTerm.Utf8ToString())); if (startTerm != null) { int state = c.RunAutomaton.InitialState; for (int idx = 0; idx < startTerm.Length; idx++) { int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff; Console.WriteLine(" state=" + state + " label=" + label); state = c.RunAutomaton.Step(state, label); Assert.IsTrue(state != -1); } Console.WriteLine(" state=" + state); } } TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm); int loc; if (startTerm == null) { loc = 0; } else { loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm)); if (loc < 0) { loc = -(loc + 1); } else { // startTerm exists in index loc++; } } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc])) { loc++; } DocsEnum docsEnum = null; while (loc < termsArray.Length) { BytesRef expected = termsArray[loc]; BytesRef actual = te.Next(); if (VERBOSE) { Console.WriteLine("TEST: next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString())); } Assert.AreEqual(expected, actual); Assert.AreEqual(1, te.DocFreq); docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE); int docID = docsEnum.NextDoc(); Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]); do { loc++; } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc])); } Assert.IsNull(te.Next()); } } r.Dispose(); dir.Dispose(); }
public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField) { if (VERBOSE) { Console.WriteLine("\nr1 docs:"); PrintDocs(r1); Console.WriteLine("\nr2 docs:"); PrintDocs(r2); } if (r1.NumDocs != r2.NumDocs) { Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs); } bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc); int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping // create mapping from id2 space to id1 based on idField Fields f1 = MultiFields.GetFields(r1); if (f1 == null) { // make sure r2 is empty Assert.IsNull(MultiFields.GetFields(r2)); return; } Terms terms1 = f1.GetTerms(idField); if (terms1 == null) { Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null); return; } TermsEnum termsEnum = terms1.GetIterator(null); IBits liveDocs1 = MultiFields.GetLiveDocs(r1); IBits liveDocs2 = MultiFields.GetLiveDocs(r2); Fields fields = MultiFields.GetFields(r2); if (fields == null) { // make sure r1 is in fact empty (eg has only all // deleted docs): IBits liveDocs = MultiFields.GetLiveDocs(r1); DocsEnum docs = null; while (termsEnum.Next() != null) { docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE); while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { Assert.Fail("r1 is not empty but r2 is"); } } return; } Terms terms2 = fields.GetTerms(idField); TermsEnum termsEnum2 = terms2.GetIterator(null); DocsEnum termDocs1 = null; DocsEnum termDocs2 = null; while (true) { BytesRef term = termsEnum.Next(); //System.out.println("TEST: match id term=" + term); if (term == null) { break; } termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE); if (termsEnum2.SeekExact(term)) { termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE); } else { termDocs2 = null; } if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS) { // this doc is deleted and wasn't replaced Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS); continue; } int id1 = termDocs1.DocID; Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc()); Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int id2 = termDocs2.DocID; Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc()); r2r1[id2] = id1; // verify stored fields are equivalent try { VerifyEquals(r1.Document(id1), r2.Document(id2)); } catch (Exception /*t*/) { Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); Console.WriteLine(" d1=" + r1.Document(id1)); Console.WriteLine(" d2=" + r2.Document(id2)); throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } try { // verify term vectors are equivalent VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2)); } catch (Exception /*e*/) { Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); Fields tv1 = r1.GetTermVectors(id1); Console.WriteLine(" d1=" + tv1); if (tv1 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; foreach (string field in tv1) { Console.WriteLine(" " + field + ":"); Terms terms3 = tv1.GetTerms(field); Assert.IsNotNull(terms3); TermsEnum termsEnum3 = terms3.GetIterator(null); BytesRef term2; while ((term2 = termsEnum3.Next()) != null) { Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq); dpEnum =
termsEnum3.DocsAndPositions(null, dpEnum); if (dpEnum != null) { Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dpEnum.Freq; Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { Console.WriteLine(" pos=" + dpEnum.NextPosition()); } } else { dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS); Assert.IsNotNull(dEnum); Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dEnum.Freq; Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq); } } } } Fields tv2 = r2.GetTermVectors(id2); Console.WriteLine(" d2=" + tv2); if (tv2 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; foreach (string field in tv2) { Console.WriteLine(" " + field + ":"); Terms terms3 = tv2.GetTerms(field); Assert.IsNotNull(terms3); TermsEnum termsEnum3 = terms3.GetIterator(null); BytesRef term2; while ((term2 = termsEnum3.Next()) != null) { Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq); dpEnum = termsEnum3.DocsAndPositions(null, dpEnum); if (dpEnum != null) { Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dpEnum.Freq; Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { Console.WriteLine(" pos=" + dpEnum.NextPosition()); } } else { dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS); Assert.IsNotNull(dEnum); Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dEnum.Freq; Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq); } } } } throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } //System.out.println("TEST: done match id"); // Verify postings //System.out.println("TEST: create te1"); Fields fields1 = MultiFields.GetFields(r1); IEnumerator <string> fields1Enum = fields1.GetEnumerator(); Fields fields2 = MultiFields.GetFields(r2); IEnumerator <string> fields2Enum = fields2.GetEnumerator(); string field1 = null, field2 = null; TermsEnum termsEnum1 = null; termsEnum2 = null; DocsEnum docs1 = null, docs2 = null; // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.NumDocs]; long[] info2 = new long[r2.NumDocs]; for (; ;) { BytesRef term1 = null, term2 = null; // iterate until we get some docs int len1; for (; ;) { len1 = 0; if (termsEnum1 == null) { if (!fields1Enum.MoveNext()) { break; } field1 = fields1Enum.Current; Terms terms = fields1.GetTerms(field1); if (terms == null) { continue; } termsEnum1 = terms.GetIterator(null); } term1 = termsEnum1.Next(); if (term1 == null) { // no more terms in this field termsEnum1 = null; continue; } //System.out.println("TEST: term1=" + term1); docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS); while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = docs1.DocID; int f = docs1.Freq; info1[len1] = (((long)d) << 32) | (uint)f; len1++; } if (len1 > 0) { break; } } // iterate until we get some docs int len2; for (; ;) { len2 = 0; if (termsEnum2 == null) { if (!fields2Enum.MoveNext()) { break; } field2 = fields2Enum.Current; Terms terms = fields2.GetTerms(field2); if (terms == null) { continue; } termsEnum2 = terms.GetIterator(null); } term2 = termsEnum2.Next(); if (term2 == null) { // no more terms in this field termsEnum2 = null; continue; } //System.out.println("TEST: term1=" 
+ term1); docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS); while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = r2r1[docs2.DocID]; int f = docs2.Freq; info2[len2] = (((long)d) << 32) | (uint)f; len2++; } if (len2 > 0) { break; } } Assert.AreEqual(len1, len2); if (len1 == 0) // no more terms { break; } Assert.AreEqual(field1, field2); Assert.IsTrue(term1.BytesEquals(term2)); if (!hasDeletes) { Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq); } Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes); // sort info2 to get it into ascending docid Array.Sort(info2, 0, len2); // now compare for (int i = 0; i < len1; i++) { Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString()); } } }
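The doc/freq packing used by the postings verification above is a small trick worth isolating: the docID occupies the high 32 bits and the freq the low 32 bits, so sorting the packed longs orders postings by ascending docID and the two arrays can then be compared element-wise. A minimal sketch of the idiom (the helper names are illustrative, not part of the test):

static long Pack(int docID, int freq)
{
    // Route freq through uint so sign extension cannot leak into the high bits.
    return (((long)docID) << 32) | (uint)freq;
}

static int UnpackDocID(long packed)
{
    return (int)((ulong)packed >> 32); // logical shift, as in the assert message above
}

static int UnpackFreq(long packed)
{
    return (int)(packed & int.MaxValue); // low bits only; freq is never negative
}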
private void TestRandomSeeks(IndexReader r, params string[] validTermStrings) { BytesRef[] validTerms = new BytesRef[validTermStrings.Length]; for (int termIDX = 0; termIDX < validTermStrings.Length; termIDX++) { validTerms[termIDX] = new BytesRef(validTermStrings[termIDX]); } Array.Sort(validTerms); if (VERBOSE) { Console.WriteLine("TEST: " + validTerms.Length + " terms:"); foreach (BytesRef t in validTerms) { Console.WriteLine(" " + t.Utf8ToString() + " " + t); } } TermsEnum te = MultiFields.GetTerms(r, FIELD).GetIterator(null); int END_LOC = -validTerms.Length - 1; IList <TermAndState> termStates = new List <TermAndState>(); for (int iter = 0; iter < 100 * RANDOM_MULTIPLIER; iter++) { BytesRef t; int loc; TermState termState; if (Random.Next(6) == 4) { // pick term that doesn't exist: t = GetNonExistTerm(validTerms); termState = null; if (VERBOSE) { Console.WriteLine("\nTEST: invalid term=" + t.Utf8ToString()); } loc = Array.BinarySearch(validTerms, t); } else if (termStates.Count != 0 && Random.Next(4) == 1) { TermAndState ts = termStates[Random.Next(termStates.Count)]; t = ts.Term; loc = Array.BinarySearch(validTerms, t); Assert.IsTrue(loc >= 0); termState = ts.State; if (VERBOSE) { Console.WriteLine("\nTEST: valid termState term=" + t.Utf8ToString()); } } else { // pick valid term loc = Random.Next(validTerms.Length); t = BytesRef.DeepCopyOf(validTerms[loc]); termState = null; if (VERBOSE) { Console.WriteLine("\nTEST: valid term=" + t.Utf8ToString()); } } // seekCeil or seekExact: bool doSeekExact = Random.NextBoolean(); if (termState != null) { if (VERBOSE) { Console.WriteLine(" seekExact termState"); } te.SeekExact(t, termState); } else if (doSeekExact) { if (VERBOSE) { Console.WriteLine(" seekExact"); } Assert.AreEqual(loc >= 0, te.SeekExact(t)); } else { if (VERBOSE) { Console.WriteLine(" seekCeil"); } TermsEnum.SeekStatus result = te.SeekCeil(t); if (VERBOSE) { Console.WriteLine(" got " + result); } if (loc >= 0) { Assert.AreEqual(TermsEnum.SeekStatus.FOUND, result); } else if (loc == END_LOC) { Assert.AreEqual(TermsEnum.SeekStatus.END, result); } else { Debug.Assert(loc >= -validTerms.Length); Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, result); } } if (loc >= 0) { Assert.AreEqual(t, te.Term); } else if (doSeekExact) { // TermsEnum is unpositioned if seekExact returns false continue; } else if (loc == END_LOC) { continue; } else { loc = -loc - 1; Assert.AreEqual(validTerms[loc], te.Term); } // Do a bunch of next's after the seek int numNext = Random.Next(validTerms.Length); for (int nextCount = 0; nextCount < numNext; nextCount++) { if (VERBOSE) { Console.WriteLine("\nTEST: next loc=" + loc + " of " + validTerms.Length); } BytesRef t2 = te.Next(); loc++; if (loc == validTerms.Length) { Assert.IsNull(t2); break; } else { Assert.AreEqual(validTerms[loc], t2); if (Random.Next(40) == 17 && termStates.Count < 100) { termStates.Add(new TermAndState(validTerms[loc], te.GetTermState())); } } } } }
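A note on the seek bookkeeping above: Array.BinarySearch encodes a miss as the bitwise complement of the insertion point, which is why the test recovers the ceiling index with loc = -loc - 1 and why END_LOC = -validTerms.Length - 1 denotes a seek past the last term. A standalone illustration with hypothetical values:

string[] sorted = { "aaa", "ccc" };            // must already be sorted, like validTerms
int hit = Array.BinarySearch(sorted, "ccc");   // 1: found at index 1
int miss = Array.BinarySearch(sorted, "bbb");  // -2: complement of insertion point 1
int ceil = -miss - 1;                          // 1: next larger entry ("ccc"), the NOT_FOUND position
int end = Array.BinarySearch(sorted, "zzz");   // -3 == -sorted.Length - 1: past the end, the END case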
public virtual void IndexDoc() { Document d = new Document(); FieldType customType1 = new FieldType(TextField.TYPE_STORED); customType1.IsTokenized = false; customType1.OmitNorms = true; List <Field> fields = new List <Field>(); string idString = IdString; Field idField = NewField("id", idString, customType1); fields.Add(idField); int nFields = NextInt(MaxFields); for (int i = 0; i < nFields; i++) { FieldType customType = new FieldType(); switch (NextInt(4)) { case 0: break; case 1: customType.StoreTermVectors = true; break; case 2: customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; break; case 3: customType.StoreTermVectors = true; customType.StoreTermVectorOffsets = true; break; } switch (NextInt(4)) { case 0: customType.IsStored = true; customType.OmitNorms = true; customType.IsIndexed = true; fields.Add(NewField("f" + NextInt(100), GetString(1), customType)); break; case 1: customType.IsIndexed = true; customType.IsTokenized = true; fields.Add(NewField("f" + NextInt(100), GetString(0), customType)); break; case 2: customType.IsStored = true; customType.StoreTermVectors = false; customType.StoreTermVectorOffsets = false; customType.StoreTermVectorPositions = false; fields.Add(NewField("f" + NextInt(100), GetString(0), customType)); break; case 3: customType.IsStored = true; customType.IsIndexed = true; customType.IsTokenized = true; fields.Add(NewField("f" + NextInt(100), GetString(BigFieldSize), customType)); break; } } if (SameFieldOrder) { fields.Sort(fieldNameComparer); } else { // random placement of id field also fields.Swap(NextInt(fields.Count), 0); } for (int i = 0; i < fields.Count; i++) { d.Add(fields[i]); } if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": indexing id:" + idString); } w.UpdateDocument(new Term("id", idString), d); //System.out.println(Thread.currentThread().getName() + ": indexing "+d); Docs[idString] = d; }
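The FieldType combinations randomized above can be reproduced in isolation; note that the stored-only branch (case 2 of the second switch) clears the term-vector flags, since term vectors only apply to indexed fields. A minimal sketch of one valid combination (the names are illustrative, not part of the test):

FieldType ft = new FieldType(TextField.TYPE_STORED); // the copy constructor yields an unfrozen type
ft.StoreTermVectors = true;
ft.StoreTermVectorPositions = true;
ft.StoreTermVectorOffsets = true;
ft.Freeze(); // optional: any later mutation of ft now throws
Field f = new Field("f42", "some text", ft);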
public virtual void Test() { MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors")); dir.PreventDoubleWrite = false; double rate = Random.NextDouble() * 0.01; //System.out.println("rate=" + rate); dir.RandomIOExceptionRateOnOpen = rate; int iters = AtLeast(20); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); IndexReader r = null; DirectoryReader r2 = null; bool any = false; MockDirectoryWrapper dirCopy = null; int lastNumDocs = 0; for (int iter = 0; iter < iters; iter++) { IndexWriter w = null; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } try { MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); if (VERBOSE) { // Do this ourselves instead of relying on LTC so // we see incrementing messageID: iwc.SetInfoStream(new TextWriterInfoStream(Console.Out)); } var ms = iwc.MergeScheduler; if (ms is IConcurrentMergeScheduler) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } w = new IndexWriter(dir, iwc); if (r != null && Random.Next(5) == 3) { if (Random.NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: addIndexes IR[]"); } w.AddIndexes(new IndexReader[] { r }); } else { if (VERBOSE) { Console.WriteLine("TEST: addIndexes Directory[]"); } w.AddIndexes(new Directory[] { dirCopy }); } } else { if (VERBOSE) { Console.WriteLine("TEST: addDocument"); } w.AddDocument(docs.NextDoc()); } dir.RandomIOExceptionRateOnOpen = 0.0; w.Dispose(); w = null; // NOTE: this is O(N^2)! Only enable for temporary debugging: //dir.setRandomIOExceptionRateOnOpen(0.0); //TestUtil.CheckIndex(dir); //dir.setRandomIOExceptionRateOnOpen(rate); // Verify numDocs only increases, to catch IndexWriter // accidentally deleting the index: dir.RandomIOExceptionRateOnOpen = 0.0; Assert.IsTrue(DirectoryReader.IndexExists(dir)); if (r2 == null) { r2 = DirectoryReader.Open(dir); } else { DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2); if (r3 != null) { r2.Dispose(); r2 = r3; } } Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs); lastNumDocs = r2.NumDocs; //System.out.println("numDocs=" + lastNumDocs); dir.RandomIOExceptionRateOnOpen = rate; any = true; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": success"); } } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": exception"); Console.WriteLine(ioe.ToString()); Console.Write(ioe.StackTrace); } if (w != null) { // NOTE: leave random IO exceptions enabled here, // to verify that rollback does not try to write // anything: w.Rollback(); } } if (any && r == null && Random.NextBoolean()) { // Make a copy of a non-empty index so we can use // it to addIndexes later: dir.RandomIOExceptionRateOnOpen = 0.0; r = DirectoryReader.Open(dir); dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy")); HashSet <string> files = new HashSet <string>(); foreach (string file in dir.ListAll()) { dir.Copy(dirCopy, file, file, IOContext.DEFAULT); files.Add(file); } dirCopy.Sync(files); // Have IW kiss the dir so we remove any leftover // files ... 
we can easily have leftover files at // the time we take a copy because we are holding // open a reader: (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose(); dirCopy.RandomIOExceptionRate = rate; dir.RandomIOExceptionRateOnOpen = rate; } } if (r2 != null) { r2.Dispose(); } if (r != null) { r.Dispose(); dirCopy.Dispose(); } dir.Dispose(); }
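The reader-refresh idiom the test leans on is worth spelling out: DirectoryReader.OpenIfChanged returns null when the index is unchanged and a new reader otherwise, and in the latter case the caller is responsible for disposing the old reader. A minimal sketch, assuming dir already holds an index:

DirectoryReader reader = DirectoryReader.Open(dir);
// ... later, after more writes have been committed ...
DirectoryReader newer = DirectoryReader.OpenIfChanged(reader);
if (newer != null)
{
    reader.Dispose(); // dispose the old reader only once the new one is open
    reader = newer;
}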
private PreviousSearchState AssertSame(IndexSearcher mockSearcher, NodeState.ShardIndexSearcher shardSearcher, Query q, Sort sort, PreviousSearchState state) { int numHits = TestUtil.NextInt(Random(), 1, 100); if (state != null && state.SearchAfterLocal == null) { // In addition to what we last searched: numHits += state.NumHitsPaged; } if (VERBOSE) { Console.WriteLine("TEST: query=" + q + " sort=" + sort + " numHits=" + numHits); if (state != null) { Console.WriteLine(" prev: searchAfterLocal=" + state.SearchAfterLocal + " searchAfterShard=" + state.SearchAfterShard + " numHitsPaged=" + state.NumHitsPaged); } } // Single (mock local) searcher: TopDocs hits; if (sort == null) { if (state != null && state.SearchAfterLocal != null) { hits = mockSearcher.SearchAfter(state.SearchAfterLocal, q, numHits); } else { hits = mockSearcher.Search(q, numHits); } } else { hits = mockSearcher.Search(q, numHits, sort); } // Shard searcher TopDocs shardHits; if (sort == null) { if (state != null && state.SearchAfterShard != null) { shardHits = shardSearcher.SearchAfter(state.SearchAfterShard, q, numHits); } else { shardHits = shardSearcher.Search(q, numHits); } } else { shardHits = shardSearcher.Search(q, numHits, sort); } int numNodes = shardSearcher.NodeVersions.Length; int[] @base = new int[numNodes]; IList <IndexReaderContext> subs = mockSearcher.TopReaderContext.Children; Assert.AreEqual(numNodes, subs.Count); for (int nodeID = 0; nodeID < numNodes; nodeID++) { @base[nodeID] = subs[nodeID].DocBaseInParent; } if (VERBOSE) { /* * for(int shardID=0;shardID<shardSearchers.Length;shardID++) { * System.out.println(" shard=" + shardID + " maxDoc=" + shardSearchers[shardID].searcher.getIndexReader().MaxDoc); * } */ Console.WriteLine(" single searcher: " + hits.TotalHits + " totalHits maxScore=" + hits.MaxScore); for (int i = 0; i < hits.ScoreDocs.Length; i++) { ScoreDoc sd = hits.ScoreDocs[i]; Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score); } Console.WriteLine(" shard searcher: " + shardHits.TotalHits + " totalHits maxScore=" + shardHits.MaxScore); for (int i = 0; i < shardHits.ScoreDocs.Length; i++) { ScoreDoc sd = shardHits.ScoreDocs[i]; Console.WriteLine(" doc=" + sd.Doc + " (rebased: " + (sd.Doc + @base[sd.ShardIndex]) + ") score=" + sd.Score + " shard=" + sd.ShardIndex); } } int numHitsPaged; if (state != null && state.SearchAfterLocal != null) { numHitsPaged = hits.ScoreDocs.Length; if (state != null) { numHitsPaged += state.NumHitsPaged; } } else { numHitsPaged = hits.ScoreDocs.Length; } bool moreHits; ScoreDoc bottomHit; ScoreDoc bottomHitShards; if (numHitsPaged < hits.TotalHits) { // More hits to page through moreHits = true; if (sort == null) { bottomHit = hits.ScoreDocs[hits.ScoreDocs.Length - 1]; ScoreDoc sd = shardHits.ScoreDocs[shardHits.ScoreDocs.Length - 1]; // Must copy because below we rebase: bottomHitShards = new ScoreDoc(sd.Doc, sd.Score, sd.ShardIndex); if (VERBOSE) { Console.WriteLine(" save bottomHit=" + bottomHit); } } else { bottomHit = null; bottomHitShards = null; } } else { Assert.AreEqual(hits.TotalHits, numHitsPaged); bottomHit = null; bottomHitShards = null; moreHits = false; } // Must rebase so Assert.AreEqual passes: for (int hitID = 0; hitID < shardHits.ScoreDocs.Length; hitID++) { ScoreDoc sd = shardHits.ScoreDocs[hitID]; sd.Doc += @base[sd.ShardIndex]; } TestUtil.AssertEquals(hits, shardHits); if (moreHits) { // Return a continuation: return(new PreviousSearchState(q, sort, bottomHit, bottomHitShards, shardSearcher.NodeVersions, numHitsPaged)); } else { 
return(null); } }
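The rebasing step near the end of AssertSame is the crux of the comparison: a shard-local docID is only comparable to the single searcher's hits after adding that shard's doc base. An illustrative helper (mine, not the test's), where docBase[s] is the number of documents in all shards before shard s (DocBaseInParent in the test):

static int ToGlobalDocID(ScoreDoc sd, int[] docBase)
{
    return sd.Doc + docBase[sd.ShardIndex];
}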
// Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 2; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } MockDirectoryWrapper dir = NewMockDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(2) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(4)); IndexWriter writer = new IndexWriter(dir, config); var scheduler = config.MergeScheduler as IConcurrentMergeScheduler; if (scheduler != null) { scheduler.SetSuppressExceptions(); } IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, true, NewField); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } Thread.Sleep(10); dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Dispose(false); success = true; } catch (IOException) { failure.ClearDoFail(); writer.Dispose(false); } if (VERBOSE) { Console.WriteLine("TEST: success=" + success); } if (success) { IndexReader reader = DirectoryReader.Open(dir); IBits delDocs = MultiFields.GetLiveDocs(reader); for (int j = 0; j < reader.MaxDoc; j++) { if (delDocs == null || !delDocs.Get(j)) { reader.Document(j); reader.GetTermVectors(j); } } reader.Dispose(); } dir.Dispose(); } }
public virtual void TestSimple() { int numNodes = TestUtil.NextInt(Random(), 1, 10); double runTimeSec = AtLeast(3); int minDocsToMakeTerms = TestUtil.NextInt(Random(), 5, 20); int maxSearcherAgeSeconds = TestUtil.NextInt(Random(), 1, 3); if (VERBOSE) { Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds); } Start(numNodes, runTimeSec, maxSearcherAgeSeconds); List <PreviousSearchState> priorSearches = new List <PreviousSearchState>(); List <BytesRef> terms = null; while (Time.NanoTime() < endTimeNanos) { bool doFollowon = priorSearches.Count > 0 && Random().Next(7) == 1; // Pick a random node; we will run the query on this node: int myNodeID = Random().Next(numNodes); NodeState.ShardIndexSearcher localShardSearcher; PreviousSearchState prevSearchState; if (doFollowon) { // Pretend user issued a followon query: prevSearchState = priorSearches[Random().Next(priorSearches.Count)]; if (VERBOSE) { Console.WriteLine("\nTEST: follow-on query age=" + ((Time.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0)); } try { localShardSearcher = Nodes[myNodeID].Acquire(prevSearchState.Versions); } catch (SearcherExpiredException see) { // Expected, sometimes; in a "real" app we would // either forward this error to the user ("too // much time has passed; please re-run your // search") or sneakily just switch to newest // searcher w/o telling them... if (VERBOSE) { Console.WriteLine(" searcher expired during local shard searcher init: " + see); } priorSearches.Remove(prevSearchState); continue; } } else { if (VERBOSE) { Console.WriteLine("\nTEST: fresh query"); } // Do fresh query: localShardSearcher = Nodes[myNodeID].Acquire(); prevSearchState = null; } IndexReader[] subs = new IndexReader[numNodes]; PreviousSearchState searchState = null; try { // Mock: now make a single reader (MultiReader) from all node // searchers. In a real shard env you can't do this... we // do it to confirm results from the shard searcher // are correct: int docCount = 0; try { for (int nodeID = 0; nodeID < numNodes; nodeID++) { long subVersion = localShardSearcher.NodeVersions[nodeID]; IndexSearcher sub = Nodes[nodeID].Searchers.Acquire(subVersion); if (sub == null) { nodeID--; while (nodeID >= 0) { subs[nodeID].DecRef(); subs[nodeID] = null; nodeID--; } throw new SearcherExpiredException("nodeID=" + nodeID + " version=" + subVersion); } subs[nodeID] = sub.IndexReader; docCount += subs[nodeID].MaxDoc; } } catch (SearcherExpiredException see) { // Expected if (VERBOSE) { Console.WriteLine(" searcher expired during mock reader init: " + see); } continue; } IndexReader mockReader = new MultiReader(subs); IndexSearcher mockSearcher = new IndexSearcher(mockReader); Query query; Sort sort; if (prevSearchState != null) { query = prevSearchState.Query; sort = prevSearchState.Sort; } else { if (terms == null && docCount > minDocsToMakeTerms) { // TODO: try to "focus" on high freq terms sometimes too // TODO: maybe also periodically reset the terms...? 
TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").GetIterator(null); terms = new List <BytesRef>(); while (termsEnum.Next() != null) { terms.Add(BytesRef.DeepCopyOf(termsEnum.Term)); } if (VERBOSE) { Console.WriteLine("TEST: init terms: " + terms.Count + " terms"); } if (terms.Count == 0) { terms = null; } } if (VERBOSE) { Console.WriteLine(" maxDoc=" + mockReader.MaxDoc); } if (terms != null) { if (Random().NextBoolean()) { query = new TermQuery(new Term("body", terms[Random().Next(terms.Count)])); } else { string t = terms[Random().Next(terms.Count)].Utf8ToString(); string prefix; if (t.Length <= 1) { prefix = t; } else { prefix = t.Substring(0, TestUtil.NextInt(Random(), 1, 2)); } query = new PrefixQuery(new Term("body", prefix)); } if (Random().NextBoolean()) { sort = null; } else { // TODO: sort by more than 1 field int what = Random().Next(3); if (what == 0) { sort = new Sort(SortField.FIELD_SCORE); } else if (what == 1) { // TODO: this sort doesn't merge // correctly... it's tricky because you // could have > 2.1B docs across all shards: //sort = new Sort(SortField.FIELD_DOC); sort = null; } else if (what == 2) { sort = new Sort(new SortField[] { new SortField("docid", SortFieldType.INT32, Random().NextBoolean()) }); } else { sort = new Sort(new SortField[] { new SortField("title", SortFieldType.STRING, Random().NextBoolean()) }); } } } else { query = null; sort = null; } } if (query != null) { try { searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState); } catch (SearcherExpiredException see) { // Expected; in a "real" app we would // either forward this error to the user ("too // much time has passed; please re-run your // search") or sneakily just switch to newest // searcher w/o telling them... if (VERBOSE) { Console.WriteLine(" searcher expired during search: " + see); Console.Out.Write(see.StackTrace); } // We can't do this in general: on a very slow // computer it's possible the local searcher // expires before we can finish our search: // assert prevSearchState != null; if (prevSearchState != null) { priorSearches.Remove(prevSearchState); } } } } finally { Nodes[myNodeID].Release(localShardSearcher); foreach (IndexReader sub in subs) { if (sub != null) { sub.DecRef(); } } } if (searchState != null && searchState.SearchAfterLocal != null && Random().Next(5) == 3) { priorSearches.Add(searchState); if (priorSearches.Count > 200) { Collections.Shuffle(priorSearches); priorSearches.SubList(100, priorSearches.Count).Clear(); } } } Finish(); }
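One invariant in the test worth isolating is the reference-counting discipline around the sub-readers: every reader acquired for the mock MultiReader must be released exactly once, on every path, or readers leak. A schematic sketch, where the Acquire call stands in for Searchers.Acquire above and may return null for an expired version (the searchers manager here is hypothetical):

IndexReader sub = null;
try
{
    IndexSearcher s = searchers.Acquire(version); // hypothetical manager, mirrors the test
    if (s == null) throw new SearcherExpiredException("version=" + version);
    sub = s.IndexReader;
    // ... search against sub ...
}
finally
{
    if (sub != null)
    {
        sub.DecRef(); // undo the reference taken by Acquire
    }
}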
public override void Run() { Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorOffsets = true; doc.Add(NewField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType)); doc.Add(new NumericDocValuesField("dv", 5)); int idUpto = 0; int fullCount = 0; long stopTime = Environment.TickCount + TimeToRunInMilliseconds; // LUCENENET specific: added the ability to change how much time to allot do { try { Writer.UpdateDocument(new Term("id", "" + (idUpto++)), doc); AddCount++; } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: expected exc:"); Console.WriteLine(ioe.StackTrace); } //System.out.println(Thread.currentThread().getName() + ": hit exc"); //Console.WriteLine(ioe.StackTrace); if (ioe.Message.StartsWith("fake disk full at", StringComparison.Ordinal) || ioe.Message.Equals("now failing on purpose", StringComparison.Ordinal)) { DiskFull = true; //#if !NETSTANDARD1_6 // try // { //#endif Thread.Sleep(1); //#if !NETSTANDARD1_6 // } // catch (ThreadInterruptedException ie) // LUCENENET NOTE: Senseless to catch and rethrow the same exception type // { // throw new ThreadInterruptedException(ie.toString(), ie); // } //#endif if (fullCount++ >= 5) { break; } } else { if (NoErrors) { Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected IOException:"); Console.WriteLine(ioe.StackTrace); Error = ioe; } break; } } catch (Exception t) { //Console.WriteLine(t.StackTrace); if (NoErrors) { Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected Throwable:"); Console.WriteLine(t.StackTrace); Error = t; } break; } } while (Environment.TickCount < stopTime); }
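A caveat on the clock above: Environment.TickCount is a signed 32-bit millisecond counter that wraps after roughly 24.9 days of uptime, so the stopTime comparison can misfire near the wrap point. A wrap-safe sketch of the same loop shape using Stopwatch (assumes using System.Diagnostics; TimeToRunInMilliseconds as above):

var clock = Stopwatch.StartNew();
do
{
    // index one document per iteration, as above
} while (clock.ElapsedMilliseconds < TimeToRunInMilliseconds);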
public override BytesRef Next() { if (DEBUG_SURROGATES) { Console.WriteLine("TE.next()"); } if (skipNext) { if (DEBUG_SURROGATES) { Console.WriteLine(" skipNext=true"); } skipNext = false; if (termEnum.Term() == null) { return(null); } else if (termEnum.Term().Field != internedFieldName) { // PreFlex codec interns field names: return(null); } else { return(current = termEnum.Term().Bytes); } } // TODO: can we use STE's prevBuffer here? prevTerm.CopyBytes(termEnum.Term().Bytes); if (termEnum.Next() && termEnum.Term().Field == internedFieldName) { newSuffixStart = termEnum.newSuffixStart; if (DEBUG_SURROGATES) { Console.WriteLine(" newSuffixStart=" + newSuffixStart); } SurrogateDance(); Term t = termEnum.Term(); if (t == null || t.Field != internedFieldName) { // PreFlex codec interns field names; verify: Debug.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal)); current = null; } else { current = t.Bytes; } return(current); } else { // this field is exhausted, but we have to give // surrogateDance a chance to seek back: if (DEBUG_SURROGATES) { Console.WriteLine(" force cont"); } //newSuffixStart = prevTerm.length; newSuffixStart = 0; SurrogateDance(); Term t = termEnum.Term(); if (t == null || t.Field != internedFieldName) { // PreFlex codec interns field names; verify: Debug.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal)); return(null); } else { current = t.Bytes; return(current); } } }
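Finally, a note on the field-name comparisons above: in the Java original these were reference comparisons against an interned field name, but C#'s == and != on string operands compare values (ordinally), so t.Field != internedFieldName is a value comparison here and the Debug.Assert calls are trivially consistent with it. A standalone illustration of the distinction:

string interned = string.Intern("body");
string copy = new string("body".ToCharArray());  // same value, distinct instance
bool valueEqual = copy == interned;              // true: C# string == compares values
bool sameRef = ReferenceEquals(copy, interned);  // false: different objects
bool sameRefAfterIntern = ReferenceEquals(string.Intern(copy), interned); // true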