/// <summary>
/// Creates a terms enumerator that walks the supplied FST.
/// </summary>
/// <param name="outerInstance">The enclosing fields reader this enumerator belongs to.</param>
/// <param name="fst">The FST to enumerate; it is wrapped in a <c>BytesRefFSTEnum</c>.</param>
/// <param name="indexOptions">Index options stored on this enumerator.</param>
public SimpleTextTermsEnum(SimpleTextFieldsReader outerInstance, FST<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>> fst, FieldInfo.IndexOptions indexOptions)
{
    this.outerInstance = outerInstance;
    this.indexOptions = indexOptions;

    // C# has no diamond operator ("<>"), so the generic argument must be
    // written out; it is the same output type the fst parameter carries.
    fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>>(fst);
}
/// <summary>
/// Re-targets this enumerator at the postings described by
/// <paramref name="termState"/> and rewinds its iteration state.
/// </summary>
/// <param name="fieldInfo">Field whose index options and payload flag are read.</param>
/// <param name="termState">Source of the doc/freq index positions, the doc frequency and the skip pointer.</param>
/// <param name="liveDocs">Live-docs bits stored for later use by this enumerator.</param>
/// <returns>This instance.</returns>
internal virtual SepDocsEnum Init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs)
{
    _liveDocs = liveDocs;

    // Only overwrite the cached options when the field actually declares
    // some; otherwise the previously stored value stays in effect.
    var declaredOptions = fieldInfo.FieldIndexOptions;
    if (declaredOptions.HasValue)
    {
        _indexOptions = declaredOptions.Value;
    }

    _omitTf = (_indexOptions == FieldInfo.IndexOptions.DOCS_ONLY);
    _storePayloads = fieldInfo.HasPayloads();

    // TODO: can't we only do this if consumer
    // skipped consuming the previous docs?
    _docIndex.CopyFrom(termState.DOC_INDEX);
    _docIndex.Seek(_docReader);

    if (_omitTf == false)
    {
        _freqIndex.CopyFrom(termState.FREQ_INDEX);
        _freqIndex.Seek(_freqReader);
    }

    _docFreq = termState.DocFreq;

    // NOTE: unused if docFreq < skipMinimum:
    _skipFp = termState.SKIP_FP;

    // Rewind the per-term cursor.
    _doc = -1;
    _count = 0;
    _accum = 0;
    _freq = 1;
    _skipped = false;

    return this;
}
/// <summary>
/// Prepares this enumerator for a new term.
/// </summary>
/// <param name="fp">Stored as the start position of the next document.</param>
/// <param name="liveDocs">Live-docs bits stored on this enumerator.</param>
/// <param name="indexOptions">Decides whether positions and offsets are read.</param>
/// <param name="docFreq">Stored as this enumerator's cost.</param>
/// <returns>This instance.</returns>
public virtual SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, FieldInfo.IndexOptions indexOptions, int docFreq)
{
    bool wantsPositions = indexOptions >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    bool wantsOffsets = indexOptions >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

    this.liveDocs = liveDocs;
    nextDocStart = fp;
    docID_Renamed = -1;
    cost_Renamed = docFreq;

    readPositions = wantsPositions;
    readOffsets = wantsOffsets;

    // Offsets are only cleared to -1 when they will not be read.
    if (wantsOffsets == false)
    {
        startOffset_Renamed = -1;
        endOffset_Renamed = -1;
    }

    return this;
}
/// <summary>
/// Switches this writer to a new field and clears the per-field state.
/// </summary>
/// <remarks>
/// Currently, this instance is re-used across fields, so
/// our parent calls setField whenever the field changes.
/// </remarks>
/// <param name="fi">The field that is about to be written.</param>
/// <returns>Always 0.</returns>
/// <exception cref="System.NotSupportedException">
/// The effective index options include offsets.
/// </exception>
public override int SetField(FieldInfo fi)
{
    FIELD_INFO = fi;

    // Keep the previously stored options when the field declares none.
    var declaredOptions = fi.FieldIndexOptions;
    if (declaredOptions.HasValue)
    {
        INDEX_OPTIONS = declaredOptions.Value;
    }

    bool indexesOffsets = INDEX_OPTIONS >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    if (indexesOffsets)
    {
        throw new System.NotSupportedException("this codec cannot index offsets");
    }

    SKIP_LIST_WRITER.IndexOptions = INDEX_OPTIONS;

    // Payloads are stored only for fields indexed exactly with positions.
    bool indexesPositions = INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    STORE_PAYLOADS = indexesPositions && fi.HasPayloads();

    LAST_SKIP_FP = 0;
    LAST_PAYLOAD_FP = 0;
    _lastState = SetEmptyState();

    return 0;
}
/// <summary>
/// Builds a pseudo-random postings source whose shape is fully determined
/// by <paramref name="seed"/>.
/// </summary>
/// <param name="seed">Seed for the generator (narrowed to <c>int</c>).</param>
/// <param name="minDocFreq">Lower bound passed to the doc-frequency draw.</param>
/// <param name="maxDocFreq">Upper bound passed to the doc-frequency draw.</param>
/// <param name="liveDocs">Live-docs bits stored on this instance.</param>
/// <param name="options">Index options; positions are produced when these are at least DOCS_AND_FREQS_AND_POSITIONS.</param>
public SeedPostings(long seed, int minDocFreq, int maxDocFreq, Bits liveDocs, FieldInfo.IndexOptions options)
{
    LiveDocs = liveDocs;
    Options = options;
    DoPositions = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

    // The draws below must stay in this order so a seed reproduces the same postings.
    Random = new Random((int)seed);
    DocRandom = new Random(Random.Next());
    DocFreq = TestUtil.NextInt(Random, minDocFreq, maxDocFreq);

    // TODO: more realistic to inversely tie this to numDocs:
    MaxDocSpacing = TestUtil.NextInt(Random, 1, 100);

    // 10% of the time create big payloads:
    int payloadSpread = (Random.Next(10) == 7) ? 3 : 1;
    PayloadSize = 1 + Random.Next(payloadSpread);

    FixedPayloads = Random.NextBoolean();
    Payload_Renamed = new BytesRef(new byte[PayloadSize]);
}
/// <summary>
/// Captures everything a test thread needs; no work is done here.
/// </summary>
/// <param name="testCase">The owning test case.</param>
/// <param name="fieldsSource">The fields to be tested.</param>
/// <param name="options">The set of test options.</param>
/// <param name="maxTestOptions">Stored as <c>MaxTestOptions</c>.</param>
/// <param name="maxIndexOptions">Stored as <c>MaxIndexOptions</c>.</param>
/// <param name="alwaysTestMax">Stored as <c>AlwaysTestMax</c>.</param>
public TestThread(BasePostingsFormatTestCase testCase, Fields fieldsSource, ISet<Option> options, FieldInfo.IndexOptions maxTestOptions, FieldInfo.IndexOptions maxIndexOptions, bool alwaysTestMax)
{
    TestCase = testCase;
    FieldsSource = fieldsSource;
    Options = options;
    MaxTestOptions = maxTestOptions;
    MaxIndexOptions = maxIndexOptions;
    AlwaysTestMax = alwaysTestMax;
}
/// <summary>
/// Indexes <c>NUM_DOCS</c> documents, each containing one of two random
/// terms repeated 1 to 4 times, then repeatedly walks the postings of one
/// of the terms with a random mix of <c>NextDoc</c> and <c>Advance</c>
/// calls, checking every returned doc ID against a bit set that records
/// which term each document received.
/// </summary>
/// <param name="options">
/// Index options applied to the field. With DOCS_ONLY no frequencies are
/// requested and the frequency check is skipped.
/// </param>
public virtual void DoTestLongPostingsNoPositions(FieldInfo.IndexOptions options)
{
    // Don't use TestUtil.getTempDir so that we own the
    // randomness (ie same seed will point to same dir):
    Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong()));

    int NUM_DOCS = AtLeast(2000);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
    }

    string s1 = GetRandomTerm(null);
    string s2 = GetRandomTerm(s1);

    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
    }

    // Bit idx is set when document idx is indexed with s1, clear for s2.
    FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        if (Random().NextBoolean())
        {
            isS1.Set(idx);
        }
    }

    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).SetMergePolicy(NewLogMergePolicy());
    iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble());
    iwc.SetMaxBufferedDocs(-1);
    RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc);

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptions = options;
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        Document doc = new Document();
        string s = isS1.Get(idx) ? s1 : s2;
        Field f = NewField("field", s, ft);

        // Adding the same field 1..4 times bounds the frequency checked below.
        int count = TestUtil.NextInt(Random(), 1, 4);
        for (int ct = 0; ct < count; ct++)
        {
            doc.Add(f);
        }
        riw.AddDocument(doc);
    }

    IndexReader r = riw.Reader;
    riw.Dispose();

    Assert.AreEqual(NUM_DOCS, r.NumDocs);
    Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
    Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

    int num = AtLeast(1000);
    for (int iter = 0; iter < num; iter++)
    {
        string term;
        bool doS1;
        if (Random().NextBoolean())
        {
            term = s1;
            doS1 = true;
        }
        else
        {
            term = s2;
            doS1 = false;
        }

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
        }

        DocsEnum docs;
        DocsEnum postings;

        if (options == FieldInfo.IndexOptions.DOCS_ONLY)
        {
            docs = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_NONE);
            postings = null;
        }
        else
        {
            docs = postings = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_FREQS);
            Debug.Assert(postings != null);
        }
        Debug.Assert(docs != null);

        int docID = -1;
        while (docID < DocIdSetIterator.NO_MORE_DOCS)
        {
            int what = Random().Next(3);
            int expected;

            if (what == 0)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do next()");
                }

                // nextDoc
                expected = ExpectedDocOnOrAfter(isS1, doS1, docID + 1, NUM_DOCS);
                docID = docs.NextDoc();
            }
            else
            {
                // advance
                int targetDocID;
                if (docID == -1)
                {
                    targetDocID = Random().Next(NUM_DOCS + 1);
                }
                else
                {
                    targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID);
                }

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                }

                expected = ExpectedDocOnOrAfter(isS1, doS1, targetDocID, NUM_DOCS);
                docID = docs.Advance(targetDocID);
            }

            if (VERBOSE)
            {
                Console.WriteLine(" got docID=" + docID);
            }
            Assert.AreEqual(expected, docID);

            if (docID == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }

            // The random draw comes first so it is consumed even when there are no freqs to check.
            if (Random().Next(6) == 3 && postings != null)
            {
                int freq = postings.Freq();
                Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq);
            }
        }
    }

    r.Dispose();
    dir.Dispose();
}

/// <summary>
/// Returns the first doc ID at or after <paramref name="start"/> whose bit
/// in <paramref name="isS1"/> equals <paramref name="doS1"/>, or
/// <c>int.MaxValue</c> when the scan reaches <paramref name="numDocs"/>
/// without a match.
/// </summary>
private static int ExpectedDocOnOrAfter(FixedBitSet isS1, bool doS1, int start, int numDocs)
{
    int candidate = start;
    while (candidate != numDocs && isS1.Get(candidate) != doS1)
    {
        candidate++;
    }
    return candidate == numDocs ? int.MaxValue : candidate;
}
/// <summary>
/// Points this writer at a new field and resets the state kept per field.
/// </summary>
/// <remarks>
/// Currently, this instance is re-used across fields, so
/// our parent calls setField whenever the field changes.
/// </remarks>
/// <param name="fi">The field that is about to be written.</param>
/// <returns>Always 0.</returns>
/// <exception cref="System.NotSupportedException">
/// The effective index options include offsets.
/// </exception>
public override int SetField(FieldInfo fi)
{
    FIELD_INFO = fi;

    // A field without declared options leaves the stored options untouched.
    var fieldOptions = fi.FieldIndexOptions;
    if (fieldOptions.HasValue)
    {
        INDEX_OPTIONS = fieldOptions.Value;
    }

    if (INDEX_OPTIONS >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
    {
        throw new System.NotSupportedException("this codec cannot index offsets");
    }

    SKIP_LIST_WRITER.IndexOptions = INDEX_OPTIONS;

    // HasPayloads() is consulted only when the field is indexed exactly with positions.
    bool positionsOnly = INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    STORE_PAYLOADS = positionsOnly && fi.HasPayloads();

    LAST_SKIP_FP = 0;
    LAST_PAYLOAD_FP = 0;
    _lastState = SetEmptyState();

    return 0;
}
/// <summary>
/// Positions this enumerator on the postings described by
/// <paramref name="termState"/> and resets its iteration counters.
/// </summary>
/// <param name="fieldInfo">Field whose index options and payload flag are read.</param>
/// <param name="termState">Provides the doc/freq index positions, the doc frequency and the skip pointer.</param>
/// <param name="liveDocs">Live-docs bits stored on this enumerator.</param>
/// <returns>This instance.</returns>
internal virtual SepDocsEnum Init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs)
{
    _liveDocs = liveDocs;

    // A field without declared options leaves the stored options untouched.
    var fieldOptions = fieldInfo.FieldIndexOptions;
    if (fieldOptions.HasValue)
    {
        _indexOptions = fieldOptions.Value;
    }

    _omitTf = (_indexOptions == FieldInfo.IndexOptions.DOCS_ONLY);
    _storePayloads = fieldInfo.HasPayloads();

    // TODO: can't we only do this if consumer
    // skipped consuming the previous docs?
    _docIndex.CopyFrom(termState.DOC_INDEX);
    _docIndex.Seek(_docReader);

    // The frequency index is positioned only when term frequencies are kept.
    if (_omitTf == false)
    {
        _freqIndex.CopyFrom(termState.FREQ_INDEX);
        _freqIndex.Seek(_freqReader);
    }

    _docFreq = termState.DocFreq;

    // NOTE: unused if docFreq < skipMinimum:
    _skipFp = termState.SKIP_FP;

    // Start before the first document.
    _doc = -1;
    _accum = 0;
    _count = 0;
    _freq = 1;
    _skipped = false;

    return this;
}