public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) { this.docState = threadState.docState; this.docFieldProcessor = docFieldProcessor; this.fieldInfos = docFieldProcessor.fieldInfos; this.consumer = docFieldProcessor.consumer.addThread(this); }
internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) { fieldInfos = fn; fieldsStream = fdt; indexStream = fdx; doClose = false; }
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize) { bool success = false; try { directory = dir; segment = seg; fieldInfos = fis; origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false); size = origEnum.size; totalIndexInterval = origEnum.indexInterval; indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) { input = i; fieldInfos = fis; isIndex = isi; maxSkipLevels = 1; // use single-level skip lists for formats > -3 int firstInt = input.ReadInt(); if (firstInt >= 0) { // original-format file, without explicit format version number format = 0; size = firstInt; // back-compatible settings indexInterval = 128; skipInterval = System.Int32.MaxValue; // switch off skipTo optimization } else { // we have a format version number format = firstInt; // check that it is a format we can understand if (format < TermInfosWriter.FORMAT_CURRENT) throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher"); size = input.ReadLong(); // read the size if (format == - 1) { if (!isIndex) { indexInterval = input.ReadInt(); formatM1SkipInterval = input.ReadInt(); } // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in // skipTo implementation of these versions skipInterval = System.Int32.MaxValue; } else { indexInterval = input.ReadInt(); skipInterval = input.ReadInt(); if (format <= TermInfosWriter.FORMAT) { // this new format introduces multi-level skipping maxSkipLevels = input.ReadInt(); } } System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0"); System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0"); } if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { termBuffer.SetPreUTF8Strings(); scanBuffer.SetPreUTF8Strings(); prevBuffer.SetPreUTF8Strings(); } }
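For the newer formats accepted above (format <= TermInfosWriter.FORMAT), the matching header on the writer side is the negative format int, an 8-byte size placeholder, then indexInterval, skipInterval and maxSkipLevels. A hedged sketch of emitting that header, using the field names from the reader above and an already-open IndexOutput named output (this is an illustration of the layout the reader expects, not the verbatim TermInfosWriter source):
// Sketch: header layout expected by the multi-level-skipping branch above.
output.WriteInt(TermInfosWriter.FORMAT_CURRENT); // negative value => versioned file
output.WriteLong(0);                             // size placeholder, patched on close
output.WriteInt(indexInterval);
output.WriteInt(skipInterval);
output.WriteInt(maxSkipLevels);                  // read back when format <= FORMAT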
public void Read(IndexInput input, FieldInfos fieldInfos) { this.term = null; // invalidate cache int start = input.ReadVInt(); int length = input.ReadVInt(); int totalLength = start + length; if (preUTF8Strings) { text.SetLength(totalLength); input.ReadChars(text.result, start, length); } else { if (dirty) { // Fully convert all bytes since bytes is dirty UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes); bytes.SetLength(totalLength); input.ReadBytes(bytes.result, start, length); UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text); dirty = false; } else { // Incrementally convert only the UTF8 bytes that are new: bytes.SetLength(totalLength); input.ReadBytes(bytes.result, start, length); UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text); } } this.field = fieldInfos.FieldName(input.ReadVInt()); }
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn) { fieldInfos = fn; fieldsStream = d.CreateOutput(segment + ".fdt"); indexStream = d.CreateOutput(segment + ".fdx"); doClose = true; }
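The difference between this constructor and the stream-injecting one earlier is ownership: doClose is true only when the writer opened .fdt/.fdx itself. A minimal close sketch under that assumption, using the field names above (not necessarily the exact library implementation):
// Sketch: dispose the stored-fields streams only if this writer owns them.
internal void Close()
{
    if (doClose)
    {
        fieldsStream.Close();
        indexStream.Close();
    }
}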
/// <summary>Create a <see cref="SegmentReadState"/>.</summary> public SegmentReadState(Directory dir, SegmentInfo info, FieldInfos fieldInfos, IOContext context, int termsIndexDivisor, string segmentSuffix) { this.Directory = dir; this.SegmentInfo = info; this.FieldInfos = fieldInfos; this.Context = context; this.TermsIndexDivisor = termsIndexDivisor; this.SegmentSuffix = segmentSuffix; }
public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) { InitBlock(); this.docState = threadState.docState; this.docFieldProcessor = docFieldProcessor; this.fieldInfos = docFieldProcessor.fieldInfos; this.consumer = docFieldProcessor.consumer.AddThread(this); fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState); }
/*internal*/ public FieldsReader(Directory d, System.String segment, FieldInfos fn) { fieldInfos = fn; fieldsStream = d.OpenInput(segment + ".fdt"); indexStream = d.OpenInput(segment + ".fdx"); size = (int) (indexStream.Length() / 8); }
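The size computation above reflects the legacy stored-fields layout: .fdx holds one 64-bit pointer into .fdt per document, so the document count is simply the index length divided by 8. A sketch of positioning the streams at one document's stored fields under that assumption (SeekToDoc is a hypothetical helper, not a library method):
// Sketch: each .fdx entry is an 8-byte pointer into .fdt.
private void SeekToDoc(int docId)
{
    indexStream.Seek(docId * 8L);              // fixed-width index entry
    long fieldsPointer = indexStream.ReadLong();
    fieldsStream.Seek(fieldsPointer);          // .fdt now positioned at this doc's fields
}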
/// <summary> /// Sole constructor. </summary> public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode) { this.compressionMode = compressionMode; string segment = si.Name; bool success = false; fieldInfos = fn; numDocs = si.DocCount; ChecksumIndexInput indexStream = null; try { // Load the index into memory string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION); indexStream = d.OpenChecksumInput(indexStreamFN, context); string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX; version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT); Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer); indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM) { indexStream.ReadVLong(); // the end of the data file CodecUtil.CheckFooter(indexStream); } else { CodecUtil.CheckEOF(indexStream); } indexStream.Dispose(); indexStream = null; // Open the data file and read metadata string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION); vectorsStream = d.OpenInput(vectorsStreamFN, context); string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT; int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT); if (version != version2) { throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + version2); } Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.FilePointer); packedIntsVersion = vectorsStream.ReadVInt(); chunkSize = vectorsStream.ReadVInt(); decompressor = compressionMode.NewDecompressor(); this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0); success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(this, indexStream); } } }
public void Read(IndexInput input, FieldInfos fieldInfos) { this.term = null; // invalidate cache int start = input.ReadVInt(); int length = input.ReadVInt(); int totalLength = start + length; SetTextLength(totalLength); input.ReadChars(this.text, start, length); this.field = fieldInfos.FieldName(input.ReadVInt()); }
public void AddDocument(System.String segment, Document doc) { // write field names fieldInfos = new FieldInfos(); fieldInfos.Add(doc); fieldInfos.Write(directory, segment + ".fnm"); // write field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { fieldsWriter.AddDocument(doc); } finally { fieldsWriter.Close(); } // invert doc into postingTable postingTable.Clear(); // clear postingTable fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts float boost = doc.GetBoost(); for (int i = 0; i < fieldBoosts.Length; i++) { fieldBoosts[i] = boost; } InvertDocument(doc); // sort postingTable into an array Posting[] postings = SortPostingTable(); /* for (int i = 0; i < postings.length; i++) { Posting posting = postings[i]; System.out.print(posting.term); System.out.print(" freq=" + posting.freq); System.out.print(" pos="); System.out.print(posting.positions[0]); for (int j = 1; j < posting.freq; j++) System.out.print("," + posting.positions[j]); System.out.println(""); } */ // write postings WritePostings(postings, segment); // write norms of indexed fields WriteNorms(segment); }
public /*internal*/ TermInfosReader(Directory dir, System.String seg, FieldInfos fis) { directory = dir; segment = seg; fieldInfos = fis; origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false); size = origEnum.size; indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true); }
private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi) { indexInterval = interval; fieldInfos = fis; isIndex = isi; output = directory.CreateOutput(segment + (isIndex ? ".tii" : ".tis")); output.WriteInt(FORMAT); // write format output.WriteLong(0); // leave space for size output.WriteInt(indexInterval); // write indexInterval output.WriteInt(skipInterval); // write skipInterval }
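The WriteLong(0) above reserves room for the term count, which is only known once all terms have been written, so the writer must seek back over the 4-byte format int and patch in the real value before closing. A hedged sketch of that patch step, assuming the layout written above (not the verbatim library code):
// Sketch: back-fill the reserved 8-byte size slot when the writer is closed.
internal void PatchSize(long size)
{
    output.Seek(4);         // skip the format int written first
    output.WriteLong(size); // overwrite the placeholder written in Initialize
    output.Close();
}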
public override void SetUp() { base.SetUp(); fieldInfos = new FieldInfos(); DocHelper.SetupDoc(testDoc); fieldInfos.Add(testDoc); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetUseCompoundFile(false); writer.AddDocument(testDoc); writer.Close(); segmentName = writer.NewestSegment().name; }
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos) { // Open files for TermVector storage tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT); tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT); tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT); this.fieldInfos = fieldInfos; }
/// <summary> Returns a deep clone of this FieldInfos instance.</summary> public System.Object Clone() { FieldInfos fis = new FieldInfos(); int numField = byNumber.Count; for (int i = 0; i < numField; i++) { FieldInfo fi = (FieldInfo) ((FieldInfo) byNumber[i]).Clone(); fis.byNumber.Add(fi); fis.byName[fi.name] = fi; } return fis; }
// Used only by clone private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) { this.fieldInfos = fieldInfos; this.numTotalDocs = numTotalDocs; this.size = size; this.format = format; this.formatSize = formatSize; this.docStoreOffset = docStoreOffset; this.cloneableFieldsStream = cloneableFieldsStream; this.cloneableIndexStream = cloneableIndexStream; fieldsStream = (IndexInput) cloneableFieldsStream.Clone(); indexStream = (IndexInput) cloneableIndexStream.Clone(); }
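This private constructor exists so a clone can share the cloneable originals while getting its own positioned IndexInput clones. A plausible sketch of the matching Clone method, assuming it simply forwards the cached state (EnsureOpen is an assumed guard; the shipped implementation may differ):
// Sketch: clone by re-using the cloneable streams captured above.
public System.Object Clone()
{
    EnsureOpen(); // assumed: refuse to clone a closed reader
    return new FieldsReader(fieldInfos, numTotalDocs, size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
}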
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos) { // Open files for TermVector storage tvx = directory.CreateOutput(segment + TVX_EXTENSION); tvx.WriteInt(FORMAT_VERSION); tvd = directory.CreateOutput(segment + TVD_EXTENSION); tvd.WriteInt(FORMAT_VERSION); tvf = directory.CreateOutput(segment + TVF_EXTENSION); tvf.WriteInt(FORMAT_VERSION); this.fieldInfos = fieldInfos; fields = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(fieldInfos.Size())); terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); }
// used by clone private CompressingTermVectorsReader(CompressingTermVectorsReader reader) { this.fieldInfos = reader.fieldInfos; this.vectorsStream = (IndexInput)reader.vectorsStream.Clone(); this.indexReader = (CompressingStoredFieldsIndexReader)reader.indexReader.Clone(); this.packedIntsVersion = reader.packedIntsVersion; this.compressionMode = reader.compressionMode; this.decompressor = (Decompressor)reader.decompressor.Clone(); this.chunkSize = reader.chunkSize; this.numDocs = reader.numDocs; this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0); this.version = reader.version; this.closed = false; }
/*internal*/ public TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos) { if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION)) { tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION); CheckValidFormat(tvx); tvd = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION); tvdFormat = CheckValidFormat(tvd); tvf = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION); tvfFormat = CheckValidFormat(tvf); size = (int) tvx.Length() / 8; } this.fieldInfos = fieldInfos; }
// used by clone private CompressingStoredFieldsReader(CompressingStoredFieldsReader reader) { this.Version_Renamed = reader.Version_Renamed; this.FieldInfos = reader.FieldInfos; this.FieldsStream = (IndexInput)reader.FieldsStream.Clone(); this.IndexReader = (CompressingStoredFieldsIndexReader)reader.IndexReader.Clone(); this.MaxPointer = reader.MaxPointer; this.ChunkSize_Renamed = reader.ChunkSize_Renamed; this.PackedIntsVersion = reader.PackedIntsVersion; this.CompressionMode_Renamed = reader.CompressionMode_Renamed; this.Decompressor = (Decompressor)reader.Decompressor.Clone(); this.NumDocs = reader.NumDocs; this.Bytes = new BytesRef(reader.Bytes.Bytes.Length); this.Closed = false; }
public virtual void Test() { //Positive test of FieldInfos Assert.IsTrue(testDoc != null); FieldInfos fieldInfos = new FieldInfos(); fieldInfos.Add(testDoc); //Since the complement is stored as well in the fields map Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count); //this is all b/c we are using the no-arg constructor RAMDirectory dir = new RAMDirectory(); System.String name = "testFile"; IndexOutput output = dir.CreateOutput(name); Assert.IsTrue(output != null); //Use a RAMOutputStream try { fieldInfos.Write(output); output.Close(); Assert.IsTrue(output.Length() > 0); FieldInfos readIn = new FieldInfos(dir, name); Assert.IsTrue(fieldInfos.Size() == readIn.Size()); FieldInfo info = readIn.FieldInfo("textField1"); Assert.IsTrue(info != null); Assert.IsTrue(info.storeTermVector_ForNUnit == false); Assert.IsTrue(info.omitNorms_ForNUnit == false); info = readIn.FieldInfo("textField2"); Assert.IsTrue(info != null); Assert.IsTrue(info.storeTermVector_ForNUnit == true); Assert.IsTrue(info.omitNorms_ForNUnit == false); info = readIn.FieldInfo("textField3"); Assert.IsTrue(info != null); Assert.IsTrue(info.storeTermVector_ForNUnit == false); Assert.IsTrue(info.omitNorms_ForNUnit == true); info = readIn.FieldInfo("omitNorms"); Assert.IsTrue(info != null); Assert.IsTrue(info.storeTermVector_ForNUnit == false); Assert.IsTrue(info.omitNorms_ForNUnit == true); dir.Close(); } catch (System.IO.IOException e) { Assert.IsTrue(false); } }
protected virtual void SetUp() { fieldInfos = new FieldInfos(); DocHelper.SetupDoc(testDoc); fieldInfos.Add(testDoc); DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(), Similarity.GetDefault(), 50); Assert.IsTrue(writer != null); try { writer.AddDocument("test", testDoc); } catch (System.IO.IOException e) { Assert.Fail("Unexpected IOException while writing the test document: " + e.Message); } }
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) { input = i; fieldInfos = fis; isIndex = isi; int firstInt = input.ReadInt(); if (firstInt >= 0) { // original-format file, without explicit format version number format = 0; size = firstInt; // back-compatible settings indexInterval = 128; skipInterval = System.Int32.MaxValue; // switch off skipTo optimization } else { // we have a format version number format = firstInt; // check that it is a format we can understand if (format < TermInfosWriter.FORMAT) throw new System.IO.IOException("Unknown format version:" + format); size = input.ReadLong(); // read the size if (format == - 1) { if (!isIndex) { indexInterval = input.ReadInt(); formatM1SkipInterval = input.ReadInt(); } // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in // skipTo implementation of these versions skipInterval = System.Int32.MaxValue; } else { indexInterval = input.ReadInt(); skipInterval = input.ReadInt(); } } }
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size) { bool success = false; try { if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION)) { tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize); CheckValidFormat(tvx); tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize); tvdFormat = CheckValidFormat(tvd); tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize); tvfFormat = CheckValidFormat(tvf); if (- 1 == docStoreOffset) { this.docStoreOffset = 0; this.size = (int) (tvx.Length() >> 3); } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int) (tvx.Length() / 8)) >= size + docStoreOffset); } } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
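Each of tvx/tvd/tvf begins with the 4-byte format int written by the TermVectorsWriter constructor shown earlier, and CheckValidFormat presumably reads it back and rejects files newer than the reader understands. A sketch under that assumption (the exact constant and message may differ in the real source):
// Sketch: validate a per-file format header and return it.
private int CheckValidFormat(IndexInput input)
{
    int format = input.ReadInt();
    if (format > TermVectorsReader.FORMAT_CURRENT)
    {
        throw new CorruptIndexException("Incompatible format version: " + format + " expected " + TermVectorsReader.FORMAT_CURRENT + " or less");
    }
    return format;
}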
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!! public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate) { // validate incoming readers if (validate) { foreach (AtomicReader reader in readers) { reader.CheckIntegrity(); } } MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort); Directory = dir; this.TermIndexInterval = termIndexInterval; this.Codec = segmentInfo.Codec; this.Context = context; this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers); MergeState.SegmentInfo.DocCount = SetDocMaps(); }
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) { bool success = false; try { fieldInfos = fn; cloneableFieldsStream = d.OpenInput(segment + ".fdt", readBufferSize); fieldsStream = (IndexInput) cloneableFieldsStream.Clone(); indexStream = d.OpenInput(segment + ".fdx", readBufferSize); if (docStoreOffset != - 1) { // We read only a slice out of this shared fields file this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int)(indexStream.Length() / 8)) >= size + this.docStoreOffset); } else { this.docStoreOffset = 0; this.size = (int) (indexStream.Length() >> 3); } numTotalDocs = (int) (indexStream.Length() >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos):base() { dir = state.directory; segment = state.segmentName; totalNumDocs = state.numDocs; this.fieldInfos = fieldInfos; termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval); // TODO: this is a nasty abstraction violation (that we // peek down to find freqOut/proxOut) -- we need a // better abstraction here whereby these child consumers // can provide skip data or not skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)); termsWriter = new FormatPostingsTermsWriter(state, this); }
// public static boolean DEBUG = false; /// <summary> /// Sole constructor. </summary> public Lucene41PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, string segmentSuffix) { bool success = false; IndexInput docIn = null; IndexInput posIn = null; IndexInput payIn = null; try { docIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), ioContext); Version = CodecUtil.CheckHeader(docIn, Lucene41PostingsWriter.DOC_CODEC, Lucene41PostingsWriter.VERSION_START, Lucene41PostingsWriter.VERSION_CURRENT); forUtil = new ForUtil(docIn); if (fieldInfos.HasProx()) { posIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), ioContext); CodecUtil.CheckHeader(posIn, Lucene41PostingsWriter.POS_CODEC, Version, Version); if (fieldInfos.HasPayloads() || fieldInfos.HasOffsets()) { payIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), ioContext); CodecUtil.CheckHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, Version, Version); } } this.DocIn = docIn; this.PosIn = posIn; this.PayIn = payIn; success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(docIn, posIn, payIn); } } }
public virtual void TestPayloadFieldBit() { Directory ram = NewDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // this field won't have any payloads d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO)); // this field will have payloads in all docs, however not for all term positions, // so this field is used to check if the DocumentWriter correctly enables the payloads bit // even if only some term positions have payloads d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO)); // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // enabled in only some documents d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // only add payload data for field f2 #pragma warning disable 612, 618 analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); #pragma warning restore 612, 618 writer.AddDocument(d); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.IsFalse(fi.FieldInfo("f1").HasPayloads, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").HasPayloads, "Payload field bit should be set."); Assert.IsFalse(fi.FieldInfo("f3").HasPayloads, "Payload field bit should not be set."); reader.Dispose(); // now we add another document which has payloads for field f3 and verify if the SegmentMerger // enabled payloads for that field analyzer = new PayloadAnalyzer(); // Clear payload state for each field writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE)); d = new Document(); d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // add payload data for field f2 and f3 #pragma warning disable 612, 618 analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3); #pragma warning restore 612, 618 writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); fi = reader.FieldInfos; Assert.IsFalse(fi.FieldInfo("f1").HasPayloads, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").HasPayloads, "Payload field bit should be set."); Assert.IsTrue(fi.FieldInfo("f3").HasPayloads, "Payload field bit should be set."); reader.Dispose(); ram.Dispose(); }
public virtual void TestFixedPostings() { const int NUM_TERMS = 100; TermData[] terms = new TermData[NUM_TERMS]; for (int i = 0; i < NUM_TERMS; i++) { int[] docs = new int[] { i }; string text = Convert.ToString(i); terms[i] = new TermData(this, text, docs, null); } FieldInfos.Builder builder = new FieldInfos.Builder(); FieldData field = new FieldData(this, "field", builder, terms, true, false); FieldData[] fields = new FieldData[] { field }; FieldInfos fieldInfos = builder.Finish(); Directory dir = NewDirectory(); this.Write(fieldInfos, dir, fields, true); Codec codec = Codec.Default; SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); FieldsProducer reader = codec.PostingsFormat().FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)); IEnumerator <string> fieldsEnum = reader.GetEnumerator(); fieldsEnum.MoveNext(); string fieldName = fieldsEnum.Current; Assert.IsNotNull(fieldName); Terms terms2 = reader.Terms(fieldName); Assert.IsNotNull(terms2); TermsEnum termsEnum = terms2.Iterator(null); DocsEnum docsEnum = null; for (int i = 0; i < NUM_TERMS; i++) { BytesRef term = termsEnum.Next(); Assert.IsNotNull(term); Assert.AreEqual(terms[i].Text2, term.Utf8ToString()); // do this twice to stress test the codec's reuse, ie, // make sure it properly fully resets (rewinds) its // internal state: for (int iter = 0; iter < 2; iter++) { docsEnum = TestUtil.Docs(Random(), termsEnum, null, docsEnum, DocsEnum.FLAG_NONE); Assert.AreEqual(terms[i].Docs[0], docsEnum.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); } } Assert.IsNull(termsEnum.Next()); for (int i = 0; i < NUM_TERMS; i++) { Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].Text2)), TermsEnum.SeekStatus.FOUND); } Assert.IsFalse(fieldsEnum.MoveNext()); reader.Dispose(); dir.Dispose(); }
internal override void SetFieldInfos(FieldInfos fieldInfos) { base.SetFieldInfos(fieldInfos); one.SetFieldInfos(fieldInfos); two.SetFieldInfos(fieldInfos); }
/// <summary> /// Expert: create a <see cref="ParallelAtomicReader"/> based on the provided /// <paramref name="readers"/> and <paramref name="storedFieldsReaders"/>; when a document is /// loaded, only <paramref name="storedFieldsReaders"/> will be used. /// </summary> public ParallelAtomicReader(bool closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders) { InitializeInstanceFields(); this.closeSubReaders = closeSubReaders; if (readers.Length == 0 && storedFieldsReaders.Length > 0) { throw new System.ArgumentException("There must be at least one main reader if storedFieldsReaders are used."); } this.parallelReaders = (AtomicReader[])readers.Clone(); this.storedFieldsReaders = (AtomicReader[])storedFieldsReaders.Clone(); if (parallelReaders.Length > 0) { AtomicReader first = parallelReaders[0]; this.maxDoc = first.MaxDoc; this.numDocs = first.NumDocs; this.hasDeletions = first.HasDeletions; } else { this.maxDoc = this.numDocs = 0; this.hasDeletions = false; } Collections.AddAll(completeReaderSet, this.parallelReaders); Collections.AddAll(completeReaderSet, this.storedFieldsReaders); // check compatibility: foreach (AtomicReader reader in completeReaderSet) { if (reader.MaxDoc != maxDoc) { throw new System.ArgumentException("All readers must have same MaxDoc: " + maxDoc + "!=" + reader.MaxDoc); } } // TODO: make this read-only in a cleaner way? FieldInfos.Builder builder = new FieldInfos.Builder(); // build FieldInfos and fieldToReader map: foreach (AtomicReader reader in this.parallelReaders) { FieldInfos readerFieldInfos = reader.FieldInfos; foreach (FieldInfo fieldInfo in readerFieldInfos) { // NOTE: first reader having a given field "wins": if (!fieldToReader.ContainsKey(fieldInfo.Name)) { builder.Add(fieldInfo); fieldToReader[fieldInfo.Name] = reader; if (fieldInfo.HasVectors) { tvFieldToReader[fieldInfo.Name] = reader; } } } } fieldInfos = builder.Finish(); // build Fields instance foreach (AtomicReader reader in this.parallelReaders) { Fields readerFields = reader.Fields; if (readerFields != null) { foreach (string field in readerFields) { // only add if the reader responsible for that field name is the current: if (fieldToReader[field].Equals(reader)) { this.fields.AddField(field, readerFields.GetTerms(field)); } } } } // do this finally so any Exceptions occurred before don't affect refcounts: foreach (AtomicReader reader in completeReaderSet) { if (!closeSubReaders) { reader.IncRef(); } reader.RegisterParentReader(this); } }
public virtual void TestFieldNumberGaps() { int numIters = AtLeast(13); for (int i = 0; i < numIters; i++) { Directory dir = NewDirectory(); { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)); Document d = new Document(); d.Add(new TextField("f1", "d1 first field", Field.Store.YES)); d.Add(new TextField("f2", "d1 second field", Field.Store.YES)); writer.AddDocument(d); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); } { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); Document d = new Document(); d.Add(new TextField("f1", "d2 first field", Field.Store.YES)); d.Add(new StoredField("f3", new byte[] { 1, 2, 3 })); writer.AddDocument(d); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(2, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); Assert.AreEqual("f1", fis2.FieldInfo(0).Name); Assert.IsNull(fis2.FieldInfo(1)); Assert.AreEqual("f3", fis2.FieldInfo(2).Name); } { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); Document d = new Document(); d.Add(new TextField("f1", "d3 first field", Field.Store.YES)); d.Add(new TextField("f2", "d3 second field", Field.Store.YES)); d.Add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 })); writer.AddDocument(d); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(3, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1)); FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(2)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); Assert.AreEqual("f1", fis2.FieldInfo(0).Name); Assert.IsNull(fis2.FieldInfo(1)); Assert.AreEqual("f3", fis2.FieldInfo(2).Name); Assert.AreEqual("f1", fis3.FieldInfo(0).Name); Assert.AreEqual("f2", fis3.FieldInfo(1).Name); Assert.AreEqual("f3", fis3.FieldInfo(2).Name); } { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); writer.DeleteDocuments(new Term("f1", "d1")); // nuke the first segment entirely so that the segment with gaps is // loaded first! 
writer.ForceMergeDeletes(); writer.Dispose(); } IndexWriter writer_ = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()).SetInfoStream(new FailOnNonBulkMergesInfoStream())); writer_.ForceMerge(1); writer_.Dispose(); SegmentInfos sis_ = new SegmentInfos(); sis_.Read(dir); Assert.AreEqual(1, sis_.Size()); FieldInfos fis1_ = SegmentReader.ReadFieldInfos(sis_.Info(0)); Assert.AreEqual("f1", fis1_.FieldInfo(0).Name); Assert.AreEqual("f2", fis1_.FieldInfo(1).Name); Assert.AreEqual("f3", fis1_.FieldInfo(2).Name); dir.Dispose(); } }
internal virtual void SetFieldInfos(FieldInfos fieldInfos) { this.fieldInfos = fieldInfos; }
internal override void setFieldInfos(FieldInfos fieldInfos) { base.setFieldInfos(fieldInfos); consumer.setFieldInfos(fieldInfos); endConsumer.setFieldInfos(fieldInfos); }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new DisposableThreadLocal <StoredFieldsReader>(() => (StoredFieldsReader)fieldsReaderOrig.Clone()); termVectorsLocal = new DisposableThreadLocal <TermVectorsReader>(() => (termVectorsReaderOrig == null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone()); if (termsIndexDivisor == 0) { throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(fields != null); } // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(normsProducer != null); } } else { normsProducer = null; } fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis) : this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE) { }
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor) { bool success = false; if (indexDivisor < 1 && indexDivisor != -1) { throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor); } try { directory = dir; segment = seg; fieldInfos = fis; origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false); size = origEnum.size; if (indexDivisor != -1) { // Load terms index totalIndexInterval = origEnum.indexInterval * indexDivisor; var indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true); try { int indexSize = 1 + ((int)indexEnum.size - 1) / indexDivisor; // otherwise read index indexTerms = new Term[indexSize]; indexInfos = new TermInfo[indexSize]; indexPointers = new long[indexSize]; for (int i = 0; indexEnum.Next(); i++) { indexTerms[i] = indexEnum.Term; indexInfos[i] = indexEnum.TermInfo(); indexPointers[i] = indexEnum.indexPointer; for (int j = 1; j < indexDivisor; j++) { if (!indexEnum.Next()) { break; } } } } finally { indexEnum.Close(); } } else { // Do not load terms index: totalIndexInterval = -1; indexTerms = null; indexInfos = null; indexPointers = null; } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Dispose(); } } }
internal void setFieldInfos(FieldInfos fieldInfos) { this.fieldInfos = fieldInfos; }
public virtual void TestPositions() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // f1,f2,f3: docs only FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = IndexOptions.DOCS_ONLY; Field f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); Field f2 = NewField("f2", "this field has docs only", ft); d.Add(f2); Field f3 = NewField("f3", "this field has docs only", ft); d.Add(f3); FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED); ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS; // f4,f5,f6 docs and freqs Field f4 = NewField("f4", "this field has docs and freqs", ft2); d.Add(f4); Field f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); Field f6 = NewField("f6", "this field has docs and freqs", ft2); d.Add(f6); FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED); ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; // f7,f8,f9 docs/freqs/positions Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3); d.Add(f7); Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3); d.Add(f8); Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); writer.ForceMerge(1); // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8, // and docs/freqs/positions for f3, f6, f9 d = new Document(); // f1,f4,f7: docs only f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); f4 = NewField("f4", "this field has docs only", ft); d.Add(f4); f7 = NewField("f7", "this field has docs only", ft); d.Add(f7); // f2, f5, f8: docs and freqs f2 = NewField("f2", "this field has docs and freqs", ft2); d.Add(f2); f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); f8 = NewField("f8", "this field has docs and freqs", ft2); d.Add(f8); // f3, f6, f9: docs and freqs and positions f3 = NewField("f3", "this field has docs and freqs and positions", ft3); d.Add(f3); f6 = NewField("f6", "this field has docs and freqs and positions", ft3); d.Add(f6); f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; // docs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions); // docs + docs/freqs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions); // docs + docs/freqs/pos = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions); // docs/freqs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions); // docs/freqs + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions); // docs/freqs + docs/freqs/pos = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions); // docs/freqs/pos + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions); // docs/freqs/pos + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions); // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions); reader.Dispose(); 
ram.Dispose(); }
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) { input = i; fieldInfos = fis; isIndex = isi; maxSkipLevels = 1; // use single-level skip lists for formats > -3 int firstInt = input.ReadInt(); if (firstInt >= 0) { // original-format file, without explicit format version number format = 0; size = firstInt; // back-compatible settings indexInterval = 128; skipInterval = System.Int32.MaxValue; // switch off skipTo optimization } else { // we have a format version number format = firstInt; // check that it is a format we can understand if (format < TermInfosWriter.FORMAT_CURRENT) { throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher"); } size = input.ReadLong(); // read the size if (format == -1) { if (!isIndex) { indexInterval = input.ReadInt(); formatM1SkipInterval = input.ReadInt(); } // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in // skipTo implementation of these versions skipInterval = System.Int32.MaxValue; } else { indexInterval = input.ReadInt(); skipInterval = input.ReadInt(); if (format <= TermInfosWriter.FORMAT) { // this new format introduces multi-level skipping maxSkipLevels = input.ReadInt(); } } System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0"); System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0"); } if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { termBuffer.SetPreUTF8Strings(); scanBuffer.SetPreUTF8Strings(); prevBuffer.SetPreUTF8Strings(); } }
public override void SetUp() { base.SetUp(); /* * for (int i = 0; i < testFields.Length; i++) { * fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]); * } */ Array.Sort(TestTerms); int tokenUpto = 0; for (int i = 0; i < TestTerms.Length; i++) { Positions[i] = new int[TERM_FREQ]; // first position must be 0 for (int j = 0; j < TERM_FREQ; j++) { // positions are always sorted in increasing order Positions[i][j] = (int)(j * 10 + new Random(1).NextDouble() * 10); TestToken token = Tokens[tokenUpto++] = new TestToken(this); token.Text = TestTerms[i]; token.Pos = Positions[i][j]; token.StartOffset = j * 10; token.EndOffset = j * 10 + TestTerms[i].Length; } } Array.Sort(Tokens); Dir = NewDirectory(); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false)); Document doc = new Document(); for (int i = 0; i < TestFields.Length; i++) { FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); if (TestFieldsStorePos[i] && TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorOffsets = true; } else if (TestFieldsStorePos[i] && !TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; } else if (!TestFieldsStorePos[i] && TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorOffsets = true; } else { customType.StoreTermVectors = true; } doc.Add(new Field(TestFields[i], "", customType)); } //Create 5 documents for testing, they all have the same //terms for (int j = 0; j < 5; j++) { writer.AddDocument(doc); } writer.Commit(); Seg = writer.NewestSegment(); writer.Dispose(); FieldInfos = SegmentReader.ReadFieldInfos(Seg); }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new AnonymousFieldsReaderLocal(this); termVectorsLocal = new AnonymousTermVectorsLocal(this); if (termsIndexDivisor == 0) { throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); Debug.Assert(fields != null); // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); Debug.Assert(normsProducer != null); } else { normsProducer = null; } // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException, // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could // this be needed because we are using unchecked?? #if !NETSTANDARD try { #endif fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); #if !NETSTANDARD } #pragma warning disable 168 catch (System.AccessViolationException ave) #pragma warning restore 168 { } #endif if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
// Writes field updates (new _X_N updates files) to the directory public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) { lock (this) { //Debug.Assert(Thread.holdsLock(Writer)); //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates); Debug.Assert(dvUpdates.Any()); // Do this so we can delete any created files on // exception; this saves all codecs from having to do // it: TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); FieldInfos fieldInfos = null; bool success = false; try { Codec codec = Info.Info.Codec; // reader could be null e.g. for a just merged segment (from // IndexWriter.commitMergedDeletes). SegmentReader reader = this.Reader == null ? new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE) : this.Reader; try { // clone FieldInfos so that we can update their dvGen separately from // the reader's infos and write them to a new fieldInfos_gen file FieldInfos.Builder builder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap); // cannot use builder.add(reader.getFieldInfos()) because it does not // clone FI.attributes as well FI.dvGen foreach (FieldInfo fi in reader.FieldInfos) { FieldInfo clone = builder.Add(fi); // copy the stuff FieldInfos.Builder doesn't copy if (fi.Attributes() != null) { foreach (KeyValuePair <string, string> e in fi.Attributes()) { clone.PutAttribute(e.Key, e.Value); } } clone.DocValuesGen = fi.DocValuesGen; } // create new fields or update existing ones to have NumericDV type foreach (string f in dvUpdates.NumericDVUpdates.Keys) { builder.AddOrUpdate(f, NumericDocValuesField.TYPE); } // create new fields or update existing ones to have BinaryDV type foreach (string f in dvUpdates.BinaryDVUpdates.Keys) { builder.AddOrUpdate(f, BinaryDocValuesField.fType); } fieldInfos = builder.Finish(); long nextFieldInfosGen = Info.NextFieldInfosGen; string segmentSuffix = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX)); SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix); DocValuesFormat docValuesFormat = codec.DocValuesFormat(); DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state); bool fieldsConsumerSuccess = false; try { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates); foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates) { string field = e.Key; NumericDocValuesFieldUpdates fieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); Debug.Assert(fieldInfo != null); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddNumericField(fieldInfo, GetLongEnumerable(reader, field, fieldUpdates)); } // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates); foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates) { string field = e.Key; BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); Debug.Assert(fieldInfo != null); // System.out.println("[" + Thread.currentThread().getName() + "] 
RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates)); } codec.FieldInfosFormat().FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT); fieldsConsumerSuccess = true; } finally { if (fieldsConsumerSuccess) { fieldsConsumer.Dispose(); } else { IOUtils.CloseWhileHandlingException(fieldsConsumer); } } } finally { if (reader != this.Reader) { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader); reader.Dispose(); } } success = true; } finally { if (!success) { // Advance only the nextWriteDocValuesGen so that a 2nd // attempt to write will write to a new file Info.AdvanceNextWriteFieldInfosGen(); // Delete any partially created file(s): foreach (string fileName in trackingDir.CreatedFiles) { try { dir.DeleteFile(fileName); } catch (Exception) { // Ignore so we throw only the first exc } } } } Info.AdvanceFieldInfosGen(); // copy all the updates to mergingUpdates, so they can later be applied to the merged segment if (IsMerging) { foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates) { DocValuesFieldUpdates updates; if (!MergingDVUpdates.TryGetValue(e.Key, out updates)) { MergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates) { DocValuesFieldUpdates updates; if (!MergingDVUpdates.TryGetValue(e.Key, out updates)) { MergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } } // create a new map, keeping only the gens that are in use IDictionary <long, ISet <string> > genUpdatesFiles = Info.UpdatesFiles; IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >(); long fieldInfosGen = Info.FieldInfosGen; foreach (FieldInfo fi in fieldInfos) { long dvGen = fi.DocValuesGen; if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen)) { if (dvGen == fieldInfosGen) { newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles; } else { newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen]; } } } Info.GenUpdatesFiles = newGenUpdatesFiles; // wrote new files, should checkpoint() Writer.Checkpoint(); // if there is a reader open, reopen it to reflect the updates if (Reader != null) { SegmentReader newReader = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed); bool reopened = false; try { Reader.DecRef(); Reader = newReader; reopened = true; } finally { if (!reopened) { newReader.DecRef(); } } } } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader)readers[readers.Count - 1]; fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();) { IndexReader reader = (IndexReader)iter.Current; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; FieldInfos readerFieldInfos = segmentReader.FieldInfos(); int numReaderFieldInfos = readerFieldInfos.Size(); for (int j = 0; j < numReaderFieldInfos; j++) { FieldInfo fi = readerFieldInfos.FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false); fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; SetMatchingSegmentReaders(); if (mergeDocStores) { // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { int idx = 0; for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();) { IndexReader reader = (IndexReader)iter.Current; SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; FieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); if (fieldsReader != null && fieldsReader.CanReadRawDocs()) { matchingFieldsReader = fieldsReader; } } if (reader.HasDeletions()) { docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader); } else { docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader); } } } finally { fieldsWriter.Close(); } System.String fileName = segment + "." 
+ IndexFileNames.FIELDS_INDEX_EXTENSION; long fdxFileLength = directory.FileLength(fileName); if (4 + ((long)docCount) * 8 != fdxFileLength) { // This is most likely a bug in Sun JRE 1.6.0_04/_05; // we detect that the bug has struck, here, and // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption"); } } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else { for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();) { docCount += ((IndexReader)iter.Current).NumDocs(); } } return(docCount); }
public override void SetUp() { base.SetUp(); /* * for (int i = 0; i < testFields.length; i++) { * fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]); * } */ System.Array.Sort(testTerms); int tokenUpto = 0; for (int i = 0; i < testTerms.Length; i++) { positions[i] = new int[TERM_FREQ]; offsets[i] = new TermVectorOffsetInfo[TERM_FREQ]; // first position must be 0 for (int j = 0; j < TERM_FREQ; j++) { // positions are always sorted in increasing order positions[i][j] = (int)(j * 10 + (new System.Random().NextDouble()) * 10); // offsets are always sorted in increasing order offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length); TestToken token = tokens[tokenUpto++] = new TestToken(this); token.text = testTerms[i]; token.pos = positions[i][j]; token.startOffset = offsets[i][j].StartOffset; token.endOffset = offsets[i][j].EndOffset; } } System.Array.Sort(tokens); IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED, null); writer.UseCompoundFile = false; Document doc = new Document(); for (int i = 0; i < testFields.Length; i++) { Field.TermVector tv; if (testFieldsStorePos[i] && testFieldsStoreOff[i]) { tv = Field.TermVector.WITH_POSITIONS_OFFSETS; } else if (testFieldsStorePos[i] && !testFieldsStoreOff[i]) { tv = Field.TermVector.WITH_POSITIONS; } else if (!testFieldsStorePos[i] && testFieldsStoreOff[i]) { tv = Field.TermVector.WITH_OFFSETS; } else { tv = Field.TermVector.YES; } doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv)); } //Create 5 documents for testing, they all have the same //terms for (int j = 0; j < 5; j++) { writer.AddDocument(doc, null); } writer.Commit(null); seg = writer.NewestSegment().name; writer.Close(); fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION, null); }
public virtual void TestSameFieldNumbersAcrossSegments() { for (int i = 0; i < 2; i++) { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); Document d1 = new Document(); d1.Add(new StringField("f1", "first field", Field.Store.YES)); d1.Add(new StringField("f2", "second field", Field.Store.YES)); writer.AddDocument(d1); if (i == 1) { writer.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); } else { writer.Commit(); } Document d2 = new Document(); FieldType customType2 = new FieldType(TextField.TYPE_STORED); customType2.StoreTermVectors = true; d2.Add(new TextField("f2", "second field", Field.Store.NO)); d2.Add(new Field("f1", "first field", customType2)); d2.Add(new TextField("f3", "third field", Field.Store.NO)); d2.Add(new TextField("f4", "fourth field", Field.Store.NO)); writer.AddDocument(d2); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(2, sis.Count); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); Assert.AreEqual("f1", fis2.FieldInfo(0).Name); Assert.AreEqual("f2", fis2.FieldInfo(1).Name); Assert.AreEqual("f3", fis2.FieldInfo(2).Name); Assert.AreEqual("f4", fis2.FieldInfo(3).Name); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); writer.ForceMerge(1); writer.Dispose(); sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Count); FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(0)); Assert.AreEqual("f1", fis3.FieldInfo(0).Name); Assert.AreEqual("f2", fis3.FieldInfo(1).Name); Assert.AreEqual("f3", fis3.FieldInfo(2).Name); Assert.AreEqual("f4", fis3.FieldInfo(3).Name); dir.Dispose(); } }