public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
{
    intPool = termsHash.intPool;
    bytePool = termsHash.bytePool;
    termBytePool = termsHash.termBytePool;
    docState = termsHash.docState;
    this.termsHash = termsHash;
    bytesUsed = termsHash.bytesUsed;
    fieldState = docInverterPerField.fieldState;
    this.consumer = termsHash.consumer.AddField(this, fieldInfo);
    PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
    bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
    streamCount = consumer.StreamCount;
    numPostingInt = 2 * streamCount;
    this.fieldInfo = fieldInfo;
    if (nextTermsHash != null)
    {
        nextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
    }
    else
    {
        nextPerField = null;
    }
}
public override void FinishDocument(TermsHash termsHash)
{
    Debug.Assert(DocWriter.TestPoint("TermVectorsTermsWriter.finishDocument start"));

    if (!HasVectors)
    {
        return;
    }

    InitTermVectorsWriter();
    Fill(DocState.DocID);

    // Append term vectors to the real outputs:
    Writer.StartDocument(NumVectorFields);
    for (int i = 0; i < NumVectorFields; i++)
    {
        PerFields[i].FinishDocument();
    }
    Writer.FinishDocument();

    Debug.Assert(LastDocID == DocState.DocID, "lastDocID=" + LastDocID + " docState.docID=" + DocState.DocID);

    LastDocID++;

    termsHash.Reset();
    Reset();
    Debug.Assert(DocWriter.TestPoint("TermVectorsTermsWriter.finishDocument end"));
}
internal override void FinishDocument(TermsHash termsHash)
{
    Debug.Assert(docWriter.TestPoint("TermVectorsTermsWriter.finishDocument start"));

    if (!hasVectors)
    {
        return;
    }

    InitTermVectorsWriter();
    Fill(docState.docID);

    // Append term vectors to the real outputs:
    writer.StartDocument(numVectorFields);
    for (int i = 0; i < numVectorFields; i++)
    {
        perFields[i].FinishDocument();
    }
    writer.FinishDocument();

    Debug.Assert(lastDocID == docState.docID, "lastDocID=" + lastDocID + " docState.docID=" + docState.docID);

    lastDocID++;

    termsHash.Reset();
    Reset();
    Debug.Assert(docWriter.TestPoint("TermVectorsTermsWriter.finishDocument end"));
}
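// Both variants above call Fill(docID) before writing vectors: documents that
// had no term vectors still need an (empty) entry so the vectors file stays
// aligned with docIDs. A minimal sketch of that gap-filling step, assuming the
// same writer/lastDocID fields as the snippet above (an illustration, not
// necessarily the verbatim implementation):
private void Fill(int docID)
{
    while (lastDocID < docID)
    {
        writer.StartDocument(0); // an empty document: zero vector fields
        writer.FinishDocument();
        lastDocID++;
    }
}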
public TermsHashPerThread(DocInverterPerThread docInverterPerThread, TermsHash termsHash, TermsHash nextTermsHash, TermsHashPerThread primaryPerThread)
{
    docState = docInverterPerThread.docState;
    this.termsHash = termsHash;
    this.consumer = termsHash.consumer.AddThread(this);

    if (nextTermsHash != null)
    {
        // We are primary
        charPool = new CharBlockPool(termsHash.docWriter);
        primary = true;
    }
    else
    {
        charPool = primaryPerThread.charPool;
        primary = false;
    }

    intPool = new IntBlockPool(termsHash.docWriter, termsHash.trackAllocations);
    bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator, termsHash.trackAllocations);

    if (nextTermsHash != null)
    {
        nextPerThread = nextTermsHash.AddThread(docInverterPerThread, this);
    }
    else
    {
        nextPerThread = null;
    }
}
public override DocConsumer GetChain(DocumentsWriterPerThread documentsWriterPerThread)
{
    /*
     * This is the current indexing chain:
     *
     * DocConsumer / DocConsumerPerThread
     *   --> code: DocFieldProcessor
     *     --> DocFieldConsumer / DocFieldConsumerPerField
     *       --> code: DocFieldConsumers / DocFieldConsumersPerField
     *         --> code: DocInverter / DocInverterPerField
     *           --> InvertedDocConsumer / InvertedDocConsumerPerField
     *             --> code: TermsHash / TermsHashPerField
     *               --> TermsHashConsumer / TermsHashConsumerPerField
     *                 --> code: FreqProxTermsWriter / FreqProxTermsWriterPerField
     *                 --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerField
     *           --> InvertedDocEndConsumer / InvertedDocConsumerPerField
     *             --> code: NormsConsumer / NormsConsumerPerField
     *     --> StoredFieldsConsumer
     *       --> TwoStoredFieldConsumers
     *         -> code: StoredFieldsProcessor
     *         -> code: DocValuesProcessor
     */

    // Build up indexing chain:
    TermsHashConsumer termVectorsWriter = new TermVectorsConsumer(documentsWriterPerThread);
    TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();

    InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true,
        new TermsHash(documentsWriterPerThread, termVectorsWriter, false, null));
    NormsConsumer normsWriter = new NormsConsumer();
    DocInverter docInverter = new DocInverter(documentsWriterPerThread.docState, termsHash, normsWriter);
    StoredFieldsConsumer storedFields = new TwoStoredFieldsConsumers(
        new StoredFieldsProcessor(documentsWriterPerThread),
        new DocValuesProcessor(documentsWriterPerThread.bytesUsed));

    return new DocFieldProcessor(documentsWriterPerThread, docInverter, storedFields);
}
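// The TwoStoredFieldsConsumers instance above fans a single stream of calls
// out to two children (stored fields and doc values). A stripped-down model of
// that fan-out idea, with illustrative names rather than the real Lucene.NET
// classes:
abstract class FieldConsumerSketch
{
    public abstract void StartDocument();
    public abstract void FinishDocument();
}

class TwoConsumersSketch : FieldConsumerSketch
{
    private readonly FieldConsumerSketch first;
    private readonly FieldConsumerSketch second;

    public TwoConsumersSketch(FieldConsumerSketch first, FieldConsumerSketch second)
    {
        this.first = first;
        this.second = second;
    }

    // Every lifecycle call is simply forwarded to both children in order.
    public override void StartDocument() { first.StartDocument(); second.StartDocument(); }
    public override void FinishDocument() { first.FinishDocument(); second.FinishDocument(); }
}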
public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
{
    IntPool = termsHash.IntPool;
    BytePool = termsHash.BytePool;
    TermBytePool = termsHash.TermBytePool;
    DocState = termsHash.DocState;
    this.TermsHash = termsHash;
    BytesUsed = termsHash.BytesUsed;
    FieldState = docInverterPerField.FieldState;
    this.Consumer = termsHash.Consumer.AddField(this, fieldInfo);
    PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);
    BytesHash = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
    StreamCount = Consumer.StreamCount;
    NumPostingInt = 2 * StreamCount;
    this.FieldInfo = fieldInfo;
    if (nextTermsHash != null)
    {
        NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
    }
    else
    {
        NextPerField = null;
    }
}
public TermsHash(DocumentsWriter docWriter, bool trackAllocations, TermsHashConsumer consumer, TermsHash nextTermsHash)
{
    this.docWriter = docWriter;
    this.consumer = consumer;
    this.nextTermsHash = nextTermsHash;
    this.trackAllocations = trackAllocations;

    // Why + 4*POINTER_NUM_BYTE below?
    //   +1: Posting is referenced by postingsFreeList array
    //   +3: Posting is referenced by hash, which
    //       targets 25-50% fill factor; approximate this
    //       as 3X # pointers
    bytesPerPosting = consumer.BytesPerPosting() + 4 * DocumentsWriter.POINTER_NUM_BYTE;
    postingsFreeChunk = (int)(DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting);
}
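// Worked example of the sizing arithmetic above, using explicitly hypothetical
// numbers: suppose consumer.BytesPerPosting() returns 40 and we run on a
// 64-bit runtime, so POINTER_NUM_BYTE is 8 (see the static constructor at the
// end of this listing).
const int POINTER_NUM_BYTE = 8;          // pointer size on a 64-bit runtime
const int BYTE_BLOCK_SIZE = 32 * 1024;   // assuming Lucene's 32 KB byte blocks
int bytesPerPosting = 40 + 4 * POINTER_NUM_BYTE;             // 40 + 32 = 72
int postingsFreeChunk = BYTE_BLOCK_SIZE / bytesPerPosting;   // 32768 / 72 = 455
// i.e. one byte block's worth of memory is accounted for as roughly 455
// postings when recycling entries through the free list.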
public TermsHash(DocumentsWriterPerThread docWriter, TermsHashConsumer consumer, bool trackAllocations, TermsHash nextTermsHash)
{
    this.docState = docWriter.docState;
    this.consumer = consumer;
    this.trackAllocations = trackAllocations;
    this.nextTermsHash = nextTermsHash;
    this.bytesUsed = trackAllocations ? docWriter.bytesUsed : Counter.NewCounter();
    intPool = new Int32BlockPool(docWriter.intBlockAllocator);
    bytePool = new ByteBlockPool(docWriter.byteBlockAllocator);

    if (nextTermsHash != null)
    {
        // We are primary
        primary = true;
        termBytePool = bytePool;
        nextTermsHash.termBytePool = bytePool;
    }
    else
    {
        primary = false;
    }
}
public TermsHash(DocumentsWriterPerThread docWriter, TermsHashConsumer consumer, bool trackAllocations, TermsHash nextTermsHash)
{
    this.DocState = docWriter.docState;
    this.Consumer = consumer;
    this.TrackAllocations = trackAllocations;
    this.NextTermsHash = nextTermsHash;
    this.BytesUsed = trackAllocations ? docWriter.bytesUsed : Counter.NewCounter();
    IntPool = new IntBlockPool(docWriter.intBlockAllocator);
    BytePool = new ByteBlockPool(docWriter.ByteBlockAllocator);

    if (nextTermsHash != null)
    {
        // We are primary
        Primary = true;
        TermBytePool = BytePool;
        nextTermsHash.TermBytePool = BytePool;
    }
    else
    {
        Primary = false;
    }
}
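// Both variants above hand the primary hash's byte pool to nextTermsHash as
// its term byte pool: the secondary (term vector) hash must see the exact
// bytes the primary (freq/prox) hash interned, so each term is stored once and
// shared by pool offset. A hedged sketch of that idea using BytesRefHash
// directly (variable names are illustrative, and this assumes the public
// BytesRefHash API rather than the internal per-field plumbing):
var sharedPool = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
var primaryHash = new BytesRefHash(sharedPool);
var secondaryHash = new BytesRefHash(sharedPool);

int termID = primaryHash.Add(new BytesRef("lucene"));     // primary interns the term bytes
int byteStart = primaryHash.ByteStart(termID);            // where those bytes live in the pool
int sharedID = secondaryHash.AddByPoolOffset(byteStart);  // secondary reuses them by offset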
internal override void FinishDocument(TermsHash termsHash)
{
    // LUCENENET: .NET doesn't support asserts in release mode
    if (Lucene.Net.Diagnostics.Debugging.AssertsEnabled)
    {
        docWriter.TestPoint("TermVectorsTermsWriter.finishDocument start");
    }

    if (!hasVectors)
    {
        return;
    }

    InitTermVectorsWriter();
    Fill(docState.docID);

    // Append term vectors to the real outputs:
    writer.StartDocument(numVectorFields);
    for (int i = 0; i < numVectorFields; i++)
    {
        perFields[i].FinishDocument();
    }
    writer.FinishDocument();

    Debug.Assert(lastDocID == docState.docID, "lastDocID=" + lastDocID + " docState.docID=" + docState.docID);

    lastDocID++;

    termsHash.Reset();
    Reset();

    // LUCENENET: .NET doesn't support asserts in release mode
    if (Lucene.Net.Diagnostics.Debugging.AssertsEnabled)
    {
        docWriter.TestPoint("TermVectorsTermsWriter.finishDocument end");
    }
}
internal DocumentsWriter(Directory directory, IndexWriter writer)
{
    this.directory = directory;
    this.writer = writer;
    this.similarity = writer.GetSimilarity();
    flushedDocCount = writer.MaxDoc();
    byteBlockAllocator = new ByteBlockAllocator(this);
    waitQueue = new WaitQueue(this);

    /*
    This is the current indexing chain:

    DocConsumer / DocConsumerPerThread
      --> code: DocFieldProcessor / DocFieldProcessorPerThread
        --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
          --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
            --> code: DocInverter / DocInverterPerThread / DocInverterPerField
              --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                --> code: TermsHash / TermsHashPerThread / TermsHashPerField
                  --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
                    --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
                    --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
              --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
            --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
    */

    // TODO FI: this should be something the user can pass in
    // Build up indexing chain:
    TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this);
    TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();

    InvertedDocConsumer termsHash = new TermsHash(this, true, freqProxWriter,
        new TermsHash(this, false, termVectorsWriter, null));
    NormsWriter normsWriter = new NormsWriter();
    DocInverter docInverter = new DocInverter(termsHash, normsWriter);
    StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this);
    DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
    consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers);
}
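// The "indexing chain" comment above is a linear pipeline: each "code:" entry
// is a concrete stage that does its own work and then hands the document to
// the next abstraction. A minimal model of that composition (names here are
// illustrative, not the real Lucene classes):
abstract class DocConsumerSketch
{
    public abstract void ProcessDocument();
}

class InverterStageSketch : DocConsumerSketch
{
    private readonly DocConsumerSketch downstream; // e.g. the TermsHash stage

    public InverterStageSketch(DocConsumerSketch downstream)
    {
        this.downstream = downstream;
    }

    public override void ProcessDocument()
    {
        // ... invert the document's fields here ...
        downstream.ProcessDocument(); // then pass the document down the chain
    }
}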
internal abstract void FinishDocument(TermsHash termsHash);
public override void FinishDocument(TermsHash termsHash) { }
internal override void FinishDocument(TermsHash termsHash) { }
internal override DocConsumer GetChain(DocumentsWriter documentsWriter)
{
    /*
    This is the current indexing chain:

    DocConsumer / DocConsumerPerThread
      --> code: DocFieldProcessor / DocFieldProcessorPerThread
        --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
          --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
            --> code: DocInverter / DocInverterPerThread / DocInverterPerField
              --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                --> code: TermsHash / TermsHashPerThread / TermsHashPerField
                  --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
                    --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
                    --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
              --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
            --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
    */

    // Build up indexing chain:
    TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
    TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();

    InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter,
        new TermsHash(documentsWriter, false, termVectorsWriter, null));
    NormsWriter normsWriter = new NormsWriter();
    DocInverter docInverter = new DocInverter(termsHash, normsWriter);

    return new DocFieldProcessor(documentsWriter, docInverter);
}
public abstract void FinishDocument(TermsHash termsHash);
public override void Flush(IDictionary<string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all ThreadStates
    IList<FreqProxTermsWriterPerField> allFields = new List<FreqProxTermsWriterPerField>();

    foreach (TermsHashConsumerPerField f in fieldsToFlush.Values)
    {
        FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)f;
        if (perField.termsHashPerField.bytesHash.Count > 0)
        {
            allFields.Add(perField);
        }
    }

    int numAllFields = allFields.Count;

    // Sort by field name
    CollectionUtil.IntroSort(allFields);

    FieldsConsumer consumer = state.SegmentInfo.Codec.PostingsFormat.FieldsConsumer(state);

    bool success = false;
    try
    {
        TermsHash termsHash = null;

        /*
         * Current writer chain:
         *   FieldsConsumer
         *     -> IMPL: FormatPostingsTermsDictWriter
         *       -> TermsConsumer
         *         -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
         *           -> DocsConsumer
         *             -> IMPL: FormatPostingsDocsWriter
         *               -> PositionsConsumer
         *                 -> IMPL: FormatPostingsPositionsWriter
         */

        for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++)
        {
            FieldInfo fieldInfo = allFields[fieldNumber].fieldInfo;
            FreqProxTermsWriterPerField fieldWriter = allFields[fieldNumber];

            // If this field has postings then add them to the segment
            fieldWriter.Flush(fieldInfo.Name, consumer, state);

            TermsHashPerField perField = fieldWriter.termsHashPerField;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termsHash == null || termsHash == perField.termsHash);
            }
            termsHash = perField.termsHash;
            int numPostings = perField.bytesHash.Count;
            perField.Reset();
            perField.ShrinkHash(numPostings);
            fieldWriter.Reset();
        }

        if (termsHash != null)
        {
            termsHash.Reset();
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
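// CollectionUtil.IntroSort(allFields) above orders the per-field writers by
// field name, which is what makes field order in the flushed segment
// deterministic. That only works because the element type is comparable;
// presumably something along these lines (a sketch, not the verbatim class):
class PerFieldSketch : IComparable<PerFieldSketch>
{
    public string FieldName { get; set; }

    public int CompareTo(PerFieldSketch other)
    {
        // Ordinal comparison keeps ordering stable regardless of culture.
        return string.CompareOrdinal(FieldName, other.FieldName);
    }
}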
static DocumentsWriter()
{
    DefaultIndexingChain = new IndexingChain()
    {
        GetChain = (documentsWriter) =>
        {
            /*
            This is the current indexing chain:

            DocConsumer / DocConsumerPerThread
              --> code: DocFieldProcessor / DocFieldProcessorPerThread
                --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
                  --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
                    --> code: DocInverter / DocInverterPerThread / DocInverterPerField
                      --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                        --> code: TermsHash / TermsHashPerThread / TermsHashPerField
                          --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
                            --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
                            --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
                      --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                        --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
                    --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
            */

            // Build up indexing chain:
            TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
            TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();

            InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter,
                new TermsHash(documentsWriter, false, termVectorsWriter, null));
            NormsWriter normsWriter = new NormsWriter();
            DocInverter docInverter = new DocInverter(termsHash, normsWriter);

            return new DocFieldProcessor(documentsWriter, docInverter);
        }
    };

    POINTER_NUM_BYTE = Constants.JRE_IS_64BIT ? 8 : 4;
}