C# (CSharp) Lucene.Net.Index TermsHashPerField.InitReader Exemples

Langage de programmation: C# (CSharp)

Espace de nommage/Pack: Lucene.Net.Index

Méthode/Fonction: InitReader

Exemples sur hotexamples.com : 2

C# (CSharp) Lucene.Net.Index TermsHashPerField.InitReader - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de Lucene.Net.Index.TermsHashPerField.InitReader extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

Reset(5)

ShrinkHash(4)

WriteBytes(3)

reset(3)

WriteVInt(3)

Start(3)

Abort(2)

shrinkHash(2)

writeVInt(2)

InitReader(2)

SortPostings(2)

Finish(2)

Add(1)

abort(1)

add(1)

finish(1)

initReader(1)

sortPostings(1)

start(1)

writeBytes(1)

WriteVInt32(1)

Méthodes fréquemment utilisées

Reset (5)

ShrinkHash (4)

WriteBytes (3)

reset (3)

WriteVInt (3)

Start (3)

Abort (2)

shrinkHash (2)

writeVInt (2)

InitReader (2)

Méthodes fréquemment utilisées

SortPostings (2)

Finish (2)

Add (1)

abort (1)

add (1)

finish (1)

initReader (1)

sortPostings (1)

start (1)

writeBytes (1)

WriteVInt32 (1)

Méthodes fréquemment utilisées

WriteVInt32 (1)

Associées

Node

UpdateDriverModel

DearVisualStudio

StaffDataService

DBCallback

Utilities

ProxyObservable

CajaRefuerzo

Inv_LeatherTransferReceiveItem

Scene

Related in langs

jalemurldecode (PHP)

JsonParser (PHP)

hdr_base64_decode (C++)

array_expression (C++)

Log_ON (Go)

Resource (Go)

WeightItem (Java)

OComplexObjects (Java)

infer_psd_type (Python)

gethostbyname (Python)

Exemple #1

0

Afficher le fichier

/// <summary>
/// Walks every unique term recorded for this field, in the order given by the
/// consumer's comparator, and streams its postings (docs, and where recorded,
/// freqs / positions / payloads / offsets) to the supplied codec consumer.
/// </summary>
/// <param name="fieldName">Field name used to build the prototype <c>Term</c> for delete lookups.</param>
/// <param name="consumer">Codec consumer that receives the field's terms.</param>
/// <param name="state">Segment write state; its live docs and delete count may be updated here.</param>
internal void Flush(string fieldName, FieldsConsumer consumer, SegmentWriteState state)
{
    if (!fieldInfo.Indexed)
    {
        // Unindexed field: nothing to hand to the codec.
        return;
    }

    TermsConsumer termsConsumer = consumer.AddField(fieldInfo);
    IComparer<BytesRef> termComparer = termsConsumer.Comparator;

    // The in-RAM buffer was encoded with the options captured in
    // HasFreq/HasProx/HasOffsets, but the field's options may have been
    // downgraded since. Decode with the "read" flags and write the
    // segment with the field's current options ("write" flags).
    FieldInfo.IndexOptions? currentFieldIndexOptions = fieldInfo.FieldIndexOptions;
    Debug.Assert(currentFieldIndexOptions != null);

    bool writeTermFreq = currentFieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
    bool writePositions = currentFieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    bool writeOffsets = currentFieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

    bool readTermFreq = this.HasFreq;
    bool readPositions = this.HasProx;
    bool readOffsets = this.HasOffsets;

    // Whatever we intend to write must have been recorded in RAM.
    Debug.Assert(!writeTermFreq || readTermFreq);
    Debug.Assert(!writePositions || readPositions);
    Debug.Assert(!writeOffsets || readOffsets);
    Debug.Assert(!writeOffsets || writePositions);

    // Pending delete-by-term entries for this segment, or null when there are none.
    IDictionary<Term, int?> segDeletes =
        (state.SegUpdates != null && state.SegUpdates.Terms.Count > 0)
            ? state.SegUpdates.Terms
            : null;

    int[] sortedTermIDs = TermsHashPerField.SortPostings(termComparer);
    int termCount = TermsHashPerField.BytesHash.Size();
    BytesRef termText = new BytesRef();
    FreqProxPostingsArray postings = (FreqProxPostingsArray)TermsHashPerField.PostingsArray;
    ByteSliceReader freqReader = new ByteSliceReader();
    ByteSliceReader proxReader = new ByteSliceReader();

    FixedBitSet visitedDocs = new FixedBitSet(state.SegmentInfo.DocCount);
    long sumTotalTermFreq = 0;
    long sumDocFreq = 0;

    Term protoTerm = new Term(fieldName);

    for (int termIndex = 0; termIndex < termCount; termIndex++)
    {
        int termID = sortedTermIDs[termIndex];

        // Point termText at this term's bytes in the shared pool.
        int textStart = postings.TextStarts[termID];
        TermsHashPerField.BytePool.SetBytesRef(termText, textStart);

        TermsHashPerField.InitReader(freqReader, termID, 0);
        if (readPositions || readOffsets)
        {
            TermsHashPerField.InitReader(proxReader, termID, 1);
        }

        PostingsConsumer postingsConsumer = termsConsumer.StartTerm(termText);

        // Docs below this limit are deleted by this term; 0 means none.
        int? delDocLimit = 0;
        if (segDeletes != null)
        {
            protoTerm.Bytes_Renamed = termText;
            int? docIDUpto;
            segDeletes.TryGetValue(protoTerm, out docIDUpto);
            delDocLimit = docIDUpto ?? 0;
        }

        int docFreq = 0;
        long totalTermFreq = 0;
        int docID = 0;

        while (true)
        {
            int termFreq;

            if (!freqReader.Eof())
            {
                int docCode = freqReader.ReadVInt();
                if (readTermFreq)
                {
                    // Low bit set means freq == 1; otherwise the freq follows as its own VInt.
                    docID += (int)((uint)docCode >> 1);
                    termFreq = (docCode & 1) != 0 ? 1 : freqReader.ReadVInt();
                }
                else
                {
                    docID += docCode;
                    termFreq = -1;
                }

                Debug.Assert(docID != postings.LastDocIDs[termID]);
            }
            else if (postings.LastDocCodes[termID] != -1)
            {
                // The stream is drained; emit the last doc, which is kept in the postings arrays.
                docID = postings.LastDocIDs[termID];
                termFreq = readTermFreq ? postings.TermFreqs[termID] : -1;
                postings.LastDocCodes[termID] = -1;
            }
            else
            {
                // Stream drained and the last doc was already emitted.
                break;
            }

            docFreq++;
            Debug.Assert(docID < state.SegmentInfo.DocCount, "doc=" + docID + " maxDoc=" + state.SegmentInfo.DocCount);

            // Deleted docs are deliberately not skipped here: the doc may be seen
            // before the term that deletes it, which would make the written
            // postings non-deterministic.
            visitedDocs.Set(docID);

            postingsConsumer.StartDoc(docID, writeTermFreq ? termFreq : -1);

            if (docID < delDocLimit)
            {
                // Mark the doc deleted, creating the live-docs bitset on first use.
                if (state.LiveDocs == null)
                {
                    state.LiveDocs = DocState.DocWriter.Codec.LiveDocsFormat().NewLiveDocs(state.SegmentInfo.DocCount);
                }

                if (state.LiveDocs.Get(docID))
                {
                    state.DelCountOnFlush++;
                    state.LiveDocs.Clear(docID);
                }
            }

            totalTermFreq += termFreq;

            // Copy positions / payloads / offsets, re-encoding for the consumer.
            if (readPositions || readOffsets)
            {
                int position = 0;
                int offset = 0;

                for (int occurrence = 0; occurrence < termFreq; occurrence++)
                {
                    if (!readPositions)
                    {
                        continue;
                    }

                    BytesRef thisPayload;
                    int posCode = proxReader.ReadVInt();
                    position += (int)((uint)posCode >> 1);

                    if ((posCode & 1) != 0)
                    {
                        // This position carries a payload; reuse the shared buffer.
                        int payloadLength = proxReader.ReadVInt();

                        if (Payload == null)
                        {
                            Payload = new BytesRef();
                            Payload.Bytes = new sbyte[payloadLength];
                        }
                        else if (Payload.Bytes.Length < payloadLength)
                        {
                            Payload.Grow(payloadLength);
                        }

                        proxReader.ReadBytes(Payload.Bytes, 0, payloadLength);
                        Payload.Length = payloadLength;
                        thisPayload = Payload;
                    }
                    else
                    {
                        thisPayload = null;
                    }

                    if (readOffsets)
                    {
                        // Offsets must be consumed from the stream even when they are not written.
                        int startOffset = offset + proxReader.ReadVInt();
                        int endOffset = startOffset + proxReader.ReadVInt();

                        if (writePositions)
                        {
                            if (writeOffsets)
                            {
                                Debug.Assert(startOffset >= 0 && endOffset >= startOffset, "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset);
                                postingsConsumer.AddPosition(position, thisPayload, startOffset, endOffset);
                            }
                            else
                            {
                                postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                            }
                        }

                        offset = startOffset;
                    }
                    else if (writePositions)
                    {
                        postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                    }
                }
            }

            postingsConsumer.FinishDoc();
        }

        termsConsumer.FinishTerm(termText, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
        sumTotalTermFreq += totalTermFreq;
        sumDocFreq += docFreq;
    }

    termsConsumer.Finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.Cardinality());
}

Exemple #2

0

Afficher le fichier

Fichier : TermVectorsTermsWriterPerField.cs Projet : stgwilli/ravendb

/// <summary>
/// Called once per field per document when term vectors are enabled.
/// Writes the sorted postings for this field (prefix-compressed term
/// bytes, frequency, and optionally positions and offsets) to the
/// per-document term vector output, then resets the per-field hash.
/// </summary>
internal override void Finish()
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));

    int numPostings = termsHashPerField.numPostings;
    System.Diagnostics.Debug.Assert(numPostings >= 0);

    // Nothing to write if vectors are disabled or no terms were collected.
    if (!doVectors || numPostings == 0)
    {
        return;
    }

    // Track the high-water mark of postings seen for this field.
    if (numPostings > maxNumPostings)
    {
        maxNumPostings = numPostings;
    }

    IndexOutput output = perThread.doc.tvf;

    // All occurrences of this field in the doc have been inverted;
    // the hash contents are now flushed into the per-doc output.
    System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
    System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo));

    perThread.doc.AddField(termsHashPerField.fieldInfo.number);

    RawPostingList[] sortedPostings = termsHashPerField.SortPostings();

    output.WriteVInt(numPostings);

    // Flag byte telling readers which optional streams follow each term.
    byte flags = 0;
    if (doVectorPositions)
    {
        flags |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (doVectorOffsets)
    {
        flags |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    }
    output.WriteByte(flags);

    int encoderUpto = 0;
    int previousByteCount = 0;

    ByteSliceReader sliceReader = perThread.vectorSliceReader;
    char[][] charBlocks = perThread.termsHashPerThread.charPool.buffers;

    for (int index = 0; index < numPostings; index++)
    {
        TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)sortedPostings[index];
        int termFreq = posting.freq;

        // Locate the term's chars: block index plus offset within the block.
        char[] termChars = charBlocks[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
        int termCharStart = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

        // Two encoders are alternated so the previous term's bytes stay
        // available for the prefix comparison without copying.
        UnicodeUtil.UTF8Result encoded = perThread.utf8Results[encoderUpto];
        UnicodeUtil.UTF16toUTF8(termChars, termCharStart, encoded);
        int currentByteCount = encoded.length;

        // Length of the byte prefix shared with the previous term.
        int prefix = 0;
        if (index > 0)
        {
            byte[] previousBytes = perThread.utf8Results[1 - encoderUpto].result;
            byte[] currentBytes = perThread.utf8Results[encoderUpto].result;
            while (prefix < previousByteCount
                   && prefix < currentByteCount
                   && previousBytes[prefix] == currentBytes[prefix])
            {
                prefix++;
            }
        }

        encoderUpto = 1 - encoderUpto;
        previousByteCount = currentByteCount;

        int suffix = currentByteCount - prefix;
        output.WriteVInt(prefix);
        output.WriteVInt(suffix);
        output.WriteBytes(encoded.result, prefix, suffix);
        output.WriteVInt(termFreq);

        if (doVectorPositions)
        {
            // Stream 0 is copied when positions are stored.
            termsHashPerField.InitReader(sliceReader, posting, 0);
            sliceReader.WriteTo(output);
        }

        if (doVectorOffsets)
        {
            // Stream 1 is copied when offsets are stored.
            termsHashPerField.InitReader(sliceReader, posting, 1);
            sliceReader.WriteTo(output);
        }
    }

    termsHashPerField.Reset();
    perThread.termsHashPerThread.Reset(false);
}