/// <summary>
/// Initializes a new <see cref="Info"/> capturing the per-field state
/// (terms, postings slice bounds, and token statistics) built up by <c>AddField</c>.
/// </summary>
public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq)
{
    this.terms = terms;
    this.sliceArray = sliceArray;
    this.numTokens = numTokens;
    this.numOverlapTokens = numOverlapTokens;
    this.boost = boost;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.lastPosition = lastPosition;
    this.lastOffset = lastOffset;
}
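// Illustrative sketch (an assumption, not part of the original source): after AddField
// has indexed a field in which the term "fox" occurred twice with offsets stored, the
// parallel arrays in sliceArray conceptually hold:
//
//     int ord = terms.Find(new BytesRef("fox")); // hypothetical lookup by term bytes
//     sliceArray.freq[ord]   // == 2, the number of occurrences
//     sliceArray.start[ord]  // first int of this term's postings slice
//     sliceArray.end[ord]    // one past the last int written for this term
//
// Each occurrence contributes (position, startOffset, endOffset) to the slice when
// offsets are stored, and just the position otherwise (see AddField below).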
/// <summary>
/// Returns a string representation of the index data for debugging purposes.
/// </summary>
/// <returns>the string representation</returns>
public override string ToString()
{
    StringBuilder result = new StringBuilder(256);
    SortFields();
    int sumPositions = 0;
    int sumTerms = 0;
    BytesRef spare = new BytesRef();
    for (int i = 0; i < sortedFields.Length; i++)
    {
        KeyValuePair<string, Info> entry = sortedFields[i];
        string fieldName = entry.Key;
        Info info = entry.Value;
        info.SortTerms();
        result.Append(fieldName + ":\n");
        SliceByteStartArray sliceArray = info.sliceArray;
        int numPositions = 0;
        Int32BlockPool.SliceReader postingsReader = new Int32BlockPool.SliceReader(intBlockPool);
        for (int j = 0; j < info.terms.Count; j++)
        {
            int ord = info.sortedTerms[j];
            info.terms.Get(ord, spare);
            int freq = sliceArray.freq[ord];
            result.Append("\t'" + spare + "':" + freq + ":");
            postingsReader.Reset(sliceArray.start[ord], sliceArray.end[ord]);
            result.Append(" [");
            int iters = storeOffsets ? 3 : 1;
            while (!postingsReader.IsEndOfSlice)
            {
                result.Append("(");
                for (int k = 0; k < iters; k++)
                {
                    result.Append(postingsReader.ReadInt32());
                    if (k < iters - 1)
                    {
                        result.Append(", ");
                    }
                }
                result.Append(")");
                if (!postingsReader.IsEndOfSlice)
                {
                    result.Append(",");
                }
            }
            result.Append("]");
            result.Append("\n");
            numPositions += freq;
        }
        result.Append("\tterms=" + info.terms.Count);
        result.Append(", positions=" + numPositions);
        result.Append(", memory=" + RamUsageEstimator.HumanReadableUnits(RamUsageEstimator.SizeOf(info)));
        result.Append("\n");
        sumPositions += numPositions;
        sumTerms += info.terms.Count;
    }
    result.Append("\nfields=" + sortedFields.Length);
    result.Append(", terms=" + sumTerms);
    result.Append(", positions=" + sumPositions);
    result.Append(", memory=" + RamUsageEstimator.HumanReadableUnits(GetMemorySize()));
    return result.ToString();
}
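// For reference, an illustrative (made-up) fragment of the output produced above, for a
// single field "content" containing "the quick brown fox" tokenized on whitespace with
// offsets stored. Each posting is a (position, startOffset, endOffset) triple:
//
//     content:
//         'brown':1: [(2, 10, 15)]
//         'fox':1: [(3, 16, 19)]
//         'quick':1: [(1, 4, 9)]
//         'the':1: [(0, 0, 3)]
//         terms=4, positions=4, memory=...
//
//     fields=1, terms=4, positions=4, memory=...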
/// <summary>
/// Iterates over the given token stream and adds the resulting terms to the index;
/// equivalent to adding a tokenized, indexed, termVectorStored, unstored,
/// Lucene <see cref="Documents.Field"/>.
/// Finally closes the token stream. Note that untokenized keywords can be added with this method via
/// <see cref="KeywordTokenStream{T}(ICollection{T})"/>, the Lucene <c>KeywordTokenizer</c> or similar utilities.
/// </summary>
/// <param name="fieldName">a name to be associated with the text</param>
/// <param name="stream">the token stream to retrieve tokens from</param>
/// <param name="boost">the boost factor for hits for this field</param>
/// <param name="positionIncrementGap">the position increment gap if fields with the same name are added more than once</param>
/// <param name="offsetGap">the offset gap if fields with the same name are added more than once</param>
/// <seealso cref="Documents.Field.Boost"/>
public virtual void AddField(string fieldName, TokenStream stream, float boost, int positionIncrementGap, int offsetGap)
{
    try
    {
        if (fieldName == null)
        {
            throw new ArgumentException("fieldName must not be null");
        }
        if (stream == null)
        {
            throw new ArgumentException("token stream must not be null");
        }
        if (boost <= 0.0f)
        {
            throw new ArgumentException("boost factor must be greater than 0.0");
        }
        int numTokens = 0;
        int numOverlapTokens = 0;
        int pos = -1;
        BytesRefHash terms;
        SliceByteStartArray sliceArray;
        long sumTotalTermFreq = 0;
        int offset = 0;
        if (fields.TryGetValue(fieldName, out Info info))
        {
            // the field was added before: continue accumulating into its existing state
            numTokens = info.numTokens;
            numOverlapTokens = info.numOverlapTokens;
            pos = info.lastPosition + positionIncrementGap;
            offset = info.lastOffset + offsetGap;
            terms = info.terms;
            boost *= info.boost;
            sliceArray = info.sliceArray;
            sumTotalTermFreq = info.sumTotalTermFreq;
        }
        else
        {
            sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
            terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
        }

        if (!fieldInfos.ContainsKey(fieldName))
        {
            fieldInfos[fieldName] = new FieldInfo(fieldName, true, fieldInfos.Count, false, false, false,
                this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                DocValuesType.NONE, DocValuesType.NONE, null);
        }
        ITermToBytesRefAttribute termAtt = stream.GetAttribute<ITermToBytesRefAttribute>();
        IPositionIncrementAttribute posIncrAttribute = stream.AddAttribute<IPositionIncrementAttribute>();
        IOffsetAttribute offsetAtt = stream.AddAttribute<IOffsetAttribute>();
        BytesRef @ref = termAtt.BytesRef;
        stream.Reset();

        while (stream.IncrementToken())
        {
            termAtt.FillBytesRef();
            // if (DEBUG) System.err.println("token='" + term + "'");
            numTokens++;
            int posIncr = posIncrAttribute.PositionIncrement;
            if (posIncr == 0)
            {
                numOverlapTokens++;
            }
            pos += posIncr;
            int ord = terms.Add(@ref);
            if (ord < 0)
            {
                // term already seen in this field: continue its existing postings slice
                ord = (-ord) - 1;
                postingsWriter.Reset(sliceArray.end[ord]);
            }
            else
            {
                // first occurrence: start a new postings slice for this term
                sliceArray.start[ord] = postingsWriter.StartNewSlice();
            }
            sliceArray.freq[ord]++;
            sumTotalTermFreq++;
            if (!storeOffsets)
            {
                postingsWriter.WriteInt32(pos);
            }
            else
            {
                postingsWriter.WriteInt32(pos);
                postingsWriter.WriteInt32(offsetAtt.StartOffset + offset);
                postingsWriter.WriteInt32(offsetAtt.EndOffset + offset);
            }
            sliceArray.end[ord] = postingsWriter.CurrentOffset;
        }
        stream.End();

        // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
        if (numTokens > 0)
        {
            fields[fieldName] = new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.EndOffset + offset, sumTotalTermFreq);
            sortedFields = null; // invalidate sorted view, if any
        }
    }
    catch (IOException e)
    {
        // can never happen
        throw new Exception(e.ToString(), e);
    }
    finally
    {
        try
        {
            if (stream != null)
            {
                stream.Dispose();
            }
        }
        catch (IOException e2)
        {
            throw new Exception(e2.ToString(), e2);
        }
    }
}
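// A minimal usage sketch (an assumption, not part of the original source): indexing a
// single text field through an analyzer's token stream. WhitespaceAnalyzer and
// LuceneVersion.LUCENE_48 are assumed available from Lucene.Net.Analysis.Core and
// Lucene.Net.Util; the field name and text are illustrative only.
//
//     var index = new MemoryIndex(storeOffsets: true);
//     var analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);
//     // AddField disposes the token stream itself (see the finally block above),
//     // so no using block is needed around it.
//     index.AddField("content", analyzer.GetTokenStream("content", "the quick brown fox"),
//                    1.0f, positionIncrementGap: 0, offsetGap: 1);
//     Console.WriteLine(index.ToString()); // dumps the postings, as in the sample above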