示例#1
0
 public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq)
 {
     this.terms            = terms;
     this.sliceArray       = sliceArray;
     this.numTokens        = numTokens;
     this.numOverlapTokens = numOverlapTokens;
     this.boost            = boost;
     this.sumTotalTermFreq = sumTotalTermFreq;
     this.lastPosition     = lastPosition;
     this.lastOffset       = lastOffset;
 }
 public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq)
 {
     this.terms = terms;
     this.sliceArray = sliceArray;
     this.numTokens = numTokens;
     this.numOverlapTokens = numOverlapTokens;
     this.boost = boost;
     this.sumTotalTermFreq = sumTotalTermFreq;
     this.lastPosition = lastPosition;
     this.lastOffset = lastOffset;
 }
示例#3
0
        /// <summary>
        /// Returns a String representation of the index data for debugging purposes.
        /// </summary>
        /// <returns> the string representation </returns>
        public override string ToString()
        {
            StringBuilder result = new StringBuilder(256);

            SortFields();
            int      sumPositions = 0;
            int      sumTerms     = 0;
            BytesRef spare        = new BytesRef();

            for (int i = 0; i < sortedFields.Length; i++)
            {
                KeyValuePair <string, Info> entry = sortedFields[i];
                string fieldName = entry.Key;
                Info   info      = entry.Value;
                info.SortTerms();
                result.Append(fieldName + ":\n");
                SliceByteStartArray sliceArray = info.sliceArray;
                int numPositions = 0;
                Int32BlockPool.SliceReader postingsReader = new Int32BlockPool.SliceReader(intBlockPool);
                for (int j = 0; j < info.terms.Count; j++)
                {
                    int ord = info.sortedTerms[j];
                    info.terms.Get(ord, spare);
                    int freq = sliceArray.freq[ord];
                    result.Append("\t'" + spare + "':" + freq + ":");
                    postingsReader.Reset(sliceArray.start[ord], sliceArray.end[ord]);
                    result.Append(" [");
                    int iters = storeOffsets ? 3 : 1;
                    while (!postingsReader.IsEndOfSlice)
                    {
                        result.Append("(");

                        for (int k = 0; k < iters; k++)
                        {
                            result.Append(postingsReader.ReadInt32());
                            if (k < iters - 1)
                            {
                                result.Append(", ");
                            }
                        }
                        result.Append(")");
                        if (!postingsReader.IsEndOfSlice)
                        {
                            result.Append(",");
                        }
                    }
                    result.Append("]");
                    result.Append("\n");
                    numPositions += freq;
                }

                result.Append("\tterms=" + info.terms.Count);
                result.Append(", positions=" + numPositions);
                result.Append(", memory=" + RamUsageEstimator.HumanReadableUnits(RamUsageEstimator.SizeOf(info)));
                result.Append("\n");
                sumPositions += numPositions;
                sumTerms     += info.terms.Count;
            }

            result.Append("\nfields=" + sortedFields.Length);
            result.Append(", terms=" + sumTerms);
            result.Append(", positions=" + sumPositions);
            result.Append(", memory=" + RamUsageEstimator.HumanReadableUnits(GetMemorySize()));
            return(result.ToString());
        }
示例#4
0
        /// <summary>
        /// Iterates over the given token stream and adds the resulting terms to the index;
        /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
        /// Lucene <see cref="Documents.Field"/>.
        /// Finally closes the token stream. Note that untokenized keywords can be added with this method via
        /// <see cref="T:KeywordTokenStream{T}(ICollection{T}"/>)"/>, the Lucene <c>KeywordTokenizer</c> or similar utilities.
        ///
        /// </summary>
        /// <param name="fieldName"> a name to be associated with the text </param>
        /// <param name="stream"> the token stream to retrieve tokens from. </param>
        /// <param name="boost"> the boost factor for hits for this field </param>
        /// <param name="positionIncrementGap"> the position increment gap if fields with the same name are added more than once </param>
        /// <param name="offsetGap"> the offset gap if fields with the same name are added more than once </param>
        /// <seealso cref="Documents.Field.Boost"/>
        public virtual void AddField(string fieldName, TokenStream stream, float boost, int positionIncrementGap, int offsetGap)
        {
            try
            {
                if (fieldName == null)
                {
                    throw new ArgumentException("fieldName must not be null");
                }
                if (stream == null)
                {
                    throw new ArgumentException("token stream must not be null");
                }
                if (boost <= 0.0f)
                {
                    throw new ArgumentException("boost factor must be greater than 0.0");
                }
                int                 numTokens        = 0;
                int                 numOverlapTokens = 0;
                int                 pos = -1;
                BytesRefHash        terms;
                SliceByteStartArray sliceArray;
                long                sumTotalTermFreq = 0;
                int                 offset           = 0;
                if (fields.TryGetValue(fieldName, out Info info))
                {
                    numTokens        = info.numTokens;
                    numOverlapTokens = info.numOverlapTokens;
                    pos              = info.lastPosition + positionIncrementGap;
                    offset           = info.lastOffset + offsetGap;
                    terms            = info.terms;
                    boost           *= info.boost;
                    sliceArray       = info.sliceArray;
                    sumTotalTermFreq = info.sumTotalTermFreq;
                }
                else
                {
                    sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
                    terms      = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
                }

                if (!fieldInfos.ContainsKey(fieldName))
                {
                    fieldInfos[fieldName] = new FieldInfo(fieldName,
                                                          true,
                                                          fieldInfos.Count,
                                                          false,
                                                          false,
                                                          false,
                                                          this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                                                          DocValuesType.NONE,
                                                          DocValuesType.NONE,
                                                          null);
                }
                ITermToBytesRefAttribute    termAtt          = stream.GetAttribute <ITermToBytesRefAttribute>();
                IPositionIncrementAttribute posIncrAttribute = stream.AddAttribute <IPositionIncrementAttribute>();
                IOffsetAttribute            offsetAtt        = stream.AddAttribute <IOffsetAttribute>();
                BytesRef @ref = termAtt.BytesRef;
                stream.Reset();

                while (stream.IncrementToken())
                {
                    termAtt.FillBytesRef();
                    //        if (DEBUG) System.err.println("token='" + term + "'");
                    numTokens++;
                    int posIncr = posIncrAttribute.PositionIncrement;
                    if (posIncr == 0)
                    {
                        numOverlapTokens++;
                    }
                    pos += posIncr;
                    int ord = terms.Add(@ref);
                    if (ord < 0)
                    {
                        ord = (-ord) - 1;
                        postingsWriter.Reset(sliceArray.end[ord]);
                    }
                    else
                    {
                        sliceArray.start[ord] = postingsWriter.StartNewSlice();
                    }
                    sliceArray.freq[ord]++;
                    sumTotalTermFreq++;
                    if (!storeOffsets)
                    {
                        postingsWriter.WriteInt32(pos);
                    }
                    else
                    {
                        postingsWriter.WriteInt32(pos);
                        postingsWriter.WriteInt32(offsetAtt.StartOffset + offset);
                        postingsWriter.WriteInt32(offsetAtt.EndOffset + offset);
                    }
                    sliceArray.end[ord] = postingsWriter.CurrentOffset;
                }
                stream.End();

                // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
                if (numTokens > 0)
                {
                    fields[fieldName] = new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.EndOffset + offset, sumTotalTermFreq);
                    sortedFields      = null; // invalidate sorted view, if any
                }
            } // can never happen
            catch (Exception e)
            {
                throw new Exception(e.ToString(), e);
            }
            finally
            {
                try
                {
                    if (stream != null)
                    {
                        stream.Dispose();
                    }
                }
                catch (IOException e2)
                {
                    throw new Exception(e2.ToString(), e2);
                }
            }
        }