Exemplo n.º 1
0
            /// <summary>
            /// Streams the term vector of <paramref name="field"/> for the single
            /// in-memory document into the given <paramref name="mapper"/>.
            /// Terms are delivered in descending sorted order.
            /// </summary>
            /// <param name="docNumber">Ignored; the memory index holds exactly one document.</param>
            /// <param name="field">Name of the field whose term vector is requested.</param>
            /// <param name="mapper">Receives the expectations and the per-term data.</param>
            public override void GetTermFreqVector(int docNumber, String field, TermVectorMapper mapper)
            {
                if (DEBUG)
                {
                    System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVector");
                }
                Info info = GetInfo(field);

                if (info == null)
                {
                    return; // field was never added to this index
                }
                info.SortTerms();
                // storeOffsets is reported as (stride != 1), matching how offsets
                // are physically interleaved into the positions list.
                mapper.SetExpectations(field, info.SortedTerms.Length, _index.stride != 1, true);
                for (int i = info.SortedTerms.Length; --i >= 0;)
                {
                    ArrayIntList positions = info.SortedTerms[i].Value;
                    int          size      = positions.Size();

                    // BUGFIX: when stride == 1 no offsets are stored, and the old
                    // unconditional loop read past the end of the list via
                    // positions.Get(j + 1). Pass null instead, consistent with
                    // Info.GetOffsets() and the storeOffsets=false expectation
                    // declared above.
                    TermVectorOffsetInfo[] offsets = null;
                    if (_index.stride != 1)
                    {
                        offsets = new TermVectorOffsetInfo[size / _index.stride];
                        for (int k = 0, j = 1; j < size; k++, j += _index.stride)
                        {
                            int start = positions.Get(j);
                            int end   = positions.Get(j + 1);
                            offsets[k] = new TermVectorOffsetInfo(start, end);
                        }
                    }
                    mapper.Map(info.SortedTerms[i].Key, _index.NumPositions(info.SortedTerms[i].Value), offsets,
                               (info.SortedTerms[i].Value).ToArray(_index.stride));
                }
            }
Exemplo n.º 2
0
        /// <summary>
        /// Returns a string representation of the index data for debugging purposes.
        /// </summary>
        /// <returns>the string representation</returns>
        public override String ToString()
        {
            var buf = new StringBuilder(256);

            SortFields();
            int totalChars     = 0;
            int totalPositions = 0;
            int totalTerms     = 0;

            foreach (KeyValuePair<String, Info> field in sortedFields)
            {
                Info info = field.Value;
                info.SortTerms();
                buf.Append(field.Key + ":\n");

                int fieldChars = 0;
                int fieldPos   = 0;
                foreach (KeyValuePair<String, ArrayIntList> term in info.SortedTerms)
                {
                    ArrayIntList positions = term.Value;
                    buf.Append("\t'" + term.Key + "':" + NumPositions(positions) + ":");
                    buf.Append(positions.ToString(stride)); // ignore offsets
                    buf.Append("\n");
                    fieldPos   += NumPositions(positions);
                    fieldChars += term.Key.Length;
                }

                // per-field summary line
                buf.Append("\tterms=" + info.SortedTerms.Length);
                buf.Append(", positions=" + fieldPos);
                buf.Append(", Kchars=" + (fieldChars / 1000.0f));
                buf.Append("\n");
                totalPositions += fieldPos;
                totalChars     += fieldChars;
                totalTerms     += info.SortedTerms.Length;
            }

            // grand totals across all fields
            buf.Append("\nfields=" + sortedFields.Length);
            buf.Append(", terms=" + totalTerms);
            buf.Append(", positions=" + totalPositions);
            buf.Append(", Kchars=" + (totalChars / 1000.0f));
            return buf.ToString();
        }
                /// <summary>
                /// Builds the stored (start, end) offset pairs for the term at
                /// <paramref name="index"/> in the sorted terms, or null when the
                /// index was created without offsets (stride == 1).
                /// </summary>
                public TermVectorOffsetInfo[] GetOffsets(int index)
                {
                    if (_index.stride == 1)
                    {
                        return null; // no offsets stored
                    }
                    ArrayIntList positions = sortedTerms[index].Value;
                    int total = positions.Size();
                    var result = new TermVectorOffsetInfo[total / _index.stride];

                    // Each position occupies `stride` consecutive slots; the two
                    // slots after the position value hold the start/end offsets.
                    int slot = 0;
                    for (int j = 1; j < total; j += _index.stride)
                    {
                        result[slot++] = new TermVectorOffsetInfo(positions.Get(j), positions.Get(j + 1));
                    }
                    return result;
                }
Exemplo n.º 4
0
        /// <summary>
        /// Returns a reasonable approximation of the main memory [bytes] consumed
        /// by this instance. Useful for smart memory sensitive caches/pools.
        /// Assumes fieldNames are interned, whereas tokenized terms are
        /// memory-overlaid.
        /// </summary>
        /// <returns>the main memory consumption</returns>
        public int GetMemorySize()
        {
            // for example usage in a smart cache see nux.xom.pool.Pool
            int PTR = VM.PTR;
            int INT = VM.INT;

            int total = VM.SizeOfObject(2 * PTR + INT); // memory index
            if (sortedFields != null)
            {
                total += VM.SizeOfObjectArray(sortedFields.Length);
            }

            total += VM.SizeOfHashMap(fields.Count);
            foreach (var field in fields)
            {
                // per-field bookkeeping
                Info info = field.Value;
                total += VM.SizeOfObject(2 * INT + 3 * PTR); // Info instance vars
                if (info.SortedTerms != null)
                {
                    total += VM.SizeOfObjectArray(info.SortedTerms.Length);
                }

                total += VM.SizeOfHashMap(info.Terms.Count);
                foreach (KeyValuePair<String, ArrayIntList> term in info.Terms)
                {
                    // per-term bookkeeping; assumes substring() memory overlay,
                    // so the term text itself is not counted.
                    total += VM.SizeOfObject(PTR + 3 * INT);
                    total += VM.SizeOfArrayIntList(term.Value.Size());
                }
            }
            return total;
        }
Exemplo n.º 5
0
                /// <summary>
                /// Positions this enumerator at the given term. A null term means
                /// "match all documents".
                /// </summary>
                public void Seek(Term term)
                {
                    this.term = term;

                    if (DEBUG)
                    {
                        System.Diagnostics.Debug.WriteLine(".seek: " + term);
                    }

                    if (term == null)
                    {
                        // term==null means match all docs
                        hasNext = true;
                        return;
                    }

                    Info info = _reader.GetInfo(term.Field);
                    current = (info == null) ? null : info.GetPositions(term.Text);
                    hasNext = current != null;
                    cursor  = 0;
                }
Exemplo n.º 6
0
 /// <summary>
 /// Number of positions recorded in <paramref name="positions"/>: each
 /// position occupies <c>stride</c> consecutive int slots in the list.
 /// </summary>
 private int NumPositions(ArrayIntList positions)
 {
     return positions.Size() / stride;
 }
Exemplo n.º 7
0
        /// <summary>
        /// Iterates over the given token stream and adds the resulting terms to the index;
        /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
        /// Lucene <c>Field</c>.
        /// Finally closes the token stream. Note that untokenized keywords can be added
        /// with this method via <c>CreateKeywordTokenStream(Collection)</c>, the Lucene
        /// contrib <c>KeywordTokenizer</c> or similar utilities.
        /// </summary>
        /// <param name="fieldName">a name to be associated with the text</param>
        /// <param name="stream">the token stream to retrieve tokens from</param>
        /// <param name="boost">the boost factor for hits for this field</param>
        /// <exception cref="ArgumentNullException">if fieldName or stream is null</exception>
        /// <exception cref="ArgumentException">if boost is not positive or the field was already added</exception>
        public void AddField(String fieldName, TokenStream stream, float boost)
        {
            try
            {
                // FIX: throw the specific ArgumentNullException for null arguments.
                // It derives from ArgumentException, so existing catch clauses
                // keep working.
                if (fieldName == null)
                {
                    throw new ArgumentNullException("fieldName", "fieldName must not be null");
                }
                if (stream == null)
                {
                    throw new ArgumentNullException("stream", "token stream must not be null");
                }
                if (boost <= 0.0f)
                {
                    throw new ArgumentException("boost factor must be greater than 0.0");
                }
                if (fields[fieldName] != null)
                {
                    throw new ArgumentException("field must not be added more than once");
                }

                var terms            = new HashMap <String, ArrayIntList>();
                int numTokens        = 0;
                int numOverlapTokens = 0; // tokens with position increment 0
                int pos = -1;

                var termAtt          = stream.AddAttribute <ITermAttribute>();
                var posIncrAttribute = stream.AddAttribute <IPositionIncrementAttribute>();
                var offsetAtt        = stream.AddAttribute <IOffsetAttribute>();

                stream.Reset();
                while (stream.IncrementToken())
                {
                    String term = termAtt.Term;
                    if (term.Length == 0)
                    {
                        continue;                   // nothing to do
                    }
                    numTokens++;
                    int posIncr = posIncrAttribute.PositionIncrement;
                    if (posIncr == 0)
                    {
                        numOverlapTokens++;
                    }
                    pos += posIncr;

                    ArrayIntList positions = terms[term];
                    if (positions == null)
                    {
                        // term not seen before
                        positions   = new ArrayIntList(stride);
                        terms[term] = positions;
                    }
                    if (stride == 1)
                    {
                        positions.Add(pos);
                    }
                    else
                    {
                        // stride != 1: interleave (position, startOffset, endOffset)
                        positions.Add(pos, offsetAtt.StartOffset, offsetAtt.EndOffset);
                    }
                }
                stream.End();

                // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
                if (numTokens > 0)
                {
                    boost             = boost * docBoost; // see DocumentWriter.addDocument(...)
                    fields[fieldName] = new Info(terms, numTokens, numOverlapTokens, boost);
                    sortedFields      = null;             // invalidate sorted view, if any
                }
            }
            catch (IOException e)
            {
                // can never happen
                throw new SystemException(string.Empty, e);
            }
            finally
            {
                // always close the stream, even on validation failure
                try
                {
                    if (stream != null)
                    {
                        stream.Close();
                    }
                }
                catch (IOException e2)
                {
                    throw new SystemException(string.Empty, e2);
                }
            }
        }
Exemplo n.º 8
0
                /// <summary>
                /// Positions this enumerator at <paramref name="term"/>; a null
                /// term matches all documents.
                /// </summary>
                public void Seek(Term term)
                {
                    this.term = term;

                    if (DEBUG)
                    {
                        System.Diagnostics.Debug.WriteLine(".seek: " + term);
                    }

                    if (term != null)
                    {
                        Info info = _reader.GetInfo(term.Field);
                        current = info == null ? null : info.GetPositions(term.Text);
                        hasNext = current != null;
                        cursor = 0;
                    }
                    else
                    {
                        hasNext = true; // term==null means match all docs
                    }
                }