Example #1
        private readonly Query _fromQuery; // Used for equals() only

        /// <summary>
        /// Creates a query that only matches documents containing one or more of the specified terms in the specified field.
        /// </summary>
        /// <param name="field">The field that should contain terms that are specified in the <paramref name="terms"/> parameter.</param>
        /// <param name="fromQuery">The query the terms were collected from; retained only for <c>Equals()</c> comparisons.</param>
        /// <param name="terms">The terms that matching documents should have. The terms must be sorted by natural order.</param>
        internal TermsQuery(string field, Query fromQuery, BytesRefHash terms)
            : base(field)
        {
            _fromQuery = fromQuery;
            _terms     = terms;
            _ords      = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
        }
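The constructor above expects the caller to supply an already-populated BytesRefHash and derives the sorted ords itself. Below is a minimal sketch, with invented terms and variable names (not taken from the Lucene.NET sources), of how such a hash can be filled and then walked in Unicode order with the same Sort/Get calls:

        // Requires: using Lucene.Net.Util;
        BytesRefHash terms = new BytesRefHash();
        terms.Add(new BytesRef("apple"));    // Add returns the new term id, or a negative value if the term already exists
        terms.Add(new BytesRef("banana"));
        terms.Add(new BytesRef("cherry"));

        // Sort returns the term ids ordered by the comparer, like _ords in the constructor above.
        int[] ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
        BytesRef spare = new BytesRef();
        for (int i = 0; i < terms.Size(); i++)
        {
            terms.Get(ords[i], spare); // spare now holds the i-th smallest term
        }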
Example #2
        public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
        {
            IntPool        = termsHash.IntPool;
            BytePool       = termsHash.BytePool;
            TermBytePool   = termsHash.TermBytePool;
            DocState       = termsHash.DocState;
            this.TermsHash = termsHash;
            BytesUsed      = termsHash.BytesUsed;
            FieldState     = docInverterPerField.FieldState;
            this.Consumer  = termsHash.Consumer.AddField(this, fieldInfo);
            PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);

            BytesHash      = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
            StreamCount    = Consumer.StreamCount;
            NumPostingInt  = 2 * StreamCount;
            this.FieldInfo = fieldInfo;
            if (nextTermsHash != null)
            {
                NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
            }
            else
            {
                NextPerField = null;
            }
        }
Example #3
        public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
        {
            intPool        = termsHash.intPool;
            bytePool       = termsHash.bytePool;
            termBytePool   = termsHash.termBytePool;
            docState       = termsHash.docState;
            this.termsHash = termsHash;
            bytesUsed      = termsHash.bytesUsed;
            fieldState     = docInverterPerField.fieldState;
            this.consumer  = termsHash.consumer.AddField(this, fieldInfo);
            PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);

            bytesHash      = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
            streamCount    = consumer.StreamCount;
            numPostingInt  = 2 * streamCount;
            this.fieldInfo = fieldInfo;
            if (nextTermsHash != null)
            {
                nextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
            }
            else
            {
                nextPerField = null;
            }
        }
Example #4
 public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
 {
     this.fieldInfo   = fieldInfo;
     this.iwBytesUsed = iwBytesUsed;
     hash             = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)), BytesRefHash.DEFAULT_CAPACITY, new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     pending          = new AppendingDeltaPackedInt64Buffer(PackedInt32s.COMPACT);
     bytesUsed        = pending.RamBytesUsed();
     iwBytesUsed.AddAndGet(bytesUsed);
 }
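The DirectTrackingAllocator in the constructor above is what ties the hash's memory consumption to the Counter: every byte block the pool allocates is reported into iwBytesUsed. A small sketch of the same wiring in isolation (the term value is made up; the calls mirror the constructor):

     // Requires: using Lucene.Net.Util;
     Counter bytesUsed = Counter.NewCounter();
     var pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
     var hash = new BytesRefHash(pool,
                                 BytesRefHash.DEFAULT_CAPACITY,
                                 new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed));

     hash.Add(new BytesRef("example"));   // hypothetical term
     long ramNow = bytesUsed.Get();       // grows as the pool allocates blocks for the hash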
Example #5
 internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum)
 {
     Terms        = terms;
     Ords         = ords;
     _comparator  = BytesRef.UTF8SortedAsUnicodeComparer;
     _lastElement = terms.Size() - 1;
     _lastTerm    = terms.Get(ords[_lastElement], new BytesRef());
     _seekTerm    = terms.Get(ords[_upto], _spare);
 }
Example #7
 public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
 {
     this.FieldInfo   = fieldInfo;
     this.IwBytesUsed = iwBytesUsed;
     Hash             = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)), BytesRefHash.DEFAULT_CAPACITY, new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     Pending          = new AppendingPackedLongBuffer(PackedInts.COMPACT);
     PendingCounts    = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     BytesUsed        = Pending.RamBytesUsed() + PendingCounts.RamBytesUsed();
     iwBytesUsed.AddAndGet(BytesUsed);
 }
Example #8
 private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
                                  float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
 {
     _field = field;
     _multipleValuesPerDocument = multipleValuesPerDocument;
     _terms                  = terms;
     _scores                 = scores;
     _originalQuery          = originalQuery;
     _ords                   = ords;
     _unwrittenOriginalQuery = unwrittenOriginalQuery;
 }
Example #9
 public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq)
 {
     this.terms            = terms;
     this.sliceArray       = sliceArray;
     this.numTokens        = numTokens;
     this.numOverlapTokens = numOverlapTokens;
     this.boost            = boost;
     this.sumTotalTermFreq = sumTotalTermFreq;
     this.lastPosition     = lastPosition;
     this.lastOffset       = lastOffset;
 }
Example #10
 internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
                                   float[] scores, Query originalQuery)
 {
     _field = field;
     _multipleValuesPerDocument = multipleValuesPerDocument;
     _terms                  = terms;
     _scores                 = scores;
     _originalQuery          = originalQuery;
     _ords                   = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
     _unwrittenOriginalQuery = originalQuery;
 }
Example #11
        public override Query Rewrite(IndexReader reader, MultiTermQuery query)
        {
            // Get the enum and start visiting terms.  If we
            // exhaust the enum before hitting either of the
            // cutoffs, we use ConstantBooleanQueryRewrite; else,
            // ConstantFilterRewrite:
            int docCountCutoff = (int)((docCountPercent / 100.0) * reader.MaxDoc);
            int termCountLimit = Math.Min(BooleanQuery.MaxClauseCount, termCountCutoff);

            CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit);

            CollectTerms(reader, query, col);
            int size = col.pendingTerms.Count;

            if (col.hasCutOff)
            {
                return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.Rewrite(reader, query);
            }
            else
            {
                BooleanQuery bq = GetTopLevelQuery();
                if (size > 0)
                {
                    BytesRefHash pendingTerms = col.pendingTerms;
                    int[]        sort         = pendingTerms.Sort(col.termsEnum.Comparer);
                    for (int i = 0; i < size; i++)
                    {
                        int pos = sort[i];
                        // docFreq is not used for constant score here, we pass 1
                        // to explicitly set a fake value, so it's not calculated
                        AddClause(bq, new Term(query.m_field, pendingTerms.Get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
                    }
                }
                // Strip scores
                Query result = new ConstantScoreQuery(bq);
                result.Boost = query.Boost;
                return result;
            }
        }
Example #13
 public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext)
 {
     this.fst   = fst;
     this.words = words;
     this.maxHorizontalContext = maxHorizontalContext;
 }
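In application code a SynonymMap is normally obtained from SynonymMap.Builder rather than by calling this constructor directly; the builder assembles the FST<BytesRef> and the BytesRefHash of words internally. A hedged sketch with made-up entries (assumes the Lucene.Net.Analysis.Synonym API):

     // Requires: using Lucene.Net.Analysis.Synonym; using Lucene.Net.Util;
     var builder = new SynonymMap.Builder(true);                          // true = deduplicate equivalent rules
     builder.Add(new CharsRef("car"), new CharsRef("automobile"), true);  // map "car" -> "automobile", keeping the original token
     SynonymMap map = builder.Build();                                    // produces the fst/words/maxHorizontalContext passed above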
Example #14
        /// <summary>
        /// Iterates over the given token stream and adds the resulting terms to the index;
        /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
        /// Lucene <see cref="Documents.Field"/>.
        /// Finally closes the token stream. Note that untokenized keywords can be added with this method via
        /// <see cref="T:KeywordTokenStream{T}(ICollection{T}"/>)"/>, the Lucene <c>KeywordTokenizer</c> or similar utilities.
        ///
        /// </summary>
        /// <param name="fieldName"> a name to be associated with the text </param>
        /// <param name="stream"> the token stream to retrieve tokens from. </param>
        /// <param name="boost"> the boost factor for hits for this field </param>
        /// <param name="positionIncrementGap"> the position increment gap if fields with the same name are added more than once </param>
        /// <param name="offsetGap"> the offset gap if fields with the same name are added more than once </param>
        /// <seealso cref="Documents.Field.Boost"/>
        public virtual void AddField(string fieldName, TokenStream stream, float boost, int positionIncrementGap, int offsetGap)
        {
            try
            {
                if (fieldName == null)
                {
                    throw new ArgumentException("fieldName must not be null");
                }
                if (stream == null)
                {
                    throw new ArgumentException("token stream must not be null");
                }
                if (boost <= 0.0f)
                {
                    throw new ArgumentException("boost factor must be greater than 0.0");
                }
                int                 numTokens        = 0;
                int                 numOverlapTokens = 0;
                int                 pos = -1;
                BytesRefHash        terms;
                SliceByteStartArray sliceArray;
                long                sumTotalTermFreq = 0;
                int                 offset           = 0;
                if (fields.TryGetValue(fieldName, out Info info))
                {
                    numTokens        = info.numTokens;
                    numOverlapTokens = info.numOverlapTokens;
                    pos              = info.lastPosition + positionIncrementGap;
                    offset           = info.lastOffset + offsetGap;
                    terms            = info.terms;
                    boost           *= info.boost;
                    sliceArray       = info.sliceArray;
                    sumTotalTermFreq = info.sumTotalTermFreq;
                }
                else
                {
                    sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
                    terms      = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
                }

                if (!fieldInfos.ContainsKey(fieldName))
                {
                    fieldInfos[fieldName] = new FieldInfo(fieldName,
                                                          true,
                                                          fieldInfos.Count,
                                                          false,
                                                          false,
                                                          false,
                                                          this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                                                          DocValuesType.NONE,
                                                          DocValuesType.NONE,
                                                          null);
                }
                ITermToBytesRefAttribute    termAtt          = stream.GetAttribute<ITermToBytesRefAttribute>();
                IPositionIncrementAttribute posIncrAttribute = stream.AddAttribute<IPositionIncrementAttribute>();
                IOffsetAttribute            offsetAtt        = stream.AddAttribute<IOffsetAttribute>();
                BytesRef @ref = termAtt.BytesRef;
                stream.Reset();

                while (stream.IncrementToken())
                {
                    termAtt.FillBytesRef();
                    //        if (DEBUG) System.err.println("token='" + term + "'");
                    numTokens++;
                    int posIncr = posIncrAttribute.PositionIncrement;
                    if (posIncr == 0)
                    {
                        numOverlapTokens++;
                    }
                    pos += posIncr;
                    int ord = terms.Add(@ref);
                    if (ord < 0)
                    {
                        ord = (-ord) - 1;
                        postingsWriter.Reset(sliceArray.end[ord]);
                    }
                    else
                    {
                        sliceArray.start[ord] = postingsWriter.StartNewSlice();
                    }
                    sliceArray.freq[ord]++;
                    sumTotalTermFreq++;
                    if (!storeOffsets)
                    {
                        postingsWriter.WriteInt32(pos);
                    }
                    else
                    {
                        postingsWriter.WriteInt32(pos);
                        postingsWriter.WriteInt32(offsetAtt.StartOffset + offset);
                        postingsWriter.WriteInt32(offsetAtt.EndOffset + offset);
                    }
                    sliceArray.end[ord] = postingsWriter.CurrentOffset;
                }
                stream.End();

                // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
                if (numTokens > 0)
                {
                    fields[fieldName] = new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.EndOffset + offset, sumTotalTermFreq);
                    sortedFields      = null; // invalidate sorted view, if any
                }
            }
            catch (Exception e) // can never happen
            {
                throw new Exception(e.ToString(), e);
            }
            finally
            {
                try
                {
                    if (stream != null)
                    {
                        stream.Dispose();
                    }
                }
                catch (IOException e2)
                {
                    throw new Exception(e2.ToString(), e2);
                }
            }
        }
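A usage sketch for the method above, with an invented field name, text, and query (assumes the Lucene.NET 4.8 MemoryIndex API): the string-based AddField overload analyzes the text and delegates to the TokenStream-based overload shown here, after which the single in-memory document can be scored with Search.

            // Requires: using Lucene.Net.Analysis.Standard; using Lucene.Net.Index;
            //           using Lucene.Net.Index.Memory; using Lucene.Net.Search; using Lucene.Net.Util;
            var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            var index = new MemoryIndex();

            // Tokenizes the text with the analyzer and feeds the resulting TokenStream
            // into the AddField overload above.
            index.AddField("content", "the quick brown fox", analyzer);

            // Scores the query against the single in-memory document; a value > 0 means it matched.
            float score = index.Search(new TermQuery(new Term("content", "fox")));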
Example #15
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestRandomSortedBytes()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            if (!DefaultCodecSupportsDocsWithField())
            {
                // if the codec doesnt support missing, we expect missing to be mapped to byte[]
                // by the impersonator, but we have to give it a chance to merge them to this
                cfg.SetMergePolicy(NewLogMergePolicy());
            }
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, cfg);
            int numDocs = AtLeast(100);
            BytesRefHash hash = new BytesRefHash();
            IDictionary<string, string> docToString = new Dictionary<string, string>();
            int maxLength = TestUtil.NextInt(Random(), 1, 50);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("id", "" + i, Field.Store.YES));
                string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
                BytesRef br = new BytesRef(@string);
                doc.Add(new SortedDocValuesField("field", br));
                hash.Add(br);
                docToString["" + i] = @string;
                w.AddDocument(doc);
            }
            if (Rarely())
            {
                w.Commit();
            }
            int numDocsNoValue = AtLeast(10);
            for (int i = 0; i < numDocsNoValue; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("id", "noValue", Field.Store.YES));
                w.AddDocument(doc);
            }
            if (!DefaultCodecSupportsDocsWithField())
            {
                BytesRef bytesRef = new BytesRef();
                hash.Add(bytesRef); // add empty value for the gaps
            }
            if (Rarely())
            {
                w.Commit();
            }
            if (!DefaultCodecSupportsDocsWithField())
            {
                // if the codec doesnt support missing, we expect missing to be mapped to byte[]
                // by the impersonator, but we have to give it a chance to merge them to this
                w.ForceMerge(1);
            }
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                string id = "" + i + numDocs;
                doc.Add(NewTextField("id", id, Field.Store.YES));
                string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
                BytesRef br = new BytesRef(@string);
                hash.Add(br);
                docToString[id] = @string;
                doc.Add(new SortedDocValuesField("field", br));
                w.AddDocument(doc);
            }
            w.Commit();
            IndexReader reader = w.Reader;
            SortedDocValues docValues = MultiDocValues.GetSortedValues(reader, "field");
            int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
            BytesRef expected = new BytesRef();
            BytesRef actual = new BytesRef();
            Assert.AreEqual(hash.Size(), docValues.ValueCount);
            for (int i = 0; i < hash.Size(); i++)
            {
                hash.Get(sort[i], expected);
                docValues.LookupOrd(i, actual);
                Assert.AreEqual(expected.Utf8ToString(), actual.Utf8ToString());
                int ord = docValues.LookupTerm(expected);
                Assert.AreEqual(i, ord);
            }
            AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(reader);
            ISet<KeyValuePair<string, string>> entrySet = docToString.EntrySet();

            foreach (KeyValuePair<string, string> entry in entrySet)
            {
                // pk lookup
                DocsEnum termDocsEnum = slowR.TermDocsEnum(new Term("id", entry.Key));
                int docId = termDocsEnum.NextDoc();
                expected = new BytesRef(entry.Value);
                docValues.Get(docId, actual);
                Assert.AreEqual(expected, actual);
            }

            reader.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Example #16
            public ParallelArraysTermCollector(ScoringRewrite<Q> outerInstance)
            {
                this.outerInstance = outerInstance;

                terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
            }
Example #17
 private void InitializeInstanceFields()
 {
     pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
 }
Example #18
                internal int BinarySearch(BytesRef b, BytesRef bytesRef, int low, int high, BytesRefHash hash, int[] ords, IComparer<BytesRef> comparer)
                {
                    int mid; // LUCENENET: IDE0059: Remove unnecessary value assignment

                    while (low <= high)
                    {
                        mid = (low + high).TripleShift(1);
                        hash.Get(ords[mid], bytesRef);
                        int cmp = comparer.Compare(bytesRef, b);
                        if (cmp < 0)
                        {
                            low = mid + 1;
                        }
                        else if (cmp > 0)
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            return mid;
                        }
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(comparer.Compare(bytesRef, b) != 0);
                    }
                    return -(low + 1);
                }
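The same lookup can also be written against the public BytesRefHash surface: Sort yields the term ids in comparer order and Get materializes each candidate into a reusable BytesRef. A standalone sketch of that pattern (hypothetical helper; count is the number of valid entries in ords, e.g. hash.Size()):

                // Requires: using System.Collections.Generic; using Lucene.Net.Util;
                internal static int IndexOf(BytesRefHash hash, int[] ords, int count, BytesRef target, IComparer<BytesRef> comparer)
                {
                    BytesRef spare = new BytesRef();
                    int low = 0, high = count - 1;
                    while (low <= high)
                    {
                        int mid = (int)((uint)(low + high) >> 1); // unsigned shift avoids overflow for large ranges
                        hash.Get(ords[mid], spare);
                        int cmp = comparer.Compare(spare, target);
                        if (cmp < 0)
                        {
                            low = mid + 1;
                        }
                        else if (cmp > 0)
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            return mid;       // position of the term in sort order
                        }
                    }
                    return -(low + 1);        // not found: encodes the insertion point
                }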
Example #19
                internal int BinarySearch(BytesRef b, BytesRef bytesRef, int low, int high, BytesRefHash hash, int[] ords, IComparer<BytesRef> comparer)
                {
                    int mid = 0;

                    while (low <= high)
                    {
                        mid = (int)((uint)(low + high) >> 1);
                        hash.Get(ords[mid], bytesRef);
                        int cmp = comparer.Compare(bytesRef, b);
                        if (cmp < 0)
                        {
                            low = mid + 1;
                        }
                        else if (cmp > 0)
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            return mid;
                        }
                    }
                    Debug.Assert(comparer.Compare(bytesRef, b) != 0);
                    return -(low + 1);
                }
Example #20
 internal void InitializeInstanceFields()
 {
     terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
 }