Example #1
internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum)
{
    Terms        = terms;
    Ords         = ords;
    _comparator  = BytesRef.UTF8SortedAsUnicodeComparer;
    _lastElement = terms.Size() - 1;
    // Cache the highest term so Accept() can return AcceptStatus.END as soon as
    // the enum walks past it.
    _lastTerm    = terms.Get(ords[_lastElement], new BytesRef());
    // Prime the first seek target; _upto is the cursor into the sorted ords.
    _seekTerm    = terms.Get(ords[_upto], _spare);
}
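This constructor relies on state declared elsewhere in the class. A minimal sketch of those members, reconstructed from how they are used here and in Example #2 (the initializers are assumptions):

internal BytesRefHash Terms;                       // the query's term set
internal int[] Ords;                               // ids into Terms, in sorted order
private readonly IComparer<BytesRef> _comparator;  // UTF-8-as-Unicode ordering
private readonly BytesRef _spare = new BytesRef(); // scratch buffer reused by Get()
private BytesRef _lastTerm;                        // highest term in the set
private BytesRef _seekTerm;                        // next seek target for the enum
private int _lastElement;                          // index of the last sorted ord
private int _upto = 0;                             // cursor into Ords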
Example #2
protected override AcceptStatus Accept(BytesRef term)
{
    if (_comparer.Compare(term, _lastTerm) > 0)
    {
        return AcceptStatus.END;
    }

    BytesRef currentTerm = terms.Get(ords[_upto], _spare);

    if (_comparer.Compare(term, currentTerm) == 0)
    {
        if (_upto == _lastElement)
        {
            return AcceptStatus.YES;
        }

        _seekTerm = terms.Get(ords[++_upto], _spare);
        return AcceptStatus.YES_AND_SEEK;
    }

    if (_upto == _lastElement)
    {
        return AcceptStatus.NO;
    }

    // Our current term doesn't match the given term. We may be behind it by more
    // than one step, so keep incrementing until we're equal or higher.
    int cmp;
    do
    {
        if (_upto == _lastElement)
        {
            return AcceptStatus.NO;
        }
        // Typically the terms dict is a superset of the query's terms, so it's
        // unusual that we have to skip many of our terms; hence no binary search here.
        _seekTerm = terms.Get(ords[++_upto], _spare);
    } while ((cmp = _comparer.Compare(_seekTerm, term)) < 0);

    if (cmp == 0)
    {
        if (_upto == _lastElement)
        {
            return AcceptStatus.YES;
        }
        _seekTerm = terms.Get(ords[++_upto], _spare);
        return AcceptStatus.YES_AND_SEEK;
    }

    return AcceptStatus.NO_AND_SEEK;
}
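For context: this overrides Lucene.NET's FilteredTermsEnum.Accept. YES keeps the current term and NO skips it; the *_AND_SEEK variants additionally ask the enum to jump ahead to the term staged in _seekTerm (handed back via NextSeekTerm); and END stops the enumeration entirely, which is why the method can bail out as soon as the incoming term sorts past _lastTerm.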
Example #3
internal int BinarySearch(BytesRef b, BytesRef bytesRef, int low, int high, BytesRefHash hash, int[] ords, IComparer<BytesRef> comparer)
{
    int mid; // LUCENENET: IDE0059: Remove unnecessary value assignment

    while (low <= high)
    {
        // unsigned right shift avoids int overflow in (low + high) / 2
        mid = (low + high).TripleShift(1);
        hash.Get(ords[mid], bytesRef);
        int cmp = comparer.Compare(bytesRef, b);
        if (cmp < 0)
        {
            low = mid + 1;
        }
        else if (cmp > 0)
        {
            high = mid - 1;
        }
        else
        {
            return mid;
        }
    }
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(comparer.Compare(bytesRef, b) != 0);
    }
    return -(low + 1);
}
Example #4
internal int BinarySearch(BytesRef b, BytesRef bytesRef, int low, int high, BytesRefHash hash, int[] ords, IComparer<BytesRef> comparer)
{
    int mid = 0;

    while (low <= high)
    {
        // older variant of Example #3: the unsigned right shift is spelled out by hand
        mid = (int)((uint)(low + high) >> 1);
        hash.Get(ords[mid], bytesRef);
        int cmp = comparer.Compare(bytesRef, b);
        if (cmp < 0)
        {
            low = mid + 1;
        }
        else if (cmp > 0)
        {
            high = mid - 1;
        }
        else
        {
            return mid;
        }
    }
    Debug.Assert(comparer.Compare(bytesRef, b) != 0);
    return -(low + 1);
}
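Both variants follow Array.BinarySearch conventions: a non-negative result is the index of the match within the sorted ords array, and a negative result encodes the insertion point as -(result + 1). A minimal usage sketch of the helper above (sample terms invented; hash.Count as in the later examples):

var hash = new BytesRefHash();
hash.Add(new BytesRef("apple"));
hash.Add(new BytesRef("pear"));
hash.Add(new BytesRef("fig"));

int[] ords = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
var scratch = new BytesRef();
int result = BinarySearch(new BytesRef("fig"), scratch, 0, hash.Count - 1,
                          hash, ords, BytesRef.UTF8SortedAsUnicodeComparer);
if (result >= 0)
{
    // found: hash.Get(ords[result], scratch) yields "fig"
}
else
{
    int insertionPoint = -(result + 1); // index that would keep the order sorted
}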
Example #5
private IEnumerable<BytesRef> GetBytesRefEnumberable(int valueCount, int[] sortedValues)
{
    for (int i = 0; i < valueCount; ++i)
    {
        // Allocate a fresh scratch per value: callers may hold on to the yielded
        // BytesRef, so reusing one buffer here would alias every result.
        var scratch = new BytesRef();
        yield return hash.Get(sortedValues[i], scratch);
    }
}
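A sketch of how such an iterator is typically consumed, assuming the surrounding class exposes the hash used above (Sort/Count as in the other examples):

int[] sortedValues = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
foreach (BytesRef term in GetBytesRefEnumberable(hash.Count, sortedValues))
{
    Console.WriteLine(term.Utf8ToString()); // terms stream out in Unicode order
}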
Example #6
/// <summary>
/// Returns a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/>. </summary>
/// <returns> a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
/// <exception cref="IOException"> if an <see cref="IOException"/> occurs </exception>
public virtual StemmerOverrideMap Build()
{
    ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
    Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

    // FST inputs must be added in sorted order, so sort the hashed keys first.
    int[]     sort      = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
    Int32sRef intsSpare = new Int32sRef();
    int       size      = hash.Count;

    for (int i = 0; i < size; i++)
    {
        int      id       = sort[i];
        BytesRef bytesRef = hash.Get(id, spare);
        UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
        builder.Add(intsSpare, new BytesRef(outputValues[id]));
    }
    return new StemmerOverrideMap(builder.Finish(), ignoreCase);
}
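For orientation, a sketch of how the enclosing StemmerOverrideFilter.Builder is typically driven (the Add signature follows Lucene.NET 4.8 and should be treated as an assumption here):

var overrides = new StemmerOverrideFilter.Builder(ignoreCase: true);
overrides.Add("running", "run");            // pin the token "running" to the stem "run"
StemmerOverrideMap map = overrides.Build();
// The map is then wrapped around a token stream ahead of the real stemmer:
// TokenStream ts = new StemmerOverrideFilter(input, map);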
Example #7
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    // Get the enum and start visiting terms. If we exhaust the enum before
    // hitting either of the cutoffs, we use ConstantBooleanQueryRewrite;
    // otherwise, ConstantFilterRewrite:
    int docCountCutoff = (int)((docCountPercent / 100.0) * reader.MaxDoc);
    int termCountLimit = Math.Min(BooleanQuery.MaxClauseCount, termCountCutoff);

    CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit);

    CollectTerms(reader, query, col);
    int size = col.pendingTerms.Count;

    if (col.hasCutOff)
    {
        return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.Rewrite(reader, query);
    }
    else
    {
        BooleanQuery bq = GetTopLevelQuery();
        if (size > 0)
        {
            BytesRefHash pendingTerms = col.pendingTerms;
            int[]        sort         = pendingTerms.Sort(col.termsEnum.Comparer);
            for (int i = 0; i < size; i++)
            {
                int pos = sort[i];
                // docFreq is not used for constant score here; we pass 1 to
                // explicitly set a fake value so it's not calculated
                AddClause(bq, new Term(query.m_field, pendingTerms.Get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
            }
        }
        // Strip scores
        Query result = new ConstantScoreQuery(bq);
        result.Boost = query.Boost;
        return result;
    }
}
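To make the cutoff concrete (numbers invented): with docCountPercent = 0.1 and reader.MaxDoc = 1,000,000, docCountCutoff = (int)((0.1 / 100.0) * 1000000) = 1000. Once the collected terms cover more than 1,000 documents, or more than termCountLimit distinct terms, the collector trips hasCutOff and the whole query falls back to CONSTANT_SCORE_FILTER_REWRITE instead of building a BooleanQuery clause per term.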
Example #8
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestRandomSortedBytes()
{
    Directory dir = NewDirectory();
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    if (!DefaultCodecSupportsDocsWithField())
    {
        // if the codec doesn't support missing, we expect missing to be mapped to byte[]
        // by the impersonator, but we have to give it a chance to merge them to this
        cfg.SetMergePolicy(NewLogMergePolicy());
    }
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, cfg);
    int numDocs = AtLeast(100);
    BytesRefHash hash = new BytesRefHash();
    IDictionary<string, string> docToString = new Dictionary<string, string>();
    int maxLength = TestUtil.NextInt(Random(), 1, 50);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("id", "" + i, Field.Store.YES));
        string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
        BytesRef br = new BytesRef(@string);
        doc.Add(new SortedDocValuesField("field", br));
        hash.Add(br);
        docToString["" + i] = @string;
        w.AddDocument(doc);
    }
    if (Rarely())
    {
        w.Commit();
    }
    int numDocsNoValue = AtLeast(10);
    for (int i = 0; i < numDocsNoValue; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("id", "noValue", Field.Store.YES));
        w.AddDocument(doc);
    }
    if (!DefaultCodecSupportsDocsWithField())
    {
        BytesRef bytesRef = new BytesRef();
        hash.Add(bytesRef); // add empty value for the gaps
    }
    if (Rarely())
    {
        w.Commit();
    }
    if (!DefaultCodecSupportsDocsWithField())
    {
        // if the codec doesn't support missing, we expect missing to be mapped to byte[]
        // by the impersonator, but we have to give it a chance to merge them to this
        w.ForceMerge(1);
    }
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        // string concatenation, not addition: with numDocs == 100 this yields
        // "0100", "1100", ..., keeping these ids distinct from the first batch
        string id = "" + i + numDocs;
        doc.Add(NewTextField("id", id, Field.Store.YES));
        string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
        BytesRef br = new BytesRef(@string);
        hash.Add(br);
        docToString[id] = @string;
        doc.Add(new SortedDocValuesField("field", br));
        w.AddDocument(doc);
    }
    w.Commit();
    IndexReader reader = w.Reader;
    SortedDocValues docValues = MultiDocValues.GetSortedValues(reader, "field");
    int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
    BytesRef expected = new BytesRef();
    BytesRef actual = new BytesRef();
    Assert.AreEqual(hash.Size(), docValues.ValueCount);
    for (int i = 0; i < hash.Size(); i++)
    {
        hash.Get(sort[i], expected);
        docValues.LookupOrd(i, actual);
        Assert.AreEqual(expected.Utf8ToString(), actual.Utf8ToString());
        int ord = docValues.LookupTerm(expected);
        Assert.AreEqual(i, ord);
    }
    AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(reader);
    ISet<KeyValuePair<string, string>> entrySet = docToString.EntrySet();

    foreach (KeyValuePair<string, string> entry in entrySet)
    {
        // primary-key lookup
        DocsEnum termDocsEnum = slowR.TermDocsEnum(new Term("id", entry.Key));
        int docId = termDocsEnum.NextDoc();
        expected = new BytesRef(entry.Value);
        docValues.Get(docId, actual);
        Assert.AreEqual(expected, actual);
    }

    reader.Dispose();
    w.Dispose();
    dir.Dispose();
}