GetSortedSetDocValues() public abstract method

Returns SortedSetDocValues for this field, or null if no SortedSetDocValues were indexed for this field. The returned instance should only be used by a single thread.
public abstract GetSortedSetDocValues ( string field ) : Lucene.Net.Index.SortedSetDocValues
field string
return Lucene.Net.Index.SortedSetDocValues
        public override void Warm(AtomicReader reader)
        {
            long startTime      = Environment.TickCount;
            int  indexedCount   = 0;
            int  docValuesCount = 0;
            int  normsCount     = 0;

            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.IsIndexed)
                {
                    reader.GetTerms(info.Name);
                    indexedCount++;

                    if (info.HasNorms)
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues)
                {
                    switch (info.DocValuesType)
                    {
                    case DocValuesType.NUMERIC:
                        reader.GetNumericDocValues(info.Name);
                        break;

                    case DocValuesType.BINARY:
                        reader.GetBinaryDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED:
                        reader.GetSortedDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED_SET:
                        reader.GetSortedSetDocValues(info.Name);
                        break;

                    default:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(false);                               // unknown dv type
                        }
                        break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (infoStream.IsEnabled("SMSW"))
            {
                infoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (Environment.TickCount - startTime));
            }
        }
        public override void Warm(AtomicReader reader)
        {
            long startTime = DateTime.Now.Millisecond;
            int indexedCount = 0;
            int docValuesCount = 0;
            int normsCount = 0;
            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.Indexed)
                {
                    reader.Terms(info.Name);
                    indexedCount++;

                    if (info.HasNorms())
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues())
                {
                    switch (info.DocValuesType)
                    {
                        case DocValuesType_e.NUMERIC:
                            reader.GetNumericDocValues(info.Name);
                            break;

                        case DocValuesType_e.BINARY:
                            reader.GetBinaryDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED:
                            reader.GetSortedDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED_SET:
                            reader.GetSortedSetDocValues(info.Name);
                            break;

                        default:
                            Debug.Assert(false); // unknown dv type
                            break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (InfoStream.IsEnabled("SMSW"))
            {
                InfoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (DateTime.Now.Millisecond - startTime));
            }
        }
        /// <summary>
        /// Creates this, pulling doc values from the specified
        /// field. 
        /// </summary>
        public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = FacetsConfig.DEFAULT_INDEX_FIELD_NAME)
        {
            this.field = field;
            this.origReader = reader;

            // We need this to create thread-safe MultiSortedSetDV
            // per collector:
            topReader = SlowCompositeReaderWrapper.Wrap(reader);
            SortedSetDocValues dv = topReader.GetSortedSetDocValues(field);
            if (dv == null)
            {
                throw new System.ArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
            }
            if (dv.ValueCount > int.MaxValue)
            {
                throw new System.ArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.ValueCount);
            }
            valueCount = (int)dv.ValueCount;

            // TODO: we can make this more efficient if eg we can be
            // "involved" when IOrdinalMap is being created?  Ie see
            // each term/ord it's assigning as it goes...
            string lastDim = null;
            int startOrd = -1;

            // TODO: this approach can work for full hierarchy?;
            // TaxoReader can't do this since ords are not in
            // "sorted order" ... but we should generalize this to
            // support arbitrary hierarchy:
            for (int ord = 0; ord < valueCount; ord++)
            {
                BytesRef term = new BytesRef();
                dv.LookupOrd(ord, term);
                string[] components = FacetsConfig.StringToPath(term.Utf8ToString());
                if (components.Length != 2)
                {
                    throw new System.ArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.ToString(components) + " " + term.Utf8ToString());
                }
                if (!components[0].Equals(lastDim))
                {
                    if (lastDim != null)
                    {
                        prefixToOrdRange[lastDim] = new OrdRange(startOrd, ord - 1);
                    }
                    startOrd = ord;
                    lastDim = components[0];
                }
            }

            if (lastDim != null)
            {
                prefixToOrdRange[lastDim] = new OrdRange(startOrd, valueCount - 1);
            }
        }
Exemplo n.º 4
0
        public virtual void TestSortedSetWithDups()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc       = new Document();
                int      numValues = Random().Next(5);
                for (int j = 0; j < numValues; j++)
                {
                    doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random(), 2))));
                }
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.Reader;
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            SortedSetDocValues multi  = MultiDocValues.GetSortedSetValues(ir, "bytes");
            SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");

            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.ValueCount, multi.ValueCount);
                BytesRef actual   = new BytesRef();
                BytesRef expected = new BytesRef();
                // check values
                for (long i = 0; i < single.ValueCount; i++)
                {
                    single.LookupOrd(i, expected);
                    multi.LookupOrd(i, actual);
                    Assert.AreEqual(expected, actual);
                }
                // check ord list
                for (int i = 0; i < numDocs; i++)
                {
                    single.SetDocument(i);
                    List <long?> expectedList = new List <long?>();
                    long         ord;
                    while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        expectedList.Add(ord);
                    }

                    multi.SetDocument(i);
                    int upto = 0;
                    while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        Assert.AreEqual((long)expectedList[upto], ord);
                        upto++;
                    }
                    Assert.AreEqual(expectedList.Count, upto);
                }
            }

            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Test docvalues.
        /// @lucene.experimental
        /// </summary>
        public static Status.DocValuesStatus TestDocValues(AtomicReader reader, TextWriter infoStream)
        {
            Status.DocValuesStatus status = new Status.DocValuesStatus();
            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: docvalues...........");
                }
                foreach (FieldInfo fieldInfo in reader.FieldInfos)
                {
                    if (fieldInfo.HasDocValues())
                    {
                        status.TotalValueFields++;
                        CheckDocValues(fieldInfo, reader, /*infoStream,*/ status);
                    }
                    else
                    {
                        if (reader.GetBinaryDocValues(fieldInfo.Name) != null || reader.GetNumericDocValues(fieldInfo.Name) != null || reader.GetSortedDocValues(fieldInfo.Name) != null || reader.GetSortedSetDocValues(fieldInfo.Name) != null || reader.GetDocsWithField(fieldInfo.Name) != null)
                        {
                            throw new Exception("field: " + fieldInfo.Name + " has docvalues but should omit them!");
                        }
                    }
                }

                Msg(infoStream, "OK [" + status.TotalValueFields + " docvalues fields; " + status.TotalBinaryFields + " BINARY; " + status.TotalNumericFields + " NUMERIC; " + status.TotalSortedFields + " SORTED; " + status.TotalSortedSetFields + " SORTED_SET]");
            }
            catch (Exception e)
            {
                Msg(infoStream, "ERROR [" + Convert.ToString(e.Message) + "]");
                status.Error = e;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(e.ToString());
                    //infoStream.WriteLine(e.StackTrace);
                }
            }
            return status;
        }
Exemplo n.º 6
0
        private static void CheckDocValues(FieldInfo fi, AtomicReader reader, /*StreamWriter infoStream,*/ DocValuesStatus status)
        {
            Bits docsWithField = reader.GetDocsWithField(fi.Name);
            if (docsWithField == null)
            {
                throw new Exception(fi.Name + " docsWithField does not exist");
            }
            else if (docsWithField.Length() != reader.MaxDoc)
            {
                throw new Exception(fi.Name + " docsWithField has incorrect length: " + docsWithField.Length() + ",expected: " + reader.MaxDoc);
            }
            switch (fi.DocValuesType)
            {
                case FieldInfo.DocValuesType_e.SORTED:
                    status.TotalSortedFields++;
                    CheckSortedDocValues(fi.Name, reader, reader.GetSortedDocValues(fi.Name), docsWithField);
                    if (reader.GetBinaryDocValues(fi.Name) != null || reader.GetNumericDocValues(fi.Name) != null || reader.GetSortedSetDocValues(fi.Name) != null)
                    {
                        throw new Exception(fi.Name + " returns multiple docvalues types!");
                    }
                    break;

                case FieldInfo.DocValuesType_e.SORTED_SET:
                    status.TotalSortedSetFields++;
                    CheckSortedSetDocValues(fi.Name, reader, reader.GetSortedSetDocValues(fi.Name), docsWithField);
                    if (reader.GetBinaryDocValues(fi.Name) != null || reader.GetNumericDocValues(fi.Name) != null || reader.GetSortedDocValues(fi.Name) != null)
                    {
                        throw new Exception(fi.Name + " returns multiple docvalues types!");
                    }
                    break;

                case FieldInfo.DocValuesType_e.BINARY:
                    status.TotalBinaryFields++;
                    CheckBinaryDocValues(fi.Name, reader, reader.GetBinaryDocValues(fi.Name), docsWithField);
                    if (reader.GetNumericDocValues(fi.Name) != null || reader.GetSortedDocValues(fi.Name) != null || reader.GetSortedSetDocValues(fi.Name) != null)
                    {
                        throw new Exception(fi.Name + " returns multiple docvalues types!");
                    }
                    break;

                case FieldInfo.DocValuesType_e.NUMERIC:
                    status.TotalNumericFields++;
                    CheckNumericDocValues(fi.Name, reader, reader.GetNumericDocValues(fi.Name), docsWithField);
                    if (reader.GetBinaryDocValues(fi.Name) != null || reader.GetSortedDocValues(fi.Name) != null || reader.GetSortedSetDocValues(fi.Name) != null)
                    {
                        throw new Exception(fi.Name + " returns multiple docvalues types!");
                    }
                    break;

                default:
                    throw new InvalidOperationException();
            }
        }
Exemplo n.º 7
0
        public override void Warm(AtomicReader reader)
        {
            long startTime      = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            int  indexedCount   = 0;
            int  docValuesCount = 0;
            int  normsCount     = 0;

            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.IsIndexed)
                {
                    reader.GetTerms(info.Name);
                    indexedCount++;

                    if (info.HasNorms)
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues)
                {
                    switch (info.DocValuesType)
                    {
                    case DocValuesType.NUMERIC:
                        reader.GetNumericDocValues(info.Name);
                        break;

                    case DocValuesType.BINARY:
                        reader.GetBinaryDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED:
                        reader.GetSortedDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED_SET:
                        reader.GetSortedSetDocValues(info.Name);
                        break;

                    default:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(false);                               // unknown dv type
                        }
                        break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (infoStream.IsEnabled("SMSW"))
            {
                infoStream.Message("SMSW",
                                   "Finished warming segment: " + reader +
                                   ", indexed=" + indexedCount +
                                   ", docValues=" + docValuesCount +
                                   ", norms=" + normsCount +
                                   ", time=" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startTime)); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }
        }
Exemplo n.º 8
0
 public override SortedSetDocValues GetSortedSetDocValues(string field)
 {
     EnsureOpen();
     return(m_input.GetSortedSetDocValues(field));
 }
Exemplo n.º 9
0
 internal SlowMinShouldMatchScorer(BooleanWeight weight, AtomicReader reader, IndexSearcher searcher)
     : base(weight)
 {
     this.Dv = reader.GetSortedSetDocValues("dv");
     this.MaxDoc = reader.MaxDoc;
     BooleanQuery bq = (BooleanQuery)weight.Query;
     this.MinNrShouldMatch = bq.MinimumNumberShouldMatch;
     this.Sims = new SimScorer[(int)Dv.ValueCount];
     foreach (BooleanClause clause in bq.Clauses)
     {
         Debug.Assert(!clause.Prohibited);
         Debug.Assert(!clause.Required);
         Term term = ((TermQuery)clause.Query).Term;
         long ord = Dv.LookupTerm(term.Bytes);
         if (ord >= 0)
         {
             bool success = Ords.Add(ord);
             Debug.Assert(success); // no dups
             TermContext context = TermContext.Build(reader.Context, term);
             SimWeight w = weight.Similarity.ComputeWeight(1f, searcher.CollectionStatistics("field"), searcher.TermStatistics(term, context));
             var dummy = w.ValueForNormalization; // ignored
             w.Normalize(1F, 1F);
             Sims[(int)ord] = weight.Similarity.DoSimScorer(w, (AtomicReaderContext)reader.Context);
         }
     }
 }