示例#1
0
        /// <summary>
        /// Routes the doc-values payload of <paramref name="field"/> to the writer matching its
        /// doc-values type. Fields whose type is <c>DocValuesType.NONE</c> are ignored.
        /// </summary>
        /// <param name="docID">Document the value belongs to.</param>
        /// <param name="field">Field supplying the doc-values type and the binary or numeric value.</param>
        /// <param name="fieldInfo">Field metadata; its <c>DocValuesType</c> is overwritten with the field's type.</param>
        /// <exception cref="System.ArgumentException">
        /// The field is NUMERIC but its numeric value is missing or is not a <see cref="long"/>.
        /// </exception>
        public override void AddField(int docID, IIndexableField field, FieldInfo fieldInfo)
        {
            DocValuesType dvType = field.FieldType.DocValueType;

            if (dvType != DocValuesType.NONE)
            {
                fieldInfo.DocValuesType = dvType;
                if (dvType == DocValuesType.BINARY)
                {
                    AddBinaryField(fieldInfo, docID, field.GetBinaryValue());
                }
                else if (dvType == DocValuesType.SORTED)
                {
                    AddSortedField(fieldInfo, docID, field.GetBinaryValue());
                }
                else if (dvType == DocValuesType.SORTED_SET)
                {
                    AddSortedSetField(fieldInfo, docID, field.GetBinaryValue());
                }
                else if (dvType == DocValuesType.NUMERIC)
                {
                    // Read the value once so the type check, the error message and the cast all
                    // look at the same object.
                    object numericValue = field.GetNumericValue();
                    if (!(numericValue is long))
                    {
                        // A null value must still surface as ArgumentException; calling GetType()
                        // on it would raise NullReferenceException instead.
                        string actualType = numericValue == null ? "null" : numericValue.GetType().ToString();
                        throw new System.ArgumentException("illegal type " + actualType + ": DocValues types must be Long");
                    }
                    AddNumericField(fieldInfo, docID, (long)numericValue);
                }
                else
                {
                    Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Adds a stored <c>StringField</c> named <paramref name="field"/> to <paramref name="doc"/> and,
        /// unless <paramref name="type"/> is <c>DocValuesType.NONE</c>, a companion doc-values field
        /// named <c>field + "_dv"</c> of the requested kind.
        /// </summary>
        /// <param name="doc">Document receiving the fields.</param>
        /// <param name="field">Base field name.</param>
        /// <param name="value">Field value; parsed as an invariant-culture integer for NUMERIC.</param>
        /// <param name="type">Doc-values flavor to add (NONE, NUMERIC, BINARY or SORTED).</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="type"/> is not one of the handled kinds.
        /// </exception>
        private void AddField(Document doc, string field, string value, DocValuesType type)
        {
            doc.Add(new StringField(field, value, Field.Store.YES));
            if (type == DocValuesType.NONE)
            {
                return;
            }
            string dvField = field + "_dv";

            Field valuesField;

            switch (type)
            {
            case DocValuesType.NUMERIC:
                valuesField = new NumericDocValuesField(dvField, int.Parse(value, CultureInfo.InvariantCulture));
                break;

            case DocValuesType.BINARY:
                valuesField = new BinaryDocValuesField(dvField, new BytesRef(value));
                break;

            case DocValuesType.SORTED:
                valuesField = new SortedDocValuesField(dvField, new BytesRef(value));
                break;

            default:
                // Fail fast with a clear message rather than handing a null field to doc.Add.
                throw new System.ArgumentOutOfRangeException(nameof(type), type, "unsupported DocValuesType for field \"" + field + "\"");
            }
            doc.Add(valuesField);
        }
 /// <summary>
 /// Maps a <c>DocValuesType</c> to its byte code: NONE=0, NUMERIC=1, BINARY=2, SORTED=3, SORTED_SET=4.
 /// </summary>
 /// <exception cref="InvalidOperationException">The type is not one of the five listed values.</exception>
 private static sbyte DocValuesByte(DocValuesType type)
 {
     switch (type)
     {
         case DocValuesType.NONE:
             return 0;

         case DocValuesType.NUMERIC:
             return 1;

         case DocValuesType.BINARY:
             return 2;

         case DocValuesType.SORTED:
             return 3;

         case DocValuesType.SORTED_SET:
             return 4;

         default:
             throw new InvalidOperationException();
     }
 }
示例#4
0
 /// <summary>
 /// Translates a <c>DocValuesType</c> into its byte code
 /// (NONE=0, NUMERIC=1, BINARY=2, SORTED=3, SORTED_SET=4).
 /// Any other value results in the error produced by <c>AssertionError.Create()</c>.
 /// </summary>
 private static sbyte DocValuesByte(DocValuesType type)
 {
     switch (type)
     {
         case DocValuesType.NONE:
             return 0;

         case DocValuesType.NUMERIC:
             return 1;

         case DocValuesType.BINARY:
             return 2;

         case DocValuesType.SORTED:
             return 3;

         case DocValuesType.SORTED_SET:
             return 4;

         default:
             throw AssertionError.Create();
     }
 }
示例#5
0
        private long dvGen = -1; // the DocValues generation of this field

        // LUCENENET specific: De-nested the IndexOptions and DocValuesType enums from this class to prevent naming conflicts

        /// <summary>
        /// Sole Constructor.
        /// <para/>
        /// Index-related settings (term vectors, payloads, norms, index options) are only taken from
        /// the arguments when <paramref name="indexed"/> is <c>true</c>; otherwise they are reset to
        /// their "off" values.
        /// <para/>
        /// @lucene.experimental
        /// </summary>
        public FieldInfo(string name, bool indexed, int number, bool storeTermVector, bool omitNorms,
                         bool storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType,
                         IDictionary<string, string> attributes)
        {
            this.Name = name;
            this.indexed = indexed;
            this.Number = number;
            this.docValueType = docValues;

            // Each flag is honored only for indexed fields; non-indexed fields get the defaults.
            this.storeTermVector = indexed && storeTermVector;
            this.storePayloads = indexed && storePayloads;
            this.omitNorms = indexed && omitNorms;
            this.indexOptions = indexed ? indexOptions : IndexOptions.NONE;

            // Norms are kept only when the field is indexed and norms were not omitted.
            this.normType = (indexed && !omitNorms) ? normsType : DocValuesType.NONE;

            this.attributes = attributes;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(CheckConsistency());
            }
        }
示例#6
0
        /// <summary>
        /// Dispatches the doc-values content of <paramref name="field"/> to the writer for its
        /// doc-values type, after recording that type on <paramref name="fieldInfo"/>.
        /// Nothing happens for fields whose type is <c>DocValuesType.NONE</c>.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The field is NUMERIC but its numeric type is not <c>NumericFieldType.INT64</c>.
        /// </exception>
        public override void AddField(int docID, IIndexableField field, FieldInfo fieldInfo)
        {
            DocValuesType dvType = field.IndexableFieldType.DocValueType;
            if (dvType == DocValuesType.NONE)
            {
                return;
            }

            fieldInfo.DocValuesType = dvType;

            switch (dvType)
            {
                case DocValuesType.BINARY:
                    AddBinaryField(fieldInfo, docID, field.GetBinaryValue());
                    break;

                case DocValuesType.SORTED:
                    AddSortedField(fieldInfo, docID, field.GetBinaryValue());
                    break;

                case DocValuesType.SORTED_SET:
                    AddSortedSetField(fieldInfo, docID, field.GetBinaryValue());
                    break;

                case DocValuesType.NUMERIC:
                    if (field.NumericType != NumericFieldType.INT64)
                    {
                        throw new ArgumentException("illegal type " + field.NumericType + ": DocValues types must be " + NumericFieldType.INT64);
                    }
                    AddNumericField(fieldInfo, docID, field.GetInt64ValueOrDefault());
                    break;

                default:
                    Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
                    break;
            }
        }
示例#7
0
 /// <summary>
 /// Stores <paramref name="dvType"/> as the doc-values type for <paramref name="name"/> while holding
 /// the monitor on this instance. In debug builds it first asserts
 /// <c>ContainsConsistent(number, name, dvType)</c>.
 /// </summary>
 internal void SetDocValuesType(int number, string name, DocValuesType dvType)
 {
     bool lockTaken = false;
     try
     {
         // Explicit form of a lock statement on this instance.
         System.Threading.Monitor.Enter(this, ref lockTaken);

         Debug.Assert(ContainsConsistent(number, name, dvType));
         docValuesType[name] = dvType;
     }
     finally
     {
         if (lockTaken)
         {
             System.Threading.Monitor.Exit(this);
         }
     }
 }
示例#8
0
 /// <summary>
 /// Captures the given directory, reader, doc-values type, expected group counts and
 /// content strings in the corresponding fields of this instance.
 /// </summary>
 internal IndexContext(Directory directory, DirectoryReader indexReader, DocValuesType dvType,
                       IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts, string[] contentStrings)
 {
     // Index handles.
     this.indexReader = indexReader;
     this.directory = directory;

     // Test data captured alongside the index.
     this.contentStrings = contentStrings;
     this.searchTermToGroupCounts = searchTermToGroupCounts;
     this.dvType = dvType;
 }
示例#9
0
        /// <summary>
        /// Writes the header for this field: the <c>FIELD</c> marker followed by the field name and a
        /// newline, then the <c>TYPE</c> marker followed by <c>type.ToString()</c> and a newline.
        /// All output goes to <c>data</c>.
        /// </summary>
        /// <param name="field">Field whose <c>Name</c> is written.</param>
        /// <param name="type">Doc-values type whose string form is written.</param>
        private void WriteFieldEntry(FieldInfo field, DocValuesType type)
        {
            // Line 1: FIELD marker + field name.
            SimpleTextUtil.Write(data, FIELD);
            SimpleTextUtil.Write(data, field.Name, scratch);
            SimpleTextUtil.WriteNewline(data);

            // Line 2: TYPE marker + textual form of the doc-values type.
            SimpleTextUtil.Write(data, TYPE);
            SimpleTextUtil.Write(data, type.ToString(), scratch);
            SimpleTextUtil.WriteNewline(data);
        }
示例#10
0
            /// <summary>
            /// Used by assert. Checks that <paramref name="number"/> and <paramref name="name"/> map to each
            /// other in both lookup tables and that <paramref name="dvType"/> does not conflict with the
            /// doc-values type recorded for <paramref name="name"/> (a NONE on either side never conflicts).
            /// </summary>
            /// <returns>
            /// <c>true</c> when the mappings are consistent; <c>false</c> otherwise, including when
            /// <paramref name="name"/> has no entry in <c>nameToNumber</c>.
            /// </returns>
            internal bool ContainsConsistent(int? number, string name, DocValuesType dvType)
            {
                lock (this)
                {
                    numberToName.TryGetValue(number, out string numberToNameStr);
                    // Use the TryGetValue result instead of the indexer so that a missing name makes
                    // the check report "inconsistent" rather than throwing from inside an assertion.
                    bool nameIsMapped = nameToNumber.TryGetValue(name, out int? nameToNumberVal);
                    this.docValuesType.TryGetValue(name, out DocValuesType docValuesType);

                    return name.Equals(numberToNameStr, StringComparison.Ordinal) &&
                           nameIsMapped &&
                           number.Equals(nameToNumberVal) &&
                           (dvType == DocValuesType.NONE || docValuesType == DocValuesType.NONE || dvType == docValuesType);
                }
            }
示例#11
0
 /// <summary>
 /// 4.0-style docvalues byte. Returns 0 for <c>DocValuesType.NONE</c>; otherwise the byte value of
 /// <c>legacyTypeAtt.ToLegacyDocValuesType()</c>.
 /// </summary>
 /// <param name="type">Current doc-values type of the field.</param>
 /// <param name="legacyTypeAtt">Legacy type attribute; asserted null for NONE and non-null otherwise.</param>
 public virtual byte DocValuesByte(DocValuesType type, string legacyTypeAtt)
 {
     if (type != DocValuesType.NONE)
     {
         Debug.Assert(legacyTypeAtt != null);
         return (byte)legacyTypeAtt.ToLegacyDocValuesType();
     }

     Debug.Assert(legacyTypeAtt == null);
     return 0;
 }
示例#12
0
 /// <summary>
 /// Copy constructor: builds a new <see cref="FieldType"/> carrying every property of
 /// <paramref name="ref"/>. The frozen state is intentionally not copied.
 /// </summary>
 public FieldType(FieldType @ref)
 {
     // Basic indexing / storage switches.
     this.indexed = @ref.IsIndexed;
     this.stored = @ref.IsStored;
     this.tokenized = @ref.IsTokenized;

     // Term-vector options.
     this.storeTermVectors = @ref.StoreTermVectors;
     this.storeTermVectorOffsets = @ref.StoreTermVectorOffsets;
     this.storeTermVectorPositions = @ref.StoreTermVectorPositions;
     this.storeTermVectorPayloads = @ref.StoreTermVectorPayloads;

     // Norms, postings detail, doc values and numeric typing.
     this.omitNorms = @ref.OmitNorms;
     this.indexOptions = @ref.IndexOptions;
     this.docValueType = @ref.DocValueType;
     this.numericType = @ref.NumericType;

     // Deliberately no copy of the frozen flag.
 }
示例#13
0
 /// <summary>
 /// Records <paramref name="dvType"/> as the doc-values type for <paramref name="name"/>.
 /// The update runs between <c>UninterruptableMonitor.Enter(this)</c> and the matching
 /// <c>Exit(this)</c> in the <c>finally</c> block.
 /// </summary>
 /// <param name="number">Field number; only used by the consistency assertion.</param>
 /// <param name="name">Field name whose doc-values type is overwritten.</param>
 /// <param name="dvType">Doc-values type to store.</param>
 internal void SetDocValuesType(int number, string name, DocValuesType dvType)
 {
     UninterruptableMonitor.Enter(this);
     try
     {
         // Consistency is only verified when assertions are switched on.
         if (Debugging.AssertsEnabled)
         {
             Debugging.Assert(ContainsConsistent(number, name, dvType));
         }
         docValuesType[name] = dvType;
     }
     finally
     {
         // Always release the monitor, even if the assertion or the write throws.
         UninterruptableMonitor.Exit(this);
     }
 }
示例#14
0
 // should only be called by FieldInfos#addOrUpdate
 /// <summary>
 /// Merges the settings of another occurrence of this field into this instance.
 /// <para/>
 /// <c>indexed</c> becomes <c>true</c> whenever the stored flag differs from the argument. The
 /// remaining settings are only touched when the <paramref name="indexed"/> argument is <c>true</c>:
 /// <c>storeTermVector</c> and <c>storePayloads</c> become <c>true</c> on any difference,
 /// <c>omitNorms</c> becomes <c>true</c> (and <c>normType</c> NONE) on any difference, and
 /// <c>indexOptions</c> is either adopted (when currently NONE) or replaced by whichever of the
 /// two values compares lower according to <c>IndexOptionsComparer</c>.
 /// </summary>
 internal void Update(bool indexed, bool storeTermVector, bool omitNorms, bool storePayloads, IndexOptions indexOptions)
 {
     //System.out.println("FI.update field=" + name + " indexed=" + indexed + " omitNorms=" + omitNorms + " this.omitNorms=" + this.omitNorms);
     if (this.indexed != indexed)
     {
         this.indexed = true; // once indexed, always indexed
     }
     if (indexed)             // if the updated field data is not for indexing, leave the updates out
     {
         if (this.storeTermVector != storeTermVector)
         {
             this.storeTermVector = true; // once vector, always vector
         }
         if (this.storePayloads != storePayloads)
         {
             this.storePayloads = true;
         }
         if (this.omitNorms != omitNorms)
         {
             this.omitNorms = true; // if omitNorms is requested at least once, norms stay off for life
             this.normType  = DocValuesType.NONE;
         }
         if (this.indexOptions != indexOptions)
         {
             if (this.indexOptions == IndexOptions.NONE)
             {
                 // nothing recorded yet: take the incoming options as-is
                 this.indexOptions = indexOptions;
             }
             else
             {
                 // downgrade: keep whichever of the two options compares lower
                 // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                 this.indexOptions = IndexOptionsComparer.Default.Compare(this.indexOptions, indexOptions) < 0 ? this.indexOptions : indexOptions;
             }
             // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
             if (IndexOptionsComparer.Default.Compare(this.indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0)
             {
                 // cannot store payloads if we don't store positions:
                 this.storePayloads = false;
             }
         }
     }
     // Verify the merged state only when assertions are enabled.
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(CheckConsistency());
     }
 }
示例#15
0
            /// <summary>
            /// Returns the global field number for <paramref name="fieldName"/>. When the name has no
            /// number yet, <paramref name="preferredFieldNumber"/> is used if it is not <c>-1</c> and
            /// still free; otherwise the next unassigned number is taken. A non-NONE
            /// <paramref name="dvType"/> is also recorded for the field.
            /// </summary>
            /// <exception cref="ArgumentException">
            /// The field already has a different, non-NONE doc-values type.
            /// </exception>
            internal int AddOrGet(string fieldName, int preferredFieldNumber, DocValuesType dvType)
            {
                UninterruptableMonitor.Enter(this);
                try
                {
                    if (dvType != DocValuesType.NONE)
                    {
                        bool typeKnown = docValuesType.TryGetValue(fieldName, out DocValuesType currentDVType);
                        if (!typeKnown || currentDVType == DocValuesType.NONE)
                        {
                            // No type recorded so far (or only NONE): adopt the requested one.
                            docValuesType[fieldName] = dvType;
                        }
                        else if (currentDVType != dvType)
                        {
                            throw new ArgumentException("cannot change DocValues type from " + currentDVType + " to " + dvType + " for field \"" + fieldName + "\"");
                        }
                    }

                    bool numberKnown = nameToNumber.TryGetValue(fieldName, out int? assignedNumber);
                    if (numberKnown && assignedNumber != null)
                    {
                        return (int)assignedNumber;
                    }

                    int? chosenNumber = preferredFieldNumber;
                    if (preferredFieldNumber == -1 || numberToName.ContainsKey(chosenNumber))
                    {
                        // Preferred number is unusable: advance to the next free number. The counter
                        // might not be up to date, so keep stepping until an unused one is found.
                        do
                        {
                            lowestUnassignedFieldNumber++;
                        }
                        while (numberToName.ContainsKey(lowestUnassignedFieldNumber));

                        chosenNumber = lowestUnassignedFieldNumber;
                    }

                    numberToName[chosenNumber] = fieldName;
                    nameToNumber[fieldName] = chosenNumber;

                    return (int)chosenNumber;
                }
                finally
                {
                    UninterruptableMonitor.Exit(this);
                }
            }
示例#16
0
 /// <summary>
 /// Returns <c>true</c> if <paramref name="fieldName"/> is present in the name-to-number map and the
 /// doc-values type recorded for it equals <paramref name="dvType"/>.
 /// </summary>
 internal bool Contains(string fieldName, DocValuesType dvType)
 {
     lock (this)
     {
         // used by IndexWriter.updateNumericDocValue
         if (nameToNumber.ContainsKey(fieldName))
         {
             // When no type entry exists, recordedType keeps the default value of DocValuesType.
             docValuesType.TryGetValue(fieldName, out DocValuesType recordedType);
             return recordedType == dvType;
         }

         return false;
     }
 }
示例#17
0
            /// <summary>
            /// Used by assert. Checks that <paramref name="number"/> and <paramref name="name"/> map to each
            /// other in both lookup tables and that <paramref name="dvType"/> does not conflict with the
            /// doc-values type recorded for <paramref name="name"/> (a NONE on either side never conflicts).
            /// </summary>
            /// <returns>
            /// <c>true</c> when the mappings are consistent; <c>false</c> otherwise, including when
            /// <paramref name="name"/> has no entry in <c>nameToNumber</c>.
            /// </returns>
            internal bool ContainsConsistent(int number, string name, DocValuesType dvType)
            {
                UninterruptableMonitor.Enter(this);
                try
                {
                    numberToName.TryGetValue(number, out string numberToNameStr);
                    // Keep the lookup result: a missing name must yield "inconsistent" instead of an
                    // exception from the indexer, and must not be confused with a stored number of 0.
                    bool nameIsMapped = nameToNumber.TryGetValue(name, out int nameToNumberVal);
                    this.docValuesType.TryGetValue(name, out DocValuesType docValuesType);

                    return name.Equals(numberToNameStr, StringComparison.Ordinal) &&
                           nameIsMapped &&
                           number == nameToNumberVal &&
                           (dvType == DocValuesType.NONE || docValuesType == DocValuesType.NONE || dvType == docValuesType);
                }
                finally
                {
                    UninterruptableMonitor.Exit(this);
                }
            }
示例#18
0
 /// <summary>
 /// Tells whether <paramref name="fieldName"/> is registered in the name-to-number map with a
 /// recorded doc-values type equal to <paramref name="dvType"/>.
 /// </summary>
 internal bool Contains(string fieldName, DocValuesType dvType)
 {
     lock (this)
     {
         // used by IndexWriter.updateNumericDocValue
         bool sameType = false;

         if (nameToNumber.ContainsKey(fieldName))
         {
             // the answer is true only when the recorded type equals the requested one
             DocValuesType recordedType;
             docValuesType.TryGetValue(fieldName, out recordedType);
             sameType = (recordedType == dvType);
         }

         return sameType;
     }
 }
示例#19
0
        // Returns the FieldInfo for the given field when it exists, has doc values
        // (type other than NONE) and its doc-values type equals the requested type;
        // returns null in every other case.
        private FieldInfo GetDVField(string field, DocValuesType type)
        {
            FieldInfo candidate = FieldInfos.FieldInfo(field);

            bool usable = candidate != null                                   // field exists
                          && candidate.DocValuesType != DocValuesType.NONE   // indexed with doc values
                          && candidate.DocValuesType == type;                // of the requested type

            return usable ? candidate : null;
        }
示例#20
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: void assertValidType(String key, Object value, IndexIdentifier identifier) throws org.neo4j.internal.kernel.api.exceptions.explicitindex.ExplicitIndexNotFoundKernelException
        /// <summary>
        /// Verifies that <paramref name="value"/> is compatible with the doc-values type already
        /// registered for <paramref name="key"/> in the index named by <paramref name="identifier"/>,
        /// registering the expected type when the key has none yet. Numbers expect
        /// <c>SORTED_NUMERIC</c>; every other value expects <c>SORTED_SET</c>.
        /// </summary>
        /// <exception cref="System.ArgumentException">
        /// The key is already registered with a different, non-NONE doc-values type.
        /// </exception>
        internal virtual void AssertValidType(string key, object value, IndexIdentifier identifier)
        {
            DocValuesType expectedType;
            string        expectedTypeName;

            if (value is Number)
            {
                expectedType     = DocValuesType.SORTED_NUMERIC;
                expectedTypeName = "numbers";
            }
            else
            {
                expectedType     = DocValuesType.SORTED_SET;
                expectedTypeName = "strings";
            }
            // NOTE(review): this looks like converted Java Map.get(); confirm the indexer yields null
            // for an unknown identifier rather than throwing, since the loop below relies on null.
            IDictionary <string, DocValuesType> stringDocValuesTypeMap = _indexTypeMap[identifier];
            // If the index searcher has never been loaded, we need to load it now to populate the map.
            int iterations = 0;               // Iterate a bit in case we race with an index drop or create.

            // Retry the lookup at most 20 times while no map is available.
            while (stringDocValuesTypeMap == null && iterations++ < 20)
            {
                // We don't use ensureInstantiated because we want to surface the exception in this case.
                GetIndexSearcher(identifier).close();
                stringDocValuesTypeMap = _indexTypeMap[identifier];
            }

            if (stringDocValuesTypeMap == null)
            {
                // Looks like we are running into some adversarial racing, so let's just give up.
                throw new ExplicitIndexNotFoundKernelException("Index '%s' doesn't exist.", identifier);
            }

            // Registers expectedType when the key is new; actualType is whatever putIfAbsent returns.
            DocValuesType actualType = stringDocValuesTypeMap.putIfAbsent(key, expectedType);

            // NOTE(review): the message reports expectedTypeName as what the key "has been used to index",
            // although this branch is reached when the registered type differs from it — confirm intent.
            if (actualType != null && !actualType.Equals(DocValuesType.NONE) && !actualType.Equals(expectedType))
            {
                throw new System.ArgumentException(string.Format("Cannot index '{0}' for key '{1}', since this key has been used to index {2}. Raw value of the index type is {3}", value, key, expectedTypeName, actualType));
            }
        }
示例#21
0
 /// <summary>
 /// Looks up the numeric doc values of <paramref name="field"/> on the reader of <c>Context</c>.
 /// </summary>
 /// <returns> the <c>NumericDocValues</c> for a given field </returns>
 /// <exception cref="System.InvalidOperationException"> if this field is not indexed with numeric doc values </exception>
 public NumericDocValues ReadDocValues(string field)
 {
     try
     {
         NumericDocValues dv = Context.reader().getNumericDocValues(field);
         if (dv == null)
         {
             // No numeric doc values: report the doc-values type the field actually has, if the field is known.
             FieldInfo     fi     = Context.reader().FieldInfos.fieldInfo(field);
             DocValuesType actual = null;
             if (fi != null)
             {
                 actual = fi.DocValuesType;
             }
             throw new System.InvalidOperationException("The field '" + field + "' is not indexed properly, expected NumericDV, but got '" + actual + "'");
         }
         return(dv);
     }
     catch (IOException e)
     {
         // NOTE(review): converted from Java's RuntimeException(e); confirm an Exception
         // constructor accepting an exception exists in this code base.
         throw new Exception(e);
     }
 }
示例#22
0
        /// <summary>
        /// Merges the doc values of every field in <c>mergeState.FieldInfos</c> that has a doc-values
        /// type other than NONE. For each such field the per-reader values are collected (an empty
        /// placeholder is used for readers without values) and handed to the matching merge method of
        /// the consumer obtained from the codec's doc-values format.
        /// </summary>
        /// <param name="segmentWriteState">Write state used to create the doc-values consumer.</param>
        private void MergeDocValues(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in mergeState.FieldInfos)
                {
                    DocValuesType type = field.DocValuesType;

                    switch (type)
                    {
                        case DocValuesType.NONE:
                            // Field carries no doc values: nothing to merge.
                            break;

                        case DocValuesType.NUMERIC:
                        {
                            IList<NumericDocValues> numericSources = new JCG.List<NumericDocValues>();
                            IList<IBits> numericPresence = new JCG.List<IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                NumericDocValues perReader = reader.GetNumericDocValues(field.Name);
                                IBits present = reader.GetDocsWithField(field.Name);
                                if (perReader == null)
                                {
                                    // Reader has no values for this field: empty values, no doc has the field.
                                    perReader = DocValues.EMPTY_NUMERIC;
                                    present = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                numericSources.Add(perReader);
                                numericPresence.Add(present);
                            }
                            consumer.MergeNumericField(field, mergeState, numericSources, numericPresence);
                            break;
                        }

                        case DocValuesType.BINARY:
                        {
                            IList<BinaryDocValues> binarySources = new JCG.List<BinaryDocValues>();
                            IList<IBits> binaryPresence = new JCG.List<IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                BinaryDocValues perReader = reader.GetBinaryDocValues(field.Name);
                                IBits present = reader.GetDocsWithField(field.Name);
                                if (perReader == null)
                                {
                                    // Reader has no values for this field: empty values, no doc has the field.
                                    perReader = DocValues.EMPTY_BINARY;
                                    present = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                binarySources.Add(perReader);
                                binaryPresence.Add(present);
                            }
                            consumer.MergeBinaryField(field, mergeState, binarySources, binaryPresence);
                            break;
                        }

                        case DocValuesType.SORTED:
                        {
                            IList<SortedDocValues> sortedSources = new JCG.List<SortedDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedDocValues perReader = reader.GetSortedDocValues(field.Name);
                                if (perReader == null)
                                {
                                    perReader = DocValues.EMPTY_SORTED;
                                }
                                sortedSources.Add(perReader);
                            }
                            consumer.MergeSortedField(field, mergeState, sortedSources);
                            break;
                        }

                        case DocValuesType.SORTED_SET:
                        {
                            IList<SortedSetDocValues> sortedSetSources = new JCG.List<SortedSetDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedSetDocValues perReader = reader.GetSortedSetDocValues(field.Name);
                                if (perReader == null)
                                {
                                    perReader = DocValues.EMPTY_SORTED_SET;
                                }
                                sortedSetSources.Add(perReader);
                            }
                            consumer.MergeSortedSetField(field, mergeState, sortedSetSources);
                            break;
                        }

                        default:
                            throw AssertionError.Create("type=" + type);
                    }
                }

                success = true;
            }
            finally
            {
                // On failure, dispose via the variant meant for use while an exception is being handled.
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
                else
                {
                    IOUtils.Dispose(consumer);
                }
            }
        }
        /// <summary>
        /// Adds a stored <c>TextField</c> for the group value and, when <paramref name="canUseIDV"/> is
        /// set, a companion doc-values field named <c>groupField + "_dv"</c> of the requested kind
        /// (BINARY or SORTED). Any other kind triggers <c>fail("unhandled type")</c>.
        /// </summary>
        private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, DocValuesType valueType)
        {
            doc.Add(new TextField(groupField, value, Field.Store.YES));
            if (!canUseIDV)
            {
                return;
            }

            Field dvField = null;
            if (valueType == DocValuesType.BINARY)
            {
                dvField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
            }
            else if (valueType == DocValuesType.SORTED)
            {
                dvField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
            }
            else
            {
                fail("unhandled type");
            }

            doc.Add(dvField);
        }
        /// <summary>
        /// Picks, based on <c>Random.nextBoolean()</c>, either a function-based collector over a
        /// <c>BytesRefFieldSource</c> or a term-based collector for <paramref name="groupField"/>,
        /// and prints the selected implementation when <c>VERBOSE</c> is set.
        /// </summary>
        private AbstractAllGroupHeadsCollector CreateRandomCollector(string groupField, Sort sortWithinGroup, bool canUseIDV, DocValuesType valueType)
        {
            bool useFunctionCollector = Random.nextBoolean();

            AbstractAllGroupHeadsCollector chosen;
            if (!useFunctionCollector)
            {
                chosen = TermAllGroupHeadsCollector.Create(groupField, sortWithinGroup);
            }
            else
            {
                ValueSource groupValues = new BytesRefFieldSource(groupField);
                chosen = new FunctionAllGroupHeadsCollector(groupValues, new Hashtable(), sortWithinGroup);
            }

            if (VERBOSE)
            {
                Console.WriteLine("Selected implementation: " + chosen.GetType().Name);
            }

            return chosen;
        }
        public void TestBasic()
        {
            string            groupField = "author";
            Directory         dir        = NewDirectory();
            RandomIndexWriter w          = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            bool          canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType valueType = vts[Random.nextInt(vts.Length)];

            // 0
            Document doc = new Document();

            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "1", Field.Store.NO));
            doc.Add(NewStringField("id_2", "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(NewStringField("id_1", "2", Field.Store.NO));
            doc.Add(NewStringField("id_2", "2", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(NewStringField("id_1", "3", Field.Store.NO));
            doc.Add(NewStringField("id_2", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddGroupField(doc, groupField, "author2", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "4", Field.Store.NO));
            doc.Add(NewStringField("id_2", "4", Field.Store.NO));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "5", Field.Store.NO));
            doc.Add(NewStringField("id_2", "5", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
            doc.Add(NewTextField("content", "random blob", Field.Store.NO));
            doc.Add(NewStringField("id_1", "6", Field.Store.NO));
            doc.Add(NewStringField("id_2", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "6", Field.Store.NO));
            doc.Add(NewStringField("id_2", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 7 -- no author field
            doc = new Document();
            doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "7", Field.Store.NO));
            doc.Add(NewStringField("id_2", "7", Field.Store.NO));
            w.AddDocument(doc);

            IndexReader   reader        = w.GetReader();
            IndexSearcher indexSearcher = NewSearcher(reader);

            w.Dispose();
            int maxDoc = reader.MaxDoc;

            Sort sortWithinGroup        = new Sort(new SortField("id_1", SortFieldType.INT32, true));
            var  allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            // STRING sort type triggers different implementation
            Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortFieldType.STRING, true));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup2, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortFieldType.STRING, false));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup3, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            // 7 b/c higher doc id wins, even if order of field is in not in reverse.
            assertTrue(ArrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Randomized test for the all-group-heads collectors. Each run builds an index of
        /// random documents (some without a group value), runs 100 searches with a randomly
        /// created collector and random within-group sort, and asserts that the collected
        /// group heads equal the ones computed by <c>CreateExpectedGroupHeads</c>.
        /// </summary>
        public void TestRandom()
        {
            int numberOfRuns = TestUtil.NextInt32(Random, 3, 6);

            for (int iter = 0; iter < numberOfRuns; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
                }

                int numDocs   = TestUtil.NextInt32(Random, 100, 1000) * RANDOM_MULTIPLIER;
                int numGroups = TestUtil.NextInt32(Random, 1, numDocs);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
                }

                // Pool of random, non-empty values; used below for the group field and for sort1/sort2.
                List <BytesRef> groups = new List <BytesRef>();
                for (int i = 0; i < numGroups; i++)
                {
                    string randomValue;
                    do
                    {
                        // B/c of DV based impl we can't see the difference between an empty string and a null value.
                        // For that reason we don't generate empty string groups.
                        randomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    } while ("".Equals(randomValue, StringComparison.Ordinal));
                    groups.Add(new BytesRef(randomValue));
                }
                string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: create fake content");
                }
                // Each content string is "real" + Random.nextInt(3), a space, then a random number of "fake " tokens.
                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.append("real").append(Random.nextInt(3)).append(' ');
                    int fakeCount = Random.nextInt(10);
                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
                    {
                        sb.append("fake ");
                    }
                    contentStrings[contentIDX] = sb.toString();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + sb.toString());
                    }
                }

                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
                    Random,
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                         new MockAnalyzer(Random)));
                // Doc values are only used when the writer's codec is not named "Lucene3x".
                bool          preFlex   = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
                bool          canUseIDV = !preFlex;
                DocValuesType valueType = vts[Random.nextInt(vts.Length)];

                // Two reusable documents: "doc" carries the group field (and its doc values field when
                // canUseIDV), "docNoGroup" does not. The sort/content/id Field instances are shared by
                // both documents and are re-populated for every generated GroupDoc before it is added.
                Document doc        = new Document();
                Document docNoGroup = new Document();
                Field    group      = NewStringField("group", "", Field.Store.NO);
                doc.Add(group);
                Field valuesField = null;
                if (canUseIDV)
                {
                    switch (valueType)
                    {
                    case DocValuesType.BINARY:
                        valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                        break;

                    case DocValuesType.SORTED:
                        valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                        break;

                    default:
                        fail("unhandled type");
                        break;
                    }
                    doc.Add(valuesField);
                }
                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
                doc.Add(sort1);
                docNoGroup.Add(sort1);
                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
                doc.Add(sort2);
                docNoGroup.Add(sort2);
                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
                doc.Add(sort3);
                docNoGroup.Add(sort3);
                Field content = NewTextField("content", "", Field.Store.NO);
                doc.Add(content);
                docNoGroup.Add(content);
                Int32Field id = new Int32Field("id", 0, Field.Store.NO);
                doc.Add(id);
                docNoGroup.Add(id);
                GroupDoc[] groupDocs = new GroupDoc[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    BytesRef groupValue;
                    if (Random.nextInt(24) == 17)
                    {
                        // So we test the "doc doesn't have the group'd
                        // field" case:
                        groupValue = null;
                    }
                    else
                    {
                        groupValue = groups[Random.nextInt(groups.size())];
                    }

                    // sort1 and sort2 are random picks from the group pool; sort3 is the
                    // zero-padded (5 digit) loop index.
                    GroupDoc groupDoc = new GroupDoc(
                        i,
                        groupValue,
                        groups[Random.nextInt(groups.size())],
                        groups[Random.nextInt(groups.size())],
                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                        contentStrings[Random.nextInt(contentStrings.Length)]
                        );

                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
                    }

                    groupDocs[i] = groupDoc;
                    if (groupDoc.group != null)
                    {
                        group.SetStringValue(groupDoc.group.Utf8ToString());
                        if (canUseIDV)
                        {
                            valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString()));
                        }
                    }
                    sort1.SetStringValue(groupDoc.sort1.Utf8ToString());
                    sort2.SetStringValue(groupDoc.sort2.Utf8ToString());
                    sort3.SetStringValue(groupDoc.sort3.Utf8ToString());
                    content.SetStringValue(groupDoc.content);
                    id.SetInt32Value(groupDoc.id);
                    // Documents without a group value are indexed through docNoGroup, which has no group field.
                    if (groupDoc.group == null)
                    {
                        w.AddDocument(docNoGroup);
                    }
                    else
                    {
                        w.AddDocument(doc);
                    }
                }

                DirectoryReader r = w.GetReader();
                w.Dispose();

                // NOTE: intentional but temporary field cache insanity!
                // docIdToFieldId maps a Lucene doc id to the indexed "id" value; fieldIdToDocID is its inverse.
                FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);
                int[]             fieldIdToDocID = new int[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    int fieldId = docIdToFieldId.Get(i);
                    fieldIdToDocID[fieldId] = i;
                }

                try
                {
                    IndexSearcher s = NewSearcher(r);
                    // Re-evaluate canUseIDV for searching: off when the searcher's reader is a
                    // SlowCompositeReaderWrapper, otherwise the same codec-based rule as at index time.
                    if (typeof(SlowCompositeReaderWrapper).GetTypeInfo().IsAssignableFrom(s.IndexReader.GetType()))
                    {
                        canUseIDV = false;
                    }
                    else
                    {
                        canUseIDV = !preFlex;
                    }

                    // Record the score of every hit on its GroupDoc. The score must still be 0.0 when
                    // it is set, i.e. each document is expected to match exactly one "real<N>" term.
                    for (int contentID = 0; contentID < 3; contentID++)
                    {
                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                        foreach (ScoreDoc hit in hits)
                        {
                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                            assertTrue(gd.score == 0.0);
                            gd.score = hit.Score;
                            int docId = gd.id;
                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                        }
                    }

                    // Every document must have received a (non-zero) score from one of the searches above.
                    foreach (GroupDoc gd in groupDocs)
                    {
                        assertTrue(gd.score != 0.0);
                    }

                    for (int searchIter = 0; searchIter < 100; searchIter++)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: searchIter=" + searchIter);
                        }

                        string searchTerm      = "real" + Random.nextInt(3);
                        bool   sortByScoreOnly = Random.nextBoolean();
                        Sort   sortWithinGroup = GetRandomSort(sortByScoreOnly);
                        AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                        int[] actualGroupHeads   = allGroupHeadsCollector.RetrieveGroupHeads();
                        // The actual group heads contains Lucene ids. Need to change them into our id value.
                        for (int i = 0; i < actualGroupHeads.Length; i++)
                        {
                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                        }
                        // Allows us the easily iterate and assert the actual and expected results.
                        Array.Sort(expectedGroupHeads);
                        Array.Sort(actualGroupHeads);

                        if (VERBOSE)
                        {
                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                            Console.WriteLine("Sort within group: " + sortWithinGroup);
                            Console.WriteLine("Num group: " + numGroups);
                            Console.WriteLine("Num doc: " + numDocs);
                            Console.WriteLine("\n=== Expected: \n");
                            foreach (int expectedDocId in expectedGroupHeads)
                            {
                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                                string   expectedGroup    = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                                  expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                                    );
                            }
                            Console.WriteLine("\n=== Actual: \n");
                            foreach (int actualDocId in actualGroupHeads)
                            {
                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
                                string   actualGroup    = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                                  actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                                    );
                            }
                            Console.WriteLine("\n===================================================================================");
                        }

                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
                    }
                }
                finally
                {
                    // Always purge the field cache entries created for this reader, even if an assertion failed.
                    QueryUtils.PurgeFieldCache(r);
                }

                r.Dispose();
                dir.Dispose();
            }
        }
 /// <summary>
 /// Maps a doc values type to a small numeric code.
 /// </summary>
 /// <param name="type">The doc values type, or <c>null</c> when there is none.</param>
 /// <returns>
 /// 0 for <c>null</c>, 1 for NUMERIC, 2 for BINARY, 3 for SORTED and 4 for SORTED_SET.
 /// </returns>
 /// <exception cref="InvalidOperationException">
 /// <paramref name="type"/> is none of the values listed above.
 /// </exception>
 private static sbyte DocValuesByte(DocValuesType? type)
 {
     switch (type)
     {
         case null:
             return 0;

         case DocValuesType.NUMERIC:
             return 1;

         case DocValuesType.BINARY:
             return 2;

         case DocValuesType.SORTED:
             return 3;

         case DocValuesType.SORTED_SET:
             return 4;

         default:
             throw new InvalidOperationException();
     }
 }
示例#28
0
        /// <summary>
        /// Reads the field infos of a segment from <paramref name="directory"/>.
        /// </summary>
        /// <param name="directory">Directory the field infos file is opened from.</param>
        /// <param name="segmentName">Segment name used to build the file name.</param>
        /// <param name="segmentSuffix">Not used by this implementation.</param>
        /// <param name="iocontext">I/O context passed to <c>OpenInput</c>.</param>
        /// <returns>A <see cref="FieldInfos"/> built from the decoded entries.</returns>
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            IndexInput input = directory.OpenInput(
                IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION),
                iocontext);

            // Flipped only after everything was decoded; selects the disposal strategy in finally.
            bool completed = false;

            try
            {
                CodecUtil.CheckHeader(
                    input,
                    Lucene42FieldInfosFormat.CODEC_NAME,
                    Lucene42FieldInfosFormat.FORMAT_START,
                    Lucene42FieldInfosFormat.FORMAT_CURRENT);

                // The entry count comes first, followed by one record per field.
                int         fieldCount = input.ReadVInt32();
                FieldInfo[] decoded    = new FieldInfo[fieldCount];

                for (int slot = 0; slot < fieldCount; slot++)
                {
                    string fieldName = input.ReadString();
                    int    number    = input.ReadVInt32();
                    sbyte  flags     = (sbyte)input.ReadByte();

                    bool indexed     = (flags & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
                    bool termVectors = (flags & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
                    bool noNorms     = (flags & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
                    bool payloads    = (flags & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;

                    // First matching flag wins; a field that is not indexed has no index options.
                    IndexOptions options =
                        !indexed
                            ? IndexOptions.NONE
                            : (flags & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0
                                ? IndexOptions.DOCS_ONLY
                                : (flags & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0
                                    ? IndexOptions.DOCS_AND_FREQS
                                    : (flags & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0
                                        ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
                                        : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

                    // One byte packs both types: low nibble = doc values type, high nibble = norms type.
                    sbyte         packedTypes   = (sbyte)input.ReadByte();
                    DocValuesType docValuesType = GetDocValuesType(input, (byte)(packedTypes & 0x0F));
                    DocValuesType normsType     = GetDocValuesType(input, (byte)((packedTypes.TripleShift(4)) & 0x0F));

                    IDictionary<string, string> attributes = input.ReadStringStringMap();

                    decoded[slot] = new FieldInfo(
                        fieldName, indexed, number, termVectors,
                        noNorms, payloads, options, docValuesType, normsType, attributes.AsReadOnly());
                }

                CodecUtil.CheckEOF(input);
                FieldInfos result = new FieldInfos(decoded);
                completed = true;
                return result;
            }
            finally
            {
                if (!completed)
                {
                    // A failure is already propagating: dispose via the exception-handling helper.
                    IOUtils.DisposeWhileHandlingException(input);
                }
                else
                {
                    input.Dispose();
                }
            }
        }
示例#29
0
        /// <summary>
        /// Builds a randomized index together with the expected data for it: for every
        /// content term, the group values seen (in insertion order) and, per group, the
        /// set of count values seen. Group and count values may be <c>null</c>.
        /// </summary>
        /// <returns>
        /// An <see cref="IndexContext"/> holding the directory, an open reader, the chosen
        /// doc values type, the expected term/group/count map and the distinct content terms.
        /// </returns>
        private IndexContext CreateIndexContext()
        {
            Random rnd = Random;

            DocValuesType[] candidateTypes = { DocValuesType.BINARY, DocValuesType.SORTED };

            Directory directory = NewDirectory();
            var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(rnd))
                .SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(rnd, directory, writerConfig);

            // Doc values are only used when the writer's codec is not named "Lucene3x".
            bool docValuesSupported = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType;
            if (docValuesSupported)
            {
                dvType = candidateTypes[rnd.nextInt(candidateTypes.Length)];
            }
            else
            {
                dvType = DocValuesType.NONE;
            }

            int numDocs = 86 + rnd.nextInt(1087) * RANDOM_MULTIPLIER;

            // Value pools: all group values are generated before any count value.
            string[] groupValues = new string[numDocs / 5];
            string[] countValues = new string[numDocs / 10];
            for (int g = 0; g < groupValues.Length; g++)
            {
                groupValues[g] = GenerateRandomNonEmptyString();
            }
            for (int c = 0; c < countValues.Length; c++)
            {
                countValues[c] = GenerateRandomNonEmptyString();
            }

            List<string> contentStrings = new List<string>();
            IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts =
                new HashMap<string, IDictionary<string, ISet<string>>>();

            for (int docNumber = 1; docNumber <= numDocs; docNumber++)
            {
                // A null value means the document gets no group (or count) field.
                string groupValue;
                if (rnd.nextInt(23) == 14)
                {
                    groupValue = null;
                }
                else
                {
                    groupValue = groupValues[rnd.nextInt(groupValues.Length)];
                }

                string countValue;
                if (rnd.nextInt(21) == 13)
                {
                    countValue = null;
                }
                else
                {
                    countValue = countValues[rnd.nextInt(countValues.Length)];
                }

                string content = "random" + rnd.nextInt(numDocs / 20);

                IDictionary<string, ISet<string>> groupToCounts;
                if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
                {
                    // Groups sort always DOCID asc...
                    groupToCounts = new LinkedHashMap<string, ISet<string>>();
                    searchTermToGroupCounts.Add(content, groupToCounts);
                    contentStrings.Add(content);
                }

                ISet<string> countsVals;
                if (!groupToCounts.TryGetValue(groupValue, out countsVals))
                {
                    countsVals = new HashSet<string>();
                    groupToCounts.Add(groupValue, countsVals);
                }
                countsVals.Add(countValue);

                Document document = new Document();
                document.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", docNumber), Field.Store.YES));
                if (groupValue != null)
                {
                    AddField(document, groupField, groupValue, dvType);
                }
                if (countValue != null)
                {
                    AddField(document, countField, countValue, dvType);
                }
                document.Add(new TextField("content", content, Field.Store.YES));
                writer.AddDocument(document);
            }

            DirectoryReader reader = writer.GetReader();

            if (VERBOSE)
            {
                int maxDoc = reader.MaxDoc;
                for (int docID = 0; docID < maxDoc; docID++)
                {
                    Document stored = reader.Document(docID);
                    Console.WriteLine("docID=" + docID + " id=" + stored.Get("id") + " content=" + stored.Get("content") + " author=" + stored.Get("author") + " publisher=" + stored.Get("publisher"));
                }
            }

            writer.Dispose();

            string[] contentTerms = contentStrings.ToArray();
            return new IndexContext(directory, reader, dvType, searchTermToGroupCounts, contentTerms);
        }
示例#30
0
            /// <summary>
            /// Returns the <see cref="FieldInfo"/> registered under <paramref name="name"/>,
            /// updating it with the given settings when it already exists and creating and
            /// registering a new one otherwise.
            /// </summary>
            /// <param name="name">Field name.</param>
            /// <param name="preferredFieldNumber">Number passed to the global field numbers when a new field is allocated.</param>
            /// <param name="isIndexed">Indexed flag passed on to the field info.</param>
            /// <param name="storeTermVector">Term vector flag passed on to the field info.</param>
            /// <param name="omitNorms">Omit-norms flag passed on to the field info.</param>
            /// <param name="storePayloads">Payload flag passed on to the field info.</param>
            /// <param name="indexOptions">Index options passed on to the field info.</param>
            /// <param name="docValues">Doc values type; <c>NONE</c> leaves an existing field's type untouched.</param>
            /// <param name="normType">Norms type; <c>NONE</c> leaves an existing field's norm type untouched.</param>
            /// <returns>The existing (updated) or newly created field info.</returns>
            private FieldInfo AddOrUpdateInternal(string name, int preferredFieldNumber, bool isIndexed, bool storeTermVector, bool omitNorms, bool storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType)
            {
                // LUCENENET: Bypass FieldInfo method so we can access the quick boolean check
                if (TryGetFieldInfo(name, out FieldInfo fi) && !(fi is null))
                {
                    // Known field: merge the new settings into the existing entry.
                    fi.Update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);

                    if (docValues != DocValuesType.NONE)
                    {
                        // Only pay the synchronization cost if fi did not already have a DVType.
                        bool hadDocValues = fi.HasDocValues;

                        // The setter also performs the consistency check.
                        fi.DocValuesType = docValues;

                        if (!hadDocValues)
                        {
                            // Keep the global docValuesType map aware of this field's type.
                            globalFieldNumbers.SetDocValuesType(fi.Number, name, docValues);
                        }
                    }

                    if (!fi.OmitsNorms && normType != DocValuesType.NONE)
                    {
                        fi.NormType = normType;
                    }

                    return fi;
                }

                // The field is not yet part of this in-RAM segment's FieldInfo, so ask for a
                // global number. A field seen before gets the same name and number back;
                // otherwise a new one is allocated.
                int globalNumber = globalFieldNumbers.AddOrGet(name, preferredFieldNumber, docValues);
                fi = new FieldInfo(name, isIndexed, globalNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, null);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!byName.ContainsKey(fi.Name));
                    Debugging.Assert(globalFieldNumbers.ContainsConsistent(fi.Number, fi.Name, fi.DocValuesType));
                }

                byName[fi.Name] = fi;
                return fi;
            }
示例#31
0
        /// <summary>
        /// Indexes seven small documents (with a commit after the third one) and verifies,
        /// for the terms "random", "some" and "blob", which groups are found and which
        /// distinct count values are collected for each group.
        /// </summary>
        public virtual void TestSimple()
        {
            Random rnd = Random;

            DocValuesType[] candidateTypes =
            {
                DocValuesType.NUMERIC,
                DocValuesType.BINARY,
                DocValuesType.SORTED,
            };
            Directory directory = NewDirectory();
            var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(rnd))
                .SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(rnd, directory, writerConfig);

            // Doc values are only used when the writer's codec is not named "Lucene3x".
            bool docValuesSupported = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType;
            if (docValuesSupported)
            {
                dvType = candidateTypes[rnd.nextInt(candidateTypes.Length)];
            }
            else
            {
                dvType = DocValuesType.NONE;
            }

            // 0
            Document document = new Document();
            AddField(document, groupField, "1", dvType);
            AddField(document, countField, "1", dvType);
            document.Add(new TextField("content", "random text", Field.Store.NO));
            document.Add(new StringField("id", "1", Field.Store.NO));
            writer.AddDocument(document);

            // 1
            document = new Document();
            AddField(document, groupField, "1", dvType);
            AddField(document, countField, "1", dvType);
            document.Add(new TextField("content", "some more random text blob", Field.Store.NO));
            document.Add(new StringField("id", "2", Field.Store.NO));
            writer.AddDocument(document);

            // 2
            document = new Document();
            AddField(document, groupField, "1", dvType);
            AddField(document, countField, "2", dvType);
            document.Add(new TextField("content", "some more random textual data", Field.Store.NO));
            document.Add(new StringField("id", "3", Field.Store.NO));
            writer.AddDocument(document);
            writer.Commit(); // To ensure a second segment

            // 3 -- no count field
            document = new Document();
            AddField(document, groupField, "2", dvType);
            document.Add(new TextField("content", "some random text", Field.Store.NO));
            document.Add(new StringField("id", "4", Field.Store.NO));
            writer.AddDocument(document);

            // 4
            document = new Document();
            AddField(document, groupField, "3", dvType);
            AddField(document, countField, "1", dvType);
            document.Add(new TextField("content", "some more random text", Field.Store.NO));
            document.Add(new StringField("id", "5", Field.Store.NO));
            writer.AddDocument(document);

            // 5
            document = new Document();
            AddField(document, groupField, "3", dvType);
            AddField(document, countField, "1", dvType);
            document.Add(new TextField("content", "random blob", Field.Store.NO));
            document.Add(new StringField("id", "6", Field.Store.NO));
            writer.AddDocument(document);

            // 6 -- no group field
            document = new Document();
            document.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            AddField(document, countField, "1", dvType);
            document.Add(new StringField("id", "6", Field.Store.NO));
            writer.AddDocument(document);

            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            var groupComparer = new ComparerAnonymousHelper1(this);

            // ---- content:random ----
            IAbstractFirstPassGroupingCollector<IComparable> firstPass =
                CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            searcher.Search(new TermQuery(new Term("content", "random")), firstPass);

            IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<IComparable>> distinctCollector =
                CreateDistinctCountCollector(firstPass, groupField, countField, dvType);
            searcher.Search(new TermQuery(new Term("content", "random")), distinctCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            var groupCounts = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctCollector.Groups);
            groupCounts.Sort(groupComparer);
            assertEquals(4, groupCounts.Count);

            // Null group (the document without a group field).
            CompareNull(groupCounts[0].GroupValue);
            List<IComparable> uniqueCounts = new List<IComparable>(groupCounts[0].UniqueValues);
            assertEquals(1, uniqueCounts.Count);
            Compare("1", uniqueCounts[0]);

            Compare("1", groupCounts[1].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[1].UniqueValues);
            uniqueCounts.Sort(nullComparer);
            assertEquals(2, uniqueCounts.Count);
            Compare("1", uniqueCounts[0]);
            Compare("2", uniqueCounts[1]);

            // Group "2" only has a document without a count field.
            Compare("2", groupCounts[2].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[2].UniqueValues);
            assertEquals(1, uniqueCounts.Count);
            CompareNull(uniqueCounts[0]);

            Compare("3", groupCounts[3].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[3].UniqueValues);
            assertEquals(1, uniqueCounts.Count);
            Compare("1", uniqueCounts[0]);

            // ---- content:some ----
            firstPass = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            searcher.Search(new TermQuery(new Term("content", "some")), firstPass);
            distinctCollector = CreateDistinctCountCollector(firstPass, groupField, countField, dvType);
            searcher.Search(new TermQuery(new Term("content", "some")), distinctCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            groupCounts = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctCollector.Groups);
            groupCounts.Sort(groupComparer);
            assertEquals(3, groupCounts.Count);

            Compare("1", groupCounts[0].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[0].UniqueValues);
            assertEquals(2, uniqueCounts.Count);
            uniqueCounts.Sort(nullComparer);
            Compare("1", uniqueCounts[0]);
            Compare("2", uniqueCounts[1]);

            Compare("2", groupCounts[1].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[1].UniqueValues);
            assertEquals(1, uniqueCounts.Count);
            CompareNull(uniqueCounts[0]);

            Compare("3", groupCounts[2].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[2].UniqueValues);
            assertEquals(1, uniqueCounts.Count);
            Compare("1", uniqueCounts[0]);

            // ---- content:blob ----
            firstPass = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            searcher.Search(new TermQuery(new Term("content", "blob")), firstPass);
            distinctCollector = CreateDistinctCountCollector(firstPass, groupField, countField, dvType);
            searcher.Search(new TermQuery(new Term("content", "blob")), distinctCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            groupCounts = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctCollector.Groups);
            groupCounts.Sort(groupComparer);
            assertEquals(2, groupCounts.Count);

            Compare("1", groupCounts[0].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[0].UniqueValues);
            // Only one document inside group "1" matched "blob".
            assertEquals(1, uniqueCounts.Count);
            Compare("1", uniqueCounts[0]);

            Compare("3", groupCounts[1].GroupValue);
            uniqueCounts = new List<IComparable>(groupCounts[1].UniqueValues);
            assertEquals(1, uniqueCounts.Count);
            Compare("1", uniqueCounts[0]);

            searcher.IndexReader.Dispose();
            directory.Dispose();
        }