Example #1
0
        public virtual void TestSimple()
        {
            Random random = Random();

            FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[] {
                FieldInfo.DocValuesType_e.NUMERIC,
                FieldInfo.DocValuesType_e.BINARY,
                FieldInfo.DocValuesType_e.SORTED,
            };
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
            bool canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);

            FieldInfo.DocValuesType_e?dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;

            Document doc = new Document();

            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random text", Field.Store.NO));
            doc.Add(new StringField("id", "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(new StringField("id", "2", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "2", dvType);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(new StringField("id", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddField(doc, groupField, "2", dvType);
            doc.Add(new TextField("content", "some random text", Field.Store.NO));
            doc.Add(new StringField("id", "4", Field.Store.NO));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text", Field.Store.NO));
            doc.Add(new StringField("id", "5", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random blob", Field.Store.NO));
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            AddField(doc, countField, "1", dvType);
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.Reader);

            w.Dispose();

            var cmp = new ComparerAnonymousHelper1(this);

            // === Search for content:random
            IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);

            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector as Collector);
            IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector
                = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());

            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector as Collector);

            //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);

            gcs.Sort(cmp);
            assertEquals(4, gcs.Count);

            CompareNull(gcs[0].GroupValue);
            List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues);

            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            Compare("1", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            countValues.Sort(nullComparator);
            assertEquals(2, countValues.size());
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[3].GroupValue);
            countValues = new List <IComparable>(gcs[3].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:some
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector as Collector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector as Collector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(3, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            assertEquals(2, countValues.size());
            countValues.Sort(nullComparator);
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:blob
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector as Collector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector as Collector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(2, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            // B/c the only one document matched with blob inside the author 1 group
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            Compare("3", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        private void DoTest(FieldInfo.DocValuesType_e type)
        {
            Directory         d        = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            int   nDocs = AtLeast(50);
            Field id    = new NumericDocValuesField("id", 0);
            Field f;

            switch (type)
            {
            case FieldInfo.DocValuesType_e.BINARY:
                f = new BinaryDocValuesField("dv", new BytesRef());
                break;

            case FieldInfo.DocValuesType_e.SORTED:
                f = new SortedDocValuesField("dv", new BytesRef());
                break;

            case FieldInfo.DocValuesType_e.NUMERIC:
                f = new NumericDocValuesField("dv", 0);
                break;

            default:
                throw new InvalidOperationException();
            }
            Document document = new Document();

            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random(), d, iwConfig);

            for (int i = 0; i < nDocs; ++i)
            {
                id.LongValue = i;
                switch (type)
                {
                case FieldInfo.DocValuesType_e.SORTED:
                case FieldInfo.DocValuesType_e.BINARY:
                    do
                    {
                        vals[i] = TestUtil.RandomSimpleString(Random(), 20);
                    } while (((string)vals[i]).Length == 0);
                    f.BytesValue = new BytesRef((string)vals[i]);
                    break;

                case FieldInfo.DocValuesType_e.NUMERIC:
                    int bitsPerValue = Random().NextIntBetween(1, 31);     // keep it an int
                    vals[i]     = (long)Random().Next((int)PackedInts.MaxValue(bitsPerValue));
                    f.LongValue = (long)vals[i];
                    break;
                }
                iw.AddDocument(document);
                if (Random().NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);

            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new LongFieldSource("id")).GetValues(null, leave);
                ValueSource    vs;
                switch (type)
                {
                case FieldInfo.DocValuesType_e.BINARY:
                case FieldInfo.DocValuesType_e.SORTED:
                    vs = new BytesRefFieldSource("dv");
                    break;

                case FieldInfo.DocValuesType_e.NUMERIC:
                    vs = new LongFieldSource("dv");
                    break;

                default:
                    throw new InvalidOperationException();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef       bytes  = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is LongFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is long?);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    object expected = vals[ids.IntVal(i)];
                    switch (type)
                    {
                    case FieldInfo.DocValuesType_e.SORTED:
                        values.OrdVal(i);     // no exception
                        assertTrue(values.NumOrd() >= 1);
                        goto case FieldInfo.DocValuesType_e.BINARY;

                    case FieldInfo.DocValuesType_e.BINARY:
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertTrue(values.BytesVal(i, bytes));
                        assertEquals(new BytesRef((string)expected), bytes);
                        break;

                    case FieldInfo.DocValuesType_e.NUMERIC:
                        assertEquals(Number.ToInt64(expected.ToString()), values.LongVal(i));
                        break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
Example #3
0
        private IndexContext CreateIndexContext()
        {
            Random random = Random();

            FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[] {
                FieldInfo.DocValuesType_e.BINARY,
                FieldInfo.DocValuesType_e.SORTED
            };

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
                );

            bool canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);

            FieldInfo.DocValuesType_e?dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;

            int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;

            string[] groupValues = new string[numDocs / 5];
            string[] countValues = new string[numDocs / 10];
            for (int i = 0; i < groupValues.Length; i++)
            {
                groupValues[i] = GenerateRandomNonEmptyString();
            }
            for (int i = 0; i < countValues.Length; i++)
            {
                countValues[i] = GenerateRandomNonEmptyString();
            }

            List <string> contentStrings = new List <string>();
            IDictionary <string, IDictionary <string, ISet <string> > > searchTermToGroupCounts = new HashMap <string, IDictionary <string, ISet <string> > >();

            for (int i = 1; i <= numDocs; i++)
            {
                string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
                string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
                string content    = "random" + random.nextInt(numDocs / 20);
                IDictionary <string, ISet <string> > groupToCounts;
                if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
                {
                    // Groups sort always DOCID asc...
                    searchTermToGroupCounts.Add(content, groupToCounts = new LinkedHashMap <string, ISet <string> >());
                    contentStrings.Add(content);
                }

                ISet <string> countsVals;
                if (!groupToCounts.TryGetValue(groupValue, out countsVals))
                {
                    groupToCounts.Add(groupValue, countsVals = new HashSet <string>());
                }
                countsVals.Add(countValue);

                Document doc = new Document();
                doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
                if (groupValue != null)
                {
                    AddField(doc, groupField, groupValue, dvType);
                }
                if (countValue != null)
                {
                    AddField(doc, countField, countValue, dvType);
                }
                doc.Add(new TextField("content", content, Field.Store.YES));
                w.AddDocument(doc);
            }

            DirectoryReader reader = w.Reader;

            if (VERBOSE)
            {
                for (int docID = 0; docID < reader.MaxDoc; docID++)
                {
                    Document doc = reader.Document(docID);
                    Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
                }
            }

            w.Dispose();
            return(new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/)));
        }
Example #4
0
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            bool success = false;

            try
            {
                CodecUtil.CheckHeader(input, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_START, Lucene42FieldInfosFormat.FORMAT_CURRENT);

                int         size  = input.ReadVInt(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string name            = input.ReadString();
                    int    fieldNumber     = input.ReadVInt();
                    byte   bits            = input.ReadByte();
                    bool   isIndexed       = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
                    bool   storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
                    bool   omitNorms       = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
                    bool   storePayloads   = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
                    FieldInfo.IndexOptions indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = default(FieldInfo.IndexOptions);
                    }
                    else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                    }
                    else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    }
                    else
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // DV Types are packed in one byte
                    byte val = input.ReadByte();
                    FieldInfo.DocValuesType_e    docValuesType = GetDocValuesType(input, (sbyte)(val & 0x0F));
                    FieldInfo.DocValuesType_e    normsType     = GetDocValuesType(input, (sbyte)(((int)((uint)val >> 4)) & 0x0F));
                    IDictionary <string, string> attributes    = input.ReadStringStringMap();
                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, CollectionsHelper.UnmodifiableMap(attributes));
                }

                CodecUtil.CheckEOF(input);
                FieldInfos fieldInfos = new FieldInfos(infos);
                success = true;
                return(fieldInfos);
            }
            finally
            {
                if (success)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
Example #5
0
        private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, FieldInfo.DocValuesType_e valueType)
        {
            doc.Add(new TextField(groupField, value, Field.Store.YES));
            if (canUseIDV)
            {
                Field valuesField = null;
                switch (valueType)
                {
                case FieldInfo.DocValuesType_e.BINARY:
                    valuesField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
                    break;

                case FieldInfo.DocValuesType_e.SORTED:
                    valuesField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
                    break;

                default:
                    fail("unhandled type");
                    break;
                }
                doc.Add(valuesField);
            }
        }
Example #6
0
        private AbstractAllGroupHeadsCollector CreateRandomCollector(string groupField, Sort sortWithinGroup, bool canUseIDV, FieldInfo.DocValuesType_e valueType)
        {
            AbstractAllGroupHeadsCollector collector;

            if (Random().nextBoolean())
            {
                ValueSource vs = new BytesRefFieldSource(groupField);
                collector = new FunctionAllGroupHeadsCollector(vs, new Hashtable(), sortWithinGroup);
            }
            else
            {
                collector = TermAllGroupHeadsCollector.Create(groupField, sortWithinGroup);
            }

            if (VERBOSE)
            {
                Console.WriteLine("Selected implementation: " + collector.GetType().Name);
            }

            return(collector);
        }
Example #7
0
        public void TestBasic()
        {
            string            groupField = "author";
            Directory         dir        = NewDirectory();
            RandomIndexWriter w          = new RandomIndexWriter(
                Random(),
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            bool canUseIDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);

            FieldInfo.DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];

            // 0
            Document doc = new Document();

            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "1", Field.Store.NO));
            doc.Add(NewStringField("id_2", "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(NewStringField("id_1", "2", Field.Store.NO));
            doc.Add(NewStringField("id_2", "2", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(NewStringField("id_1", "3", Field.Store.NO));
            doc.Add(NewStringField("id_2", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddGroupField(doc, groupField, "author2", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "4", Field.Store.NO));
            doc.Add(NewStringField("id_2", "4", Field.Store.NO));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "5", Field.Store.NO));
            doc.Add(NewStringField("id_2", "5", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
            doc.Add(NewTextField("content", "random blob", Field.Store.NO));
            doc.Add(NewStringField("id_1", "6", Field.Store.NO));
            doc.Add(NewStringField("id_2", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "6", Field.Store.NO));
            doc.Add(NewStringField("id_2", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 7 -- no author field
            doc = new Document();
            doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "7", Field.Store.NO));
            doc.Add(NewStringField("id_2", "7", Field.Store.NO));
            w.AddDocument(doc);

            IndexReader   reader        = w.Reader;
            IndexSearcher indexSearcher = NewSearcher(reader);

            w.Dispose();
            int maxDoc = reader.MaxDoc;

            Sort sortWithinGroup        = new Sort(new SortField("id_1", SortField.Type_e.INT, true));
            var  allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            // STRING sort type triggers different implementation
            Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortField.Type_e.STRING, true));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup2, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortField.Type_e.STRING, false));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup3, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            // 7 b/c higher doc id wins, even if order of field is in not in reverse.
            assertTrue(ArrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Example #8
0
        public void TestRandom()
        {
            int numberOfRuns = TestUtil.NextInt(Random(), 3, 6);

            for (int iter = 0; iter < numberOfRuns; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
                }

                int numDocs   = TestUtil.NextInt(Random(), 100, 1000) * RANDOM_MULTIPLIER;
                int numGroups = TestUtil.NextInt(Random(), 1, numDocs);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
                }

                List <BytesRef> groups = new List <BytesRef>();
                for (int i = 0; i < numGroups; i++)
                {
                    string randomValue;
                    do
                    {
                        // B/c of DV based impl we can't see the difference between an empty string and a null value.
                        // For that reason we don't generate empty string groups.
                        randomValue = TestUtil.RandomRealisticUnicodeString(Random());
                    } while ("".Equals(randomValue, StringComparison.Ordinal));
                    groups.Add(new BytesRef(randomValue));
                }
                string[] contentStrings = new string[TestUtil.NextInt(Random(), 2, 20)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: create fake content");
                }
                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.append("real").append(Random().nextInt(3)).append(' ');
                    int fakeCount = Random().nextInt(10);
                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
                    {
                        sb.append("fake ");
                    }
                    contentStrings[contentIDX] = sb.toString();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + sb.toString());
                    }
                }

                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
                    Random(),
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                         new MockAnalyzer(Random())));
                bool preFlex   = "Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
                bool canUseIDV = !preFlex;
                FieldInfo.DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];

                Document doc        = new Document();
                Document docNoGroup = new Document();
                Field    group      = NewStringField("group", "", Field.Store.NO);
                doc.Add(group);
                Field valuesField = null;
                if (canUseIDV)
                {
                    switch (valueType)
                    {
                    case FieldInfo.DocValuesType_e.BINARY:
                        valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                        break;

                    case FieldInfo.DocValuesType_e.SORTED:
                        valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                        break;

                    default:
                        fail("unhandled type");
                        break;
                    }
                    doc.Add(valuesField);
                }
                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
                doc.Add(sort1);
                docNoGroup.Add(sort1);
                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
                doc.Add(sort2);
                docNoGroup.Add(sort2);
                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
                doc.Add(sort3);
                docNoGroup.Add(sort3);
                Field content = NewTextField("content", "", Field.Store.NO);
                doc.Add(content);
                docNoGroup.Add(content);
                IntField id = new IntField("id", 0, Field.Store.NO);
                doc.Add(id);
                docNoGroup.Add(id);
                GroupDoc[] groupDocs = new GroupDoc[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    BytesRef groupValue;
                    if (Random().nextInt(24) == 17)
                    {
                        // So we test the "doc doesn't have the group'd
                        // field" case:
                        groupValue = null;
                    }
                    else
                    {
                        groupValue = groups[Random().nextInt(groups.size())];
                    }

                    GroupDoc groupDoc = new GroupDoc(
                        i,
                        groupValue,
                        groups[Random().nextInt(groups.size())],
                        groups[Random().nextInt(groups.size())],
                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                        contentStrings[Random().nextInt(contentStrings.Length)]
                        );

                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
                    }

                    groupDocs[i] = groupDoc;
                    if (groupDoc.group != null)
                    {
                        group.StringValue = (groupDoc.group.Utf8ToString());
                        if (canUseIDV)
                        {
                            valuesField.BytesValue = (new BytesRef(groupDoc.group.Utf8ToString()));
                        }
                    }
                    sort1.StringValue   = (groupDoc.sort1.Utf8ToString());
                    sort2.StringValue   = (groupDoc.sort2.Utf8ToString());
                    sort3.StringValue   = (groupDoc.sort3.Utf8ToString());
                    content.StringValue = (groupDoc.content);
                    id.IntValue         = (groupDoc.id);
                    if (groupDoc.group == null)
                    {
                        w.AddDocument(docNoGroup);
                    }
                    else
                    {
                        w.AddDocument(doc);
                    }
                }

                DirectoryReader r = w.Reader;
                w.Dispose();

                // NOTE: intentional but temporary field cache insanity!
                FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false);
                int[]           fieldIdToDocID = new int[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    int fieldId = docIdToFieldId.Get(i);
                    fieldIdToDocID[fieldId] = i;
                }

                try
                {
                    IndexSearcher s = NewSearcher(r);
                    if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(s.IndexReader.GetType()))
                    {
                        canUseIDV = false;
                    }
                    else
                    {
                        canUseIDV = !preFlex;
                    }

                    for (int contentID = 0; contentID < 3; contentID++)
                    {
                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                        foreach (ScoreDoc hit in hits)
                        {
                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                            assertTrue(gd.score == 0.0);
                            gd.score = hit.Score;
                            int docId = gd.id;
                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                        }
                    }

                    foreach (GroupDoc gd in groupDocs)
                    {
                        assertTrue(gd.score != 0.0);
                    }

                    for (int searchIter = 0; searchIter < 100; searchIter++)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: searchIter=" + searchIter);
                        }

                        string searchTerm      = "real" + Random().nextInt(3);
                        bool   sortByScoreOnly = Random().nextBoolean();
                        Sort   sortWithinGroup = GetRandomSort(sortByScoreOnly);
                        AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                        int[] actualGroupHeads   = allGroupHeadsCollector.RetrieveGroupHeads();
                        // The actual group heads contains Lucene ids. Need to change them into our id value.
                        for (int i = 0; i < actualGroupHeads.Length; i++)
                        {
                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                        }
                        // Allows us the easily iterate and assert the actual and expected results.
                        Array.Sort(expectedGroupHeads);
                        Array.Sort(actualGroupHeads);

                        if (VERBOSE)
                        {
                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                            Console.WriteLine("Sort within group: " + sortWithinGroup);
                            Console.WriteLine("Num group: " + numGroups);
                            Console.WriteLine("Num doc: " + numDocs);
                            Console.WriteLine("\n=== Expected: \n");
                            foreach (int expectedDocId in expectedGroupHeads)
                            {
                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                                string   expectedGroup    = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                                  expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                                    );
                            }
                            Console.WriteLine("\n=== Actual: \n");
                            foreach (int actualDocId in actualGroupHeads)
                            {
                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
                                string   actualGroup    = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                                  actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                                    );
                            }
                            Console.WriteLine("\n===================================================================================");
                        }

                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
                    }
                }
                finally
                {
                    QueryUtils.PurgeFieldCache(r);
                }

                r.Dispose();
                dir.Dispose();
            }
        }