Example #1
0
        private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, DocValuesType valueType)
        {
            doc.Add(new TextField(groupField, value, Field.Store.YES));
            if (canUseIDV)
            {
                Field valuesField = null;
                switch (valueType)
                {
                case DocValuesType.BINARY:
                    valuesField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
                    break;

                case DocValuesType.SORTED:
                    valuesField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
                    break;

                default:
                    fail("unhandled type");
                    break;
                }
                doc.Add(valuesField);
            }
        }
Example #2
0
            public DocState(bool useDocValues)
            {
                Doc = new Document();

                Title = new StringField("title", "", Field.Store.NO);
                Doc.Add(Title);

                FieldType ft = new FieldType(TextField.TYPE_STORED)
                {
                    StoreTermVectors         = true,
                    StoreTermVectorOffsets   = true,
                    StoreTermVectorPositions = true
                };

                TitleTokenized = new Field("titleTokenized", "", ft);
                Doc.Add(TitleTokenized);

                Body = new Field("body", "", ft);
                Doc.Add(Body);

                Id = new StringField("docid", "", Field.Store.YES);
                Doc.Add(Id);

                Date = new StringField("date", "", Field.Store.YES);
                Doc.Add(Date);

                if (useDocValues)
                {
                    TitleDV = new SortedDocValuesField("titleDV", new BytesRef());
                    Doc.Add(TitleDV);
                }
                else
                {
                    TitleDV = null;
                }
            }
        public virtual void TestFixedSorted([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedSorted"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                            .SetRAMBufferSizeMB(256.0)
                                            .SetMergeScheduler(scheduler)
                                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

            Document             doc     = new Document();
            var                  bytes   = new byte[2];
            BytesRef             data    = new BytesRef(bytes);
            SortedDocValuesField dvField = new SortedDocValuesField("dv", data);

            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 8);
                bytes[1] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r             = DirectoryReader.Open(dir);
            int             expectedValue = 0;

            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader    reader  = context.AtomicReader;
                BytesRef        scratch = new BytesRef();
                BinaryDocValues dv      = reader.GetSortedDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 8);
                    bytes[1] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
        public void TestRandom()
        {
            int numberOfRuns = TestUtil.NextInt32(Random, 3, 6);

            for (int iter = 0; iter < numberOfRuns; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
                }

                int numDocs   = TestUtil.NextInt32(Random, 100, 1000) * RANDOM_MULTIPLIER;
                int numGroups = TestUtil.NextInt32(Random, 1, numDocs);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
                }

                List <BytesRef> groups = new List <BytesRef>();
                for (int i = 0; i < numGroups; i++)
                {
                    string randomValue;
                    do
                    {
                        // B/c of DV based impl we can't see the difference between an empty string and a null value.
                        // For that reason we don't generate empty string groups.
                        randomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    } while ("".Equals(randomValue, StringComparison.Ordinal));
                    groups.Add(new BytesRef(randomValue));
                }
                string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: create fake content");
                }
                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.append("real").append(Random.nextInt(3)).append(' ');
                    int fakeCount = Random.nextInt(10);
                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
                    {
                        sb.append("fake ");
                    }
                    contentStrings[contentIDX] = sb.toString();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + sb.toString());
                    }
                }

                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
                    Random,
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                         new MockAnalyzer(Random)));
                bool          preFlex   = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
                bool          canUseIDV = !preFlex;
                DocValuesType valueType = vts[Random.nextInt(vts.Length)];

                Document doc        = new Document();
                Document docNoGroup = new Document();
                Field    group      = NewStringField("group", "", Field.Store.NO);
                doc.Add(group);
                Field valuesField = null;
                if (canUseIDV)
                {
                    switch (valueType)
                    {
                    case DocValuesType.BINARY:
                        valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                        break;

                    case DocValuesType.SORTED:
                        valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                        break;

                    default:
                        fail("unhandled type");
                        break;
                    }
                    doc.Add(valuesField);
                }
                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
                doc.Add(sort1);
                docNoGroup.Add(sort1);
                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
                doc.Add(sort2);
                docNoGroup.Add(sort2);
                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
                doc.Add(sort3);
                docNoGroup.Add(sort3);
                Field content = NewTextField("content", "", Field.Store.NO);
                doc.Add(content);
                docNoGroup.Add(content);
                Int32Field id = new Int32Field("id", 0, Field.Store.NO);
                doc.Add(id);
                docNoGroup.Add(id);
                GroupDoc[] groupDocs = new GroupDoc[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    BytesRef groupValue;
                    if (Random.nextInt(24) == 17)
                    {
                        // So we test the "doc doesn't have the group'd
                        // field" case:
                        groupValue = null;
                    }
                    else
                    {
                        groupValue = groups[Random.nextInt(groups.size())];
                    }

                    GroupDoc groupDoc = new GroupDoc(
                        i,
                        groupValue,
                        groups[Random.nextInt(groups.size())],
                        groups[Random.nextInt(groups.size())],
                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                        contentStrings[Random.nextInt(contentStrings.Length)]
                        );

                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
                    }

                    groupDocs[i] = groupDoc;
                    if (groupDoc.group != null)
                    {
                        group.SetStringValue(groupDoc.group.Utf8ToString());
                        if (canUseIDV)
                        {
                            valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString()));
                        }
                    }
                    sort1.SetStringValue(groupDoc.sort1.Utf8ToString());
                    sort2.SetStringValue(groupDoc.sort2.Utf8ToString());
                    sort3.SetStringValue(groupDoc.sort3.Utf8ToString());
                    content.SetStringValue(groupDoc.content);
                    id.SetInt32Value(groupDoc.id);
                    if (groupDoc.group == null)
                    {
                        w.AddDocument(docNoGroup);
                    }
                    else
                    {
                        w.AddDocument(doc);
                    }
                }

                DirectoryReader r = w.GetReader();
                w.Dispose();

                // NOTE: intentional but temporary field cache insanity!
                FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);
                int[]             fieldIdToDocID = new int[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    int fieldId = docIdToFieldId.Get(i);
                    fieldIdToDocID[fieldId] = i;
                }

                try
                {
                    IndexSearcher s = NewSearcher(r);
                    if (typeof(SlowCompositeReaderWrapper).GetTypeInfo().IsAssignableFrom(s.IndexReader.GetType()))
                    {
                        canUseIDV = false;
                    }
                    else
                    {
                        canUseIDV = !preFlex;
                    }

                    for (int contentID = 0; contentID < 3; contentID++)
                    {
                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                        foreach (ScoreDoc hit in hits)
                        {
                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                            assertTrue(gd.score == 0.0);
                            gd.score = hit.Score;
                            int docId = gd.id;
                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                        }
                    }

                    foreach (GroupDoc gd in groupDocs)
                    {
                        assertTrue(gd.score != 0.0);
                    }

                    for (int searchIter = 0; searchIter < 100; searchIter++)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: searchIter=" + searchIter);
                        }

                        string searchTerm      = "real" + Random.nextInt(3);
                        bool   sortByScoreOnly = Random.nextBoolean();
                        Sort   sortWithinGroup = GetRandomSort(sortByScoreOnly);
                        AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                        int[] actualGroupHeads   = allGroupHeadsCollector.RetrieveGroupHeads();
                        // The actual group heads contains Lucene ids. Need to change them into our id value.
                        for (int i = 0; i < actualGroupHeads.Length; i++)
                        {
                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                        }
                        // Allows us the easily iterate and assert the actual and expected results.
                        Array.Sort(expectedGroupHeads);
                        Array.Sort(actualGroupHeads);

                        if (VERBOSE)
                        {
                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                            Console.WriteLine("Sort within group: " + sortWithinGroup);
                            Console.WriteLine("Num group: " + numGroups);
                            Console.WriteLine("Num doc: " + numDocs);
                            Console.WriteLine("\n=== Expected: \n");
                            foreach (int expectedDocId in expectedGroupHeads)
                            {
                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                                string   expectedGroup    = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                                  expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                                    );
                            }
                            Console.WriteLine("\n=== Actual: \n");
                            foreach (int actualDocId in actualGroupHeads)
                            {
                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
                                string   actualGroup    = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                                  actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                                    );
                            }
                            Console.WriteLine("\n===================================================================================");
                        }

                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
                    }
                }
                finally
                {
                    QueryUtils.PurgeFieldCache(r);
                }

                r.Dispose();
                dir.Dispose();
            }
        }
        private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
        {
            Random random    = Random;
            int    numDocs   = TestUtil.NextInt32(random, 138, 1145) * RandomMultiplier;
            int    numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
            int    numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

            if (Verbose)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
            }

            List <string> groups = new List <string>();

            for (int i = 0; i < numGroups; i++)
            {
                groups.Add(GenerateRandomNonEmptyString());
            }
            List <string> facetValues = new List <string>();

            for (int i = 0; i < numFacets; i++)
            {
                facetValues.Add(GenerateRandomNonEmptyString());
            }
            string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
            if (Verbose)
            {
                Console.WriteLine("TEST: create fake content");
            }
            for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
            {
                contentBrs[contentIDX] = GenerateRandomNonEmptyString();
                if (Verbose)
                {
                    Console.WriteLine("  content=" + contentBrs[contentIDX]);
                }
            }

            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(
                    TEST_VERSION_CURRENT,
                    new MockAnalyzer(random)
                    )
                );
            bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            bool useDv    = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

            Document doc               = new Document();
            Document docNoGroup        = new Document();
            Document docNoFacet        = new Document();
            Document docNoGroupNoFacet = new Document();
            Field    group             = NewStringField("group", "", Field.Store.NO);
            Field    groupDc           = new SortedDocValuesField("group_dv", new BytesRef());

            if (useDv)
            {
                doc.Add(groupDc);
                docNoFacet.Add(groupDc);
            }
            doc.Add(group);
            docNoFacet.Add(group);
            Field[] facetFields;
            if (useDv)
            {
                Debug.Assert(!multipleFacetValuesPerDocument);
                facetFields    = new Field[2];
                facetFields[0] = NewStringField("facet", "", Field.Store.NO);
                doc.Add(facetFields[0]);
                docNoGroup.Add(facetFields[0]);
                facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
                doc.Add(facetFields[1]);
                docNoGroup.Add(facetFields[1]);
            }
            else
            {
                facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
                for (int i = 0; i < facetFields.Length; i++)
                {
                    facetFields[i] = NewStringField("facet", "", Field.Store.NO);
                    doc.Add(facetFields[i]);
                    docNoGroup.Add(facetFields[i]);
                }
            }
            Field content = NewStringField("content", "", Field.Store.NO);

            doc.Add(content);
            docNoGroup.Add(content);
            docNoFacet.Add(content);
            docNoGroupNoFacet.Add(content);

            ISet <string> uniqueFacetValues = new JCG.SortedSet <string>(Comparer <string> .Create((a, b) => {
                if (a == b)
                {
                    return(0);
                }
                else if (a == null)
                {
                    return(-1);
                }
                else if (b == null)
                {
                    return(1);
                }
                else
                {
                    return(a.CompareToOrdinal(b));
                }
            }));

            // LUCENENET NOTE: Need JCG.Dictionary here because of null keys
            IDictionary <string, JCG.Dictionary <string, ISet <string> > > searchTermToFacetToGroups = new Dictionary <string, JCG.Dictionary <string, ISet <string> > >();
            int facetWithMostGroups = 0;

            for (int i = 0; i < numDocs; i++)
            {
                string groupValue;
                if (random.nextInt(24) == 17)
                {
                    // So we test the "doc doesn't have the group'd
                    // field" case:
                    if (useDv)
                    {
                        groupValue = "";
                    }
                    else
                    {
                        groupValue = null;
                    }
                }
                else
                {
                    groupValue = groups[random.nextInt(groups.size())];
                }

                string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
                if (!searchTermToFacetToGroups.TryGetValue(contentStr, out JCG.Dictionary <string, ISet <string> > facetToGroups))
                {
                    searchTermToFacetToGroups[contentStr] = facetToGroups = new JCG.Dictionary <string, ISet <string> >();
                }

                List <string> facetVals = new List <string>();
                if (useDv || random.nextInt(24) != 18)
                {
                    if (useDv)
                    {
                        string facetValue = facetValues[random.nextInt(facetValues.size())];
                        uniqueFacetValues.Add(facetValue);
                        if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                        {
                            facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                        }
                        groupsInFacet.add(groupValue);
                        if (groupsInFacet.size() > facetWithMostGroups)
                        {
                            facetWithMostGroups = groupsInFacet.size();
                        }
                        facetFields[0].SetStringValue(facetValue);
                        facetFields[1].SetBytesValue(new BytesRef(facetValue));
                        facetVals.Add(facetValue);
                    }
                    else
                    {
                        foreach (Field facetField in facetFields)
                        {
                            string facetValue = facetValues[random.nextInt(facetValues.size())];
                            uniqueFacetValues.Add(facetValue);
                            if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                            {
                                facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                            }
                            groupsInFacet.add(groupValue);
                            if (groupsInFacet.size() > facetWithMostGroups)
                            {
                                facetWithMostGroups = groupsInFacet.size();
                            }
                            facetField.SetStringValue(facetValue);
                            facetVals.Add(facetValue);
                        }
                    }
                }
                else
                {
                    uniqueFacetValues.Add(null);
                    if (!facetToGroups.TryGetValue(null, out ISet <string> groupsInFacet))
                    {
                        facetToGroups[null] = groupsInFacet = new JCG.HashSet <string>();
                    }
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                }

                if (Verbose)
                {
                    Console.WriteLine("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + Collections.ToString(facetVals));
                }

                if (groupValue != null)
                {
                    if (useDv)
                    {
                        groupDc.SetBytesValue(new BytesRef(groupValue));
                    }
                    group.SetStringValue(groupValue);
                }
                else if (useDv)
                {
                    // DV cannot have missing values:
                    groupDc.SetBytesValue(new BytesRef());
                }
                content.SetStringValue(contentStr);
                if (groupValue == null && facetVals.Count == 0)
                {
                    writer.AddDocument(docNoGroupNoFacet);
                }
                else if (facetVals.Count == 0)
                {
                    writer.AddDocument(docNoFacet);
                }
                else if (groupValue == null)
                {
                    writer.AddDocument(docNoGroup);
                }
                else
                {
                    writer.AddDocument(doc);
                }
            }

            DirectoryReader reader = writer.GetReader();

            writer.Dispose();

            return(new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv));
        }
Example #6
0
        private void DoTest(DocValuesType type)
        {
            Directory         d        = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            int   nDocs = AtLeast(50);
            Field id    = new NumericDocValuesField("id", 0);
            Field f;

            switch (type)
            {
            case DocValuesType.BINARY:
                f = new BinaryDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.SORTED:
                f = new SortedDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.NUMERIC:
                f = new NumericDocValuesField("dv", 0);
                break;

            default:
                throw AssertionError.Create();
            }
            Document document = new Document();

            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);

            for (int i = 0; i < nDocs; ++i)
            {
                id.SetInt64Value(i);
                switch (type)
                {
                case DocValuesType.SORTED:
                case DocValuesType.BINARY:
                    do
                    {
                        vals[i] = TestUtil.RandomSimpleString(Random, 20);
                    } while (((string)vals[i]).Length == 0);
                    f.SetBytesValue(new BytesRef((string)vals[i]));
                    break;

                case DocValuesType.NUMERIC:
                    int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31);     // keep it an int
                    vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                    f.SetInt64Value((long)vals[i]);
                    break;
                }
                iw.AddDocument(document);
                if (Random.NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);

            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);
                ValueSource    vs;
                switch (type)
                {
                case DocValuesType.BINARY:
                case DocValuesType.SORTED:
                    vs = new BytesRefFieldSource("dv");
                    break;

                case DocValuesType.NUMERIC:
                    vs = new Int64FieldSource("dv");
                    break;

                default:
                    throw AssertionError.Create();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef       bytes  = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is Int64FieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is J2N.Numerics.Int64);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw AssertionError.Create();
                    }

                    object expected = vals[ids.Int32Val(i)];
                    switch (type)
                    {
                    case DocValuesType.SORTED:
                        values.OrdVal(i);     // no exception
                        assertTrue(values.NumOrd >= 1);
                        goto case DocValuesType.BINARY;

                    case DocValuesType.BINARY:
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertTrue(values.BytesVal(i, bytes));
                        assertEquals(new BytesRef((string)expected), bytes);
                        break;

                    case DocValuesType.NUMERIC:
                        assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                        break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
        private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
        {
            Random random    = Random;
            int    numDocs   = TestUtil.NextInt32(random, 138, 1145) * RANDOM_MULTIPLIER;
            int    numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
            int    numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
            }

            List <string> groups = new List <string>();

            for (int i = 0; i < numGroups; i++)
            {
                groups.Add(GenerateRandomNonEmptyString());
            }
            List <string> facetValues = new List <string>();

            for (int i = 0; i < numFacets; i++)
            {
                facetValues.Add(GenerateRandomNonEmptyString());
            }
            string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
            if (VERBOSE)
            {
                Console.WriteLine("TEST: create fake content");
            }
            for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
            {
                contentBrs[contentIDX] = GenerateRandomNonEmptyString();
                if (VERBOSE)
                {
                    Console.WriteLine("  content=" + contentBrs[contentIDX]);
                }
            }

            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(
                    TEST_VERSION_CURRENT,
                    new MockAnalyzer(random)
                    )
                );
            bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            bool useDv    = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

            Document doc               = new Document();
            Document docNoGroup        = new Document();
            Document docNoFacet        = new Document();
            Document docNoGroupNoFacet = new Document();
            Field    group             = NewStringField("group", "", Field.Store.NO);
            Field    groupDc           = new SortedDocValuesField("group_dv", new BytesRef());

            if (useDv)
            {
                doc.Add(groupDc);
                docNoFacet.Add(groupDc);
            }
            doc.Add(group);
            docNoFacet.Add(group);
            Field[] facetFields;
            if (useDv)
            {
                Debug.Assert(!multipleFacetValuesPerDocument);
                facetFields    = new Field[2];
                facetFields[0] = NewStringField("facet", "", Field.Store.NO);
                doc.Add(facetFields[0]);
                docNoGroup.Add(facetFields[0]);
                facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
                doc.Add(facetFields[1]);
                docNoGroup.Add(facetFields[1]);
            }
            else
            {
                facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
                for (int i = 0; i < facetFields.Length; i++)
                {
                    facetFields[i] = NewStringField("facet", "", Field.Store.NO);
                    doc.Add(facetFields[i]);
                    docNoGroup.Add(facetFields[i]);
                }
            }
            Field content = NewStringField("content", "", Field.Store.NO);

            doc.Add(content);
            docNoGroup.Add(content);
            docNoFacet.Add(content);
            docNoGroupNoFacet.Add(content);

            // LUCENENET NOTE: TreeSet (the class used in Java) allows duplicate keys. However, SortedSet seems to work,
            // and based on the name of the variable, presuming the entries are meant to be unique.
            ISet <string> uniqueFacetValues = new SortedSet <string>(new ComparerAnonymousHelper1());

            // LUCENENET NOTE: Need HashMap here because of null keys
            IDictionary <string, HashMap <string, ISet <string> > > searchTermToFacetToGroups = new Dictionary <string, HashMap <string, ISet <string> > >();
            int facetWithMostGroups = 0;

            for (int i = 0; i < numDocs; i++)
            {
                string groupValue;
                if (random.nextInt(24) == 17)
                {
                    // So we test the "doc doesn't have the group'd
                    // field" case:
                    if (useDv)
                    {
                        groupValue = "";
                    }
                    else
                    {
                        groupValue = null;
                    }
                }
                else
                {
                    groupValue = groups[random.nextInt(groups.size())];
                }

                string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
                if (!searchTermToFacetToGroups.ContainsKey(contentStr))
                {
                    searchTermToFacetToGroups[contentStr] = new HashMap <string, ISet <string> >();
                }
                IDictionary <string, ISet <string> > facetToGroups = searchTermToFacetToGroups[contentStr];

                List <string> facetVals = new List <string>();
                if (useDv || random.nextInt(24) != 18)
                {
                    if (useDv)
                    {
                        string facetValue = facetValues[random.nextInt(facetValues.size())];
                        uniqueFacetValues.Add(facetValue);
                        if (!facetToGroups.ContainsKey(facetValue))
                        {
                            facetToGroups[facetValue] = new HashSet <string>();
                        }
                        ISet <string> groupsInFacet = facetToGroups[facetValue];
                        groupsInFacet.add(groupValue);
                        if (groupsInFacet.size() > facetWithMostGroups)
                        {
                            facetWithMostGroups = groupsInFacet.size();
                        }
                        facetFields[0].SetStringValue(facetValue);
                        facetFields[1].SetBytesValue(new BytesRef(facetValue));
                        facetVals.Add(facetValue);
                    }
                    else
                    {
                        foreach (Field facetField in facetFields)
                        {
                            string facetValue = facetValues[random.nextInt(facetValues.size())];
                            uniqueFacetValues.Add(facetValue);
                            if (!facetToGroups.ContainsKey(facetValue))
                            {
                                facetToGroups[facetValue] = new HashSet <string>();
                            }
                            ISet <string> groupsInFacet = facetToGroups[facetValue];
                            groupsInFacet.add(groupValue);
                            if (groupsInFacet.size() > facetWithMostGroups)
                            {
                                facetWithMostGroups = groupsInFacet.size();
                            }
                            facetField.SetStringValue(facetValue);
                            facetVals.Add(facetValue);
                        }
                    }
                }
                else
                {
                    uniqueFacetValues.Add(null);
                    if (!facetToGroups.ContainsKey(null))
                    {
                        facetToGroups.Put(null, new HashSet <string>());
                    }
                    ISet <string> groupsInFacet = facetToGroups[null];
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + facetVals);
                }

                if (groupValue != null)
                {
                    if (useDv)
                    {
                        groupDc.SetBytesValue(new BytesRef(groupValue));
                    }
                    group.SetStringValue(groupValue);
                }
                else if (useDv)
                {
                    // DV cannot have missing values:
                    groupDc.SetBytesValue(new BytesRef());
                }
                content.SetStringValue(contentStr);
                if (groupValue == null && !facetVals.Any())
                {
                    writer.AddDocument(docNoGroupNoFacet);
                }
                else if (!facetVals.Any())
                {
                    writer.AddDocument(docNoFacet);
                }
                else if (groupValue == null)
                {
                    writer.AddDocument(docNoGroup);
                }
                else
                {
                    writer.AddDocument(doc);
                }
            }

            DirectoryReader reader = writer.GetReader();

            writer.Dispose();

            return(new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv));
        }
Example #8
0
        // NOTE: this is likely buggy, and cannot clone fields
        // with tokenStreamValues, etc.  Use at your own risk!!

        // TODO: is there a pre-existing way to do this!!!
        public static Document CloneDocument(Document doc1)
        {
            Document doc2 = new Document();

            foreach (IIndexableField f in doc1.Fields)
            {
                Field         field1 = (Field)f;
                Field         field2;
                DocValuesType dvType  = field1.FieldType.DocValueType;
                NumericType   numType = field1.FieldType.NumericType;
                if (dvType != DocValuesType.NONE)
                {
                    switch (dvType)
                    {
                    case DocValuesType.NUMERIC:
                        field2 = new NumericDocValuesField(field1.Name, field1.GetInt64Value().Value);
                        break;

                    case DocValuesType.BINARY:
                        field2 = new BinaryDocValuesField(field1.Name, field1.GetBinaryValue());
                        break;

                    case DocValuesType.SORTED:
                        field2 = new SortedDocValuesField(field1.Name, field1.GetBinaryValue());
                        break;

                    default:
                        throw IllegalStateException.Create("unknown Type: " + dvType);
                    }
                }
                else if (numType != NumericType.NONE)
                {
                    switch (numType)
                    {
                    case NumericType.INT32:
                        field2 = new Int32Field(field1.Name, field1.GetInt32Value().Value, field1.FieldType);
                        break;

                    case NumericType.SINGLE:
                        field2 = new SingleField(field1.Name, field1.GetInt32Value().Value, field1.FieldType);
                        break;

                    case NumericType.INT64:
                        field2 = new Int64Field(field1.Name, field1.GetInt32Value().Value, field1.FieldType);
                        break;

                    case NumericType.DOUBLE:
                        field2 = new DoubleField(field1.Name, field1.GetInt32Value().Value, field1.FieldType);
                        break;

                    default:
                        throw IllegalStateException.Create("unknown Type: " + numType);
                    }
                }
                else
                {
                    field2 = new Field(field1.Name, field1.GetStringValue(), field1.FieldType);
                }
                doc2.Add(field2);
            }

            return(doc2);
        }
Example #9
0
        public virtual void TestSortedBytesDocValuesField()
        {
            SortedDocValuesField field = new SortedDocValuesField("foo", new BytesRef("bar"));

            TrySetBoost(field);
            TrySetByteValue(field);
            field.BytesValue = "fubar".ToBytesRefArray(Encoding.UTF8);
            field.BytesValue = new BytesRef("baz");
            TrySetDoubleValue(field);
            TrySetIntValue(field);
            TrySetFloatValue(field);
            TrySetLongValue(field);
            TrySetReaderValue(field);
            TrySetShortValue(field);
            TrySetStringValue(field);
            TrySetTokenStreamValue(field);

            Assert.AreEqual(new BytesRef("baz"), field.BinaryValue());
        }