Example #1
        public virtual void TestIntFieldCache()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            cfg.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw    = new RandomIndexWriter(Random, dir, cfg);
            Document          doc   = new Document();
            Int32Field        field = new Int32Field("f", 0, Field.Store.YES);

            doc.Add(field);
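            // Generate a small random set of values, biased toward the edge cases int.MinValue, 0 and int.MaxValue.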
            int[] values = new int[TestUtil.NextInt32(Random, 1, 10)];
            for (int i = 0; i < values.Length; ++i)
            {
                int v;
                switch (Random.Next(10))
                {
                case 0:
                    v = int.MinValue;
                    break;

                case 1:
                    v = 0;
                    break;

                case 2:
                    v = int.MaxValue;
                    break;

                default:
                    v = TestUtil.NextInt32(Random, -10, 10);
                    break;
                }
                values[i] = v;
                if (v == 0 && Random.NextBoolean())
                {
                    // missing
                    iw.AddDocument(new Document());
                }
                else
                {
                    field.SetInt32Value(v);
                    iw.AddDocument(doc);
                }
            }
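            // Merge down to a single segment so the field cache can be read from one segment reader.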
            iw.ForceMerge(1);
            DirectoryReader   reader = iw.GetReader();
            FieldCache.Int32s ints   = FieldCache.DEFAULT.GetInt32s(GetOnlySegmentReader(reader), "f", false);

            for (int i = 0; i < values.Length; ++i)
            {
                Assert.AreEqual(values[i], ints.Get(i));
            }
            reader.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Example #2
        public override void BeforeClass()
        {
            base.BeforeClass();

            NoDocs    = AtLeast(4096);
            Distance  = (1 << 30) / NoDocs;
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                Random,
                Directory,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                    .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 100, 1000))
                    .SetMergePolicy(NewLogMergePolicy()));

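            // Build one stored and one unstored FieldType for each precision step under test.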
            FieldType storedInt = new FieldType(Int32Field.TYPE_NOT_STORED);
            storedInt.IsStored = true;
            storedInt.Freeze();

            FieldType storedInt8    = new FieldType(storedInt) { NumericPrecisionStep = 8 };
            FieldType storedInt4    = new FieldType(storedInt) { NumericPrecisionStep = 4 };
            FieldType storedInt2    = new FieldType(storedInt) { NumericPrecisionStep = 2 };
            FieldType storedIntNone = new FieldType(storedInt) { NumericPrecisionStep = int.MaxValue };

            FieldType unstoredInt  = Int32Field.TYPE_NOT_STORED;
            FieldType unstoredInt8 = new FieldType(unstoredInt) { NumericPrecisionStep = 8 };
            FieldType unstoredInt4 = new FieldType(unstoredInt) { NumericPrecisionStep = 4 };
            FieldType unstoredInt2 = new FieldType(unstoredInt) { NumericPrecisionStep = 2 };

            Int32Field field8      = new Int32Field("field8", 0, storedInt8);
            Int32Field field4      = new Int32Field("field4", 0, storedInt4);
            Int32Field field2      = new Int32Field("field2", 0, storedInt2);
            Int32Field fieldNoTrie = new Int32Field("field" + int.MaxValue, 0, storedIntNone);
            Int32Field ascfield8   = new Int32Field("ascfield8", 0, unstoredInt8);
            Int32Field ascfield4   = new Int32Field("ascfield4", 0, unstoredInt4);
            Int32Field ascfield2   = new Int32Field("ascfield2", 0, unstoredInt2);

            Document doc = new Document();

            // Add fields that have a distance between values, to test general functionality
            doc.Add(field8);
            doc.Add(field4);
            doc.Add(field2);
            doc.Add(fieldNoTrie);
            // Add ascending fields with a distance of 1, beginning at -noDocs/2, to test correct splitting of ranges and inclusive/exclusive bounds
            doc.Add(ascfield8);
            doc.Add(ascfield4);
            doc.Add(ascfield2);

            // Add a series of noDocs docs with increasing int values
            for (int l = 0; l < NoDocs; l++)
            {
                int val = Distance * l + StartOffset;
                field8.SetInt32Value(val);
                field4.SetInt32Value(val);
                field2.SetInt32Value(val);
                fieldNoTrie.SetInt32Value(val);

                val = l - (NoDocs / 2);
                ascfield8.SetInt32Value(val);
                ascfield4.SetInt32Value(val);
                ascfield2.SetInt32Value(val);
                writer.AddDocument(doc);
            }

            Reader   = writer.GetReader();
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Example #3
        public void TestRandom()
        {
            int numberOfRuns = TestUtil.NextInt32(Random, 3, 6);

            for (int iter = 0; iter < numberOfRuns; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
                }

                int numDocs   = TestUtil.NextInt32(Random, 100, 1000) * RANDOM_MULTIPLIER;
                int numGroups = TestUtil.NextInt32(Random, 1, numDocs);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
                }

                List<BytesRef> groups = new List<BytesRef>();
                for (int i = 0; i < numGroups; i++)
                {
                    string randomValue;
                    do
                    {
                        // Because of the DV-based impl we can't distinguish between an empty string and a null value.
                        // For that reason we don't generate empty-string groups.
                        randomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    } while ("".Equals(randomValue, StringComparison.Ordinal));
                    groups.Add(new BytesRef(randomValue));
                }
                string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: create fake content");
                }
                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.Append("real").Append(Random.Next(3)).Append(' ');
                    int fakeCount = Random.Next(10);
                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
                    {
                        sb.Append("fake ");
                    }
                    contentStrings[contentIDX] = sb.ToString();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + sb.ToString());
                    }
                }

                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
                    Random,
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                         new MockAnalyzer(Random)));
                bool          preFlex   = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
                bool          canUseIDV = !preFlex;
                DocValuesType valueType = vts[Random.Next(vts.Length)];

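                // Two reusable document templates: "doc" carries the group field (plus optional
                // DocValues), while "docNoGroup" is used for documents that have no group value.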
                Document doc        = new Document();
                Document docNoGroup = new Document();
                Field    group      = NewStringField("group", "", Field.Store.NO);
                doc.Add(group);
                Field valuesField = null;
                if (canUseIDV)
                {
                    switch (valueType)
                    {
                    case DocValuesType.BINARY:
                        valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                        break;

                    case DocValuesType.SORTED:
                        valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                        break;

                    default:
                        fail("unhandled type");
                        break;
                    }
                    doc.Add(valuesField);
                }
                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
                doc.Add(sort1);
                docNoGroup.Add(sort1);
                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
                doc.Add(sort2);
                docNoGroup.Add(sort2);
                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
                doc.Add(sort3);
                docNoGroup.Add(sort3);
                Field content = NewTextField("content", "", Field.Store.NO);
                doc.Add(content);
                docNoGroup.Add(content);
                Int32Field id = new Int32Field("id", 0, Field.Store.NO);
                doc.Add(id);
                docNoGroup.Add(id);
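                // Index numDocs documents; roughly 1 in 24 gets no group value at all.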
                GroupDoc[] groupDocs = new GroupDoc[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    BytesRef groupValue;
                    if (Random.Next(24) == 17)
                    {
                        // So we test the "doc doesn't have the group'd
                        // field" case:
                        groupValue = null;
                    }
                    else
                    {
                        groupValue = groups[Random.Next(groups.Count)];
                    }

                    GroupDoc groupDoc = new GroupDoc(
                        i,
                        groupValue,
                        groups[Random.Next(groups.Count)],
                        groups[Random.Next(groups.Count)],
                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                        contentStrings[Random.Next(contentStrings.Length)]
                        );

                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
                    }

                    groupDocs[i] = groupDoc;
                    if (groupDoc.group != null)
                    {
                        group.SetStringValue(groupDoc.group.Utf8ToString());
                        if (canUseIDV)
                        {
                            valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString()));
                        }
                    }
                    sort1.SetStringValue(groupDoc.sort1.Utf8ToString());
                    sort2.SetStringValue(groupDoc.sort2.Utf8ToString());
                    sort3.SetStringValue(groupDoc.sort3.Utf8ToString());
                    content.SetStringValue(groupDoc.content);
                    id.SetInt32Value(groupDoc.id);
                    if (groupDoc.group == null)
                    {
                        w.AddDocument(docNoGroup);
                    }
                    else
                    {
                        w.AddDocument(doc);
                    }
                }

                DirectoryReader r = w.GetReader();
                w.Dispose();

                // NOTE: intentional but temporary field cache insanity!
                FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);
                int[]             fieldIdToDocID = new int[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    int fieldId = docIdToFieldId.Get(i);
                    fieldIdToDocID[fieldId] = i;
                }

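                // Run the verification inside try/finally so the field cache is always purged, even on assertion failure.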
                try
                {
                    IndexSearcher s = NewSearcher(r);
                    if (s.IndexReader is SlowCompositeReaderWrapper)
                    {
                        canUseIDV = false;
                    }
                    else
                    {
                        canUseIDV = !preFlex;
                    }

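                    // Search for each content term once so that every GroupDoc gets its score recorded.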
                    for (int contentID = 0; contentID < 3; contentID++)
                    {
                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                        foreach (ScoreDoc hit in hits)
                        {
                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                            assertTrue(gd.score == 0.0);
                            gd.score = hit.Score;
                            int docId = gd.id;
                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                        }
                    }

                    foreach (GroupDoc gd in groupDocs)
                    {
                        assertTrue(gd.score != 0.0);
                    }

                    for (int searchIter = 0; searchIter < 100; searchIter++)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: searchIter=" + searchIter);
                        }

                        string searchTerm      = "real" + Random.nextInt(3);
                        bool   sortByScoreOnly = Random.nextBoolean();
                        Sort   sortWithinGroup = GetRandomSort(sortByScoreOnly);
                        AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                        int[] actualGroupHeads   = allGroupHeadsCollector.RetrieveGroupHeads();
                        // The actual group heads contain Lucene doc ids; convert them into our id values.
                        for (int i = 0; i < actualGroupHeads.Length; i++)
                        {
                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                        }
                        // Allows us to easily iterate over and assert the actual and expected results.
                        Array.Sort(expectedGroupHeads);
                        Array.Sort(actualGroupHeads);

                        if (VERBOSE)
                        {
                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                            Console.WriteLine("Sort within group: " + sortWithinGroup);
                            Console.WriteLine("Num group: " + numGroups);
                            Console.WriteLine("Num doc: " + numDocs);
                            Console.WriteLine("\n=== Expected: \n");
                            foreach (int expectedDocId in expectedGroupHeads)
                            {
                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                                string   expectedGroup    = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                                  expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                                    );
                            }
                            Console.WriteLine("\n=== Actual: \n");
                            foreach (int actualDocId in actualGroupHeads)
                            {
                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
                                string   actualGroup    = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                                  actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                                    );
                            }
                            Console.WriteLine("\n===================================================================================");
                        }

                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
                    }
                }
                finally
                {
                    QueryUtils.PurgeFieldCache(r);
                }

                r.Dispose();
                dir.Dispose();
            }
        }
Example #4
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestWriteReadMerge()
        {
            // get another codec, other than the default: so we are merging segments across different codecs
            Codec otherCodec;

            if ("SimpleText".Equals(Codec.Default.Name, StringComparison.Ordinal))
            {
                otherCodec = new Lucene46Codec();
            }
            else
            {
                otherCodec = new SimpleTextCodec();
            }
            Directory         dir    = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwConf.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, (IndexWriterConfig)iwConf.Clone());

            int docCount = AtLeast(200);
            var data     = new byte[docCount][][];

            for (int i = 0; i < docCount; ++i)
            {
                int fieldCount = Rarely() ? TestUtil.NextInt32(Random, 1, 500) : TestUtil.NextInt32(Random, 1, 5);
                data[i] = new byte[fieldCount][];
                for (int j = 0; j < fieldCount; ++j)
                {
                    int length = Rarely() ? Random.Next(1000) : Random.Next(10);
                    int max    = Rarely() ? 256 : 2;
                    data[i][j] = RandomByteArray(length, max);
                }
            }

            FieldType type = new FieldType(StringField.TYPE_STORED);

            type.IsIndexed = false;
            type.Freeze();
            Int32Field id = new Int32Field("id", 0, Field.Store.YES);

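            // Index the documents, occasionally reopening the writer with the other codec so that merges cross codec boundaries.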
            for (int i = 0; i < data.Length; ++i)
            {
                Document doc = new Document();
                doc.Add(id);
                id.SetInt32Value(i);
                for (int j = 0; j < data[i].Length; ++j)
                {
                    Field f = new Field("bytes" + j, data[i][j], type);
                    doc.Add(f);
                }
                iw.IndexWriter.AddDocument(doc);
                if (Random.NextBoolean() && (i % (data.Length / 10) == 0))
                {
                    iw.IndexWriter.Dispose();
                    // test merging against a non-compressing codec
                    if (iwConf.Codec == otherCodec)
                    {
                        iwConf.SetCodec(Codec.Default);
                    }
                    else
                    {
                        iwConf.SetCodec(otherCodec);
                    }
                    iw = new RandomIndexWriter(Random, dir, (IndexWriterConfig)iwConf.Clone());
                }
            }

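            // Delete a few random id ranges so the forced merge below has deletions to process.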
            for (int i = 0; i < 10; ++i)
            {
                int min = Random.Next(data.Length);
                int max = min + Random.Next(20);
                iw.DeleteDocuments(NumericRangeQuery.NewInt32Range("id", min, max, true, false));
            }

            iw.ForceMerge(2); // force merges with deletions

            iw.Commit();

            DirectoryReader ir = DirectoryReader.Open(dir);

            Assert.IsTrue(ir.NumDocs > 0);
            int numDocs = 0;

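            // Verify that every live document still round-trips its stored binary fields.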
            for (int i = 0; i < ir.MaxDoc; ++i)
            {
                Document doc = ir.Document(i);
                if (doc == null)
                {
                    continue;
                }
                ++numDocs;
                int docId = (int)doc.GetField("id").GetNumericValue();
                Assert.AreEqual(data[docId].Length + 1, doc.Fields.Count);
                for (int j = 0; j < data[docId].Length; ++j)
                {
                    var      arr     = data[docId][j];
                    BytesRef arr2Ref = doc.GetBinaryValue("bytes" + j);
                    var      arr2    = Arrays.CopyOfRange(arr2Ref.Bytes, arr2Ref.Offset, arr2Ref.Offset + arr2Ref.Length);
                    Assert.AreEqual(arr, arr2);
                }
            }
            Assert.IsTrue(ir.NumDocs <= numDocs);
            ir.Dispose();

            iw.DeleteAll();
            iw.Commit();
            iw.ForceMerge(1);

            iw.Dispose();
            dir.Dispose();
        }