/// <summary>
/// Indexes 7 documents (group values "1".."3" plus one doc without a group field),
/// then runs three distinct-value-count searches (content:random, content:some,
/// content:blob) and verifies the unique count values collected per group.
/// The doc-values type is chosen at random (or null when the codec is Lucene3x,
/// which cannot use doc values).
/// </summary>
public virtual void TestSimple()
{
    Random random = Random();
    FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[] {
        FieldInfo.DocValuesType_e.NUMERIC,
        FieldInfo.DocValuesType_e.BINARY,
        FieldInfo.DocValuesType_e.SORTED,
    };
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
    // Lucene3x cannot read/write doc values, so fall back to a null dv type there.
    bool canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
    FieldInfo.DocValuesType_e? dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;

    Document doc = new Document();
    AddField(doc, groupField, "1", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "random text", Field.Store.NO));
    doc.Add(new StringField("id", "1", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    AddField(doc, groupField, "1", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
    doc.Add(new StringField("id", "2", Field.Store.NO));
    w.AddDocument(doc);

    // 2
    doc = new Document();
    AddField(doc, groupField, "1", dvType);
    AddField(doc, countField, "2", dvType);
    doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
    doc.Add(new StringField("id", "3", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit(); // To ensure a second segment

    // 3
    doc = new Document();
    AddField(doc, groupField, "2", dvType);
    doc.Add(new TextField("content", "some random text", Field.Store.NO));
    doc.Add(new StringField("id", "4", Field.Store.NO));
    w.AddDocument(doc);

    // 4
    doc = new Document();
    AddField(doc, groupField, "3", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "some more random text", Field.Store.NO));
    doc.Add(new StringField("id", "5", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    AddField(doc, groupField, "3", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "random blob", Field.Store.NO));
    doc.Add(new StringField("id", "6", Field.Store.NO));
    w.AddDocument(doc);

    // 6 -- no author field
    // NOTE(review): this doc reuses id "6" (same as the previous doc) — looks like
    // it mirrors the upstream Java test; confirm against the original before "fixing".
    doc = new Document();
    doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    AddField(doc, countField, "1", dvType);
    doc.Add(new StringField("id", "6", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = NewSearcher(w.Reader);
    w.Dispose();

    // Comparer used to give the collected groups a deterministic order before asserting.
    var cmp = new ComparerAnonymousHelper1(this);

    // === Search for content:random
    IAbstractFirstPassGroupingCollector<IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
    // so this cast is not necessary. Consider eliminating the Collector abstract class.
    indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector as Collector);
    // NOTE(review): this first call passes dvType.GetValueOrDefault() while the
    // two later searches pass the nullable dvType directly — confirm whether the
    // difference (null becomes the enum's zero value here) is intentional.
    IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<IComparable>> distinctValuesCollector =
        CreateDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
    // so this cast is not necessary. Consider eliminating the Collector abstract class.
    indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector as Collector);

    //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
    var gcs = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctValuesCollector.Groups);
    gcs.Sort(cmp);
    // 4 groups match content:random — the null group, "1", "2" and "3".
    assertEquals(4, gcs.Count);

    CompareNull(gcs[0].GroupValue);
    List<IComparable> countValues = new List<IComparable>(gcs[0].UniqueValues);
    assertEquals(1, countValues.size());
    Compare("1", countValues[0]);

    Compare("1", gcs[1].GroupValue);
    countValues = new List<IComparable>(gcs[1].UniqueValues);
    countValues.Sort(nullComparator);
    assertEquals(2, countValues.size());
    Compare("1", countValues[0]);
    Compare("2", countValues[1]);

    Compare("2", gcs[2].GroupValue);
    countValues = new List<IComparable>(gcs[2].UniqueValues);
    assertEquals(1, countValues.size());
    CompareNull(countValues[0]);

    Compare("3", gcs[3].GroupValue);
    countValues = new List<IComparable>(gcs[3].UniqueValues);
    assertEquals(1, countValues.size());
    Compare("1", countValues[0]);

    // === Search for content:some
    firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
    // so this cast is not necessary. Consider eliminating the Collector abstract class.
    indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector as Collector);
    distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
    // so this cast is not necessary. Consider eliminating the Collector abstract class.
    indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector as Collector);

    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
    //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
    gcs = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctValuesCollector.Groups);
    gcs.Sort(cmp);
    assertEquals(3, gcs.Count);

    Compare("1", gcs[0].GroupValue);
    countValues = new List<IComparable>(gcs[0].UniqueValues);
    assertEquals(2, countValues.size());
    countValues.Sort(nullComparator);
    Compare("1", countValues[0]);
    Compare("2", countValues[1]);

    Compare("2", gcs[1].GroupValue);
    countValues = new List<IComparable>(gcs[1].UniqueValues);
    assertEquals(1, countValues.size());
    CompareNull(countValues[0]);

    Compare("3", gcs[2].GroupValue);
    countValues = new List<IComparable>(gcs[2].UniqueValues);
    assertEquals(1, countValues.size());
    Compare("1", countValues[0]);

    // === Search for content:blob
    firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
    // so this cast is not necessary. Consider eliminating the Collector abstract class.
    indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector as Collector);
    distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
    // so this cast is not necessary. Consider eliminating the Collector abstract class.
    indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector as Collector);

    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
    //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
    gcs = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctValuesCollector.Groups);
    gcs.Sort(cmp);
    assertEquals(2, gcs.Count);

    Compare("1", gcs[0].GroupValue);
    countValues = new List<IComparable>(gcs[0].UniqueValues);
    // B/c the only one document matched with blob inside the author 1 group
    assertEquals(1, countValues.Count);
    Compare("1", countValues[0]);

    Compare("3", gcs[1].GroupValue);
    countValues = new List<IComparable>(gcs[1].UniqueValues);
    assertEquals(1, countValues.Count);
    Compare("1", countValues[0]);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes at least 50 docs with an "id" numeric doc-values field and a "dv" field
/// of the given doc-values <paramref name="type"/> (BINARY, SORTED or NUMERIC),
/// then reads each segment back through a <c>ValueSource</c>
/// (<c>BytesRefFieldSource</c> for BINARY/SORTED, <c>LongFieldSource</c> for NUMERIC)
/// and verifies the per-document values round-trip.
/// </summary>
/// <param name="type">Doc-values type under test; anything else throws <see cref="InvalidOperationException"/>.</param>
private void DoTest(FieldInfo.DocValuesType_e type)
{
    Directory d = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    int nDocs = AtLeast(50);
    Field id = new NumericDocValuesField("id", 0);
    Field f;
    // Pick the dv field implementation that matches the type under test.
    switch (type)
    {
        case FieldInfo.DocValuesType_e.BINARY:
            f = new BinaryDocValuesField("dv", new BytesRef());
            break;
        case FieldInfo.DocValuesType_e.SORTED:
            f = new SortedDocValuesField("dv", new BytesRef());
            break;
        case FieldInfo.DocValuesType_e.NUMERIC:
            f = new NumericDocValuesField("dv", 0);
            break;
        default:
            throw new InvalidOperationException();
    }
    // A single Document/Field pair is reused; only the field values change per doc.
    Document document = new Document();
    document.Add(id);
    document.Add(f);

    // Remember the value written for each doc id so it can be verified on read.
    object[] vals = new object[nDocs];

    RandomIndexWriter iw = new RandomIndexWriter(Random(), d, iwConfig);
    for (int i = 0; i < nDocs; ++i)
    {
        id.LongValue = i;
        switch (type)
        {
            case FieldInfo.DocValuesType_e.SORTED:
            case FieldInfo.DocValuesType_e.BINARY:
                // Non-empty random strings only (empty string is not distinguishable later).
                do
                {
                    vals[i] = TestUtil.RandomSimpleString(Random(), 20);
                } while (((string)vals[i]).Length == 0);
                f.BytesValue = new BytesRef((string)vals[i]);
                break;
            case FieldInfo.DocValuesType_e.NUMERIC:
                int bitsPerValue = Random().NextIntBetween(1, 31); // keep it an int
                vals[i] = (long)Random().Next((int)PackedInts.MaxValue(bitsPerValue));
                f.LongValue = (long)vals[i];
                break;
        }
        iw.AddDocument(document);
        // Occasionally commit to force multiple segments.
        if (Random().NextBoolean() && i % 10 == 9)
        {
            iw.Commit();
        }
    }
    iw.Dispose();

    DirectoryReader rd = DirectoryReader.Open(d);
    foreach (AtomicReaderContext leave in rd.Leaves)
    {
        // "id" maps the segment-local doc back to the index into vals.
        FunctionValues ids = (new LongFieldSource("id")).GetValues(null, leave);
        ValueSource vs;
        switch (type)
        {
            case FieldInfo.DocValuesType_e.BINARY:
            case FieldInfo.DocValuesType_e.SORTED:
                vs = new BytesRefFieldSource("dv");
                break;
            case FieldInfo.DocValuesType_e.NUMERIC:
                vs = new LongFieldSource("dv");
                break;
            default:
                throw new InvalidOperationException();
        }
        FunctionValues values = vs.GetValues(null, leave);
        BytesRef bytes = new BytesRef();
        for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
        {
            assertTrue(values.Exists(i));
            // Sanity-check ObjectVal's runtime type per source implementation.
            if (vs is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(i) is string);
            }
            else if (vs is LongFieldSource)
            {
                assertTrue(values.ObjectVal(i) is long?);
                assertTrue(values.BytesVal(i, bytes));
            }
            else
            {
                throw new InvalidOperationException();
            }
            object expected = vals[ids.IntVal(i)];
            switch (type)
            {
                case FieldInfo.DocValuesType_e.SORTED:
                    values.OrdVal(i); // no exception
                    assertTrue(values.NumOrd() >= 1);
                    // SORTED also runs every BINARY check below (Java switch fall-through).
                    goto case FieldInfo.DocValuesType_e.BINARY;
                case FieldInfo.DocValuesType_e.BINARY:
                    // NOTE(review): the ObjectVal/StrVal pair is asserted twice —
                    // presumably inherited from the upstream Java test; harmless.
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertTrue(values.BytesVal(i, bytes));
                    assertEquals(new BytesRef((string)expected), bytes);
                    break;
                case FieldInfo.DocValuesType_e.NUMERIC:
                    assertEquals(Number.ToInt64(expected.ToString()), values.LongVal(i));
                    break;
            }
        }
    }
    rd.Dispose();
    d.Dispose();
}
/// <summary>
/// Builds a randomized index for the distinct-count tests and returns it wrapped in
/// an <c>IndexContext</c>. Each document gets a zero-padded "id", an optional group
/// field, an optional count field and a "content" term drawn from a small random
/// vocabulary ("random0", "random1", ...). While indexing, the expected mapping
/// content term -> (group value -> set of count values) is recorded so searches can
/// later be verified against it.
/// </summary>
private IndexContext CreateIndexContext()
{
    Random random = Random();
    // Only BINARY and SORTED here (unlike TestSimple, no NUMERIC).
    FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[] {
        FieldInfo.DocValuesType_e.BINARY,
        FieldInfo.DocValuesType_e.SORTED
    };
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
    );
    // Lucene3x cannot use doc values; fall back to a null dv type there.
    bool canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
    FieldInfo.DocValuesType_e? dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;

    int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
    // Value pools are smaller than numDocs so values repeat across documents.
    string[] groupValues = new string[numDocs / 5];
    string[] countValues = new string[numDocs / 10];
    for (int i = 0; i < groupValues.Length; i++)
    {
        groupValues[i] = GenerateRandomNonEmptyString();
    }
    for (int i = 0; i < countValues.Length; i++)
    {
        countValues[i] = GenerateRandomNonEmptyString();
    }
    List<string> contentStrings = new List<string>();
    IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts = new HashMap<string, IDictionary<string, ISet<string>>>();
    for (int i = 1; i <= numDocs; i++)
    {
        // group/count values are occasionally null to exercise the missing-field case.
        string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
        string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
        string content = "random" + random.nextInt(numDocs / 20);
        IDictionary<string, ISet<string>> groupToCounts;
        if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
        {
            // Groups sort always DOCID asc...
            searchTermToGroupCounts.Add(content, groupToCounts = new LinkedHashMap<string, ISet<string>>());
            contentStrings.Add(content);
        }
        // NOTE(review): groupValue may be null here — relies on the HashMap/LinkedHashMap
        // implementations accepting null keys; confirm that is intended.
        ISet<string> countsVals;
        if (!groupToCounts.TryGetValue(groupValue, out countsVals))
        {
            groupToCounts.Add(groupValue, countsVals = new HashSet<string>());
        }
        countsVals.Add(countValue);
        Document doc = new Document();
        doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
        if (groupValue != null)
        {
            AddField(doc, groupField, groupValue, dvType);
        }
        if (countValue != null)
        {
            AddField(doc, countField, countValue, dvType);
        }
        doc.Add(new TextField("content", content, Field.Store.YES));
        w.AddDocument(doc);
    }

    DirectoryReader reader = w.Reader;
    if (VERBOSE)
    {
        // NOTE(review): dumps "author"/"publisher" — presumably groupField == "author"
        // and countField == "publisher"; verify against the field declarations.
        for (int docID = 0; docID < reader.MaxDoc; docID++)
        {
            Document doc = reader.Document(docID);
            Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
        }
    }

    w.Dispose();
    return (new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/)));
}
/// <summary>
/// Reads a Lucene 4.2-format field-infos file for the given segment and returns the
/// decoded <see cref="FieldInfos"/>. Per field it decodes: name, field number, a
/// flags byte (indexed / term vectors / norms / payloads / index options), a packed
/// byte holding the doc-values and norms types (low and high nibble respectively),
/// and a string-string attribute map.
/// </summary>
/// <param name="directory">Directory to open the segment file from.</param>
/// <param name="segmentName">Segment whose field infos are read.</param>
/// <param name="segmentSuffix">Unused here; file name is built with an empty suffix.</param>
/// <param name="iocontext">IO context passed to <c>OpenInput</c>.</param>
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
    IndexInput input = directory.OpenInput(fileName, iocontext);
    // success tracks whether we finished cleanly so the finally block knows whether
    // to dispose normally or suppress secondary exceptions during cleanup.
    bool success = false;
    try
    {
        CodecUtil.CheckHeader(input, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_START, Lucene42FieldInfosFormat.FORMAT_CURRENT);

        int size = input.ReadVInt(); //read in the size
        FieldInfo[] infos = new FieldInfo[size];

        for (int i = 0; i < size; i++)
        {
            string name = input.ReadString();
            int fieldNumber = input.ReadVInt();
            byte bits = input.ReadByte();
            bool isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
            bool storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
            bool storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
            FieldInfo.IndexOptions indexOptions;
            if (!isIndexed)
            {
                // NOTE(review): the upstream Java reader stores null here; default(enum)
                // is the enum's zero value — confirm FieldInfo treats this as "not indexed".
                indexOptions = default(FieldInfo.IndexOptions);
            }
            else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            }
            else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0)
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            }
            else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            else
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            // DV Types are packed in one byte
            byte val = input.ReadByte();
            // Low nibble = doc-values type, high nibble = norms type.
            FieldInfo.DocValuesType_e docValuesType = GetDocValuesType(input, (sbyte)(val & 0x0F));
            FieldInfo.DocValuesType_e normsType = GetDocValuesType(input, (sbyte)(((int)((uint)val >> 4)) & 0x0F));
            IDictionary<string, string> attributes = input.ReadStringStringMap();
            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, CollectionsHelper.UnmodifiableMap(attributes));
        }

        CodecUtil.CheckEOF(input);
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return (fieldInfos);
    }
    finally
    {
        if (success)
        {
            input.Dispose();
        }
        else
        {
            // Don't let a cleanup failure mask the original exception.
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Adds <paramref name="value"/> to <paramref name="doc"/> as a stored text field
/// named <paramref name="groupField"/>, and — when doc values are usable — mirrors
/// it into a parallel "&lt;groupField&gt;_dv" doc-values field of the requested type.
/// Fails the test for any doc-values type other than BINARY or SORTED.
/// </summary>
private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, FieldInfo.DocValuesType_e valueType)
{
    doc.Add(new TextField(groupField, value, Field.Store.YES));
    if (!canUseIDV)
    {
        return; // codec cannot read/write doc values; indexed text field is enough
    }

    string dvFieldName = groupField + "_dv";
    Field dvField = null;
    if (valueType == FieldInfo.DocValuesType_e.BINARY)
    {
        dvField = new BinaryDocValuesField(dvFieldName, new BytesRef(value));
    }
    else if (valueType == FieldInfo.DocValuesType_e.SORTED)
    {
        dvField = new SortedDocValuesField(dvFieldName, new BytesRef(value));
    }
    else
    {
        fail("unhandled type");
    }
    doc.Add(dvField);
}
/// <summary>
/// Randomly returns one of the two equivalent all-group-heads collector
/// implementations (function/ValueSource-based or term-based) so both code paths
/// get exercised across test runs. Logs the chosen implementation when VERBOSE.
/// </summary>
private AbstractAllGroupHeadsCollector CreateRandomCollector(string groupField, Sort sortWithinGroup, bool canUseIDV, FieldInfo.DocValuesType_e valueType)
{
    AbstractAllGroupHeadsCollector chosen;
    if (Random().nextBoolean())
    {
        // ValueSource-backed variant.
        ValueSource source = new BytesRefFieldSource(groupField);
        chosen = new FunctionAllGroupHeadsCollector(source, new Hashtable(), sortWithinGroup);
    }
    else
    {
        // Term-backed variant.
        chosen = TermAllGroupHeadsCollector.Create(groupField, sortWithinGroup);
    }

    if (VERBOSE)
    {
        Console.WriteLine("Selected implementation: " + chosen.GetType().Name);
    }
    return chosen;
}
/// <summary>
/// Indexes 8 documents (authors author1..author3 plus two docs without an author
/// field) with two sortable id fields, then verifies the group heads retrieved for
/// several content queries and sorts — both as a doc-id array and as a bit set.
/// </summary>
public void TestBasic()
{
    string groupField = "author";
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        Random(),
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    // Lucene3x cannot use doc values.
    bool canUseIDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
    FieldInfo.DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];

    // 0
    Document doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
    doc.Add(NewTextField("content", "random text", Field.Store.NO));
    doc.Add(NewStringField("id_1", "1", Field.Store.NO));
    doc.Add(NewStringField("id_2", "1", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
    doc.Add(NewTextField("content", "some more random text blob", Field.Store.NO));
    doc.Add(NewStringField("id_1", "2", Field.Store.NO));
    doc.Add(NewStringField("id_2", "2", Field.Store.NO));
    w.AddDocument(doc);

    // 2
    doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
    doc.Add(NewTextField("content", "some more random textual data", Field.Store.NO));
    doc.Add(NewStringField("id_1", "3", Field.Store.NO));
    doc.Add(NewStringField("id_2", "3", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit(); // To ensure a second segment

    // 3
    doc = new Document();
    AddGroupField(doc, groupField, "author2", canUseIDV, valueType);
    doc.Add(NewTextField("content", "some random text", Field.Store.NO));
    doc.Add(NewStringField("id_1", "4", Field.Store.NO));
    doc.Add(NewStringField("id_2", "4", Field.Store.NO));
    w.AddDocument(doc);

    // 4
    doc = new Document();
    AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
    doc.Add(NewTextField("content", "some more random text", Field.Store.NO));
    doc.Add(NewStringField("id_1", "5", Field.Store.NO));
    doc.Add(NewStringField("id_2", "5", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
    doc.Add(NewTextField("content", "random blob", Field.Store.NO));
    doc.Add(NewStringField("id_1", "6", Field.Store.NO));
    doc.Add(NewStringField("id_2", "6", Field.Store.NO));
    w.AddDocument(doc);

    // 6 -- no author field
    doc = new Document();
    doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    doc.Add(NewStringField("id_1", "6", Field.Store.NO));
    doc.Add(NewStringField("id_2", "6", Field.Store.NO));
    w.AddDocument(doc);

    // 7 -- no author field
    doc = new Document();
    doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    doc.Add(NewStringField("id_1", "7", Field.Store.NO));
    doc.Add(NewStringField("id_2", "7", Field.Store.NO));
    w.AddDocument(doc);

    IndexReader reader = w.Reader;
    IndexSearcher indexSearcher = NewSearcher(reader);
    w.Dispose();
    int maxDoc = reader.MaxDoc;

    // content:random, heads chosen by descending id_1 (INT sort).
    Sort sortWithinGroup = new Sort(new SortField("id_1", SortField.Type_e.INT, true));
    var allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
    indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
    assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

    // content:some
    allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
    indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector);
    assertTrue(ArrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads()));
    assertTrue(OpenBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

    // content:blob
    allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
    indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector);
    assertTrue(ArrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads()));
    assertTrue(OpenBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

    // STRING sort type triggers different implementation
    Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortField.Type_e.STRING, true));
    allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup2, canUseIDV, valueType);
    indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
    assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

    Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortField.Type_e.STRING, false));
    allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup3, canUseIDV, valueType);
    indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    // 7 b/c higher doc id wins, even if order of field is in not in reverse.
    assertTrue(ArrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads()));
    assertTrue(OpenBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Randomized end-to-end test: over several iterations it builds an index of
/// random group/sort/content values (reusing one Document whose Field instances
/// are mutated per doc), then runs 100 random group-heads searches and compares
/// the collector's group heads against an independently computed expectation
/// (<c>CreateExpectedGroupHeads</c>).
/// </summary>
public void TestRandom()
{
    int numberOfRuns = TestUtil.NextInt(Random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
        }
        int numDocs = TestUtil.NextInt(Random(), 100, 1000) * RANDOM_MULTIPLIER;
        int numGroups = TestUtil.NextInt(Random(), 1, numDocs);
        if (VERBOSE)
        {
            Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }

        // Pool of group values; docs draw from it at random below.
        List<BytesRef> groups = new List<BytesRef>();
        for (int i = 0; i < numGroups; i++)
        {
            string randomValue;
            do
            {
                // B/c of DV based impl we can't see the difference between an empty string and a null value.
                // For that reason we don't generate empty string groups.
                randomValue = TestUtil.RandomRealisticUnicodeString(Random());
            } while ("".Equals(randomValue, StringComparison.Ordinal));
            groups.Add(new BytesRef(randomValue));
        }

        // Content strings always start with "realN " so searches on "realN" hit them.
        string[] contentStrings = new string[TestUtil.NextInt(Random(), 2, 20)];
        if (VERBOSE)
        {
            Console.WriteLine("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
        {
            StringBuilder sb = new StringBuilder();
            sb.append("real").append(Random().nextInt(3)).append(' ');
            int fakeCount = Random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
            {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE)
            {
                Console.WriteLine(" content=" + sb.toString());
            }
        }

        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(
            Random(),
            dir,
            NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        bool preFlex = "Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
        bool canUseIDV = !preFlex;
        FieldInfo.DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];

        // Two reusable documents: "doc" carries the group field (and optionally its
        // dv mirror), "docNoGroup" shares every other Field instance but has no group.
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field group = NewStringField("group", "", Field.Store.NO);
        doc.Add(group);
        Field valuesField = null;
        if (canUseIDV)
        {
            switch (valueType)
            {
                case FieldInfo.DocValuesType_e.BINARY:
                    valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                    break;
                case FieldInfo.DocValuesType_e.SORTED:
                    valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                    break;
                default:
                    fail("unhandled type");
                    break;
            }
            doc.Add(valuesField);
        }
        Field sort1 = NewStringField("sort1", "", Field.Store.NO);
        doc.Add(sort1);
        docNoGroup.Add(sort1);
        Field sort2 = NewStringField("sort2", "", Field.Store.NO);
        doc.Add(sort2);
        docNoGroup.Add(sort2);
        Field sort3 = NewStringField("sort3", "", Field.Store.NO);
        doc.Add(sort3);
        docNoGroup.Add(sort3);
        Field content = NewTextField("content", "", Field.Store.NO);
        doc.Add(content);
        docNoGroup.Add(content);
        IntField id = new IntField("id", 0, Field.Store.NO);
        doc.Add(id);
        docNoGroup.Add(id);

        // In-memory model of each indexed doc, used to compute expected results.
        GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++)
        {
            BytesRef groupValue;
            if (Random().nextInt(24) == 17)
            {
                // So we test the "doc doesn't have the group'd
                // field" case:
                groupValue = null;
            }
            else
            {
                groupValue = groups[Random().nextInt(groups.size())];
            }
            GroupDoc groupDoc = new GroupDoc(
                i,
                groupValue,
                groups[Random().nextInt(groups.size())],
                groups[Random().nextInt(groups.size())],
                new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                contentStrings[Random().nextInt(contentStrings.Length)]
            );
            if (VERBOSE)
            {
                Console.WriteLine(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
            }
            groupDocs[i] = groupDoc;
            // Mutate the shared Field instances, then index whichever document matches
            // the presence/absence of a group value.
            if (groupDoc.group != null)
            {
                group.StringValue = (groupDoc.group.Utf8ToString());
                if (canUseIDV)
                {
                    valuesField.BytesValue = (new BytesRef(groupDoc.group.Utf8ToString()));
                }
            }
            sort1.StringValue = (groupDoc.sort1.Utf8ToString());
            sort2.StringValue = (groupDoc.sort2.Utf8ToString());
            sort3.StringValue = (groupDoc.sort3.Utf8ToString());
            content.StringValue = (groupDoc.content);
            id.IntValue = (groupDoc.id);
            if (groupDoc.group == null)
            {
                w.AddDocument(docNoGroup);
            }
            else
            {
                w.AddDocument(doc);
            }
        }

        DirectoryReader r = w.Reader;
        w.Dispose();

        // NOTE: intentional but temporary field cache insanity!
        // Maps a Lucene doc id to our logical id and back.
        FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false);
        int[] fieldIdToDocID = new int[numDocs];
        for (int i = 0; i < numDocs; i++)
        {
            int fieldId = docIdToFieldId.Get(i);
            fieldIdToDocID[fieldId] = i;
        }
        try
        {
            IndexSearcher s = NewSearcher(r);
            // The searcher may wrap the reader; doc values can't be used through
            // SlowCompositeReaderWrapper.
            if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(s.IndexReader.GetType()))
            {
                canUseIDV = false;
            }
            else
            {
                canUseIDV = !preFlex;
            }

            // Record each doc's score per content term, so score-based sorts can be
            // verified later. Every doc must match exactly one "realN" query.
            for (int contentID = 0; contentID < 3; contentID++)
            {
                ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                foreach (ScoreDoc hit in hits)
                {
                    GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                    assertTrue(gd.score == 0.0);
                    gd.score = hit.Score;
                    int docId = gd.id;
                    assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                }
            }
            foreach (GroupDoc gd in groupDocs)
            {
                assertTrue(gd.score != 0.0);
            }

            for (int searchIter = 0; searchIter < 100; searchIter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: searchIter=" + searchIter);
                }
                string searchTerm = "real" + Random().nextInt(3);
                bool sortByScoreOnly = Random().nextBoolean();
                Sort sortWithinGroup = GetRandomSort(sortByScoreOnly);
                AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                int[] actualGroupHeads = allGroupHeadsCollector.RetrieveGroupHeads();
                // The actual group heads contains Lucene ids. Need to change them into our id value.
                for (int i = 0; i < actualGroupHeads.Length; i++)
                {
                    actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                }
                // Allows us the easily iterate and assert the actual and expected results.
                Array.Sort(expectedGroupHeads);
                Array.Sort(actualGroupHeads);

                if (VERBOSE)
                {
                    Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                    Console.WriteLine("Sort within group: " + sortWithinGroup);
                    Console.WriteLine("Num group: " + numGroups);
                    Console.WriteLine("Num doc: " + numDocs);
                    Console.WriteLine("\n=== Expected: \n");
                    foreach (int expectedDocId in expectedGroupHeads)
                    {
                        GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                        string expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                        Console.WriteLine(
                            string.Format(CultureInfo.InvariantCulture,
                                "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(), expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                        );
                    }
                    Console.WriteLine("\n=== Actual: \n");
                    foreach (int actualDocId in actualGroupHeads)
                    {
                        GroupDoc actualGroupDoc = groupDocs[actualDocId];
                        string actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                        Console.WriteLine(
                            string.Format(CultureInfo.InvariantCulture,
                                "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(), actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                        );
                    }
                    Console.WriteLine("\n===================================================================================");
                }
                assertArrayEquals(expectedGroupHeads, actualGroupHeads);
            }
        }
        finally
        {
            QueryUtils.PurgeFieldCache(r);
        }
        r.Dispose();
        dir.Dispose();
    }
}