/// <summary>
/// Creates the second-pass distinct-values collector that corresponds to the
/// kind of first-pass grouping collector that was used.
/// </summary>
/// <typeparam name="T">Group value type of the first-pass collector.</typeparam>
/// <param name="firstPassGroupingCollector">
/// First-pass collector; its top groups (offset 0, <c>fillFields</c> false) are
/// handed to the new collector.
/// </param>
/// <param name="groupField">Name of the field the documents are grouped by.</param>
/// <param name="countField">Name of the field whose distinct values are collected per group.</param>
/// <param name="dvType">Not read by this method; kept so existing call sites keep compiling.</param>
/// <returns>
/// A <c>FunctionDistinctValuesCollector</c> when the first-pass collector is a
/// <c>FunctionFirstPassGroupingCollector</c>, otherwise a <c>TermDistinctValuesCollector</c>.
/// </returns>
private IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<T>> CreateDistinctCountCollector<T>(
    IAbstractFirstPassGroupingCollector<T> firstPassGroupingCollector,
    string groupField,
    string countField,
    DocValuesType dvType)
{
    IEnumerable<ISearchGroup<T>> searchGroups = firstPassGroupingCollector.GetTopGroups(0, false);

    // The collector was already dereferenced above, so it is non-null here and
    // `is` is equivalent to typeof(X).IsAssignableFrom(obj.GetType()).
    if (firstPassGroupingCollector is FunctionFirstPassGroupingCollector)
    {
        // `as` yields null (rather than throwing) when the groups are not of the
        // requested element type; that value is passed through unchanged.
        return (IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<T>>)
            new FunctionDistinctValuesCollector(
                new Hashtable(),
                new BytesRefFieldSource(groupField),
                new BytesRefFieldSource(countField),
                searchGroups as IEnumerable<ISearchGroup<MutableValue>>);
    }

    return (IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<T>>)
        new TermDistinctValuesCollector(
            groupField,
            countField,
            searchGroups as IEnumerable<ISearchGroup<BytesRef>>);
}
/// <summary>
/// Indexes seven small documents (committing after the third), then runs the
/// two-pass distinct-values grouping for the content terms "random", "some"
/// and "blob" and checks the groups and their unique count-field values.
/// </summary>
public virtual void TestSimple()
{
    Random random = Random;
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
            .SetMergePolicy(NewLogMergePolicy()));

    // Doc values are only picked when the writer's codec is not "Lucene3x".
    DocValuesType dvType = DocValuesType.NONE;
    if (!"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal))
    {
        DocValuesType[] candidates =
        {
            DocValuesType.NUMERIC,
            DocValuesType.BINARY,
            DocValuesType.SORTED,
        };
        dvType = candidates[random.nextInt(candidates.Length)];
    }

    // 0
    Document document = new Document();
    AddField(document, groupField, "1", dvType);
    AddField(document, countField, "1", dvType);
    document.Add(new TextField("content", "random text", Field.Store.NO));
    document.Add(new StringField("id", "1", Field.Store.NO));
    writer.AddDocument(document);

    // 1
    document = new Document();
    AddField(document, groupField, "1", dvType);
    AddField(document, countField, "1", dvType);
    document.Add(new TextField("content", "some more random text blob", Field.Store.NO));
    document.Add(new StringField("id", "2", Field.Store.NO));
    writer.AddDocument(document);

    // 2
    document = new Document();
    AddField(document, groupField, "1", dvType);
    AddField(document, countField, "2", dvType);
    document.Add(new TextField("content", "some more random textual data", Field.Store.NO));
    document.Add(new StringField("id", "3", Field.Store.NO));
    writer.AddDocument(document);
    writer.Commit(); // To ensure a second segment

    // 3
    document = new Document();
    AddField(document, groupField, "2", dvType);
    document.Add(new TextField("content", "some random text", Field.Store.NO));
    document.Add(new StringField("id", "4", Field.Store.NO));
    writer.AddDocument(document);

    // 4
    document = new Document();
    AddField(document, groupField, "3", dvType);
    AddField(document, countField, "1", dvType);
    document.Add(new TextField("content", "some more random text", Field.Store.NO));
    document.Add(new StringField("id", "5", Field.Store.NO));
    writer.AddDocument(document);

    // 5
    document = new Document();
    AddField(document, groupField, "3", dvType);
    AddField(document, countField, "1", dvType);
    document.Add(new TextField("content", "random blob", Field.Store.NO));
    document.Add(new StringField("id", "6", Field.Store.NO));
    writer.AddDocument(document);

    // 6 -- no author field
    document = new Document();
    document.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    AddField(document, countField, "1", dvType);
    document.Add(new StringField("id", "6", Field.Store.NO));
    writer.AddDocument(document);

    IndexSearcher searcher = NewSearcher(writer.GetReader());
    writer.Dispose();

    var groupComparer = new ComparerAnonymousHelper1(this);

    // === Search for content:random
    List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> groupCounts =
        CollectDistinctGroupCounts(searcher, "random", dvType);
    groupCounts.Sort(groupComparer);
    assertEquals(4, groupCounts.Count);

    CompareNull(groupCounts[0].GroupValue);
    List<IComparable> uniqueValues = new List<IComparable>(groupCounts[0].UniqueValues);
    assertEquals(1, uniqueValues.Count);
    Compare("1", uniqueValues[0]);

    Compare("1", groupCounts[1].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[1].UniqueValues);
    uniqueValues.Sort(nullComparer);
    assertEquals(2, uniqueValues.Count);
    Compare("1", uniqueValues[0]);
    Compare("2", uniqueValues[1]);

    Compare("2", groupCounts[2].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[2].UniqueValues);
    assertEquals(1, uniqueValues.Count);
    CompareNull(uniqueValues[0]);

    Compare("3", groupCounts[3].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[3].UniqueValues);
    assertEquals(1, uniqueValues.Count);
    Compare("1", uniqueValues[0]);

    // === Search for content:some
    groupCounts = CollectDistinctGroupCounts(searcher, "some", dvType);
    groupCounts.Sort(groupComparer);
    assertEquals(3, groupCounts.Count);

    Compare("1", groupCounts[0].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[0].UniqueValues);
    assertEquals(2, uniqueValues.Count);
    uniqueValues.Sort(nullComparer);
    Compare("1", uniqueValues[0]);
    Compare("2", uniqueValues[1]);

    Compare("2", groupCounts[1].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[1].UniqueValues);
    assertEquals(1, uniqueValues.Count);
    CompareNull(uniqueValues[0]);

    Compare("3", groupCounts[2].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[2].UniqueValues);
    assertEquals(1, uniqueValues.Count);
    Compare("1", uniqueValues[0]);

    // === Search for content:blob
    groupCounts = CollectDistinctGroupCounts(searcher, "blob", dvType);
    groupCounts.Sort(groupComparer);
    assertEquals(2, groupCounts.Count);

    Compare("1", groupCounts[0].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[0].UniqueValues);
    // B/c the only one document matched with blob inside the author 1 group
    assertEquals(1, uniqueValues.Count);
    Compare("1", uniqueValues[0]);

    Compare("3", groupCounts[1].GroupValue);
    uniqueValues = new List<IComparable>(groupCounts[1].UniqueValues);
    assertEquals(1, uniqueValues.Count);
    Compare("1", uniqueValues[0]);

    searcher.IndexReader.Dispose();
    directory.Dispose();
}

/// <summary>
/// Runs the two grouping passes for a single "content" term and returns the
/// resulting group counts, unsorted, as a new list.
/// </summary>
/// <param name="searcher">Searcher both passes are executed against.</param>
/// <param name="contentTerm">Term looked up in the "content" field.</param>
/// <param name="dvType">Doc values type forwarded to both collector factories.</param>
/// <returns>A copy of the second-pass collector's groups.</returns>
private List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> CollectDistinctGroupCounts(
    IndexSearcher searcher,
    string contentTerm,
    DocValuesType dvType)
{
    // First pass: gather the top groups (at most 10) for the term.
    IAbstractFirstPassGroupingCollector<IComparable> firstPass =
        CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
    searcher.Search(new TermQuery(new Term("content", contentTerm)), firstPass);

    // Second pass: collect the distinct count-field values for those groups.
    IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<IComparable>> secondPass =
        CreateDistinctCountCollector(firstPass, groupField, countField, dvType);
    searcher.Search(new TermQuery(new Term("content", contentTerm)), secondPass);

    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
    return new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(secondPass.Groups);
}
/// <summary>
/// Randomized check: for several freshly created indexes, runs 100 searches
/// each with a random content term and top-N, and verifies that the two-pass
/// distinct-values grouping matches the result of <c>CreateExpectedResult</c>.
/// </summary>
public virtual void TestRandom()
{
    Random random = Random;
    int runCount = TestUtil.NextInt32(random, 3, 6);

    for (int indexIter = 0; indexIter < runCount; indexIter++)
    {
        IndexContext context = CreateIndexContext();

        for (int searchIter = 0; searchIter < 100; searchIter++)
        {
            IndexSearcher searcher = NewSearcher(context.indexReader);

            // Only flip the coin when the index actually has doc values.
            DocValuesType dvType = DocValuesType.NONE;
            if (context.dvType != DocValuesType.NONE && random.nextBoolean())
            {
                dvType = context.dvType;
            }

            string term = context.contentStrings[random.nextInt(context.contentStrings.Length)];
            Sort groupSort = new Sort(new SortField("id", SortFieldType.STRING));
            int topN = 1 + random.nextInt(10);

            List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> expectedResult =
                CreateExpectedResult(context, term, groupSort, topN);

            Term contentTerm = new Term("content", term);

            IAbstractFirstPassGroupingCollector<IComparable> firstPass =
                CreateRandomFirstPassCollector(dvType, groupSort, groupField, topN);
            searcher.Search(new TermQuery(contentTerm), firstPass);

            IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<IComparable>> secondPass =
                CreateDistinctCountCollector(firstPass, groupField, countField, dvType);
            searcher.Search(new TermQuery(contentTerm), secondPass);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> actualResult =
                new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(secondPass.Groups);

            if (VERBOSE)
            {
                Console.WriteLine("Index iter=" + indexIter);
                Console.WriteLine("Search iter=" + searchIter);
                Console.WriteLine("1st pass collector class name=" + firstPass.GetType().Name);
                Console.WriteLine("2nd pass collector class name=" + secondPass.GetType().Name);
                Console.WriteLine("Search term=" + term);
                Console.WriteLine("DVType=" + dvType);
                Console.WriteLine("1st pass groups=" + firstPass.GetTopGroups(0, false).toString());
                Console.WriteLine("Expected:");
                PrintGroups(expectedResult);
                Console.WriteLine("Actual:");
                PrintGroups(actualResult);
                Console.Out.Flush();
            }

            AssertGroupCountsMatch(expectedResult, actualResult);
        }

        context.indexReader.Dispose();
        context.directory.Dispose();
    }
}

/// <summary>
/// Asserts that two group-count lists have the same length and that, position
/// by position, the group values match and the unique values match once both
/// sides are sorted with <c>nullComparer</c>.
/// </summary>
/// <param name="expectedResult">Group counts the search is expected to produce.</param>
/// <param name="actualResult">Group counts the collectors actually produced.</param>
private void AssertGroupCountsMatch(
    List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> expectedResult,
    List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> actualResult)
{
    assertEquals(expectedResult.Count, actualResult.Count);

    for (int groupIndex = 0; groupIndex < expectedResult.Count; groupIndex++)
    {
        AbstractDistinctValuesCollector.IGroupCount<IComparable> expected = expectedResult[groupIndex];
        AbstractDistinctValuesCollector.IGroupCount<IComparable> actual = actualResult[groupIndex];

        AssertValues(expected.GroupValue, actual.GroupValue);
        assertEquals(expected.UniqueValues.Count(), actual.UniqueValues.Count());

        // Unique values are compared in sorted order on both sides.
        List<IComparable> expectedUnique = new List<IComparable>(expected.UniqueValues);
        expectedUnique.Sort(nullComparer);
        List<IComparable> actualUnique = new List<IComparable>(actual.UniqueValues);
        actualUnique.Sort(nullComparer);

        for (int valueIndex = 0; valueIndex < expectedUnique.Count; valueIndex++)
        {
            AssertValues(expectedUnique[valueIndex], actualUnique[valueIndex]);
        }
    }
}