/// <summary>
/// Builds a randomized index and, while indexing, records for every content term which
/// group values were indexed under each facet value.
/// </summary>
/// <param name="multipleFacetValuesPerDocument">
/// When <c>true</c>, documents carry <c>2 + random.nextInt(6)</c> "facet" string fields
/// and the doc-values variant is never selected; otherwise a single "facet" field is used.
/// </param>
/// <returns>
/// An <see cref="IndexContext"/> constructed from the expectation map, the reader obtained
/// from the writer, the directory, and the random parameters chosen in this method.
/// </returns>
private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
{
    Random random = Random;
    int numDocs = TestUtil.NextInt32(random, 138, 1145) * RandomMultiplier;
    int numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
    int numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

    if (Verbose)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
    }

    // Candidate group values, then candidate facet values (generation order matters for the random stream).
    var groups = new List<string>();
    while (groups.Count < numGroups)
    {
        groups.Add(GenerateRandomNonEmptyString());
    }

    var facetValues = new List<string>();
    while (facetValues.Count < numFacets)
    {
        facetValues.Add(GenerateRandomNonEmptyString());
    }

    var contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
    if (Verbose)
    {
        Console.WriteLine("TEST: create fake content");
    }

    for (int slot = 0; slot < contentBrs.Length; slot++)
    {
        string generated = GenerateRandomNonEmptyString();
        contentBrs[slot] = generated;
        if (Verbose)
        {
            Console.WriteLine(" content=" + generated);
        }
    }

    Directory dir = NewDirectory();
    var analyzer = new MockAnalyzer(random);
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, writerConfig);

    string codecName = writer.IndexWriter.Config.Codec.Name;
    bool canUseDV = !"Lucene3x".Equals(codecName, StringComparison.Ordinal);
    // Short-circuit order is significant: the random boolean is only drawn when DV is still possible.
    bool useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

    // Four reusable document templates, one per combination of "has group" / "has facet".
    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();

    Field group = NewStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
    if (useDv)
    {
        doc.Add(groupDc);
        docNoFacet.Add(groupDc);
    }

    doc.Add(group);
    docNoFacet.Add(group);

    Field[] facetFields;
    if (useDv)
    {
        Debug.Assert(!multipleFacetValuesPerDocument);

        Field facetString = NewStringField("facet", "", Field.Store.NO);
        doc.Add(facetString);
        docNoGroup.Add(facetString);

        Field facetDocValues = new SortedDocValuesField("facet_dv", new BytesRef());
        doc.Add(facetDocValues);
        docNoGroup.Add(facetDocValues);

        facetFields = new[] { facetString, facetDocValues };
    }
    else
    {
        int facetFieldCount = multipleFacetValuesPerDocument ? 2 + random.nextInt(6) : 1;
        facetFields = new Field[facetFieldCount];
        for (int f = 0; f < facetFields.Length; f++)
        {
            Field facetString = NewStringField("facet", "", Field.Store.NO);
            facetFields[f] = facetString;
            doc.Add(facetString);
            docNoGroup.Add(facetString);
        }
    }

    Field content = NewStringField("content", "", Field.Store.NO);
    doc.Add(content);
    docNoGroup.Add(content);
    docNoFacet.Add(content);
    docNoGroupNoFacet.Add(content);

    // Ordinal ordering with null sorted before every non-null value.
    ISet<string> uniqueFacetValues = new JCG.SortedSet<string>(Comparer<string>.Create((left, right) =>
    {
        if (left == right)
        {
            return 0;
        }

        if (left == null)
        {
            return -1;
        }

        if (right == null)
        {
            return 1;
        }

        return left.CompareToOrdinal(right);
    }));

    // LUCENENET NOTE: Need JCG.Dictionary here because of null keys
    IDictionary<string, JCG.Dictionary<string, ISet<string>>> searchTermToFacetToGroups =
        new Dictionary<string, JCG.Dictionary<string, ISet<string>>>();
    int facetWithMostGroups = 0;

    for (int docIndex = 0; docIndex < numDocs; docIndex++)
    {
        string groupValue;
        if (random.nextInt(24) == 17)
        {
            // So we test the "doc doesn't have the group'd field" case:
            groupValue = useDv ? "" : null;
        }
        else
        {
            groupValue = groups[random.nextInt(groups.size())];
        }

        string contentStr = contentBrs[random.nextInt(contentBrs.Length)];

        JCG.Dictionary<string, ISet<string>> facetToGroups;
        if (!searchTermToFacetToGroups.TryGetValue(contentStr, out facetToGroups))
        {
            facetToGroups = new JCG.Dictionary<string, ISet<string>>();
            searchTermToFacetToGroups[contentStr] = facetToGroups;
        }

        // Registers the current group under the given facet value (which may be null)
        // and keeps the running maximum of groups seen for any single facet.
        void RecordGroupUnderFacet(string facetKey)
        {
            uniqueFacetValues.Add(facetKey);
            if (!facetToGroups.TryGetValue(facetKey, out ISet<string> groupsInFacet))
            {
                groupsInFacet = new JCG.HashSet<string>();
                facetToGroups[facetKey] = groupsInFacet;
            }

            groupsInFacet.add(groupValue);
            facetWithMostGroups = Math.Max(facetWithMostGroups, groupsInFacet.size());
        }

        var facetVals = new List<string>();
        // With DV every document gets a facet; otherwise roughly 1 in 24 documents gets none.
        if (useDv || random.nextInt(24) != 18)
        {
            if (useDv)
            {
                string facetValue = facetValues[random.nextInt(facetValues.size())];
                RecordGroupUnderFacet(facetValue);
                facetFields[0].SetStringValue(facetValue);
                facetFields[1].SetBytesValue(new BytesRef(facetValue));
                facetVals.Add(facetValue);
            }
            else
            {
                foreach (Field facetField in facetFields)
                {
                    string facetValue = facetValues[random.nextInt(facetValues.size())];
                    RecordGroupUnderFacet(facetValue);
                    facetField.SetStringValue(facetValue);
                    facetVals.Add(facetValue);
                }
            }
        }
        else
        {
            RecordGroupUnderFacet(null);
        }

        if (Verbose)
        {
            Console.WriteLine(" doc content=" + contentStr + " group=" + (groupValue ?? "null") + " facetVals=" + Collections.ToString(facetVals));
        }

        if (groupValue != null)
        {
            if (useDv)
            {
                groupDc.SetBytesValue(new BytesRef(groupValue));
            }

            group.SetStringValue(groupValue);
        }
        else if (useDv)
        {
            // DV cannot have missing values:
            groupDc.SetBytesValue(new BytesRef());
        }

        content.SetStringValue(contentStr);

        // Pick the template matching which of group / facet this document actually has.
        bool hasGroup = groupValue != null;
        bool hasFacet = facetVals.Count != 0;
        Document template;
        if (hasFacet)
        {
            template = hasGroup ? doc : docNoGroup;
        }
        else
        {
            template = hasGroup ? docNoFacet : docNoGroupNoFacet;
        }

        writer.AddDocument(template);
    }

    DirectoryReader reader = writer.GetReader();
    writer.Dispose();

    return new IndexContext(
        searchTermToFacetToGroups,
        reader,
        numDocs,
        dir,
        facetWithMostGroups,
        numGroups,
        contentBrs,
        uniqueFacetValues,
        useDv);
}
/// <summary>
/// Builds a randomized index and, while indexing, records for every content term which
/// group values were indexed under each facet value.
/// </summary>
/// <param name="multipleFacetValuesPerDocument">
/// When <c>true</c>, documents carry <c>2 + random.nextInt(6)</c> "facet" string fields
/// and the doc-values variant is never selected; otherwise a single "facet" field is used.
/// </param>
/// <returns>
/// An <see cref="IndexContext"/> constructed from the expectation map, the reader obtained
/// from the writer, the directory, and the random parameters chosen in this method.
/// </returns>
private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
{
    Random random = Random;
    int numDocs = TestUtil.NextInt32(random, 138, 1145) * RANDOM_MULTIPLIER;
    int numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
    int numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
    }

    List<string> groups = new List<string>();
    for (int i = 0; i < numGroups; i++)
    {
        groups.Add(GenerateRandomNonEmptyString());
    }

    List<string> facetValues = new List<string>();
    for (int i = 0; i < numFacets; i++)
    {
        facetValues.Add(GenerateRandomNonEmptyString());
    }

    string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
    if (VERBOSE)
    {
        Console.WriteLine("TEST: create fake content");
    }

    for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
    {
        contentBrs[contentIDX] = GenerateRandomNonEmptyString();
        if (VERBOSE)
        {
            Console.WriteLine(" content=" + contentBrs[contentIDX]);
        }
    }

    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    // Short-circuit order is significant: the random boolean is only drawn when DV is still possible.
    bool useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

    // Four reusable document templates, one per combination of "has group" / "has facet".
    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();

    Field group = NewStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
    if (useDv)
    {
        doc.Add(groupDc);
        docNoFacet.Add(groupDc);
    }

    doc.Add(group);
    docNoFacet.Add(group);

    Field[] facetFields;
    if (useDv)
    {
        Debug.Assert(!multipleFacetValuesPerDocument);
        facetFields = new Field[2];
        facetFields[0] = NewStringField("facet", "", Field.Store.NO);
        doc.Add(facetFields[0]);
        docNoGroup.Add(facetFields[0]);
        facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
        doc.Add(facetFields[1]);
        docNoGroup.Add(facetFields[1]);
    }
    else
    {
        facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
        for (int i = 0; i < facetFields.Length; i++)
        {
            facetFields[i] = NewStringField("facet", "", Field.Store.NO);
            doc.Add(facetFields[i]);
            docNoGroup.Add(facetFields[i]);
        }
    }

    Field content = NewStringField("content", "", Field.Store.NO);
    doc.Add(content);
    docNoGroup.Add(content);
    docNoFacet.Add(content);
    docNoGroupNoFacet.Add(content);

    // LUCENENET NOTE: Java uses a TreeSet here; SortedSet with a custom comparer stands in for it.
    // Based on the name of the variable, the entries are meant to be unique.
    ISet<string> uniqueFacetValues = new SortedSet<string>(new ComparerAnonymousHelper1());

    // LUCENENET NOTE: Need HashMap here because of null keys
    IDictionary<string, HashMap<string, ISet<string>>> searchTermToFacetToGroups =
        new Dictionary<string, HashMap<string, ISet<string>>>();
    int facetWithMostGroups = 0;

    for (int i = 0; i < numDocs; i++)
    {
        string groupValue;
        if (random.nextInt(24) == 17)
        {
            // So we test the "doc doesn't have the group'd field" case:
            if (useDv)
            {
                groupValue = "";
            }
            else
            {
                groupValue = null;
            }
        }
        else
        {
            groupValue = groups[random.nextInt(groups.size())];
        }

        string contentStr = contentBrs[random.nextInt(contentBrs.Length)];

        // Single lookup instead of ContainsKey followed by the indexer.
        HashMap<string, ISet<string>> facetMap;
        if (!searchTermToFacetToGroups.TryGetValue(contentStr, out facetMap))
        {
            facetMap = new HashMap<string, ISet<string>>();
            searchTermToFacetToGroups[contentStr] = facetMap;
        }

        IDictionary<string, ISet<string>> facetToGroups = facetMap;

        List<string> facetVals = new List<string>();
        // With DV every document gets a facet; otherwise the second random draw decides.
        if (useDv || random.nextInt(24) != 18)
        {
            if (useDv)
            {
                string facetValue = facetValues[random.nextInt(facetValues.size())];
                uniqueFacetValues.Add(facetValue);

                ISet<string> groupsInFacet;
                if (!facetToGroups.TryGetValue(facetValue, out groupsInFacet))
                {
                    groupsInFacet = new HashSet<string>();
                    facetToGroups[facetValue] = groupsInFacet;
                }

                groupsInFacet.add(groupValue);
                if (groupsInFacet.size() > facetWithMostGroups)
                {
                    facetWithMostGroups = groupsInFacet.size();
                }

                facetFields[0].SetStringValue(facetValue);
                facetFields[1].SetBytesValue(new BytesRef(facetValue));
                facetVals.Add(facetValue);
            }
            else
            {
                foreach (Field facetField in facetFields)
                {
                    string facetValue = facetValues[random.nextInt(facetValues.size())];
                    uniqueFacetValues.Add(facetValue);

                    ISet<string> groupsInFacet;
                    if (!facetToGroups.TryGetValue(facetValue, out groupsInFacet))
                    {
                        groupsInFacet = new HashSet<string>();
                        facetToGroups[facetValue] = groupsInFacet;
                    }

                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }

                    facetField.SetStringValue(facetValue);
                    facetVals.Add(facetValue);
                }
            }
        }
        else
        {
            uniqueFacetValues.Add(null);

            // Null facet key: stays on ContainsKey/Put/indexer, the calls this method already
            // uses with a null key against the HashMap.
            if (!facetToGroups.ContainsKey(null))
            {
                facetToGroups.Put(null, new HashSet<string>());
            }

            ISet<string> groupsInFacet = facetToGroups[null];
            groupsInFacet.add(groupValue);
            if (groupsInFacet.size() > facetWithMostGroups)
            {
                facetWithMostGroups = groupsInFacet.size();
            }
        }

        if (VERBOSE)
        {
            // Join the values explicitly: concatenating a List<string> would only print its type name.
            Console.WriteLine(" doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=[" + string.Join(", ", facetVals) + "]");
        }

        if (groupValue != null)
        {
            if (useDv)
            {
                groupDc.SetBytesValue(new BytesRef(groupValue));
            }

            group.SetStringValue(groupValue);
        }
        else if (useDv)
        {
            // DV cannot have missing values:
            groupDc.SetBytesValue(new BytesRef());
        }

        content.SetStringValue(contentStr);

        // Pick the template matching which of group / facet this document actually has.
        if (groupValue == null && facetVals.Count == 0)
        {
            writer.AddDocument(docNoGroupNoFacet);
        }
        else if (facetVals.Count == 0)
        {
            writer.AddDocument(docNoFacet);
        }
        else if (groupValue == null)
        {
            writer.AddDocument(docNoGroup);
        }
        else
        {
            writer.AddDocument(doc);
        }
    }

    DirectoryReader reader = writer.GetReader();
    writer.Dispose();

    return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv);
}