/// <summary>
/// Adds the group value to <paramref name="doc"/> as an indexed text field and,
/// when index doc values are usable, mirrors it into a "&lt;groupField&gt;_dv"
/// doc-values field of the requested <paramref name="valueType"/>.
/// </summary>
private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, DocValuesType valueType)
{
    // The group value is always indexed as plain stored text.
    doc.Add(new TextField(groupField, value, Field.Store.YES));
    if (!canUseIDV)
    {
        return;
    }

    string dvFieldName = groupField + "_dv";
    Field dvField = null;
    if (valueType == DocValuesType.BINARY)
    {
        dvField = new BinaryDocValuesField(dvFieldName, new BytesRef(value));
    }
    else if (valueType == DocValuesType.SORTED)
    {
        dvField = new SortedDocValuesField(dvFieldName, new BytesRef(value));
    }
    else
    {
        // Only BINARY and SORTED are exercised by this test.
        fail("unhandled type");
    }
    doc.Add(dvField);
}
// Builds one reusable Document whose Field instances are kept as members,
// so callers can mutate the field values in place and re-add the same
// Document repeatedly (avoids re-allocating fields per indexed doc).
public DocState(bool useDocValues)
{
    Doc = new Document();

    // Un-tokenized title, not stored.
    Title = new StringField("title", "", Field.Store.NO);
    Doc.Add(Title);

    // Stored, tokenized field type with full term vectors
    // (vectors + offsets + positions) — shared by title/body below.
    FieldType ft = new FieldType(TextField.TYPE_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorOffsets = true,
        StoreTermVectorPositions = true
    };
    TitleTokenized = new Field("titleTokenized", "", ft);
    Doc.Add(TitleTokenized);

    Body = new Field("body", "", ft);
    Doc.Add(Body);

    // Stored identifiers used to look documents back up after indexing.
    Id = new StringField("docid", "", Field.Store.YES);
    Doc.Add(Id);

    Date = new StringField("date", "", Field.Store.YES);
    Doc.Add(Date);

    if (useDocValues)
    {
        // Mirror the title into a sorted doc-values field; starts empty.
        TitleDV = new SortedDocValuesField("titleDV", new BytesRef());
        Doc.Add(TitleDV);
    }
    else
    {
        TitleDV = null;
    }
}
// Stress test: indexes int.MaxValue documents whose sorted doc-values field
// holds a 2-byte big-endian counter (values repeat every 65536 docs), force
// merges to a single segment, then re-reads every doc and verifies the value.
// NOTE(review): this is a multi-hour "2B" test — presumably only run with a
// nightly/monster test category; confirm against the test fixture attributes.
public virtual void TestFixedSorted([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedSorted"));
    if (dir is MockDirectoryWrapper)
    {
        // Throttling would make a 2B-doc run take far too long.
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetRAMBufferSizeMB(256.0)
        .SetMergeScheduler(scheduler)
        .SetMergePolicy(NewLogMergePolicy(false, 10))
        .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

    Document doc = new Document();
    // 'data' wraps 'bytes', so mutating 'bytes' below changes both the value
    // the writer indexes and the expected value used during verification.
    var bytes = new byte[2];
    BytesRef data = new BytesRef(bytes);
    SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
    doc.Add(dvField);

    for (int i = 0; i < int.MaxValue; i++)
    {
        // Big-endian low 16 bits of the loop counter.
        bytes[0] = (byte)(i >> 8);
        bytes[1] = (byte)i;
        w.AddDocument(doc);
        if (i % 100000 == 0)
        {
            Console.WriteLine("indexed: " + i);
            Console.Out.Flush();
        }
    }

    w.ForceMerge(1);
    w.Dispose();

    Console.WriteLine("verifying...");
    Console.Out.Flush();

    DirectoryReader r = DirectoryReader.Open(dir);
    // expectedValue advances across segment boundaries; docs were added in order.
    int expectedValue = 0;
    foreach (AtomicReaderContext context in r.Leaves)
    {
        AtomicReader reader = context.AtomicReader;
        BytesRef scratch = new BytesRef();
        BinaryDocValues dv = reader.GetSortedDocValues("dv");
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            // Rebuild the expected 2-byte value in-place (data still wraps bytes).
            bytes[0] = (byte)(expectedValue >> 8);
            bytes[1] = (byte)expectedValue;
            dv.Get(i, scratch);
            Assert.AreEqual(data, scratch);
            expectedValue++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
// Randomized end-to-end test for the all-group-heads collectors: builds a
// random index of grouped documents, runs a reference (brute force)
// implementation and the collector under test, and asserts both report the
// same set of group-head documents across many random sorts.
public void TestRandom()
{
    int numberOfRuns = TestUtil.NextInt32(Random, 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
        }

        int numDocs = TestUtil.NextInt32(Random, 100, 1000) * RANDOM_MULTIPLIER;
        int numGroups = TestUtil.NextInt32(Random, 1, numDocs);

        if (VERBOSE)
        {
            Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }

        List<BytesRef> groups = new List<BytesRef>();
        for (int i = 0; i < numGroups; i++)
        {
            string randomValue;
            do
            {
                // B/c of DV based impl we can't see the difference between an empty string and a null value.
                // For that reason we don't generate empty string groups.
                randomValue = TestUtil.RandomRealisticUnicodeString(Random);
            } while ("".Equals(randomValue, StringComparison.Ordinal));
            groups.Add(new BytesRef(randomValue));
        }

        // Each content string starts with "real<0..2>" so searches on
        // "real0".."real2" partition the docs; "fake" tokens are padding.
        string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)];
        if (VERBOSE)
        {
            Console.WriteLine("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
        {
            StringBuilder sb = new StringBuilder();
            sb.append("real").append(Random.nextInt(3)).append(' ');
            int fakeCount = Random.nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
            {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE)
            {
                Console.WriteLine("  content=" + sb.toString());
            }
        }

        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(
            Random,
            dir,
            NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
        // The 3.x codec cannot write doc values.
        bool preFlex = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
        bool canUseIDV = !preFlex;
        DocValuesType valueType = vts[Random.nextInt(vts.Length)];

        // Reusable documents: 'doc' has the group field, 'docNoGroup' does not;
        // the sort/content/id Field instances are shared between both.
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field group = NewStringField("group", "", Field.Store.NO);
        doc.Add(group);
        Field valuesField = null;
        if (canUseIDV)
        {
            switch (valueType)
            {
                case DocValuesType.BINARY:
                    valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                    break;
                case DocValuesType.SORTED:
                    valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                    break;
                default:
                    fail("unhandled type");
                    break;
            }
            doc.Add(valuesField);
        }
        Field sort1 = NewStringField("sort1", "", Field.Store.NO);
        doc.Add(sort1);
        docNoGroup.Add(sort1);
        Field sort2 = NewStringField("sort2", "", Field.Store.NO);
        doc.Add(sort2);
        docNoGroup.Add(sort2);
        Field sort3 = NewStringField("sort3", "", Field.Store.NO);
        doc.Add(sort3);
        docNoGroup.Add(sort3);
        Field content = NewTextField("content", "", Field.Store.NO);
        doc.Add(content);
        docNoGroup.Add(content);
        Int32Field id = new Int32Field("id", 0, Field.Store.NO);
        doc.Add(id);
        docNoGroup.Add(id);

        GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++)
        {
            BytesRef groupValue;
            if (Random.nextInt(24) == 17)
            {
                // So we test the "doc doesn't have the group'd
                // field" case:
                groupValue = null;
            }
            else
            {
                groupValue = groups[Random.nextInt(groups.size())];
            }
            GroupDoc groupDoc = new GroupDoc(
                i,
                groupValue,
                groups[Random.nextInt(groups.size())],
                groups[Random.nextInt(groups.size())],
                // zero-padded so string sort order matches numeric order
                new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                contentStrings[Random.nextInt(contentStrings.Length)]
            );

            if (VERBOSE)
            {
                Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
            }

            groupDocs[i] = groupDoc;
            if (groupDoc.group != null)
            {
                group.SetStringValue(groupDoc.group.Utf8ToString());
                if (canUseIDV)
                {
                    valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString()));
                }
            }
            sort1.SetStringValue(groupDoc.sort1.Utf8ToString());
            sort2.SetStringValue(groupDoc.sort2.Utf8ToString());
            sort3.SetStringValue(groupDoc.sort3.Utf8ToString());
            content.SetStringValue(groupDoc.content);
            id.SetInt32Value(groupDoc.id);
            if (groupDoc.group == null)
            {
                w.AddDocument(docNoGroup);
            }
            else
            {
                w.AddDocument(doc);
            }
        }

        DirectoryReader r = w.GetReader();
        w.Dispose();

        // NOTE: intentional but temporary field cache insanity!
        // Maps Lucene doc ids <-> our logical ids (the reader may have
        // reordered docs across segments/merges).
        FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);
        int[] fieldIdToDocID = new int[numDocs];
        for (int i = 0; i < numDocs; i++)
        {
            int fieldId = docIdToFieldId.Get(i);
            fieldIdToDocID[fieldId] = i;
        }

        try
        {
            IndexSearcher s = NewSearcher(r);
            if (typeof(SlowCompositeReaderWrapper).GetTypeInfo().IsAssignableFrom(s.IndexReader.GetType()))
            {
                // DV-based collectors need an atomic reader; composite wrapper disables them.
                canUseIDV = false;
            }
            else
            {
                canUseIDV = !preFlex;
            }

            // Pre-compute each doc's score for every "realN" term; the
            // reference group-head computation below relies on gd.score.
            for (int contentID = 0; contentID < 3; contentID++)
            {
                ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                foreach (ScoreDoc hit in hits)
                {
                    GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                    assertTrue(gd.score == 0.0);
                    gd.score = hit.Score;
                    int docId = gd.id;
                    assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                }
            }

            // Every doc must have matched exactly one "realN" query.
            foreach (GroupDoc gd in groupDocs)
            {
                assertTrue(gd.score != 0.0);
            }

            for (int searchIter = 0; searchIter < 100; searchIter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: searchIter=" + searchIter);
                }

                string searchTerm = "real" + Random.nextInt(3);
                bool sortByScoreOnly = Random.nextBoolean();
                Sort sortWithinGroup = GetRandomSort(sortByScoreOnly);
                AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                int[] actualGroupHeads = allGroupHeadsCollector.RetrieveGroupHeads();

                // The actual group heads contains Lucene ids. Need to change them into our id value.
                for (int i = 0; i < actualGroupHeads.Length; i++)
                {
                    actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                }

                // Allows us the easily iterate and assert the actual and expected results.
                Array.Sort(expectedGroupHeads);
                Array.Sort(actualGroupHeads);

                if (VERBOSE)
                {
                    Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                    Console.WriteLine("Sort within group: " + sortWithinGroup);
                    Console.WriteLine("Num group: " + numGroups);
                    Console.WriteLine("Num doc: " + numDocs);
                    Console.WriteLine("\n=== Expected: \n");
                    foreach (int expectedDocId in expectedGroupHeads)
                    {
                        GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                        string expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                        Console.WriteLine(
                            string.Format(CultureInfo.InvariantCulture,
                                          "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                          expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                          expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                        );
                    }
                    Console.WriteLine("\n=== Actual: \n");
                    foreach (int actualDocId in actualGroupHeads)
                    {
                        GroupDoc actualGroupDoc = groupDocs[actualDocId];
                        string actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                        Console.WriteLine(
                            string.Format(CultureInfo.InvariantCulture,
                                          "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                          actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                          actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                        );
                    }
                    Console.WriteLine("\n===================================================================================");
                }

                assertArrayEquals(expectedGroupHeads, actualGroupHeads);
            }
        }
        finally
        {
            // Always release the "insane" field cache entries created above.
            QueryUtils.PurgeFieldCache(r);
        }

        r.Dispose();
        dir.Dispose();
    }
}
// Builds a random index for grouped-faceting tests and, in parallel, the
// in-memory expectation (searchTerm -> facet -> set of groups) the assertions
// are checked against. Returns both packaged in an IndexContext.
private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
{
    Random random = Random;
    int numDocs = TestUtil.NextInt32(random, 138, 1145) * RandomMultiplier;
    int numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
    int numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

    if (Verbose)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
    }

    List<string> groups = new List<string>();
    for (int i = 0; i < numGroups; i++)
    {
        groups.Add(GenerateRandomNonEmptyString());
    }
    List<string> facetValues = new List<string>();
    for (int i = 0; i < numFacets; i++)
    {
        facetValues.Add(GenerateRandomNonEmptyString());
    }
    string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
    if (Verbose)
    {
        Console.WriteLine("TEST: create fake content");
    }
    for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
    {
        contentBrs[contentIDX] = GenerateRandomNonEmptyString();
        if (Verbose)
        {
            Console.WriteLine(" content=" + contentBrs[contentIDX]);
        }
    }

    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(
            TEST_VERSION_CURRENT,
            new MockAnalyzer(random)
        )
    );
    // The 3.x codec cannot write doc values; doc values are also skipped when
    // a doc may carry multiple facet values (sorted DV holds a single value).
    bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    bool useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

    // Four reusable documents covering the group/facet presence combinations;
    // Field instances are shared and mutated per indexed doc.
    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();
    Field group = NewStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
    if (useDv)
    {
        doc.Add(groupDc);
        docNoFacet.Add(groupDc);
    }
    doc.Add(group);
    docNoFacet.Add(group);
    Field[] facetFields;
    if (useDv)
    {
        Debug.Assert(!multipleFacetValuesPerDocument);
        facetFields = new Field[2];
        facetFields[0] = NewStringField("facet", "", Field.Store.NO);
        doc.Add(facetFields[0]);
        docNoGroup.Add(facetFields[0]);
        facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
        doc.Add(facetFields[1]);
        docNoGroup.Add(facetFields[1]);
    }
    else
    {
        facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
        for (int i = 0; i < facetFields.Length; i++)
        {
            facetFields[i] = NewStringField("facet", "", Field.Store.NO);
            doc.Add(facetFields[i]);
            docNoGroup.Add(facetFields[i]);
        }
    }
    Field content = NewStringField("content", "", Field.Store.NO);
    doc.Add(content);
    docNoGroup.Add(content);
    docNoFacet.Add(content);
    docNoGroupNoFacet.Add(content);

    // Null-tolerant ordinal comparer: null sorts first.
    ISet<string> uniqueFacetValues = new JCG.SortedSet<string>(Comparer<string>.Create((a, b) => {
        if (a == b)
        {
            return (0);
        }
        else if (a == null)
        {
            return (-1);
        }
        else if (b == null)
        {
            return (1);
        }
        else
        {
            return (a.CompareToOrdinal(b));
        }
    }));

    // LUCENENET NOTE: Need JCG.Dictionary here because of null keys
    IDictionary<string, JCG.Dictionary<string, ISet<string>>> searchTermToFacetToGroups = new Dictionary<string, JCG.Dictionary<string, ISet<string>>>();
    int facetWithMostGroups = 0;
    for (int i = 0; i < numDocs; i++)
    {
        string groupValue;
        if (random.nextInt(24) == 17)
        {
            // So we test the "doc doesn't have the group'd
            // field" case:
            if (useDv)
            {
                groupValue = "";
            }
            else
            {
                groupValue = null;
            }
        }
        else
        {
            groupValue = groups[random.nextInt(groups.size())];
        }
        string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
        if (!searchTermToFacetToGroups.TryGetValue(contentStr, out JCG.Dictionary<string, ISet<string>> facetToGroups))
        {
            searchTermToFacetToGroups[contentStr] = facetToGroups = new JCG.Dictionary<string, ISet<string>>();
        }

        List<string> facetVals = new List<string>();
        // With 1/24 probability (and never in DV mode) the doc gets no facet.
        if (useDv || random.nextInt(24) != 18)
        {
            if (useDv)
            {
                string facetValue = facetValues[random.nextInt(facetValues.size())];
                uniqueFacetValues.Add(facetValue);
                if (!facetToGroups.TryGetValue(facetValue, out ISet<string> groupsInFacet))
                {
                    facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet<string>();
                }
                groupsInFacet.add(groupValue);
                if (groupsInFacet.size() > facetWithMostGroups)
                {
                    facetWithMostGroups = groupsInFacet.size();
                }
                facetFields[0].SetStringValue(facetValue);
                facetFields[1].SetBytesValue(new BytesRef(facetValue));
                facetVals.Add(facetValue);
            }
            else
            {
                foreach (Field facetField in facetFields)
                {
                    string facetValue = facetValues[random.nextInt(facetValues.size())];
                    uniqueFacetValues.Add(facetValue);
                    if (!facetToGroups.TryGetValue(facetValue, out ISet<string> groupsInFacet))
                    {
                        facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet<string>();
                    }
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                    facetField.SetStringValue(facetValue);
                    facetVals.Add(facetValue);
                }
            }
        }
        else
        {
            // No facet on this doc: tracked under the null key.
            uniqueFacetValues.Add(null);
            if (!facetToGroups.TryGetValue(null, out ISet<string> groupsInFacet))
            {
                facetToGroups[null] = groupsInFacet = new JCG.HashSet<string>();
            }
            groupsInFacet.add(groupValue);
            if (groupsInFacet.size() > facetWithMostGroups)
            {
                facetWithMostGroups = groupsInFacet.size();
            }
        }

        if (Verbose)
        {
            Console.WriteLine(" doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + Collections.ToString(facetVals));
        }

        if (groupValue != null)
        {
            if (useDv)
            {
                groupDc.SetBytesValue(new BytesRef(groupValue));
            }
            group.SetStringValue(groupValue);
        }
        else if (useDv)
        {
            // DV cannot have missing values:
            groupDc.SetBytesValue(new BytesRef());
        }
        content.SetStringValue(contentStr);

        // Pick the prototype document matching this doc's group/facet presence.
        if (groupValue == null && facetVals.Count == 0)
        {
            writer.AddDocument(docNoGroupNoFacet);
        }
        else if (facetVals.Count == 0)
        {
            writer.AddDocument(docNoFacet);
        }
        else if (groupValue == null)
        {
            writer.AddDocument(docNoGroup);
        }
        else
        {
            writer.AddDocument(doc);
        }
    }

    DirectoryReader reader = writer.GetReader();
    writer.Dispose();

    return (new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv));
}
// Indexes random values into a doc-values field of the given type alongside
// a numeric "id" field, then verifies that the matching ValueSource
// (BytesRefFieldSource / Int64FieldSource) returns exactly the indexed values.
private void DoTest(DocValuesType type)
{
    Directory d = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    int nDocs = AtLeast(50);
    // "id" records each doc's slot in 'vals' so values survive reordering.
    Field id = new NumericDocValuesField("id", 0);
    Field f;
    switch (type)
    {
        case DocValuesType.BINARY:
            f = new BinaryDocValuesField("dv", new BytesRef());
            break;
        case DocValuesType.SORTED:
            f = new SortedDocValuesField("dv", new BytesRef());
            break;
        case DocValuesType.NUMERIC:
            f = new NumericDocValuesField("dv", 0);
            break;
        default:
            throw AssertionError.Create();
    }
    Document document = new Document();
    document.Add(id);
    document.Add(f);

    // vals[i] holds the expected value (string or boxed long) for logical doc i.
    object[] vals = new object[nDocs];

    RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);
    for (int i = 0; i < nDocs; ++i)
    {
        id.SetInt64Value(i);
        switch (type)
        {
            case DocValuesType.SORTED:
            case DocValuesType.BINARY:
                do
                {
                    // Non-empty random string (empty would be ambiguous for SORTED).
                    vals[i] = TestUtil.RandomSimpleString(Random, 20);
                } while (((string)vals[i]).Length == 0);
                f.SetBytesValue(new BytesRef((string)vals[i]));
                break;
            case DocValuesType.NUMERIC:
                int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int
                vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                f.SetInt64Value((long)vals[i]);
                break;
        }
        iw.AddDocument(document);
        // Occasional commits produce multiple segments/leaves to verify below.
        if (Random.NextBoolean() && i % 10 == 9)
        {
            iw.Commit();
        }
    }
    iw.Dispose();

    DirectoryReader rd = DirectoryReader.Open(d);
    foreach (AtomicReaderContext leave in rd.Leaves)
    {
        FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);
        ValueSource vs;
        switch (type)
        {
            case DocValuesType.BINARY:
            case DocValuesType.SORTED:
                vs = new BytesRefFieldSource("dv");
                break;
            case DocValuesType.NUMERIC:
                vs = new Int64FieldSource("dv");
                break;
            default:
                throw AssertionError.Create();
        }
        FunctionValues values = vs.GetValues(null, leave);
        BytesRef bytes = new BytesRef();
        for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
        {
            assertTrue(values.Exists(i));
            if (vs is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(i) is string);
            }
            else if (vs is Int64FieldSource)
            {
                assertTrue(values.ObjectVal(i) is J2N.Numerics.Int64);
                assertTrue(values.BytesVal(i, bytes));
            }
            else
            {
                throw AssertionError.Create();
            }

            // Map the segment-local doc back to its logical slot via "id".
            object expected = vals[ids.Int32Val(i)];
            switch (type)
            {
                case DocValuesType.SORTED:
                    values.OrdVal(i); // no exception
                    assertTrue(values.NumOrd >= 1);
                    // SORTED is also checked with the BINARY assertions below.
                    goto case DocValuesType.BINARY;
                case DocValuesType.BINARY:
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertTrue(values.BytesVal(i, bytes));
                    assertEquals(new BytesRef((string)expected), bytes);
                    break;
                case DocValuesType.NUMERIC:
                    assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                    break;
            }
        }
    }
    rd.Dispose();
    d.Dispose();
}
// Builds a random index for grouped-faceting tests and, in parallel, the
// in-memory expectation (searchTerm -> facet -> set of groups) the assertions
// are checked against. Returns both packaged in an IndexContext.
//
// Review fixes: replaced ContainsKey + indexer double lookups with
// TryGetValue (matching the sibling CreateIndexContext implementation),
// replaced !facetVals.Any() with facetVals.Count == 0, and fixed the VERBOSE
// log that concatenated the List<string> object (printing its type name
// instead of its contents).
private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
{
    Random random = Random;
    int numDocs = TestUtil.NextInt32(random, 138, 1145) * RANDOM_MULTIPLIER;
    int numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
    int numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
    }

    List<string> groups = new List<string>();
    for (int i = 0; i < numGroups; i++)
    {
        groups.Add(GenerateRandomNonEmptyString());
    }
    List<string> facetValues = new List<string>();
    for (int i = 0; i < numFacets; i++)
    {
        facetValues.Add(GenerateRandomNonEmptyString());
    }
    string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
    if (VERBOSE)
    {
        Console.WriteLine("TEST: create fake content");
    }
    for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
    {
        contentBrs[contentIDX] = GenerateRandomNonEmptyString();
        if (VERBOSE)
        {
            Console.WriteLine(" content=" + contentBrs[contentIDX]);
        }
    }

    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(
            TEST_VERSION_CURRENT,
            new MockAnalyzer(random)
        )
    );
    // The 3.x codec cannot write doc values; doc values are also skipped when
    // a doc may carry multiple facet values (sorted DV holds a single value).
    bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    bool useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

    // Four reusable documents covering the group/facet presence combinations;
    // Field instances are shared and mutated per indexed doc.
    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();
    Field group = NewStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
    if (useDv)
    {
        doc.Add(groupDc);
        docNoFacet.Add(groupDc);
    }
    doc.Add(group);
    docNoFacet.Add(group);
    Field[] facetFields;
    if (useDv)
    {
        Debug.Assert(!multipleFacetValuesPerDocument);
        facetFields = new Field[2];
        facetFields[0] = NewStringField("facet", "", Field.Store.NO);
        doc.Add(facetFields[0]);
        docNoGroup.Add(facetFields[0]);
        facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
        doc.Add(facetFields[1]);
        docNoGroup.Add(facetFields[1]);
    }
    else
    {
        facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
        for (int i = 0; i < facetFields.Length; i++)
        {
            facetFields[i] = NewStringField("facet", "", Field.Store.NO);
            doc.Add(facetFields[i]);
            docNoGroup.Add(facetFields[i]);
        }
    }
    Field content = NewStringField("content", "", Field.Store.NO);
    doc.Add(content);
    docNoGroup.Add(content);
    docNoFacet.Add(content);
    docNoGroupNoFacet.Add(content);

    // LUCENENET NOTE: TreeSet (the class used in Java) allows duplicate keys. However, SortedSet seems to work,
    // and based on the name of the variable, presuming the entries are meant to be unique.
    ISet<string> uniqueFacetValues = new SortedSet<string>(new ComparerAnonymousHelper1());

    // LUCENENET NOTE: Need HashMap here because of null keys
    IDictionary<string, HashMap<string, ISet<string>>> searchTermToFacetToGroups = new Dictionary<string, HashMap<string, ISet<string>>>();
    int facetWithMostGroups = 0;
    for (int i = 0; i < numDocs; i++)
    {
        string groupValue;
        if (random.nextInt(24) == 17)
        {
            // So we test the "doc doesn't have the group'd
            // field" case:
            if (useDv)
            {
                groupValue = "";
            }
            else
            {
                groupValue = null;
            }
        }
        else
        {
            groupValue = groups[random.nextInt(groups.size())];
        }
        string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
        // Single lookup instead of ContainsKey + indexer.
        if (!searchTermToFacetToGroups.TryGetValue(contentStr, out HashMap<string, ISet<string>> facetToGroups))
        {
            searchTermToFacetToGroups[contentStr] = facetToGroups = new HashMap<string, ISet<string>>();
        }

        List<string> facetVals = new List<string>();
        // With 1/24 probability (and never in DV mode) the doc gets no facet.
        if (useDv || random.nextInt(24) != 18)
        {
            if (useDv)
            {
                string facetValue = facetValues[random.nextInt(facetValues.size())];
                uniqueFacetValues.Add(facetValue);
                if (!facetToGroups.TryGetValue(facetValue, out ISet<string> groupsInFacet))
                {
                    facetToGroups[facetValue] = groupsInFacet = new HashSet<string>();
                }
                groupsInFacet.add(groupValue);
                if (groupsInFacet.size() > facetWithMostGroups)
                {
                    facetWithMostGroups = groupsInFacet.size();
                }
                facetFields[0].SetStringValue(facetValue);
                facetFields[1].SetBytesValue(new BytesRef(facetValue));
                facetVals.Add(facetValue);
            }
            else
            {
                foreach (Field facetField in facetFields)
                {
                    string facetValue = facetValues[random.nextInt(facetValues.size())];
                    uniqueFacetValues.Add(facetValue);
                    if (!facetToGroups.TryGetValue(facetValue, out ISet<string> groupsInFacet))
                    {
                        facetToGroups[facetValue] = groupsInFacet = new HashSet<string>();
                    }
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                    facetField.SetStringValue(facetValue);
                    facetVals.Add(facetValue);
                }
            }
        }
        else
        {
            // No facet on this doc: tracked under the null key (HashMap allows it).
            uniqueFacetValues.Add(null);
            if (!facetToGroups.TryGetValue(null, out ISet<string> groupsInFacet))
            {
                facetToGroups[null] = groupsInFacet = new HashSet<string>();
            }
            groupsInFacet.add(groupValue);
            if (groupsInFacet.size() > facetWithMostGroups)
            {
                facetWithMostGroups = groupsInFacet.size();
            }
        }

        if (VERBOSE)
        {
            // string.Join prints the actual facet values (was: the List's type name).
            Console.WriteLine(" doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + string.Join(", ", facetVals));
        }

        if (groupValue != null)
        {
            if (useDv)
            {
                groupDc.SetBytesValue(new BytesRef(groupValue));
            }
            group.SetStringValue(groupValue);
        }
        else if (useDv)
        {
            // DV cannot have missing values:
            groupDc.SetBytesValue(new BytesRef());
        }
        content.SetStringValue(contentStr);

        // Pick the prototype document matching this doc's group/facet presence.
        if (groupValue == null && facetVals.Count == 0)
        {
            writer.AddDocument(docNoGroupNoFacet);
        }
        else if (facetVals.Count == 0)
        {
            writer.AddDocument(docNoFacet);
        }
        else if (groupValue == null)
        {
            writer.AddDocument(docNoGroup);
        }
        else
        {
            writer.AddDocument(doc);
        }
    }

    DirectoryReader reader = writer.GetReader();
    writer.Dispose();

    return (new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv));
}
// NOTE: this is likely buggy, and cannot clone fields
// with tokenStreamValues, etc. Use at your own risk!!
// TODO: is there a pre-existing way to do this!!!
//
// Clones a Document field-by-field: doc-values fields by type, numeric
// fields by type, everything else as a stored-string Field.
//
// BUGFIX(review): the SINGLE/INT64/DOUBLE numeric branches previously read
// the value with GetInt32Value(), truncating floats/doubles and losing the
// high bits of longs; they now use the accessor matching each NumericType.
public static Document CloneDocument(Document doc1)
{
    Document doc2 = new Document();
    foreach (IIndexableField f in doc1.Fields)
    {
        Field field1 = (Field)f;
        Field field2;
        DocValuesType dvType = field1.FieldType.DocValueType;
        NumericType numType = field1.FieldType.NumericType;
        if (dvType != DocValuesType.NONE)
        {
            switch (dvType)
            {
                case DocValuesType.NUMERIC:
                    field2 = new NumericDocValuesField(field1.Name, field1.GetInt64Value().Value);
                    break;
                case DocValuesType.BINARY:
                    field2 = new BinaryDocValuesField(field1.Name, field1.GetBinaryValue());
                    break;
                case DocValuesType.SORTED:
                    field2 = new SortedDocValuesField(field1.Name, field1.GetBinaryValue());
                    break;
                default:
                    throw IllegalStateException.Create("unknown Type: " + dvType);
            }
        }
        else if (numType != NumericType.NONE)
        {
            switch (numType)
            {
                case NumericType.INT32:
                    field2 = new Int32Field(field1.Name, field1.GetInt32Value().Value, field1.FieldType);
                    break;
                case NumericType.SINGLE:
                    // was GetInt32Value().Value — truncated the float
                    field2 = new SingleField(field1.Name, field1.GetSingleValue().Value, field1.FieldType);
                    break;
                case NumericType.INT64:
                    // was GetInt32Value().Value — lost the high 32 bits
                    field2 = new Int64Field(field1.Name, field1.GetInt64Value().Value, field1.FieldType);
                    break;
                case NumericType.DOUBLE:
                    // was GetInt32Value().Value — truncated the double
                    field2 = new DoubleField(field1.Name, field1.GetDoubleValue().Value, field1.FieldType);
                    break;
                default:
                    throw IllegalStateException.Create("unknown Type: " + numType);
            }
        }
        else
        {
            field2 = new Field(field1.Name, field1.GetStringValue(), field1.FieldType);
        }
        doc2.Add(field2);
    }
    return (doc2);
}
// Verifies that a SortedDocValuesField accepts only bytes-valued updates:
// every other setter is exercised via the TrySet* helpers (defined elsewhere
// in this class — presumably they assert the setter throws; confirm there),
// while BytesValue assignments succeed and the last write wins.
public virtual void TestSortedBytesDocValuesField()
{
    SortedDocValuesField field = new SortedDocValuesField("foo", new BytesRef("bar"));

    TrySetBoost(field);
    TrySetByteValue(field);
    // Bytes values ARE settable; the second assignment overwrites the first.
    field.BytesValue = "fubar".ToBytesRefArray(Encoding.UTF8);
    field.BytesValue = new BytesRef("baz");
    TrySetDoubleValue(field);
    TrySetIntValue(field);
    TrySetFloatValue(field);
    TrySetLongValue(field);
    TrySetReaderValue(field);
    TrySetShortValue(field);
    TrySetStringValue(field);
    TrySetTokenStreamValue(field);

    // Only the last bytes assignment should remain.
    Assert.AreEqual(new BytesRef("baz"), field.BinaryValue());
}