/// <summary>
/// Builds a randomized index together with the expected search-term -> group -> count-values
/// data that was written into it.
/// </summary>
/// <remarks>
/// The order of the calls on <c>random</c> is significant for reproducing a run from a seed,
/// so statements that draw random values must not be reordered.
/// </remarks>
/// <returns>
/// An <c>IndexContext</c> wrapping the directory, an open reader, the doc values type that was
/// used, the expected group counts per search term, and the distinct content strings in the
/// order they were first generated.
/// </returns>
private IndexContext CreateIndexContext()
{
    Random random = Random;
    DocValuesType[] candidateTypes = { DocValuesType.BINARY, DocValuesType.SORTED };

    Directory directory = NewDirectory();
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random, directory, writerConfig);

    // Doc values are not used when the writer's codec is named "Lucene3x".
    string codecName = writer.IndexWriter.Config.Codec.Name;
    DocValuesType dvType;
    if (string.Equals("Lucene3x", codecName, StringComparison.Ordinal))
    {
        dvType = DocValuesType.NONE;
    }
    else
    {
        dvType = candidateTypes[random.nextInt(candidateTypes.Length)];
    }

    int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;

    // Pools of candidate values; documents pick from these at random below.
    string[] groupValues = new string[numDocs / 5];
    string[] countValues = new string[numDocs / 10];
    for (int g = 0; g < groupValues.Length; g++)
    {
        groupValues[g] = GenerateRandomNonEmptyString();
    }
    for (int c = 0; c < countValues.Length; c++)
    {
        countValues[c] = GenerateRandomNonEmptyString();
    }

    var contentStrings = new List<string>();
    IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts =
        new JCG.Dictionary<string, IDictionary<string, ISet<string>>>();

    for (int docNumber = 1; docNumber <= numDocs; docNumber++)
    {
        // A single sentinel draw leaves the document without a group value.
        string groupValue;
        if (random.nextInt(23) == 14)
        {
            groupValue = null;
        }
        else
        {
            groupValue = groupValues[random.nextInt(groupValues.Length)];
        }

        // Likewise for the count value.
        string countValue;
        if (random.nextInt(21) == 13)
        {
            countValue = null;
        }
        else
        {
            countValue = countValues[random.nextInt(countValues.Length)];
        }

        string content = "random" + random.nextInt(numDocs / 20);

        if (!searchTermToGroupCounts.TryGetValue(content, out IDictionary<string, ISet<string>> groupToCounts))
        {
            // Groups sort always DOCID asc, hence the insertion-ordered dictionary.
            groupToCounts = new JCG.LinkedDictionary<string, ISet<string>>();
            searchTermToGroupCounts.Add(content, groupToCounts);
            contentStrings.Add(content);
        }

        // groupValue and countValue may be null here; the null group key and null count value
        // are recorded in the expected data as well.
        if (!groupToCounts.TryGetValue(groupValue, out ISet<string> countsVals))
        {
            countsVals = new JCG.HashSet<string>();
            groupToCounts.Add(groupValue, countsVals);
        }
        countsVals.Add(countValue);

        Document newDoc = new Document();
        newDoc.Add(new StringField("id", docNumber.ToString("D9", CultureInfo.InvariantCulture), Field.Store.YES));
        if (groupValue != null)
        {
            AddField(newDoc, groupField, groupValue, dvType);
        }
        if (countValue != null)
        {
            AddField(newDoc, countField, countValue, dvType);
        }
        newDoc.Add(new TextField("content", content, Field.Store.YES));
        writer.AddDocument(newDoc);
    }

    // The reader is obtained from the writer before the writer is disposed.
    DirectoryReader reader = writer.GetReader();
    if (VERBOSE)
    {
        int maxDoc = reader.MaxDoc;
        for (int docID = 0; docID < maxDoc; docID++)
        {
            Document stored = reader.Document(docID);
            Console.WriteLine(
                "docID=" + docID
                + " id=" + stored.Get("id")
                + " content=" + stored.Get("content")
                + " author=" + stored.Get("author")
                + " publisher=" + stored.Get("publisher"));
        }
    }

    writer.Dispose();
    return new IndexContext(directory, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray());
}
/// <summary>
/// Detect repetition groups. Done once - for first doc.
/// </summary>
/// <param name="rptTerms">
/// The repeating terms; used to obtain the repeating phrase positions and, in the multi-term
/// case, to build the term groups and to test whether a term repeats.
/// </param>
/// <returns>
/// The repetition groups. Every <c>PhrasePositions</c> placed in a group has its
/// <c>rptGroup</c> set to the index of that group in the returned list.
/// </returns>
private IList<IList<PhrasePositions>> GatherRptGroups(JCG.LinkedDictionary<Term, int?> rptTerms)
{
    PhrasePositions[] repeating = RepeatingPPs(rptTerms);
    IList<IList<PhrasePositions>> groups = new List<IList<PhrasePositions>>();

    if (hasMultiTermRpts)
    {
        // more involved - has multi-terms
        IList<FixedBitSet> termBits = PpTermsBitSets(repeating, rptTerms);
        UnionTermGroups(termBits);
        IDictionary<Term, int> termToGroup = TermGroups(rptTerms, termBits);

        // One (initially empty) member set per distinct group id.
        int groupCount = new JCG.HashSet<int>(termToGroup.Values).Count;
        IList<JCG.HashSet<PhrasePositions>> members = new List<JCG.HashSet<PhrasePositions>>();
        while (members.Count < groupCount)
        {
            members.Add(new JCG.HashSet<PhrasePositions>());
        }

        foreach (PhrasePositions candidate in repeating)
        {
            foreach (Term term in candidate.terms)
            {
                if (!rptTerms.ContainsKey(term))
                {
                    continue;
                }

                int groupId = termToGroup[term];
                members[groupId].Add(candidate);
                if (Debugging.AssertsEnabled)
                {
                    // A PhrasePositions must not end up in two different groups.
                    Debugging.Assert(candidate.rptGroup == -1 || candidate.rptGroup == groupId);
                }
                candidate.rptGroup = groupId;
            }
        }

        foreach (JCG.HashSet<PhrasePositions> memberSet in members)
        {
            groups.Add(new List<PhrasePositions>(memberSet));
        }
    }
    else
    {
        // simpler - no multi-terms - can base on positions in first doc
        for (int i = 0; i < repeating.Length; i++)
        {
            PhrasePositions first = repeating[i];
            if (first.rptGroup >= 0)
            {
                // already marked as a repetition
                continue;
            }

            int firstPos = TpPos(first);
            for (int j = i + 1; j < repeating.Length; j++)
            {
                PhrasePositions other = repeating[j];

                // "other" repeats "first" only when all of the following hold:
                //  - it is not already marked as a repetition,
                //  - the two PPs are not originally in the same offset in the query,
                //  - both resolve to the same position.
                bool isRepetition = other.rptGroup < 0
                    && other.offset != first.offset
                    && TpPos(other) == firstPos;
                if (!isRepetition)
                {
                    continue;
                }

                // a repetition: open a new group for "first" the first time one is found
                if (first.rptGroup < 0)
                {
                    first.rptGroup = groups.Count;
                    List<PhrasePositions> newGroup = new List<PhrasePositions>(2);
                    newGroup.Add(first);
                    groups.Add(newGroup);
                }

                int groupId = first.rptGroup;
                other.rptGroup = groupId;
                groups[groupId].Add(other);
            }
        }
    }

    return groups;
}
/// <summary>
/// Randomized test of one spatial operation: indexes a handful of random shapes, deletes some
/// of them, then runs random query shapes and compares the search results against a
/// brute-force evaluation of <paramref name="operation"/> on the indexed shapes.
/// </summary>
/// <param name="operation">The spatial predicate under test.</param>
private void doTest(SpatialOperation operation)
{
    // First show that when there's no data, a query will result in no results.
    Query emptyIndexQuery = strategy.MakeQuery(new SpatialArgs(operation, randomRectangle()));
    SearchResults emptyIndexResults = executeQuery(emptyIndexQuery, 1);
    assertEquals(0, emptyIndexResults.numFound);

    bool biasContains = operation == SpatialOperation.Contains;

    // Main index loop:
    IDictionary<string, IShape> indexedShapes = new JCG.LinkedDictionary<string, IShape>();
    IDictionary<string, IShape> indexedShapesGS = new JCG.LinkedDictionary<string, IShape>(); // grid snapped
    int numIndexedShapes = randomIntBetween(1, 6);
    // Only read by the disabled shape-pair query case further down, hence the warning suppression.
#pragma warning disable 219
    bool indexedAtLeastOneShapePair = false;
#pragma warning restore 219
    for (int shapeIdx = 0; shapeIdx < numIndexedShapes; shapeIdx++)
    {
        string id = "" + shapeIdx;
        IShape indexedShape;
        switch (Random.nextInt(12))
        {
            case 0: // 1 in 12: no shape at all
                indexedShape = null;
                break;
            case 1: // 1 in 12: just one point
                indexedShape = randomPoint();
                break;
            case 2:
            case 3:
            case 4: // 3 in 12: comprised of more than one shape
                indexedShape = randomShapePairRect(biasContains);
                indexedAtLeastOneShapePair = true;
                break;
            default: // just one rect
                indexedShape = randomRectangle();
                break;
        }

        indexedShapes.Put(id, indexedShape);
        indexedShapesGS.Put(id, gridSnap(indexedShape));

        adoc(id, indexedShape);

        if (Random.nextInt(10) == 0)
        {
            Commit(); // intermediate commit, produces extra segments
        }
    }

    // Delete some documents randomly. Iterate over a snapshot of the keys because the
    // dictionaries are modified inside the loop.
    foreach (string id in indexedShapes.Keys.ToList())
    {
        if (Random.nextInt(10) == 0)
        {
            DeleteDoc(id);
            indexedShapes.Remove(id);
            indexedShapesGS.Remove(id);
        }
    }

    Commit();

    // Main query loop:
    int numQueryShapes = AtLeast(20);
    for (int queryIdx = 0; queryIdx < numQueryShapes; queryIdx++)
    {
        int scanLevel = randomInt(grid.MaxLevels);
        ((RecursivePrefixTreeStrategy)strategy).PrefixGridScanLevel = scanLevel;

        // LUCENE-5549: querying with a shape pair is disabled. The disabled branch handled
        // draws 1, 2 and 3 when !indexedAtLeastOneShapePair (which avoids
        // ShapePair.relate(ShapePair), which isn't reliable) and used
        // randomShapePairRect(!biasContains), i.e. inverted biasContains for the query side.
        //TODO debug: -Dtests.method=testWithin -Dtests.multiplier=3 -Dtests.seed=5F5294CE2E075A3E:AAD2F0F79288CA64
        IShape queryShape;
        if (randomInt(10) == 0)
        {
            queryShape = randomPoint();
        }
        else
        {
            queryShape = randomRectangle();
        }
        IShape queryShapeGS = gridSnap(queryShape);

        bool opIsDisjoint = operation == SpatialOperation.IsDisjointTo;

        // Generate truth via brute force:
        // We ensure true-positive matches (if the predicate on the raw shapes match
        // then the search should find those same matches).
        // approximations, false-positive matches
        ISet<string> expectedIds = new JCG.LinkedHashSet<string>();  // true-positives
        ISet<string> secondaryIds = new JCG.LinkedHashSet<string>(); // false-positives (unless disjoint)
        foreach (KeyValuePair<string, IShape> entry in indexedShapes)
        {
            IShape rawIndexedShape = entry.Value;
            if (rawIndexedShape == null)
            {
                continue;
            }
            string id = entry.Key;

            if (operation.Evaluate(rawIndexedShape, queryShape))
            {
                expectedIds.Add(id);
                // If no longer a match after buffering (grid snapping) both shapes, for
                // disjoint, remember this.
                if (opIsDisjoint && !operation.Evaluate(indexedShapesGS[id], queryShapeGS))
                {
                    secondaryIds.Add(id);
                }
                continue;
            }

            if (opIsDisjoint)
            {
                continue;
            }

            // Buffer either the indexed or query shape (via gridSnap) and try again.
            IShape indexedShapeCompare = rawIndexedShape;
            IShape queryShapeCompare = queryShape;
            if (operation == SpatialOperation.Intersects)
            {
                indexedShapeCompare = indexedShapesGS[id];
                queryShapeCompare = queryShapeGS;
                //TODO Unfortunately, grid-snapping both can result in intersections that otherwise
                // wouldn't happen when the grids are adjacent. Not a big deal but our test is just a
                // bit more lenient.
            }
            else if (operation == SpatialOperation.Contains)
            {
                indexedShapeCompare = indexedShapesGS[id];
            }
            else if (operation == SpatialOperation.IsWithin)
            {
                queryShapeCompare = queryShapeGS;
            }

            if (operation.Evaluate(indexedShapeCompare, queryShapeCompare))
            {
                secondaryIds.Add(id);
            }
        }

        // Search and verify results
        SpatialArgs args = new SpatialArgs(operation, queryShape);
        if (queryShape is ShapePair)
        {
            args.DistErrPct = 0.0; // a hack; we want to be more detailed than gridSnap(queryShape)
        }
        Query query = strategy.MakeQuery(args);
        SearchResults got = executeQuery(query, 100);

        ISet<string> remainingExpectedIds = new JCG.LinkedHashSet<string>(expectedIds);
        foreach (SearchResult result in got.results)
        {
            string id = result.GetId();
            bool removed = remainingExpectedIds.Remove(id);
            // A hit is acceptable when it was expected, when the operation is disjoint, or when
            // it is one of the tolerated secondary matches.
            bool acceptable = removed || opIsDisjoint || secondaryIds.Contains(id);
            if (!acceptable)
            {
                fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape);
            }
        }

        if (opIsDisjoint)
        {
            remainingExpectedIds.ExceptWith(secondaryIds);
        }

        if (remainingExpectedIds.Any())
        {
            // Report the first expected id that the search did not return.
            string missingId = remainingExpectedIds.First();
            fail("Should have matched", missingId, indexedShapes, indexedShapesGS, queryShape);
        }
    }
}
/// <summary>
/// Reads the "keyTypeData" resource bundle and fills <c>KEYMAP</c> and <c>BCP47_KEYS</c>
/// from its "keyMap", "typeMap" and (optional) "typeAlias" / "bcpTypeAlias" tables.
/// The "keyInfo" and "typeInfo" tables are handed to <c>GetKeyInfo</c> and <c>GetTypeInfo</c>.
/// </summary>
private static void InitFromResourceBundle()
{
    UResourceBundle keyTypeDataRes = UResourceBundle.GetBundleInstance(
        ICUData.IcuBaseName,
        "keyTypeData",
        ICUResourceBundle.IcuDataAssembly);

    GetKeyInfo(keyTypeDataRes.Get("keyInfo"));
    GetTypeInfo(keyTypeDataRes.Get("typeInfo"));

    UResourceBundle keyMapRes = keyTypeDataRes.Get("keyMap");
    UResourceBundle typeMapRes = keyTypeDataRes.Get("typeMap");

    // alias data is optional
    UResourceBundle typeAliasRes = GetOptionalChild(keyTypeDataRes, "typeAlias");
    UResourceBundle bcpTypeAliasRes = GetOptionalChild(keyTypeDataRes, "bcpTypeAlias");

    // iterate through keyMap resource
    using (UResourceBundleEnumerator keyMapItr = keyMapRes.GetEnumerator())
    {
        // ICU4N NOTE: LinkedDictionary is used here, presumably so that the keys keep their
        // insertion order as with Java's LinkedHashMap.
        IDictionary<string, ISet<string>> bcp47Keys = new JCG.LinkedDictionary<string, ISet<string>>();

        while (keyMapItr.MoveNext())
        {
            UResourceBundle keyMapEntry = keyMapItr.Current;
            string legacyKeyId = keyMapEntry.Key;
            string rawBcpKeyId = keyMapEntry.GetString();

            // Empty value indicates that BCP key is same with the legacy key.
            bool hasSameKey = rawBcpKeyId.Length == 0;
            string bcpKeyId = hasSameKey ? legacyKeyId : rawBcpKeyId;

            // The read-only wrapper is registered up front; the underlying set is filled
            // while the type map is walked below.
            ISet<string> bcp47Types = new JCG.LinkedHashSet<string>();
            bcp47Keys[bcpKeyId] = bcp47Types.AsReadOnly();

            bool isTZ = legacyKeyId.Equals("timezone");

            // reverse type alias map (looked up by legacy key)
            IDictionary<string, ISet<string>> typeAliasMap = BuildReverseAliasMap(typeAliasRes, legacyKeyId, isTZ);

            // reverse bcp type alias map (looked up by BCP key)
            IDictionary<string, ISet<string>> bcpTypeAliasMap = BuildReverseAliasMap(bcpTypeAliasRes, bcpKeyId, false);

            IDictionary<string, Type> typeDataMap = new Dictionary<string, Type>();
            ISet<SpecialType> specialTypeSet = null;

            // look up type map for the key, and walk through the mapping data
            UResourceBundle typeMapResByKey = null;
            try
            {
                typeMapResByKey = typeMapRes.Get(legacyKeyId);
            }
            catch (MissingManifestResourceException)
            {
                // type map for each key must exist
                Debug.Assert(false);
            }

            if (typeMapResByKey != null)
            {
                using (UResourceBundleEnumerator typeMapResByKeyItr = typeMapResByKey.GetEnumerator())
                {
                    while (typeMapResByKeyItr.MoveNext())
                    {
                        UResourceBundle typeMapEntry = typeMapResByKeyItr.Current;
                        string legacyTypeId = typeMapEntry.Key;
                        string bcpTypeId = typeMapEntry.GetString();

                        // special types: the id starts with a character strictly between
                        // '9' and 'a', and there is no BCP value
                        char first = legacyTypeId[0];
                        if ('9' < first && first < 'a' && bcpTypeId.Length == 0)
                        {
                            if (specialTypeSet == null)
                            {
                                specialTypeSet = new JCG.HashSet<SpecialType>();
                            }
                            // Parsed case-insensitively.
                            specialTypeSet.Add((SpecialType)Enum.Parse(typeof(SpecialType), legacyTypeId, true));
                            bcp47Types.Add(legacyTypeId);
                            continue;
                        }

                        if (isTZ)
                        {
                            // a timezone key uses a colon instead of a slash in the resource.
                            // e.g. America:Los_Angeles
                            legacyTypeId = legacyTypeId.Replace(':', '/');
                        }

                        // Empty value indicates that BCP type is same with the legacy type.
                        bool hasSameType = bcpTypeId.Length == 0;
                        if (hasSameType)
                        {
                            bcpTypeId = legacyTypeId;
                        }
                        bcp47Types.Add(bcpTypeId);

                        // Note: legacy type value should never be
                        // equivalent to bcp type value of a different
                        // type under the same key. So we use a single
                        // map for lookup.
                        Type typeData = new Type(legacyTypeId, bcpTypeId);
                        typeDataMap[AsciiUtil.ToLower(legacyTypeId)] = typeData;
                        if (!hasSameType)
                        {
                            typeDataMap[AsciiUtil.ToLower(bcpTypeId)] = typeData;
                        }

                        // Also put aliases in the map
                        if (typeAliasMap != null
                            && typeAliasMap.TryGetValue(legacyTypeId, out ISet<string> legacyAliases)
                            && legacyAliases != null)
                        {
                            foreach (string alias in legacyAliases)
                            {
                                typeDataMap[AsciiUtil.ToLower(alias)] = typeData;
                            }
                        }
                        if (bcpTypeAliasMap != null
                            && bcpTypeAliasMap.TryGetValue(bcpTypeId, out ISet<string> bcpAliases)
                            && bcpAliases != null)
                        {
                            foreach (string alias in bcpAliases)
                            {
                                typeDataMap[AsciiUtil.ToLower(alias)] = typeData;
                            }
                        }
                    }
                }
            }

            // Register the key data under the lower-cased legacy key and, when different,
            // under the lower-cased BCP key as well.
            KeyData keyData = new KeyData(legacyKeyId, bcpKeyId, typeDataMap, specialTypeSet);
            KEYMAP[AsciiUtil.ToLower(legacyKeyId)] = keyData;
            if (!hasSameKey)
            {
                KEYMAP[AsciiUtil.ToLower(bcpKeyId)] = keyData;
            }
        }

        BCP47_KEYS = bcp47Keys.AsReadOnly();
    }

    // Returns the child resource named "key", or null when it is missing.
    UResourceBundle GetOptionalChild(UResourceBundle parent, string key)
    {
        try
        {
            return parent.Get(key);
        }
        catch (MissingManifestResourceException)
        {
            // fall through
            return null;
        }
    }

    // Builds a reversed alias map (alias target -> set of alias sources) from the entries
    // found under "keyId" in "aliasRes". Returns null when "aliasRes" is null or has no data
    // for that key. When "colonToSlash" is set, ':' in each alias source is replaced by '/'.
    IDictionary<string, ISet<string>> BuildReverseAliasMap(UResourceBundle aliasRes, string keyId, bool colonToSlash)
    {
        if (aliasRes == null)
        {
            return null;
        }

        UResourceBundle aliasResByKey = GetOptionalChild(aliasRes, keyId);
        if (aliasResByKey == null)
        {
            return null;
        }

        IDictionary<string, ISet<string>> reversed = new Dictionary<string, ISet<string>>();
        using (UResourceBundleEnumerator aliasItr = aliasResByKey.GetEnumerator())
        {
            while (aliasItr.MoveNext())
            {
                UResourceBundle aliasEntry = aliasItr.Current;
                string from = aliasEntry.Key;
                string to = aliasEntry.GetString();
                if (colonToSlash)
                {
                    from = from.Replace(':', '/');
                }

                if (!reversed.TryGetValue(to, out ISet<string> aliasSet) || aliasSet == null)
                {
                    aliasSet = new JCG.HashSet<string>();
                    reversed[to] = aliasSet;
                }
                aliasSet.Add(from);
            }
        }
        return reversed;
    }
}