Exemple #1
        /// <summary>
        /// Builds a randomized index for the grouping tests and returns it wrapped in an
        /// <c>IndexContext</c>, together with a map of content term -> group value -> set of count values
        /// that is created alongside the documents.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and several statements
        /// (the array initializer on the dvTypes line and the RandomIndexWriter constructor call are
        /// visibly truncated). Comments below describe only what the visible lines do.
        /// </remarks>
        /// <returns>
        /// A new <c>IndexContext</c> built from the directory, the reader, the chosen doc values type,
        /// the term/group/count map and the content strings.
        /// </returns>
        private IndexContext CreateIndexContext()
            Random random = Random;

            // Candidate doc values types; the initializer contents are not visible in this listing.
            DocValuesType[] dvTypes = new DocValuesType[] {

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())

            // Doc values are only used when the writer's codec is not named "Lucene3x";
            // otherwise DocValuesType.NONE is selected.
            bool          canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

            // Operator precedence: the multiplier scales only the random part, the base of 86 is fixed.
            int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;

            // Pools of random values: one group value per 5 docs, one count value per 10 docs.
            string[] groupValues = new string[numDocs / 5];
            string[] countValues = new string[numDocs / 10];
            for (int i = 0; i < groupValues.Length; i++)
                groupValues[i] = GenerateRandomNonEmptyString();
            for (int i = 0; i < countValues.Length; i++)
                countValues[i] = GenerateRandomNonEmptyString();

            // NOTE(review): no visible statement adds to contentStrings before it is passed to
            // the IndexContext at the end - confirm whether a line was lost.
            List <string> contentStrings = new List <string>();
            // content term -> (group value -> set of count values)
            IDictionary <string, IDictionary <string, ISet <string> > > searchTermToGroupCounts = new JCG.Dictionary <string, IDictionary <string, ISet <string> > >();

            // Document ids run from 1 to numDocs inclusive.
            for (int i = 1; i <= numDocs; i++)
                // Either value is occasionally null (1 in 23 for the group, 1 in 21 for the count),
                // in which case the corresponding field is not added to the document below.
                string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
                string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
                string content    = "random" + random.nextInt(numDocs / 20);
                IDictionary <string, ISet <string> > groupToCounts;
                if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
                    // Groups sort always DOCID asc...
                    searchTermToGroupCounts.Add(content, groupToCounts = new JCG.LinkedDictionary <string, ISet <string> >());

                // groupValue may be null here; presumably the JCG dictionary accepts null keys - TODO confirm.
                // NOTE(review): no visible statement adds countValue to countsVals.
                ISet <string> countsVals;
                if (!groupToCounts.TryGetValue(groupValue, out countsVals))
                    groupToCounts.Add(groupValue, countsVals = new JCG.HashSet <string>());

                // The id is stored as a 9-digit, zero-padded, culture-invariant string.
                Document doc = new Document();
                doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
                if (groupValue != null)
                    AddField(doc, groupField, groupValue, dvType);
                if (countValue != null)
                    AddField(doc, countField, countValue, dvType);
                doc.Add(new TextField("content", content, Field.Store.YES));
                // NOTE(review): no visible statement hands doc to the writer w - confirm a line was not lost.

            DirectoryReader reader = w.GetReader();

            // Diagnostic dump of every document when VERBOSE is set.
            if (VERBOSE)
                for (int docID = 0; docID < reader.MaxDoc; docID++)
                    Document doc = reader.Document(docID);
                    Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));

            return(new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/)));
Exemple #2
        /// <summary>
        /// Detect repetition groups. Done once - for first doc.
        /// Each repeating <c>PhrasePositions</c> gets a group id written to its <c>rptGroup</c> field.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and some statements
        /// (for example the branch separator before the multi-term path and the final return).
        /// Comments below describe only what the visible lines do.
        /// </remarks>
        /// <param name="rptTerms">Repeating terms; passed to RepeatingPPs, PpTermsBitSets and TermGroups and probed with ContainsKey.</param>
        /// <returns>A list of groups, each a list of <c>PhrasePositions</c> (presumably <c>res</c>; the return statement is not visible).</returns>
        private IList <IList <PhrasePositions> > GatherRptGroups(JCG.LinkedDictionary <Term, int?> rptTerms)
            PhrasePositions[] rpp = RepeatingPPs(rptTerms);
            IList <IList <PhrasePositions> > res = new List <IList <PhrasePositions> >();

            if (!hasMultiTermRpts)
                // simpler - no multi-terms - can base on positions in first doc
                // Pairwise scan: each pp is compared against every later pp2 in rpp.
                for (int i = 0; i < rpp.Length; i++)
                    PhrasePositions pp = rpp[i];
                    if (pp.rptGroup >= 0) // already marked as a repetition
                    int tpPos = TpPos(pp);
                    for (int j = i + 1; j < rpp.Length; j++)
                        PhrasePositions pp2 = rpp[j];
                        if (pp2.rptGroup >= 0 || pp2.offset == pp.offset || TpPos(pp2) != tpPos) // in condition order: already marked as a repetition - not a repetition: two PPs are originally in same offset in the query! - not a repetition (different TpPos)
                        // a repetition
                        int g = pp.rptGroup;
                        if (g < 0)
                            // pp has no group yet: the next free index in res becomes its group id.
                            g           = res.Count;
                            pp.rptGroup = g;
                            // NOTE(review): the statements that fill rl and add it to res are not visible here.
                            List <PhrasePositions> rl = new List <PhrasePositions>(2)
                        pp2.rptGroup = g;
                // more involved - has multi-terms
                IList <JCG.HashSet <PhrasePositions> > tmp = new List <JCG.HashSet <PhrasePositions> >();
                IList <FixedBitSet> bb = PpTermsBitSets(rpp, rptTerms);
                IDictionary <Term, int> tg = TermGroups(rptTerms, bb);
                // One (initially empty) set in tmp per distinct group id found in tg.
                JCG.HashSet <int>       distinctGroupIDs = new JCG.HashSet <int>(tg.Values);
                for (int i = 0; i < distinctGroupIDs.Count; i++)
                    tmp.Add(new JCG.HashSet <PhrasePositions>());
                foreach (PhrasePositions pp in rpp)
                    foreach (Term t in pp.terms)
                        if (rptTerms.ContainsKey(t))
                            int g = tg[t];
                            // A pp must either be unassigned (-1) or already belong to this same group.
                            if (Debugging.AssertsEnabled)
                                Debugging.Assert(pp.rptGroup == -1 || pp.rptGroup == g);
                            pp.rptGroup = g;
                            // NOTE(review): no visible statement adds pp to tmp[g] - confirm a line was not lost.
                // Each set in tmp is copied into a list and appended to the result.
                foreach (JCG.HashSet <PhrasePositions> hs in tmp)
                    res.Add(new List <PhrasePositions>(hs));
        /// <summary>
        /// Randomized check of one spatial predicate: asserts that a query against the empty index finds nothing,
        /// indexes a handful of random shapes, then runs random query shapes and compares the search
        /// results against ids computed by brute-force evaluation of <paramref name="operation"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and several statements
        /// (else branches, the delete call, switch braces, and the lines that populate the expected-id sets).
        /// Comments below describe only what the visible lines do.
        /// </remarks>
        /// <param name="operation">The spatial predicate used both to build queries and to evaluate the expected matches.</param>
        private void doTest(SpatialOperation operation)
            //first show that when there's no data, a query will result in no results
                Query         query         = strategy.MakeQuery(new SpatialArgs(operation, randomRectangle()));
                SearchResults searchResults = executeQuery(query, 1);
                assertEquals(0, searchResults.numFound);

            // Passed to randomShapePairRect when indexing shape pairs.
            bool biasContains = (operation == SpatialOperation.Contains);

            //Main index loop:
            // id -> raw indexed shape, and id -> the same shape after gridSnap().
            IDictionary <String, IShape> indexedShapes   = new JCG.LinkedDictionary <String, IShape>();
            IDictionary <String, IShape> indexedShapesGS = new JCG.LinkedDictionary <String, IShape>();//grid snapped
            int numIndexedShapes = randomIntBetween(1, 6);

            // Only referenced from the commented-out LUCENE-5549 case below, hence the
            // suppression of warning 219 around the declaration.
#pragma warning disable 219
            bool indexedAtLeastOneShapePair = false;
#pragma warning restore 219
            for (int i = 0; i < numIndexedShapes; i++)
                String id = "" + i;
                IShape indexedShape;
                // R selects the kind of shape: null, a single point, a shape pair, or (presumably
                // in a missing else branch) a single rectangle.
                int    R = Random.nextInt(12);
                if (R == 0)
                {//1 in 12
                    indexedShape = null;
                else if (R == 1)
                {                                 //1 in 12
                    indexedShape = randomPoint(); //just one point
                else if (R <= 4)
                {//3 in 12
                 //comprised of more than one shape
                    indexedShape = randomShapePairRect(biasContains);
                    indexedAtLeastOneShapePair = true;
                    indexedShape = randomRectangle();//just one rect

                indexedShapes.Put(id, indexedShape);
                indexedShapesGS.Put(id, gridSnap(indexedShape));

                adoc(id, indexedShape);

                if (Random.nextInt(10) == 0)
                    Commit();//intermediate commit, produces extra segments
            //delete some documents randomly
            // Iterates over a ToList() snapshot of the keys rather than the live key collection.
            // NOTE(review): the statements executed for a selected id are not visible here.
            IEnumerator <String> idIter = indexedShapes.Keys.ToList().GetEnumerator();
            while (idIter.MoveNext())
                String id = idIter.Current;
                if (Random.nextInt(10) == 0)


            //Main query loop:
            int numQueryShapes = AtLeast(20);
            for (int i = 0; i < numQueryShapes; i++)
                // A random prefix-grid scan level is set on the strategy for every query.
                int scanLevel = randomInt(grid.MaxLevels);
                ((RecursivePrefixTreeStrategy)strategy).PrefixGridScanLevel = (scanLevel);

                // Case 0 yields a point; every other value falls to the default and yields a rectangle.
                IShape queryShape;
                switch (randomInt(10))
                case 0: queryShape = randomPoint(); break;

                // LUCENE-5549
                //TODO debug: -Dtests.method=testWithin -Dtests.multiplier=3 -Dtests.seed=5F5294CE2E075A3E:AAD2F0F79288CA64
                //        case 1:case 2:case 3:
                //          if (!indexedAtLeastOneShapePair) { // avoids ShapePair.relate(ShapePair), which isn't reliable
                //            queryShape = randomShapePairRect(!biasContains);//invert biasContains for query side
                //            break;
                //          }
                default: queryShape = randomRectangle(); break;
                IShape queryShapeGS = gridSnap(queryShape);

                bool opIsDisjoint = operation == SpatialOperation.IsDisjointTo;

                //Generate truth via brute force:
                // We ensure true-positive matches (if the predicate on the raw shapes match
                //  then the search should find those same matches).
                // approximations, false-positive matches
                // NOTE(review): no visible statement adds ids to expectedIds or secondaryIds;
                // the bodies of several ifs below appear to be missing.
                ISet <string> expectedIds  = new JCG.LinkedHashSet <string>(); //true-positives
                ISet <string> secondaryIds = new JCG.LinkedHashSet <string>(); //false-positives (unless disjoint)
                foreach (var entry in indexedShapes)
                    string id = entry.Key;
                    IShape indexedShapeCompare = entry.Value;
                    if (indexedShapeCompare == null)
                    IShape queryShapeCompare = queryShape;

                    // First evaluate the predicate on the raw (un-snapped) shapes.
                    if (operation.Evaluate(indexedShapeCompare, queryShapeCompare))
                        if (opIsDisjoint)
                            //if no longer intersect after buffering them, for disjoint, remember this
                            indexedShapeCompare = indexedShapesGS[id];
                            queryShapeCompare   = queryShapeGS;
                            if (!operation.Evaluate(indexedShapeCompare, queryShapeCompare))
                    else if (!opIsDisjoint)
                        //buffer either the indexed or query shape (via gridSnap) and try again
                        // Which side is snapped depends on the operation: both for Intersects,
                        // the indexed shape for Contains, the query shape for IsWithin.
                        if (operation == SpatialOperation.Intersects)
                            indexedShapeCompare = indexedShapesGS[id];
                            queryShapeCompare   = queryShapeGS;
                            //TODO Unfortunately, grid-snapping both can result in intersections that otherwise
                            // wouldn't happen when the grids are adjacent. Not a big deal but our test is just a
                            // bit more lenient.
                        else if (operation == SpatialOperation.Contains)
                            indexedShapeCompare = indexedShapesGS[id];
                        else if (operation == SpatialOperation.IsWithin)
                            queryShapeCompare = queryShapeGS;
                        if (operation.Evaluate(indexedShapeCompare, queryShapeCompare))

                //Search and verify results
                SpatialArgs args = new SpatialArgs(operation, queryShape);
                if (queryShape is ShapePair)
                    args.DistErrPct = (0.0);//a hack; we want to be more detailed than gridSnap(queryShape)
                Query         query = strategy.MakeQuery(args);
                SearchResults got   = executeQuery(query, 100);
                // Work on a copy so that expectedIds itself is left untouched while results are ticked off.
                ISet <String> remainingExpectedIds = new JCG.LinkedHashSet <string>(expectedIds);
                foreach (SearchResult result in got.results)
                    String id      = result.GetId();
                    bool   removed = remainingExpectedIds.Remove(id);
                    // A hit that was not expected fails the test unless the operation is disjoint
                    // or the id is listed in secondaryIds.
                    if (!removed && (!opIsDisjoint && !secondaryIds.Contains(id)))
                        fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape);
                // NOTE(review): the statement guarded by this opIsDisjoint check is not visible.
                if (opIsDisjoint)
                // Any expected id still remaining was not returned by the search.
                if (remainingExpectedIds.Any())
                    // NOTE(review): no MoveNext() call is visible before Current is read; an enumerator's
                    // Current is not defined before the first MoveNext() - confirm a line was not lost.
                    var iter = remainingExpectedIds.GetEnumerator();
                    String id = iter.Current;
                    fail("Should have matched", id, indexedShapes, indexedShapesGS, queryShape);
Exemple #4
        /// <summary>
        /// Populates <c>KEYMAP</c> and <c>BCP47_KEYS</c> from a resource bundle: reads the "keyMap" and
        /// "typeMap" tables (plus the optional "typeAlias" and "bcpTypeAlias" tables) and builds one
        /// <c>KeyData</c> per key map entry, registered under the lower-cased legacy key id and, when it
        /// differs, the lower-cased BCP key id.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces, the <c>try</c> keywords that pair
        /// with the visible <c>catch</c> clauses, the arguments of the GetBundleInstance call, and the
        /// statements that add entries to the alias sets and type sets. Comments below describe only
        /// what the visible lines do.
        /// </remarks>
        private static void InitFromResourceBundle()
            UResourceBundle keyTypeDataRes = UResourceBundle.GetBundleInstance(


            UResourceBundle keyMapRes  = keyTypeDataRes.Get("keyMap");
            UResourceBundle typeMapRes = keyTypeDataRes.Get("typeMap");

            // alias data is optional
            // Both stay null when the lookup throws MissingManifestResourceException.
            UResourceBundle typeAliasRes    = null;
            UResourceBundle bcpTypeAliasRes = null;

                typeAliasRes = keyTypeDataRes.Get("typeAlias");
            catch (MissingManifestResourceException)
                // fall through

                bcpTypeAliasRes = keyTypeDataRes.Get("bcpTypeAlias");
            catch (MissingManifestResourceException)
                // fall through

            // iterate through keyMap resource
            using (UResourceBundleEnumerator keyMapItr = keyMapRes.GetEnumerator())
                IDictionary <string, ISet <string> > _Bcp47Keys = new JCG.LinkedDictionary <string, ISet <string> >(); // ICU4N NOTE: As long as we don't delete, Dictionary keeps insertion order the same as LinkedHashMap

                while (keyMapItr.MoveNext())
                    // Entry key is the legacy key id; entry string value is the BCP key id.
                    UResourceBundle keyMapEntry = keyMapItr.Current;
                    string          legacyKeyId = keyMapEntry.Key;
                    string          bcpKeyId    = keyMapEntry.GetString();

                    bool hasSameKey = false;
                    if (bcpKeyId.Length == 0)
                        // Empty value indicates that BCP key is same with the legacy key.
                        bcpKeyId   = legacyKeyId;
                        hasSameKey = true;
                    // A read-only wrapper of _bcp47Types is stored under the BCP key.
                    // NOTE(review): no visible statement adds types to _bcp47Types - confirm a line was not lost.
                    ISet <string> _bcp47Types = new JCG.LinkedHashSet <string>();
                    _Bcp47Keys[bcpKeyId] = _bcp47Types.AsReadOnly();

                    // The "timezone" key gets special ':' -> '/' handling of its ids further down.
                    bool isTZ = legacyKeyId.Equals("timezone");

                    // reverse type alias map
                    // Keyed by the alias target ("to"); remains null when there is no alias data for this key.
                    IDictionary <string, ISet <string> > typeAliasMap = null;
                    if (typeAliasRes != null)
                        UResourceBundle typeAliasResByKey = null;
                            typeAliasResByKey = typeAliasRes.Get(legacyKeyId);
                        catch (MissingManifestResourceException)
                            // fall through
                        if (typeAliasResByKey != null)
                            typeAliasMap = new Dictionary <string, ISet <string> >();
                            using (UResourceBundleEnumerator typeAliasResItr = typeAliasResByKey.GetEnumerator())
                                while (typeAliasResItr.MoveNext())
                                    UResourceBundle typeAliasDataEntry = typeAliasResItr.Current;
                                    string          from = typeAliasDataEntry.Key;
                                    string          to   = typeAliasDataEntry.GetString();
                                    if (isTZ)
                                        from = from.Replace(':', '/');
                                    // Create the alias set for this target on first use.
                                    // NOTE(review): no visible statement adds "from" to aliasSet.
                                    if (!typeAliasMap.TryGetValue(to, out ISet <string> aliasSet) || aliasSet == null)
                                        aliasSet         = new JCG.HashSet <string>();
                                        typeAliasMap[to] = aliasSet;

                    // reverse bcp type alias map
                    // Same structure as typeAliasMap, but looked up by the BCP key id and without the timezone rewrite.
                    IDictionary <string, ISet <string> > bcpTypeAliasMap = null;
                    if (bcpTypeAliasRes != null)
                        UResourceBundle bcpTypeAliasResByKey = null;
                            bcpTypeAliasResByKey = bcpTypeAliasRes.Get(bcpKeyId);
                        catch (MissingManifestResourceException)
                            // fall through
                        if (bcpTypeAliasResByKey != null)
                            bcpTypeAliasMap = new Dictionary <string, ISet <string> >();
                            using (UResourceBundleEnumerator bcpTypeAliasResItr = bcpTypeAliasResByKey.GetEnumerator())
                                while (bcpTypeAliasResItr.MoveNext())
                                    UResourceBundle bcpTypeAliasDataEntry = bcpTypeAliasResItr.Current;
                                    string          from = bcpTypeAliasDataEntry.Key;
                                    string          to   = bcpTypeAliasDataEntry.GetString();
                                    // NOTE(review): no visible statement adds "from" to aliasSet.
                                    if (!bcpTypeAliasMap.TryGetValue(to, out ISet <string> aliasSet) || aliasSet == null)
                                        aliasSet            = new JCG.HashSet <string>();
                                        bcpTypeAliasMap[to] = aliasSet;

                    // Lower-cased type id or alias -> Type; specialTypeSet is allocated only if a special type is seen.
                    IDictionary <string, Type> typeDataMap    = new Dictionary <string, Type>();
                    ISet <SpecialType>         specialTypeSet = null;

                    // look up type map for the key, and walk through the mapping data
                    UResourceBundle typeMapResByKey = null;
                        typeMapResByKey = typeMapRes.Get(legacyKeyId);
                    catch (MissingManifestResourceException)
                        // type map for each key must exist
                    if (typeMapResByKey != null)
                        using (UResourceBundleEnumerator typeMapResByKeyItr = typeMapResByKey.GetEnumerator())
                            while (typeMapResByKeyItr.MoveNext())
                                UResourceBundle typeMapEntry = typeMapResByKeyItr.Current;
                                string          legacyTypeId = typeMapEntry.Key;
                                string          bcpTypeId    = typeMapEntry.GetString();

                                // special types
                                // Special when the first char lies strictly between '9' and 'a' and the BCP type id is
                                // empty; the legacy id is then parsed case-insensitively as a SpecialType enum name.
                                char first         = legacyTypeId[0];
                                bool isSpecialType = '9' < first && first < 'a' && bcpTypeId.Length == 0;
                                if (isSpecialType)
                                    if (specialTypeSet == null)
                                        specialTypeSet = new JCG.HashSet <SpecialType>();
                                    specialTypeSet.Add((SpecialType)Enum.Parse(typeof(SpecialType), legacyTypeId, true));

                                if (isTZ)
                                    // a timezone key uses a colon instead of a slash in the resource.
                                    // e.g. America:Los_Angeles
                                    legacyTypeId = legacyTypeId.Replace(':', '/');

                                bool hasSameType = false;
                                if (bcpTypeId.Length == 0)
                                    // Empty value indicates that BCP type is same with the legacy type.
                                    bcpTypeId   = legacyTypeId;
                                    hasSameType = true;

                                // Note: legacy type value should never be
                                // equivalent to bcp type value of a different
                                // type under the same key. So we use a single
                                // map for lookup.
                                Type t = new Type(legacyTypeId, bcpTypeId);
                                typeDataMap[AsciiUtil.ToLower(legacyTypeId)] = t;
                                if (!hasSameType)
                                    typeDataMap[AsciiUtil.ToLower(bcpTypeId)] = t;

                                // Also put aliases in the map
                                // Aliases of the legacy type id and of the BCP type id all resolve to the same Type t.
                                if (typeAliasMap != null)
                                    if (typeAliasMap.TryGetValue(legacyTypeId, out ISet <string> typeAliasSet) && typeAliasSet != null)
                                        foreach (string alias in typeAliasSet)
                                            typeDataMap[AsciiUtil.ToLower(alias)] = t;
                                if (bcpTypeAliasMap != null)
                                    if (bcpTypeAliasMap.TryGetValue(bcpTypeId, out ISet <string> bcpTypeAliasSet) && bcpTypeAliasSet != null)
                                        foreach (string alias in bcpTypeAliasSet)
                                            typeDataMap[AsciiUtil.ToLower(alias)] = t;

                    KeyData keyData = new KeyData(legacyKeyId, bcpKeyId, typeDataMap, specialTypeSet);

                    // Register under the lower-cased legacy key, and under the lower-cased BCP key when it differs.
                    KEYMAP[AsciiUtil.ToLower(legacyKeyId)] = keyData;
                    if (!hasSameKey)
                        KEYMAP[AsciiUtil.ToLower(bcpKeyId)] = keyData;

                // Publish a read-only wrapper of the collected BCP47 keys.
                BCP47_KEYS = _Bcp47Keys.AsReadOnly();