Ejemplo n.º 1
0
        private IndexContext CreateIndexContext()
        {
            Random random = Random;

            DocValuesType[] dvTypes = new DocValuesType[] {
                DocValuesType.BINARY,
                DocValuesType.SORTED
            };

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
                );

            bool          canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

            int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;

            string[] groupValues = new string[numDocs / 5];
            string[] countValues = new string[numDocs / 10];
            for (int i = 0; i < groupValues.Length; i++)
            {
                groupValues[i] = GenerateRandomNonEmptyString();
            }
            for (int i = 0; i < countValues.Length; i++)
            {
                countValues[i] = GenerateRandomNonEmptyString();
            }

            List <string> contentStrings = new List <string>();
            IDictionary <string, IDictionary <string, ISet <string> > > searchTermToGroupCounts = new JCG.Dictionary <string, IDictionary <string, ISet <string> > >();

            for (int i = 1; i <= numDocs; i++)
            {
                string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
                string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
                string content    = "random" + random.nextInt(numDocs / 20);
                IDictionary <string, ISet <string> > groupToCounts;
                if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
                {
                    // Groups sort always DOCID asc...
                    searchTermToGroupCounts.Add(content, groupToCounts = new JCG.LinkedDictionary <string, ISet <string> >());
                    contentStrings.Add(content);
                }

                ISet <string> countsVals;
                if (!groupToCounts.TryGetValue(groupValue, out countsVals))
                {
                    groupToCounts.Add(groupValue, countsVals = new JCG.HashSet <string>());
                }
                countsVals.Add(countValue);

                Document doc = new Document();
                doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
                if (groupValue != null)
                {
                    AddField(doc, groupField, groupValue, dvType);
                }
                if (countValue != null)
                {
                    AddField(doc, countField, countValue, dvType);
                }
                doc.Add(new TextField("content", content, Field.Store.YES));
                w.AddDocument(doc);
            }

            DirectoryReader reader = w.GetReader();

            if (VERBOSE)
            {
                for (int docID = 0; docID < reader.MaxDoc; docID++)
                {
                    Document doc = reader.Document(docID);
                    Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
                }
            }

            w.Dispose();
            return(new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/)));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Detect repetition groups. Done once - for first doc. </summary>
        private IList <IList <PhrasePositions> > GatherRptGroups(JCG.LinkedDictionary <Term, int?> rptTerms)
        {
            PhrasePositions[] rpp = RepeatingPPs(rptTerms);
            IList <IList <PhrasePositions> > res = new List <IList <PhrasePositions> >();

            if (!hasMultiTermRpts)
            {
                // simpler - no multi-terms - can base on positions in first doc
                for (int i = 0; i < rpp.Length; i++)
                {
                    PhrasePositions pp = rpp[i];
                    if (pp.rptGroup >= 0) // already marked as a repetition
                    {
                        continue;
                    }
                    int tpPos = TpPos(pp);
                    for (int j = i + 1; j < rpp.Length; j++)
                    {
                        PhrasePositions pp2 = rpp[j];
                        if (pp2.rptGroup >= 0 || pp2.offset == pp.offset || TpPos(pp2) != tpPos) // not a repetition -  not a repetition: two PPs are originally in same offset in the query! -  already marked as a repetition
                        {
                            continue;
                        }
                        // a repetition
                        int g = pp.rptGroup;
                        if (g < 0)
                        {
                            g           = res.Count;
                            pp.rptGroup = g;
                            List <PhrasePositions> rl = new List <PhrasePositions>(2)
                            {
                                pp
                            };
                            res.Add(rl);
                        }
                        pp2.rptGroup = g;
                        res[g].Add(pp2);
                    }
                }
            }
            else
            {
                // more involved - has multi-terms
                IList <JCG.HashSet <PhrasePositions> > tmp = new List <JCG.HashSet <PhrasePositions> >();
                IList <FixedBitSet> bb = PpTermsBitSets(rpp, rptTerms);
                UnionTermGroups(bb);
                IDictionary <Term, int> tg = TermGroups(rptTerms, bb);
                JCG.HashSet <int>       distinctGroupIDs = new JCG.HashSet <int>(tg.Values);
                for (int i = 0; i < distinctGroupIDs.Count; i++)
                {
                    tmp.Add(new JCG.HashSet <PhrasePositions>());
                }
                foreach (PhrasePositions pp in rpp)
                {
                    foreach (Term t in pp.terms)
                    {
                        if (rptTerms.ContainsKey(t))
                        {
                            int g = tg[t];
                            tmp[g].Add(pp);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(pp.rptGroup == -1 || pp.rptGroup == g);
                            }
                            pp.rptGroup = g;
                        }
                    }
                }
                foreach (JCG.HashSet <PhrasePositions> hs in tmp)
                {
                    res.Add(new List <PhrasePositions>(hs));
                }
            }
            return(res);
        }
        private void doTest(SpatialOperation operation)
        {
            //first show that when there's no data, a query will result in no results
            {
                Query         query         = strategy.MakeQuery(new SpatialArgs(operation, randomRectangle()));
                SearchResults searchResults = executeQuery(query, 1);
                assertEquals(0, searchResults.numFound);
            }

            bool biasContains = (operation == SpatialOperation.Contains);

            //Main index loop:
            IDictionary <String, IShape> indexedShapes   = new JCG.LinkedDictionary <String, IShape>();
            IDictionary <String, IShape> indexedShapesGS = new JCG.LinkedDictionary <String, IShape>();//grid snapped
            int numIndexedShapes = randomIntBetween(1, 6);

#pragma warning disable 219
            bool indexedAtLeastOneShapePair = false;
#pragma warning restore 219
            for (int i = 0; i < numIndexedShapes; i++)
            {
                String id = "" + i;
                IShape indexedShape;
                int    R = Random.nextInt(12);
                if (R == 0)
                {//1 in 12
                    indexedShape = null;
                }
                else if (R == 1)
                {                                 //1 in 12
                    indexedShape = randomPoint(); //just one point
                }
                else if (R <= 4)
                {//3 in 12
                 //comprised of more than one shape
                    indexedShape = randomShapePairRect(biasContains);
                    indexedAtLeastOneShapePair = true;
                }
                else
                {
                    indexedShape = randomRectangle();//just one rect
                }

                indexedShapes.Put(id, indexedShape);
                indexedShapesGS.Put(id, gridSnap(indexedShape));

                adoc(id, indexedShape);

                if (Random.nextInt(10) == 0)
                {
                    Commit();//intermediate commit, produces extra segments
                }
            }
            //delete some documents randomly
            IEnumerator <String> idIter = indexedShapes.Keys.ToList().GetEnumerator();
            while (idIter.MoveNext())
            {
                String id = idIter.Current;
                if (Random.nextInt(10) == 0)
                {
                    DeleteDoc(id);
                    //idIter.Remove();
                    indexedShapes.Remove(id);
                    indexedShapesGS.Remove(id);
                }
            }

            Commit();

            //Main query loop:
            int numQueryShapes = AtLeast(20);
            for (int i = 0; i < numQueryShapes; i++)
            {
                int scanLevel = randomInt(grid.MaxLevels);
                ((RecursivePrefixTreeStrategy)strategy).PrefixGridScanLevel = (scanLevel);

                IShape queryShape;
                switch (randomInt(10))
                {
                case 0: queryShape = randomPoint(); break;

                // LUCENE-5549
                //TODO debug: -Dtests.method=testWithin -Dtests.multiplier=3 -Dtests.seed=5F5294CE2E075A3E:AAD2F0F79288CA64
                //        case 1:case 2:case 3:
                //          if (!indexedAtLeastOneShapePair) { // avoids ShapePair.relate(ShapePair), which isn't reliable
                //            queryShape = randomShapePairRect(!biasContains);//invert biasContains for query side
                //            break;
                //          }
                default: queryShape = randomRectangle(); break;
                }
                IShape queryShapeGS = gridSnap(queryShape);

                bool opIsDisjoint = operation == SpatialOperation.IsDisjointTo;

                //Generate truth via brute force:
                // We ensure true-positive matches (if the predicate on the raw shapes match
                //  then the search should find those same matches).
                // approximations, false-positive matches
                ISet <string> expectedIds  = new JCG.LinkedHashSet <string>(); //true-positives
                ISet <string> secondaryIds = new JCG.LinkedHashSet <string>(); //false-positives (unless disjoint)
                foreach (var entry in indexedShapes)
                {
                    string id = entry.Key;
                    IShape indexedShapeCompare = entry.Value;
                    if (indexedShapeCompare == null)
                    {
                        continue;
                    }
                    IShape queryShapeCompare = queryShape;

                    if (operation.Evaluate(indexedShapeCompare, queryShapeCompare))
                    {
                        expectedIds.Add(id);
                        if (opIsDisjoint)
                        {
                            //if no longer intersect after buffering them, for disjoint, remember this
                            indexedShapeCompare = indexedShapesGS[id];
                            queryShapeCompare   = queryShapeGS;
                            if (!operation.Evaluate(indexedShapeCompare, queryShapeCompare))
                            {
                                secondaryIds.Add(id);
                            }
                        }
                    }
                    else if (!opIsDisjoint)
                    {
                        //buffer either the indexed or query shape (via gridSnap) and try again
                        if (operation == SpatialOperation.Intersects)
                        {
                            indexedShapeCompare = indexedShapesGS[id];
                            queryShapeCompare   = queryShapeGS;
                            //TODO Unfortunately, grid-snapping both can result in intersections that otherwise
                            // wouldn't happen when the grids are adjacent. Not a big deal but our test is just a
                            // bit more lenient.
                        }
                        else if (operation == SpatialOperation.Contains)
                        {
                            indexedShapeCompare = indexedShapesGS[id];
                        }
                        else if (operation == SpatialOperation.IsWithin)
                        {
                            queryShapeCompare = queryShapeGS;
                        }
                        if (operation.Evaluate(indexedShapeCompare, queryShapeCompare))
                        {
                            secondaryIds.Add(id);
                        }
                    }
                }

                //Search and verify results
                SpatialArgs args = new SpatialArgs(operation, queryShape);
                if (queryShape is ShapePair)
                {
                    args.DistErrPct = (0.0);//a hack; we want to be more detailed than gridSnap(queryShape)
                }
                Query         query = strategy.MakeQuery(args);
                SearchResults got   = executeQuery(query, 100);
                ISet <String> remainingExpectedIds = new JCG.LinkedHashSet <string>(expectedIds);
                foreach (SearchResult result in got.results)
                {
                    String id      = result.GetId();
                    bool   removed = remainingExpectedIds.Remove(id);
                    if (!removed && (!opIsDisjoint && !secondaryIds.Contains(id)))
                    {
                        fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape);
                    }
                }
                if (opIsDisjoint)
                {
                    remainingExpectedIds.ExceptWith(secondaryIds);
                }
                if (remainingExpectedIds.Any())
                {
                    var iter = remainingExpectedIds.GetEnumerator();
                    iter.MoveNext();
                    String id = iter.Current;
                    fail("Should have matched", id, indexedShapes, indexedShapesGS, queryShape);
                }
            }
        }
Ejemplo n.º 4
0
        private static void InitFromResourceBundle()
        {
            UResourceBundle keyTypeDataRes = UResourceBundle.GetBundleInstance(
                ICUData.IcuBaseName,
                "keyTypeData",
                ICUResourceBundle.IcuDataAssembly);

            GetKeyInfo(keyTypeDataRes.Get("keyInfo"));
            GetTypeInfo(keyTypeDataRes.Get("typeInfo"));

            UResourceBundle keyMapRes  = keyTypeDataRes.Get("keyMap");
            UResourceBundle typeMapRes = keyTypeDataRes.Get("typeMap");

            // alias data is optional
            UResourceBundle typeAliasRes    = null;
            UResourceBundle bcpTypeAliasRes = null;

            try
            {
                typeAliasRes = keyTypeDataRes.Get("typeAlias");
            }
            catch (MissingManifestResourceException)
            {
                // fall through
            }

            try
            {
                bcpTypeAliasRes = keyTypeDataRes.Get("bcpTypeAlias");
            }
            catch (MissingManifestResourceException)
            {
                // fall through
            }

            // iterate through keyMap resource
            using (UResourceBundleEnumerator keyMapItr = keyMapRes.GetEnumerator())
            {
                IDictionary <string, ISet <string> > _Bcp47Keys = new JCG.LinkedDictionary <string, ISet <string> >(); // ICU4N NOTE: As long as we don't delete, Dictionary keeps insertion order the same as LinkedHashMap

                while (keyMapItr.MoveNext())
                {
                    UResourceBundle keyMapEntry = keyMapItr.Current;
                    string          legacyKeyId = keyMapEntry.Key;
                    string          bcpKeyId    = keyMapEntry.GetString();

                    bool hasSameKey = false;
                    if (bcpKeyId.Length == 0)
                    {
                        // Empty value indicates that BCP key is same with the legacy key.
                        bcpKeyId   = legacyKeyId;
                        hasSameKey = true;
                    }
                    ISet <string> _bcp47Types = new JCG.LinkedHashSet <string>();
                    _Bcp47Keys[bcpKeyId] = _bcp47Types.AsReadOnly();

                    bool isTZ = legacyKeyId.Equals("timezone");

                    // reverse type alias map
                    IDictionary <string, ISet <string> > typeAliasMap = null;
                    if (typeAliasRes != null)
                    {
                        UResourceBundle typeAliasResByKey = null;
                        try
                        {
                            typeAliasResByKey = typeAliasRes.Get(legacyKeyId);
                        }
                        catch (MissingManifestResourceException)
                        {
                            // fall through
                        }
                        if (typeAliasResByKey != null)
                        {
                            typeAliasMap = new Dictionary <string, ISet <string> >();
                            using (UResourceBundleEnumerator typeAliasResItr = typeAliasResByKey.GetEnumerator())
                            {
                                while (typeAliasResItr.MoveNext())
                                {
                                    UResourceBundle typeAliasDataEntry = typeAliasResItr.Current;
                                    string          from = typeAliasDataEntry.Key;
                                    string          to   = typeAliasDataEntry.GetString();
                                    if (isTZ)
                                    {
                                        from = from.Replace(':', '/');
                                    }
                                    if (!typeAliasMap.TryGetValue(to, out ISet <string> aliasSet) || aliasSet == null)
                                    {
                                        aliasSet         = new JCG.HashSet <string>();
                                        typeAliasMap[to] = aliasSet;
                                    }
                                    aliasSet.Add(from);
                                }
                            }
                        }
                    }

                    // reverse bcp type alias map
                    IDictionary <string, ISet <string> > bcpTypeAliasMap = null;
                    if (bcpTypeAliasRes != null)
                    {
                        UResourceBundle bcpTypeAliasResByKey = null;
                        try
                        {
                            bcpTypeAliasResByKey = bcpTypeAliasRes.Get(bcpKeyId);
                        }
                        catch (MissingManifestResourceException)
                        {
                            // fall through
                        }
                        if (bcpTypeAliasResByKey != null)
                        {
                            bcpTypeAliasMap = new Dictionary <string, ISet <string> >();
                            using (UResourceBundleEnumerator bcpTypeAliasResItr = bcpTypeAliasResByKey.GetEnumerator())
                            {
                                while (bcpTypeAliasResItr.MoveNext())
                                {
                                    UResourceBundle bcpTypeAliasDataEntry = bcpTypeAliasResItr.Current;
                                    string          from = bcpTypeAliasDataEntry.Key;
                                    string          to   = bcpTypeAliasDataEntry.GetString();
                                    if (!bcpTypeAliasMap.TryGetValue(to, out ISet <string> aliasSet) || aliasSet == null)
                                    {
                                        aliasSet            = new JCG.HashSet <string>();
                                        bcpTypeAliasMap[to] = aliasSet;
                                    }
                                    aliasSet.Add(from);
                                }
                            }
                        }
                    }

                    IDictionary <string, Type> typeDataMap    = new Dictionary <string, Type>();
                    ISet <SpecialType>         specialTypeSet = null;

                    // look up type map for the key, and walk through the mapping data
                    UResourceBundle typeMapResByKey = null;
                    try
                    {
                        typeMapResByKey = typeMapRes.Get(legacyKeyId);
                    }
                    catch (MissingManifestResourceException)
                    {
                        // type map for each key must exist
                        Debug.Assert(false);
                    }
                    if (typeMapResByKey != null)
                    {
                        using (UResourceBundleEnumerator typeMapResByKeyItr = typeMapResByKey.GetEnumerator())
                            while (typeMapResByKeyItr.MoveNext())
                            {
                                UResourceBundle typeMapEntry = typeMapResByKeyItr.Current;
                                string          legacyTypeId = typeMapEntry.Key;
                                string          bcpTypeId    = typeMapEntry.GetString();

                                // special types
                                char first         = legacyTypeId[0];
                                bool isSpecialType = '9' < first && first < 'a' && bcpTypeId.Length == 0;
                                if (isSpecialType)
                                {
                                    if (specialTypeSet == null)
                                    {
                                        specialTypeSet = new JCG.HashSet <SpecialType>();
                                    }
                                    specialTypeSet.Add((SpecialType)Enum.Parse(typeof(SpecialType), legacyTypeId, true));
                                    _bcp47Types.Add(legacyTypeId);
                                    continue;
                                }

                                if (isTZ)
                                {
                                    // a timezone key uses a colon instead of a slash in the resource.
                                    // e.g. America:Los_Angeles
                                    legacyTypeId = legacyTypeId.Replace(':', '/');
                                }

                                bool hasSameType = false;
                                if (bcpTypeId.Length == 0)
                                {
                                    // Empty value indicates that BCP type is same with the legacy type.
                                    bcpTypeId   = legacyTypeId;
                                    hasSameType = true;
                                }
                                _bcp47Types.Add(bcpTypeId);

                                // Note: legacy type value should never be
                                // equivalent to bcp type value of a different
                                // type under the same key. So we use a single
                                // map for lookup.
                                Type t = new Type(legacyTypeId, bcpTypeId);
                                typeDataMap[AsciiUtil.ToLower(legacyTypeId)] = t;
                                if (!hasSameType)
                                {
                                    typeDataMap[AsciiUtil.ToLower(bcpTypeId)] = t;
                                }

                                // Also put aliases in the map
                                if (typeAliasMap != null)
                                {
                                    if (typeAliasMap.TryGetValue(legacyTypeId, out ISet <string> typeAliasSet) && typeAliasSet != null)
                                    {
                                        foreach (string alias in typeAliasSet)
                                        {
                                            typeDataMap[AsciiUtil.ToLower(alias)] = t;
                                        }
                                    }
                                }
                                if (bcpTypeAliasMap != null)
                                {
                                    if (bcpTypeAliasMap.TryGetValue(bcpTypeId, out ISet <string> bcpTypeAliasSet) && bcpTypeAliasSet != null)
                                    {
                                        foreach (string alias in bcpTypeAliasSet)
                                        {
                                            typeDataMap[AsciiUtil.ToLower(alias)] = t;
                                        }
                                    }
                                }
                            }
                    }

                    KeyData keyData = new KeyData(legacyKeyId, bcpKeyId, typeDataMap, specialTypeSet);

                    KEYMAP[AsciiUtil.ToLower(legacyKeyId)] = keyData;
                    if (!hasSameKey)
                    {
                        KEYMAP[AsciiUtil.ToLower(bcpKeyId)] = keyData;
                    }
                }

                BCP47_KEYS = _Bcp47Keys.AsReadOnly();
            }
        }