public virtual void TestMergeRandom()
        {
            PrefixCodedTerms[]   pb       = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
            JCG.SortedSet <Term> superSet = new JCG.SortedSet <Term>();

            for (int i = 0; i < pb.Length; i++)
            {
                JCG.SortedSet <Term> terms = new JCG.SortedSet <Term>();
                int nterms = TestUtil.NextInt32(Random, 0, 10000);
                for (int j = 0; j < nterms; j++)
                {
                    Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
                    terms.Add(term);
                }
                superSet.UnionWith(terms);

                PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
                foreach (Term @ref in terms)
                {
                    b.Add(@ref);
                }
                pb[i] = b.Finish();
            }

            JCG.List <IEnumerator <Term> > subs = new JCG.List <IEnumerator <Term> >();
            for (int i = 0; i < pb.Length; i++)
            {
                subs.Add(pb[i].GetEnumerator());
            }

            IEnumerator <Term> expected = superSet.GetEnumerator();
            IEnumerator <Term> actual   = new MergedEnumerator <Term>(subs.ToArray());

            while (actual.MoveNext())
            {
                Assert.IsTrue(expected.MoveNext());
                Assert.AreEqual(expected.Current, actual.Current);
            }
            Assert.IsFalse(expected.MoveNext());
        }
        /// <summary>
        /// Returns a sorted array containing unique field numbers. </summary>
        private int[] FlushFieldNums()
        {
            JCG.SortedSet <int> fieldNums = new JCG.SortedSet <int>();
            foreach (DocData dd in pendingDocs)
            {
                foreach (FieldData fd in dd.fields)
                {
                    fieldNums.Add(fd.fieldNum);
                }
            }

            int numDistinctFields = fieldNums.Count;

            Debug.Assert(numDistinctFields > 0);
            int bitsRequired = PackedInt32s.BitsRequired(fieldNums.Max);
            int token        = (Math.Min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;

            vectorsStream.WriteByte((byte)(sbyte)token);
            if (numDistinctFields - 1 >= 0x07)
            {
                vectorsStream.WriteVInt32(numDistinctFields - 1 - 0x07);
            }
            PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, fieldNums.Count, bitsRequired, 1);
            foreach (int fieldNum in fieldNums)
            {
                writer.Add(fieldNum);
            }
            writer.Finish();

            int[] fns = new int[fieldNums.Count];
            int   i   = 0;

            foreach (int key in fieldNums)
            {
                fns[i++] = key;
            }
            return(fns);
        }
Beispiel #3
0
        /// <summary>
        /// Tests that all documents up to maxDoc which are *not* in the
        /// expected result set, have an explanation which indicates that
        /// the document does not match
        /// </summary>
        public static void CheckNoMatchExplanations(Query q, string defaultFieldName, IndexSearcher searcher, int[] results)
        {
            string d = q.ToString(defaultFieldName);

            JCG.SortedSet <int?> ignore = new JCG.SortedSet <int?>();
            for (int i = 0; i < results.Length; i++)
            {
                ignore.Add(Convert.ToInt32(results[i], CultureInfo.InvariantCulture));
            }

            int maxDoc = searcher.IndexReader.MaxDoc;

            for (int doc = 0; doc < maxDoc; doc++)
            {
                if (ignore.Contains(Convert.ToInt32(doc, CultureInfo.InvariantCulture)))
                {
                    continue;
                }

                Explanation exp = searcher.Explain(q, doc);
                Assert.IsNotNull(exp, "Explanation of [[" + d + "]] for #" + doc + " is null");
                Assert.IsFalse(exp.IsMatch, "Explanation of [[" + d + "]] for #" + doc + " doesn't indicate non-match: " + exp.ToString());
            }
        }
Beispiel #4
0
        private void assertPOSToken(TokenStream ts, String term, params String[] tags)
        {
            ts.IncrementToken();
            assertEquals(term, ts.GetAttribute <ICharTermAttribute>().ToString());

            SCG.ISet <String> actual   = new JCG.SortedSet <String>(StringComparer.Ordinal);
            SCG.ISet <String> expected = new JCG.SortedSet <String>(StringComparer.Ordinal);
            foreach (StringBuilder b in ts.GetAttribute <IMorphosyntacticTagsAttribute>().Tags)
            {
                actual.Add(b.ToString());
            }
            foreach (String s in tags)
            {
                expected.Add(s);
            }

            // LUCENENET: Commented out unnecessary extra check
            //if (!expected.Equals(actual))
            //{
            //    Console.WriteLine("Expected:\n" + expected);
            //    Console.WriteLine("Actual:\n" + actual);
            assertEquals(expected, actual, aggressive: false);
            //}
        }
Beispiel #5
0
            private void UpdateNextGroup(int topN, ShardIter <T> shard)
            {
                while (shard.Iter.MoveNext())
                {
                    ISearchGroup <T> group = shard.Next();
                    bool             isNew = !groupsSeen.TryGetValue(group.GroupValue, out MergedGroup <T> mergedGroup) || mergedGroup == null;
                    //System.out.println("    next group=" + (group.groupValue == null ? "null" : ((BytesRef) group.groupValue).utf8ToString()) + " sort=" + Arrays.toString(group.sortValues));

                    if (isNew)
                    {
                        // Start a new group:
                        //System.out.println("      new");
                        mergedGroup = new MergedGroup <T>(group.GroupValue);
                        mergedGroup.MinShardIndex = shard.ShardIndex;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(group.SortValues != null);
                        }
                        mergedGroup.TopValues        = group.SortValues;
                        groupsSeen[group.GroupValue] = mergedGroup;
                        mergedGroup.IsInQueue        = true;
                        queue.Add(mergedGroup);
                    }
                    else if (mergedGroup.IsProcessed)
                    {
                        // This shard produced a group that we already
                        // processed; move on to next group...
                        continue;
                    }
                    else
                    {
                        //System.out.println("      old");
                        bool competes = false;
                        for (int compIDX = 0; compIDX < groupComp.Comparers.Length; compIDX++)
                        {
                            int cmp = groupComp.Reversed[compIDX] * groupComp.Comparers[compIDX].CompareValues(group.SortValues[compIDX],
                                                                                                               mergedGroup.TopValues[compIDX]);
                            if (cmp < 0)
                            {
                                // Definitely competes
                                competes = true;
                                break;
                            }
                            else if (cmp > 0)
                            {
                                // Definitely does not compete
                                break;
                            }
                            else if (compIDX == groupComp.Comparers.Length - 1)
                            {
                                if (shard.ShardIndex < mergedGroup.MinShardIndex)
                                {
                                    competes = true;
                                }
                            }
                        }

                        //System.out.println("      competes=" + competes);

                        if (competes)
                        {
                            // Group's sort changed -- remove & re-insert
                            if (mergedGroup.IsInQueue)
                            {
                                queue.Remove(mergedGroup);
                            }
                            mergedGroup.TopValues     = group.SortValues;
                            mergedGroup.MinShardIndex = shard.ShardIndex;
                            queue.Add(mergedGroup);
                            mergedGroup.IsInQueue = true;
                        }
                    }

                    mergedGroup.Shards.Add(shard);
                    break;
                }

                // Prune un-competitive groups:
                while (queue.Count > topN)
                {
                    MergedGroup <T> group = queue.Max;
                    queue.Remove(group);
                    //System.out.println("PRUNE: " + group);
                    group.IsInQueue = false;
                }
            }
Beispiel #6
0
            /// <summary>
            /// If back plus this arc is competitive then add to queue:
            /// </summary>
            protected virtual void AddIfCompetitive(FSTPath <T> path)
            {
                Debug.Assert(queue != null);

                T cost = fst.Outputs.Add(path.Cost, path.Arc.Output);

                //System.out.println("  addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);

                if (queue.Count == maxQueueDepth)
                {
                    FSTPath <T> bottom = queue.Max;
                    int         comp   = comparer.Compare(cost, bottom.Cost);
                    if (comp > 0)
                    {
                        // Doesn't compete
                        return;
                    }
                    else if (comp == 0)
                    {
                        // Tie break by alpha sort on the input:
                        path.Input.Grow(path.Input.Length + 1);
                        path.Input.Int32s[path.Input.Length++] = path.Arc.Label;
                        int cmp = bottom.Input.CompareTo(path.Input);
                        path.Input.Length--;

                        // We should never see dups:
                        Debug.Assert(cmp != 0);

                        if (cmp < 0)
                        {
                            // Doesn't compete
                            return;
                        }
                    }
                    // Competes
                }
                else
                {
                    // Queue isn't full yet, so any path we hit competes:
                }

                // copy over the current input to the new input
                // and add the arc.label to the end
                Int32sRef newInput = new Int32sRef(path.Input.Length + 1);

                Array.Copy(path.Input.Int32s, 0, newInput.Int32s, 0, path.Input.Length);
                newInput.Int32s[path.Input.Length] = path.Arc.Label;
                newInput.Length = path.Input.Length + 1;
                FSTPath <T> newPath = new FSTPath <T>(cost, path.Arc, newInput);

                queue.Add(newPath);

                if (queue.Count == maxQueueDepth + 1)
                {
                    // LUCENENET NOTE: SortedSet doesn't have atomic operations,
                    // so we need to add some thread safety just in case.
                    // Perhaps it might make sense to wrap SortedSet into a type
                    // that provides thread safety.
                    lock (syncLock)
                    {
                        queue.Remove(queue.Max);
                    }
                }
            }
        private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
        {
            Random random    = Random;
            int    numDocs   = TestUtil.NextInt32(random, 138, 1145) * RandomMultiplier;
            int    numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
            int    numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

            if (Verbose)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
            }

            List <string> groups = new List <string>();

            for (int i = 0; i < numGroups; i++)
            {
                groups.Add(GenerateRandomNonEmptyString());
            }
            List <string> facetValues = new List <string>();

            for (int i = 0; i < numFacets; i++)
            {
                facetValues.Add(GenerateRandomNonEmptyString());
            }
            string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
            if (Verbose)
            {
                Console.WriteLine("TEST: create fake content");
            }
            for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
            {
                contentBrs[contentIDX] = GenerateRandomNonEmptyString();
                if (Verbose)
                {
                    Console.WriteLine("  content=" + contentBrs[contentIDX]);
                }
            }

            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(
                    TEST_VERSION_CURRENT,
                    new MockAnalyzer(random)
                    )
                );
            bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            bool useDv    = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

            Document doc               = new Document();
            Document docNoGroup        = new Document();
            Document docNoFacet        = new Document();
            Document docNoGroupNoFacet = new Document();
            Field    group             = NewStringField("group", "", Field.Store.NO);
            Field    groupDc           = new SortedDocValuesField("group_dv", new BytesRef());

            if (useDv)
            {
                doc.Add(groupDc);
                docNoFacet.Add(groupDc);
            }
            doc.Add(group);
            docNoFacet.Add(group);
            Field[] facetFields;
            if (useDv)
            {
                Debug.Assert(!multipleFacetValuesPerDocument);
                facetFields    = new Field[2];
                facetFields[0] = NewStringField("facet", "", Field.Store.NO);
                doc.Add(facetFields[0]);
                docNoGroup.Add(facetFields[0]);
                facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
                doc.Add(facetFields[1]);
                docNoGroup.Add(facetFields[1]);
            }
            else
            {
                facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
                for (int i = 0; i < facetFields.Length; i++)
                {
                    facetFields[i] = NewStringField("facet", "", Field.Store.NO);
                    doc.Add(facetFields[i]);
                    docNoGroup.Add(facetFields[i]);
                }
            }
            Field content = NewStringField("content", "", Field.Store.NO);

            doc.Add(content);
            docNoGroup.Add(content);
            docNoFacet.Add(content);
            docNoGroupNoFacet.Add(content);

            ISet <string> uniqueFacetValues = new JCG.SortedSet <string>(Comparer <string> .Create((a, b) => {
                if (a == b)
                {
                    return(0);
                }
                else if (a == null)
                {
                    return(-1);
                }
                else if (b == null)
                {
                    return(1);
                }
                else
                {
                    return(a.CompareToOrdinal(b));
                }
            }));

            // LUCENENET NOTE: Need JCG.Dictionary here because of null keys
            IDictionary <string, JCG.Dictionary <string, ISet <string> > > searchTermToFacetToGroups = new Dictionary <string, JCG.Dictionary <string, ISet <string> > >();
            int facetWithMostGroups = 0;

            for (int i = 0; i < numDocs; i++)
            {
                string groupValue;
                if (random.nextInt(24) == 17)
                {
                    // So we test the "doc doesn't have the group'd
                    // field" case:
                    if (useDv)
                    {
                        groupValue = "";
                    }
                    else
                    {
                        groupValue = null;
                    }
                }
                else
                {
                    groupValue = groups[random.nextInt(groups.size())];
                }

                string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
                if (!searchTermToFacetToGroups.TryGetValue(contentStr, out JCG.Dictionary <string, ISet <string> > facetToGroups))
                {
                    searchTermToFacetToGroups[contentStr] = facetToGroups = new JCG.Dictionary <string, ISet <string> >();
                }

                List <string> facetVals = new List <string>();
                if (useDv || random.nextInt(24) != 18)
                {
                    if (useDv)
                    {
                        string facetValue = facetValues[random.nextInt(facetValues.size())];
                        uniqueFacetValues.Add(facetValue);
                        if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                        {
                            facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                        }
                        groupsInFacet.add(groupValue);
                        if (groupsInFacet.size() > facetWithMostGroups)
                        {
                            facetWithMostGroups = groupsInFacet.size();
                        }
                        facetFields[0].SetStringValue(facetValue);
                        facetFields[1].SetBytesValue(new BytesRef(facetValue));
                        facetVals.Add(facetValue);
                    }
                    else
                    {
                        foreach (Field facetField in facetFields)
                        {
                            string facetValue = facetValues[random.nextInt(facetValues.size())];
                            uniqueFacetValues.Add(facetValue);
                            if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                            {
                                facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                            }
                            groupsInFacet.add(groupValue);
                            if (groupsInFacet.size() > facetWithMostGroups)
                            {
                                facetWithMostGroups = groupsInFacet.size();
                            }
                            facetField.SetStringValue(facetValue);
                            facetVals.Add(facetValue);
                        }
                    }
                }
                else
                {
                    uniqueFacetValues.Add(null);
                    if (!facetToGroups.TryGetValue(null, out ISet <string> groupsInFacet))
                    {
                        facetToGroups[null] = groupsInFacet = new JCG.HashSet <string>();
                    }
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                }

                if (Verbose)
                {
                    Console.WriteLine("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + Collections.ToString(facetVals));
                }

                if (groupValue != null)
                {
                    if (useDv)
                    {
                        groupDc.SetBytesValue(new BytesRef(groupValue));
                    }
                    group.SetStringValue(groupValue);
                }
                else if (useDv)
                {
                    // DV cannot have missing values:
                    groupDc.SetBytesValue(new BytesRef());
                }
                content.SetStringValue(contentStr);
                if (groupValue == null && facetVals.Count == 0)
                {
                    writer.AddDocument(docNoGroupNoFacet);
                }
                else if (facetVals.Count == 0)
                {
                    writer.AddDocument(docNoFacet);
                }
                else if (groupValue == null)
                {
                    writer.AddDocument(docNoGroup);
                }
                else
                {
                    writer.AddDocument(doc);
                }
            }

            DirectoryReader reader = writer.GetReader();

            writer.Dispose();

            return(new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv));
        }
Beispiel #8
0
        public virtual void TestIntersectRandom()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            ISet <string>               terms        = new JCG.HashSet <string>();
            ICollection <string>        pendingTerms = new JCG.List <string>();
            IDictionary <BytesRef, int> termToID     = new Dictionary <BytesRef, int>();
            int id = 0;

            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random.Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]      termsArray = new BytesRef[terms.Count];
            ISet <BytesRef> termsSet   = new JCG.HashSet <BytesRef>();

            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (Verbose)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.GetReader();

            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RandomMultiplier; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                ISet <string>            acceptTerms       = new JCG.HashSet <string>();
                JCG.SortedSet <BytesRef> sortedAcceptTerms = new JCG.SortedSet <BytesRef>();
                double    keepPct = Random.NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random.NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random.NextBoolean())
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]      acceptTermsArray = new BytesRef[acceptTerms.Count];
                ISet <BytesRef> acceptTermsSet   = new JCG.HashSet <BytesRef>();
                int             upto             = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (Verbose)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)];

                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm is null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm is null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        Assert.IsTrue(te.MoveNext());
                        BytesRef actual = te.Term;
                        if (Verbose)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual is null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq);
                        docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsFalse(te.MoveNext());
                }
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void Collect(int doc)
        {
            //System.out.println("FP.collect doc=" + doc);

            // If orderedGroups != null we already have collected N groups and
            // can short circuit by comparing this document to the bottom group,
            // without having to find what group this document belongs to.

            // Even if this document belongs to a group in the top N, we'll know that
            // we don't have to update that group.

            // Downside: if the number of unique groups is very low, this is
            // wasted effort as we will most likely be updating an existing group.
            if (m_orderedGroups != null)
            {
                for (int compIDX = 0; ; compIDX++)
                {
                    int c = reversed[compIDX] * comparers[compIDX].CompareBottom(doc);
                    if (c < 0)
                    {
                        // Definitely not competitive. So don't even bother to continue
                        return;
                    }
                    else if (c > 0)
                    {
                        // Definitely competitive.
                        break;
                    }
                    else if (compIDX == compIDXEnd)
                    {
                        // Here c=0. If we're at the last comparer, this doc is not
                        // competitive, since docs are visited in doc Id order, which means
                        // this doc cannot compete with any other document in the queue.
                        return;
                    }
                }
            }

            // TODO: should we add option to mean "ignore docs that
            // don't have the group field" (instead of stuffing them
            // under null group)?
            TGroupValue groupValue = GetDocGroupValue(doc);

            if (!groupMap.TryGetValue(groupValue, out CollectedSearchGroup <TGroupValue> group))
            {
                // First time we are seeing this group, or, we've seen
                // it before but it fell out of the top N and is now
                // coming back

                if (groupMap.Count < topNGroups)
                {
                    // Still in startup transient: we have not
                    // seen enough unique groups to start pruning them;
                    // just keep collecting them

                    // Add a new CollectedSearchGroup:
                    CollectedSearchGroup <TGroupValue> sg = new CollectedSearchGroup <TGroupValue>();
                    sg.GroupValue   = CopyDocGroupValue(groupValue, default);
                    sg.ComparerSlot = groupMap.Count;
                    sg.TopDoc       = docBase + doc;
                    foreach (FieldComparer fc in comparers)
                    {
                        fc.Copy(sg.ComparerSlot, doc);
                    }
                    groupMap[sg.GroupValue] = sg;

                    if (groupMap.Count == topNGroups)
                    {
                        // End of startup transient: we now have max
                        // number of groups; from here on we will drop
                        // bottom group when we insert new one:
                        BuildSortedSet();
                    }

                    return;
                }

                // We already tested that the document is competitive, so replace
                // the bottom group with this new group.
                //CollectedSearchGroup<TGroupValue> bottomGroup = orderedGroups.PollLast();
                CollectedSearchGroup <TGroupValue> bottomGroup;
                UninterruptableMonitor.Enter(m_orderedGroups);
                try
                {
                    bottomGroup = m_orderedGroups.Last();
                    m_orderedGroups.Remove(bottomGroup);
                }
                finally
                {
                    UninterruptableMonitor.Exit(m_orderedGroups);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups - 1);
                }

                groupMap.Remove(bottomGroup.GroupValue);

                // reuse the removed CollectedSearchGroup
                bottomGroup.GroupValue = CopyDocGroupValue(groupValue, bottomGroup.GroupValue);
                bottomGroup.TopDoc     = docBase + doc;

                foreach (FieldComparer fc in comparers)
                {
                    fc.Copy(bottomGroup.ComparerSlot, doc);
                }

                groupMap[bottomGroup.GroupValue] = bottomGroup;
                m_orderedGroups.Add(bottomGroup);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups);
                }

                int lastComparerSlot = m_orderedGroups.Last().ComparerSlot;
                foreach (FieldComparer fc in comparers)
                {
                    fc.SetBottom(lastComparerSlot);
                }

                return;
            }

            // Update existing group:
            for (int compIDX = 0; ; compIDX++)
            {
                FieldComparer fc = comparers[compIDX];
                fc.Copy(spareSlot, doc);

                int c = reversed[compIDX] * fc.Compare(group.ComparerSlot, spareSlot);
                if (c < 0)
                {
                    // Definitely not competitive.
                    return;
                }
                else if (c > 0)
                {
                    // Definitely competitive; set remaining comparers:
                    for (int compIDX2 = compIDX + 1; compIDX2 < comparers.Length; compIDX2++)
                    {
                        comparers[compIDX2].Copy(spareSlot, doc);
                    }
                    break;
                }
                else if (compIDX == compIDXEnd)
                {
                    // Here c=0. If we're at the last comparer, this doc is not
                    // competitive, since docs are visited in doc Id order, which means
                    // this doc cannot compete with any other document in the queue.
                    return;
                }
            }

            // Remove before updating the group since lookup is done via comparers
            // TODO: optimize this

            CollectedSearchGroup <TGroupValue> prevLast;

            if (m_orderedGroups != null)
            {
                UninterruptableMonitor.Enter(m_orderedGroups);
                try
                {
                    prevLast = m_orderedGroups.Last();
                    m_orderedGroups.Remove(group);
                }
                finally
                {
                    UninterruptableMonitor.Exit(m_orderedGroups);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups - 1);
                }
            }
            else
            {
                prevLast = null;
            }

            group.TopDoc = docBase + doc;

            // Swap slots
            int tmp = spareSlot;

            spareSlot          = group.ComparerSlot;
            group.ComparerSlot = tmp;

            // Re-add the changed group
            if (m_orderedGroups != null)
            {
                m_orderedGroups.Add(group);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups);
                }
                var newLast = m_orderedGroups.Last();
                // If we changed the value of the last group, or changed which group was last, then update bottom:
                if (group == newLast || prevLast != newLast)
                {
                    foreach (FieldComparer fc in comparers)
                    {
                        fc.SetBottom(newLast.ComparerSlot);
                    }
                }
            }
        }
Beispiel #10
0
        /// <summary>
        /// Internal constructor, only used by <see cref="InternalLocaleBuilder"/>.
        /// </summary>
        internal LocaleExtensions(IDictionary <CaseInsensitiveChar, string> extensions,
                                  ISet <CaseInsensitiveString> uattributes, IDictionary <CaseInsensitiveString, string> ukeywords)
        {
            bool hasExtension   = (extensions != null && extensions.Count > 0);
            bool hasUAttributes = (uattributes != null && uattributes.Count > 0);
            bool hasUKeywords   = (ukeywords != null && ukeywords.Count > 0);

            if (!hasExtension && !hasUAttributes && !hasUKeywords)
            {
                _map = EmptyMap;
                _id  = "";
                return;
            }

            // Build extension map
            _map = new JCG.SortedDictionary <char, Extension>();
            if (hasExtension)
            {
                foreach (var ext in extensions)
                {
                    char   key   = AsciiUtil.ToLower(ext.Key.Value);
                    string value = ext.Value;

                    if (LanguageTag.IsPrivateusePrefixChar(key))
                    {
                        // we need to exclude special variant in privuateuse, e.g. "x-abc-lvariant-DEF"
                        value = InternalLocaleBuilder.RemovePrivateuseVariant(value);
                        if (value == null)
                        {
                            continue;
                        }
                    }

                    Extension e = new Extension(key, AsciiUtil.ToLower(value));
                    _map[key] = e;
                }
            }

            if (hasUAttributes || hasUKeywords)
            {
                JCG.SortedSet <string> uaset = null;
                JCG.SortedDictionary <string, string> ukmap = null;

                if (hasUAttributes)
                {
                    uaset = new JCG.SortedSet <string>(StringComparer.Ordinal);
                    foreach (CaseInsensitiveString cis in uattributes)
                    {
                        uaset.Add(AsciiUtil.ToLower(cis.Value));
                    }
                }

                if (hasUKeywords)
                {
                    ukmap = new JCG.SortedDictionary <string, string>(StringComparer.Ordinal);
                    foreach (var kwd in ukeywords)
                    {
                        string key  = AsciiUtil.ToLower(kwd.Key.Value);
                        string type = AsciiUtil.ToLower(kwd.Value);
                        ukmap[key] = type;
                    }
                }

                UnicodeLocaleExtension ule = new UnicodeLocaleExtension(uaset, ukmap);
                _map[UnicodeLocaleExtension.Singleton] = ule;
            }

            if (_map.Count == 0)
            {
                // this could happen when only privuateuse with special variant
                _map = EmptyMap;
                _id  = "";
            }
            else
            {
                _id = ToID(_map);
            }
        }