Esempio n. 1
0
        /// <summary>
        /// Prepares the fixture: picks the iteration count, indexes a batch of random
        /// unicode strings into a single reused "field", records them in <c>terms</c>,
        /// builds the automaton that is the union of those terms, and opens a
        /// reader/searcher over the index.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();

            // The generated regexps are awful, which is good for testing, but with the
            // preflex codec the test can be very slow, so that codec gets fewer iterations.
            if (Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal))
            {
                numIterations = 10 * RandomMultiplier;
            }
            else
            {
                numIterations = AtLeast(50);
            }

            dir = NewDirectory();

            var keywordAnalyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
            var writerConfig = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, keywordAnalyzer).SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000));
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, dir, writerConfig);

            // One document with one field, re-populated for every added term.
            Document reusedDoc = new Document();
            Field reusedField = NewStringField("field", "", Field.Store.YES);
            reusedDoc.Add(reusedField);

            terms = new JCG.SortedSet<BytesRef>();

            int termCount = AtLeast(200);
            for (int added = 0; added < termCount; added++)
            {
                string text = TestUtil.RandomUnicodeString(Random);
                reusedField.SetStringValue(text);
                terms.Add(new BytesRef(text));
                indexWriter.AddDocument(reusedDoc);
            }

            termsAutomaton = BasicAutomata.MakeStringUnion(terms);

            reader = indexWriter.GetReader();
            searcher = NewSearcher(reader);
            indexWriter.Dispose();
        }
Esempio n. 2
0
        /// <summary>
        /// Tests that a query matches an expected set of documents using Hits.
        ///
        /// <para>Note that when using the Hits API, documents will only be returned
        /// if they have a positive normalized score.
        /// </para>
        /// </summary>
        /// <param name="luceneTestCase"> The current test instance. </param>
        /// <param name="random"> The random source passed on to <c>QueryUtils.Check</c>. </param>
        /// <param name="query"> The query to test. </param>
        /// <param name="searcher"> The searcher to test the query against. </param>
        /// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
        /// <param name="results"> A list of documentIds that must match the query. </param>
        /// <seealso cref="CheckHitCollector(LuceneTestCase, Random, Query, string, IndexSearcher, int[])"/>
        // LUCENENET specific
        // Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/>
        public static void DoCheckHits(LuceneTestCase luceneTestCase, Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results)
#endif
        {
            // Only the first 1000 hits are requested from the searcher.
            ScoreDoc[] hits = searcher.Search(query, 1000).ScoreDocs;

            // The ids are already ints, so they are stored as-is; routing them through
            // Convert.ToInt32(object, IFormatProvider) only boxed and unboxed each value.
            JCG.SortedSet<int> correct = new JCG.SortedSet<int>();
            foreach (int expectedDoc in results)
            {
                correct.Add(expectedDoc);
            }

            JCG.SortedSet<int> actual = new JCG.SortedSet<int>();
            foreach (ScoreDoc hit in hits)
            {
                actual.Add(hit.Doc);
            }

            Assert.AreEqual(correct, actual, aggressive: false, () => query.ToString(defaultFieldName));

            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                luceneTestCase,
#endif
                random, query, searcher, LuceneTestCase.Rarely(random));
        }
Esempio n. 3
0
        /// <summary>
        /// Collects every type defined in <paramref name="assemblies"/> that
        /// <paramref name="baseClass"/> is assignable from, ordered by an ordinal
        /// comparison of the types' full names.
        /// </summary>
        /// <param name="baseClass">The type that each candidate is tested against.</param>
        /// <param name="assemblies">The assemblies whose types are scanned.</param>
        /// <returns>The matching types as a sorted set.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="baseClass"/> or <paramref name="assemblies"/> is <c>null</c>.
        /// </exception>
        public static ISet<Type> LoadTypesSubclassing(Type baseClass, params Assembly[] assemblies)
        {
            if (baseClass is null)
            {
                throw new ArgumentNullException(nameof(baseClass));
            }
            if (assemblies is null)
            {
                throw new ArgumentNullException(nameof(assemblies));
            }

            // Order (and de-duplicate) the matches by full type name.
            var byFullName = Comparer<Type>.Create((x, y) => x.FullName.CompareToOrdinal(y.FullName));
            var matches = new JCG.SortedSet<Type>(byFullName);

            foreach (var assembly in assemblies)
            {
                foreach (var candidate in assembly.GetTypes())
                {
                    if (!baseClass.IsAssignableFrom(candidate))
                    {
                        continue;
                    }
                    matches.Add(candidate);
                }
            }

            return matches;
        }
Esempio n. 4
0
        /// <summary>
        /// Tests that a query matches an expected set of documents using a
        /// HitCollector.
        /// <para>
        /// Note that when using the HitCollector API, documents will be collected
        /// if they "match" regardless of what their score is.
        /// </para>
        /// </summary>
        /// <param name="luceneTestCase"> The current test instance. </param>
        /// <param name="random"> The random source passed on to <c>QueryUtils</c>. </param>
        /// <param name="query"> The query to test. </param>
        /// <param name="searcher"> The searcher to test the query against. </param>
        /// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
        /// <param name="results"> A list of documentIds that must match the query. </param>
        /// <seealso cref="DoCheckHits(LuceneTestCase, Random, Query, string, IndexSearcher, int[])"/>
        // LUCENENET specific
        // Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/>
        public static void CheckHitCollector(LuceneTestCase luceneTestCase, Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results)
#endif
        {
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                luceneTestCase,
#endif
                random, query, searcher);

            Trace.TraceInformation("Checked");

            // The ids are already ints, so they are stored as-is; routing them through
            // Convert.ToInt32(object, IFormatProvider) only boxed and unboxed each value.
            JCG.SortedSet<int> correct = new JCG.SortedSet<int>();
            foreach (int expectedDoc in results)
            {
                correct.Add(expectedDoc);
            }

            // The collector is handed the "actual" set, which is cleared and compared
            // again after each wrapped search below.
            JCG.SortedSet<int> actual = new JCG.SortedSet<int>();
            ICollector c = new SetCollector(actual);

            searcher.Search(query, c);

            Assert.AreEqual(correct, actual, aggressive: false, () => "Simple: " + query.ToString(defaultFieldName));

            // Repeat the search through QueryUtils.WrapUnderlyingReader for each of the
            // wrap modes -1, 0 and 1.
            for (int i = -1; i < 2; i++)
            {
                actual.Clear();
                IndexSearcher s = QueryUtils.WrapUnderlyingReader(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    luceneTestCase,
#endif
                    random, searcher, i);
                s.Search(query, c);
                Assert.AreEqual(correct, actual, aggressive: false, () => "Wrap Reader " + i + ": " + query.ToString(defaultFieldName));
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Creates the extension from the given attributes and keywords and, when at
        /// least one of them is present, builds the extension's string value by joining
        /// every attribute, key and non-empty keyword value with
        /// <see cref="LanguageTag.Separator"/>.
        /// </summary>
        /// <param name="attributes">Attributes to adopt; ignored when null or empty.</param>
        /// <param name="keywords">Keywords to adopt; ignored when null or empty.</param>
        internal UnicodeLocaleExtension(JCG.SortedSet<string> attributes, JCG.SortedDictionary<string, string> keywords)
            : this()
        {
            // Null or empty arguments leave whatever this() assigned in place.
            if (attributes != null && attributes.Count > 0)
            {
                _attributes = attributes;
            }
            if (keywords != null && keywords.Count > 0)
            {
                _keywords = keywords;
            }

            // Nothing to render: m_value is left as this() set it.
            if (_attributes.Count == 0 && _keywords.Count == 0)
            {
                return;
            }

            StringBuilder text = new StringBuilder();
            foreach (string attribute in _attributes)
            {
                text.Append(LanguageTag.Separator);
                text.Append(attribute);
            }
            foreach (var keyword in _keywords)
            {
                text.Append(LanguageTag.Separator);
                text.Append(keyword.Key);

                string type = keyword.Value;
                if (type.Length > 0)
                {
                    text.Append(LanguageTag.Separator);
                    text.Append(type);
                }
            }

            // Every piece was prefixed with a separator; drop the very first character.
            m_value = text.ToString(1, text.Length - 1);
        }
Esempio n. 6
0
        /// <summary>
        /// Creates the weight for <paramref name="query"/>: extracts the query's terms,
        /// builds a <see cref="TermContext"/> and term statistics for each of them
        /// against the searcher's top-level reader context and, when the query has a
        /// field, computes the similarity weight from those statistics.
        /// </summary>
        /// <param name="query">The span query being weighted.</param>
        /// <param name="searcher">Supplies the similarity, reader context and statistics.</param>
        public SpanWeight(SpanQuery query, IndexSearcher searcher)
        {
            m_similarity = searcher.Similarity;
            m_query = query;
            m_termContexts = new Dictionary<Term, TermContext>();

            ISet<Term> queryTerms = new JCG.SortedSet<Term>();
            query.ExtractTerms(queryTerms);

            IndexReaderContext topContext = searcher.TopReaderContext;
            TermStatistics[] statsPerTerm = new TermStatistics[queryTerms.Count];

            int slot = 0;
            foreach (Term term in queryTerms)
            {
                TermContext termContext = TermContext.Build(topContext, term);
                statsPerTerm[slot] = searcher.TermStatistics(term, termContext);
                m_termContexts[term] = termContext;
                slot++;
            }

            // Without a field no weight is computed and m_stats is left unassigned.
            string field = query.Field;
            if (field == null)
            {
                return;
            }

            m_stats = m_similarity.ComputeWeight(query.Boost, searcher.CollectionStatistics(query.Field), statsPerTerm);
        }
        /// <summary>
        /// Feeds a sorted set of random terms into a <see cref="PrefixCodedTerms.Builder"/>
        /// and checks that enumerating the finished <see cref="PrefixCodedTerms"/> yields
        /// exactly the same terms in the same order.
        /// </summary>
        public virtual void TestRandom()
        {
            JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
            int nterms = AtLeast(10000);

            for (int i = 0; i < nterms; i++)
            {
                Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random));
                terms.Add(term);
            }

            PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
            foreach (Term @ref in terms)
            {
                b.Add(@ref);
            }
            PrefixCodedTerms pb = b.Finish();

            // IEnumerator<T> is IDisposable; dispose it even when an assertion throws.
            using (IEnumerator<Term> expected = terms.GetEnumerator())
            {
                foreach (Term t in pb)
                {
                    Assert.IsTrue(expected.MoveNext());
                    Assert.AreEqual(expected.Current, t);
                }

                // The sorted set must be exhausted too, i.e. no term went missing.
                Assert.IsFalse(expected.MoveNext());
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Adds to <paramref name="payloads"/> every payload exposed by the spans of
        /// <paramref name="query"/> over each leaf of the <c>context</c> field.
        /// </summary>
        /// <param name="payloads">Receives the payload byte arrays.</param>
        /// <param name="query">The span query whose spans are walked.</param>
        private void GetPayloads(ICollection<byte[]> payloads, SpanQuery query)
        {
            var queryTerms = new JCG.SortedSet<Term>();
            query.ExtractTerms(queryTerms);

            // One TermContext per extracted term, built against the whole context.
            IDictionary<Term, TermContext> contextByTerm = new Dictionary<Term, TermContext>();
            foreach (Term term in queryTerms)
            {
                contextByTerm[term] = TermContext.Build(context, term);
            }

            foreach (AtomicReaderContext leaf in context.Leaves)
            {
                Spans leafSpans = query.GetSpans(leaf, leaf.AtomicReader.LiveDocs, contextByTerm);
                while (leafSpans.MoveNext())
                {
                    if (!leafSpans.IsPayloadAvailable)
                    {
                        continue;
                    }

                    foreach (var bytes in leafSpans.GetPayload())
                    {
                        payloads.Add(bytes);
                    }
                }
            }
        }
Esempio n. 9
0
 /// <summary>
 /// Creates an empty result whose entries are kept in a sorted set, ordered either
 /// by count and value or by value alone.
 /// </summary>
 /// <param name="size">Stored as <c>maxSize</c>.</param>
 /// <param name="minCount">Stored as the initial <c>currentMin</c>.</param>
 /// <param name="orderByCount">Selects <c>orderByCountAndValue</c> when true, <c>orderByValue</c> otherwise.</param>
 /// <param name="totalCount">Stored as <c>totalCount</c>.</param>
 /// <param name="totalMissingCount">Stored as <c>totalMissingCount</c>.</param>
 public GroupedFacetResult(int size, int minCount, bool orderByCount, int totalCount, int totalMissingCount)
 {
     var entryOrder = orderByCount ? orderByCountAndValue : orderByValue;
     facetEntries = new JCG.SortedSet<FacetEntry>(entryOrder);

     this.totalMissingCount = totalMissingCount;
     this.totalCount = totalCount;

     maxSize = size;
     currentMin = minCount;
 }
Esempio n. 10
0
        /// <summary>
        /// Expert: specify a custom maximum possible symbol
        /// (alphaMax); default is <see cref="Character.MaxCodePoint"/>.
        /// </summary>
        /// <param name="word">The symbols of the word the automata are built for.</param>
        /// <param name="alphaMax">The largest symbol allowed; no symbol of <paramref name="word"/> may exceed it.</param>
        /// <param name="withTranspositions">
        /// Selects the <c>Lev1T</c>/<c>Lev2T</c> parametric descriptions instead of
        /// <c>Lev1</c>/<c>Lev2</c>.
        /// </param>
        /// <exception cref="ArgumentException">A symbol in <paramref name="word"/> is greater than <paramref name="alphaMax"/>.</exception>
        public LevenshteinAutomata(int[] word, int alphaMax, bool withTranspositions)
        {
            this.word = word;
            this.alphaMax = alphaMax;

            // The alphabet is the set of distinct symbols of the word, in ascending order.
            ISet<int> symbols = new JCG.SortedSet<int>();
            foreach (int symbol in word)
            {
                if (symbol > alphaMax)
                {
                    throw new ArgumentException("alphaMax exceeded by symbol " + symbol + " in word");
                }
                symbols.Add(symbol);
            }

            alphabet = new int[symbols.Count];
            int next = 0;
            foreach (int symbol in symbols)
            {
                alphabet[next] = symbol;
                next++;
            }

            // Compute the intervals of [0, alphaMax] that lie between the alphabet's
            // symbols, i.e. the ranges of all symbols that are NOT in the alphabet.
            rangeLower = new int[alphabet.Length + 2];
            rangeUpper = new int[alphabet.Length + 2];

            int gapStart = 0;
            foreach (int symbol in alphabet)
            {
                if (symbol > gapStart)
                {
                    rangeLower[numRanges] = gapStart;
                    rangeUpper[numRanges] = symbol - 1;
                    numRanges++;
                }
                gapStart = symbol + 1;
            }

            // Close with the range running from the last symbol up to alphaMax, if any.
            if (gapStart <= alphaMax)
            {
                rangeLower[numRanges] = gapStart;
                rangeUpper[numRanges] = alphaMax;
                numRanges++;
            }

            // Slot 0 is left null; slots 1 and 2 hold the Lev1* and Lev2* descriptions.
            ParametricDescription levOne;
            ParametricDescription levTwo;
            if (withTranspositions)
            {
                levOne = new Lev1TParametricDescription(word.Length);
                levTwo = new Lev2TParametricDescription(word.Length);
            }
            else
            {
                levOne = new Lev1ParametricDescription(word.Length);
                levTwo = new Lev2ParametricDescription(word.Length);
            }
            descriptions = new ParametricDescription[] { null, levOne, levTwo };
        }
Esempio n. 11
0
            // Transitions out of this state. The array is indexed by input character;
            // each entry is the int index of the destination state in
            // RBBITableBuilder.fDStates.
            internal int[] fDtran;

            /// <summary>
            /// Creates a state descriptor with empty tag-value and position sets and a
            /// transition array sized for input symbols 0 through
            /// <paramref name="maxInputSymbol"/>.
            /// </summary>
            /// <param name="maxInputSymbol">The largest input symbol used to index <c>fDtran</c>.</param>
            internal RBBIStateDescriptor(int maxInputSymbol)
            {
                // fDtran has to be pre-sized: it is indexed by input symbol and will
                // hold the next state number for each symbol.
                int transitionSlots = maxInputSymbol + 1;

                this.fTagVals = new JCG.SortedSet<int>();
                this.fPositions = new JCG.HashSet<RBBINode>();
                this.fDtran = new int[transitionSlots];
            }
Esempio n. 12
0
        //-----------------------------------------------------------------------------
        //
        //  mergeRuleStatusVals
        //
        //      Allocate positions in the  global array of rule status {tag} values
        //
        //      The RBBI runtime uses an array of {sets of status values} that can
        //      be returned for boundaries.  Each accepting state that has non-zero
        //      status includes an index into this array.  The format of the array
        //      is
        //           Num of status values in group 1
        //              status val
        //              status val
        //              ...
        //           Num of status vals in group 2
        //              status val
        //              status val
        //              ...
        //           etc.
        //
        //
        //-----------------------------------------------------------------------------

        internal virtual void MergeRuleStatusVals()
        {
            //
            //  The basic outline of what happens here is this...
            //
            //    for each state in this state table
            //       if the status tag list for this state is in the global statuses list
            //           record where and
            //           continue with the next state
            //       else
            //           add the tag list for this state to the global list.
            //
            int n;

            // Pre-load a single tag of {0} into the table.
            //   We will need this as a default, for rule sets with no explicit tagging,
            //   or with explicit tagging of {0}.
            if (fRB.fRuleStatusVals.Count == 0)
            {
                fRB.fRuleStatusVals.Add(1);    // Num of statuses in group
                fRB.fRuleStatusVals.Add(0);    //   and our single status of zero

                // Both the empty set (no explicit tagging) and the set {0} (explicit
                // tagging of {0}) refer to the pre-loaded group at index 0.
                ISet<int> s0 = new JCG.SortedSet<int>();
                int izero = 0;
                fRB.fStatusSets[s0] = izero;
                ISet<int> s1 = new JCG.SortedSet<int>();
                s1.Add(izero);
                // Register s1, not s0 a second time: otherwise {0} is never recorded
                // and a state tagged {0} appends a duplicate group to the array.
                fRB.fStatusSets[s1] = izero;
            }

            //    For each state, check whether the state's status tag values are
            //       already entered into the status values array, and add them if not.
            for (n = 0; n < fDStates.Count; n++)
            {
                RBBIStateDescriptor sd = fDStates[n];
                ISet<int> statusVals = sd.fTagVals;
                int? arrayIndexI = fRB.fStatusSets.Get(statusVals);
                if (arrayIndexI == null)
                {
                    // This is the first encounter of this set of status values.
                    //   Add them to the statusSets map, This map associates
                    //   the set of status values with an index in the runtime status
                    //   values array.
                    arrayIndexI = fRB.fRuleStatusVals.Count;
                    fRB.fStatusSets[statusVals] = arrayIndexI;

                    // Add the new set of status values to the vector of values that
                    //   will eventually become the array used by the runtime engine.
                    fRB.fRuleStatusVals.Add(statusVals.Count);
                    fRB.fRuleStatusVals.AddRange(statusVals);
                }

                // Save the runtime array index back into the state descriptor.
                sd.fTagsIdx = arrayIndexI.Value; // ICU4N NOTE: At this point the value cannot be null
            }
        }
Esempio n. 13
0
            /// <summary>
            /// Creates an unbounded TopNSearcher.
            /// </summary>
            /// <param name="fst"> The <see cref="Lucene.Net.Util.Fst.FST{T}"/> to search on. </param>
            /// <param name="topN"> The number of top scoring entries to retrieve. </param>
            /// <param name="maxQueueDepth"> The maximum size of the queue of possible top entries. </param>
            /// <param name="comparer"> The comparer to select the top N. </param>
            public TopNSearcher(FST<T> fst, int topN, int maxQueueDepth, IComparer<T> comparer)
            {
                this.fst = fst;
                this.bytesReader = fst.GetBytesReader();
                this.topN = topN;
                this.maxQueueDepth = maxQueueDepth;
                this.comparer = comparer;

                // The path queue is ordered by the caller's comparer wrapped in a
                // TieBreakByInputComparer.
                var pathOrder = new TieBreakByInputComparer<T>(comparer);
                this.queue = new JCG.SortedSet<FSTPath<T>>(pathOrder);
            }
Esempio n. 14
0
        /// <summary>
        /// Rebuilds <c>m_orderedGroups</c> as a sorted set of all groups currently in
        /// <c>groupMap</c> and tells every comparer which slot holds the last
        /// (bottom) group of that ordering.
        /// </summary>
        private void BuildSortedSet()
        {
            var comparer = new BuildSortedSetComparer(this);

            m_orderedGroups = new JCG.SortedSet<CollectedSearchGroup<TGroupValue>>(comparer);
            m_orderedGroups.UnionWith(groupMap.Values);
            Debug.Assert(m_orderedGroups.Count > 0);

            // The bottom group is the same for every comparer, so resolve it once rather
            // than calling Last() per comparer (presumably a full enumeration each time,
            // as Last() is LINQ and the set is not indexable). It is resolved lazily so
            // that Last() is still never called when there are no comparers.
            CollectedSearchGroup<TGroupValue> bottomGroup = default(CollectedSearchGroup<TGroupValue>);
            bool bottomResolved = false;

            foreach (FieldComparer fc in comparers)
            {
                if (!bottomResolved)
                {
                    bottomGroup = m_orderedGroups.Last();
                    bottomResolved = true;
                }
                fc.SetBottom(bottomGroup.ComparerSlot);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Writes a fixed-length, dereferenced bytes field: the de-duplicated, sorted
        /// values go to <paramref name="data"/>, and one packed ordinal per entry of
        /// <paramref name="values"/> goes to <paramref name="index"/>.
        /// </summary>
        /// <param name="field">Receives the legacy doc-values type attribute.</param>
        /// <param name="data">Output for the header, the length and the distinct values.</param>
        /// <param name="index">Output for the header, the distinct value count and the ordinals.</param>
        /// <param name="values">
        /// The values to write; a <c>null</c> entry is treated as an empty
        /// <see cref="BytesRef"/>. The sequence is enumerated twice and must yield the
        /// same values both times.
        /// </param>
        /// <param name="length">The length written ahead of the values.</param>
        private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.ToString());

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

            // deduplicate
            JCG.SortedSet<BytesRef> dictionary = new JCG.SortedSet<BytesRef>();
            foreach (BytesRef v in values)
            {
                dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
            }

            /* values */
            data.WriteInt32(length);

            // While writing the values in sorted order, remember each value's position.
            // That position equals the number of dictionary entries that sort before it,
            // which is exactly the ordinal needed below. Looking it up replaces a linear
            // scan of the whole dictionary for every entry of "values".
            Dictionary<BytesRef, int> ordinalByValue = new Dictionary<BytesRef, int>();
            int nextOrdinal = 0;
            foreach (BytesRef v in dictionary)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
                ordinalByValue[v] = nextOrdinal;
                nextOrdinal++;
            }

            /* ordinals */
            int valueCount = dictionary.Count;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(valueCount > 0);
            }
            index.WriteInt32(valueCount);
            int maxDoc = state.SegmentInfo.DocCount;

            PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);

            foreach (BytesRef v in values)
            {
                // Every value was added to the dictionary above (null as an empty BytesRef).
                int ord = ordinalByValue[v ?? new BytesRef()];
                w.Add(ord);
            }
            w.Finish();
        }
Esempio n. 16
0
        // Checks that the UnicodeMap "me" and the reference dictionary "stayWithMe"
        // hold the same entries and answer key, value and code point lookups the same way.
        // If the entry sets already differ, the mismatch is logged and asserted and the
        // remaining checks are skipped.
        private void checkEquals(UnicodeMap<Integer> me, JCG.SortedDictionary<String, Integer> stayWithMe)
        {
            // Snapshot the UnicodeMap's entries into the shared "temp" collection.
            temp.Clear();
            foreach (var mapEntry in me.EntrySet())
            {
                temp.Add(mapEntry);
            }
            ISet<KeyValuePair<String, Integer>> referenceEntries = new JCG.HashSet<KeyValuePair<string, Integer>>(stayWithMe);

            bool inParallel = referenceEntries.SetEquals(temp);
            if (!inParallel)
            {
                Logln(me.EntrySet().ToString());
                Logln(me.ToString());
                assertEquals("are in parallel", referenceEntries, temp);
                // we failed. Reset and start again
                referenceEntries.Clear();
                temp.Clear();
                return;
            }

            // ICU4N: looping through pairs instead of doing explicit table lookups is much faster
            foreach (var expected in stayWithMe)
            {
                String key = expected.Key;
                Integer value = expected.Value;

                assertEquals("containsKey", stayWithMe.ContainsKey(key), me.ContainsKey(key));
                assertEquals("get", value, me.Get(key));
                assertEquals("containsValue", stayWithMe.ContainsValue(value), me.ContainsValue(value));

                // A result other than int.MaxValue is also looked up by code point.
                int codePoint = UnicodeSet.GetSingleCodePoint(key);
                if (codePoint != int.MaxValue)
                {
                    assertEquals("get", value, me.Get(codePoint));
                }
            }
            // ICU4N TODO: complete implementation
            //ISet<String> nonCodePointStrings = stayWithMe.tailMap("").keySet();
            //if (nonCodePointStrings.Count == 0) nonCodePointStrings = null; // for parallel api
            //assertEquals("getNonRangeStrings", nonCodePointStrings, me.GetNonRangeStrings());

            ISet<Integer> referenceValues = new JCG.SortedSet<Integer>(stayWithMe.Values);
            ISet<Integer> mapValues = new JCG.SortedSet<Integer>(me.Values());

            assertEquals("values", mapValues, referenceValues);

            foreach (String referenceKey in stayWithMe.Keys)
            {
                assertEquals("containsKey", stayWithMe.ContainsKey(referenceKey), me.ContainsKey(referenceKey));
            }
        }
Esempio n. 17
0
        // Appends to "buffer" a "<title>: <count>" line for the entries that are in
        // m1entries but not in m2entries, followed by one "key => value" line per such
        // entry, in ENTRY_COMPARATOR order. Output stops once the "limit" counter has
        // dropped below zero.
        private void getEntries(String title, ICollection<KeyValuePair<Integer, String>> m1entries, ICollection<KeyValuePair<Integer, String>> m2entries, StringBuilder buffer, int limit)
        {
            ISet<KeyValuePair<Integer, String>> onlyInFirst = new JCG.SortedSet<KeyValuePair<Integer, String>>(ENTRY_COMPARATOR);
            onlyInFirst.UnionWith(m1entries);
            onlyInFirst.ExceptWith(m2entries);

            string header = title + ": " + onlyInFirst.Count + "\r\n";
            buffer.Append(header);

            foreach (var entry in onlyInFirst)
            {
                // Test the counter first, then decrement it, exactly like "limit-- < 0".
                bool budgetSpent = limit < 0;
                limit--;
                if (budgetSpent)
                {
                    return;
                }

                buffer.Append(entry.Key);
                buffer.Append(" => ");
                buffer.Append(entry.Value);
                buffer.Append("\r\n");
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Returns spans for <paramref name="query"/> over the given top-level context.
        /// With exactly one leaf, the query's own spans for that leaf (using the leaf
        /// reader's live docs) are returned directly; otherwise a
        /// <see cref="MultiSpansWrapper"/> over all leaves is returned.
        /// </summary>
        /// <param name="topLevelReaderContext">The context whose leaves are searched.</param>
        /// <param name="query">The span query to get spans for.</param>
        public static Spans Wrap(IndexReaderContext topLevelReaderContext, SpanQuery query)
        {
            JCG.SortedSet<Term> queryTerms = new JCG.SortedSet<Term>();
            query.ExtractTerms(queryTerms);

            // One TermContext per extracted term, built against the top-level context.
            IDictionary<Term, TermContext> contextByTerm = new Dictionary<Term, TermContext>();
            foreach (Term term in queryTerms)
            {
                contextByTerm[term] = TermContext.Build(topLevelReaderContext, term);
            }

            IList<AtomicReaderContext> leaves = topLevelReaderContext.Leaves;
            if (leaves.Count != 1)
            {
                return new MultiSpansWrapper(leaves, query, contextByTerm);
            }

            AtomicReaderContext onlyLeaf = leaves[0];
            AtomicReader leafReader = (AtomicReader)onlyLeaf.Reader;
            return query.GetSpans(onlyLeaf, leafReader.LiveDocs, contextByTerm);
        }
Esempio n. 19
0
        /// <summary>
        /// Compares two sets with <c>SetEqualityComparer&lt;T&gt;.Aggressive</c>. When they
        /// differ, reports both one-sided differences through <c>TestFmwk.Errln</c>.
        /// </summary>
        /// <param name="here">The calling test log; not used by this method.</param>
        /// <param name="values1">The first set (reported under "UnicodeMap").</param>
        /// <param name="values2">The second set (reported under "HashMap").</param>
        /// <returns><c>true</c> when the sets are equal; otherwise <c>false</c>.</returns>
        public static bool VerifySetsIdentical(AbstractTestLog here, ISet<T> values1, ISet<T> values2)
        {
            bool identical = SetEqualityComparer<T>.Aggressive.Equals(values1, values2);
            if (identical)
            {
                return true;
            }

            TestFmwk.Errln("Values differ:");

            // Elements of values1 that are missing from values2.
            TestFmwk.Errln("UnicodeMap - HashMap");
            ISet<T> onlyInFirst = new JCG.SortedSet<T>(values1, JCG.Comparer<T>.Default);
            onlyInFirst.ExceptWith(values2);
            TestFmwk.Errln(Show(onlyInFirst));

            // Elements of values2 that are missing from values1.
            TestFmwk.Errln("HashMap - UnicodeMap");
            ISet<T> onlyInSecond = new JCG.SortedSet<T>(values2, JCG.Comparer<T>.Default);
            onlyInSecond.ExceptWith(values1);
            TestFmwk.Errln(Show(onlyInSecond));

            return false;
        }
Esempio n. 20
0
        /// <summary>
        /// Fills a <c>UnicodeMap</c> and a sorted reference dictionary with the long
        /// property value name of every code point up to <c>SET_LIMIT</c>, then checks
        /// that both agree on their contents, their distinct values, and the set of
        /// code points mapped to each value.
        /// </summary>
        [Timeout(120000)] // ICU4N: This test can take awhile because of the slowness of adding items to SortedSet
        public void TestUnicodeMapGeneralCategory()
        {
            Logln("Setting General Category");

            // Each map is created exactly once; the throwaway instances that used to be
            // allocated here were overwritten before first use.
            UnicodeMap<String> map1 = new UnicodeMap<String>();
            IDictionary<Integer, String> map2 = new JCG.SortedDictionary<Integer, String>();

            for (int cp = 0; cp <= SET_LIMIT; ++cp)
            {
                int enumValue = UChar.GetIntPropertyValue(cp, propEnum);
                //if (enumValue <= 0) continue; // for smaller set
                String value = UChar.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
                map1.Put(cp, value);
                map2[new Integer(cp)] = value;
            }
            checkNext(map1, map2, int.MaxValue);

            Logln("Comparing General Category");
            check(map1, map2, -1);

            Logln("Comparing Values");
            ISet<String> values1 = new JCG.SortedSet<String>(StringComparer.Ordinal);
            map1.GetAvailableValues(values1);
            ISet<String> values2 = new JCG.SortedSet<String>(map2.Values.Distinct(), StringComparer.Ordinal); // ICU4N NOTE: Added Distinct()

            if (!TestBoilerplate<string>.VerifySetsIdentical(this, values1, values2))
            {
                throw new ArgumentException("Halting");
            }

            Logln("Comparing Sets");
            foreach (string value in values1)
            {
                Logln(value ?? "null");
                UnicodeSet set1 = map1.KeySet(value);
                UnicodeSet set2 = TestBoilerplate<string>.GetSet(map2, value);

                if (!TestBoilerplate<string>.VerifySetsIdentical(this, set1, set2))
                {
                    throw new ArgumentException("Halting");
                }
            }
        }
Esempio n. 21
0
        /// <summary>
        /// For each iteration, builds an automaton from a random regexp, enumerates the
        /// terms of "field" that intersect it, and checks that the union of those terms
        /// accepts the same language as the intersection of <c>termsAutomaton</c> with
        /// the regexp automaton.
        /// </summary>
        public virtual void TestIntersect()
        {
            for (int iteration = 0; iteration < numIterations; iteration++)
            {
                string regexp = AutomatonTestUtil.RandomRegexp(Random);
                Automaton regexpAutomaton = new RegExp(regexp, RegExpSyntax.NONE).ToAutomaton();

                bool finite = SpecialOperations.IsFinite(regexpAutomaton);
                CompiledAutomaton compiled = new CompiledAutomaton(regexpAutomaton, finite, false);

                TermsEnum matching = MultiFields.GetTerms(reader, "field").Intersect(compiled, null);
                Automaton expected = BasicOperations.Intersection(termsAutomaton, regexpAutomaton);

                // Each term is deep-copied before being added to the set.
                JCG.SortedSet<BytesRef> found = new JCG.SortedSet<BytesRef>();
                while (matching.Next() != null)
                {
                    BytesRef copy = BytesRef.DeepCopyOf(matching.Term);
                    found.Add(copy);
                }

                Automaton actual = BasicAutomata.MakeStringUnion(found);
                bool sameLanguage = BasicOperations.SameLanguage(expected, actual);
                Assert.IsTrue(sameLanguage);
            }
        }
Esempio n. 22
0
        /// <summary>
        /// Writes a variable-length, dereferenced bytes field: each distinct value goes
        /// to <paramref name="data"/> prefixed with its length, and one packed address
        /// (the value's offset from the start of the values section) per entry of
        /// <paramref name="values"/> goes to <paramref name="index"/>.
        /// </summary>
        /// <param name="field">Receives the legacy doc-values type attribute.</param>
        /// <param name="data">Output for the header and the distinct values.</param>
        /// <param name="index">Output for the header, the total byte count and the addresses.</param>
        /// <param name="values">
        /// The values to write; a <c>null</c> entry is treated as an empty
        /// <see cref="BytesRef"/>. The sequence is enumerated twice.
        /// </param>
        private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.ToString());

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

            // De-duplicate (and sort) the values; nulls become empty BytesRefs.
            JCG.SortedSet<BytesRef> distinctValues = new JCG.SortedSet<BytesRef>();
            foreach (BytesRef value in values)
            {
                if (value == null)
                {
                    distinctValues.Add(new BytesRef());
                }
                else
                {
                    distinctValues.Add(BytesRef.DeepCopyOf(value));
                }
            }

            /* values */
            long sectionStart = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            long lastAddress = 0;
            Dictionary<BytesRef, long> addressByValue = new Dictionary<BytesRef, long>();

            foreach (BytesRef value in distinctValues)
            {
                // The address of a value is where its length prefix starts.
                lastAddress = data.Position - sectionStart; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                addressByValue[value] = lastAddress;
                WriteVInt16(data, value.Length);
                data.WriteBytes(value.Bytes, value.Offset, value.Length);
            }

            /* ordinals */
            long totalBytes = data.Position - sectionStart; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            index.WriteInt64(totalBytes);

            int maxDoc = state.SegmentInfo.DocCount;

            // lastAddress is the largest address, so it determines the bit width.
            int bitsPerAddress = PackedInt32s.BitsRequired(lastAddress);
            PackedInt32s.Writer addressWriter = PackedInt32s.GetWriter(index, maxDoc, bitsPerAddress, PackedInt32s.DEFAULT);

            foreach (BytesRef value in values)
            {
                BytesRef key = value ?? new BytesRef();
                addressWriter.Add(addressByValue[key]);
            }
            addressWriter.Finish();
        }
Esempio n. 23
0
        /// <summary>
        /// Helper that creates a SortedSet fulfilling the given parameters. The set is
        /// created with the comparer from <c>GetIComparer()</c>; the first
        /// <paramref name="numberOfMatchingElements"/> elements of
        /// <paramref name="enumerableToMatchTo"/> are added, followed by deterministic
        /// generated elements (none of which occur in the enumerable) until
        /// <paramref name="count"/> elements are present.
        /// </summary>
        /// <param name="enumerableToMatchTo">The elements to share with the result; may be null.</param>
        /// <param name="count">The number of elements the resulting set must hold.</param>
        /// <param name="numberOfMatchingElements">How many leading elements of the enumerable to include.</param>
        /// <returns>The populated sorted set.</returns>
        protected IEnumerable<T> CreateSortedSet(IEnumerable<T> enumerableToMatchTo, int count, int numberOfMatchingElements)
        {
            JCG.SortedSet<T> result = new JCG.SortedSet<T>(GetIComparer());
            int seed = 528;

            JCG.List<T> match = null;

            // Seed the set with the requested number of shared elements.
            if (enumerableToMatchTo != null)
            {
                match = enumerableToMatchTo.ToList();
                int taken = 0;
                while (taken < numberOfMatchingElements)
                {
                    result.Add(match[taken]);
                    taken++;
                }
            }

            // Top up with generated elements until the desired count is reached.
            while (result.Count < count)
            {
                T candidate;
                do
                {
                    candidate = CreateT(seed++);
                }
                while (result.Contains(candidate) || (match != null && match.Contains(candidate, GetIEqualityComparer()))); // Don't want any unexpectedly duplicate values

                result.Add(candidate);
            }

            // Validate that the result fits the guidelines as expected.
            Debug.Assert(result.Count == count);
            if (match != null)
            {
                int sharedCount = 0;
                foreach (T wanted in match)
                {
                    if (result.Contains(wanted))
                    {
                        sharedCount += 1;
                    }
                }
                Assert.Equal(numberOfMatchingElements, sharedCount);
            }

            return result;
        }
Esempio n. 24
0
        /// <summary>
        /// Builds a random number of <see cref="PrefixCodedTerms"/> instances from random terms, merges
        /// their enumerators with a <c>MergedEnumerator</c>, and asserts that the merged sequence is
        /// element-for-element equal to the sorted union of all generated terms.
        /// </summary>
        public virtual void TestMergeRandom()
        {
            PrefixCodedTerms[]   pb       = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
            JCG.SortedSet <Term> superSet = new JCG.SortedSet <Term>();

            for (int i = 0; i < pb.Length; i++)
            {
                JCG.SortedSet <Term> terms = new JCG.SortedSet <Term>();
                int nterms = TestUtil.NextInt32(Random, 0, 10000);
                for (int j = 0; j < nterms; j++)
                {
                    Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
                    terms.Add(term);
                }
                superSet.UnionWith(terms);

                // Terms are fed to the builder in the sorted set's iteration order.
                PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
                foreach (Term @ref in terms)
                {
                    b.Add(@ref);
                }
                pb[i] = b.Finish();
            }

            List <IEnumerator <Term> > subs = new List <IEnumerator <Term> >();

            for (int i = 0; i < pb.Length; i++)
            {
                subs.Add(pb[i].GetEnumerator());
            }

            // IEnumerator<T> is IDisposable: dispose both enumerators even when an assertion throws.
            using (IEnumerator <Term> expected = superSet.GetEnumerator())
            using (IEnumerator <Term> actual = new MergedEnumerator <Term>(subs.ToArray()))
            {
                while (actual.MoveNext())
                {
                    Assert.IsTrue(expected.MoveNext());
                    Assert.AreEqual(expected.Current, actual.Current);
                }
                // The merged sequence must not be shorter than the expected union.
                Assert.IsFalse(expected.MoveNext());
            }
        }
Esempio n. 25
0
        /// <summary>
        /// Gathers the distinct field numbers of every pending document, writes them to the vectors
        /// stream, and returns them as a sorted array.
        /// <para>
        /// The stream receives a token byte (min(count - 1, 7) in the upper bits, the bits-per-value in
        /// the lower bits), then a VInt32 with the remaining count when count - 1 is at least 7, and
        /// finally the field numbers themselves through a header-less packed writer.
        /// </para>
        /// </summary>
        /// <returns>The unique field numbers in the sorted set's iteration order.</returns>
        private int[] FlushFieldNums()
        {
            JCG.SortedSet <int> distinct = new JCG.SortedSet <int>();
            foreach (DocData pending in pendingDocs)
            {
                foreach (FieldData field in pending.fields)
                {
                    distinct.Add(field.fieldNum);
                }
            }

            int total = distinct.Count;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(total > 0);
            }

            int bitsRequired = PackedInt32s.BitsRequired(distinct.Max);
            int totalMinusOne = total - 1;
            int token = (Math.Min(totalMinusOne, 0x07) << 5) | bitsRequired;
            vectorsStream.WriteByte((byte)token);

            // Counts that do not fit in the token's three bits spill into a trailing VInt32.
            if (totalMinusOne >= 0x07)
            {
                vectorsStream.WriteVInt32(totalMinusOne - 0x07);
            }

            PackedInt32s.Writer packed = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, total, bitsRequired, 1);
            int[] sortedNums = new int[total];
            int   slot       = 0;

            // Single pass: emit each number to the stream and record it for the caller.
            foreach (int fieldNum in distinct)
            {
                packed.Add(fieldNum);
                sortedNums[slot++] = fieldNum;
            }
            packed.Finish();

            return sortedNums;
        }
Esempio n. 26
0
        /// <summary>
        /// For several rounds, adds 797 random non-empty strings to <c>Hash</c>, sorts the hash with the
        /// UTF-16-order comparer, and checks that the sorted ids resolve to the same sequence as an
        /// ordinal-sorted set of those strings. The hash is cleared and re-initialized after each round.
        /// </summary>
        public virtual void TestSort()
        {
            BytesRef spare  = new BytesRef();
            int      rounds = AtLeast(2);

            for (int round = 0; round < rounds; round++)
            {
                // LUCENENET specific - StringComparer.Ordinal makes the SortedSet order strings
                // the same way Java does.
                JCG.SortedSet <string> expectedOrder = new JCG.SortedSet <string>(StringComparer.Ordinal);
                for (int added = 0; added < 797; added++)
                {
                    // Draw until a non-empty string comes up.
                    string candidate = TestUtil.RandomRealisticUnicodeString(Random, 1000);
                    while (candidate.Length == 0)
                    {
                        candidate = TestUtil.RandomRealisticUnicodeString(Random, 1000);
                    }

                    spare.CopyChars(candidate);
                    Hash.Add(spare);
                    expectedOrder.Add(candidate);
                }

                // The UTF-16 comparer is required so the result is comparable with
                // native String.CompareTo() [UTF-16] ordering.
#pragma warning disable 612, 618
                int[] sortedIds = Hash.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618
                Assert.IsTrue(expectedOrder.Count < sortedIds.Length);

                BytesRef fetched  = new BytesRef();
                int      position = 0;
                foreach (string expected in expectedOrder)
                {
                    spare.CopyChars(expected);
                    Assert.AreEqual(spare, Hash.Get(sortedIds[position], fetched));
                    position++;
                }

                Hash.Clear();
                Assert.AreEqual(0, Hash.Count);
                Hash.Reinit();
            }
        }
Esempio n. 27
0
        /// <summary>
        /// Applies the final rules to convert from a language-specific phonetic representation to a
        /// language-independent representation.
        /// </summary>
        /// <param name="phonemeBuilder">The current phonemes.</param>
        /// <param name="finalRules">The final rules to apply. Must not be <c>null</c>; when empty, <paramref name="phonemeBuilder"/> is returned unchanged.</param>
        /// <returns>The resulting phonemes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="finalRules"/> is <c>null</c>.</exception>
        private PhonemeBuilder ApplyFinalRules(PhonemeBuilder phonemeBuilder,
                                               IDictionary <string, IList <Rule> > finalRules)
        {
            if (finalRules == null)
            {
                // The single-string constructor takes the parameter NAME, not a message;
                // use the (paramName, message) overload so both are reported correctly.
                throw new ArgumentNullException(nameof(finalRules), "finalRules can not be null");
            }
            if (finalRules.Count == 0)
            {
                return phonemeBuilder;
            }

            // Sorted by Phoneme.COMPARER; duplicates produced by different inputs collapse here.
            ISet <Phoneme> phonemes = new JCG.SortedSet <Phoneme>(Phoneme.COMPARER);

            foreach (Phoneme phoneme in phonemeBuilder.Phonemes)
            {
                PhonemeBuilder subBuilder  = PhonemeBuilder.Empty(phoneme.Languages);
                string         phonemeText = phoneme.GetPhonemeText();

                // No increment clause: the rules application reports the next position through its I property.
                for (int i = 0; i < phonemeText.Length;)
                {
                    RulesApplication rulesApplication =
                        new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).Invoke();
                    bool found = rulesApplication.IsFound;
                    subBuilder = rulesApplication.PhonemeBuilder;

                    if (!found)
                    {
                        // not found, appending as-is
                        subBuilder.Append(phonemeText.Substring(i, 1));
                    }

                    i = rulesApplication.I;
                }

                phonemes.UnionWith(subBuilder.Phonemes);
            }

            return new PhonemeBuilder(phonemes.ToList());
        }
Esempio n. 28
0
        /// <summary>
        /// Verifies that a query matches exactly the expected set of documents when the hits are
        /// gathered with a collector.
        /// <para>
        /// Note that when using the HitCollector API, documents will be collected
        /// if they "match" regardless of what their score is.
        /// </para>
        /// <para>
        /// The check runs once against <paramref name="searcher"/> itself and then again against the
        /// searchers returned by <c>QueryUtils.WrapUnderlyingReader</c> for the values -1, 0 and 1.
        /// </para>
        /// </summary>
        /// <param name="random"> Random source passed on to the <c>QueryUtils</c> helpers. </param>
        /// <param name="query"> The query to test. </param>
        /// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
        /// <param name="searcher"> The searcher to test the query against. </param>
        /// <param name="results"> A list of documentIds that must match the query. </param>
        /// <seealso cref="DoCheckHits(Random, Query, string, IndexSearcher, int[])"/>
        public static void CheckHitCollector(Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results)
        {
            QueryUtils.Check(random, query, searcher);

            Trace.TraceInformation("Checked");

            JCG.SortedSet <int> expectedDocs  = new JCG.SortedSet <int>(results);
            JCG.SortedSet <int> collectedDocs = new JCG.SortedSet <int>();
            ICollector          collector     = new SetCollector(collectedDocs);

            // First pass: the searcher exactly as supplied.
            searcher.Search(query, collector);
            Assert.AreEqual(expectedDocs, collectedDocs, aggressive: false, () => "Simple: " + query.ToString(defaultFieldName));

            // Further passes: the same query through each wrapped variant of the reader.
            for (int variant = -1; variant < 2; variant++)
            {
                collectedDocs.Clear();
                IndexSearcher wrapped = QueryUtils.WrapUnderlyingReader(random, searcher, variant);
                wrapped.Search(query, collector);
                Assert.AreEqual(expectedDocs, collectedDocs, aggressive: false, () => "Wrap Reader " + variant + ": " + query.ToString(defaultFieldName));
            }
        }
        /// <summary>
        /// Advances the token stream by one token and asserts that its term text equals
        /// <paramref name="term"/> and that its morphosyntactic tags, compared as ordinal-sorted sets,
        /// equal <paramref name="tags"/>. On a mismatch both sets are printed to the console before
        /// the failing assertion is raised.
        /// </summary>
        /// <param name="ts">The token stream to read the next token from.</param>
        /// <param name="term">The expected term text.</param>
        /// <param name="tags">The expected tags, in any order.</param>
        private void assertPOSToken(TokenStream ts, String term, params String[] tags)
        {
            ts.IncrementToken();
            assertEquals(term, ts.GetAttribute <ICharTermAttribute>().ToString());

            SCG.ISet <String> actualTags = new JCG.SortedSet <String>(StringComparer.Ordinal);
            foreach (StringBuilder tag in ts.GetAttribute <IMorphosyntacticTagsAttribute>().Tags)
            {
                actualTags.add(tag.toString());
            }

            SCG.ISet <String> expectedTags = new JCG.SortedSet <String>(StringComparer.Ordinal);
            foreach (String tag in tags)
            {
                expectedTags.add(tag);
            }

            if (expectedTags.equals(actualTags))
            {
                return;
            }

            // Dump both sides first so the mismatch is readable in the test output.
            Console.WriteLine("Expected:\n" + expectedTags);
            Console.WriteLine("Actual:\n" + actualTags);
            assertEquals(expectedTags, actualTags);
        }
Esempio n. 30
0
        /// <summary>
        /// Tests that all documents up to maxDoc which are *not* in the
        /// expected result set, have an explanation which indicates that
        /// the document does not match
        /// </summary>
        /// <param name="q"> The query to explain. </param>
        /// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
        /// <param name="searcher"> The searcher used to produce the explanations. </param>
        /// <param name="results"> Document ids that are expected to match and are therefore skipped. </param>
        public static void CheckNoMatchExplanations(Query q, string defaultFieldName, IndexSearcher searcher, int[] results)
        {
            string d = q.ToString(defaultFieldName);

            // A plain int set is enough: the ids are already ints, so no nullable wrapper or
            // Convert.ToInt32 round-trip (which boxes through the object overload) is needed.
            JCG.SortedSet <int> ignore = new JCG.SortedSet <int>();
            foreach (int expectedDoc in results)
            {
                ignore.Add(expectedDoc);
            }

            int maxDoc = searcher.IndexReader.MaxDoc;

            for (int doc = 0; doc < maxDoc; doc++)
            {
                // Expected matches are not checked by this method.
                if (ignore.Contains(doc))
                {
                    continue;
                }

                Explanation exp = searcher.Explain(q, doc);
                Assert.IsNotNull(exp, "Explanation of [[" + d + "]] for #" + doc + " is null");
                Assert.IsFalse(exp.IsMatch, "Explanation of [[" + d + "]] for #" + doc + " doesn't indicate non-match: " + exp.ToString());
            }
        }