Ejemplo n.º 1
        /// <summary>
        /// Opens the segment file named with <c>EXTENSION</c> through a checksum input, checks the
        /// codec header and collects one <c>TermsReader</c> per field, keyed by field name.
        /// </summary>
        /// <param name="state">Read state providing the directory, segment name/suffix and field infos.</param>
        /// <returns>A <c>FieldsProducerAnonymousInnerClassHelper</c> built over the collected readers.</returns>
        public override FieldsProducer FieldsProducer(SegmentReadState state)
            // The file name is derived from the segment name, the segment suffix and EXTENSION.
            string             fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, EXTENSION);
            ChecksumIndexInput @in      = state.Directory.OpenChecksumInput(fileName, IOContext.READ_ONCE);

            // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
            var fields = new JCG.SortedDictionary <string, TermsReader>(StringComparer.Ordinal);

                // Validate the header before any per-field data is read.
                CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
                while (true)
                    // Each field record starts with its term count.
                    int termCount = @in.ReadVInt32();
                    // NOTE(review): no statement is visible under this check in this excerpt;
                    // a zero count presumably terminates the loop - confirm against the full file.
                    if (termCount == 0)

                    TermsReader termsReader = new TermsReader(state.FieldInfos, @in, termCount);
                    // System.out.println("load field=" + termsReader.field.name);
                    // Add() is used, so a repeated field name would throw rather than overwrite.
                    fields.Add(termsReader.field.Name, termsReader);

            return(new FieldsProducerAnonymousInnerClassHelper(fields));
Ejemplo n.º 2
            /// <summary>
            /// Seeks to the first term that compares greater than or equal to <paramref name="text"/>.
            /// The qualifying entries of <c>_terms</c> are copied into a new sorted dictionary (same
            /// comparer) and <c>_iterator</c> is replaced by an enumerator over that copy.
            /// </summary>
            /// <param name="text">Term to seek to.</param>
            /// <returns>
            /// <c>SeekStatus.FOUND</c> when the term returned by <c>Next()</c> equals
            /// <paramref name="text"/>, otherwise <c>SeekStatus.NOT_FOUND</c>.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
                var newTerms = new JCG.SortedDictionary <BytesRef, SimpleTVPostings>(_terms.Comparer);

                // Keep only the entries at or after the requested term.
                foreach (var p in _terms.Where(p => p.Key.CompareTo(text) >= 0))
                    newTerms.Add(p.Key, p.Value);

                _iterator = newTerms.GetEnumerator();

                // LUCENENET specific: Since in .NET we don't have a HasNext() method, we need
                // to call Next() and check the result if it is null instead. Since we need
                // to check the result of Next() anyway for the Equals() comparison, this makes sense here.
                var next = Next();

                // NOTE(review): as shown, the only statement under the null check dereferences
                // `next`; the null branch's own return appears to be missing from this excerpt - confirm.
                if (next == null)
                    return(next.Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND);
Ejemplo n.º 3
        /// <summary>
        /// Rebuilds a code point to value dictionary from <paramref name="map1"/> twice, once via
        /// <c>NextRange()</c> and once via <c>Next()</c>, and passes each result to <c>checkMap</c>
        /// together with <paramref name="map2"/>.
        /// </summary>
        /// <param name="map1">Unicode map that is iterated.</param>
        /// <param name="map2">Reference dictionary handed to <c>checkMap</c>.</param>
        /// <param name="limit">Only referenced by commented-out code in this method.</param>
        private void checkNext(UnicodeMap <String> map1, IDictionary <Integer, string> map2, int limit)
            Logln("Comparing nextRange");
            IDictionary <Integer, string> localMap = new JCG.SortedDictionary <Integer, string>();
            UnicodeMapIterator <String>   mi       = new UnicodeMapIterator <String>(map1);

            // Range pass: expand every range into one entry per code point.
            while (mi.NextRange())
                Logln(Utility.Hex(mi.Codepoint) + ".." + Utility.Hex(mi.CodepointEnd) + " => " + mi.Value);
                for (int i = mi.Codepoint; i <= mi.CodepointEnd; ++i)
                    //if (i >= limit) continue;
                    localMap[new Integer(i)] = mi.Value;
            checkMap(map2, localMap);

            // Single-step pass into a fresh dictionary.
            // NOTE(review): no reset of `mi` is visible between the two passes in this excerpt - confirm.
            Logln("Comparing next");
            localMap = new JCG.SortedDictionary <Integer, string>();
            //        String lastValue = null;
            while (mi.Next())
                //            if (!UnicodeMap.areEqual(lastValue, mi.value)) {
                //                // System.out.println("Change: " + Utility.hex(mi.codepoint) + " => " + mi.value);
                //                lastValue = mi.value;
                //            }
                //if (mi.codepoint >= limit) continue;
                localMap[new Integer(mi.Codepoint)] = mi.Value;
            checkMap(map2, localMap);
 /// <summary>
 /// Stores the three capability flags and creates an empty sorted term dictionary.
 /// </summary>
 /// <param name="hasOffsets">Value copied to <c>_hasOffsetsRenamed</c>.</param>
 /// <param name="hasPositions">Value copied to <c>_hasPositionsRenamed</c>.</param>
 /// <param name="hasPayloads">Value copied to <c>_hasPayloadsRenamed</c>.</param>
 internal SimpleTVTerms(bool hasOffsets, bool hasPositions, bool hasPayloads)
     _hasOffsetsRenamed   = hasOffsets;
     _hasPositionsRenamed = hasPositions;
     _hasPayloadsRenamed  = hasPayloads;
     // No comparer is passed, so the dictionary uses its default ordering for BytesRef keys.
     terms = new JCG.SortedDictionary <BytesRef, SimpleTVPostings>();
Ejemplo n.º 5
        /// <summary>
        /// Check the hits for duplicates: every document id in <paramref name="hits"/> is looked
        /// up in a dictionary of ids seen so far, and a message is assembled when it is already present.
        /// </summary>
        /// <param name="hits">Hits to inspect; nothing is done when this is <c>null</c>.</param>
        /// <param name="prefix">Text placed at the start of the duplicate message.</param>
        private void CheckHits(ScoreDoc[] hits, string prefix)
            if (hits != null)
                // Maps document id -> index of the hit where that id was first seen.
                IDictionary <int?, int?> idMap = new JCG.SortedDictionary <int?, int?>();
                for (int docnum = 0; docnum < hits.Length; ++docnum)
                    int?luceneId = null;

                    luceneId = Convert.ToInt32(hits[docnum].Doc);
                    if (idMap.TryGetValue(luceneId, out int?value))
                        // NOTE(review): only the label fragments are appended in this excerpt; the
                        // values between them, the use of `message` and the else branch that
                        // presumably guards the assignment below are not visible - confirm.
                        StringBuilder message = new StringBuilder(prefix);
                        message.Append("Duplicate key for hit index = ");
                        message.Append(", previous index = ");
                        message.Append(", Lucene ID = ");
                        idMap[luceneId] = Convert.ToInt32(docnum);
Ejemplo n.º 6
        /// <summary>
        /// Scans a window of <paramref name="chars"/> and, for every start position in it,
        /// walks the FST to collect the dictionary entries that match from that position.
        /// </summary>
        /// <param name="chars">Buffer holding the text to scan.</param>
        /// <param name="off">Index in <paramref name="chars"/> where the window begins.</param>
        /// <param name="len">Number of characters in the window.</param>
        /// <returns>Array of {wordId, position, length}.</returns>
        public int[][] Lookup(char[] chars, int off, int len)
        {
            // TODO: can we avoid this treemap/toIndexArray?
            // Keyed by start position relative to off: index, [length, length...]
            IDictionary<int, int[]> matches = new JCG.SortedDictionary<int, int[]>();
            bool anyMatch = false; // set once at least one entry has been stored

            FST.BytesReader reader = fst.GetBytesReader();
            FST.Arc<Int64> arc = new FST.Arc<Int64>();
            int limit = off + len;

            for (int start = off; start < limit; start++)
            {
                // Restart from the FST root for every start position.
                arc = fst.GetFirstArc(arc);
                int accumulated = 0;
                int available = limit - start;

                for (int depth = 0; depth < available; depth++)
                {
                    int ch = chars[start + depth];
                    if (fst.FindTargetArc(ch, arc, arc, depth == 0, reader) is null)
                    {
                        break; // no transition for this character: try the next start position
                    }

                    accumulated += (int)arc.Output;
                    if (!arc.IsFinal)
                    {
                        continue;
                    }

                    // A later (longer) match at the same start position overwrites the earlier one.
                    int wordOutput = accumulated + (int)arc.NextFinalOutput;
                    matches[start - off] = segmentations[wordOutput];
                    anyMatch = true;
                }
            }

            if (!anyMatch)
            {
                return EMPTY_RESULT;
            }

            return ToIndexArray(matches);
        }
Ejemplo n.º 7
        /// <summary>
        /// Reads <paramref name="in"/> line by line and records, for every line starting with the
        /// <c>SimpleTextFieldsWriter.FIELD</c> marker, the file pointer reached right after that line,
        /// keyed by the field name that follows the marker.
        /// </summary>
        /// <param name="in">Input to scan; it is wrapped in a <c>BufferedChecksumIndexInput</c>.</param>
        private IDictionary <string, long?> ReadFields(IndexInput @in)
            ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
            var scratch = new BytesRef(10);

            // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
            var fields = new JCG.SortedDictionary <string, long?>(StringComparer.Ordinal);

            while (true)
                SimpleTextUtil.ReadLine(input, scratch);
                // NOTE(review): nothing is visible under the END check, and no return statement
                // appears in this excerpt; presumably END stops the scan and `fields` is returned - confirm.
                if (scratch.Equals(SimpleTextFieldsWriter.END))

                if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
                    // Decode the bytes after the FIELD marker as UTF-8 to obtain the field name.
                    var fieldName = Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + SimpleTextFieldsWriter.FIELD.Length,
                                                            scratch.Length - SimpleTextFieldsWriter.FIELD.Length);
                    fields[fieldName] = input.GetFilePointer();
            /// <summary>
            /// Seeks to the first term that compares greater than or equal to <paramref name="text"/>.
            /// The qualifying entries of <c>_terms</c> are copied into a new sorted dictionary (same
            /// comparer), <c>_iterator</c> is replaced by an enumerator over that copy and
            /// <c>MoveNext()</c> is called once.
            /// </summary>
            /// <param name="text">Term to seek to.</param>
            /// <returns>
            /// <c>SeekStatus.FOUND</c> when the key of <c>_current</c> equals <paramref name="text"/>,
            /// otherwise <c>SeekStatus.NOT_FOUND</c>.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
                var newTerms = new JCG.SortedDictionary <BytesRef, SimpleTVPostings>(_terms.Comparer);

                // Keep only the entries at or after the requested term.
                foreach (var p in _terms)
                    if (p.Key.CompareTo(text) >= 0)
                        newTerms.Add(p.Key, p.Value);

                _iterator = newTerms.GetEnumerator();

                // LUCENENET specific: Since in .NET we don't have a HasNext() method, we need
                // to call MoveNext(). Since we need
                // to check the result anyway for the Equals() comparison, this makes sense here.
                // NOTE(review): as shown, the comparison is the only statement under the failed
                // MoveNext() check; the return for the exhausted case appears to be missing from
                // this excerpt - confirm.
                if (!MoveNext())
                    return(_current.Key.Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND);
Ejemplo n.º 9
        /// <summary>
        /// Initializes the extension from optional attribute and keyword collections and, when at
        /// least one of them is non-empty, derives <c>m_value</c> from a <c>StringBuilder</c>.
        /// </summary>
        /// <param name="attributes">Attributes to adopt; ignored when <c>null</c> or empty.</param>
        /// <param name="keywords">Keywords to adopt; ignored when <c>null</c> or empty.</param>
        internal UnicodeLocaleExtension(JCG.SortedSet <string> attributes, JCG.SortedDictionary <string, string> keywords)
            : this()
            // The fields are only replaced by non-empty inputs; otherwise they keep whatever
            // the chained parameterless constructor left in them.
            if (attributes != null && attributes.Count > 0)
                _attributes = attributes;
            if (keywords != null && keywords.Count > 0)
                _keywords = keywords;

            if (_attributes.Count > 0 || _keywords.Count > 0)
                StringBuilder sb = new StringBuilder();
                // NOTE(review): the statements that append to `sb` inside the two loops are not
                // visible in this excerpt - confirm against the full file.
                foreach (string attribute in _attributes)
                foreach (var keyword in _keywords)
                    string key   = keyword.Key;
                    string value = keyword.Value;

                    if (value.Length > 0)
                m_value = sb.ToString(1, sb.Length - 1);   // skip leading '-'
        /// <summary>
        /// Checks the encoder output for the ASHKENAZI name type across the four combinations of
        /// concat (true/false) and rule type (EXACT/APPROX). Within each combination the argument
        /// dictionary is extended step by step (name type, rule type, language set) and the
        /// encoding is re-checked after each step.
        /// </summary>
        /// <remarks>
        /// Assertions pass the expected value first and the actual value second, so that a
        /// failure message labels the two values correctly.
        /// </remarks>
        public void TestSolrASHKENAZI()
        {
            IDictionary<String, String> args;

            // concat is true, ruleType is EXACT
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "ASHKENAZI");
            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, true, "Angelo"));
            args.Put("ruleType", "EXACT");
            Assert.AreEqual("andZelo|angelo|anhelo|anxelo", Encode(args, true, "Angelo"));
            Assert.AreEqual("dandZelo|dangelo|danhelo|danxelo", Encode(args, true, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("angelo|anxelo", Encode(args, true, "Angelo"));
            Assert.AreEqual("", Encode(args, true, "1234"));

            // concat is false, ruleType is EXACT
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "ASHKENAZI");
            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, false, "Angelo"));
            args.Put("ruleType", "EXACT");
            Assert.AreEqual("andZelo|angelo|anhelo|anxelo", Encode(args, false, "Angelo"));
            Assert.AreEqual("dandZelo|dangelo|danhelo|danxelo", Encode(args, false, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("angelo|anxelo", Encode(args, false, "Angelo"));
            Assert.AreEqual("", Encode(args, false, "1234"));

            // concat is true, ruleType is APPROX
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "ASHKENAZI");
            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, true, "Angelo"));
            args.Put("ruleType", "APPROX");
            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, true, "Angelo"));
            Assert.AreEqual("dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO", Encode(args, true, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("AnSelO|AngElO|AngzelO|AnkselO", Encode(args, true, "Angelo"));
            Assert.AreEqual("", Encode(args, true, "1234"));

            // concat is false, ruleType is APPROX
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "ASHKENAZI");
            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, false, "Angelo"));
            args.Put("ruleType", "APPROX");
            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, false, "Angelo"));
            Assert.AreEqual("dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO", Encode(args, false, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("AnSelO|AngElO|AngzelO|AnkselO", Encode(args, false, "Angelo"));
            Assert.AreEqual("", Encode(args, false, "1234"));
        }
        /// <summary>
        /// Checks the encoder output for the SEPHARDIC name type across the four combinations of
        /// concat (true/false) and rule type (EXACT/APPROX). Within each combination the argument
        /// dictionary is extended step by step (name type, rule type, language set) and the
        /// encoding is re-checked after each step.
        /// </summary>
        /// <remarks>
        /// Assertions pass the expected value first and the actual value second, so that a
        /// failure message labels the two values correctly.
        /// </remarks>
        public void TestSolrSEPHARDIC()
        {
            IDictionary<String, String> args;

            // concat is true, ruleType is EXACT
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "SEPHARDIC");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
            args.Put("ruleType", "EXACT");
            Assert.AreEqual("anZelo|andZelo|anxelo", Encode(args, true, "Angelo"));
            Assert.AreEqual("anZelo|andZelo|anxelo", Encode(args, true, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("andZelo|anxelo", Encode(args, true, "Angelo"));
            Assert.AreEqual("", Encode(args, true, "1234"));

            // concat is false, ruleType is EXACT
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "SEPHARDIC");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
            args.Put("ruleType", "EXACT");
            Assert.AreEqual("anZelo|andZelo|anxelo", Encode(args, false, "Angelo"));
            Assert.AreEqual("danZelo|dandZelo|danxelo", Encode(args, false, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("andZelo|anxelo", Encode(args, false, "Angelo"));
            Assert.AreEqual("", Encode(args, false, "1234"));

            // concat is true, ruleType is APPROX
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "SEPHARDIC");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
            args.Put("ruleType", "APPROX");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
            Assert.AreEqual("", Encode(args, true, "1234"));

            // concat is false, ruleType is APPROX
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "SEPHARDIC");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
            args.Put("ruleType", "APPROX");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
            Assert.AreEqual("danhila|danhilu|danzila|danzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
            Assert.AreEqual("", Encode(args, false, "1234"));
        }
Ejemplo n.º 12
         * @param me
         * @param stayWithMe
        /// <summary>
        /// Asserts that the Unicode map <paramref name="me"/> and the reference dictionary
        /// <paramref name="stayWithMe"/> agree on their entries, per-key lookups, value
        /// membership and overall value sets.
        /// </summary>
        /// <param name="me">Unicode map under test.</param>
        /// <param name="stayWithMe">Reference dictionary expected to hold the same content.</param>
        private void checkEquals(UnicodeMap <Integer> me, JCG.SortedDictionary <String, Integer> stayWithMe)
            // NOTE(review): `temp` is used below but its declaration and the body of this loop
            // are not visible in this excerpt; presumably the loop fills `temp` from me's entries - confirm.
            foreach (var e in me.EntrySet())
            ISet <KeyValuePair <String, Integer> > entrySet = new JCG.HashSet <KeyValuePair <string, Integer> >(stayWithMe);

            // SetEquals is checked first; the assertion is only issued when the sets differ.
            if (!entrySet.SetEquals(temp))
                assertEquals("are in parallel", entrySet, temp);
                // we failed. Reset and start again
            // ICU4N: looping through pairs instead of doing explicity table lookups is much faster
            foreach (var pair in stayWithMe)
                assertEquals("containsKey", stayWithMe.ContainsKey(pair.Key), me.ContainsKey(pair.Key));
                Integer value = pair.Value;
                assertEquals("get", value, me.Get(pair.Key));
                assertEquals("containsValue", stayWithMe.ContainsValue(value), me.ContainsValue(value));
                // GetSingleCodePoint yields int.MaxValue when the key is not a single code point,
                // in which case the code point lookup is skipped.
                int cp = UnicodeSet.GetSingleCodePoint(pair.Key);
                if (cp != int.MaxValue)
                    assertEquals("get", value, me.Get(cp));
            // ICU4N TODO: complete implementation
            //ISet<String> nonCodePointStrings = stayWithMe.tailMap("").keySet();
            //if (nonCodePointStrings.Count == 0) nonCodePointStrings = null; // for parallel api
            //assertEquals("getNonRangeStrings", nonCodePointStrings, me.GetNonRangeStrings());

            // Compare the value collections as sorted sets so ordering and duplicates do not matter.
            ISet <Integer> values   = new JCG.SortedSet <Integer>(stayWithMe.Values);
            ISet <Integer> myValues = new JCG.SortedSet <Integer>(me.Values());

            assertEquals("values", myValues, values);

            foreach (String key in stayWithMe.Keys)
                assertEquals("containsKey", stayWithMe.ContainsKey(key), me.ContainsKey(key));
Ejemplo n.º 13
        // make sure the documents returned by the search match the expected list
        /// <summary>
        /// Runs <c>Query</c> once without and once with <paramref name="sort"/>, checks both hit
        /// lists for duplicates via <c>CheckHits</c>, and verifies that every document id of the
        /// sorted search is also present in the unsorted one.
        /// </summary>
        /// <param name="searcher">Searcher used for both queries.</param>
        /// <param name="sort">Sort criteria applied to the second query.</param>
        private void MatchHits(IndexSearcher searcher, Sort sort)
            // make a query without sorting first
            ScoreDoc[] hitsByRank = searcher.Search(Query, null, int.MaxValue).ScoreDocs;
            CheckHits(hitsByRank, "Sort by rank: "); // check for duplicates
            IDictionary <int?, int?> resultMap = new JCG.SortedDictionary <int?, int?>();

            // store hits in TreeMap - TreeMap does not allow duplicates; existing
            // entries are silently overwritten
            for (int hitid = 0; hitid < hitsByRank.Length; ++hitid)
                resultMap[Convert.ToInt32(hitsByRank[hitid].Doc)] = Convert.ToInt32(hitid); // Value: hit index - Key: Lucene
                // Document ID

            // now make a query using the sort criteria
            ScoreDoc[] resultSort = searcher.Search(Query, null, int.MaxValue, sort).ScoreDocs;
            CheckHits(resultSort, "Sort by custom criteria: "); // check for duplicates

            // besides the sorting both sets of hits must be identical
            for (int hitid = 0; hitid < resultSort.Length; ++hitid)
                int?idHitDate = Convert.ToInt32(resultSort[hitid].Doc);  // document ID
                // from sorted
                // search
                if (!resultMap.ContainsKey(idHitDate))
                    Log("ID " + idHitDate + " not found. Possibliy a duplicate.");
                Assert.IsTrue(resultMap.ContainsKey(idHitDate)); // same ID must be in the
                // Map from the rank-sorted
                // search
                // every hit must appear once in both result sets --> remove it from the
                // Map.
                // At the end the Map must be empty!
                // NOTE(review): the removal described above is not visible in this excerpt - confirm.
            // NOTE(review): the else that presumably separates the commented-out success log from
            // the failure log is not visible in this excerpt - confirm.
            if (resultMap.Count == 0)
                // log("All hits matched");
                Log("Couldn't match " + resultMap.Count + " hits.");
            // NOTE(review): arguments are passed as (actual, expected); a failure message would
            // label them the other way round.
            Assert.AreEqual(resultMap.Count, 0);
        /// <summary>
        /// Checks the encoder output for the GENERIC name type across the four combinations of
        /// concat (true/false) and rule type (EXACT/APPROX). Within each combination the argument
        /// dictionary is extended step by step and the encoding is re-checked after each step.
        /// </summary>
        /// <remarks>
        /// Assertions pass the expected value first and the actual value second, so that a
        /// failure message labels the two values correctly.
        /// </remarks>
        public void TestSolrGENERIC()
        {
            IDictionary<String, String> args;

            // concat is true, ruleType is EXACT
            args = new JCG.SortedDictionary<String, String>();
            args.Put("nameType", "GENERIC");
            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, true, "Angelo"));
            args.Put("ruleType", "EXACT");
            Assert.AreEqual("anZelo|andZelo|angelo|anhelo|anjelo|anxelo", Encode(args, true, "Angelo"));
            Assert.AreEqual("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)", Encode(args, true, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("andZelo|angelo|anxelo", Encode(args, true, "Angelo"));
            Assert.AreEqual("", Encode(args, true, "1234"));

            // concat is false, ruleType is EXACT
            // "nameType" is not set from here on; the expected values match the explicit GENERIC
            // case above, so these sections presumably exercise the encoder's default - confirm.
            args = new JCG.SortedDictionary<String, String>();
            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, false, "Angelo"));
            args.Put("ruleType", "EXACT");
            Assert.AreEqual("anZelo|andZelo|angelo|anhelo|anjelo|anxelo", Encode(args, false, "Angelo"));
            Assert.AreEqual("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)", Encode(args, false, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("andZelo|angelo|anxelo", Encode(args, false, "Angelo"));
            Assert.AreEqual("", Encode(args, false, "1234"));

            // concat is true, ruleType is APPROX
            args = new JCG.SortedDictionary<String, String>();
            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, true, "Angelo"));
            args.Put("ruleType", "APPROX");
            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, true, "Angelo"));
            Assert.AreEqual("(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)", Encode(args, true, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("angilo|anxilo|anzilo|ongilo|onxilo|onzilo", Encode(args, true, "Angelo"));
            Assert.AreEqual("", Encode(args, true, "1234"));

            // concat is false, ruleType is APPROX
            args = new JCG.SortedDictionary<String, String>();
            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, false, "Angelo"));
            args.Put("ruleType", "APPROX");
            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, false, "Angelo"));
            Assert.AreEqual("(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)", Encode(args, false, "D'Angelo"));
            args.Put("languageSet", "italian,greek,spanish");
            Assert.AreEqual("angilo|anxilo|anzilo|ongilo|onxilo|onzilo", Encode(args, false, "Angelo"));
            Assert.AreEqual("", Encode(args, false, "1234"));
        }
Ejemplo n.º 15
        // Fills a UnicodeMap and a reference sorted dictionary with the long property value name
        // of every code point from 0 to SET_LIMIT, then cross-checks the two: iteration
        // (checkNext), content (check), the sets of distinct values, and the code point set
        // associated with each value.
        [Timeout(120000)] // ICU4N: This test can take awhile because of the slowness of adding items to SortedSet
        public void TestUnicodeMapGeneralCategory()
            Logln("Setting General Category");
            UnicodeMap <String>           map1 = new UnicodeMap <string>();
            IDictionary <Integer, String> map2 = new JCG.Dictionary <Integer, String>();

            // Both maps are re-created here; the instances assigned above are discarded.
            //Map<Integer, String> map3 = new TreeMap<Integer, String>();
            map1 = new UnicodeMap <String>();
            map2 = new JCG.SortedDictionary <Integer, String>();

            for (int cp = 0; cp <= SET_LIMIT; ++cp)
                int enumValue = UChar.GetIntPropertyValue(cp, propEnum);
                //if (enumValue <= 0) continue; // for smaller set
                String value = UChar.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
                map1.Put(cp, value);
                map2[new Integer(cp)] = value;
            checkNext(map1, map2, int.MaxValue);

            Logln("Comparing General Category");
            check(map1, map2, -1);
            Logln("Comparing Values");
            // Both value sets use ordinal ordering so that they can be compared element by element.
            ISet <String> values1 = new JCG.SortedSet <String>(StringComparer.Ordinal); map1.GetAvailableValues(values1);
            ISet <String> values2 = new JCG.SortedSet <String>(map2.Values.Distinct(), StringComparer.Ordinal); // ICU4N NOTE: Added Distinct()

            // A mismatch aborts the test by throwing instead of recording an assertion failure.
            if (!TestBoilerplate <string> .VerifySetsIdentical(this, values1, values2))
                throw new ArgumentException("Halting");
            Logln("Comparing Sets");
            foreach (string value in values1)
                Logln(value == null ? "null" : value);
                UnicodeSet set1 = map1.KeySet(value);
                UnicodeSet set2 = TestBoilerplate <string> .GetSet(map2, value);

                if (!TestBoilerplate <string> .VerifySetsIdentical(this, set1, set2))
                    throw new ArgumentException("Halting");
Ejemplo n.º 16
        /// <summary>
        /// Captures the segment's document count and field infos, opens the postings file named
        /// by <c>SimpleTextPostingsFormat.GetPostingsFileName</c> and builds <c>fields</c> by
        /// running <c>ReadFields</c> over a clone of the opened input.
        /// </summary>
        /// <param name="state">Read state providing segment info, field infos, directory and IO context.</param>
        public SimpleTextFieldsReader(SegmentReadState state)
            this.maxDoc = state.SegmentInfo.DocCount;
            fieldInfos  = state.FieldInfos;
            input       = state.Directory.OpenInput(SimpleTextPostingsFormat.GetPostingsFileName(state.SegmentInfo.Name, state.SegmentSuffix), state.Context);
            bool success = false;

                // A clone is scanned so that the position of `input` itself is left untouched.
                fields  = ReadFields((IndexInput)input.Clone());
                success = true;
                // NOTE(review): the surrounding try/finally and the action taken when `success`
                // is false are not visible in this excerpt; presumably the input is released on
                // failure - confirm.
                if (!success)
Ejemplo n.º 17
        /// <summary>
        /// Builds a <c>MultiPhraseQuery</c> from a <c>MultiPhraseQueryNode</c> by grouping the
        /// terms of its children by position increment and adding each group to the query.
        /// </summary>
        /// <param name="queryNode">Node to convert; it is cast to <c>MultiPhraseQueryNode</c>.</param>
        public virtual Query Build(IQueryNode queryNode)
            MultiPhraseQueryNode phraseNode = (MultiPhraseQueryNode)queryNode;

            MultiPhraseQuery phraseQuery = new MultiPhraseQuery();

            IList <IQueryNode> children = phraseNode.GetChildren();

            if (children != null)
                // Sorted by position increment so the groups are later visited in ascending order.
                IDictionary <int?, List <Term> > positionTermMap = new JCG.SortedDictionary <int?, List <Term> >();

                foreach (IQueryNode child in children)
                    FieldQueryNode termNode  = (FieldQueryNode)child;
                    // NOTE(review): the statement below is cut off in this excerpt; the rest of
                    // the expression is not visible - confirm against the full file.
                    TermQuery      termQuery = (TermQuery)termNode

                    // Create the list for this position on first use.
                    List <Term> termList;
                    if (!positionTermMap.TryGetValue(termNode.PositionIncrement, out termList) || termList == null)
                        termList = new List <Term>();
                        positionTermMap[termNode.PositionIncrement] = termList;

                    // NOTE(review): the line that adds the term to `termList` is not visible in this excerpt.

                foreach (int positionIncrement in positionTermMap.Keys)
                    List <Term> termList = positionTermMap[positionIncrement];

                    // NOTE(review): the call below is cut off in this excerpt, and the method's
                    // return statement is not visible either.
                    phraseQuery.Add(termList.ToArray(/*new Term[termList.size()]*/),

Ejemplo n.º 18
        // parses a list of MappingCharFilter style rules into a custom byte[] type table
        /// <summary>
        /// Matches every rule against <c>typePattern</c>, maps the single character on its left-hand
        /// side to the type parsed from its right-hand side, and overlays those mappings on a table
        /// pre-filled from <c>WordDelimiterIterator.GetType</c>.
        /// </summary>
        /// <param name="rules">Rules to parse.</param>
        /// <exception cref="ArgumentException">
        /// A rule does not match the pattern, its left-hand side is not exactly one character, or
        /// its type parses to <c>WordDelimiterFilter.NOT_SET</c>.
        /// </exception>
        private byte[] ParseTypes(IList <string> rules)
            // Sorted by character so that the last key is the highest mapped character.
            IDictionary <char, byte> typeMap = new JCG.SortedDictionary <char, byte>();

            foreach (string rule in rules)
                Match m = typePattern.Match(rule);
                if (!m.Success)
                    throw new ArgumentException("Invalid Mapping Rule : [" + rule + "]");
                string lhs = ParseString(m.Groups[1].Value.Trim());
                byte   rhs = ParseType(m.Groups[2].Value.Trim());
                if (lhs.Length != 1)
                    throw new ArgumentException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
                if (rhs == WordDelimiterFilter.NOT_SET)
                    throw new ArgumentException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
                // A later rule for the same character replaces the earlier one.
                typeMap[lhs[0]] = rhs;

            // ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
            // LastOrDefault() yields '\0' for an empty map, so the table then falls back to the default length.
            byte[] types = new byte[Math.Max(typeMap.Keys.LastOrDefault() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
            // Start from the default type of every index, then apply the custom mappings on top.
            for (int i = 0; i < types.Length; i++)
                types[i] = WordDelimiterIterator.GetType(i);
            foreach (var mapping in typeMap)
                types[mapping.Key] = mapping.Value;
            // NOTE(review): the statement returning `types` is not visible in this excerpt.
        /// <summary>
        /// Parses the term vectors of one document from the text held in <c>_scratch</c>: for every
        /// field its name, position/offset/payload flags and term count, and for every term its
        /// text, frequency and the per-occurrence positions, payloads and offsets that are enabled.
        /// </summary>
        /// <param name="doc">Document number; not referenced by any statement visible in this excerpt.</param>
        /// <returns>
        /// <c>null</c> when the document has no fields, otherwise a <c>SimpleTVFields</c> wrapping
        /// the parsed per-field terms.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the assertions below inspect <c>_scratch</c> for a different marker each
        /// time, but no call that refills <c>_scratch</c> is visible between them in this excerpt;
        /// presumably the line-reading calls were lost - confirm against the full file.
        /// </remarks>
        public override Fields Get(int doc)
            // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
            var fields = new JCG.SortedDictionary <string, SimpleTVTerms>(StringComparer.Ordinal);

            if (Debugging.AssertsEnabled)
                Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.NUMFIELDS));
            // The number follows directly after the NUMFIELDS marker.
            var numFields = ParseInt32At(SimpleTextTermVectorsWriter.NUMFIELDS.Length);

            if (numFields == 0)
                return(null); // no vectors for this doc
            for (var i = 0; i < numFields; i++)
                if (Debugging.AssertsEnabled)
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELD));
                // skip fieldNumber:

                if (Debugging.AssertsEnabled)
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDNAME));
                var fieldName = ReadString(SimpleTextTermVectorsWriter.FIELDNAME.Length, _scratch);

                // The three flags are parsed culture-invariantly from their textual form.
                if (Debugging.AssertsEnabled)
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPOSITIONS));
                var positions = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPOSITIONS.Length, _scratch), CultureInfo.InvariantCulture);

                if (Debugging.AssertsEnabled)
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDOFFSETS));
                var offsets = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDOFFSETS.Length, _scratch), CultureInfo.InvariantCulture);

                if (Debugging.AssertsEnabled)
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPAYLOADS));
                var payloads = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPAYLOADS.Length, _scratch), CultureInfo.InvariantCulture);

                if (Debugging.AssertsEnabled)
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDTERMCOUNT));
                var termCount = ParseInt32At(SimpleTextTermVectorsWriter.FIELDTERMCOUNT.Length);

                // Add() is used, so a repeated field name would throw rather than overwrite.
                var terms = new SimpleTVTerms(offsets, positions, payloads);
                fields.Add(fieldName, terms);

                for (var j = 0; j < termCount; j++)
                    if (Debugging.AssertsEnabled)
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMTEXT));
                    // Copy the bytes after the TERMTEXT marker into a new BytesRef.
                    // NOTE(review): no statement that sizes term.Bytes to termLength is visible
                    // before the copy in this excerpt - confirm.
                    var term       = new BytesRef();
                    var termLength = _scratch.Length - SimpleTextTermVectorsWriter.TERMTEXT.Length;
                    term.Length = termLength;
                    Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.TERMTEXT.Length, term.Bytes, term.Offset, termLength);

                    var postings = new SimpleTVPostings();
                    terms.terms.Add(term, postings);

                    if (Debugging.AssertsEnabled)
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMFREQ));
                    postings.freq = ParseInt32At(SimpleTextTermVectorsWriter.TERMFREQ.Length);

                    // NOTE(review): no statement is visible under this check in this excerpt;
                    // presumably the term is skipped when neither positions nor offsets are stored - confirm.
                    if (!positions && !offsets)

                    // Allocate one slot per occurrence for each kind of data that is enabled.
                    if (positions)
                        postings.positions = new int[postings.freq];
                        if (payloads)
                            postings.payloads = new BytesRef[postings.freq];

                    if (offsets)
                        postings.startOffsets = new int[postings.freq];
                        postings.endOffsets   = new int[postings.freq];

                    for (var k = 0; k < postings.freq; k++)
                        if (positions)
                            if (Debugging.AssertsEnabled)
                                Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.POSITION));
                            postings.positions[k] = ParseInt32At(SimpleTextTermVectorsWriter.POSITION.Length);
                            if (payloads)
                                if (Debugging.AssertsEnabled)
                                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.PAYLOAD));
                                // An empty payload is stored as null.
                                // NOTE(review): the else separating the two cases and the end of
                                // the Array.Copy call are not visible in this excerpt.
                                if (_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length == 0)
                                    postings.payloads[k] = null;
                                    var payloadBytes = new byte[_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length];
                                    Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.PAYLOAD.Length, payloadBytes, 0,
                                    postings.payloads[k] = new BytesRef(payloadBytes);

                        // NOTE(review): no statement is visible under this check in this excerpt;
                        // presumably the offset parsing below is skipped when offsets are disabled - confirm.
                        if (!offsets)

                        if (Debugging.AssertsEnabled)
                            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.STARTOFFSET));
                        postings.startOffsets[k] = ParseInt32At(SimpleTextTermVectorsWriter.STARTOFFSET.Length);

                        if (Debugging.AssertsEnabled)
                            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.ENDOFFSET));
                        postings.endOffsets[k] = ParseInt32At(SimpleTextTermVectorsWriter.ENDOFFSET.Length);
            return(new SimpleTVFields(this, fields));
Ejemplo n.º 20
        /// <summary>
        /// Internal constructor, only used by <see cref="InternalLocaleBuilder"/>.
        /// Builds the extension map from the supplied singleton extensions and the Unicode
        /// locale attributes/keywords, passing keys and values through <c>AsciiUtil.ToLower</c>.
        /// </summary>
        /// <param name="extensions">Extension values keyed by singleton character; may be <c>null</c> or empty.</param>
        /// <param name="uattributes">Unicode locale attributes; may be <c>null</c> or empty.</param>
        /// <param name="ukeywords">Unicode locale keyword/type pairs; may be <c>null</c> or empty.</param>
        internal LocaleExtensions(IDictionary<CaseInsensitiveChar, string> extensions,
                                  ISet<CaseInsensitiveString> uattributes, IDictionary<CaseInsensitiveString, string> ukeywords)
        {
            bool hasExtension = extensions != null && extensions.Count > 0;
            bool hasUAttributes = uattributes != null && uattributes.Count > 0;
            bool hasUKeywords = ukeywords != null && ukeywords.Count > 0;

            // Nothing to store: use the shared empty map and stop here so the
            // map-building code below does not replace it.
            if (!hasExtension && !hasUAttributes && !hasUKeywords)
            {
                _map = EmptyMap;
                _id = "";
                return;
            }

            // Build extension map
            _map = new JCG.SortedDictionary<char, Extension>();
            if (hasExtension)
            {
                foreach (var ext in extensions)
                {
                    char key = AsciiUtil.ToLower(ext.Key.Value);
                    string value = ext.Value;

                    if (LanguageTag.IsPrivateusePrefixChar(key))
                    {
                        // we need to exclude special variant in privateuse, e.g. "x-abc-lvariant-DEF"
                        value = InternalLocaleBuilder.RemovePrivateuseVariant(value);
                        if (value == null)
                        {
                            // Nothing is left of this extension once the variant is removed; skip it.
                            continue;
                        }
                    }

                    Extension e = new Extension(key, AsciiUtil.ToLower(value));
                    _map[key] = e;
                }
            }

            if (hasUAttributes || hasUKeywords)
            {
                JCG.SortedSet<string> uaset = null;
                JCG.SortedDictionary<string, string> ukmap = null;

                if (hasUAttributes)
                {
                    uaset = new JCG.SortedSet<string>(StringComparer.Ordinal);
                    foreach (CaseInsensitiveString cis in uattributes)
                    {
                        // NOTE(review): loop body restored by analogy with the keyword loop below
                        // (lower-case, then store) - confirm against upstream.
                        uaset.Add(AsciiUtil.ToLower(cis.Value));
                    }
                }

                if (hasUKeywords)
                {
                    ukmap = new JCG.SortedDictionary<string, string>(StringComparer.Ordinal);
                    foreach (var kwd in ukeywords)
                    {
                        string key = AsciiUtil.ToLower(kwd.Key.Value);
                        string type = AsciiUtil.ToLower(kwd.Value);
                        ukmap[key] = type;
                    }
                }

                UnicodeLocaleExtension ule = new UnicodeLocaleExtension(uaset, ukmap);
                _map[UnicodeLocaleExtension.Singleton] = ule;
            }

            if (_map.Count == 0)
            {
                // this could happen when only privateuse with special variant
                _map = EmptyMap;
                _id = "";
            }
            else
            {
                _id = ToID(_map);
            }
        }
Ejemplo n.º 21
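 /// <summary>
 /// Fills <paramref name="test"/> with a random number of random entries.
 /// </summary>
 /// <param name="rand">Random number generator used for the entry count, keys and values.</param>
 /// <param name="max">Exclusive upper bound for the number of entries to write.</param>
 /// <param name="test">Dictionary that receives the entries.</param>
 /// <returns>A dictionary of the same type as <paramref name="test"/>.</returns>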
 private JCG.SortedDictionary<String, Integer> fillRandomMap(Random rand, int max, JCG.SortedDictionary<String, Integer> test)
 {
     // Purpose: write a random number of random key/value pairs into `test`.
     // `max` is reused as the actual entry count for this call: rand.Next(max) yields 0 .. max - 1.
     max = rand.Next(max);
     for (int i = 0; i < max; ++i)
     {
         // Values are drawn from 50 .. 99; an existing key is overwritten by the indexer.
         test[GetRandomKey(rand)] = new Integer(rand.Next(50) + 50);
     }

     // Hand the same dictionary back so the declared return type is satisfied.
     return test;
 }
Ejemplo n.º 22
        /// <summary>
        /// Randomized stress test: drives a <c>UnicodeMap</c> (<c>me</c>) and a reference
        /// <c>JCG.SortedDictionary</c> (<c>stayWithMe</c>) through a long sequence of randomly
        /// chosen operations and passes the pair to <c>checkEquals</c> as the loop runs.
        /// </summary>
        public void TestAMonkey()
            // Reference dictionary, ordered by OneFirstComparator.
            JCG.SortedDictionary <String, Integer> stayWithMe = new JCG.SortedDictionary <String, Integer>(OneFirstComparator);

            // Map under test, seeded from the (still empty) reference dictionary.
            UnicodeMap <Integer> me = new UnicodeMap <Integer>().PutAll(stayWithMe);

            // check one special case, removal near end
            me.PutAll(0x10FFFE, 0x10FFFF, 666);

            int iterations = 100000;

            // Scratch dictionary that fillRandomMap populates for the "putAll" operation.
            JCG.SortedDictionary <String, Integer> test = new JCG.SortedDictionary <string, Integer>(StringComparer.Ordinal);

            // Fixed seed, so every run replays the same sequence of operations.
            Random  rand = new Random(0);
            String  other;
            Integer value;

            // try modifications
            for (int i = 0; i < iterations; ++i)
                // The first iteration always selects case 0; later ones pick a value in 0..19.
                switch (i == 0 ? 0 : rand.Next(20))
                case 0:
                // NOTE(review): no statements for case 0 are visible in this excerpt - confirm against upstream.

                case 1:
                    fillRandomMap(rand, 5, test);
                    Logln("putAll\t" + test);
                // NOTE(review): the statements applying `test` to both maps, and the break, are not visible here.

                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                case 8:
                    // Removal branch: pick a random key and log it.
                    other = GetRandomKey(rand);
                    //                if (other.equals("\uDBFF\uDFFF") && me.containsKey(0x10FFFF) && me.get(0x10FFFF).equals(me.get(0x10FFFE))) {
                    //                    System.out.println("Remove\t" + other + "\n" + me);
                    //                }
                    Logln("remove\t" + other);
                    // NOTE(review): the try block this catch belongs to is not visible in this excerpt.
                    catch (ArgumentException e)
                        Errln("remove\t" + other + "\tfailed: " + e.ToString() + "\n" + me);

                    // Put branch (presumably the default case - label not visible): store the same
                    // random key/value pair in both the reference dictionary and the map under test.
                    other = GetRandomKey(rand);
                    value = new Integer(rand.Next(50) + 50);
                    Logln("put\t" + other + " = " + value);
                    stayWithMe[other] = value;
                    me.Put(other, value);
                // Compare the map under test against the reference dictionary.
                checkEquals(me, stayWithMe);
 /// <summary>
 /// Creates a terms enumerator over the given sorted term dictionary.
 /// </summary>
 /// <param name="terms">Postings keyed by term; kept by reference, not copied.</param>
 internal SimpleTVTermsEnum(JCG.SortedDictionary<BytesRef, SimpleTVPostings> terms)
 {
     _terms = terms;

     // Start from a fresh enumerator over the dictionary.
     _iterator = terms.GetEnumerator();
 }
Ejemplo n.º 24
        /// <summary>
        /// Builds a <see cref="WFSTCompletionLookup"/> from random, unique keys with random weights and
        /// checks, for every non-empty proper prefix of every key, that the suggester's top-N results
        /// match a brute-force scan over a sorted dictionary holding the same key/weight pairs.
        /// </summary>
        public void TestRandom()
        {
            int numWords = AtLeast(1000);

            // Reference data: key -> weight, plus every prefix that will be looked up.
            IDictionary<string, long> slowCompletor = new JCG.SortedDictionary<string, long>(StringComparer.Ordinal);
            ISet<string> allPrefixes = new JCG.SortedSet<string>(StringComparer.Ordinal);

            Input[] keys = new Input[numWords];

            for (int i = 0; i < numWords; i++)
            {
                String s;
                while (true)
                {
                    // TODO: would be nice to fix this slowCompletor/comparer to
                    // use full range, but we might lose some coverage too...
                    s = TestUtil.RandomSimpleString(LuceneTestCase.Random);
                    if (!slowCompletor.ContainsKey(s))
                    {
                        // Unique key found; stop retrying.
                        break;
                    }
                }

                // Record the prefixes of length 1 .. s.Length - 1.
                for (int j = 1; j < s.Length; j++)
                {
                    allPrefixes.add(s.Substring(0, j));
                }
                // we can probably do Integer.MAX_VALUE here, but why worry.
                int weight = LuceneTestCase.Random.nextInt(1 << 24);
                slowCompletor.Put(s, (long)weight);
                keys[i] = new Input(s, weight);
            }

            WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);

            suggester.Build(new InputArrayEnumerator(keys));

            assertEquals(numWords, suggester.Count);
            Random random = new Random(Random.Next());

            foreach (String prefix in allPrefixes)
            {
                int topN = TestUtil.NextInt32(random, 1, 10);
                IList<Lookup.LookupResult> r = suggester.DoLookup(TestUtil.StringToCharSequence(prefix, random).ToString(), false, topN);

                // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
                JCG.List<Lookup.LookupResult> matches = new JCG.List<Lookup.LookupResult>();

                // TODO: could be faster... but its slowCompletor for a reason
                foreach (KeyValuePair<string, long> e in slowCompletor)
                {
                    if (e.Key.StartsWith(prefix, StringComparison.Ordinal))
                    {
                        matches.Add(new Lookup.LookupResult(e.Key, e.Value));
                    }
                }

                assertTrue(matches.size() > 0);
                matches.Sort(new TestRandomComparer());

                // Keep only the first topN expected matches.
                if (matches.size() > topN)
                {
                    //matches.SubList(topN, matches.size()).clear();
                    matches.RemoveRange(topN, matches.size() - topN); // LUCENENET: Converted end index to length
                }

                assertEquals(matches.size(), r.size());

                for (int hit = 0; hit < r.size(); hit++)
                {
                    //System.out.println("  check hit " + hit);
                    assertEquals(matches[hit].Key.toString(), r[hit].Key.toString());
                    assertEquals(matches[hit].Value, r[hit].Value, 0f);
                }
            }
        }
Ejemplo n.º 25
        /// <summary>
        /// Feeds the same random inputs (plain, with payload, with contexts, with both) through
        /// <see cref="SortedInputIterator"/> and <see cref="UnsortedInputIterator"/> and compares what
        /// the iterators return against reference sorted dictionaries built from the same data.
        /// </summary>
        public void TestTerms()
        {
            Random random = Random;
            int num = AtLeast(10000);

#pragma warning disable 612, 618
            IComparer<BytesRef> comparer = random.nextBoolean() ? BytesRef.UTF8SortedAsUnicodeComparer : BytesRef.UTF8SortedAsUTF16Comparer;
#pragma warning restore 612, 618
            // Reference dictionaries, all ordered by the comparer chosen above.
            IDictionary<BytesRef, KeyValuePair<long, BytesRef>> sorted = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, BytesRef>>(comparer);
            IDictionary<BytesRef, long> sortedWithoutPayload = new JCG.SortedDictionary<BytesRef, long>(comparer);
            IDictionary<BytesRef, KeyValuePair<long, ISet<BytesRef>>> sortedWithContext = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, ISet<BytesRef>>>(comparer);
            IDictionary<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>> sortedWithPayloadAndContext = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>>(comparer);
            Input[] unsorted = new Input[num];
            Input[] unsortedWithoutPayload = new Input[num];
            Input[] unsortedWithContexts = new Input[num];
            Input[] unsortedWithPayloadAndContext = new Input[num];
            ISet<BytesRef> ctxs;
            for (int i = 0; i < num; i++)
            {
                BytesRef key2;
                BytesRef payload;
                ctxs = new JCG.HashSet<BytesRef>();
                // Regenerate until the key is not already present in the reference dictionary.
                do
                {
                    key2 = new BytesRef(TestUtil.RandomUnicodeString(random));
                    payload = new BytesRef(TestUtil.RandomUnicodeString(random));
                    for (int j = 0; j < AtLeast(2); j++)
                    {
                        ctxs.add(new BytesRef(TestUtil.RandomUnicodeString(random)));
                    }
                } while (sorted.ContainsKey(key2));
                long value = random.Next();
                sortedWithoutPayload.Put(key2, value);
                sorted.Put(key2, new KeyValuePair<long, BytesRef>(value, payload));
                sortedWithContext.Put(key2, new KeyValuePair<long, ISet<BytesRef>>(value, ctxs));
                sortedWithPayloadAndContext.Put(key2, new KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>(value, new KeyValuePair<BytesRef, ISet<BytesRef>>(payload, ctxs)));
                unsorted[i] = new Input(key2, value, payload);
                unsortedWithoutPayload[i] = new Input(key2, value);
                unsortedWithContexts[i] = new Input(key2, value, ctxs);
                unsortedWithPayloadAndContext[i] = new Input(key2, value, payload, ctxs);
            }

            // test the sorted iterator wrapper with payloads
            IInputIterator wrapper = new SortedInputIterator(new InputArrayIterator(unsorted), comparer);
            IEnumerator<KeyValuePair<BytesRef, KeyValuePair<long, BytesRef>>> expected = sorted.GetEnumerator();
            while (expected.MoveNext())
            {
                KeyValuePair<BytesRef, KeyValuePair<long, BytesRef>> entry = expected.Current;

                assertEquals(entry.Key, wrapper.Next());
                assertEquals(Convert.ToInt64(entry.Value.Key), wrapper.Weight);
                assertEquals(entry.Value.Value, wrapper.Payload);
            }

            // test the sorted iterator wrapper with contexts
            wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithContexts), comparer);
            IEnumerator<KeyValuePair<BytesRef, KeyValuePair<long, ISet<BytesRef>>>> actualEntries = sortedWithContext.GetEnumerator();
            while (actualEntries.MoveNext())
            {
                KeyValuePair<BytesRef, KeyValuePair<long, ISet<BytesRef>>> entry = actualEntries.Current;
                assertEquals(entry.Key, wrapper.Next());
                assertEquals(Convert.ToInt64(entry.Value.Key), wrapper.Weight);
                ISet<BytesRef> actualCtxs = entry.Value.Value;
                assertEquals(actualCtxs, wrapper.Contexts);
            }

            // test the sorted iterator wrapper with contexts and payload
            wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithPayloadAndContext), comparer);
            IEnumerator<KeyValuePair<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>>> expectedPayloadContextEntries = sortedWithPayloadAndContext.GetEnumerator();
            while (expectedPayloadContextEntries.MoveNext())
            {
                KeyValuePair<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>> entry = expectedPayloadContextEntries.Current;
                assertEquals(entry.Key, wrapper.Next());
                assertEquals(Convert.ToInt64(entry.Value.Key), wrapper.Weight);
                ISet<BytesRef> actualCtxs = entry.Value.Value.Value;
                assertEquals(actualCtxs, wrapper.Contexts);
                BytesRef actualPayload = entry.Value.Value.Key;
                assertEquals(actualPayload, wrapper.Payload);
            }

            // test the unsorted iterator wrapper with payloads
            wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
            IDictionary<BytesRef, KeyValuePair<long, BytesRef>> actual = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, BytesRef>>();
            BytesRef key;
            while ((key = wrapper.Next()) != null)
            {
                long value = wrapper.Weight;
                BytesRef payload = wrapper.Payload;
                // Deep copies are stored rather than the BytesRef instances handed out by the iterator.
                actual.Put(BytesRef.DeepCopyOf(key), new KeyValuePair<long, BytesRef>(value, BytesRef.DeepCopyOf(payload)));
            }
            assertEquals(sorted, actual);

            // test the sorted iterator wrapper without payloads
            IInputIterator wrapperWithoutPayload = new SortedInputIterator(new InputArrayIterator(unsortedWithoutPayload), comparer);
            IEnumerator<KeyValuePair<BytesRef, long>> expectedWithoutPayload = sortedWithoutPayload.GetEnumerator();
            while (expectedWithoutPayload.MoveNext())
            {
                KeyValuePair<BytesRef, long> entry = expectedWithoutPayload.Current;

                assertEquals(entry.Key, wrapperWithoutPayload.Next());
                assertEquals(Convert.ToInt64(entry.Value), wrapperWithoutPayload.Weight);
            }

            // test the unsorted iterator wrapper without payloads
            wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload));
            IDictionary<BytesRef, long> actualWithoutPayload = new JCG.SortedDictionary<BytesRef, long>();
            while ((key = wrapperWithoutPayload.Next()) != null)
            {
                long value = wrapperWithoutPayload.Weight;
                actualWithoutPayload.Put(BytesRef.DeepCopyOf(key), value);
            }
            assertEquals(sortedWithoutPayload, actualWithoutPayload);
        }