public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, EXTENSION);
    ChecksumIndexInput @in = state.Directory.OpenChecksumInput(fileName, IOContext.READ_ONCE);
    // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
    var fields = new JCG.SortedDictionary<string, TermsReader>(StringComparer.Ordinal);

    try
    {
        CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
        while (true)
        {
            int termCount = @in.ReadVInt32();
            if (termCount == 0)
            {
                break;
            }
            TermsReader termsReader = new TermsReader(state.FieldInfos, @in, termCount);
            // System.out.println("load field=" + termsReader.field.name);
            fields.Add(termsReader.field.Name, termsReader);
        }
        CodecUtil.CheckFooter(@in);
    }
    finally
    {
        @in.Dispose();
    }

    return new FieldsProducerAnonymousInnerClassHelper(fields);
}
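Several of the readers in this listing pass StringComparer.Ordinal so that keys sort by raw UTF-16 code units, matching the ordering of Java's TreeMap<String, ...>. A minimal standalone sketch of the difference, assuming a J2N reference is available; the field names are invented for illustration:

using System;
using JCG = J2N.Collections.Generic;

static class OrdinalOrderingDemo
{
    static void Main()
    {
        // Hypothetical field names; '_' and upper-case letters expose the difference.
        string[] fieldNames = { "id", "_id", "Title", "body" };

        // Default comparer: culture-sensitive, so the order depends on the current culture.
        var cultureSorted = new JCG.SortedDictionary<string, int>();
        // Ordinal comparer: raw UTF-16 code-unit order, the same order Java's TreeMap uses.
        var ordinalSorted = new JCG.SortedDictionary<string, int>(StringComparer.Ordinal);

        for (int i = 0; i < fieldNames.Length; i++)
        {
            cultureSorted[fieldNames[i]] = i;
            ordinalSorted[fieldNames[i]] = i;
        }

        Console.WriteLine(string.Join(", ", cultureSorted.Keys)); // culture-dependent order
        Console.WriteLine(string.Join(", ", ordinalSorted.Keys)); // Title, _id, body, id
    }
}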
public override SeekStatus SeekCeil(BytesRef text)
{
    var newTerms = new JCG.SortedDictionary<BytesRef, SimpleTVPostings>(_terms.Comparer);
    foreach (var p in _terms.Where(p => p.Key.CompareTo(text) >= 0))
    {
        newTerms.Add(p.Key, p.Value);
    }

    _iterator = newTerms.GetEnumerator();

    // LUCENENET specific: Since in .NET we don't have a HasNext() method, we call
    // Next() and check whether the result is null instead. Since we need the result
    // of Next() for the Equals() comparison anyway, this makes sense here.
    var next = Next();
    if (next == null)
    {
        return SeekStatus.END;
    }
    else
    {
        return next.Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
    }
}
private void checkNext(UnicodeMap<String> map1, IDictionary<Integer, string> map2, int limit)
{
    Logln("Comparing nextRange");
    IDictionary<Integer, string> localMap = new JCG.SortedDictionary<Integer, string>();
    UnicodeMapIterator<String> mi = new UnicodeMapIterator<String>(map1);
    while (mi.NextRange())
    {
        Logln(Utility.Hex(mi.Codepoint) + ".." + Utility.Hex(mi.CodepointEnd) + " => " + mi.Value);
        for (int i = mi.Codepoint; i <= mi.CodepointEnd; ++i)
        {
            //if (i >= limit) continue;
            localMap[new Integer(i)] = mi.Value;
        }
    }
    checkMap(map2, localMap);

    Logln("Comparing next");
    mi.Reset();
    localMap = new JCG.SortedDictionary<Integer, string>();
    // String lastValue = null;
    while (mi.Next())
    {
        // if (!UnicodeMap.areEqual(lastValue, mi.value)) {
        //     // System.out.println("Change: " + Utility.hex(mi.codepoint) + " => " + mi.value);
        //     lastValue = mi.value;
        // }
        //if (mi.codepoint >= limit) continue;
        localMap[new Integer(mi.Codepoint)] = mi.Value;
    }
    checkMap(map2, localMap);
}
internal SimpleTVTerms(bool hasOffsets, bool hasPositions, bool hasPayloads)
{
    _hasOffsetsRenamed = hasOffsets;
    _hasPositionsRenamed = hasPositions;
    _hasPayloadsRenamed = hasPayloads;
    terms = new JCG.SortedDictionary<BytesRef, SimpleTVPostings>();
}
/// <summary>
/// Check the hits for duplicates.
/// </summary>
private void CheckHits(ScoreDoc[] hits, string prefix)
{
    if (hits != null)
    {
        IDictionary<int?, int?> idMap = new JCG.SortedDictionary<int?, int?>();
        for (int docnum = 0; docnum < hits.Length; ++docnum)
        {
            int? luceneId = Convert.ToInt32(hits[docnum].Doc);
            if (idMap.TryGetValue(luceneId, out int? value))
            {
                StringBuilder message = new StringBuilder(prefix);
                message.Append("Duplicate key for hit index = ");
                message.Append(docnum);
                message.Append(", previous index = ");
                message.Append(value.ToString());
                message.Append(", Lucene ID = ");
                message.Append(luceneId);
                Log(message.ToString());
            }
            else
            {
                idMap[luceneId] = Convert.ToInt32(docnum);
            }
        }
    }
}
/// <summary>
/// Lookup words in text.
/// </summary>
/// <param name="chars">Text.</param>
/// <param name="off">Offset into text.</param>
/// <param name="len">Length of text.</param>
/// <returns>Array of {wordId, position, length}.</returns>
public int[][] Lookup(char[] chars, int off, int len)
{
    // TODO: can we avoid this treemap/toIndexArray?
    IDictionary<int, int[]> result = new JCG.SortedDictionary<int, int[]>(); // index, [length, length...]
    bool found = false; // true if we found any results

    FST.BytesReader fstReader = fst.GetBytesReader();

    FST.Arc<Int64> arc = new FST.Arc<Int64>();
    int end = off + len;
    for (int startOffset = off; startOffset < end; startOffset++)
    {
        arc = fst.GetFirstArc(arc);
        int output = 0;
        int remaining = end - startOffset;
        for (int i = 0; i < remaining; i++)
        {
            int ch = chars[startOffset + i];
            if (fst.FindTargetArc(ch, arc, arc, i == 0, fstReader) is null)
            {
                break; // continue to next position
            }
            output += (int)arc.Output;
            if (arc.IsFinal)
            {
                int finalOutput = output + (int)arc.NextFinalOutput;
                result[startOffset - off] = segmentations[finalOutput];
                found = true;
            }
        }
    }

    return found ? ToIndexArray(result) : EMPTY_RESULT;
}
private IDictionary<string, long?> ReadFields(IndexInput @in)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    var scratch = new BytesRef(10);
    // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
    var fields = new JCG.SortedDictionary<string, long?>(StringComparer.Ordinal);

    while (true)
    {
        SimpleTextUtil.ReadLine(input, scratch);
        if (scratch.Equals(SimpleTextFieldsWriter.END))
        {
            SimpleTextUtil.CheckFooter(input);
            return fields;
        }
        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
        {
            var fieldName = Encoding.UTF8.GetString(scratch.Bytes,
                scratch.Offset + SimpleTextFieldsWriter.FIELD.Length,
                scratch.Length - SimpleTextFieldsWriter.FIELD.Length);
            fields[fieldName] = input.GetFilePointer();
        }
    }
}
public override SeekStatus SeekCeil(BytesRef text)
{
    var newTerms = new JCG.SortedDictionary<BytesRef, SimpleTVPostings>(_terms.Comparer);
    foreach (var p in _terms)
    {
        if (p.Key.CompareTo(text) >= 0)
        {
            newTerms.Add(p.Key, p.Value);
        }
    }

    _iterator = newTerms.GetEnumerator();

    // LUCENENET specific: Since in .NET we don't have a HasNext() method, we need
    // to call MoveNext(). Since we need to check the result anyway for the
    // Equals() comparison, this makes sense here.
    if (!MoveNext())
    {
        return SeekStatus.END;
    }
    else
    {
        return _current.Key.Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
    }
}
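The two SeekCeil ports above show the iterator translation the LUCENENET comments describe: Java's hasNext()/next() pair becomes a single MoveNext() call plus Current. A standalone sketch of that idiom using only the BCL; the term data and the FOUND/NOT_FOUND strings are invented for illustration:

using System;
using System.Collections.Generic;

static class MoveNextIdiomDemo
{
    static void Main()
    {
        // Hypothetical sorted terms standing in for newTerms in SeekCeil above.
        var terms = new SortedDictionary<string, int> { { "apple", 1 }, { "banana", 2 } };
        const string target = "apple";

        using IEnumerator<KeyValuePair<string, int>> it = terms.GetEnumerator();

        // Java:  if (!it.hasNext()) return END;  Entry e = it.next();
        // .NET:  MoveNext() both advances and reports whether an element exists;
        //        Current is only valid after MoveNext() has returned true.
        if (!it.MoveNext())
        {
            Console.WriteLine("END");
        }
        else
        {
            Console.WriteLine(it.Current.Key == target ? "FOUND" : "NOT_FOUND"); // FOUND
        }
    }
}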
internal UnicodeLocaleExtension(JCG.SortedSet<string> attributes, JCG.SortedDictionary<string, string> keywords)
    : this()
{
    if (attributes != null && attributes.Count > 0)
    {
        _attributes = attributes;
    }
    if (keywords != null && keywords.Count > 0)
    {
        _keywords = keywords;
    }

    if (_attributes.Count > 0 || _keywords.Count > 0)
    {
        StringBuilder sb = new StringBuilder();
        foreach (string attribute in _attributes)
        {
            sb.Append(LanguageTag.Separator).Append(attribute);
        }
        foreach (var keyword in _keywords)
        {
            string key = keyword.Key;
            string value = keyword.Value;

            sb.Append(LanguageTag.Separator).Append(key);
            if (value.Length > 0)
            {
                sb.Append(LanguageTag.Separator).Append(value);
            }
        }
        m_value = sb.ToString(1, sb.Length - 1); // skip leading '-'
    }
}
public void TestSolrASHKENAZI()
{
    IDictionary<String, String> args;

    // concat is true, ruleType is EXACT
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "ASHKENAZI");
    Assert.AreEqual(Encode(args, true, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");

    args.Put("ruleType", "EXACT");
    Assert.AreEqual(Encode(args, true, "Angelo"), "andZelo|angelo|anhelo|anxelo");
    Assert.AreEqual(Encode(args, true, "D'Angelo"), "dandZelo|dangelo|danhelo|danxelo");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, true, "Angelo"), "angelo|anxelo");
    Assert.AreEqual(Encode(args, true, "1234"), "");

    // concat is false, ruleType is EXACT
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "ASHKENAZI");
    Assert.AreEqual(Encode(args, false, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");

    args.Put("ruleType", "EXACT");
    Assert.AreEqual(Encode(args, false, "Angelo"), "andZelo|angelo|anhelo|anxelo");
    Assert.AreEqual(Encode(args, false, "D'Angelo"), "dandZelo|dangelo|danhelo|danxelo");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, false, "Angelo"), "angelo|anxelo");
    Assert.AreEqual(Encode(args, false, "1234"), "");

    // concat is true, ruleType is APPROX
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "ASHKENAZI");
    Assert.AreEqual(Encode(args, true, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");

    args.Put("ruleType", "APPROX");
    Assert.AreEqual(Encode(args, true, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
    Assert.AreEqual(Encode(args, true, "D'Angelo"), "dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, true, "Angelo"), "AnSelO|AngElO|AngzelO|AnkselO");
    Assert.AreEqual(Encode(args, true, "1234"), "");

    // concat is false, ruleType is APPROX
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "ASHKENAZI");
    Assert.AreEqual(Encode(args, false, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");

    args.Put("ruleType", "APPROX");
    Assert.AreEqual(Encode(args, false, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
    Assert.AreEqual(Encode(args, false, "D'Angelo"), "dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, false, "Angelo"), "AnSelO|AngElO|AngzelO|AnkselO");
    Assert.AreEqual(Encode(args, false, "1234"), "");
}
public void TestSolrSEPHARDIC()
{
    IDictionary<String, String> args;

    // concat is true, ruleType is EXACT
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "SEPHARDIC");
    Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");

    args.Put("ruleType", "EXACT");
    Assert.AreEqual(Encode(args, true, "Angelo"), "anZelo|andZelo|anxelo");
    Assert.AreEqual(Encode(args, true, "D'Angelo"), "anZelo|andZelo|anxelo");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, true, "Angelo"), "andZelo|anxelo");
    Assert.AreEqual(Encode(args, true, "1234"), "");

    // concat is false, ruleType is EXACT
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "SEPHARDIC");
    Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");

    args.Put("ruleType", "EXACT");
    Assert.AreEqual(Encode(args, false, "Angelo"), "anZelo|andZelo|anxelo");
    Assert.AreEqual(Encode(args, false, "D'Angelo"), "danZelo|dandZelo|danxelo");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, false, "Angelo"), "andZelo|anxelo");
    Assert.AreEqual(Encode(args, false, "1234"), "");

    // concat is true, ruleType is APPROX
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "SEPHARDIC");
    Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");

    args.Put("ruleType", "APPROX");
    Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
    Assert.AreEqual(Encode(args, true, "D'Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
    Assert.AreEqual(Encode(args, true, "1234"), "");

    // concat is false, ruleType is APPROX
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "SEPHARDIC");
    Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");

    args.Put("ruleType", "APPROX");
    Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
    Assert.AreEqual(Encode(args, false, "D'Angelo"), "danhila|danhilu|danzila|danzilu|nhila|nhilu|nzila|nzilu");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
    Assert.AreEqual(Encode(args, false, "1234"), "");
}
/**
 * @param me
 * @param stayWithMe
 */
private void checkEquals(UnicodeMap<Integer> me, JCG.SortedDictionary<String, Integer> stayWithMe)
{
    temp.Clear();
    foreach (var e in me.EntrySet())
    {
        temp.Add(e);
    }
    ISet<KeyValuePair<String, Integer>> entrySet = new JCG.HashSet<KeyValuePair<string, Integer>>(stayWithMe);
    if (!entrySet.SetEquals(temp))
    {
        Logln(me.EntrySet().ToString());
        Logln(me.ToString());
        assertEquals("are in parallel", entrySet, temp);
        // we failed. Reset and start again
        entrySet.Clear();
        temp.Clear();
        return;
    }
    // ICU4N: looping through pairs instead of doing explicit table lookups is much faster
    foreach (var pair in stayWithMe)
    {
        assertEquals("containsKey", stayWithMe.ContainsKey(pair.Key), me.ContainsKey(pair.Key));
        Integer value = pair.Value;
        assertEquals("get", value, me.Get(pair.Key));
        assertEquals("containsValue", stayWithMe.ContainsValue(value), me.ContainsValue(value));
        int cp = UnicodeSet.GetSingleCodePoint(pair.Key);
        if (cp != int.MaxValue)
        {
            assertEquals("get", value, me.Get(cp));
        }
    }

    // ICU4N TODO: complete implementation
    //ISet<String> nonCodePointStrings = stayWithMe.tailMap("").keySet();
    //if (nonCodePointStrings.Count == 0) nonCodePointStrings = null; // for parallel api
    //assertEquals("getNonRangeStrings", nonCodePointStrings, me.GetNonRangeStrings());

    ISet<Integer> values = new JCG.SortedSet<Integer>(stayWithMe.Values);
    ISet<Integer> myValues = new JCG.SortedSet<Integer>(me.Values());
    assertEquals("values", myValues, values);

    foreach (String key in stayWithMe.Keys)
    {
        assertEquals("containsKey", stayWithMe.ContainsKey(key), me.ContainsKey(key));
    }
}
// make sure the documents returned by the search match the expected list
private void MatchHits(IndexSearcher searcher, Sort sort)
{
    // make a query without sorting first
    ScoreDoc[] hitsByRank = searcher.Search(Query, null, int.MaxValue).ScoreDocs;
    CheckHits(hitsByRank, "Sort by rank: "); // check for duplicates
    IDictionary<int?, int?> resultMap = new JCG.SortedDictionary<int?, int?>();
    // store hits in TreeMap - TreeMap does not allow duplicates; existing
    // entries are silently overwritten
    for (int hitid = 0; hitid < hitsByRank.Length; ++hitid)
    {
        // Value: hits array index - Key: Lucene document ID
        resultMap[Convert.ToInt32(hitsByRank[hitid].Doc)] = Convert.ToInt32(hitid);
    }

    // now make a query using the sort criteria
    ScoreDoc[] resultSort = searcher.Search(Query, null, int.MaxValue, sort).ScoreDocs;
    CheckHits(resultSort, "Sort by custom criteria: "); // check for duplicates

    // besides the sorting both sets of hits must be identical
    for (int hitid = 0; hitid < resultSort.Length; ++hitid)
    {
        int? idHitDate = Convert.ToInt32(resultSort[hitid].Doc); // document ID from sorted search
        if (!resultMap.ContainsKey(idHitDate))
        {
            Log("ID " + idHitDate + " not found. Possibly a duplicate.");
        }
        Assert.IsTrue(resultMap.ContainsKey(idHitDate)); // same ID must be in the Map from the rank-sorted search

        // every hit must appear once in both result sets --> remove it from the Map.
        // At the end the Map must be empty!
        resultMap.Remove(idHitDate);
    }

    if (resultMap.Count == 0)
    {
        // log("All hits matched");
    }
    else
    {
        Log("Couldn't match " + resultMap.Count + " hits.");
    }
    Assert.AreEqual(resultMap.Count, 0);
}
public void TestSolrGENERIC()
{
    IDictionary<String, String> args;

    // concat is true, ruleType is EXACT
    args = new JCG.SortedDictionary<String, String>();
    args.Put("nameType", "GENERIC");
    Assert.AreEqual(Encode(args, true, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");

    args.Put("ruleType", "EXACT");
    Assert.AreEqual(Encode(args, true, "Angelo"), "anZelo|andZelo|angelo|anhelo|anjelo|anxelo");
    Assert.AreEqual(Encode(args, true, "D'Angelo"), "(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, true, "Angelo"), "andZelo|angelo|anxelo");
    Assert.AreEqual(Encode(args, true, "1234"), "");

    // concat is false, ruleType is EXACT
    args = new JCG.SortedDictionary<String, String>();
    Assert.AreEqual(Encode(args, false, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");

    args.Put("ruleType", "EXACT");
    Assert.AreEqual(Encode(args, false, "Angelo"), "anZelo|andZelo|angelo|anhelo|anjelo|anxelo");
    Assert.AreEqual(Encode(args, false, "D'Angelo"), "(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, false, "Angelo"), "andZelo|angelo|anxelo");
    Assert.AreEqual(Encode(args, false, "1234"), "");

    // concat is true, ruleType is APPROX
    args = new JCG.SortedDictionary<String, String>();
    Assert.AreEqual(Encode(args, true, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");

    args.Put("ruleType", "APPROX");
    Assert.AreEqual(Encode(args, true, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
    Assert.AreEqual(Encode(args, true, "D'Angelo"), "(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, true, "Angelo"), "angilo|anxilo|anzilo|ongilo|onxilo|onzilo");
    Assert.AreEqual(Encode(args, true, "1234"), "");

    // concat is false, ruleType is APPROX
    args = new JCG.SortedDictionary<String, String>();
    Assert.AreEqual(Encode(args, false, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");

    args.Put("ruleType", "APPROX");
    Assert.AreEqual(Encode(args, false, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
    Assert.AreEqual(Encode(args, false, "D'Angelo"), "(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)");

    args.Put("languageSet", "italian,greek,spanish");
    Assert.AreEqual(Encode(args, false, "Angelo"), "angilo|anxilo|anzilo|ongilo|onxilo|onzilo");
    Assert.AreEqual(Encode(args, false, "1234"), "");
}
[Timeout(120000)] // ICU4N: This test can take a while because of the slowness of adding items to SortedSet
public void TestUnicodeMapGeneralCategory()
{
    Logln("Setting General Category");
    UnicodeMap<String> map1 = new UnicodeMap<string>();
    IDictionary<Integer, String> map2 = new JCG.Dictionary<Integer, String>();
    //Map<Integer, String> map3 = new TreeMap<Integer, String>();
    map1 = new UnicodeMap<String>();
    map2 = new JCG.SortedDictionary<Integer, String>();

    for (int cp = 0; cp <= SET_LIMIT; ++cp)
    {
        int enumValue = UChar.GetIntPropertyValue(cp, propEnum);
        //if (enumValue <= 0) continue; // for smaller set
        String value = UChar.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
        map1.Put(cp, value);
        map2[new Integer(cp)] = value;
    }
    checkNext(map1, map2, int.MaxValue);

    Logln("Comparing General Category");
    check(map1, map2, -1);
    Logln("Comparing Values");
    ISet<String> values1 = new JCG.SortedSet<String>(StringComparer.Ordinal);
    map1.GetAvailableValues(values1);
    ISet<String> values2 = new JCG.SortedSet<String>(map2.Values.Distinct(), StringComparer.Ordinal); // ICU4N NOTE: Added Distinct()
    if (!TestBoilerplate<string>.VerifySetsIdentical(this, values1, values2))
    {
        throw new ArgumentException("Halting");
    }
    Logln("Comparing Sets");
    foreach (string value in values1)
    {
        Logln(value == null ? "null" : value);
        UnicodeSet set1 = map1.KeySet(value);
        UnicodeSet set2 = TestBoilerplate<string>.GetSet(map2, value);
        if (!TestBoilerplate<string>.VerifySetsIdentical(this, set1, set2))
        {
            throw new ArgumentException("Halting");
        }
    }
}
public SimpleTextFieldsReader(SegmentReadState state)
{
    this.maxDoc = state.SegmentInfo.DocCount;
    fieldInfos = state.FieldInfos;
    input = state.Directory.OpenInput(SimpleTextPostingsFormat.GetPostingsFileName(state.SegmentInfo.Name, state.SegmentSuffix), state.Context);
    bool success = false;
    try
    {
        fields = ReadFields((IndexInput)input.Clone());
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(this);
        }
    }
}
public virtual Query Build(IQueryNode queryNode)
{
    MultiPhraseQueryNode phraseNode = (MultiPhraseQueryNode)queryNode;
    MultiPhraseQuery phraseQuery = new MultiPhraseQuery();
    IList<IQueryNode> children = phraseNode.GetChildren();

    if (children != null)
    {
        IDictionary<int?, List<Term>> positionTermMap = new JCG.SortedDictionary<int?, List<Term>>();

        foreach (IQueryNode child in children)
        {
            FieldQueryNode termNode = (FieldQueryNode)child;
            TermQuery termQuery = (TermQuery)termNode.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
            List<Term> termList;
            if (!positionTermMap.TryGetValue(termNode.PositionIncrement, out termList) || termList == null)
            {
                termList = new List<Term>();
                positionTermMap[termNode.PositionIncrement] = termList;
            }

            termList.Add(termQuery.Term);
        }

        foreach (int positionIncrement in positionTermMap.Keys)
        {
            List<Term> termList = positionTermMap[positionIncrement];
            phraseQuery.Add(termList.ToArray(/*new Term[termList.size()]*/), positionIncrement);
        }
    }

    return phraseQuery;
}
// parses a list of MappingCharFilter style rules into a custom byte[] type table
private byte[] ParseTypes(IList<string> rules)
{
    IDictionary<char, byte> typeMap = new JCG.SortedDictionary<char, byte>();
    foreach (string rule in rules)
    {
        Match m = typePattern.Match(rule);
        if (!m.Success)
        {
            throw new ArgumentException("Invalid Mapping Rule : [" + rule + "]");
        }
        string lhs = ParseString(m.Groups[1].Value.Trim());
        byte rhs = ParseType(m.Groups[2].Value.Trim());
        if (lhs.Length != 1)
        {
            throw new ArgumentException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
        }
        if (rhs == WordDelimiterFilter.NOT_SET)
        {
            throw new ArgumentException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
        }
        typeMap[lhs[0]] = rhs;
    }

    // ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
    byte[] types = new byte[Math.Max(typeMap.Keys.LastOrDefault() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
    for (int i = 0; i < types.Length; i++)
    {
        types[i] = WordDelimiterIterator.GetType(i);
    }
    foreach (var mapping in typeMap)
    {
        types[mapping.Key] = mapping.Value;
    }
    return types;
}
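ParseTypes relies on the sorted key order: because the dictionary enumerates keys in ascending order, Keys.LastOrDefault() is the largest mapped character and sizes the lookup table. A small standalone sketch of that property, assuming a J2N reference; the character-to-type mappings are invented for illustration:

using System;
using System.Linq;
using JCG = J2N.Collections.Generic;

static class SortedKeysDemo
{
    static void Main()
    {
        // Hypothetical delimiter-type mappings standing in for typeMap above.
        var typeMap = new JCG.SortedDictionary<char, byte>();
        typeMap['.'] = 3;   // inserted out of order on purpose
        typeMap['$'] = 1;
        typeMap['%'] = 2;

        // Enumeration is in key order, so the last key is the largest character,
        // and LastOrDefault() + 1 is the minimum table size that covers every key.
        int tableSize = typeMap.Keys.LastOrDefault() + 1;
        Console.WriteLine(string.Join(" ", typeMap.Keys)); // $ % .
        Console.WriteLine(tableSize);                      // 47 ('.' is U+002E)
    }
}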
public override Fields Get(int doc)
{
    // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
    var fields = new JCG.SortedDictionary<string, SimpleTVTerms>(StringComparer.Ordinal);

    _input.Seek(_offsets[doc]);
    ReadLine();
    if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.NUMFIELDS)); }
    var numFields = ParseInt32At(SimpleTextTermVectorsWriter.NUMFIELDS.Length);
    if (numFields == 0)
    {
        return null; // no vectors for this doc
    }
    for (var i = 0; i < numFields; i++)
    {
        ReadLine();
        if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELD)); }
        // skip fieldNumber:
        ParseInt32At(SimpleTextTermVectorsWriter.FIELD.Length);

        ReadLine();
        if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDNAME)); }
        var fieldName = ReadString(SimpleTextTermVectorsWriter.FIELDNAME.Length, _scratch);

        ReadLine();
        if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPOSITIONS)); }
        var positions = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPOSITIONS.Length, _scratch), CultureInfo.InvariantCulture);

        ReadLine();
        if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDOFFSETS)); }
        var offsets = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDOFFSETS.Length, _scratch), CultureInfo.InvariantCulture);

        ReadLine();
        if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPAYLOADS)); }
        var payloads = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPAYLOADS.Length, _scratch), CultureInfo.InvariantCulture);

        ReadLine();
        if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDTERMCOUNT)); }
        var termCount = ParseInt32At(SimpleTextTermVectorsWriter.FIELDTERMCOUNT.Length);

        var terms = new SimpleTVTerms(offsets, positions, payloads);
        fields.Add(fieldName, terms);

        for (var j = 0; j < termCount; j++)
        {
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMTEXT)); }
            var term = new BytesRef();
            var termLength = _scratch.Length - SimpleTextTermVectorsWriter.TERMTEXT.Length;
            term.Grow(termLength);
            term.Length = termLength;
            Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.TERMTEXT.Length, term.Bytes, term.Offset, termLength);

            var postings = new SimpleTVPostings();
            terms.terms.Add(term, postings);

            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMFREQ)); }
            postings.freq = ParseInt32At(SimpleTextTermVectorsWriter.TERMFREQ.Length);

            if (!positions && !offsets)
            {
                continue;
            }

            if (positions)
            {
                postings.positions = new int[postings.freq];
                if (payloads)
                {
                    postings.payloads = new BytesRef[postings.freq];
                }
            }

            if (offsets)
            {
                postings.startOffsets = new int[postings.freq];
                postings.endOffsets = new int[postings.freq];
            }

            for (var k = 0; k < postings.freq; k++)
            {
                if (positions)
                {
                    ReadLine();
                    if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.POSITION)); }
                    postings.positions[k] = ParseInt32At(SimpleTextTermVectorsWriter.POSITION.Length);
                    if (payloads)
                    {
                        ReadLine();
                        if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.PAYLOAD)); }
                        if (_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length == 0)
                        {
                            postings.payloads[k] = null;
                        }
                        else
                        {
                            var payloadBytes = new byte[_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length];
                            Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.PAYLOAD.Length, payloadBytes, 0, payloadBytes.Length);
                            postings.payloads[k] = new BytesRef(payloadBytes);
                        }
                    }
                }

                if (!offsets)
                {
                    continue;
                }

                ReadLine();
                if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.STARTOFFSET)); }
                postings.startOffsets[k] = ParseInt32At(SimpleTextTermVectorsWriter.STARTOFFSET.Length);

                ReadLine();
                if (Debugging.AssertsEnabled) { Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.ENDOFFSET)); }
                postings.endOffsets[k] = ParseInt32At(SimpleTextTermVectorsWriter.ENDOFFSET.Length);
            }
        }
    }
    return new SimpleTVFields(this, fields);
}
/// <summary>
/// Internal constructor, only used by <see cref="InternalLocaleBuilder"/>.
/// </summary>
internal LocaleExtensions(IDictionary<CaseInsensitiveChar, string> extensions,
    ISet<CaseInsensitiveString> uattributes, IDictionary<CaseInsensitiveString, string> ukeywords)
{
    bool hasExtension = (extensions != null && extensions.Count > 0);
    bool hasUAttributes = (uattributes != null && uattributes.Count > 0);
    bool hasUKeywords = (ukeywords != null && ukeywords.Count > 0);

    if (!hasExtension && !hasUAttributes && !hasUKeywords)
    {
        _map = EmptyMap;
        _id = "";
        return;
    }

    // Build extension map
    _map = new JCG.SortedDictionary<char, Extension>();
    if (hasExtension)
    {
        foreach (var ext in extensions)
        {
            char key = AsciiUtil.ToLower(ext.Key.Value);
            string value = ext.Value;

            if (LanguageTag.IsPrivateusePrefixChar(key))
            {
                // we need to exclude special variant in privateuse, e.g. "x-abc-lvariant-DEF"
                value = InternalLocaleBuilder.RemovePrivateuseVariant(value);
                if (value == null)
                {
                    continue;
                }
            }

            Extension e = new Extension(key, AsciiUtil.ToLower(value));
            _map[key] = e;
        }
    }

    if (hasUAttributes || hasUKeywords)
    {
        JCG.SortedSet<string> uaset = null;
        JCG.SortedDictionary<string, string> ukmap = null;

        if (hasUAttributes)
        {
            uaset = new JCG.SortedSet<string>(StringComparer.Ordinal);
            foreach (CaseInsensitiveString cis in uattributes)
            {
                uaset.Add(AsciiUtil.ToLower(cis.Value));
            }
        }

        if (hasUKeywords)
        {
            ukmap = new JCG.SortedDictionary<string, string>(StringComparer.Ordinal);
            foreach (var kwd in ukeywords)
            {
                string key = AsciiUtil.ToLower(kwd.Key.Value);
                string type = AsciiUtil.ToLower(kwd.Value);
                ukmap[key] = type;
            }
        }

        UnicodeLocaleExtension ule = new UnicodeLocaleExtension(uaset, ukmap);
        _map[UnicodeLocaleExtension.Singleton] = ule;
    }

    if (_map.Count == 0)
    {
        // this could happen when only privateuse with special variant
        _map = EmptyMap;
        _id = "";
    }
    else
    {
        _id = ToID(_map);
    }
}
/**
 * @param rand
 * @param max
 * @param test
 * @return
 */
private JCG.SortedDictionary<String, Integer> fillRandomMap(Random rand, int max, JCG.SortedDictionary<String, Integer> test)
{
    test.Clear();
    max = rand.Next(max);
    for (int i = 0; i < max; ++i)
    {
        test[GetRandomKey(rand)] = new Integer(rand.Next(50) + 50);
    }
    return test;
}
public void TestAMonkey()
{
    JCG.SortedDictionary<String, Integer> stayWithMe = new JCG.SortedDictionary<String, Integer>(OneFirstComparator);

    UnicodeMap<Integer> me = new UnicodeMap<Integer>().PutAll(stayWithMe);
    // check one special case, removal near end
    me.PutAll(0x10FFFE, 0x10FFFF, 666);
    me.Remove(0x10FFFF);

    int iterations = 100000;
    JCG.SortedDictionary<String, Integer> test = new JCG.SortedDictionary<string, Integer>(StringComparer.Ordinal);

    Random rand = new Random(0);
    String other;
    Integer value;
    // try modifications
    for (int i = 0; i < iterations; ++i)
    {
        switch (i == 0 ? 0 : rand.Next(20))
        {
            case 0:
                Logln("clear");
                stayWithMe.Clear();
                me.Clear();
                break;
            case 1:
                fillRandomMap(rand, 5, test);
                Logln("putAll\t" + test);
                stayWithMe.PutAll(test);
                me.PutAll(test);
                break;
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
            case 8:
                other = GetRandomKey(rand);
                // if (other.equals("\uDBFF\uDFFF") && me.containsKey(0x10FFFF) && me.get(0x10FFFF).equals(me.get(0x10FFFE))) {
                //     System.out.println("Remove\t" + other + "\n" + me);
                // }
                Logln("remove\t" + other);
                stayWithMe.Remove(other);
                try
                {
                    me.Remove(other);
                }
                catch (ArgumentException e)
                {
                    Errln("remove\t" + other + "\tfailed: " + e.ToString() + "\n" + me);
                    me.Clear();
                    stayWithMe.Clear();
                }
                break;
            default:
                other = GetRandomKey(rand);
                value = new Integer(rand.Next(50) + 50);
                Logln("put\t" + other + " = " + value);
                stayWithMe[other] = value;
                me.Put(other, value);
                break;
        }
        checkEquals(me, stayWithMe);
    }
}
internal SimpleTVTermsEnum(JCG.SortedDictionary<BytesRef, SimpleTVPostings> terms)
{
    _terms = terms;
    _iterator = terms.GetEnumerator();
}
public void TestRandom()
{
    int numWords = AtLeast(1000);

    IDictionary<string, long> slowCompletor = new JCG.SortedDictionary<string, long>(StringComparer.Ordinal);
    ISet<string> allPrefixes = new JCG.SortedSet<string>(StringComparer.Ordinal);

    Input[] keys = new Input[numWords];

    for (int i = 0; i < numWords; i++)
    {
        String s;
        while (true)
        {
            // TODO: would be nice to fix this slowCompletor/comparer to
            // use full range, but we might lose some coverage too...
            s = TestUtil.RandomSimpleString(LuceneTestCase.Random);
            if (!slowCompletor.ContainsKey(s))
            {
                break;
            }
        }

        for (int j = 1; j < s.Length; j++)
        {
            allPrefixes.add(s.Substring(0, j));
        }
        // we can probably do Integer.MAX_VALUE here, but why worry.
        int weight = LuceneTestCase.Random.nextInt(1 << 24);
        slowCompletor.Put(s, (long)weight);
        keys[i] = new Input(s, weight);
    }

    WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
    suggester.Build(new InputArrayEnumerator(keys));

    assertEquals(numWords, suggester.Count);
    Random random = new Random(Random.Next());
    foreach (String prefix in allPrefixes)
    {
        int topN = TestUtil.NextInt32(random, 1, 10);
        IList<Lookup.LookupResult> r = suggester.DoLookup(TestUtil.StringToCharSequence(prefix, random).ToString(), false, topN);

        // 2. go thru whole treemap (slowCompletor) and check it's actually the best suggestion
        JCG.List<Lookup.LookupResult> matches = new JCG.List<Lookup.LookupResult>();

        // TODO: could be faster... but it's slowCompletor for a reason
        foreach (KeyValuePair<string, long> e in slowCompletor)
        {
            if (e.Key.StartsWith(prefix, StringComparison.Ordinal))
            {
                matches.Add(new Lookup.LookupResult(e.Key, e.Value));
            }
        }

        assertTrue(matches.size() > 0);
        matches.Sort(new TestRandomComparer());

        if (matches.size() > topN)
        {
            //matches.SubList(topN, matches.size()).clear();
            matches.RemoveRange(topN, matches.size() - topN); // LUCENENET: Converted end index to length
        }
        assertEquals(matches.size(), r.size());

        for (int hit = 0; hit < r.size(); hit++)
        {
            //System.out.println("  check hit " + hit);
            assertEquals(matches[hit].Key.toString(), r[hit].Key.toString());
            assertEquals(matches[hit].Value, r[hit].Value, 0f);
        }
    }
}
public void TestTerms()
{
    Random random = Random;
    int num = AtLeast(10000);
#pragma warning disable 612, 618
    IComparer<BytesRef> comparer = random.nextBoolean() ? BytesRef.UTF8SortedAsUnicodeComparer : BytesRef.UTF8SortedAsUTF16Comparer;
#pragma warning restore 612, 618
    IDictionary<BytesRef, KeyValuePair<long, BytesRef>> sorted = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, BytesRef>>(comparer);
    IDictionary<BytesRef, long> sortedWithoutPayload = new JCG.SortedDictionary<BytesRef, long>(comparer);
    IDictionary<BytesRef, KeyValuePair<long, ISet<BytesRef>>> sortedWithContext = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, ISet<BytesRef>>>(comparer);
    IDictionary<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>> sortedWithPayloadAndContext = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>>(comparer);
    Input[] unsorted = new Input[num];
    Input[] unsortedWithoutPayload = new Input[num];
    Input[] unsortedWithContexts = new Input[num];
    Input[] unsortedWithPayloadAndContext = new Input[num];
    ISet<BytesRef> ctxs;
    for (int i = 0; i < num; i++)
    {
        BytesRef key2;
        BytesRef payload;
        ctxs = new JCG.HashSet<BytesRef>();
        do
        {
            key2 = new BytesRef(TestUtil.RandomUnicodeString(random));
            payload = new BytesRef(TestUtil.RandomUnicodeString(random));
            for (int j = 0; j < AtLeast(2); j++)
            {
                ctxs.add(new BytesRef(TestUtil.RandomUnicodeString(random)));
            }
        } while (sorted.ContainsKey(key2));
        long value = random.Next();
        sortedWithoutPayload.Put(key2, value);
        sorted.Put(key2, new KeyValuePair<long, BytesRef>(value, payload));
        sortedWithContext.Put(key2, new KeyValuePair<long, ISet<BytesRef>>(value, ctxs));
        sortedWithPayloadAndContext.Put(key2, new KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>(value, new KeyValuePair<BytesRef, ISet<BytesRef>>(payload, ctxs)));
        unsorted[i] = new Input(key2, value, payload);
        unsortedWithoutPayload[i] = new Input(key2, value);
        unsortedWithContexts[i] = new Input(key2, value, ctxs);
        unsortedWithPayloadAndContext[i] = new Input(key2, value, payload, ctxs);
    }

    // test the sorted iterator wrapper with payloads
    IInputIterator wrapper = new SortedInputIterator(new InputArrayIterator(unsorted), comparer);
    IEnumerator<KeyValuePair<BytesRef, KeyValuePair<long, BytesRef>>> expected = sorted.GetEnumerator();
    while (expected.MoveNext())
    {
        KeyValuePair<BytesRef, KeyValuePair<long, BytesRef>> entry = expected.Current;
        assertEquals(entry.Key, wrapper.Next());
        assertEquals(Convert.ToInt64(entry.Value.Key), wrapper.Weight);
        assertEquals(entry.Value.Value, wrapper.Payload);
    }
    assertNull(wrapper.Next());

    // test the sorted iterator wrapper with contexts
    wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithContexts), comparer);
    IEnumerator<KeyValuePair<BytesRef, KeyValuePair<long, ISet<BytesRef>>>> actualEntries = sortedWithContext.GetEnumerator();
    while (actualEntries.MoveNext())
    {
        KeyValuePair<BytesRef, KeyValuePair<long, ISet<BytesRef>>> entry = actualEntries.Current;
        assertEquals(entry.Key, wrapper.Next());
        assertEquals(Convert.ToInt64(entry.Value.Key), wrapper.Weight);
        ISet<BytesRef> actualCtxs = entry.Value.Value;
        assertEquals(actualCtxs, wrapper.Contexts);
    }
    assertNull(wrapper.Next());

    // test the sorted iterator wrapper with contexts and payload
    wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithPayloadAndContext), comparer);
    IEnumerator<KeyValuePair<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>>> expectedPayloadContextEntries = sortedWithPayloadAndContext.GetEnumerator();
    while (expectedPayloadContextEntries.MoveNext())
    {
        KeyValuePair<BytesRef, KeyValuePair<long, KeyValuePair<BytesRef, ISet<BytesRef>>>> entry = expectedPayloadContextEntries.Current;
        assertEquals(entry.Key, wrapper.Next());
        assertEquals(Convert.ToInt64(entry.Value.Key), wrapper.Weight);
        ISet<BytesRef> actualCtxs = entry.Value.Value.Value;
        assertEquals(actualCtxs, wrapper.Contexts);
        BytesRef actualPayload = entry.Value.Value.Key;
        assertEquals(actualPayload, wrapper.Payload);
    }
    assertNull(wrapper.Next());

    // test the unsorted iterator wrapper with payloads
    wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
    IDictionary<BytesRef, KeyValuePair<long, BytesRef>> actual = new JCG.SortedDictionary<BytesRef, KeyValuePair<long, BytesRef>>();
    BytesRef key;
    while ((key = wrapper.Next()) != null)
    {
        long value = wrapper.Weight;
        BytesRef payload = wrapper.Payload;
        actual.Put(BytesRef.DeepCopyOf(key), new KeyValuePair<long, BytesRef>(value, BytesRef.DeepCopyOf(payload)));
    }
    assertEquals(sorted, actual);

    // test the sorted iterator wrapper without payloads
    IInputIterator wrapperWithoutPayload = new SortedInputIterator(new InputArrayIterator(unsortedWithoutPayload), comparer);
    IEnumerator<KeyValuePair<BytesRef, long>> expectedWithoutPayload = sortedWithoutPayload.GetEnumerator();
    while (expectedWithoutPayload.MoveNext())
    {
        KeyValuePair<BytesRef, long> entry = expectedWithoutPayload.Current;
        assertEquals(entry.Key, wrapperWithoutPayload.Next());
        assertEquals(Convert.ToInt64(entry.Value), wrapperWithoutPayload.Weight);
        assertNull(wrapperWithoutPayload.Payload);
    }
    assertNull(wrapperWithoutPayload.Next());

    // test the unsorted iterator wrapper without payloads
    wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload));
    IDictionary<BytesRef, long> actualWithoutPayload = new JCG.SortedDictionary<BytesRef, long>();
    while ((key = wrapperWithoutPayload.Next()) != null)
    {
        long value = wrapperWithoutPayload.Weight;
        assertNull(wrapperWithoutPayload.Payload);
        actualWithoutPayload.Put(BytesRef.DeepCopyOf(key), value);
    }
    assertEquals(sortedWithoutPayload, actualWithoutPayload);
}