internal FieldsReader(PerFieldDocValuesFormat outerInstance, FieldsReader other)
{
    this.outerInstance = outerInstance;
    IDictionary<DocValuesProducer, DocValuesProducer> oldToNew =
        new JCG.Dictionary<DocValuesProducer, DocValuesProducer>(IdentityEqualityComparer<DocValuesProducer>.Default);
    // First clone all formats
    foreach (KeyValuePair<string, DocValuesProducer> ent in other.formats)
    {
        DocValuesProducer values = ent.Value;
        formats[ent.Key] = values;
        oldToNew[ent.Value] = values;
    }
    // Then rebuild fields:
    foreach (KeyValuePair<string, DocValuesProducer> ent in other.fields)
    {
        DocValuesProducer producer;
        oldToNew.TryGetValue(ent.Value, out producer);
        if (Debugging.AssertsEnabled) Debugging.Assert(producer != null);
        fields[ent.Key] = producer;
    }
}
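// A minimal sketch (not part of the reader above) of why the identity comparer matters here:
// JCG.Dictionary accepts any IEqualityComparer<TKey>, and IdentityEqualityComparer<T>.Default
// (the helper used throughout these snippets) compares keys by reference instead of by
// Equals()/GetHashCode(). The local names below are hypothetical illustration.
string k1 = "key";
string k2 = new string("key".ToCharArray()); // equal by value, but a distinct instance
var byIdentity = new JCG.Dictionary<string, int>(IdentityEqualityComparer<string>.Default);
byIdentity[k1] = 1;
byIdentity[k2] = 2; // a second entry: the identity comparer sees a different reference
// byIdentity.Count == 2 here; with the default comparer the second assignment would have
// overwritten the first. The clone constructor above relies on exactly this behavior to
// map each old producer instance to its replacement.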
private IDictionary<string, Document> GenerateIndexDocuments(int ndocs)
{
    IDictionary<string, Document> docs = new JCG.Dictionary<string, Document>();
    for (int i = 0; i < ndocs; i++)
    {
        Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
        Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
        Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
        Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
        Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0"));
        Document doc = new Document();
        doc.Add(field);
        doc.Add(payload);
        doc.Add(weight1);
        doc.Add(weight2);
        doc.Add(weight3);
        doc.Add(contexts);
        for (int j = 1; j < AtLeast(3); j++)
        {
            contexts.SetBytesValue(new BytesRef("ctx_" + i + "_" + j));
            doc.Add(contexts);
        }
        docs.Put(field.GetStringValue(), doc);
    }
    return docs;
}
public void TestUnicodeMapRandom()
{
    // do random change to both, then compare
    var random = new Random(12345); // reproducible results
    Logln("Comparing against HashMap");
    UnicodeMap<string> map1 = new UnicodeMap<string>();
    IDictionary<Integer, string> map2 = new JCG.Dictionary<Integer, string>();
    for (int counter = 0; counter < ITERATIONS; ++counter)
    {
        int start = random.Next(LIMIT);
        string value = TEST_VALUES[random.Next(TEST_VALUES.Length)];
        string logline = Utility.Hex(start) + "\t" + value;
        if (SHOW_PROGRESS)
        {
            Logln(counter + "\t" + logline);
        }
        log.Add(logline);
        if (DEBUG && counter == 144)
        {
            Console.Out.WriteLine(" debug");
        }
        map1.Put(start, value);
        map2[new Integer(start)] = value;
        check(map1, map2, counter);
    }
    checkNext(map1, map2, LIMIT);
}
/// <summary>
/// Checks some basic behaviour of an <see cref="Attribute"/>.
/// </summary>
/// <param name="att"><see cref="Attribute"/> to reflect</param>
/// <param name="reflectedValues">Contains a <see cref="IDictionary{String, Object}"/> with "AttributeSubclassType/key" as values.</param>
public static void AssertAttributeReflection(Attribute att, IDictionary<string, object> reflectedValues)
{
    IDictionary<string, object> map = new JCG.Dictionary<string, object>();
    att.ReflectWith(new AttributeReflectorAnonymousClass(map));
    Assert.AreEqual(reflectedValues, map, aggressive: false, "Reflection does not produce same map");
}
public virtual void DoRandom(int iter, bool ignoreCase)
{
    CharArrayMap<int?> map = new CharArrayMap<int?>(TEST_VERSION_CURRENT, 1, ignoreCase);
    IDictionary<string, int?> hmap = new JCG.Dictionary<string, int?>();
    char[] key;
    for (int i = 0; i < iter; i++)
    {
        int len = Random.Next(5);
        key = new char[len];
        for (int j = 0; j < key.Length; j++)
        {
            key[j] = (char)Random.Next(127);
        }
        string keyStr = new string(key);
        string hmapKey = ignoreCase ? keyStr.ToLowerInvariant() : keyStr;
        int val = Random.Next();
        object o1 = map.Put(key, val);
        object o2 = hmap.Put(hmapKey, val);
        assertEquals(o1, o2);
        // add it again with the string method
        assertEquals(val, map.Put(keyStr, val));
        assertEquals(val, map.Get(key, 0, key.Length));
        assertEquals(val, map.Get(key));
        assertEquals(val, map.Get(keyStr));
        assertEquals(hmap.Count, map.size());
    }
}
public virtual void TestReuseDocsEnumNoReuse()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random);
    writer.Commit();
    DirectoryReader open = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in open.Leaves)
    {
        AtomicReader indexReader = (AtomicReader)ctx.Reader;
        Terms terms = indexReader.GetTerms("body");
        TermsEnum iterator = terms.GetEnumerator();
        IDictionary<DocsEnum, bool?> enums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
        MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc);
        while (iterator.MoveNext())
        {
            DocsEnum docs = iterator.Docs(
                Random.NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc),
                null,
                Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);
    }
    IOUtils.Dispose(writer, open, dir);
}
public void TestBogusArguments()
{
    ArgumentException expected = NUnit.Framework.Assert.Throws<ArgumentException>(() =>
    {
        JCG.Dictionary<String, String> @params = new JCG.Dictionary<String, String>();
        @params["bogusArg"] = "bogusValue";
        new MorfologikFilterFactory(@params);
    });
    assertTrue(expected.Message.Contains("Unknown parameters"));
}
public override void SetReusableComponents(Analyzer analyzer, string fieldName, TokenStreamComponents components)
{
    var componentsPerField = (IDictionary<string, TokenStreamComponents>)GetStoredValue(analyzer);
    if (componentsPerField == null)
    {
        // LUCENENET-615: This needs to support nullable keys
        componentsPerField = new JCG.Dictionary<string, TokenStreamComponents>();
        SetStoredValue(analyzer, componentsPerField);
    }
    componentsPerField[fieldName] = components;
}
public virtual void TestEmpty()
{
    IDictionary<string, MethodInfo> functions = new JCG.Dictionary<string, MethodInfo>();
    try
    {
        JavascriptCompiler.Compile("sqrt(20)", functions);
        Assert.Fail();
    }
    catch (ArgumentException e)
    {
        Assert.IsTrue(e.Message.Contains("Unrecognized method"));
    }
}
public void TestMissingDictionary()
{
    IResourceLoader loader = new ClasspathResourceLoader(typeof(TestMorfologikFilterFactory));
    IOException expected = NUnit.Framework.Assert.Throws<IOException>(() =>
    {
        IDictionary<String, String> @params = new JCG.Dictionary<String, String>();
        @params[MorfologikFilterFactory.DICTIONARY_ATTRIBUTE] = "missing-dictionary-resource.dict";
        MorfologikFilterFactory factory = new MorfologikFilterFactory(@params);
        factory.Inform(loader);
    });
    assertTrue(expected.Message.Contains("Resource not found"));
}
private void Clean()
{
    if (_hm.Count == 0)
    {
        return;
    }
    var newHm = new JCG.Dictionary<WeakKey<TKey>, TValue>(_hm.Count);
    foreach (var entry in _hm.Where(x => x.Key != null && x.Key.IsAlive))
    {
        newHm.Add(entry.Key, entry.Value);
    }
    _hm = newHm;
}
public virtual void TestReuseDocsEnumDifferentReader()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random);
    writer.Commit();
    DirectoryReader firstReader = DirectoryReader.Open(dir);
    DirectoryReader secondReader = DirectoryReader.Open(dir);
    IList<AtomicReaderContext> leaves = firstReader.Leaves;
    IList<AtomicReaderContext> leaves2 = secondReader.Leaves;
    foreach (AtomicReaderContext ctx in leaves)
    {
        Terms terms = ((AtomicReader)ctx.Reader).GetTerms("body");
        TermsEnum iterator = terms.GetEnumerator();
        IDictionary<DocsEnum, bool?> enums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
        MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
        iterator = terms.GetEnumerator();
        DocsEnum docs = null;
        BytesRef term = null;
        while (iterator.MoveNext())
        {
            term = iterator.Term;
            docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits),
                Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);

        iterator = terms.GetEnumerator();
        enums.Clear();
        docs = null;
        while (iterator.MoveNext())
        {
            term = iterator.Term;
            docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits),
                Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);
    }
    IOUtils.Dispose(writer, firstReader, secondReader, dir);
}
public void TestAUnicodeMapInverse()
{
    UnicodeMap<Char> foo1 = new UnicodeMap<Char>()
        .PutAll('a', 'z', 'b')
        .Put("ab", 'c')
        .Put('x', 'b')
        .Put("xy", 'c');
    IDictionary<Char, UnicodeSet> target = new JCG.Dictionary<Char, UnicodeSet>();
    foo1.AddInverseTo(target);
    UnicodeMap<Char> reverse = new UnicodeMap<Char>().PutAllInverse(target);
    assertEquals("", foo1, reverse);
}
private IDictionary<string, long> BytesUsedByExtension(Directory d)
{
    IDictionary<string, long> bytesUsedByExtension = new JCG.Dictionary<string, long>();
    foreach (string file in d.ListAll())
    {
        string ext = IndexFileNames.GetExtension(file);
        long previousLength = bytesUsedByExtension.TryGetValue(ext, out long length) ? length : 0;
        bytesUsedByExtension[ext] = previousLength + d.FileLength(file);
    }
    foreach (string item in ExcludedExtensionsFromByteCounts)
    {
        bytesUsedByExtension.Remove(item);
    }
    return bytesUsedByExtension;
}
public void TestExplicitDictionary()
{
    IResourceLoader loader = new ClasspathResourceLoader(typeof(TestMorfologikFilterFactory));
    StringReader reader = new StringReader("inflected1 inflected2");
    IDictionary<String, String> @params = new JCG.Dictionary<string, string>();
    @params[MorfologikFilterFactory.DICTIONARY_ATTRIBUTE] = "custom-dictionary.dict";
    MorfologikFilterFactory factory = new MorfologikFilterFactory(@params);
    factory.Inform(loader);
    TokenStream stream = new MockTokenizer(reader); // whitespaceMockTokenizer(reader);
    stream = factory.Create(stream);
    AssertTokenStreamContents(stream, new String[] { "lemma1", "lemma2" });
}
public void TestDateRange()
{
    String startDate = getLocalizedDate(2002, 1, 1, false);
    String endDate = getLocalizedDate(2002, 1, 4, false);
    // we use the default Locale/TZ since LuceneTestCase randomizes it
    //Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
    //endDateExpected.set(2002, 1, 4, 23, 59, 59);
    //endDateExpected.set(Calendar.MILLISECOND, 999);
    DateTime endDateExpected = new GregorianCalendar().ToDateTime(2002, 1, 4, 23, 59, 59, 999);
    String defaultField = "default";
    String monthField = "month";
    String hourField = "hour";
    PrecedenceQueryParser qp = new PrecedenceQueryParser(new MockAnalyzer(Random));

    IDictionary<string, DateResolution> fieldMap = new JCG.Dictionary<string, DateResolution>();
    // set a field specific date resolution
    fieldMap.Put(monthField, DateResolution.MONTH);
#pragma warning disable 612, 618
    qp.SetDateResolution(fieldMap);
#pragma warning restore 612, 618

    // set default date resolution to MILLISECOND
    qp.SetDateResolution(DateResolution.MILLISECOND);

    // set second field specific date resolution
    fieldMap.Put(hourField, DateResolution.HOUR);
#pragma warning disable 612, 618
    qp.SetDateResolution(fieldMap);
#pragma warning restore 612, 618

    // for this field no field specific date resolution has been set,
    // so verify if the default resolution is used
    assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, endDateExpected, DateResolution.MILLISECOND);

    // verify if field specific date resolutions are used for these two fields
    assertDateRangeQueryEquals(qp, monthField, startDate, endDate, endDateExpected, DateResolution.MONTH);
    assertDateRangeQueryEquals(qp, hourField, startDate, endDate, endDateExpected, DateResolution.HOUR);
}
public virtual void TestNestedPulsing()
{
    // we always run this test with pulsing codec.
    Codec cp = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
    BaseDirectoryWrapper dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
    // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
    // this is because we only track the 'last' enum we reused (not all).
    // but this seems 'good enough' for now.
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    DocsEnum reuse = null;
    IDictionary<DocsEnum, bool?> allEnums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
    TermsEnum te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        reuse = te.Docs(null, reuse, DocsFlags.NONE);
        allEnums[reuse] = true;
    }
    assertEquals(4, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        posReuse = te.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(4, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
[Timeout(120000)] // ICU4N: This test can take awhile because of the slowness of adding items to SortedSet
public void TestUnicodeMapGeneralCategory()
{
    Logln("Setting General Category");
    UnicodeMap<String> map1 = new UnicodeMap<string>();
    IDictionary<Integer, String> map2 = new JCG.Dictionary<Integer, String>();
    //Map<Integer, String> map3 = new TreeMap<Integer, String>();
    map1 = new UnicodeMap<String>();
    map2 = new JCG.SortedDictionary<Integer, String>();

    for (int cp = 0; cp <= SET_LIMIT; ++cp)
    {
        int enumValue = UChar.GetIntPropertyValue(cp, propEnum);
        //if (enumValue <= 0) continue; // for smaller set
        String value = UChar.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
        map1.Put(cp, value);
        map2[new Integer(cp)] = value;
    }
    checkNext(map1, map2, int.MaxValue);

    Logln("Comparing General Category");
    check(map1, map2, -1);
    Logln("Comparing Values");
    ISet<String> values1 = new JCG.SortedSet<String>(StringComparer.Ordinal);
    map1.GetAvailableValues(values1);
    ISet<String> values2 = new JCG.SortedSet<String>(map2.Values.Distinct(), StringComparer.Ordinal); // ICU4N NOTE: Added Distinct()
    if (!TestBoilerplate<string>.VerifySetsIdentical(this, values1, values2))
    {
        throw new ArgumentException("Halting");
    }
    Logln("Comparing Sets");
    foreach (string value in values1)
    {
        Logln(value == null ? "null" : value);
        UnicodeSet set1 = map1.KeySet(value);
        UnicodeSet set2 = TestBoilerplate<string>.GetSet(map2, value);
        if (!TestBoilerplate<string>.VerifySetsIdentical(this, set1, set2))
        {
            throw new ArgumentException("Halting");
        }
    }
}
public virtual void TestSophisticatedReuse()
{
    // we always run this test with pulsing codec.
    Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    DocsEnum reuse = null;
    IDictionary<DocsEnum, bool?> allEnums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
    TermsEnum te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        reuse = te.Docs(null, reuse, DocsFlags.NONE);
        allEnums[reuse] = true;
    }
    assertEquals(2, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        posReuse = te.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(2, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
private int[] CreateExpectedGroupHeads(string searchTerm, GroupDoc[] groupDocs, Sort docSort, bool sortByScoreOnly, int[] fieldIdToDocID)
{
    IDictionary<BytesRef, List<GroupDoc>> groupHeads = new JCG.Dictionary<BytesRef, List<GroupDoc>>();
    foreach (GroupDoc groupDoc in groupDocs)
    {
        if (!groupDoc.content.StartsWith(searchTerm, StringComparison.Ordinal))
        {
            continue;
        }

        if (!groupHeads.TryGetValue(groupDoc.group, out List<GroupDoc> grouphead))
        {
            List<GroupDoc> list = new List<GroupDoc>();
            list.Add(groupDoc);
            groupHeads[groupDoc.group] = list;
            continue;
        }
        grouphead.Add(groupDoc);
    }

    int[] allGroupHeads = new int[groupHeads.Count];
    int i = 0;
    foreach (BytesRef groupValue in groupHeads.Keys)
    {
        List<GroupDoc> docs = groupHeads[groupValue];
        // LUCENENET TODO: The original API Collections.Sort does not currently exist.
        // This call ultimately results in calling TimSort, which is why this line was replaced
        // with CollectionUtil.TimSort(IList<T>, IComparer<T>).
        //
        // NOTE: List.Sort(comparer) won't work in this case because it calls the comparer when the
        // values are the same, which results in this test failing. TimSort only calls the comparer
        // when the values differ.
        //Collections.Sort(docs, GetComparer(docSort, sortByScoreOnly, fieldIdToDocID));
        CollectionUtil.TimSort(docs, GetComparer(docSort, sortByScoreOnly, fieldIdToDocID));
        allGroupHeads[i++] = docs[0].id;
    }
    return allGroupHeads;
}
private void Clean()
{
    if (_hm.Count == 0)
    {
        return;
    }
    var newHm = new JCG.Dictionary<WeakKey<TKey>, TValue>(_hm.Count);
    foreach (var kvp in _hm)
    {
        if (kvp.Key.TryGetTarget(out TKey _))
        {
            // LUCENENET: There is a tiny chance that a call to remove the item
            // from the dictionary can happen before this line is executed. Therefore,
            // just discard the reference and add it as is, even if it is no longer valid
            // in this edge case. It is far more efficient to re-use the same instances, anyway.
            newHm.Add(kvp.Key, kvp.Value);
        }
    }
    _hm = newHm;
}
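// A minimal, self-contained sketch of the same rebuild-and-swap cleanup idea, using the
// BCL's WeakReference<T> directly. WeakKey<TKey> above is the snippet's own type; the
// field and method names below are hypothetical illustration, not the library's API.
private Dictionary<WeakReference<object>, string> _cache = new Dictionary<WeakReference<object>, string>();

private void Compact()
{
    // Rebuild into a fresh dictionary rather than removing in place: a dictionary must
    // not be mutated while it is being enumerated, and a single copying pass is cheap.
    var compacted = new Dictionary<WeakReference<object>, string>(_cache.Count);
    foreach (var kvp in _cache)
    {
        if (kvp.Key.TryGetTarget(out _)) // keep only entries whose target is still alive
        {
            compacted.Add(kvp.Key, kvp.Value);
        }
    }
    _cache = compacted; // swap in a single reference assignment
}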
public void TestModify()
{
    Random random = new Random(0);
    UnicodeMap<string> unicodeMap = new UnicodeMap<string>();
    JCG.Dictionary<int, string> hashMap = new JCG.Dictionary<int, string>();
    String[] values = { null, "the", "quick", "brown", "fox" };
    for (int count = 1; count <= MODIFY_TEST_ITERATIONS; ++count)
    {
        String value = values[random.Next(values.Length)];
        int start = random.Next(MODIFY_TEST_LIMIT); // test limited range
        int end = random.Next(MODIFY_TEST_LIMIT);
        if (start > end)
        {
            int temp = start;
            start = end;
            end = temp;
        }
        int modCount = count & 0xFF;
        if (modCount == 0 && IsVerbose())
        {
            Logln("***" + count);
            Logln(unicodeMap.ToString());
        }
        unicodeMap.PutAll(start, end, value);
        if (modCount == 1 && IsVerbose())
        {
            Logln(">>>\t" + Utility.Hex(start) + ".." + Utility.Hex(end) + "\t" + value);
            Logln(unicodeMap.ToString());
        }
        for (int i = start; i <= end; ++i)
        {
            hashMap[i] = value;
        }
        if (!hasSameValues(unicodeMap, hashMap))
        {
            Errln("Failed at " + count);
        }
    }
}
// FST is pruned
private void VerifyPruned(int inputMode, FST<T> fst, int prune1, int prune2)
{
    if (LuceneTestCase.VERBOSE)
    {
        Console.WriteLine("TEST: now verify pruned " + pairs.Count + " terms; outputs=" + outputs);
        foreach (InputOutput<T> pair in pairs)
        {
            Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output));
        }
    }

    // To validate the FST, we brute-force compute all prefixes
    // in the terms, matched to their "common" outputs, prune that
    // set according to the prune thresholds, then assert the FST
    // matches that same set.

    // NOTE: Crazy RAM intensive!!

    //System.out.println("TEST: tally prefixes");

    // build all prefixes
    IDictionary<Int32sRef, CountMinOutput<T>> prefixes = new JCG.Dictionary<Int32sRef, CountMinOutput<T>>();
    Int32sRef scratch = new Int32sRef(10);
    foreach (InputOutput<T> pair in pairs)
    {
        scratch.CopyInt32s(pair.Input);
        for (int idx = 0; idx <= pair.Input.Length; idx++)
        {
            scratch.Length = idx;
            if (!prefixes.TryGetValue(scratch, out CountMinOutput<T> cmo) || cmo == null)
            {
                cmo = new CountMinOutput<T>();
                cmo.Count = 1;
                cmo.Output = pair.Output;
                prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo;
            }
            else
            {
                cmo.Count++;
                T output1 = cmo.Output;
                if (output1.Equals(outputs.NoOutput))
                {
                    output1 = outputs.NoOutput;
                }
                T output2 = pair.Output;
                if (output2.Equals(outputs.NoOutput))
                {
                    output2 = outputs.NoOutput;
                }
                cmo.Output = outputs.Common(output1, output2);
            }
            if (idx == pair.Input.Length)
            {
                cmo.IsFinal = true;
                cmo.FinalOutput = cmo.Output;
            }
        }
    }

    if (LuceneTestCase.VERBOSE)
    {
        Console.WriteLine("TEST: now prune");
    }

    // prune 'em
    // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since
    // in .NET you cannot delete records while enumerating forward through a dictionary.
    for (int i = prefixes.Count - 1; i >= 0; i--)
    {
        KeyValuePair<Int32sRef, CountMinOutput<T>> ent = prefixes.ElementAt(i);
        Int32sRef prefix = ent.Key;
        CountMinOutput<T> cmo = ent.Value;
        if (LuceneTestCase.VERBOSE)
        {
            Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal);
        }
        bool keep;
        if (prune1 > 0)
        {
            keep = cmo.Count >= prune1;
        }
        else
        {
            Debug.Assert(prune2 > 0);
            if (prune2 > 1 && cmo.Count >= prune2)
            {
                keep = true;
            }
            else if (prefix.Length > 0)
            {
                // consult our parent
                scratch.Length = prefix.Length - 1;
                Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length);
                prefixes.TryGetValue(scratch, out CountMinOutput<T> cmo2);
                //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count));
                keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
            }
            else if (cmo.Count >= prune2)
            {
                keep = true;
            }
            else
            {
                keep = false;
            }
        }

        if (!keep)
        {
            prefixes.Remove(prefix);
            //System.out.println(" remove");
        }
        else
        {
            // clear isLeaf for all ancestors
            //System.out.println(" keep");
            scratch.CopyInt32s(prefix);
            scratch.Length--;
            while (scratch.Length >= 0)
            {
                if (prefixes.TryGetValue(scratch, out CountMinOutput<T> cmo2) && cmo2 != null)
                {
                    //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch));
                    cmo2.IsLeaf = false;
                }
                scratch.Length--;
            }
        }
    }

    if (LuceneTestCase.VERBOSE)
    {
        Console.WriteLine("TEST: after prune");
        foreach (KeyValuePair<Int32sRef, CountMinOutput<T>> ent in prefixes)
        {
            Console.WriteLine(" " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal);
            if (ent.Value.IsFinal)
            {
                Console.WriteLine(" finalOutput=" + outputs.OutputToString(ent.Value.FinalOutput));
            }
        }
    }

    if (prefixes.Count <= 1)
    {
        Assert.IsNull(fst);
        return;
    }

    Assert.IsNotNull(fst);

    // make sure FST only enums valid prefixes
    if (LuceneTestCase.VERBOSE)
    {
        Console.WriteLine("TEST: check pruned enum");
    }
    Int32sRefFSTEnum<T> fstEnum = new Int32sRefFSTEnum<T>(fst);
    Int32sRefFSTEnum.InputOutput<T> current;
    while ((current = fstEnum.Next()) != null)
    {
        if (LuceneTestCase.VERBOSE)
        {
            Console.WriteLine(" fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + outputs.OutputToString(current.Output));
        }
        prefixes.TryGetValue(current.Input, out CountMinOutput<T> cmo);
        Assert.IsNotNull(cmo);
        Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal);
        //if (cmo.isFinal && !cmo.isLeaf) {
        if (cmo.IsFinal)
        {
            Assert.AreEqual(cmo.FinalOutput, current.Output);
        }
        else
        {
            Assert.AreEqual(cmo.Output, current.Output);
        }
    }

    // make sure all non-pruned prefixes are present in the FST
    if (LuceneTestCase.VERBOSE)
    {
        Console.WriteLine("TEST: verify all prefixes");
    }
    int[] stopNode = new int[1];
    foreach (KeyValuePair<Int32sRef, CountMinOutput<T>> ent in prefixes)
    {
        if (ent.Key.Length > 0)
        {
            CountMinOutput<T> cmo = ent.Value;
            T output = Run(fst, ent.Key, stopNode);
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + outputs.OutputToString(cmo.Output));
            }
            // if (cmo.isFinal && !cmo.isLeaf) {
            if (cmo.IsFinal)
            {
                Assert.AreEqual(cmo.FinalOutput, output);
            }
            else
            {
                Assert.AreEqual(cmo.Output, output);
            }
            Assert.AreEqual(ent.Key.Length, stopNode[0]);
        }
    }
}
/// <summary>
/// Fills a <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <see cref="SpanQuery"/>.
/// </summary>
/// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
/// <param name="spanQuery"><see cref="SpanQuery"/> to extract Terms from</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
protected virtual void ExtractWeightedSpanTerms(IDictionary<string, WeightedSpanTerm> terms, SpanQuery spanQuery)
{
    ISet<string> fieldNames;
    if (fieldName == null)
    {
        fieldNames = new JCG.HashSet<string>();
        CollectSpanQueryFields(spanQuery, fieldNames);
    }
    else
    {
        fieldNames = new JCG.HashSet<string> { fieldName };
    }
    // To support the use of the default field name
    if (defaultField != null)
    {
        fieldNames.Add(defaultField);
    }

    IDictionary<string, SpanQuery> queries = new JCG.Dictionary<string, SpanQuery>();
    var nonWeightedTerms = new JCG.HashSet<Term>();
    bool mustRewriteQuery = MustRewriteQuery(spanQuery);
    if (mustRewriteQuery)
    {
        foreach (string field in fieldNames)
        {
            SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetLeafContext().Reader);
            queries[field] = rewrittenQuery;
            rewrittenQuery.ExtractTerms(nonWeightedTerms);
        }
    }
    else
    {
        spanQuery.ExtractTerms(nonWeightedTerms);
    }

    List<PositionSpan> spanPositions = new List<PositionSpan>();

    foreach (string field in fieldNames)
    {
        SpanQuery q;
        q = mustRewriteQuery ? queries[field] : spanQuery;
        AtomicReaderContext context = GetLeafContext();
        var termContexts = new JCG.Dictionary<Term, TermContext>();
        ISet<Term> extractedTerms = new JCG.SortedSet<Term>();
        q.ExtractTerms(extractedTerms);
        foreach (Term term in extractedTerms)
        {
            termContexts[term] = TermContext.Build(context, term);
        }
        IBits acceptDocs = context.AtomicReader.LiveDocs;
        Spans.Spans spans = q.GetSpans(context, acceptDocs, termContexts);

        // collect span positions
        while (spans.MoveNext())
        {
            spanPositions.Add(new PositionSpan(spans.Start, spans.End - 1));
        }
    }

    if (spanPositions.Count == 0)
    {
        // no spans found
        return;
    }

    foreach (Term queryTerm in nonWeightedTerms)
    {
        if (FieldNameComparer(queryTerm.Field))
        {
            if (!terms.TryGetValue(queryTerm.Text, out WeightedSpanTerm weightedSpanTerm) || weightedSpanTerm == null)
            {
                weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
                weightedSpanTerm.AddPositionSpans(spanPositions);
                weightedSpanTerm.IsPositionSensitive = true;
                terms[queryTerm.Text] = weightedSpanTerm;
            }
            else
            {
                if (spanPositions.Count > 0)
                {
                    weightedSpanTerm.AddPositionSpans(spanPositions);
                }
            }
        }
    }
}
private GroupedFacetResult CreateExpectedFacetResult(string searchTerm, IndexContext context, int offset, int limit, int minCount, bool orderByCount, string facetPrefix)
{
    JCG.Dictionary<string, ISet<string>> facetGroups;
    if (!context.searchTermToFacetGroups.TryGetValue(searchTerm, out facetGroups))
    {
        facetGroups = new JCG.Dictionary<string, ISet<string>>();
    }

    int totalCount = 0;
    int totalMissCount = 0;
    ISet<string> facetValues;
    if (facetPrefix != null)
    {
        facetValues = new JCG.HashSet<string>();
        foreach (string facetValue in context.facetValues)
        {
            if (facetValue != null && facetValue.StartsWith(facetPrefix, StringComparison.Ordinal))
            {
                facetValues.add(facetValue);
            }
        }
    }
    else
    {
        facetValues = context.facetValues;
    }

    List<TermGroupFacetCollector.FacetEntry> entries = new List<TermGroupFacetCollector.FacetEntry>(facetGroups.size());
    // also includes facets with count 0
    foreach (string facetValue in facetValues)
    {
        if (facetValue == null)
        {
            continue;
        }

        int count = facetGroups.TryGetValue(facetValue, out ISet<string> groups) && groups != null ? groups.size() : 0;
        if (count >= minCount)
        {
            entries.Add(new TermGroupFacetCollector.FacetEntry(new BytesRef(facetValue), count));
        }
        totalCount += count;
    }

    // Only include null count when no facet prefix is specified
    if (facetPrefix == null)
    {
        if (facetGroups.TryGetValue(null, out ISet<string> groups) && groups != null)
        {
            totalMissCount = groups.size();
        }
    }

    entries.Sort(Comparer<TermGroupFacetCollector.FacetEntry>.Create((a, b) =>
    {
        if (orderByCount)
        {
            int cmp = b.Count - a.Count;
            if (cmp != 0)
            {
                return cmp;
            }
        }
        return a.Value.CompareTo(b.Value);
    }));

    int endOffset = offset + limit;
    IList<TermGroupFacetCollector.FacetEntry> entriesResult;
    if (offset >= entries.size())
    {
        entriesResult = Collections.EmptyList<TermGroupFacetCollector.FacetEntry>();
    }
    else if (endOffset >= entries.size())
    {
        entriesResult = entries.GetRange(offset, entries.size() - offset);
    }
    else
    {
        entriesResult = entries.GetRange(offset, endOffset - offset);
    }
    return new GroupedFacetResult(totalCount, totalMissCount, entriesResult);
}
/** Returns Pair(list of invalid document terms, Map of document term -> document) */
private KeyValuePair<List<string>, IDictionary<string, Document>> GenerateIndexDocuments(int ndocs, bool requiresPayload, bool requiresContexts)
{
    IDictionary<string, Document> docs = new JCG.Dictionary<string, Document>();
    List<string> invalidDocTerms = new List<string>();
    for (int i = 0; i < ndocs; i++)
    {
        Document doc = new Document();
        bool invalidDoc = false;
        Field field = null;
        // usually have valid term field in document
        if (Usually())
        {
            field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
            doc.Add(field);
        }
        else
        {
            invalidDoc = true;
        }

        // even if payload is not required usually have it
        if (requiresPayload || Usually())
        {
            // usually have valid payload field in document
            if (Usually())
            {
                Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
                doc.Add(payload);
            }
            else if (requiresPayload)
            {
                invalidDoc = true;
            }
        }

        if (requiresContexts || Usually())
        {
            if (Usually())
            {
                for (int j = 0; j < AtLeast(2); j++)
                {
                    doc.Add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_" + j)));
                }
            }
            // we should allow entries without context
        }

        // usually have valid weight field in document
        if (Usually())
        {
            Field weight = (Rarely())
                ? (Field)new StoredField(WEIGHT_FIELD_NAME, 100d + i)
                : (Field)new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
            doc.Add(weight);
        }

        string term = null;
        if (invalidDoc)
        {
            term = (field != null) ? field.GetStringValue() : "invalid_" + i;
            invalidDocTerms.Add(term);
        }
        else
        {
            term = field.GetStringValue();
        }

        docs.Put(term, doc);
    }
    return new KeyValuePair<List<string>, IDictionary<string, Document>>(invalidDocTerms, docs);
}
private IndexContext CreateIndexContext()
{
    Random random = Random;
    DocValuesType[] dvTypes = new DocValuesType[] {
        DocValuesType.BINARY,
        DocValuesType.SORTED
    };

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
    bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

    int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
    string[] groupValues = new string[numDocs / 5];
    string[] countValues = new string[numDocs / 10];
    for (int i = 0; i < groupValues.Length; i++)
    {
        groupValues[i] = GenerateRandomNonEmptyString();
    }
    for (int i = 0; i < countValues.Length; i++)
    {
        countValues[i] = GenerateRandomNonEmptyString();
    }

    List<string> contentStrings = new List<string>();
    IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts = new JCG.Dictionary<string, IDictionary<string, ISet<string>>>();
    for (int i = 1; i <= numDocs; i++)
    {
        string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
        string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
        string content = "random" + random.nextInt(numDocs / 20);
        IDictionary<string, ISet<string>> groupToCounts;
        if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
        {
            // Groups sort always DOCID asc...
            searchTermToGroupCounts.Add(content, groupToCounts = new JCG.LinkedDictionary<string, ISet<string>>());
            contentStrings.Add(content);
        }
        ISet<string> countsVals;
        if (!groupToCounts.TryGetValue(groupValue, out countsVals))
        {
            groupToCounts.Add(groupValue, countsVals = new JCG.HashSet<string>());
        }
        countsVals.Add(countValue);

        Document doc = new Document();
        doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
        if (groupValue != null)
        {
            AddField(doc, groupField, groupValue, dvType);
        }
        if (countValue != null)
        {
            AddField(doc, countField, countValue, dvType);
        }
        doc.Add(new TextField("content", content, Field.Store.YES));
        w.AddDocument(doc);
    }

    DirectoryReader reader = w.GetReader();
    if (VERBOSE)
    {
        for (int docID = 0; docID < reader.MaxDoc; docID++)
        {
            Document doc = reader.Document(docID);
            Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
        }
    }

    w.Dispose();
    return new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/));
}
private static IDictionary<string, IList<Rule>> ParseRules(TextReader reader, string location)
{
    IDictionary<string, IList<Rule>> lines = new JCG.Dictionary<string, IList<Rule>>();
    int currentLine = 0;

    bool inMultilineComment = false;
    string rawLine;
    try
    {
        while ((rawLine = reader.ReadLine()) != null)
        {
            currentLine++;
            string line = rawLine;

            if (inMultilineComment)
            {
                if (line.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
                {
                    inMultilineComment = false;
                }
            }
            else
            {
                if (line.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
                {
                    inMultilineComment = true;
                }
                else
                {
                    // discard comments
                    int cmtI = line.IndexOf(ResourceConstants.CMT, StringComparison.Ordinal);
                    if (cmtI >= 0)
                    {
                        line = line.Substring(0, cmtI);
                    }

                    // trim leading-trailing whitespace
                    line = line.Trim();

                    if (line.Length == 0)
                    {
                        continue; // empty lines can be safely skipped
                    }

                    if (line.StartsWith(HASH_INCLUDE, StringComparison.Ordinal))
                    {
                        // include statement
                        string incl = line.Substring(HASH_INCLUDE.Length).Trim();
                        if (incl.Contains(" "))
                        {
                            throw new ArgumentException("Malformed import statement '" + rawLine + "' in " + location);
                        }
                        else
                        {
                            lines.PutAll(ParseRules(CreateScanner(incl), location + "->" + incl));
                        }
                    }
                    else
                    {
                        // rule
                        string[] parts = WHITESPACE.Split(line).TrimEnd();
                        if (parts.Length != 4)
                        {
                            throw new ArgumentException("Malformed rule statement split into " + parts.Length + " parts: " + rawLine + " in " + location);
                        }
                        else
                        {
                            try
                            {
                                string pat = StripQuotes(parts[0]);
                                string lCon = StripQuotes(parts[1]);
                                string rCon = StripQuotes(parts[2]);
                                IPhonemeExpr ph = ParsePhonemeExpr(StripQuotes(parts[3]));
                                int cLine = currentLine;
                                Rule r = new RuleAnonymousClass(pat, lCon, rCon, ph, cLine, location);

                                string patternKey = r.pattern.Substring(0, 1 - 0);
                                if (!lines.TryGetValue(patternKey, out IList<Rule> rules) || rules == null)
                                {
                                    rules = new List<Rule>();
                                    lines[patternKey] = rules;
                                }
                                rules.Add(r);
                            }
                            catch (Exception e) when (e.IsIllegalArgumentException())
                            {
                                throw new InvalidOperationException("Problem parsing line '" + currentLine + "' in " + location, e);
                            }
                        }
                    }
                }
            }
        }
    }
    finally
    {
        reader.Dispose();
    }
    return lines;
}
/*
 * Non-recursive version of object descend. this consumes more memory than recursive in-depth
 * traversal but prevents stack overflows on long chains of objects
 * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
 * so not too much).
 */
private static long MeasureObjectSize(object root)
{
    // Objects seen so far.
    IdentityHashSet<object> seen = new IdentityHashSet<object>();
    // Class cache with reference Field and precalculated shallow size.
    IDictionary<Type, ClassCache> classCache = new JCG.Dictionary<Type, ClassCache>(IdentityEqualityComparer<Type>.Default);
    // Stack of objects pending traversal. Recursion caused stack overflows.
    Stack<object> stack = new Stack<object>();
    stack.Push(root);

    long totalSize = 0;
    while (stack.Count > 0)
    {
        object ob = stack.Pop();

        if (ob is null || seen.Contains(ob))
        {
            continue;
        }
        seen.Add(ob);

        Type obClazz = ob.GetType();
        // LUCENENET specific - .NET cannot return a null type for an object, so no need to assert it
        if (obClazz.Equals(typeof(string)))
        {
            // LUCENENET specific - we can get a closer estimate of a string
            // by using simple math. Reference: http://stackoverflow.com/a/8171099.
            // This fixes the TestSanity test.
            totalSize += (2 * (((string)ob).Length + 1));
        }
        if (obClazz.IsArray)
        {
            /*
             * Consider an array, possibly of primitive types. Push any of its references to
             * the processing stack and accumulate this array's shallow size.
             */
            long size = NUM_BYTES_ARRAY_HEADER;
            Array array = (Array)ob;
            int len = array.Length;
            if (len > 0)
            {
                Type componentClazz = obClazz.GetElementType();
                if (componentClazz.IsPrimitive)
                {
                    size += (long)len * primitiveSizes[componentClazz];
                }
                else
                {
                    size += (long)NUM_BYTES_OBJECT_REF * len;

                    // Push refs for traversal later.
                    for (int i = len; --i >= 0;)
                    {
                        object o = array.GetValue(i);
                        if (o != null && !seen.Contains(o))
                        {
                            stack.Push(o);
                        }
                    }
                }
            }
            totalSize += AlignObjectSize(size);
        }
        else
        {
            /*
             * Consider an object. Push any references it has to the processing stack
             * and accumulate this object's shallow size.
             */
            try
            {
                if (!classCache.TryGetValue(obClazz, out ClassCache cachedInfo) || cachedInfo is null)
                {
                    classCache[obClazz] = cachedInfo = CreateCacheEntry(obClazz);
                }

                foreach (FieldInfo f in cachedInfo.ReferenceFields)
                {
                    // Fast path to eliminate redundancies.
                    object o = f.GetValue(ob);
                    if (o != null && !seen.Contains(o))
                    {
                        stack.Push(o);
                    }
                }

                totalSize += cachedInfo.AlignedShallowInstanceSize;
            }
            catch (Exception e) when (e.IsIllegalAccessException())
            {
                // this should never happen as we enabled setAccessible().
                throw RuntimeException.Create("Reflective field access failed?", e);
            }
        }
    }

    // Help the GC (?).
    seen.Clear();
    stack.Clear();
    classCache.Clear();

    return totalSize;
}
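// A minimal sketch (hypothetical Node type, not the API above) of the same explicit-stack
// technique: replacing recursion with a Stack<T> plus a "seen" set turns a deep object
// graph walk into a loop, so cycles terminate and long chains cannot overflow the call stack.
private static int CountNodes(Node root)
{
    var seen = new HashSet<Node>(); // reference equality by default for a class type
    var stack = new Stack<Node>();
    stack.Push(root);
    int count = 0;
    while (stack.Count > 0)
    {
        Node n = stack.Pop();
        if (n is null || !seen.Add(n))
        {
            continue; // skip nulls and already-visited nodes (this is what handles cycles)
        }
        count++;
        foreach (Node child in n.Children)
        {
            stack.Push(child); // push children instead of recursing into them
        }
    }
    return count;
}

private class Node { public List<Node> Children = new List<Node>(); }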
private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
{
    Random random = Random;
    int numDocs = TestUtil.NextInt32(random, 138, 1145) * RandomMultiplier;
    int numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
    int numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

    if (Verbose)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
    }

    List<string> groups = new List<string>();
    for (int i = 0; i < numGroups; i++)
    {
        groups.Add(GenerateRandomNonEmptyString());
    }
    List<string> facetValues = new List<string>();
    for (int i = 0; i < numFacets; i++)
    {
        facetValues.Add(GenerateRandomNonEmptyString());
    }
    string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
    if (Verbose)
    {
        Console.WriteLine("TEST: create fake content");
    }
    for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
    {
        contentBrs[contentIDX] = GenerateRandomNonEmptyString();
        if (Verbose)
        {
            Console.WriteLine(" content=" + contentBrs[contentIDX]);
        }
    }

    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    bool useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();
    Field group = NewStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
    if (useDv)
    {
        doc.Add(groupDc);
        docNoFacet.Add(groupDc);
    }
    doc.Add(group);
    docNoFacet.Add(group);
    Field[] facetFields;
    if (useDv)
    {
        Debug.Assert(!multipleFacetValuesPerDocument);
        facetFields = new Field[2];
        facetFields[0] = NewStringField("facet", "", Field.Store.NO);
        doc.Add(facetFields[0]);
        docNoGroup.Add(facetFields[0]);
        facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
        doc.Add(facetFields[1]);
        docNoGroup.Add(facetFields[1]);
    }
    else
    {
        facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
        for (int i = 0; i < facetFields.Length; i++)
        {
            facetFields[i] = NewStringField("facet", "", Field.Store.NO);
            doc.Add(facetFields[i]);
            docNoGroup.Add(facetFields[i]);
        }
    }
    Field content = NewStringField("content", "", Field.Store.NO);
    doc.Add(content);
    docNoGroup.Add(content);
    docNoFacet.Add(content);
    docNoGroupNoFacet.Add(content);

    ISet<string> uniqueFacetValues = new JCG.SortedSet<string>(Comparer<string>.Create((a, b) =>
    {
        if (a == b)
        {
            return 0;
        }
        else if (a == null)
        {
            return -1;
        }
        else if (b == null)
        {
            return 1;
        }
        else
        {
            return a.CompareToOrdinal(b);
        }
    }));

    // LUCENENET NOTE: Need JCG.Dictionary here because of null keys
    IDictionary<string, JCG.Dictionary<string, ISet<string>>> searchTermToFacetToGroups = new Dictionary<string, JCG.Dictionary<string, ISet<string>>>();
    int facetWithMostGroups = 0;
    for (int i = 0; i < numDocs; i++)
    {
        string groupValue;
        if (random.nextInt(24) == 17)
        {
            // So we test the "doc doesn't have the group'd
            // field" case:
            if (useDv)
            {
                groupValue = "";
            }
            else
            {
                groupValue = null;
            }
        }
        else
        {
            groupValue = groups[random.nextInt(groups.size())];
        }
        string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
        if (!searchTermToFacetToGroups.TryGetValue(contentStr, out JCG.Dictionary<string, ISet<string>> facetToGroups))
        {
            searchTermToFacetToGroups[contentStr] = facetToGroups = new JCG.Dictionary<string, ISet<string>>();
        }

        List<string> facetVals = new List<string>();
        if (useDv || random.nextInt(24) != 18)
        {
            if (useDv)
            {
                string facetValue = facetValues[random.nextInt(facetValues.size())];
                uniqueFacetValues.Add(facetValue);
                if (!facetToGroups.TryGetValue(facetValue, out ISet<string> groupsInFacet))
                {
                    facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet<string>();
                }
                groupsInFacet.add(groupValue);
                if (groupsInFacet.size() > facetWithMostGroups)
                {
                    facetWithMostGroups = groupsInFacet.size();
                }
                facetFields[0].SetStringValue(facetValue);
                facetFields[1].SetBytesValue(new BytesRef(facetValue));
                facetVals.Add(facetValue);
            }
            else
            {
                foreach (Field facetField in facetFields)
                {
                    string facetValue = facetValues[random.nextInt(facetValues.size())];
                    uniqueFacetValues.Add(facetValue);
                    if (!facetToGroups.TryGetValue(facetValue, out ISet<string> groupsInFacet))
                    {
                        facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet<string>();
                    }
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                    facetField.SetStringValue(facetValue);
                    facetVals.Add(facetValue);
                }
            }
        }
        else
        {
            uniqueFacetValues.Add(null);
            if (!facetToGroups.TryGetValue(null, out ISet<string> groupsInFacet))
            {
                facetToGroups[null] = groupsInFacet = new JCG.HashSet<string>();
            }
            groupsInFacet.add(groupValue);
            if (groupsInFacet.size() > facetWithMostGroups)
            {
                facetWithMostGroups = groupsInFacet.size();
            }
        }

        if (Verbose)
        {
            Console.WriteLine(" doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + Collections.ToString(facetVals));
        }

        if (groupValue != null)
        {
            if (useDv)
            {
                groupDc.SetBytesValue(new BytesRef(groupValue));
            }
            group.SetStringValue(groupValue);
        }
        else if (useDv)
        {
            // DV cannot have missing values:
            groupDc.SetBytesValue(new BytesRef());
        }
        content.SetStringValue(contentStr);
        if (groupValue == null && facetVals.Count == 0)
        {
            writer.AddDocument(docNoGroupNoFacet);
        }
        else if (facetVals.Count == 0)
        {
            writer.AddDocument(docNoFacet);
        }
        else if (groupValue == null)
        {
            writer.AddDocument(docNoGroup);
        }
        else
        {
            writer.AddDocument(doc);
        }
    }

    DirectoryReader reader = writer.GetReader();
    writer.Dispose();

    return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv);
}