/// <summary>
/// Registers explicit factory instances for postings formats, codecs and
/// doc values formats.
/// </summary>
public static void InitializeEmbeddable()
{
    // Hand over concrete factories up front (try to avoid reflection).
    var postingsFormatFactory = new CustomPostingsFormatFactory();
    PostingsFormat.SetPostingsFormatFactory(postingsFormatFactory);

    var codecFactory = new CustomCodecFactory();
    Codec.SetCodecFactory(codecFactory);

    var docValuesFormatFactory = new CustomDocValuesFormatFactory();
    DocValuesFormat.SetDocValuesFormatFactory(docValuesFormatFactory);
}
/// <summary>
/// Resolves the <see cref="PostingsFormat"/> for <paramref name="field"/>, records the
/// format name and numeric suffix as attributes on the field, and forwards the field
/// to the consumer associated with that format (creating the consumer on first use).
/// </summary>
/// <param name="field">The field being added.</param>
/// <returns>The <see cref="TermsConsumer"/> returned by the per-format consumer.</returns>
/// <exception cref="InvalidOperationException">No postings format was returned for the field.</exception>
public override TermsConsumer AddField(FieldInfo field)
{
    PostingsFormat postingsFormat = outerInstance.GetPostingsFormatForField(field.Name);
    if (postingsFormat == null)
    {
        throw new InvalidOperationException("invalid null PostingsFormat for field=\"" + field.Name + "\"");
    }

    string name = postingsFormat.Name;

    string priorAttribute = field.PutAttribute(PER_FIELD_FORMAT_KEY, name);
    Debug.Assert(priorAttribute == null);

    int? suffixNumber;
    FieldsConsumerAndSuffix entry;
    bool alreadySeen = formats.TryGetValue(postingsFormat, out entry) && entry != null;
    if (alreadySeen)
    {
        // The format was registered earlier, so reuse the suffix stored with it.
        Debug.Assert(suffixes.ContainsKey(name));
        suffixNumber = entry.Suffix;
    }
    else
    {
        // First sighting of this format: start at 0, or bump the last suffix
        // handed out for the same format name.
        int? lastSuffix;
        if (suffixes.TryGetValue(name, out lastSuffix) && lastSuffix != null)
        {
            suffixNumber = lastSuffix + 1;
        }
        else
        {
            suffixNumber = 0;
        }
        suffixes[name] = suffixNumber;

        string suffixText = Convert.ToString(suffixNumber, CultureInfo.InvariantCulture);
        string segmentSuffix = GetFullSegmentSuffix(
            field.Name,
            segmentWriteState.SegmentSuffix,
            GetSuffix(name, suffixText));

        entry = new FieldsConsumerAndSuffix
        {
            Consumer = postingsFormat.FieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix)),
            Suffix = suffixNumber.Value // cannot be null at this point
        };
        formats[postingsFormat] = entry;
    }

    priorAttribute = field.PutAttribute(PER_FIELD_SUFFIX_KEY, Convert.ToString(suffixNumber, CultureInfo.InvariantCulture));
    Debug.Assert(priorAttribute == null);

    // TODO: we should only provide the "slice" of FIS
    // that this PF actually sees ... then stuff like
    // .hasProx could work correctly?
    // NOTE: .hasProx is already broken in the same way for the non-perfield case,
    // if there is a fieldinfo with prox that has no postings, you get a 0 byte file.
    return entry.Consumer.AddField(field);
}
/// <summary>
/// Builds a <see cref="Codec"/> that wraps the supplied postings format in a
/// <c>Lucene46CodecAnonymousClass</c>. When <c>LuceneTestCase.Verbose</c> is set,
/// the forced format is written to the console first.
/// </summary>
/// <param name="format">The postings format to force.</param>
/// <returns>The codec created around <paramref name="format"/>.</returns>
public static Codec AlwaysPostingsFormat(PostingsFormat format)
{
    // TODO: we really need for postings impls etc to announce themselves
    // (and maybe their params, too) to infostream on flush and merge.
    // otherwise in a real debugging situation we won't know whats going on!
    if (LuceneTestCase.Verbose)
    {
        string message = "forcing postings format to:" + format;
        Console.WriteLine(message);
    }

    Codec forcedCodec = new Lucene46CodecAnonymousClass(format);
    return forcedCodec;
}
/// <summary>
/// Returns the name of the postings format the codec uses for a field. When the
/// codec's postings format is a <see cref="PerFieldPostingsFormat"/>, the
/// field-specific format is looked up; otherwise the codec-wide format name is returned.
/// </summary>
/// <param name="codec">The codec to inspect.</param>
/// <param name="field">The field name to resolve.</param>
/// <returns>The name of the resolved postings format.</returns>
public static string GetPostingsFormat(Codec codec, string field)
{
    PostingsFormat codecWide = codec.PostingsFormat;

    // Not per-field: the codec-wide format applies to every field.
    if (!(codecWide is PerFieldPostingsFormat perField))
    {
        return codecWide.Name;
    }

    return perField.GetPostingsFormatForField(field).Name;
}
/// <summary>
/// Installs the configuration, codec, doc values format and postings format factories
/// through <see cref="LazyInitializer.EnsureInitialized{T}(ref T, Func{T})"/>, using
/// <c>initializationTarget</c> as the "already initialized" marker.
/// </summary>
private void Initialize()
{
    LazyInitializer.EnsureInitialized(ref initializationTarget, RunInitialization);

    object RunInitialization()
    {
        // Setup the factories
        ConfigurationSettings.SetConfigurationFactory(ConfigurationFactory);
        Codec.SetCodecFactory(CodecFactory);
        DocValuesFormat.SetDocValuesFormatFactory(DocValuesFormatFactory);
        PostingsFormat.SetPostingsFormatFactory(PostingsFormatFactory);

        IncrementInitalizationCount(); // For testing

        // Placeholder to indicate our initializer has been run already
        return new object();
    }
}
/// <summary>
/// Opens one fields producer per distinct segment suffix found in the indexed fields
/// of <paramref name="readState"/> and maps every such field name to its producer.
/// If construction fails part-way, the producers opened so far are disposed.
/// </summary>
/// <param name="outerInstance">The enclosing per-field postings format.</param>
/// <param name="readState">The segment read state whose field infos are scanned.</param>
public FieldsReader(PerFieldPostingsFormat outerInstance, SegmentReadState readState)
{
    this.outerInstance = outerInstance;

    // Read _X.per and init each format:
    bool initialized = false;
    try
    {
        // Read field name -> format name
        foreach (FieldInfo fieldInfo in readState.FieldInfos)
        {
            if (!fieldInfo.IsIndexed)
            {
                continue;
            }

            string name = fieldInfo.Name;
            string formatName = fieldInfo.GetAttribute(PER_FIELD_FORMAT_KEY);
            if (formatName == null)
            {
                // null formatName means the field is in fieldInfos, but has no postings!
                continue;
            }

            string suffix = fieldInfo.GetAttribute(PER_FIELD_SUFFIX_KEY);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(suffix != null);
            }

            PostingsFormat format = PostingsFormat.ForName(formatName);
            string segmentSuffix = GetSuffix(formatName, suffix);

            // Single lookup: only open a producer the first time a suffix is seen.
            if (!formats.TryGetValue(segmentSuffix, out Codecs.FieldsProducer producer))
            {
                producer = format.FieldsProducer(new SegmentReadState(readState, segmentSuffix));
                formats[segmentSuffix] = producer;
            }

            fields[name] = producer;
        }

        initialized = true;
    }
    finally
    {
        if (!initialized)
        {
            IOUtils.DisposeWhileHandlingException(formats.Values);
        }
    }
}
/// <summary>
/// Verifies, through <c>LuceneTestCase.AssumeFalse</c>, that neither the codec, the
/// format names of a <see cref="RandomCodec"/>, the codec's postings format, nor the
/// configured test postings format is on the avoid list.
/// </summary>
/// <param name="codec">The codec selected for the test class.</param>
/// <exception cref="AssumptionViolatedException"> if the class does not work with a given codec. </exception>
private void CheckCodecRestrictions(Codec codec)
{
    string codecName = codec.Name;
    LuceneTestCase.AssumeFalse("Class not allowed to use codec: " + codecName + ".", ShouldAvoidCodec(codecName));

    // A random codec exposes several format names; each one must be allowed.
    RandomCodec randomCodec = codec as RandomCodec;
    if (randomCodec != null && avoidCodecs.Count > 0)
    {
        foreach (string formatName in randomCodec.FormatNames)
        {
            LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + formatName + ".", ShouldAvoidCodec(formatName));
        }
    }

    string postingsFormatName = codec.PostingsFormat.Name;
    LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + postingsFormatName + ".", ShouldAvoidCodec(postingsFormatName));

    var configuredPostingsFormat = LuceneTestCase.TestPostingsFormat;
    LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + configuredPostingsFormat + ".", ShouldAvoidCodec(configuredPostingsFormat));
}
/// <summary>
/// Returns the postings format assigned to the field <paramref name="name"/>. The first
/// request for a name picks an entry of <c>Formats</c> from <c>PerFieldSeed</c> XOR-ed with
/// the name's hash code and remembers it in <c>PreviousMappings</c>; later requests return
/// the remembered format.
/// </summary>
/// <param name="name">The field name.</param>
/// <returns>The postings format mapped to the field.</returns>
public override PostingsFormat GetPostingsFormatForField(string name)
{
    // TryGetValue instead of the indexer: a plain dictionary indexer throws
    // KeyNotFoundException for an unseen field instead of yielding null.
    PostingsFormat codec;
    if (!PreviousMappings.TryGetValue(name, out codec) || codec == null)
    {
        // Take the remainder BEFORE the absolute value. Math.Abs(int.MinValue) throws
        // OverflowException, whereas the remainder is always strictly smaller in
        // magnitude than Formats.Count. For every other input the selected index is
        // identical to Math.Abs(x) % Formats.Count.
        var index = Math.Abs((PerFieldSeed ^ name.GetHashCode()) % Formats.Count);
        codec = Formats[index];

        /*if (codec is SimpleTextPostingsFormat && PerFieldSeed % 5 != 0)
         * {
         *   // make simpletext rarer, choose again
         *   codec = Formats[Math.Abs(PerFieldSeed ^ name.ToUpper(CultureInfo.InvariantCulture).GetHashCode()) % Formats.Count];
         * }*/

        PreviousMappings[name] = codec;

        // Safety:
        Debug.Assert(PreviousMappings.Count < 10000, "test went insane");
    }

    return codec;
}
/// <summary>
/// Verifies, through <c>LuceneTestCase.AssumeFalse</c>, that neither the codec, the
/// format names of a <see cref="RandomCodec"/>, the codec's postings format, nor the
/// configured test postings format is on the avoid list.
/// </summary>
/// <param name="codec">The codec selected for the test class.</param>
/// <exception cref="AssumptionViolatedException"> if the class does not work with a given codec. </exception>
private void CheckCodecRestrictions(Codec codec)
{
    NUnit.Framework.Assume.That(true);

    string codecName = codec.Name;
    LuceneTestCase.AssumeFalse("Class not allowed to use codec: " + codecName + ".", ShouldAvoidCodec(codecName));

    // A random codec exposes several format names; each one must be allowed.
    RandomCodec randomCodec = codec as RandomCodec;
    if (randomCodec != null && avoidCodecs.Count > 0)
    {
        foreach (string formatName in randomCodec.FormatNames)
        {
            LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + formatName + ".", ShouldAvoidCodec(formatName));
        }
    }

    string postingsFormatName = codec.PostingsFormat.Name;
    LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + postingsFormatName + ".", ShouldAvoidCodec(postingsFormatName));

    var configuredPostingsFormat = LuceneTestCase.TEST_POSTINGSFORMAT;
    LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + configuredPostingsFormat + ".", ShouldAvoidCodec(configuredPostingsFormat));
}
/// <summary>
/// Opens one fields producer per distinct segment suffix found in the indexed fields
/// of <paramref name="readState"/> and maps every such field name to its producer.
/// If construction fails part-way, the producers opened so far are closed.
/// </summary>
/// <param name="outerInstance">The enclosing per-field postings format.</param>
/// <param name="readState">The segment read state whose field infos are scanned.</param>
public FieldsReader(PerFieldPostingsFormat outerInstance, SegmentReadState readState)
{
    this.OuterInstance = outerInstance;

    // Read _X.per and init each format:
    bool success = false;
    try
    {
        // Read field name -> format name
        foreach (FieldInfo fi in readState.FieldInfos)
        {
            if (!fi.Indexed)
            {
                continue;
            }

            string fieldName = fi.Name;
            string formatName = fi.GetAttribute(PER_FIELD_FORMAT_KEY);
            if (formatName == null)
            {
                // null formatName means the field is in fieldInfos, but has no postings!
                continue;
            }

            string suffix = fi.GetAttribute(PER_FIELD_SUFFIX_KEY);
            Debug.Assert(suffix != null);

            PostingsFormat format = PostingsFormat.ForName(formatName);
            string segmentSuffix = GetSuffix(formatName, suffix);

            // One TryGetValue replaces ContainsKey plus two indexer reads, so the
            // dictionary is probed once per field instead of up to three times.
            if (!Formats.TryGetValue(segmentSuffix, out var producer))
            {
                producer = format.FieldsProducer(new SegmentReadState(readState, segmentSuffix));
                Formats[segmentSuffix] = producer;
            }

            Fields[fieldName] = producer;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(Formats.Values);
        }
    }
}
/// <summary>
/// Installs the configuration, codec, doc values format and postings format factories
/// and sets <c>Debugging.AssertsEnabled</c> from the "assert" system property, all through
/// <see cref="LazyInitializer.EnsureInitialized{T}(ref T, Func{T})"/> with
/// <c>initializationTarget</c> as the "already initialized" marker.
/// </summary>
private void Initialize()
{
    LazyInitializer.EnsureInitialized(ref initializationTarget, RunInitialization);

    object RunInitialization()
    {
        // Setup the factories
        ConfigurationSettings.SetConfigurationFactory(ConfigurationFactory);
        Codec.SetCodecFactory(CodecFactory);
        DocValuesFormat.SetDocValuesFormatFactory(DocValuesFormatFactory);
        PostingsFormat.SetPostingsFormatFactory(PostingsFormatFactory);

        // Enable "asserts" for tests. In Java, these were actual asserts,
        // but in .NET we simply mock this as a boolean static setting that can be
        // toggled on and off, even in release mode. Note this must be done after
        // the ConfigurationFactory is set.
        bool assertsEnabled = SystemProperties.GetPropertyAsBoolean("assert", true);
        Lucene.Net.Diagnostics.Debugging.AssertsEnabled = assertsEnabled;

        IncrementInitalizationCount(); // For testing

        // Placeholder to indicate our initializer has been run already
        return new object();
    }
}
/// <summary>
/// Installs the configuration, codec, doc values format and postings format factories,
/// sets <c>Debugging.AssertsEnabled</c> from the "assert" system property, registers the
/// NUnit and debug-assert exception types with <c>Lucene.ExceptionExtensions</c>, and then
/// calls <c>AfterInitialization</c>. Everything runs through
/// <see cref="LazyInitializer.EnsureInitialized{T}(ref T, Func{T})"/> with
/// <c>initializationTarget</c> as the "already initialized" marker.
/// </summary>
private void Initialize()
{
    LazyInitializer.EnsureInitialized(ref initializationTarget, RunInitialization);

    object RunInitialization()
    {
        // Setup the factories
        ConfigurationSettings.SetConfigurationFactory(ConfigurationFactory);
        Codec.SetCodecFactory(CodecFactory);
        DocValuesFormat.SetDocValuesFormatFactory(DocValuesFormatFactory);
        PostingsFormat.SetPostingsFormatFactory(PostingsFormatFactory);

        // Enable "asserts" for tests. In Java, these were actual asserts,
        // but in .NET we simply mock this as a boolean static setting that can be
        // toggled on and off, even in release mode. Note this must be done after
        // the ConfigurationFactory is set.
        bool assertsEnabled = SystemProperties.GetPropertyAsBoolean("assert", true);
        Lucene.Net.Diagnostics.Debugging.AssertsEnabled = assertsEnabled;

        // Identify NUnit exceptions down in Lucene.Net so they can be ignored in catch blocks that
        // catch Java "Exception" types that do subclass Error (for the ExceptionExtensions.IsException() method).
        Lucene.ExceptionExtensions.NUnitResultStateExceptionType = typeof(NUnit.Framework.ResultStateException);
        Lucene.ExceptionExtensions.NUnitAssertionExceptionType = typeof(NUnit.Framework.AssertionException);
        Lucene.ExceptionExtensions.NUnitMultipleAssertExceptionType = typeof(NUnit.Framework.MultipleAssertException);
        Lucene.ExceptionExtensions.NUnitInconclusiveExceptionType = typeof(NUnit.Framework.InconclusiveException);
        Lucene.ExceptionExtensions.NUnitSuccessExceptionType = typeof(NUnit.Framework.SuccessException);
        Lucene.ExceptionExtensions.NUnitInvalidPlatformException =
            Type.GetType("NUnit.Framework.Internal.InvalidPlatformException, NUnit.Framework");

        // Identify the Debug.Assert() exception so it can be excluded from being swallowed by catch blocks.
        // These types are internal, so we can identify them using Reflection.
        // .NET 5/.NET Core 3.x
        Type debugAssertExceptionType =
            Type.GetType("System.Diagnostics.DebugProvider+DebugAssertException, System.Private.CoreLib");
        if (debugAssertExceptionType == null)
        {
            // .NET Core 2.x
            debugAssertExceptionType =
                Type.GetType("System.Diagnostics.Debug+DebugAssertException, System.Private.CoreLib");
        }
        // .NET Framework doesn't throw in this case
        Lucene.ExceptionExtensions.DebugAssertExceptionType = debugAssertExceptionType;

        AfterInitialization();

        // Placeholder to indicate our initializer has been run already
        return new object();
    }
}
/// <summary>
/// Class-level setup: optionally lists the loaded codecs and postings formats, saves and
/// replaces <c>InfoStream.Default</c>, collects the codec names to avoid from
/// <c>SuppressCodecsAttribute</c>, selects the codec for the run (explicitly configured or
/// chosen from <c>randomVal</c>) and installs it as <c>Codec.Default</c>, switches the current
/// culture, picks a time zone and a similarity, and finally checks the codec restrictions.
/// </summary>
/// <param name="testInstance">The test instance; when null, the target class is obtained from <c>LuceneTestCase.GetTestClass()</c>.</param>
public override void Before(LuceneTestCase testInstance)
{
    // LUCENENET specific - SOLR setup code removed

    // if verbose: print some debugging stuff about which codecs are loaded.
    if (LuceneTestCase.Verbose)
    {
        // LUCENENET: Only list the services if the underlying ICodecFactory
        // implements IServiceListable
        if (Codec.GetCodecFactory() is IServiceListable)
        {
            ICollection<string> codecs = Codec.AvailableCodecs;
            foreach (string codec in codecs)
            {
                Console.WriteLine("Loaded codec: '" + codec + "': " + Codec.ForName(codec).GetType().Name);
            }
        }

        // LUCENENET: Only list the services if the underlying IPostingsFormatFactory
        // implements IServiceListable
        if (PostingsFormat.GetPostingsFormatFactory() is IServiceListable)
        {
            ICollection<string> postingsFormats = PostingsFormat.AvailablePostingsFormats;
            foreach (string postingsFormat in postingsFormats)
            {
                Console.WriteLine("Loaded postingsFormat: '" + postingsFormat + "': " + PostingsFormat.ForName(postingsFormat).GetType().Name);
            }
        }
    }

    // Remember the current info stream, then replace it: console output when
    // UseInfoStream is set, otherwise a null stream on a coin flip.
    savedInfoStream = InfoStream.Default;
    Random random = LuceneTestCase.Random;
    // Drawn unconditionally, so the random sequence does not depend on UseInfoStream.
    bool v = random.NextBoolean();
    if (LuceneTestCase.UseInfoStream)
    {
        InfoStream.Default = new ThreadNameFixingPrintStreamInfoStream(Console.Out);
    }
    else if (v)
    {
        InfoStream.Default = new NullInfoStream();
    }

    // Collect the codec names the target class asked to suppress.
    Type targetClass = testInstance?.GetType() ?? LuceneTestCase.GetTestClass();
    avoidCodecs = new JCG.HashSet<string>();
    var suppressCodecsAttribute = targetClass.GetCustomAttribute<LuceneTestCase.SuppressCodecsAttribute>();
    if (suppressCodecsAttribute != null)
    {
        avoidCodecs.UnionWith(suppressCodecsAttribute.Value);
    }

    // set back to default
    LuceneTestCase.OldFormatImpersonationIsActive = false;

    savedCodec = Codec.Default;

    // Codec selection. Each branch fires when the codec is named explicitly in TestCodec,
    // or when everything is "random" and randomVal hits that branch's number. The order of
    // the branches matters: the conditions short-circuit, and Rarely(random) further down
    // draws from the random source only when it is reached.
    int randomVal = random.Next(10);
    if ("Lucene3x".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestDocValuesFormat, StringComparison.Ordinal)
            && randomVal == 3
            && !ShouldAvoidCodec("Lucene3x"))) // preflex-only setup
    {
        codec = Codec.ForName("Lucene3x");
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert((codec is PreFlexRWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        }
        LuceneTestCase.OldFormatImpersonationIsActive = true;
    }
    // NOTE(review): unlike its sibling branches, this one does not require
    // TestDocValuesFormat to be "random" - confirm that is intended.
    else if ("Lucene40".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal)
            && randomVal == 0
            && !ShouldAvoidCodec("Lucene40"))) // 4.0 setup
    {
        codec = Codec.ForName("Lucene40");
        LuceneTestCase.OldFormatImpersonationIsActive = true;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert((codec is Lucene40RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        }
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert((PostingsFormat.ForName("Lucene40") is Lucene40RWPostingsFormat), "fix your IPostingsFormatFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        }
    }
    else if ("Lucene41".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestDocValuesFormat, StringComparison.Ordinal)
            && randomVal == 1
            && !ShouldAvoidCodec("Lucene41")))
    {
        codec = Codec.ForName("Lucene41");
        LuceneTestCase.OldFormatImpersonationIsActive = true;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert((codec is Lucene41RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        }
    }
    else if ("Lucene42".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestDocValuesFormat, StringComparison.Ordinal)
            && randomVal == 2
            && !ShouldAvoidCodec("Lucene42")))
    {
        codec = Codec.ForName("Lucene42");
        LuceneTestCase.OldFormatImpersonationIsActive = true;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert((codec is Lucene42RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        }
    }
    // NOTE(review): randomVal == 5 is shared with the "Compressing" branch below, which in
    // random mode is therefore only reachable when this branch is skipped - confirm intended.
    else if ("Lucene45".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TestDocValuesFormat, StringComparison.Ordinal)
            && randomVal == 5
            && !ShouldAvoidCodec("Lucene45")))
    {
        codec = Codec.ForName("Lucene45");
        LuceneTestCase.OldFormatImpersonationIsActive = true;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert((codec is Lucene45RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        }
    }
    else if (("random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal) == false)
        || ("random".Equals(LuceneTestCase.TestDocValuesFormat, StringComparison.Ordinal) == false))
    {
        // the user wired postings or DV: this is messy
        // refactor into RandomCodec....

        // Postings: "random" falls back to Lucene41, "MockRandom" gets a freshly seeded
        // mock format, anything else is looked up by name.
        PostingsFormat format;
        if ("random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal))
        {
            format = PostingsFormat.ForName("Lucene41");
        }
        else if ("MockRandom".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal))
        {
            format = new MockRandomPostingsFormat(new Random(random.Next()));
        }
        else
        {
            format = PostingsFormat.ForName(LuceneTestCase.TestPostingsFormat);
        }

        // Doc values: "random" falls back to Lucene45, anything else is looked up by name.
        DocValuesFormat dvFormat;
        if ("random".Equals(LuceneTestCase.TestDocValuesFormat, StringComparison.Ordinal))
        {
            dvFormat = DocValuesFormat.ForName("Lucene45");
        }
        else
        {
            dvFormat = DocValuesFormat.ForName(LuceneTestCase.TestDocValuesFormat);
        }

        codec = new Lucene46CodecAnonymousClass(format, dvFormat);
    }
    else if ("SimpleText".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && randomVal == 9
            && LuceneTestCase.Rarely(random)
            && !ShouldAvoidCodec("SimpleText")))
    {
        codec = new SimpleTextCodec();
    }
    else if ("CheapBastard".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && randomVal == 8
            && !ShouldAvoidCodec("CheapBastard")
            && !ShouldAvoidCodec("Lucene41")))
    {
        // we also avoid this codec if Lucene41 is avoided, since thats the postings format it uses.
        codec = new CheapBastardCodec();
    }
    else if ("Asserting".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && randomVal == 6
            && !ShouldAvoidCodec("Asserting")))
    {
        codec = new AssertingCodec();
    }
    else if ("Compressing".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal)
            && randomVal == 5
            && !ShouldAvoidCodec("Compressing")))
    {
        codec = CompressingCodec.RandomInstance(random);
    }
    else if (!"random".Equals(LuceneTestCase.TestCodec, StringComparison.Ordinal))
    {
        // Any other explicitly named codec is resolved by name.
        codec = Codec.ForName(LuceneTestCase.TestCodec);
    }
    else if ("random".Equals(LuceneTestCase.TestPostingsFormat, StringComparison.Ordinal))
    {
        codec = new RandomCodec(random, avoidCodecs);
    }
    else
    {
        // No branch matched; codec is left unchanged here.
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(false);
        }
    }
    Codec.Default = codec;

    // Initialize locale/ timezone.
    string testLocale = SystemProperties.GetProperty("tests:locale", "random"); // LUCENENET specific - reformatted with :
    string testTimeZone = SystemProperties.GetProperty("tests:timezone", "random"); // LUCENENET specific - reformatted with :

    // Always pick a random one for consistency (whether tests.locale was specified or not).
    savedLocale = CultureInfo.CurrentCulture;
    CultureInfo randomLocale = LuceneTestCase.RandomCulture(random);
    locale = testLocale.Equals("random", StringComparison.Ordinal) ? randomLocale : LuceneTestCase.CultureForName(testLocale);
#if FEATURE_CULTUREINFO_CURRENTCULTURE_SETTER
    CultureInfo.CurrentCulture = locale;
#else
    Thread.CurrentThread.CurrentCulture = locale;
#endif

    // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale.
    // So store the original property value and restore it at end.
    // LUCENENET specific - commented
    //restoreProperties["user:timezone"] = SystemProperties.GetProperty("user:timezone");
    savedTimeZone = TimeZoneInfo.Local;
    // The random time zone is drawn even when a specific one is configured.
    TimeZoneInfo randomTimeZone = LuceneTestCase.RandomTimeZone(random);
    timeZone = testTimeZone.Equals("random", StringComparison.Ordinal) ? randomTimeZone : TimeZoneInfo.FindSystemTimeZoneById(testTimeZone);
    //TimeZone.Default = TimeZone; // LUCENENET NOTE: There doesn't seem to be an equivalent to this, but I don't think we need it.

    similarity = random.NextBoolean() ? (Similarity) new DefaultSimilarity() : new RandomSimilarityProvider(random);

    // Check codec restrictions once at class level.
    try
    {
        CheckCodecRestrictions(codec);
    }
    catch (Exception e)
    {
        // Log the reason together with the avoid list, then let the exception propagate.
        Console.Error.WriteLine("NOTE: " + e.Message + " Suppressed codecs: " + avoidCodecs);
        throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
    }
}
/// <summary>
/// Sole constructor. Creates the per-field postings format and the per-field
/// doc values format helpers, each bound to this codec instance.
/// </summary>
public Lucene42Codec()
{
    this.postingsFormat = new PerFieldPostingsFormatAnonymousInnerClassHelper(this);
    this.docValuesFormat = new PerFieldDocValuesFormatAnonymousInnerClassHelper(this);
}
/// <summary>
/// Opens the core readers of a segment: the postings fields, the norms producer (only when
/// the field infos have norms), the stored fields reader and the term vectors reader (only
/// when the field infos have vectors). When the segment uses a compound file, the readers
/// are opened against a <see cref="CompoundFileDirectory"/>. If anything fails before all
/// readers are open, <c>DecRef()</c> is called.
/// </summary>
/// <param name="owner">The segment reader that supplies the field infos.</param>
/// <param name="dir">The directory holding the segment.</param>
/// <param name="si">The segment commit info.</param>
/// <param name="context">The I/O context passed to every reader.</param>
/// <param name="termsIndexDivisor">Terms index divisor; must not be 0.</param>
/// <exception cref="ArgumentException"><paramref name="termsIndexDivisor"/> is 0.</exception>
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec segmentCodec = si.Info.Codec;

    bool opened = false;
    try
    {
        // Directory the readers open their files from: the compound file directory
        // when the segment uses one, otherwise the segment's own directory.
        Directory segmentDir;
        if (!si.Info.UseCompoundFile)
        {
            cfsReader = null;
            segmentDir = dir;
        }
        else
        {
            string compoundFileName = IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
            cfsReader = new CompoundFileDirectory(dir, compoundFileName, context, false);
            segmentDir = cfsReader;
        }

        FieldInfos fieldInfos = owner.FieldInfos;
        this.termsIndexDivisor = termsIndexDivisor;

        PostingsFormat postings = segmentCodec.PostingsFormat;
        SegmentReadState readState = new SegmentReadState(segmentDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = postings.FieldsProducer(readState);
        Debug.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
        if (!fieldInfos.HasNorms)
        {
            normsProducer = null;
        }
        else
        {
            normsProducer = segmentCodec.NormsFormat.NormsProducer(readState);
            Debug.Assert(normsProducer != null);
        }

        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(segmentDir, si.Info, fieldInfos, context);

        // open term vector files only as needed
        termVectorsReaderOrig = fieldInfos.HasVectors
            ? si.Info.Codec.TermVectorsFormat.VectorsReader(segmentDir, si.Info, fieldInfos, context)
            : null;

        opened = true;
    }
    finally
    {
        if (!opened)
        {
            DecRef();
        }
    }
}
/// <summary>
/// Creates Bloom filters for a selection of fields created in the index. This
/// is recorded as a set of Bitsets held as a segment summary in an additional
/// "blm" file. This <see cref="PostingsFormat"/> delegates to a choice of delegate
/// <see cref="PostingsFormat"/> for encoding all other postings data.
/// </summary>
/// <param name="delegatePostingsFormat">The <see cref="PostingsFormat"/> that records all the non-bloom filter data i.e. postings info.</param>
/// <param name="bloomFilterFactory">The <see cref="BloomFilterFactory"/> responsible for sizing BloomFilters appropriately.</param>
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat, BloomFilterFactory bloomFilterFactory)
    : base(BLOOM_CODEC_NAME)
{
    this._bloomFilterFactory = bloomFilterFactory;
    this._delegatePostingsFormat = delegatePostingsFormat;
}
/// <summary>
/// Creates Bloom filters for a selection of fields created in the index. This
/// is recorded as a set of Bitsets held as a segment summary in an additional
/// "blm" file. This <see cref="PostingsFormat"/> delegates to a choice of delegate
/// <see cref="PostingsFormat"/> for encoding all other postings data.
/// <para/>
/// This overload forwards to the two-argument constructor with a new
/// <see cref="DefaultBloomFilterFactory"/> for configuring per-field BloomFilters.
/// </summary>
/// <param name="delegatePostingsFormat">The <see cref="PostingsFormat"/> that records all the non-bloom filter data i.e. postings info.</param>
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat)
    : this(delegatePostingsFormat, new DefaultBloomFilterFactory())
{
    // Nothing else to do: the chained constructor stores both arguments.
}
/// <summary>
/// Class-level setup: optionally lists the loaded codecs and postings formats, saves and
/// replaces <c>InfoStream.Default</c>, collects the codec names to avoid from
/// <c>SuppressCodecsAttribute</c>, selects the codec for the run (explicitly configured or
/// chosen from <c>randomVal</c>) and installs it as <c>Codec.Default</c>, switches the current
/// culture, picks a time zone and a similarity, and finally checks the codec restrictions.
/// </summary>
/// <param name="testInstance">The test instance; its type is scanned for <c>SuppressCodecsAttribute</c> and its <c>TimeZone</c> is saved.</param>
public override void Before(LuceneTestCase testInstance)
{
    // if verbose: print some debugging stuff about which codecs are loaded.
    if (LuceneTestCase.VERBOSE)
    {
        ICollection<string> codecs = Codec.AvailableCodecs();
        foreach (string codec in codecs)
        {
            Console.WriteLine("Loaded codec: '" + codec + "': " + Codec.ForName(codec).GetType().Name);
        }

        ICollection<string> postingsFormats = PostingsFormat.AvailablePostingsFormats();
        foreach (string postingsFormat in postingsFormats)
        {
            Console.WriteLine("Loaded postingsFormat: '" + postingsFormat + "': " + PostingsFormat.ForName(postingsFormat).GetType().Name);
        }
    }

    // Remember the current info stream, then replace it: console output when
    // INFOSTREAM is set, otherwise a null stream on a coin flip.
    savedInfoStream = InfoStream.Default;
    Random random = LuceneTestCase.Random();
    // Drawn unconditionally, so the random sequence does not depend on INFOSTREAM.
    bool v = random.NextBoolean();
    if (LuceneTestCase.INFOSTREAM)
    {
        InfoStream.Default = new ThreadNameFixingPrintStreamInfoStream(Console.Out);
    }
    else if (v)
    {
        InfoStream.Default = new NullInfoStream();
    }

    // Collect the codec names the target class asked to suppress.
    Type targetClass = testInstance.GetType();
    avoidCodecs = new HashSet<string>();
    var suppressCodecsAttribute = targetClass.GetTypeInfo().GetCustomAttribute<LuceneTestCase.SuppressCodecsAttribute>();
    if (suppressCodecsAttribute != null)
    {
        avoidCodecs.AddAll(suppressCodecsAttribute.Value);
    }

    // set back to default
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;

    savedCodec = Codec.Default;

    // Codec selection. Each branch fires when the codec is named explicitly in TEST_CODEC,
    // or when everything is "random" and randomVal hits that branch's number. The order of
    // the branches matters: the conditions short-circuit, and Rarely(random) further down
    // draws from the random source only when it is reached.
    int randomVal = random.Next(10);
    if ("Lucene3x".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT, StringComparison.Ordinal)
            && randomVal == 3
            && !ShouldAvoidCodec("Lucene3x"))) // preflex-only setup
    {
        codec = Codec.ForName("Lucene3x");
        Debug.Assert((codec is PreFlexRWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    }
    else if ("Lucene40".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal)
            && randomVal == 0
            && !ShouldAvoidCodec("Lucene40"))) // 4.0 setup
    {
        codec = Codec.ForName("Lucene40");
        LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
        Debug.Assert((codec is Lucene40RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
        Debug.Assert((PostingsFormat.ForName("Lucene40") is Lucene40RWPostingsFormat), "fix your IPostingsFormatFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
    }
    else if ("Lucene41".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT, StringComparison.Ordinal)
            && randomVal == 1
            && !ShouldAvoidCodec("Lucene41")))
    {
        codec = Codec.ForName("Lucene41");
        LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
        Debug.Assert((codec is Lucene41RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
    }
    else if ("Lucene42".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT, StringComparison.Ordinal)
            && randomVal == 2
            && !ShouldAvoidCodec("Lucene42")))
    {
        codec = Codec.ForName("Lucene42");
        LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
        Debug.Assert((codec is Lucene42RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
    }
    else if ("Lucene45".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal)
            && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT, StringComparison.Ordinal)
            && randomVal == 5
            && !ShouldAvoidCodec("Lucene45")))
    {
        codec = Codec.ForName("Lucene45");
        LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
        Debug.Assert((codec is Lucene45RWCodec), "fix your ICodecFactory to scan Lucene.Net.Tests before Lucene.Net.TestFramework");
    }
    else if (("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal) == false)
        || ("random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT, StringComparison.Ordinal) == false))
    {
        // the user wired postings or DV: this is messy
        // refactor into RandomCodec....

        // Postings: "random" falls back to Lucene41, "MockRandom" gets a freshly seeded
        // mock format, anything else is looked up by name.
        PostingsFormat format;
        if ("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal))
        {
            format = PostingsFormat.ForName("Lucene41");
        }
        else if ("MockRandom".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal))
        {
            format = new MockRandomPostingsFormat(new Random(random.Next()));
        }
        else
        {
            format = PostingsFormat.ForName(LuceneTestCase.TEST_POSTINGSFORMAT);
        }

        // Doc values: "random" falls back to Lucene45, anything else is looked up by name.
        DocValuesFormat dvFormat;
        if ("random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT, StringComparison.Ordinal))
        {
            dvFormat = DocValuesFormat.ForName("Lucene45");
        }
        else
        {
            dvFormat = DocValuesFormat.ForName(LuceneTestCase.TEST_DOCVALUESFORMAT);
        }

        codec = new Lucene46CodecAnonymousInnerClassHelper(this, format, dvFormat);
    }
    else if ("SimpleText".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && randomVal == 9
            && LuceneTestCase.Rarely(random)
            && !ShouldAvoidCodec("SimpleText")))
    {
        codec = new SimpleTextCodec();
    }
    else if ("CheapBastard".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && randomVal == 8
            && !ShouldAvoidCodec("CheapBastard")
            && !ShouldAvoidCodec("Lucene41")))
    {
        // we also avoid this codec if Lucene41 is avoided, since thats the postings format it uses.
        codec = new CheapBastardCodec();
    }
    else if ("Asserting".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && randomVal == 6
            && !ShouldAvoidCodec("Asserting")))
    {
        codec = new AssertingCodec();
    }
    else if ("Compressing".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
        || ("random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal)
            && randomVal == 5
            && !ShouldAvoidCodec("Compressing")))
    {
        codec = CompressingCodec.RandomInstance(random);
    }
    else if (!"random".Equals(LuceneTestCase.TEST_CODEC, StringComparison.Ordinal))
    {
        // Any other explicitly named codec is resolved by name.
        codec = Codec.ForName(LuceneTestCase.TEST_CODEC);
    }
    else if ("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT, StringComparison.Ordinal))
    {
        codec = new RandomCodec(random, avoidCodecs);
    }
    else
    {
        // No branch matched; codec is left unchanged here.
        Debug.Assert(false);
    }
    Codec.Default = codec;

    // Initialize locale/ timezone.
    string testLocale = SystemProperties.GetProperty("tests.locale", "random");
    string testTimeZone = SystemProperties.GetProperty("tests.timezone", "random");

    // Always pick a random one for consistency (whether tests.locale was specified or not).
    savedLocale = CultureInfo.CurrentCulture;
    CultureInfo randomLocale = LuceneTestCase.RandomLocale(random);
    locale = testLocale.Equals("random", StringComparison.Ordinal) ? randomLocale : LuceneTestCase.LocaleForName(testLocale);
#if NETSTANDARD
    CultureInfo.CurrentCulture = locale;
#else
    Thread.CurrentThread.CurrentCulture = locale;
#endif

    // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale.
    // So store the original property value and restore it at end.
    restoreProperties["user.timezone"] = SystemProperties.GetProperty("user.timezone");
    savedTimeZone = testInstance.TimeZone;
    // The random time zone is drawn even when a specific one is configured.
    TimeZoneInfo randomTimeZone = LuceneTestCase.RandomTimeZone(random);
    timeZone = testTimeZone.Equals("random", StringComparison.Ordinal) ? randomTimeZone : TimeZoneInfo.FindSystemTimeZoneById(testTimeZone);
    //TimeZone.Default = TimeZone; // LUCENENET NOTE: There doesn't seem to be an equivalent to this, but I don't think we need it.

    similarity = random.NextBoolean() ? (Similarity) new DefaultSimilarity() : new RandomSimilarityProvider(random);

    // Check codec restrictions once at class level.
    try
    {
        CheckCodecRestrictions(codec);
    }
    catch (Exception e)
    {
        Console.Error.WriteLine("NOTE: " + e.Message + " Suppressed codecs: " + Arrays.ToString(avoidCodecs.ToArray()));
        // Rethrow with "throw;" rather than "throw e;" so the original stack trace
        // is preserved (CA2200).
        throw;
    }
}
/// <summary>
/// The only constructor of this codec. It wires up the per-field postings
/// format wrapper, which receives this codec instance.
/// </summary>
public Lucene40Codec()
{
    // The base class constructor is invoked implicitly.
    postingsFormat = new PerFieldPostingsFormatAnonymousClass(this);
}
/// <summary>
/// The only constructor of this codec. Passes the name "Lucene45" to the base
/// constructor and creates the per-field postings and doc values format
/// wrappers, each of which receives this codec instance.
/// </summary>
public Lucene45Codec()
    : base("Lucene45")
{
    postingsFormat = new PerFieldPostingsFormatAnonymousInnerClassHelper(this);
    docValuesFormat = new PerFieldDocValuesFormatAnonymousInnerClassHelper(this);
}
/// <summary>
/// Resolves, by name, the three postings formats this codec keeps:
/// "SimpleText", "Lucene41" and "MockSep".
/// </summary>
public CustomPerFieldCodec()
{
    // Look the formats up in the same order as they are stored.
    var simpleText = Codecs.PostingsFormat.ForName("SimpleText");
    var lucene41 = Codecs.PostingsFormat.ForName("Lucene41");
    var mockSep = Codecs.PostingsFormat.ForName("MockSep");

    SimpleTextFormat = simpleText;
    DefaultFormat = lucene41;
    MockSepFormat = mockSep;
}
/// <summary>
/// Opens the core readers of one segment: the postings (fields) producer, the
/// norms producer (only when the field infos report norms), the stored fields
/// reader and the term vectors reader (only when the field infos report vectors).
/// If anything fails before all readers are opened, <c>DecRef()</c> is called
/// and the exception propagates to the caller.
/// </summary>
/// <param name="owner">Segment reader whose <c>FieldInfos</c> are used to open the readers.</param>
/// <param name="dir">Directory of the segment; wrapped in a compound file directory when the segment uses a compound file.</param>
/// <param name="si">Commit info of the segment; supplies the codec, the segment name and the compound-file flag.</param>
/// <param name="context">I/O context handed to every reader that is opened.</param>
/// <param name="termsIndexDivisor">Terms index divisor; must not be 0 (per the error message, a negative value means "don't load terms index").</param>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="termsIndexDivisor"/> is 0.</exception>
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // Ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda hacky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        // Any failure while opening the stored fields reader must propagate. Swallowing it
        // (as a former AccessViolationException catch did on some build targets only) would
        // leave fieldsReaderOrig unset while still marking construction as successful.
        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            // Construction failed part-way: drop the reference taken by this instance.
            DecRef();
        }
    }
}
/// <summary>
/// Indexes random documents whose terms are built from a small set of random
/// prefixes. For each prefix, it then restricts the expected per-document
/// ordinals to the terms starting with that prefix and calls <c>Verify</c>
/// on every leaf reader and on the slow composite (top-level) reader.
/// </summary>
public virtual void TestRandomWithPrefix()
{
    Directory dir = NewDirectory();

    ISet<string> prefixes = new JCG.HashSet<string>();
    int numPrefix = TestUtil.NextInt32(Random, 2, 7);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: use " + numPrefix + " prefixes");
    }
    while (prefixes.Count < numPrefix)
    {
        prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random));
    }

    string[] prefixesArray = prefixes.ToArray();

    int NUM_TERMS = AtLeast(20);
    ISet<BytesRef> terms = new JCG.HashSet<BytesRef>();
    while (terms.Count < NUM_TERMS)
    {
        string s = prefixesArray[Random.Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random);
        if (s.Length > 0)
        {
            terms.Add(new BytesRef(s));
        }
    }
    BytesRef[] termsArray = terms.ToArray();
    Array.Sort(termsArray);

    int NUM_DOCS = AtLeast(100);

    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    // Sometimes swap in codec that impls ord():
    if (Random.Next(10) == 7)
    {
        Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds"));
        conf.SetCodec(codec);
    }

    RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf);

    // idToOrds[id] holds the sorted term ordinals (indexes into termsArray) of document id.
    int[][] idToOrds = new int[NUM_DOCS][];
    ISet<int?> ordsForDocSet = new JCG.HashSet<int?>();

    for (int id = 0; id < NUM_DOCS; id++)
    {
        Document doc = new Document();

        doc.Add(new Int32Field("id", id, Field.Store.NO));

        int termCount = TestUtil.NextInt32(Random, 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.Count < termCount)
        {
            ordsForDocSet.Add(Random.Next(termsArray.Length));
        }
        int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: doc id=" + id);
        }
        foreach (int ord in ordsForDocSet)
        {
            ordsForDoc[upto++] = ord;
            Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO);
            if (VERBOSE)
            {
                Console.WriteLine(" f=" + termsArray[ord].Utf8ToString());
            }
            doc.Add(field);
        }
        ordsForDocSet.Clear();
        Array.Sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;
        w.AddDocument(doc);
    }

    DirectoryReader r = w.GetReader();
    w.Dispose();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: reader=" + r);
    }

    AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r);

    foreach (string prefix in prefixesArray)
    {
        BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

        // Expected ordinals per document, restricted to terms that start with the prefix.
        int[][] idToOrdsPrefix = new int[NUM_DOCS][];
        for (int id = 0; id < NUM_DOCS; id++)
        {
            int[] docOrds = idToOrds[id];
            List<int> newOrds = new List<int>();
            foreach (int ord in docOrds)
            {
                if (StringHelper.StartsWith(termsArray[ord], prefixRef))
                {
                    newOrds.Add(ord);
                }
            }
            // docOrds is sorted, so the filtered copy stays sorted.
            idToOrdsPrefix[id] = newOrds.ToArray();
        }

        foreach (AtomicReaderContext ctx in r.Leaves)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: sub=" + ctx.Reader);
            }
            Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef);
        }

        // Also test top-level reader: its enum does not support
        // ord, so this forces the OrdWrapper to run:
        if (VERBOSE)
        {
            Console.WriteLine("TEST: top reader");
        }
        Verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
    }

    FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey);

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Creates the codec and remembers the supplied postings format.
/// </summary>
/// <param name="format">Postings format stored in the <c>format</c> field.</param>
public Lucene46CodecAnonymousClass(PostingsFormat format)
{
    this.format = format;
}
/// <summary>
/// The only constructor of this codec. It creates the per-field postings and
/// doc values format wrappers, each of which receives this codec instance.
/// </summary>
public Lucene46Codec()
{
    // The base class constructor is invoked implicitly.
    postingsFormat = new PerFieldPostingsFormatAnonymousClass(this);
    docValuesFormat = new PerFieldDocValuesFormatAnonymousClass(this);
}
/// <summary>
/// The only constructor of this codec. Passes the name "Lucene40" to the base
/// constructor and creates the per-field postings format wrapper, which
/// receives this codec instance.
/// </summary>
public Lucene40Codec()
    : base("Lucene40")
{
    postingsFormat = new PerFieldPostingsFormatAnonymousInnerClassHelper(this);
}
/// <summary>
/// Creates the codec and remembers the supplied postings and doc values formats.
/// </summary>
/// <param name="format">Postings format stored in the <c>format</c> field.</param>
/// <param name="dvFormat">Doc values format stored in the <c>dvFormat</c> field.</param>
public Lucene46CodecAnonymousClass(PostingsFormat format, DocValuesFormat dvFormat)
{
    this.dvFormat = dvFormat;
    this.format = format;
}
/// <summary>
/// Creates the codec and stores the enclosing test rule together with the
/// postings and doc values formats it was given.
/// </summary>
/// <param name="outerInstance">Enclosing <c>TestRuleSetupAndRestoreClassEnv</c> instance.</param>
/// <param name="format">Postings format assigned to <c>Format</c>.</param>
/// <param name="dvFormat">Doc values format assigned to <c>DvFormat</c>.</param>
public Lucene46CodecAnonymousInnerClassHelper(TestRuleSetupAndRestoreClassEnv outerInstance, PostingsFormat format, DocValuesFormat dvFormat)
{
    OuterInstance = outerInstance;
    Format = format;
    DvFormat = dvFormat;
}
/// <summary>
/// Creates the codec and stores the enclosing test rule together with the
/// postings and doc values formats it was given.
/// </summary>
/// <param name="outerInstance">Enclosing <c>TestRuleSetupAndRestoreClassEnv</c> instance.</param>
/// <param name="format">Postings format stored in the <c>format</c> field.</param>
/// <param name="dvFormat">Doc values format stored in the <c>dvFormat</c> field.</param>
public Lucene46CodecAnonymousInnerClassHelper(TestRuleSetupAndRestoreClassEnv outerInstance, PostingsFormat format, DocValuesFormat dvFormat)
{
    this.format = format;
    this.dvFormat = dvFormat;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Creates Bloom filters for a selection of fields created in the index. The
/// filters are recorded as a set of Bitsets held as a segment summary in an
/// additional "blm" file. All other postings data is encoded by the supplied
/// delegate <see cref="PostingsFormat"/>.
/// </summary>
/// <param name="delegatePostingsFormat">
/// The <see cref="PostingsFormat"/> that records all the non-bloom filter data, i.e. postings info.
/// </param>
/// <param name="bloomFilterFactory">
/// The <see cref="BloomFilterFactory"/> responsible for sizing BloomFilters appropriately.
/// </param>
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat, BloomFilterFactory bloomFilterFactory)
{
    // The base class constructor is invoked implicitly.
    _bloomFilterFactory = bloomFilterFactory;
    _delegatePostingsFormat = delegatePostingsFormat;
}
/// <summary>
/// Indexes random documents over a random set of terms, recording for every
/// document the sorted ordinals (indexes into the sorted term array) of the
/// terms it contains. It then calls <c>Verify</c> on each leaf reader and on
/// the slow composite (top-level) reader.
/// </summary>
public virtual void TestRandom()
{
    Directory directory = NewDirectory();

    int numTerms = AtLeast(20);
    ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>();
    while (uniqueTerms.Count < numTerms)
    {
        string candidate = TestUtil.RandomRealisticUnicodeString(Random);
        if (candidate.Length == 0)
        {
            // Empty strings are not used as terms; draw again.
            continue;
        }
        uniqueTerms.Add(new BytesRef(candidate));
    }

    BytesRef[] termsArray = uniqueTerms.ToArray();
    Array.Sort(termsArray);

    int numDocs = AtLeast(100);

    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    // Sometimes swap in codec that impls ord():
    if (Random.Next(10) == 7)
    {
        // Make sure terms index has ords:
        config.SetCodec(TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds")));
    }

    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config);

    int[][] idToOrds = new int[numDocs][];
    ISet<int?> pickedOrds = new JCG.HashSet<int?>();

    for (int docId = 0; docId < numDocs; docId++)
    {
        Document document = new Document();
        document.Add(new Int32Field("id", docId, Field.Store.NO));

        // Pick termCount distinct term ordinals for this document.
        int termCount = TestUtil.NextInt32(Random, 0, 20 * RandomMultiplier);
        while (pickedOrds.Count < termCount)
        {
            pickedOrds.Add(Random.Next(termsArray.Length));
        }

        int[] docOrds = new int[termCount];
        int next = 0;
        if (Verbose)
        {
            Console.WriteLine("TEST: doc id=" + docId);
        }
        foreach (int ord in pickedOrds)
        {
            docOrds[next] = ord;
            next++;

            string termText = termsArray[ord].Utf8ToString();
            Field field = NewStringField("field", termText, Field.Store.NO);
            if (Verbose)
            {
                Console.WriteLine(" f=" + termText);
            }
            document.Add(field);
        }

        // The set is reused for the next document.
        pickedOrds.Clear();

        Array.Sort(docOrds);
        idToOrds[docId] = docOrds;
        writer.AddDocument(document);
    }

    DirectoryReader reader = writer.GetReader();
    writer.Dispose();

    if (Verbose)
    {
        Console.WriteLine("TEST: reader=" + reader);
    }

    foreach (AtomicReaderContext leaf in reader.Leaves)
    {
        if (Verbose)
        {
            Console.WriteLine("\nTEST: sub=" + leaf.Reader);
        }
        Verify((AtomicReader)leaf.Reader, idToOrds, termsArray, null);
    }

    // Also test top-level reader: its enum does not support
    // ord, so this forces the OrdWrapper to run:
    if (Verbose)
    {
        Console.WriteLine("TEST: top reader");
    }
    AtomicReader slowReader = SlowCompositeReaderWrapper.Wrap(reader);
    Verify(slowReader, idToOrds, termsArray, null);

    FieldCache.DEFAULT.PurgeByCacheKey(slowReader.CoreCacheKey);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Creates Bloom filters for a selection of fields created in the index. The
/// filters are recorded as a set of Bitsets held as a segment summary in an
/// additional "blm" file. All other postings data is encoded by the supplied
/// delegate <see cref="PostingsFormat"/>. This overload uses a new
/// <see cref="DefaultBloomFilterFactory"/> for configuring per-field BloomFilters.
/// </summary>
/// <param name="delegatePostingsFormat">
/// The <see cref="PostingsFormat"/> that records all the non-bloom filter data, i.e. postings info.
/// </param>
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat)
    : this(delegatePostingsFormat, new DefaultBloomFilterFactory())
{
}