/// <summary>
/// Frees persistent resources used by this <see cref="Analyzer"/>.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}

/// <summary>
/// Frees persistent resources used by this <see cref="Analyzer"/>.
/// </summary>
/// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>; <c>false</c> when called from a finalizer.</param>
protected virtual void Dispose(bool disposing)
{
    if (disposing)
    {
        if (storedValue != null)
        {
            storedValue.Dispose();
            storedValue = null;
        }
    }
}
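// A minimal sketch (hypothetical StoredValueHolder type; its storedValue field stands in
// for the analyzer's cached state) of the standard Dispose(bool) pattern the two methods
// above implement: the public Dispose() delegates to Dispose(true) and suppresses
// finalization, while subclasses override Dispose(bool) to add their own cleanup and
// chain to the base implementation.
using System;
using System.IO;

public class StoredValueHolder : IDisposable
{
    private StreamReader storedValue; // hypothetical resource owned by this type

    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this); // explicit dispose ran; no finalizer pass needed
    }

    protected virtual void Dispose(bool disposing)
    {
        if (disposing) // true when called from Dispose(); managed state is safe to touch
        {
            storedValue?.Dispose();
            storedValue = null;
        }
    }
}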
/// <summary>Set the configuration parameters of this doc maker.</summary>
public virtual void SetConfig(Config config, ContentSource source)
{
    this.m_config = config;
    this.m_source = source;

    bool stored = config.Get("doc.stored", false);
    bool bodyStored = config.Get("doc.body.stored", stored);
    bool tokenized = config.Get("doc.tokenized", true);
    bool bodyTokenized = config.Get("doc.body.tokenized", tokenized);
    bool norms = config.Get("doc.tokenized.norms", false);
    bool bodyNorms = config.Get("doc.body.tokenized.norms", true);
    bool termVec = config.Get("doc.term.vector", false);
    bool termVecPositions = config.Get("doc.term.vector.positions", false);
    bool termVecOffsets = config.Get("doc.term.vector.offsets", false);

    m_valType = new FieldType(TextField.TYPE_NOT_STORED);
    m_valType.IsStored = stored;
    m_valType.IsTokenized = tokenized;
    m_valType.OmitNorms = !norms;
    m_valType.StoreTermVectors = termVec;
    m_valType.StoreTermVectorPositions = termVecPositions;
    m_valType.StoreTermVectorOffsets = termVecOffsets;
    m_valType.Freeze();

    m_bodyValType = new FieldType(TextField.TYPE_NOT_STORED);
    m_bodyValType.IsStored = bodyStored;
    m_bodyValType.IsTokenized = bodyTokenized;
    m_bodyValType.OmitNorms = !bodyNorms;
    m_bodyValType.StoreTermVectors = termVec;
    m_bodyValType.StoreTermVectorPositions = termVecPositions;
    m_bodyValType.StoreTermVectorOffsets = termVecOffsets;
    m_bodyValType.Freeze();

    storeBytes = config.Get("doc.store.body.bytes", false);
    m_reuseFields = config.Get("doc.reuse.fields", true);

    // In a multi-round run it is important to reset DocState, since field
    // settings may change between rounds and this is the only way to reset
    // the cache of all threads.
    docState = new DisposableThreadLocal<DocState>();

    m_indexProperties = config.Get("doc.index.props", false);

    updateDocIDLimit = config.Get("doc.random.id.limit", -1);
    if (updateDocIDLimit != -1)
    {
        r = new Random(179);
    }
}
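// A minimal sketch of the FieldType that SetConfig builds when doc.stored=true and
// doc.term.vector=true, with every other key left at the defaults shown above. The
// variable name fieldType is illustrative; the mapping of keys to properties is taken
// directly from the method.
using Lucene.Net.Documents;

var fieldType = new FieldType(TextField.TYPE_NOT_STORED)
{
    IsStored = true,                  // doc.stored=true
    IsTokenized = true,               // doc.tokenized (default true)
    OmitNorms = true,                 // doc.tokenized.norms defaults to false, so norms are omitted
    StoreTermVectors = true,          // doc.term.vector=true
    StoreTermVectorPositions = false, // doc.term.vector.positions (default false)
    StoreTermVectorOffsets = false,   // doc.term.vector.offsets (default false)
};
fieldType.Freeze(); // frozen so the shared, reused instance cannot be mutated later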
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the CFS dir, otherwise it's the segment's directory.

    bool success = false;
    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;
        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // Ask codec for its Norms.
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the CFS dir, otherwise it's the segment's directory.

    bool success = false;
    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;
        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // Ask codec for its Norms.
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
        // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory... could
        // this be needed because we are using unchecked??
#if !NETSTANDARD
        try
        {
#endif
            fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
        }
        catch (System.AccessViolationException)
        {
            // swallowed; see LUCENENET TODO above
        }
#endif

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
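// A minimal sketch (hypothetical OpenAll helper over FileStreams) of the success-flag
// idiom both constructors above rely on: if any acquisition step throws, the finally
// block observes success == false and releases the partially acquired state, which is
// what DecRef() does for the segment's readers.
using System.Collections.Generic;
using System.IO;

static List<FileStream> OpenAll(params string[] paths)
{
    var opened = new List<FileStream>();
    bool success = false;
    try
    {
        foreach (string path in paths)
        {
            opened.Add(File.OpenRead(path)); // any of these may throw
        }
        success = true; // reached only when every open completed
    }
    finally
    {
        if (!success)
        {
            // analogous to DecRef(): undo whatever was acquired before the failure
            foreach (FileStream fs in opened)
            {
                fs.Dispose();
            }
        }
    }
    return opened;
}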
protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
{
    Assert.AreEqual(1, terms.DocCount);
    int termCount = new JCG.HashSet<string>(tk.terms).Count;
    Assert.AreEqual((long)termCount, terms.Count); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
    Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
    Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);

    ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>();
    foreach (string term in tk.freqs.Keys)
    {
        uniqueTerms.Add(new BytesRef(term));
    }
    BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
    Array.Sort(sortedTerms, terms.Comparer);

    TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);
    this.termsEnum.Value = termsEnum;
    for (int i = 0; i < sortedTerms.Length; ++i)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        Assert.AreEqual(sortedTerms[i], termsEnum.Term);
        Assert.AreEqual(1, termsEnum.DocFreq);

        FixedBitSet bits = new FixedBitSet(1);
        DocsEnum docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        bits.Set(0);

        docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
        Assert.IsNotNull(docsEnum);
        Assert.AreEqual(0, docsEnum.NextDoc());
        Assert.AreEqual(0, docsEnum.DocID);
        Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], docsEnum.Freq);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        this.docsEnum.Value = docsEnum;

        bits.Clear(0);
        DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (docsAndPositionsEnum != null)
        {
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        bits.Set(0);

        docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (terms.HasPositions || terms.HasOffsets)
        {
            Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
            int freq = docsAndPositionsEnum.Freq;
            Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], freq);
            if (docsAndPositionsEnum != null)
            {
                for (int k = 0; k < freq; ++k)
                {
                    int position = docsAndPositionsEnum.NextPosition();
                    ISet<int> indexes;
                    if (terms.HasPositions)
                    {
                        indexes = tk.positionToTerms[position];
                        Assert.IsNotNull(indexes);
                    }
                    else
                    {
                        indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                        Assert.IsNotNull(indexes);
                    }
                    if (terms.HasPositions)
                    {
                        bool foundPosition = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                            {
                                foundPosition = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPosition);
                    }
                    if (terms.HasOffsets)
                    {
                        bool foundOffset = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term)
                                && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset
                                && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                            {
                                foundOffset = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundOffset);
                    }
                    if (terms.HasPayloads)
                    {
                        bool foundPayload = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term)
                                && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                            {
                                foundPayload = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPayload);
                    }
                }
                try
                {
                    docsAndPositionsEnum.NextPosition();
                    Assert.Fail();
                }
                catch (Exception e) when (e.IsException())
                {
                    // ok
                }
                catch (Exception e) when (e.IsAssertionError())
                {
                    // ok
                }
            }
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
    }
    Assert.IsFalse(termsEnum.MoveNext());

    for (int i = 0; i < 5; ++i)
    {
        if (Random.NextBoolean())
        {
            Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
        else
        {
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
    }
}
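// A minimal sketch (hypothetical WalkSingleDocVector helper) of the enumeration contract
// the assertions above exercise: a term vector describes exactly one document, so every
// term enumerates doc 0 once and a second NextDoc() call returns NO_MORE_DOCS.
using System;
using Lucene.Net.Index;
using Lucene.Net.Search;

static void WalkSingleDocVector(Terms terms)
{
    TermsEnum termsEnum = terms.GetEnumerator();
    while (termsEnum.MoveNext())
    {
        DocsEnum docsEnum = termsEnum.Docs(null, null); // no live-docs filter, no reuse
        int doc = docsEnum.NextDoc();                   // 0: the vector's only document
        int freq = docsEnum.Freq;                       // occurrences of the term in the field
        bool exhausted = docsEnum.NextDoc() == DocIdSetIterator.NO_MORE_DOCS;
        Console.WriteLine($"{termsEnum.Term.Utf8ToString()}: doc={doc} freq={freq} exhausted={exhausted}");
    }
}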