public override void SetUp() { base.SetUp(); DocHelper.SetupDoc(testDoc); SegmentInfo info = DocHelper.WriteDoc(dir, testDoc); reader = SegmentReader.Get(info); }
public virtual void TestDocument() { Directory dir = NewDirectory(); Document doc1 = new Document(); Document doc2 = new Document(); DocHelper.SetupDoc(doc1); DocHelper.SetupDoc(doc2); DocHelper.WriteDoc(Random(), dir, doc1); DocHelper.WriteDoc(Random(), dir, doc2); DirectoryReader reader = DirectoryReader.Open(dir); Assert.IsTrue(reader != null); Assert.IsTrue(reader is StandardDirectoryReader); Document newDoc1 = reader.Document(0); Assert.IsTrue(newDoc1 != null); Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.Unstored.Count); Document newDoc2 = reader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.Unstored.Count); Terms vector = reader.GetTermVectors(0).Terms(DocHelper.TEXT_FIELD_2_KEY); Assert.IsNotNull(vector); reader.Dispose(); dir.Dispose(); }
static void CreateOpenBitSets(SegmentReader reader, IDictionary<string, HashSet<string>> feeds, IDictionary<string, IDictionary<string, OpenBitSet>> bitSetLookup) { for (int n = 0; n < reader.MaxDoc; n++) { if (reader.IsDeleted(n)) { continue; } Document document = reader.Document(n); string id = document.Get("Id"); if (id == null) { continue; } foreach (var feed in feeds) { if (feed.Value.Contains(id)) { bitSetLookup[feed.Key][reader.SegmentName].Set(n); } } } }
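// A hypothetical caller for the helper above; a minimal sketch. The feed names,
// document ids, and the assumption that `reader` is an open SegmentReader are
// illustrative only, not from the original source.
var feeds = new Dictionary<string, HashSet<string>>
{
    { "feed-a", new HashSet<string> { "doc-1", "doc-2" } }
};
// The helper indexes bitSetLookup[feed][segment] directly, so every (feed, segment)
// slot must be pre-created and sized to the segment's MaxDoc before it is called.
var bitSetLookup = new Dictionary<string, IDictionary<string, OpenBitSet>>
{
    { "feed-a", new Dictionary<string, OpenBitSet> { { reader.SegmentName, new OpenBitSet(reader.MaxDoc) } } }
};
CreateOpenBitSets(reader, feeds, bitSetLookup);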
protected internal override object DoBody(string segmentFileName) { var sis = new SegmentInfos(); sis.Read(directory, segmentFileName); var readers = new SegmentReader[sis.Size()]; for (int i = sis.Size() - 1; i >= 0; i--) { System.IO.IOException prior = null; bool success = false; try { readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ); success = true; } catch (System.IO.IOException ex) { prior = ex; } finally { if (!success) { IOUtils.CloseWhileHandlingException(prior, readers); } } } return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false); }
public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek) { InitBlock(enclosingInstance); this.in_Renamed = in_Renamed; this.number = number; this.normSeek = normSeek; }
protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc) { lock (parent) { this.deletedDocs = parent.deletedDocs; } }
public virtual void TestBadSeek() { try { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir)); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term("textField2", "bad")); Assert.IsTrue(segTermDocs.Next() == false); reader.Close(); } catch (System.IO.IOException e) { Assert.Fail("unexpected IOException: " + e.Message); } try { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir)); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term("junk", "bad")); Assert.IsTrue(segTermDocs.Next() == false); reader.Close(); } catch (System.IO.IOException e) { Assert.Fail("unexpected IOException: " + e.Message); } }
private int[] starts; // 1st docno for each segment #endregion Fields #region Constructors /// <summary>Construct reading the named set of readers. </summary> internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory, bool readOnly) : base(directory, sis, closeDirectory, readOnly) { // To reduce the chance of hitting FileNotFound // (and having to retry), we open segments in // reverse because IndexWriter merges & deletes // the newest segments first. SegmentReader[] readers = new SegmentReader[sis.Count]; for (int i = sis.Count - 1; i >= 0; i--) { try { readers[i] = SegmentReader.Get(readOnly, sis.Info(i)); } catch (System.IO.IOException) { // Close all readers we had opened: for (i++; i < sis.Count; i++) { try { readers[i].Close(); } catch (System.IO.IOException) { // keep going - we want to clean up as much as possible } } throw; // rethrow, preserving the original stack trace } } Initialize(readers); }
public override void SetUp() { base.SetUp(); DocHelper.SetupDoc(testDoc); SegmentInfo info = DocHelper.WriteDoc(dir, testDoc); reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); }
public SegmentTermDocs(SegmentReader parent) { this.parent = parent; this.freqStream = (IndexInput) parent.freqStream.Clone(); this.deletedDocs = parent.deletedDocs; this.skipInterval = parent.tis.GetSkipInterval(); }
protected internal AllTermDocs(SegmentReader parent) { lock (parent) { this.deletedDocs = parent.deletedDocs; } this.maxDoc = parent.MaxDoc(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); DocHelper.SetupDoc(TestDoc); SegmentCommitInfo info = DocHelper.WriteDoc(Random(), Dir, TestDoc); Reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.READ); }
public override void SetUp() { base.SetUp(); DocHelper.SetupDoc(doc1); SegmentInfo info1 = DocHelper.WriteDoc(merge1Dir, doc1); DocHelper.SetupDoc(doc2); SegmentInfo info2 = DocHelper.WriteDoc(merge2Dir, doc2); reader1 = SegmentReader.Get(true, info1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); reader2 = SegmentReader.Get(true, info2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); }
public override void SetUp() { base.SetUp(); DocHelper.SetupDoc(doc1); SegmentInfo info1 = DocHelper.WriteDoc(merge1Dir, doc1); DocHelper.SetupDoc(doc2); SegmentInfo info2 = DocHelper.WriteDoc(merge2Dir, doc2); reader1 = SegmentReader.Get(info1); reader2 = SegmentReader.Get(info2); }
public /*protected internal*/ SegmentTermDocs(SegmentReader parent) { this.parent = parent; this.freqStream = (IndexInput) parent.core.freqStream.Clone(); lock (parent) { this.deletedDocs = parent.deletedDocs; } this.skipInterval = parent.core.GetTermsReader().GetSkipInterval(); this.maxSkipLevels = parent.core.GetTermsReader().GetMaxSkipLevels(); }
/*protected internal*/ public SegmentTermDocs(SegmentReader parent) { this.parent = parent; this.freqStream = (IndexInput) parent.core.freqStream.Clone(); lock (parent) { this.deletedDocs = parent.deletedDocs; } this.skipInterval = parent.core.GetTermsReader().SkipInterval; this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels; }
public override void SetUp() { base.SetUp(); MergedDir = NewDirectory(); Merge1Dir = NewDirectory(); Merge2Dir = NewDirectory(); DocHelper.SetupDoc(Doc1); SegmentCommitInfo info1 = DocHelper.WriteDoc(Random(), Merge1Dir, Doc1); DocHelper.SetupDoc(Doc2); SegmentCommitInfo info2 = DocHelper.WriteDoc(Random(), Merge2Dir, Doc2); Reader1 = new SegmentReader(info1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); Reader2 = new SegmentReader(info2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); }
protected virtual void SetUp() { try { DocHelper.SetupDoc(testDoc); DocHelper.WriteDoc(dir, testDoc); reader = new SegmentReader(new SegmentInfo("test", 1, dir)); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); } }
public virtual void TestAddDocument() { Document testDoc = new Document(); DocHelper.SetupDoc(testDoc); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); writer.AddDocument(testDoc); writer.Commit(); SegmentCommitInfo info = writer.NewestSegment(); writer.Dispose(); //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); Assert.IsTrue(reader != null); Document doc = reader.Document(0); Assert.IsTrue(doc != null); //System.out.println("Document: " + doc); IndexableField[] fields = doc.GetFields("textField2"); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_2_TEXT)); Assert.IsTrue(fields[0].FieldType().StoreTermVectors); fields = doc.GetFields("textField1"); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_1_TEXT)); Assert.IsFalse(fields[0].FieldType().StoreTermVectors); fields = doc.GetFields("keyField"); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.KEYWORD_TEXT)); fields = doc.GetFields(DocHelper.NO_NORMS_KEY); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.NO_NORMS_TEXT)); fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_3_TEXT)); // test that the norms are not present in the segment if // omitNorms is true foreach (FieldInfo fi in reader.FieldInfos) { if (fi.Indexed) { Assert.IsTrue(fi.OmitsNorms() == (reader.GetNormValues(fi.Name) == null)); } } reader.Dispose(); }
internal static void Merge(System.String seg1, System.String seg2, System.String merged) { Directory directory = FSDirectory.GetDirectory("test", false); SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory)); SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory)); SegmentMerger merger = new SegmentMerger(directory, merged, false); merger.Add(r1); merger.Add(r2); merger.Merge(); merger.CloseReaders(); directory.Close(); }
// This is needed for the test to pass and mimic what happens with JUnit // For some reason, JUnit is creating a new member variable for each sub-test // but NUnit is not -- who is wrong/right, I don't know. private void SetUpInternal() // {{Aroush-1.9}} See note above { //The variables for the new merged segment mergedDir = new RAMDirectory(); mergedSegment = "test"; //First segment to be merged merge1Dir = new RAMDirectory(); doc1 = new Lucene.Net.Documents.Document(); //merge1Segment = "test-1"; reader1 = null; //Second Segment to be merged merge2Dir = new RAMDirectory(); doc2 = new Lucene.Net.Documents.Document(); //merge2Segment = "test-2"; reader2 = null; }
protected virtual void SetUp() { DocHelper.SetupDoc(doc1); DocHelper.WriteDoc(merge1Dir, merge1Segment, doc1); DocHelper.SetupDoc(doc2); DocHelper.WriteDoc(merge2Dir, merge2Segment, doc2); try { reader1 = new SegmentReader(new SegmentInfo(merge1Segment, 1, merge1Dir)); reader2 = new SegmentReader(new SegmentInfo(merge2Segment, 1, merge2Dir)); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); } }
public virtual void TestTermDocs(int indexDivisor) { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random())); Assert.IsTrue(reader != null); Assert.AreEqual(indexDivisor, reader.TermInfosIndexDivisor); TermsEnum terms = reader.Fields.Terms(DocHelper.TEXT_FIELD_2_KEY).Iterator(null); terms.SeekCeil(new BytesRef("field")); DocsEnum termDocs = TestUtil.Docs(Random(), terms, reader.LiveDocs, null, DocsEnum.FLAG_FREQS); if (termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int docId = termDocs.DocID(); Assert.IsTrue(docId == 0); int freq = termDocs.Freq(); Assert.IsTrue(freq == 3); } reader.Dispose(); }
protected virtual void SetUp() { DocHelper.SetupDoc(doc1); DocHelper.SetupDoc(doc2); DocHelper.WriteDoc(dir, "seg-1", doc1); DocHelper.WriteDoc(dir, "seg-2", doc2); try { sis.Write(dir); reader1 = new SegmentReader(new SegmentInfo("seg-1", 1, dir)); reader2 = new SegmentReader(new SegmentInfo("seg-2", 1, dir)); readers[0] = reader1; readers[1] = reader2; } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); } }
public virtual void TestBadSeek(int indexDivisor) { { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random())); Assert.IsTrue(reader != null); DocsEnum termDocs = TestUtil.Docs(Random(), reader, "textField2", new BytesRef("bad"), reader.LiveDocs, null, 0); Assert.IsNull(termDocs); reader.Dispose(); } { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random())); Assert.IsTrue(reader != null); DocsEnum termDocs = TestUtil.Docs(Random(), reader, "junk", new BytesRef("bad"), reader.LiveDocs, null, 0); Assert.IsNull(termDocs); reader.Dispose(); } }
public virtual void TestTermDocs() { try { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir)); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "field")); if (segTermDocs.Next() == true) { int docId = segTermDocs.Doc(); Assert.IsTrue(docId == 0); int freq = segTermDocs.Freq(); Assert.IsTrue(freq == 3); } reader.Close(); } catch (System.IO.IOException e) { Assert.Fail("unexpected IOException: " + e.Message); } }
public virtual void TestAddDocument() { Analyzer analyzer = new WhitespaceAnalyzer(); Similarity similarity = Similarity.GetDefault(); DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50); Assert.IsTrue(writer != null); try { writer.AddDocument("test", testDoc); //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir)); Assert.IsTrue(reader != null); Document doc = reader.Document(0); Assert.IsTrue(doc != null); //System.out.println("Document: " + doc); Field[] fields = doc.GetFields("textField2"); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_2_TEXT)); Assert.IsTrue(fields[0].IsTermVectorStored() == true); fields = doc.GetFields("textField1"); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_1_TEXT)); Assert.IsTrue(fields[0].IsTermVectorStored() == false); fields = doc.GetFields("keyField"); Assert.IsTrue(fields != null && fields.Length == 1); Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.KEYWORD_TEXT)); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.IsTrue(false); } }
/// <summary> /// Used by near real-time search </summary> internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes) { // IndexWriter synchronizes externally before calling // us, which ensures infos will not change; so there's // no need to process segments in reverse order int numSegments = infos.Count; IList <SegmentReader> readers = new List <SegmentReader>(); Directory dir = writer.Directory; SegmentInfos segmentInfos = (SegmentInfos)infos.Clone(); int infosUpto = 0; bool success = false; try { for (int i = 0; i < numSegments; i++) { // NOTE: important that we use infos not // segmentInfos here, so that we are passing the // actual instance of SegmentInfoPerCommit in // IndexWriter's segmentInfos: SegmentCommitInfo info = infos.Info(i); Debug.Assert(info.Info.Dir == dir); ReadersAndUpdates rld = writer.readerPool.Get(info, true); try { SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ); if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments) { // Steal the ref: readers.Add(reader); infosUpto++; } else { reader.DecRef(); segmentInfos.Remove(infosUpto); } } finally { writer.readerPool.Release(rld); } } writer.IncRefDeleter(segmentInfos); StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes); success = true; return(result); } finally { if (!success) { foreach (SegmentReader r in readers) { try { r.DecRef(); } #pragma warning disable 168 catch (Exception th) #pragma warning restore 168 { // ignore any exception that is thrown here to not mask any original // exception. } } } } }
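// The near-real-time open path above is normally reached through
// DirectoryReader.Open(IndexWriter, bool). A minimal consumer-side sketch,
// assuming `writer` is an open IndexWriter (4.x-era API):
DirectoryReader nrtReader = DirectoryReader.Open(writer, true); // applyAllDeletes = true
// ... add or delete documents via `writer` ...
DirectoryReader newer = DirectoryReader.OpenIfChanged(nrtReader);
if (newer != null)
{
    nrtReader.Dispose();
    nrtReader = newer; // now sees the segments flushed since the first open
}
nrtReader.Dispose();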
// Writes field updates (new _X_N updates files) to the directory public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) { lock (this) { //Debug.Assert(Thread.holdsLock(Writer)); //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates); Debug.Assert(dvUpdates.Any()); // Do this so we can delete any created files on // exception; this saves all codecs from having to do // it: TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); FieldInfos fieldInfos = null; bool success = false; try { Codec codec = Info.Info.Codec; // reader could be null e.g. for a just merged segment (from // IndexWriter.commitMergedDeletes). SegmentReader reader = this.reader == null ? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE) : this.reader; try { // clone FieldInfos so that we can update their dvGen separately from // the reader's infos and write them to a new fieldInfos_gen file FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap); // cannot use builder.add(reader.getFieldInfos()) because it does not // clone FI.attributes as well FI.dvGen foreach (FieldInfo fi in reader.FieldInfos) { FieldInfo clone = builder.Add(fi); // copy the stuff FieldInfos.Builder doesn't copy if (fi.Attributes != null) { foreach (KeyValuePair <string, string> e in fi.Attributes) { clone.PutAttribute(e.Key, e.Value); } } clone.DocValuesGen = fi.DocValuesGen; } // create new fields or update existing ones to have NumericDV type foreach (string f in dvUpdates.numericDVUpdates.Keys) { builder.AddOrUpdate(f, NumericDocValuesField.TYPE); } // create new fields or update existing ones to have BinaryDV type foreach (string f in dvUpdates.binaryDVUpdates.Keys) { builder.AddOrUpdate(f, BinaryDocValuesField.fType); } fieldInfos = builder.Finish(); long nextFieldInfosGen = Info.NextFieldInfosGen; string segmentSuffix = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX)); SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix); DocValuesFormat docValuesFormat = codec.DocValuesFormat; DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state); bool fieldsConsumerSuccess = false; try { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates); foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates) { string field = e.Key; NumericDocValuesFieldUpdates fieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); Debug.Assert(fieldInfo != null); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates)); } // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates); foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates) { string field = e.Key; BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); Debug.Assert(fieldInfo != null); // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates)); } codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT); fieldsConsumerSuccess = true; } finally { if (fieldsConsumerSuccess) { fieldsConsumer.Dispose(); } else { IOUtils.DisposeWhileHandlingException(fieldsConsumer); } } } finally { if (reader != this.reader) { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader); reader.Dispose(); } } success = true; } finally { if (!success) { // Advance only the nextWriteDocValuesGen so that a 2nd // attempt to write will write to a new file Info.AdvanceNextWriteFieldInfosGen(); // Delete any partially created file(s): foreach (string fileName in trackingDir.CreatedFiles) { try { dir.DeleteFile(fileName); } catch (Exception) { // Ignore so we throw only the first exc } } } } Info.AdvanceFieldInfosGen(); // copy all the updates to mergingUpdates, so they can later be applied to the merged segment if (isMerging) { foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates) { DocValuesFieldUpdates updates; if (!mergingDVUpdates.TryGetValue(e.Key, out updates)) { mergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates) { DocValuesFieldUpdates updates; if (!mergingDVUpdates.TryGetValue(e.Key, out updates)) { mergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } } // create a new map, keeping only the gens that are in use IDictionary <long, ISet <string> > genUpdatesFiles = Info.UpdatesFiles; IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >(); long fieldInfosGen = Info.FieldInfosGen; foreach (FieldInfo fi in fieldInfos) { long dvGen = fi.DocValuesGen; if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen)) { if (dvGen == fieldInfosGen) { newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles; } else { newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen]; } } } Info.SetGenUpdatesFiles(newGenUpdatesFiles); // wrote new files, should checkpoint() writer.Checkpoint(); // if there is a reader open, reopen it to reflect the updates if (reader != null) { SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount); bool reopened = false; try { reader.DecRef(); reader = newReader; reopened = true; } finally { if (!reopened) { newReader.DecRef(); } } } } }
internal MySegmentTermDocs(SegmentReader p) : base(p) { }
internal SegmentTermPositions(SegmentReader p) : base(p) { this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time }
public override void Run() { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread"); } while (Environment.TickCount < stopTimeMS) { try { IndexSearcher s = outerInstance.CurrentSearcher; try { // Verify 1) IW is correctly setting // diagnostics, and 2) segment warming for // merged segments is actually happening: foreach (AtomicReaderContext sub in s.IndexReader.Leaves) { SegmentReader segReader = (SegmentReader)sub.Reader; IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics; assertNotNull(diagnostics); string source; diagnostics.TryGetValue("source", out source); assertNotNull(source); if (source.Equals("merge", StringComparison.Ordinal)) { assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo, !outerInstance.assertMergedSegmentsWarmed || outerInstance.warmed.ContainsKey(segReader.core)); } } if (s.IndexReader.NumDocs > 0) { outerInstance.SmokeTestSearcher(s); Fields fields = MultiFields.GetFields(s.IndexReader); if (fields == null) { continue; } Terms terms = fields.GetTerms("body"); if (terms == null) { continue; } TermsEnum termsEnum = terms.GetIterator(null); int seenTermCount = 0; int shift; int trigger; if (totTermCount.Get() < 30) { shift = 0; trigger = 1; } else { trigger = totTermCount.Get() / 30; shift = Random().Next(trigger); } while (Environment.TickCount < stopTimeMS) { BytesRef term = termsEnum.Next(); if (term == null) { totTermCount.Set(seenTermCount); break; } seenTermCount++; // search 30 terms if ((seenTermCount + shift) % trigger == 0) { //if (VERBOSE) { //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString()); //} totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", term)))); } } //if (VERBOSE) { //System.out.println(Thread.currentThread().getName() + ": search done"); //} } } finally { outerInstance.ReleaseSearcher(s); } } catch (Exception t) { Console.WriteLine(Thread.CurrentThread.Name + ": hit exc"); outerInstance.failed.Set(true); Console.WriteLine(t.ToString()); throw new Exception(t.ToString(), t); } } }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new DisposableThreadLocal <StoredFieldsReader>(() => (StoredFieldsReader)fieldsReaderOrig.Clone()); termVectorsLocal = new DisposableThreadLocal <TermVectorsReader>(() => (termVectorsReaderOrig == null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone()); if (termsIndexDivisor == 0) { throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(fields != null); } // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(normsProducer != null); } } else { normsProducer = null; } fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new AnonymousFieldsReaderLocal(this); termVectorsLocal = new AnonymousTermVectorsLocal(this); if (termsIndexDivisor == 0) { throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); Debug.Assert(fields != null); // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); Debug.Assert(normsProducer != null); } else { normsProducer = null; } // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException, // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could // this be needed because we are using unchecked?? #if !NETSTANDARD try { #endif fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); #if !NETSTANDARD } #pragma warning disable 168 catch (System.AccessViolationException ave) #pragma warning restore 168 { } #endif if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
private void AssertDocDeleted(SegmentReader reader, SegmentReader reader2, int doc) { Assert.AreEqual(reader.IsDeleted(doc), reader2.IsDeleted(doc)); }
internal MySegmentTermDocs(SegmentReader p, IState state) : base(p, state) { }
private void InitBlock(SegmentReader enclosingInstance) { this.enclosingInstance = enclosingInstance; }
/// <summary>Merges all segments together into a single segment, optimizing an index /// for search. /// </summary> public virtual void Optimize() { lock (this) { FlushRamSegments(); while (segmentInfos.Count > 1 || (segmentInfos.Count == 1 && (SegmentReader.HasDeletions(segmentInfos.Info(0)) || segmentInfos.Info(0).dir != directory || (useCompoundFile && (!SegmentReader.UsesCompoundFile(segmentInfos.Info(0)) || SegmentReader.HasSeparateNorms(segmentInfos.Info(0))))))) { int minSegment = segmentInfos.Count - mergeFactor; MergeSegments(minSegment < 0 ? 0 : minSegment); } segmentInfos.Optimize(directory); } }
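// A minimal sketch of driving Optimize() from user code, assuming the legacy
// (1.4/2.0-era) API this snippet belongs to; the index path and analyzer choice
// are assumptions.
Directory dir = FSDirectory.GetDirectory("index", false); // open an existing index
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), false);
writer.Optimize(); // merges everything down to a single segment
writer.Close();
dir.Close();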
// Delete by Term private long ApplyTermDeletes(IEnumerable <Term> termsIter, ReadersAndUpdates rld, SegmentReader reader) { lock (this) { long delCount = 0; Fields fields = reader.Fields; if (fields == null) { // this reader has no postings return(0); } TermsEnum termsEnum = null; string currentField = null; DocsEnum docs = null; Debug.Assert(CheckDeleteTerm(null)); bool any = false; //System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader); foreach (Term term in termsIter) { // Since we visit terms sorted, we gain performance // by re-using the same TermsEnum and seeking only // forwards if (!string.Equals(term.Field, currentField, StringComparison.Ordinal)) { Debug.Assert(currentField == null || currentField.CompareToOrdinal(term.Field) < 0); currentField = term.Field; Terms terms = fields.GetTerms(currentField); if (terms != null) { termsEnum = terms.GetIterator(termsEnum); } else { termsEnum = null; } } if (termsEnum == null) { continue; } Debug.Assert(CheckDeleteTerm(term)); // System.out.println(" term=" + term); if (termsEnum.SeekExact(term.Bytes)) { // we don't need term frequencies for this DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE); //System.out.println("BDS: got docsEnum=" + docsEnum); if (docsEnum != null) { while (true) { int docID = docsEnum.NextDoc(); //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (!any) { rld.InitWritableLiveDocs(); any = true; } // NOTE: there is no limit check on the docID // when deleting by Term (unlike by Query) // because on flush we apply all Term deletes to // each segment. So all Term deleting here is // against prior segments: if (rld.Delete(docID)) { delCount++; } } } } } return(delCount); } }
public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number) { InitBlock(enclosingInstance); this.in_Renamed = in_Renamed; this.number = number; }
private void AssertDelDocsRefCountEquals(int refCount, SegmentReader reader) { Assert.AreEqual(refCount, reader.deletedDocsRef_ForNUnit.RefCount()); }
public virtual void TestFieldNumberGaps() { int numIters = AtLeast(13); for (int i = 0; i < numIters; i++) { Directory dir = NewDirectory(); { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)); Document d = new Document(); d.Add(new TextField("f1", "d1 first field", Field.Store.YES)); d.Add(new TextField("f2", "d1 second field", Field.Store.YES)); writer.AddDocument(d); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); } { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); Document d = new Document(); d.Add(new TextField("f1", "d2 first field", Field.Store.YES)); d.Add(new StoredField("f3", new byte[] { 1, 2, 3 })); writer.AddDocument(d); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(2, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); Assert.AreEqual("f1", fis2.FieldInfo(0).Name); Assert.IsNull(fis2.FieldInfo(1)); Assert.AreEqual("f3", fis2.FieldInfo(2).Name); } { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); Document d = new Document(); d.Add(new TextField("f1", "d3 first field", Field.Store.YES)); d.Add(new TextField("f2", "d3 second field", Field.Store.YES)); d.Add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 })); writer.AddDocument(d); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(3, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1)); FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(2)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); Assert.AreEqual("f1", fis2.FieldInfo(0).Name); Assert.IsNull(fis2.FieldInfo(1)); Assert.AreEqual("f3", fis2.FieldInfo(2).Name); Assert.AreEqual("f1", fis3.FieldInfo(0).Name); Assert.AreEqual("f2", fis3.FieldInfo(1).Name); Assert.AreEqual("f3", fis3.FieldInfo(2).Name); } { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); writer.DeleteDocuments(new Term("f1", "d1")); // nuke the first segment entirely so that the segment with gaps is // loaded first! writer.ForceMergeDeletes(); writer.Dispose(); } IndexWriter writer_ = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()).SetInfoStream(new FailOnNonBulkMergesInfoStream())); writer_.ForceMerge(1); writer_.Dispose(); SegmentInfos sis_ = new SegmentInfos(); sis_.Read(dir); Assert.AreEqual(1, sis_.Size()); FieldInfos fis1_ = SegmentReader.ReadFieldInfos(sis_.Info(0)); Assert.AreEqual("f1", fis1_.FieldInfo(0).Name); Assert.AreEqual("f2", fis1_.FieldInfo(1).Name); Assert.AreEqual("f3", fis1_.FieldInfo(2).Name); dir.Dispose(); } }
public override void SetUp() { base.SetUp(); /* * for (int i = 0; i < testFields.Length; i++) { * fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]); * } */ Array.Sort(TestTerms); int tokenUpto = 0; Random random = new Random(1); // reuse one seeded instance so the position jitter actually varies per call for (int i = 0; i < TestTerms.Length; i++) { Positions[i] = new int[TERM_FREQ]; // first position must be 0 for (int j = 0; j < TERM_FREQ; j++) { // positions are always sorted in increasing order Positions[i][j] = (int)(j * 10 + random.NextDouble() * 10); TestToken token = Tokens[tokenUpto++] = new TestToken(this); token.Text = TestTerms[i]; token.Pos = Positions[i][j]; token.StartOffset = j * 10; token.EndOffset = j * 10 + TestTerms[i].Length; } } Array.Sort(Tokens); Dir = NewDirectory(); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false)); Document doc = new Document(); for (int i = 0; i < TestFields.Length; i++) { FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); if (TestFieldsStorePos[i] && TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorOffsets = true; } else if (TestFieldsStorePos[i] && !TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; } else if (!TestFieldsStorePos[i] && TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorOffsets = true; } else { customType.StoreTermVectors = true; } doc.Add(new Field(TestFields[i], "", customType)); } //Create 5 documents for testing, they all have the same //terms for (int j = 0; j < 5; j++) { writer.AddDocument(doc); } writer.Commit(); Seg = writer.NewestSegment(); writer.Dispose(); FieldInfos = SegmentReader.ReadFieldInfos(Seg); }
// DocValues updates private void ApplyDocValuesUpdates <T1>(IEnumerable <T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate { lock (this) { Fields fields = reader.Fields; if (fields == null) { // this reader has no postings return; } // TODO: we can process the updates per DV field, from last to first so that // if multiple terms affect same document for the same field, we add an update // only once (that of the last term). To do that, we can keep a bitset which // marks which documents have already been updated. So e.g. if term T1 // updates doc 7, and then we process term T2 and it updates doc 7 as well, // we don't apply the update since we know T1 came last and therefore wins // the update. // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so // that these documents aren't even returned. string currentField = null; TermsEnum termsEnum = null; DocsEnum docs = null; //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader); foreach (DocValuesUpdate update in updates) { Term term = update.term; int limit = update.docIDUpto; // TODO: we traverse the terms in update order (not term order) so that we // apply the updates in the correct order, i.e. if two terms udpate the // same document, the last one that came in wins, irrespective of the // terms lexical order. // we can apply the updates in terms order if we keep an updatesGen (and // increment it with every update) and attach it to each NumericUpdate. Note // that we cannot rely only on docIDUpto because an app may send two updates // which will get same docIDUpto, yet will still need to respect the order // those updates arrived. if (!string.Equals(term.Field, currentField, StringComparison.Ordinal)) { // if we change the code to process updates in terms order, enable this assert // assert currentField == null || currentField.CompareToOrdinal(term.Field) < 0; currentField = term.Field; Terms terms = fields.GetTerms(currentField); if (terms != null) { termsEnum = terms.GetIterator(termsEnum); } else { termsEnum = null; continue; // no terms in that field } } if (termsEnum == null) { continue; } // System.out.println(" term=" + term); if (termsEnum.SeekExact(term.Bytes)) { // we don't need term frequencies for this DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE); //System.out.println("BDS: got docsEnum=" + docsEnum); DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.field, update.type); if (dvUpdates == null) { dvUpdates = dvUpdatesContainer.NewUpdates(update.field, update.type, reader.MaxDoc); } int doc; while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID); if (doc >= limit) { break; // no more docs that can be updated for this term } dvUpdates.Add(doc, update.value); } } } } }
public virtual void TestSegmentReaderDelDocsReferenceCounting() { Directory dir1 = new MockRAMDirectory(); TestIndexReaderReopen.CreateIndex(dir1, false); IndexReader origReader = IndexReader.Open(dir1, false); SegmentReader origSegmentReader = SegmentReader.GetOnlySegmentReader(origReader); // deletedDocsRef should be null because nothing has updated yet Assert.IsNull(origSegmentReader.deletedDocsRef_ForNUnit); // we deleted a document, so there is now a deletedDocs bitvector and a // reference to it origReader.DeleteDocument(1); AssertDelDocsRefCountEquals(1, origSegmentReader); // the cloned segmentreader should have 2 references, 1 to itself, and 1 to // the original segmentreader IndexReader clonedReader = (IndexReader)origReader.Clone(); SegmentReader clonedSegmentReader = SegmentReader.GetOnlySegmentReader(clonedReader); AssertDelDocsRefCountEquals(2, origSegmentReader); // deleting a document creates a new deletedDocs bitvector, the refs goes to // 1 clonedReader.DeleteDocument(2); AssertDelDocsRefCountEquals(1, origSegmentReader); AssertDelDocsRefCountEquals(1, clonedSegmentReader); // make sure the deletedocs objects are different (copy // on write) Assert.IsTrue(origSegmentReader.deletedDocs_ForNUnit != clonedSegmentReader.deletedDocs_ForNUnit); AssertDocDeleted(origSegmentReader, clonedSegmentReader, 1); Assert.IsTrue(!origSegmentReader.IsDeleted(2)); // doc 2 should not be deleted // in original segmentreader Assert.IsTrue(clonedSegmentReader.IsDeleted(2)); // doc 2 should be deleted in // cloned segmentreader // deleting a doc from the original segmentreader should throw an exception Assert.Throws <LockObtainFailedException>(() => origReader.DeleteDocument(4), "expected exception"); origReader.Close(); // try closing the original segment reader to see if it affects the // clonedSegmentReader clonedReader.DeleteDocument(3); clonedReader.Flush(); AssertDelDocsRefCountEquals(1, clonedSegmentReader); // test a reopened reader IndexReader reopenedReader = clonedReader.Reopen(); IndexReader cloneReader2 = (IndexReader)reopenedReader.Clone(); SegmentReader cloneSegmentReader2 = SegmentReader.GetOnlySegmentReader(cloneReader2); AssertDelDocsRefCountEquals(2, cloneSegmentReader2); clonedReader.Close(); reopenedReader.Close(); cloneReader2.Close(); dir1.Close(); }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new AnonymousFieldsReaderLocal(this); termVectorsLocal = new AnonymousTermVectorsLocal(this); if (termsIndexDivisor == 0) { throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = CfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { CfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos_Renamed; this.TermsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat(); SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields Fields = format.FieldsProducer(segmentReadState); Debug.Assert(Fields != null); // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms()) { NormsProducer = codec.NormsFormat().NormsProducer(segmentReadState); Debug.Assert(NormsProducer != null); } else { NormsProducer = null; } StoredFieldsFormat sff = si.Info.Codec.StoredFieldsFormat(); try { FieldsReaderOrig = sff.FieldsReader(cfsDir, si.Info, fieldInfos, context); } catch (System.AccessViolationException ave) { } //FieldsReaderOrig = si.Info.Codec.StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context); if (fieldInfos.HasVectors()) // open term vector files only as needed { TermVectorsReaderOrig = si.Info.Codec.TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { TermVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
/// <summary> Test the term index.</summary> private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader, IState state) { var status = new Status.TermIndexStatus(); try { if (infoStream != null) { infoStream.Write(" test: terms, freq, prox..."); } TermEnum termEnum = reader.Terms(state); TermPositions termPositions = reader.TermPositions(state); // Used only to count up # deleted docs for this term var myTermDocs = new MySegmentTermDocs(reader, state); int maxDoc = reader.MaxDoc; while (termEnum.Next(state)) { status.termCount++; Term term = termEnum.Term; int docFreq = termEnum.DocFreq(); termPositions.Seek(term, state); int lastDoc = -1; int freq0 = 0; status.totFreq += docFreq; while (termPositions.Next(state)) { freq0++; int doc = termPositions.Doc; int freq = termPositions.Freq; if (doc <= lastDoc) { throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); } if (doc >= maxDoc) { throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); } lastDoc = doc; if (freq <= 0) { throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); } int lastPos = -1; status.totPos += freq; for (int j = 0; j < freq; j++) { int pos = termPositions.NextPosition(state); if (pos < -1) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } if (pos < lastPos) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } lastPos = pos; } } // Now count how many deleted docs occurred in // this term: int delCount; if (reader.HasDeletions) { myTermDocs.Seek(term, state); while (myTermDocs.Next(state)) { } delCount = myTermDocs.delCount; } else { delCount = 0; } if (freq0 + delCount != docFreq) { throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount); } } Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]"); } catch (System.Exception e) { Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); status.error = e; if (infoStream != null) { infoStream.WriteLine(e.StackTrace); } } return(status); }
public virtual void TestMerge() { Codec codec = Codec.Default; SegmentInfo si = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null); SegmentMerger merger = new SegmentMerger(new List <AtomicReader> { Reader1, Reader2 }, si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random), true); MergeState mergeState = merger.Merge(); int docsMerged = mergeState.SegmentInfo.DocCount; Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random)); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs == 2); Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count); Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count); DocsEnum termDocs = TestUtil.Docs(Random, mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int tvCount = 0; foreach (FieldInfo fieldInfo in mergedReader.FieldInfos) { if (fieldInfo.HasVectors) { tvCount++; } } //System.out.println("stored size: " + stored.Size()); Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector"); Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY); Assert.IsNotNull(vector); Assert.AreEqual(3, vector.Count); TermsEnum termsEnum = vector.GetIterator(null); int i = 0; while (termsEnum.Next() != null) { string term = termsEnum.Term.Utf8ToString(); int freq = (int)termsEnum.TotalTermFreq; //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term, StringComparison.Ordinal) != -1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); i++; } TestSegmentReader.CheckNorms(mergedReader); mergedReader.Dispose(); }
/// <summary>Returns a <see cref="Status" /> instance detailing the state of the index. /// <p/>As this method checks every byte in the specified segments, on a large index it can take quite a long time to run. /// <p/><b>WARNING</b>: make sure you only call this when the index is not opened by any writer. /// </summary> /// <param name="onlySegments">list of specific segment names to check</param> public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments, IState state) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir, state); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } return(result); } int numSegments = sis.Count; var segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName, state); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.cantOpenSegments = true; return(result); } int format = 0; try { format = input.ReadInt(state); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.missingSegmentVersion = true; return(result); } finally { if (input != null) { input.Close(); } } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) { sFormat = "FORMAT [Lucene Pre-2.1]"; } else if (format == SegmentInfos.FORMAT_LOCKLESS) { sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) { sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; } else if (format == SegmentInfos.FORMAT_CHECKSUM) { sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_DEL_COUNT) { sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_HAS_PROX) { sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_USER_DATA) { sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; } else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) { sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; } else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.UserData; System.String userDataString; if (sis.UserData.Count > 0) { userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) { infoStream.Write("\nChecking only these segments:"); } foreach (string s in onlySegments) { if (infoStream != null) { infoStream.Write(" " + s); } } result.segmentsChecked.AddRange(onlySegments); Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return(result); } result.newSegments = (SegmentInfos)sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) { continue; } var segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile(state)); segInfoStat.compound = info.GetUseCompoundFile(state); Msg(" hasProx=" + info.HasProx); segInfoStat.hasProx = info.HasProx; Msg(" numFiles=" + info.Files(state).Count); segInfoStat.numFiles = info.Files(state).Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes(state) / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes(state) / (1024.0 * 1024.0); IDictionary<string, string> diagnostics = info.Diagnostics; segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.DocStoreOffset; if (docStoreOffset != -1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.DocStoreSegment); segInfoStat.docStoreSegment = info.DocStoreSegment; Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile); segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile; } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) { infoStream.Write(" test: open reader........."); } reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, state); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions) { if (reader.deletedDocs.Count() != info.GetDelCount(state)) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount(state) + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc) { throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount(state)) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount(state) + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount(state) != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount(state) + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc != info.docCount) { throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount); } // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader, state); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader, state); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf, state); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf, state); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new System.SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new System.SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new System.SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); const string comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) { reader.Close(); } } // Keeper result.newSegments.Add((SegmentInfo)info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else { Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); } return(result); }
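// Hedged usage sketch (not from the original source): how a caller might drive the checker
// above and inspect the resulting Status. The wrapping CheckIndex class constructor and the
// way an IState is obtained are assumptions for illustration; the Status fields used here
// (clean, numBadSegments, totLoseDocCount) are the ones populated by the method above.
public static void RunCheckIndexSketch(Directory dir, IState state)
{
    var checker = new CheckIndex(dir);
    // null => check every segment; pass a list of segment names to check a subset
    CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null, state);
    if (status.clean)
    {
        System.Console.WriteLine("index is clean");
    }
    else
    {
        System.Console.WriteLine(status.numBadSegments + " broken segments; " + status.totLoseDocCount + " docs would be lost by a fix");
    }
}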
public virtual void TestSameFieldNumbersAcrossSegments() { for (int i = 0; i < 2; i++) { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); Document d1 = new Document(); d1.Add(new StringField("f1", "first field", Field.Store.YES)); d1.Add(new StringField("f2", "second field", Field.Store.YES)); writer.AddDocument(d1); if (i == 1) { writer.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); } else { writer.Commit(); } Document d2 = new Document(); FieldType customType2 = new FieldType(TextField.TYPE_STORED); customType2.StoreTermVectors = true; d2.Add(new TextField("f2", "second field", Field.Store.NO)); d2.Add(new Field("f1", "first field", customType2)); d2.Add(new TextField("f3", "third field", Field.Store.NO)); d2.Add(new TextField("f4", "fourth field", Field.Store.NO)); writer.AddDocument(d2); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(2, sis.Size()); FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0)); FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1)); Assert.AreEqual("f1", fis1.FieldInfo(0).Name); Assert.AreEqual("f2", fis1.FieldInfo(1).Name); Assert.AreEqual("f1", fis2.FieldInfo(0).Name); Assert.AreEqual("f2", fis2.FieldInfo(1).Name); Assert.AreEqual("f3", fis2.FieldInfo(2).Name); Assert.AreEqual("f4", fis2.FieldInfo(3).Name); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); writer.ForceMerge(1); writer.Dispose(); sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Size()); FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(0)); Assert.AreEqual("f1", fis3.FieldInfo(0).Name); Assert.AreEqual("f2", fis3.FieldInfo(1).Name); Assert.AreEqual("f3", fis3.FieldInfo(2).Name); Assert.AreEqual("f4", fis3.FieldInfo(3).Name); dir.Dispose(); } }
public virtual void TestPositions() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // f1,f2,f3: docs only FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = IndexOptions.DOCS_ONLY; Field f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); Field f2 = NewField("f2", "this field has docs only", ft); d.Add(f2); Field f3 = NewField("f3", "this field has docs only", ft); d.Add(f3); FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED); ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS; // f4,f5,f6 docs and freqs Field f4 = NewField("f4", "this field has docs and freqs", ft2); d.Add(f4); Field f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); Field f6 = NewField("f6", "this field has docs and freqs", ft2); d.Add(f6); FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED); ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; // f7,f8,f9 docs/freqs/positions Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3); d.Add(f7); Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3); d.Add(f8); Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); writer.ForceMerge(1); // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8, // and docs/freqs/positions for f3, f6, f9 d = new Document(); // f1,f4,f7: docs only f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); f4 = NewField("f4", "this field has docs only", ft); d.Add(f4); f7 = NewField("f7", "this field has docs only", ft); d.Add(f7); // f2, f5, f8: docs and freqs f2 = NewField("f2", "this field has docs and freqs", ft2); d.Add(f2); f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); f8 = NewField("f8", "this field has docs and freqs", ft2); d.Add(f8); // f3, f6, f9: docs and freqs and positions f3 = NewField("f3", "this field has docs and freqs and positions", ft3); d.Add(f3); f6 = NewField("f6", "this field has docs and freqs and positions", ft3); d.Add(f6); f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; // docs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions); // docs + docs/freqs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions); // docs + docs/freqs/pos = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions); // docs/freqs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions); // docs/freqs + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions); // docs/freqs + docs/freqs/pos = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions); // docs/freqs/pos + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions); // docs/freqs/pos + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions); // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions); reader.Dispose(); 
ram.Dispose(); }
// Delete by query private static long ApplyQueryDeletes(IEnumerable<QueryAndLimit> queriesIter, ReadersAndUpdates rld, SegmentReader reader) { long delCount = 0; AtomicReaderContext readerContext = reader.AtomicContext; bool any = false; foreach (QueryAndLimit ent in queriesIter) { Query query = ent.Query; int limit = ent.Limit; DocIdSet docs = (new QueryWrapperFilter(query)).GetDocIdSet(readerContext, reader.LiveDocs); if (docs != null) { DocIdSetIterator it = docs.GetIterator(); if (it != null) { while (true) { int doc = it.NextDoc(); if (doc >= limit) { break; } if (!any) { rld.InitWritableLiveDocs(); any = true; } if (rld.Delete(doc)) { delCount++; } } } } } return(delCount); }
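// Sketch of the filter-iteration pattern ApplyQueryDeletes is built on, shown in isolation
// (the method name and parameters are placeholders, not from the source): wrap the Query in
// a QueryWrapperFilter, ask it for a DocIdSet over the segment's live docs, and walk the
// iterator to NO_MORE_DOCS. In ApplyQueryDeletes each hit below the limit becomes a deletion.
private static int CountQueryMatches(Query query, SegmentReader reader)
{
    DocIdSet docs = new QueryWrapperFilter(query).GetDocIdSet(reader.AtomicContext, reader.LiveDocs);
    int count = 0;
    if (docs != null)
    {
        DocIdSetIterator it = docs.GetIterator();
        if (it != null)
        {
            while (it.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                count++; // one matching live doc
            }
        }
    }
    return count;
}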
/// <summary> /// Create new <see cref="SegmentReader"/> sharing core from a previous /// <see cref="SegmentReader"/> and loading new live docs from a new /// deletes file. Used by <see cref="DirectoryReader.OpenIfChanged(DirectoryReader)"/>. /// </summary> internal SegmentReader(SegmentCommitInfo si, SegmentReader sr) : this(si, sr, si.Info.Codec.LiveDocsFormat.ReadLiveDocs(si.Info.Dir, si, IOContext.READ_ONCE), si.Info.DocCount - si.DelCount) { }
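// Hedged usage sketch: the constructor above is what lets DirectoryReader.OpenIfChanged pick
// up a new deletes file while sharing the segment core. Names here are illustrative, not from
// the source; OpenIfChanged returns null when nothing has been committed since the reader opened.
private static DirectoryReader RefreshReaderSketch(DirectoryReader reader)
{
    DirectoryReader changed = DirectoryReader.OpenIfChanged(reader);
    if (changed == null) { return reader; } // no new commit; keep the current reader
    reader.Dispose(); // safe: shared segment cores were IncRef'd rather than reopened
    return changed;
}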
/// <summary> Test term vectors for a segment.</summary> private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format, IState state) { var status = new Status.TermVectorStatus(); try { if (infoStream != null) { infoStream.Write(" test: term vectors........"); } for (int j = 0; j < info.docCount; ++j) { if (!reader.IsDeleted(j)) { status.docCount++; ITermFreqVector[] tfv = reader.GetTermFreqVectors(j, state); // state threaded through as in TestFieldNorms if (tfv != null) { status.totVectors += tfv.Length; } } } Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) })); } catch (System.Exception e) { Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); status.error = e; if (infoStream != null) { infoStream.WriteLine(e.StackTrace); } } return status; }
/// <summary> /// this constructor is only used for <seealso cref="DoOpenIfChanged(SegmentInfos)"/> </summary> private static DirectoryReader Open(Directory directory, SegmentInfos infos, IList<AtomicReader> oldReaders, int termInfosIndexDivisor) { // we put the old SegmentReaders in a map, that allows us // to lookup a reader using its segment name IDictionary<string, int?> segmentReaders = new Dictionary<string, int?>(); if (oldReaders != null) { // create a Map SegmentName->SegmentReader for (int i = 0, c = oldReaders.Count; i < c; i++) { SegmentReader sr = (SegmentReader)oldReaders[i]; segmentReaders[sr.SegmentName] = Convert.ToInt32(i); } } SegmentReader[] newReaders = new SegmentReader[infos.Count]; // remember which readers are shared between the old and the re-opened // DirectoryReader - we have to incRef those readers bool[] readerShared = new bool[infos.Count]; for (int i = infos.Count - 1; i >= 0; i--) { // find SegmentReader for this segment int? oldReaderIndex; segmentReaders.TryGetValue(infos.Info(i).Info.Name, out oldReaderIndex); if (oldReaderIndex == null) { // this is a new segment, no old SegmentReader can be reused newReaders[i] = null; } else { // there is an old reader for this segment - we'll try to reopen it newReaders[i] = (SegmentReader)oldReaders[(int)oldReaderIndex]; } bool success = false; Exception prior = null; try { SegmentReader newReader; if (newReaders[i] == null || infos.Info(i).Info.UseCompoundFile != newReaders[i].SegmentInfo.Info.UseCompoundFile) { // this is a new reader; in case we hit an exception we can close it safely newReader = new SegmentReader(infos.Info(i), termInfosIndexDivisor, IOContext.READ); readerShared[i] = false; newReaders[i] = newReader; } else { if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen && newReaders[i].SegmentInfo.FieldInfosGen == infos.Info(i).FieldInfosGen) { // No change; this reader will be shared between // the old and the new one, so we must incRef // it: readerShared[i] = true; newReaders[i].IncRef(); } else { // there are changes to the reader, either liveDocs or DV updates readerShared[i] = false; // Steal the ref returned by SegmentReader ctor: Debug.Assert(infos.Info(i).Info.Dir == newReaders[i].SegmentInfo.Info.Dir); Debug.Assert(infos.Info(i).HasDeletions || infos.Info(i).HasFieldUpdates); if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen) { // only DV updates newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i], newReaders[i].LiveDocs, newReaders[i].NumDocs); } else { // both DV and liveDocs have changed newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i]); } } } success = true; } catch (Exception ex) { prior = ex; } finally { if (!success) { for (i++; i < infos.Count; i++) { if (newReaders[i] != null) { try { if (!readerShared[i]) { // this is a new subReader that is not used by the old one, // we can close it newReaders[i].Dispose(); } else { // this subReader is also used by the old reader, so instead // closing we must decRef it newReaders[i].DecRef(); } } catch (Exception t) { if (prior == null) { prior = t; } } } } } // throw the first exception IOUtils.ReThrow(prior); } } return(new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false)); }
/// <summary>Merge the stored fields of all readers into the target segment.</summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader)readers[readers.Count - 1]; fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();) { IndexReader reader = (IndexReader)iter.Current; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; FieldInfos readerFieldInfos = segmentReader.FieldInfos(); int numReaderFieldInfos = readerFieldInfos.Size(); for (int j = 0; j < numReaderFieldInfos; j++) { FieldInfo fi = readerFieldInfos.FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false); fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; SetMatchingSegmentReaders(); if (mergeDocStores) { // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { int idx = 0; for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();) { IndexReader reader = (IndexReader)iter.Current; SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; FieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); if (fieldsReader != null && fieldsReader.CanReadRawDocs()) { matchingFieldsReader = fieldsReader; } } if (reader.HasDeletions()) { docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader); } else { docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader); } } } finally { fieldsWriter.Close(); } System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; long fdxFileLength = directory.FileLength(fileName); if (4 + ((long)docCount) * 8 != fdxFileLength) { // This is most likely a bug in Sun JRE 1.6.0_04/_05; // we detect that the bug has struck, here, and // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption"); } } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else { for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();) { docCount += ((IndexReader)iter.Current).NumDocs(); } } return(docCount); }
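// Small sketch of the fdx sanity check at the end of MergeFields, pulled out for clarity.
// The file-format detail (a 4-byte header plus one 8-byte pointer per document) comes from
// the invariant in the code above; the helper name is an illustration, not a library API.
private static bool FdxLengthLooksValid(Directory directory, string segment, int docCount)
{
    string fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
    long expected = 4 + ((long)docCount) * 8; // header + one pointer per stored doc
    return directory.FileLength(fileName) == expected;
}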
/// <summary> Test field norms.</summary> private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader, IState state) { var status = new Status.FieldNormStatus(); try { // Test Field Norms if (infoStream != null) { infoStream.Write(" test: field norms........."); } var b = new byte[reader.MaxDoc]; foreach (string fieldName in fieldNames) { if (reader.HasNorms(fieldName, state)) { reader.Norms(fieldName, b, 0, state); ++status.totFields; } } Msg("OK [" + status.totFields + " fields]"); } catch (System.Exception e) { Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); status.error = e; if (infoStream != null) { infoStream.WriteLine(e.StackTrace); } } return(status); }
public virtual void TestMerge() { //System.out.println("----------------TestMerge------------------"); SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment, false); merger.Add(reader1); merger.Add(reader2); try { int docsMerged = merger.Merge(); merger.CloseReaders(); Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, docsMerged, mergedDir)); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs() == 2); Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - 2); Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - 2); TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field")); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.Next() == true); System.Collections.ICollection stored = mergedReader.GetIndexedFieldNames(true); Assert.IsTrue(stored != null); //System.out.println("stored size: " + stored.size()); Assert.IsTrue(stored.Count == 2); TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(vector != null); System.String[] terms = vector.GetTerms(); Assert.IsTrue(terms != null); //System.out.println("Terms size: " + terms.length); Assert.IsTrue(terms.Length == 3); int[] freqs = vector.GetTermFrequencies(); Assert.IsTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); for (int i = 0; i < terms.Length; i++) { System.String term = terms[i]; int freq = freqs[i]; //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); } } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.IsTrue(false); } //System.out.println("---------------------end TestMerge-------------------"); }
public virtual void TestPayloadFieldBit() { Directory ram = NewDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // this field won't have any payloads d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO)); // this field will have payloads in all docs, however not for all term positions, // so this field is used to check if the DocumentWriter correctly enables the payloads bit // even if only some term positions have payloads d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO)); // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // enabled in only some documents d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // only add payload data for field f2 #pragma warning disable 612, 618 analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); #pragma warning restore 612, 618 writer.AddDocument(d); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.IsFalse(fi.FieldInfo("f1").HasPayloads, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").HasPayloads, "Payload field bit should be set."); Assert.IsFalse(fi.FieldInfo("f3").HasPayloads, "Payload field bit should not be set."); reader.Dispose(); // now we add another document which has payloads for field f3 and verify if the SegmentMerger // enabled payloads for that field analyzer = new PayloadAnalyzer(); // Clear payload state for each field writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE)); d = new Document(); d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // add payload data for field f2 and f3 #pragma warning disable 612, 618 analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3); #pragma warning restore 612, 618 writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); fi = reader.FieldInfos; Assert.IsFalse(fi.FieldInfo("f1").HasPayloads, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").HasPayloads, "Payload field bit should be set."); Assert.IsTrue(fi.FieldInfo("f3").HasPayloads, "Payload field bit should be set."); reader.Dispose(); ram.Dispose(); }