public virtual void Test()
{
    //Positive test of FieldInfos
    Assert.IsTrue(testDoc != null);
    FieldInfos fieldInfos = new FieldInfos();
    fieldInfos.Add(testDoc);
    //Since the complement is stored as well in the fields map
    Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count); //this is all b/c we are using the no-arg constructor
    RAMDirectory dir = new RAMDirectory();
    System.String name = "testFile";
    IndexOutput output = dir.CreateOutput(name);
    Assert.IsTrue(output != null);
    //Use a RAMOutputStream
    try
    {
        fieldInfos.Write(output);
        output.Close();
        Assert.IsTrue(output.Length() > 0);
        FieldInfos readIn = new FieldInfos(dir, name);
        Assert.IsTrue(fieldInfos.Size() == readIn.Size());
        FieldInfo info = readIn.FieldInfo("textField1");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == false);
        Assert.IsTrue(info.omitNorms_ForNUnit == false);
        info = readIn.FieldInfo("textField2");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == true);
        Assert.IsTrue(info.omitNorms_ForNUnit == false);
        info = readIn.FieldInfo("textField3");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == false);
        Assert.IsTrue(info.omitNorms_ForNUnit == true);
        info = readIn.FieldInfo("omitNorms");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == false);
        Assert.IsTrue(info.omitNorms_ForNUnit == true);
        dir.Close();
    }
    catch (System.IO.IOException e)
    {
        Assert.IsTrue(false);
    }
}
public /*internal*/ Document Doc(int n, FieldSelector fieldSelector)
{
    SeekIndex(n);
    long position = indexStream.ReadLong();
    fieldsStream.Seek(position);

    var doc = new Document();
    int numFields = fieldsStream.ReadVInt();
    for (int i = 0; i < numFields; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt();
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
        FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

        byte bits = fieldsStream.ReadByte();
        System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

        bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        System.Diagnostics.Debug.Assert(
            (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
            "compressed fields are only allowed in indexes of version <= 2.9");
        bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
        bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;

        //TODO: Find an alternative approach here if this list continues to grow beyond the
        //list of 5 or 6 currently here. See Lucene 762 for discussion
        if (acceptField.Equals(FieldSelectorResult.LOAD))
        {
            AddField(doc, fi, binary, compressed, tokenize);
        }
        else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
        {
            AddField(doc, fi, binary, compressed, tokenize);
            break; //Get out of this loop
        }
        else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
        {
            AddFieldLazy(doc, fi, binary, compressed, tokenize);
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE))
        {
            SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
        {
            AddFieldSize(doc, fi, binary, compressed);
            break;
        }
        else
        {
            SkipField(binary, compressed);
        }
    }

    return doc;
}
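The Doc(n, fieldSelector) example above decides per stored field whether to load, lazy-load, size, or skip it based on a FieldSelectorResult. Below is a minimal caller-side sketch, assuming the single-method FieldSelector contract from the Lucene.Net 3.x Documents namespace used by that example; the class name TitleOnlySelector and the "title" field are purely illustrative.

using Lucene.Net.Documents;

// Illustrative selector (not part of Lucene): fully load the "title" field,
// stop reading stored fields once it has been seen, and skip everything else.
public class TitleOnlySelector : FieldSelector
{
    public FieldSelectorResult Accept(string fieldName)
    {
        return fieldName == "title"
            ? FieldSelectorResult.LOAD_AND_BREAK // load it, then break out of the field loop
            : FieldSelectorResult.NO_LOAD;       // do not materialize this field's value
    }
}

Such a selector would typically be passed to IndexReader.Document(docId, selector), which ultimately routes through a Doc(...) implementation like the one shown above.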
private void AssertReadOnly(FieldInfos readOnly, FieldInfos modifiable)
{
    Assert.AreEqual(modifiable.Count, readOnly.Count);
    // assert we can iterate
    foreach (FieldInfo fi in readOnly)
    {
        Assert.AreEqual(fi.Name, modifiable.FieldInfo(fi.Number).Name);
    }
}
public System.Collections.ArrayList CreateCompoundFile(System.String fileName)
{
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName);

    System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(IndexFileNames.COMPOUND_EXTENSIONS.Length + 1));

    // Basic files
    for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
    {
        files.Add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
    }

    // Fieldable norm files
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && !fi.omitNorms)
        {
            files.Add(segment + "." + IndexFileNames.NORMS_EXTENSION);
            break;
        }
    }

    // Vector files
    if (fieldInfos.HasVectors())
    {
        for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++)
        {
            files.Add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
        }
    }

    // Now merge all added files
    System.Collections.IEnumerator it = files.GetEnumerator();
    while (it.MoveNext())
    {
        cfsWriter.AddFile((System.String) it.Current);
    }

    // Perform the merge
    cfsWriter.Close();

    return files;
}
public virtual void TestMixedRAM() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(2))); Document d = new Document(); // this field will have Tf Field f1 = NewField("f1", "this field has term freqs", normalType); d.Add(f1); // this field will NOT have Tf Field f2 = NewField("f2", "this field has NO Tf in all docs", omitType); d.Add(f2); for (int i = 0; i < 5; i++) { writer.AddDocument(d); } for (int i = 0; i < 20; i++) { writer.AddDocument(d); } // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f1").IndexOptions, "OmitTermFreqAndPositions field bit should not be set."); Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions, "OmitTermFreqAndPositions field bit should be set."); reader.Dispose(); ram.Dispose(); }
public override NumericDocValues GetNormValues(string field)
{
    EnsureOpen();
    FieldInfo fi = FieldInfos.FieldInfo(field);
    if (fi == null || !fi.HasNorms)
    {
        // Field does not exist or does not index norms
        return null;
    }
    return core.GetNormValues(fi);
}
public virtual void TestLUCENE_1590() { Document doc = new Document(); // f1 has no norms FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; FieldType customType2 = new FieldType(); customType2.IsStored = true; doc.Add(NewField("f1", "v1", customType)); doc.Add(NewField("f1", "v2", customType2)); // f2 has no TF FieldType customType3 = new FieldType(TextField.TYPE_NOT_STORED); customType3.IndexOptions = IndexOptions.DOCS_ONLY; Field f = NewField("f2", "v1", customType3); doc.Add(f); doc.Add(NewField("f2", "v2", customType2)); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); writer.AddDocument(doc); writer.ForceMerge(1); // be sure to have a single segment writer.Dispose(); TestUtil.CheckIndex(Dir); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(Dir)); FieldInfos fi = reader.FieldInfos; // f1 Assert.IsFalse(fi.FieldInfo("f1").HasNorms, "f1 should have no norms"); Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f1").IndexOptions, "omitTermFreqAndPositions field bit should not be set for f1"); // f2 Assert.IsTrue(fi.FieldInfo("f2").HasNorms, "f2 should have norms"); Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions, "omitTermFreqAndPositions field bit should be set for f2"); reader.Dispose(); }
public virtual void Test()
{
    //Positive test of FieldInfos
    Assert.IsTrue(testDoc != null);
    FieldInfos fieldInfos = new FieldInfos();
    fieldInfos.Add(testDoc);
    //Since the complement is stored as well in the fields map
    Assert.IsTrue(fieldInfos.Size() == 7); //this is 7 b/c we are using the no-arg constructor
    RAMDirectory dir = new RAMDirectory();
    System.String name = "testFile";
    OutputStream output = dir.CreateFile(name);
    Assert.IsTrue(output != null);
    //Use a RAMOutputStream
    try
    {
        fieldInfos.Write(output);
        output.Close();
        Assert.IsTrue(output.Length() > 0);
        FieldInfos readIn = new FieldInfos(dir, name);
        Assert.IsTrue(fieldInfos.Size() == readIn.Size());
        FieldInfo info = readIn.FieldInfo("textField1");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector == false);
        info = readIn.FieldInfo("textField2");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector == true);
        dir.Close();
    }
    catch (System.IO.IOException e)
    {
        Assert.IsTrue(false);
    }
}
// returns the FieldInfo that corresponds to the given field and type, or
// null if the field does not exist, or was not indexed with the requested
// DocValuesType.
private FieldInfo GetDVField(string field, DocValuesType type)
{
    FieldInfo fi = FieldInfos_Renamed.FieldInfo(field);
    if (fi == null)
    {
        // Field does not exist
        return null;
    }
    if (fi.DocValuesType == null)
    {
        // Field was not indexed with doc values
        return null;
    }
    if (fi.DocValuesType != type)
    {
        // Field DocValues are different than requested type
        return null;
    }
    return fi;
}
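GetDVField above guards doc-values access on the FieldInfo's declared DocValuesType. The same check can be made from outside a reader; the sketch below assumes the Lucene.NET 4.x AtomicReader surface used in these examples (FieldInfos, FieldInfo.DocValuesType, GetNumericDocValues), and DocValuesGuard is an illustrative helper name.

using Lucene.Net.Index;

public static class DocValuesGuard
{
    // Returns the field's numeric doc values, or null when the field is absent
    // or was not indexed with NUMERIC doc values.
    public static NumericDocValues GetNumericOrNull(AtomicReader reader, string field)
    {
        FieldInfo fi = reader.FieldInfos.FieldInfo(field);
        if (fi == null || fi.DocValuesType != DocValuesType.NUMERIC)
        {
            return null;
        }
        return reader.GetNumericDocValues(field);
    }
}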
public virtual void TestPayloadFieldBit() { Directory ram = NewDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // this field won't have any payloads d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO)); // this field will have payloads in all docs, however not for all term positions, // so this field is used to check if the DocumentWriter correctly enables the payloads bit // even if only some term positions have payloads d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO)); // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // enabled in only some documents d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // only add payload data for field f2 analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); writer.AddDocument(d); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set."); Assert.IsFalse(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should not be set."); reader.Dispose(); // now we add another document which has payloads for field f3 and verify if the SegmentMerger // enabled payloads for that field analyzer = new PayloadAnalyzer(); // Clear payload state for each field writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE)); d = new Document(); d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // add payload data for field f2 and f3 analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3); writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); fi = reader.FieldInfos; Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set."); Assert.IsTrue(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should be set."); reader.Dispose(); ram.Dispose(); }
public override SortedSetDocValues GetSortedSetDocValues(string field) { EnsureOpen(); OrdinalMap map = null; UninterruptableMonitor.Enter(cachedOrdMaps); try { if (!cachedOrdMaps.TryGetValue(field, out map)) { // uncached, or not a multi dv SortedSetDocValues dv = MultiDocValues.GetSortedSetValues(@in, field); if (dv is MultiSortedSetDocValues docValues) { map = docValues.Mapping; if (map.owner == CoreCacheKey) { cachedOrdMaps[field] = map; } } return(dv); } } finally { UninterruptableMonitor.Exit(cachedOrdMaps); } // cached ordinal map if (FieldInfos.FieldInfo(field).DocValuesType != DocValuesType.SORTED_SET) { return(null); } if (Debugging.AssertsEnabled) { Debugging.Assert(map != null); } int size = @in.Leaves.Count; var values = new SortedSetDocValues[size]; int[] starts = new int[size + 1]; for (int i = 0; i < size; i++) { AtomicReaderContext context = @in.Leaves[i]; SortedSetDocValues v = context.AtomicReader.GetSortedSetDocValues(field) ?? DocValues.EMPTY_SORTED_SET; values[i] = v; starts[i] = context.DocBase; } starts[size] = MaxDoc; return(new MultiSortedSetDocValues(values, starts, map)); }
public virtual void TestPayloadFieldBit() { rnd = NewRandom(); Directory ram = new RAMDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null); Document d = new Document(); // this field won't have any payloads d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); // this field will have payloads in all docs, however not for all term positions, // so this field is used to check if the DocumentWriter correctly enables the payloads bit // even if only some term positions have payloads d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // enabled in only some documents d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED)); // only add payload data for field f2 analyzer.SetPayloadData("f2", 1, System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1); writer.AddDocument(d, null); // flush writer.Close(); SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram, null); FieldInfos fi = reader.FieldInfos(); Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set."); Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set."); reader.Close(); // now we add another document which has payloads for field f3 and verify if the SegmentMerger // enabled payloads for that field writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null); d = new Document(); d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED)); // add payload data for field f2 and f3 analyzer.SetPayloadData("f2", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1); analyzer.SetPayloadData("f3", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 3); writer.AddDocument(d, null); // force merge writer.Optimize(null); // flush writer.Close(); reader = SegmentReader.GetOnlySegmentReader(ram, null); fi = reader.FieldInfos(); Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set."); Assert.IsTrue(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should be set."); reader.Close(); }
public override BinaryDocValues GetBinaryDocValues(string field)
{
    BinaryDocValues dv = base.GetBinaryDocValues(field);
    FieldInfo fi = FieldInfos.FieldInfo(field);
    if (dv != null)
    {
        Debug.Assert(fi != null);
        Debug.Assert(fi.DocValuesType == DocValuesType.BINARY);
        return new AssertingBinaryDocValues(dv, MaxDoc);
    }
    else
    {
        Debug.Assert(fi == null || fi.DocValuesType != DocValuesType.BINARY);
        return null;
    }
}
public override NumericDocValues GetNormValues(string field)
{
    NumericDocValues dv = base.GetNormValues(field);
    FieldInfo fi = FieldInfos.FieldInfo(field);
    if (dv != null)
    {
        Debug.Assert(fi != null);
        Debug.Assert(fi.HasNorms);
        return new AssertingNumericDocValues(dv, MaxDoc);
    }
    else
    {
        Debug.Assert(fi == null || fi.HasNorms == false);
        return null;
    }
}
internal void AddDocument(Document doc, IState state)
{
    indexStream.WriteLong(fieldsStream.FilePointer);

    System.Collections.Generic.IList<IFieldable> fields = doc.GetFields();
    int storedCount = fields.Count(field => field.IsStored);
    fieldsStream.WriteVInt(storedCount);

    foreach (IFieldable field in fields)
    {
        if (field.IsStored)
        {
            WriteField(fieldInfos.FieldInfo(field.Name), field, state);
        }
    }
}
public override SortedSetDocValues GetSortedSetDocValues(string field)
{
    SortedSetDocValues dv = base.GetSortedSetDocValues(field);
    FieldInfo fi = FieldInfos.FieldInfo(field);
    if (dv != null)
    {
        Debug.Assert(fi != null);
        Debug.Assert(fi.DocValuesType == DocValuesType.SORTED_SET);
        return new AssertingSortedSetDocValues(dv, MaxDoc);
    }
    else
    {
        Debug.Assert(fi == null || fi.DocValuesType != DocValuesType.SORTED_SET);
        return null;
    }
}
// LUCENENET specific - de-nested AssertingNumericDocValues
// LUCENENET specific - de-nested AssertingBinaryDocValues
// LUCENENET specific - de-nested AssertingSortedDocValues
// LUCENENET specific - de-nested AssertingSortedSetDocValues

public override NumericDocValues GetNumericDocValues(string field)
{
    NumericDocValues dv = base.GetNumericDocValues(field);
    FieldInfo fi = FieldInfos.FieldInfo(field);
    if (dv != null)
    {
        Debug.Assert(fi != null);
        Debug.Assert(fi.DocValuesType == DocValuesType.NUMERIC);
        return new AssertingNumericDocValues(dv, MaxDoc);
    }
    else
    {
        Debug.Assert(fi == null || fi.DocValuesType != DocValuesType.NUMERIC);
        return null;
    }
}
public override IBits GetDocsWithField(string field)
{
    IBits docsWithField = base.GetDocsWithField(field);
    FieldInfo fi = FieldInfos.FieldInfo(field);
    if (docsWithField != null)
    {
        Debug.Assert(fi != null);
        Debug.Assert(fi.HasDocValues);
        Debug.Assert(MaxDoc == docsWithField.Length);
        docsWithField = new AssertingBits(docsWithField);
    }
    else
    {
        Debug.Assert(fi == null || fi.HasDocValues == false);
    }
    return docsWithField;
}
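The asserting readers above all pair a doc-values or norms lookup with the matching FieldInfo flag. Because FieldInfos is enumerable (as in the AssertReadOnly examples), those per-field flags can also be listed directly. This is only a sketch assuming the Lucene.NET 4.x member names that appear in the surrounding examples (Name, HasNorms, HasDocValues, HasPayloads()); FieldInfoReport is an illustrative name, and member spellings vary slightly between versions of the port.

using System;
using Lucene.Net.Index;

public static class FieldInfoReport
{
    // Dumps the per-field index-time flags recorded in a segment's FieldInfos.
    public static void Print(AtomicReader reader)
    {
        foreach (FieldInfo fi in reader.FieldInfos)
        {
            Console.WriteLine("{0}: norms={1}, docValues={2}, payloads={3}",
                fi.Name, fi.HasNorms, fi.HasDocValues, fi.HasPayloads());
        }
    }
}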
public override SortedDocValues GetSortedDocValues(string field) { EnsureOpen(); OrdinalMap map = null; lock (cachedOrdMaps) { if (!cachedOrdMaps.TryGetValue(field, out map)) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.GetSortedValues(@in, field); MultiSortedDocValues docValues = dv as MultiSortedDocValues; if (docValues != null) { map = docValues.Mapping; if (map.owner == CoreCacheKey) { cachedOrdMaps[field] = map; } } return(dv); } } // cached ordinal map if (FieldInfos.FieldInfo(field).DocValuesType != DocValuesType.SORTED) { return(null); } int size = @in.Leaves.Count; SortedDocValues[] values = new SortedDocValues[size]; int[] starts = new int[size + 1]; for (int i = 0; i < size; i++) { AtomicReaderContext context = @in.Leaves[i]; SortedDocValues v = context.AtomicReader.GetSortedDocValues(field) ?? DocValues.EMPTY_SORTED; values[i] = v; starts[i] = context.DocBase; } starts[size] = MaxDoc; return(new MultiSortedDocValues(values, starts, map)); }
public virtual void TestWithUnindexedFields() { Directory dir = NewDirectory(); RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc); for (int i = 0; i < 100; i++) { Document doc = new Document(); // ensure at least one doc is indexed with offsets if (i < 99 && Random.Next(2) == 0) { // stored only FieldType ft = new FieldType(); ft.IsIndexed = false; ft.IsStored = true; doc.Add(new Field("foo", "boo!", ft)); } else { FieldType ft = new FieldType(TextField.TYPE_STORED); ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random.NextBoolean()) { // store some term vectors for the checkindex cross-check ft.StoreTermVectors = true; ft.StoreTermVectorPositions = true; ft.StoreTermVectorOffsets = true; } doc.Add(new Field("foo", "bar", ft)); } riw.AddDocument(doc); } CompositeReader ir = riw.GetReader(); AtomicReader slow = SlowCompositeReaderWrapper.Wrap(ir); FieldInfos fis = slow.FieldInfos; Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, fis.FieldInfo("foo").IndexOptions); slow.Dispose(); ir.Dispose(); riw.Dispose(); dir.Dispose(); }
internal void AddDocument(Document doc)
{
    indexStream.WriteLong(fieldsStream.GetFilePointer());

    int storedCount = 0;
    foreach (Fieldable field in doc.GetFields())
    {
        if (field.IsStored())
        {
            storedCount++;
        }
    }
    fieldsStream.WriteVInt(storedCount);

    foreach (Fieldable field in doc.GetFields())
    {
        if (field.IsStored())
        {
            WriteField(fieldInfos.FieldInfo(field.Name()), field);
        }
    }
}
public virtual void TestDeleteLeftoverFiles() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); int i; for (i = 0; i < 35; i++) { AddDoc(writer, i); } writer.SetUseCompoundFile(false); for (; i < 45; i++) { AddDoc(writer, i); } writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float)1.5); reader.Close(); // Now, artificially create an extra .del file & extra // .s0 file: System.String[] files = dir.ListAll(); /* * for(int j=0;j<files.length;j++) { * System.out.println(j + ": " + files[j]); * } */ // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm"); int contentFieldIndex = -1; for (i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.name_ForNUnit.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment"); System.String normSuffix = "s" + contentFieldIndex; // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already: CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix); // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already, using the "not compound file" extension: CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already: CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already using the "not compound file" extension: CopyFile(dir, "_2_1." 
+ normSuffix, "_1_1.f" + contentFieldIndex); // Create a bogus separate del file for a // segment that already has a separate del file: CopyFile(dir, "_0_1.del", "_0_2.del"); // Create a bogus separate del file for a // segment that does not yet have a separate del file: CopyFile(dir, "_0_1.del", "_1_1.del"); // Create a bogus separate del file for a // non-existent segment: CopyFile(dir, "_0_1.del", "_188_1.del"); // Create a bogus segment file: CopyFile(dir, "_0.cfs", "_188.cfs"); // Create a bogus fnm file when the CFS already exists: CopyFile(dir, "_0.cfs", "_0.fnm"); // Create a deletable file: CopyFile(dir, "_0.cfs", "deletable"); // Create some old segments file: CopyFile(dir, "segments_3", "segments"); CopyFile(dir, "segments_3", "segments_2"); // Create a bogus cfs file shadowing a non-cfs segment: CopyFile(dir, "_2.cfs", "_3.cfs"); System.String[] filesPre = dir.ListAll(); // Open & close a writer: it should delete the above 4 // files and nothing more: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); writer.Close(); System.String[] files2 = dir.ListAll(); dir.Close(); System.Array.Sort(files); System.Array.Sort(files2); System.Collections.Hashtable dif = DifFiles(files, files2); if (!SupportClass.CollectionsHelper.Equals(files, files2)) { Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n " + AsString(files) + "\n actual files:\n " + AsString(files2) + "\ndif: " + SupportClass.CollectionsHelper.CollectionToString(dif)); } }
public virtual void TestExactFileNames() { for (int pass = 0; pass < 2; pass++) { System.String outputDir = "lucene.backwardscompat0.index"; RmDir(outputDir); try { Directory dir = FSDirectory.Open(new System.IO.FileInfo(FullDir(outputDir))); bool autoCommit = 0 == pass; IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); writer.SetRAMBufferSizeMB(16.0); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.DocCount(), "wrong doc count"); writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float) 1.5); reader.Close(); // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm"); int contentFieldIndex = - 1; for (int i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.name_ForNUnit.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != - 1, "could not locate the 'content' field number in the _2.cfs segment"); // Now verify file names: System.String[] expected; expected = new System.String[]{"_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen"}; System.String[] actual = dir.ListAll(); System.Array.Sort(expected); System.Array.Sort(actual); if (!SupportClass.CollectionsHelper.Equals(expected, actual)) { Assert.Fail("incorrect filenames in index: expected:\n " + AsString(expected) + "\n actual:\n " + AsString(actual)); } dir.Close(); } finally { RmDir(outputDir); } } }
private void AssertReadOnly(FieldInfos readOnly, FieldInfos modifiable)
{
    Assert.AreEqual(modifiable.Size(), readOnly.Size());
    // assert we can iterate
    foreach (FieldInfo fi in readOnly)
    {
        Assert.AreEqual(fi.Name, modifiable.FieldInfo(fi.Number).Name);
    }
}
/// <summary> /// checks Fields api is consistent with itself. /// searcher is optional, to verify with queries. Can be null. /// </summary> private static Status.TermIndexStatus CheckFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, bool doPrint, bool isVectors, TextWriter infoStream, bool verbose) { // TODO: we should probably return our own stats thing...?! Status.TermIndexStatus status = new Status.TermIndexStatus(); int computedFieldCount = 0; if (fields == null) { Msg(infoStream, "OK [no fields/terms]"); return status; } DocsEnum docs = null; DocsEnum docsAndFreqs = null; DocsAndPositionsEnum postings = null; string lastField = null; foreach (string field in fields) { // MultiFieldsEnum relies upon this order... if (lastField != null && field.CompareTo(lastField) <= 0) { throw new Exception("fields out of order: lastField=" + lastField + " field=" + field); } lastField = field; // check that the field is in fieldinfos, and is indexed. // TODO: add a separate test to check this for different reader impls FieldInfo fieldInfo = fieldInfos.FieldInfo(field); if (fieldInfo == null) { throw new Exception("fieldsEnum inconsistent with fieldInfos, no fieldInfos for: " + field); } if (!fieldInfo.Indexed) { throw new Exception("fieldsEnum inconsistent with fieldInfos, isIndexed == false for: " + field); } // TODO: really the codec should not return a field // from FieldsEnum if it has no Terms... but we do // this today: // assert fields.terms(field) != null; computedFieldCount++; Terms terms = fields.Terms(field); if (terms == null) { continue; } bool hasFreqs = terms.HasFreqs(); bool hasPositions = terms.HasPositions(); bool hasPayloads = terms.HasPayloads(); bool hasOffsets = terms.HasOffsets(); // term vectors cannot omit TF: bool expectedHasFreqs = (isVectors || fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS); if (hasFreqs != expectedHasFreqs) { throw new Exception("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs); } if (hasFreqs == false) { if (terms.SumTotalTermFreq != -1) { throw new Exception("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.SumTotalTermFreq + " (should be -1)"); } } if (!isVectors) { bool expectedHasPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; if (hasPositions != expectedHasPositions) { throw new Exception("field \"" + field + "\" should have hasPositions=" + expectedHasPositions + " but got " + hasPositions); } bool expectedHasPayloads = fieldInfo.HasPayloads(); if (hasPayloads != expectedHasPayloads) { throw new Exception("field \"" + field + "\" should have hasPayloads=" + expectedHasPayloads + " but got " + hasPayloads); } bool expectedHasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (hasOffsets != expectedHasOffsets) { throw new Exception("field \"" + field + "\" should have hasOffsets=" + expectedHasOffsets + " but got " + hasOffsets); } } TermsEnum termsEnum = terms.Iterator(null); bool hasOrd = true; long termCountStart = status.DelTermCount + status.TermCount; BytesRef lastTerm = null; IComparer<BytesRef> termComp = terms.Comparator; long sumTotalTermFreq = 0; long sumDocFreq = 0; FixedBitSet visitedDocs = new FixedBitSet(maxDoc); while (true) { BytesRef term = termsEnum.Next(); if (term == null) { break; } Debug.Assert(term.Valid); // make sure terms arrive in order according to // the comp if (lastTerm == null) { lastTerm = 
BytesRef.DeepCopyOf(term); } else { if (termComp.Compare(lastTerm, term) >= 0) { throw new Exception("terms out of order: lastTerm=" + lastTerm + " term=" + term); } lastTerm.CopyBytes(term); } int docFreq = termsEnum.DocFreq(); if (docFreq <= 0) { throw new Exception("docfreq: " + docFreq + " is out of bounds"); } sumDocFreq += docFreq; docs = termsEnum.Docs(liveDocs, docs); postings = termsEnum.DocsAndPositions(liveDocs, postings); if (hasFreqs == false) { if (termsEnum.TotalTermFreq() != -1) { throw new Exception("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.TotalTermFreq() + " (should be -1)"); } } if (hasOrd) { long ord = -1; try { ord = termsEnum.Ord(); } catch (System.NotSupportedException uoe) { hasOrd = false; } if (hasOrd) { long ordExpected = status.DelTermCount + status.TermCount - termCountStart; if (ord != ordExpected) { throw new Exception("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected); } } } DocsEnum docs2; if (postings != null) { docs2 = postings; } else { docs2 = docs; } int lastDoc = -1; int docCount = 0; long totalTermFreq = 0; while (true) { int doc = docs2.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } status.TotFreq++; visitedDocs.Set(doc); int freq = -1; if (hasFreqs) { freq = docs2.Freq(); if (freq <= 0) { throw new Exception("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); } status.TotPos += freq; totalTermFreq += freq; } else { // When a field didn't index freq, it must // consistently "lie" and pretend that freq was // 1: if (docs2.Freq() != 1) { throw new Exception("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false"); } } docCount++; if (doc <= lastDoc) { throw new Exception("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); } if (doc >= maxDoc) { throw new Exception("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); } lastDoc = doc; int lastPos = -1; int lastOffset = 0; if (hasPositions) { for (int j = 0; j < freq; j++) { int pos = postings.NextPosition(); if (pos < 0) { throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } if (pos < lastPos) { throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } lastPos = pos; BytesRef payload = postings.Payload; if (payload != null) { Debug.Assert(payload.Valid); } if (payload != null && payload.Length < 1) { throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.Length); } if (hasOffsets) { int startOffset = postings.StartOffset(); int endOffset = postings.EndOffset(); // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before? 
// but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter if (!isVectors) { if (startOffset < 0) { throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds"); } if (startOffset < lastOffset) { throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset); } if (endOffset < 0) { throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds"); } if (endOffset < startOffset) { throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset); } } lastOffset = startOffset; } } } } if (docCount != 0) { status.TermCount++; } else { status.DelTermCount++; } long totalTermFreq2 = termsEnum.TotalTermFreq(); bool hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1; // Re-count if there are deleted docs: if (liveDocs != null) { if (hasFreqs) { DocsEnum docsNoDel = termsEnum.Docs(null, docsAndFreqs); docCount = 0; totalTermFreq = 0; while (docsNoDel.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { visitedDocs.Set(docsNoDel.DocID()); docCount++; totalTermFreq += docsNoDel.Freq(); } } else { DocsEnum docsNoDel = termsEnum.Docs(null, docs, DocsEnum.FLAG_NONE); docCount = 0; totalTermFreq = -1; while (docsNoDel.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { visitedDocs.Set(docsNoDel.DocID()); docCount++; } } } if (docCount != docFreq) { throw new Exception("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount); } if (hasTotalTermFreq) { if (totalTermFreq2 <= 0) { throw new Exception("totalTermFreq: " + totalTermFreq2 + " is out of bounds"); } sumTotalTermFreq += totalTermFreq; if (totalTermFreq != totalTermFreq2) { throw new Exception("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq); } } // Test skipping if (hasPositions) { for (int idx = 0; idx < 7; idx++) { int skipDocID = (int)(((idx + 1) * (long)maxDoc) / 8); postings = termsEnum.DocsAndPositions(liveDocs, postings); int docID = postings.Advance(skipDocID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } else { if (docID < skipDocID) { throw new Exception("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID); } int freq = postings.Freq(); if (freq <= 0) { throw new Exception("termFreq " + freq + " is out of bounds"); } int lastPosition = -1; int lastOffset = 0; for (int posUpto = 0; posUpto < freq; posUpto++) { int pos = postings.NextPosition(); if (pos < 0) { throw new Exception("position " + pos + " is out of bounds"); } if (pos < lastPosition) { throw new Exception("position " + pos + " is < lastPosition " + lastPosition); } lastPosition = pos; if (hasOffsets) { int startOffset = postings.StartOffset(); int endOffset = postings.EndOffset(); // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before? 
// but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter if (!isVectors) { if (startOffset < 0) { throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds"); } if (startOffset < lastOffset) { throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset); } if (endOffset < 0) { throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds"); } if (endOffset < startOffset) { throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset); } } lastOffset = startOffset; } } int nextDocID = postings.NextDoc(); if (nextDocID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (nextDocID <= docID) { throw new Exception("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID); } } } } else { for (int idx = 0; idx < 7; idx++) { int skipDocID = (int)(((idx + 1) * (long)maxDoc) / 8); docs = termsEnum.Docs(liveDocs, docs, DocsEnum.FLAG_NONE); int docID = docs.Advance(skipDocID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } else { if (docID < skipDocID) { throw new Exception("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID); } int nextDocID = docs.NextDoc(); if (nextDocID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (nextDocID <= docID) { throw new Exception("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID); } } } } } Terms fieldTerms = fields.Terms(field); if (fieldTerms == null) { // Unusual: the FieldsEnum returned a field but // the Terms for that field is null; this should // only happen if it's a ghost field (field with // no terms, eg there used to be terms but all // docs got deleted and then merged away): } else { if (fieldTerms is BlockTreeTermsReader.FieldReader) { BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader)fieldTerms).ComputeStats(); Debug.Assert(stats != null); if (status.BlockTreeStats == null) { status.BlockTreeStats = new Dictionary<string, BlockTreeTermsReader.Stats>(); } status.BlockTreeStats[field] = stats; } if (sumTotalTermFreq != 0) { long v = fields.Terms(field).SumTotalTermFreq; if (v != -1 && sumTotalTermFreq != v) { throw new Exception("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq); } } if (sumDocFreq != 0) { long v = fields.Terms(field).SumDocFreq; if (v != -1 && sumDocFreq != v) { throw new Exception("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq); } } if (fieldTerms != null) { int v = fieldTerms.DocCount; if (v != -1 && visitedDocs.Cardinality() != v) { throw new Exception("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.Cardinality()); } } // Test seek to last term: if (lastTerm != null) { if (termsEnum.SeekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND) { throw new Exception("seek to last term " + lastTerm + " failed"); } int expectedDocFreq = termsEnum.DocFreq(); DocsEnum d = termsEnum.Docs(null, null, DocsEnum.FLAG_NONE); int docFreq = 0; while (d.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { docFreq++; } if (docFreq != expectedDocFreq) { throw new Exception("docFreq for last term " + lastTerm + "=" + expectedDocFreq + 
" != recomputed docFreq=" + docFreq); } } // check unique term count long termCount = -1; if ((status.DelTermCount + status.TermCount) - termCountStart > 0) { termCount = fields.Terms(field).Size(); if (termCount != -1 && termCount != status.DelTermCount + status.TermCount - termCountStart) { throw new Exception("termCount mismatch " + (status.DelTermCount + termCount) + " vs " + (status.TermCount - termCountStart)); } } // Test seeking by ord if (hasOrd && status.TermCount - termCountStart > 0) { int seekCount = (int)Math.Min(10000L, termCount); if (seekCount > 0) { BytesRef[] seekTerms = new BytesRef[seekCount]; // Seek by ord for (int i = seekCount - 1; i >= 0; i--) { long ord = i * (termCount / seekCount); termsEnum.SeekExact(ord); seekTerms[i] = BytesRef.DeepCopyOf(termsEnum.Term()); } // Seek by term long totDocCount = 0; for (int i = seekCount - 1; i >= 0; i--) { if (termsEnum.SeekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) { throw new Exception("seek to existing term " + seekTerms[i] + " failed"); } docs = termsEnum.Docs(liveDocs, docs, DocsEnum.FLAG_NONE); if (docs == null) { throw new Exception("null DocsEnum from to existing term " + seekTerms[i]); } while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { totDocCount++; } } long totDocCountNoDeletes = 0; long totDocFreq = 0; for (int i = 0; i < seekCount; i++) { if (!termsEnum.SeekExact(seekTerms[i])) { throw new Exception("seek to existing term " + seekTerms[i] + " failed"); } totDocFreq += termsEnum.DocFreq(); docs = termsEnum.Docs(null, docs, DocsEnum.FLAG_NONE); if (docs == null) { throw new Exception("null DocsEnum from to existing term " + seekTerms[i]); } while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { totDocCountNoDeletes++; } } if (totDocCount > totDocCountNoDeletes) { throw new Exception("more postings with deletes=" + totDocCount + " than without=" + totDocCountNoDeletes); } if (totDocCountNoDeletes != totDocFreq) { throw new Exception("docfreqs=" + totDocFreq + " != recomputed docfreqs=" + totDocCountNoDeletes); } } } } } int fieldCount = fields.Size; if (fieldCount != -1) { if (fieldCount < 0) { throw new Exception("invalid fieldCount: " + fieldCount); } if (fieldCount != computedFieldCount) { throw new Exception("fieldCount mismatch " + fieldCount + " vs recomputed field count " + computedFieldCount); } } // for most implementations, this is boring (just the sum across all fields) // but codecs that don't work per-field like preflex actually implement this, // but don't implement it on Terms, so the check isn't redundant. long uniqueTermCountAllFields = fields.UniqueTermCount; if (uniqueTermCountAllFields != -1 && status.TermCount + status.DelTermCount != uniqueTermCountAllFields) { throw new Exception("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.TermCount + status.DelTermCount)); } if (doPrint) { Msg(infoStream, "OK [" + status.TermCount + " terms; " + status.TotFreq + " terms/docs pairs; " + status.TotPos + " tokens]"); } if (verbose && status.BlockTreeStats != null && infoStream != null && status.TermCount > 0) { foreach (KeyValuePair<string, BlockTreeTermsReader.Stats> ent in status.BlockTreeStats) { infoStream.WriteLine(" field \"" + ent.Key + "\":"); infoStream.WriteLine(" " + ent.Value.ToString().Replace("\n", "\n ")); } } return status; }
public virtual void TestDeleteLeftoverFiles() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); int i; for (i = 0; i < 35; i++) { AddDoc(writer, i); } writer.SetUseCompoundFile(false); for (; i < 45; i++) { AddDoc(writer, i); } writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float) 1.5); reader.Close(); // Now, artificially create an extra .del file & extra // .s0 file: System.String[] files = dir.List(); /* for(int i=0;i<files.length;i++) { System.out.println(i + ": " + files[i]); } */ // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm"); int contentFieldIndex = - 1; for (i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.Name_ForNUnitTest.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != - 1, "could not locate the 'content' field number in the _2.cfs segment"); System.String normSuffix = "s" + contentFieldIndex; // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already: CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix); // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already, using the "not compound file" extension: CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already: CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already using the "not compound file" extension: CopyFile(dir, "_2_1." 
+ normSuffix, "_1_1.f" + contentFieldIndex); // Create a bogus separate del file for a // segment that already has a separate del file: CopyFile(dir, "_0_1.del", "_0_2.del"); // Create a bogus separate del file for a // segment that does not yet have a separate del file: CopyFile(dir, "_0_1.del", "_1_1.del"); // Create a bogus separate del file for a // non-existent segment: CopyFile(dir, "_0_1.del", "_188_1.del"); // Create a bogus segment file: CopyFile(dir, "_0.cfs", "_188.cfs"); // Create a bogus fnm file when the CFS already exists: CopyFile(dir, "_0.cfs", "_0.fnm"); // Create a deletable file: CopyFile(dir, "_0.cfs", "deletable"); // Create some old segments file: CopyFile(dir, "segments_a", "segments"); CopyFile(dir, "segments_a", "segments_2"); // Create a bogus cfs file shadowing a non-cfs segment: CopyFile(dir, "_2.cfs", "_3.cfs"); System.String[] filesPre = dir.List(); // Open & close a writer: it should delete the above 4 // files and nothing more: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.Close(); System.String[] files2 = dir.List(); dir.Close(); System.Array.Sort(files); System.Array.Sort(files2); if (!ArrayEquals(files, files2)) { Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n " + AsString(files) + "\n actual files:\n " + AsString(files2)); } }
public /*internal*/ Document Doc(int n) { indexStream.Seek(n * 8L); long position = indexStream.ReadLong(); fieldsStream.Seek(position); Document doc = new Document(); int numFields = fieldsStream.ReadVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.ReadVInt(); FieldInfo fi = fieldInfos.FieldInfo(fieldNumber); byte bits = fieldsStream.ReadByte(); bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) { byte[] b = new byte[fieldsStream.ReadVInt()]; fieldsStream.ReadBytes(b, 0, b.Length); if (compressed) { doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS)); } else { doc.Add(new Field(fi.name, b, Field.Store.YES)); } } else { Field.Index index; Field.Store store = Field.Store.YES; if (fi.isIndexed && tokenize) { index = Field.Index.TOKENIZED; } else if (fi.isIndexed && !tokenize) { index = Field.Index.UN_TOKENIZED; } else { index = Field.Index.NO; } Field.TermVector termVector = null; if (fi.storeTermVector) { if (fi.storeOffsetWithTermVector) { if (fi.storePositionWithTermVector) { termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; } else { termVector = Field.TermVector.WITH_OFFSETS; } } else if (fi.storePositionWithTermVector) { termVector = Field.TermVector.WITH_POSITIONS; } else { termVector = Field.TermVector.YES; } } else { termVector = Field.TermVector.NO; } if (compressed) { store = Field.Store.COMPRESS; byte[] b = new byte[fieldsStream.ReadVInt()]; fieldsStream.ReadBytes(b, 0, b.Length); Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector); f.SetOmitNorms(fi.omitNorms); doc.Add(f); } else { Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector); f.SetOmitNorms(fi.omitNorms); doc.Add(f); } } } return(doc); }
// maxAllowed = the "highest" we can index, but we will still // randomly index at lower IndexOption private FieldsProducer BuildIndex(Directory dir, FieldInfo.IndexOptions maxAllowed, bool allowPayloads, bool alwaysTestMax) { Codec codec = Codec; SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", MaxDoc, false, codec, null); int maxIndexOption = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(maxAllowed); if (VERBOSE) { Console.WriteLine("\nTEST: now build index"); } int maxIndexOptionNoOffsets = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); // TODO use allowPayloads var newFieldInfoArray = new FieldInfo[Fields.Count]; for (int fieldUpto = 0; fieldUpto < Fields.Count; fieldUpto++) { FieldInfo oldFieldInfo = FieldInfos.FieldInfo(fieldUpto); string pf = TestUtil.GetPostingsFormat(codec, oldFieldInfo.Name); int fieldMaxIndexOption; if (DoesntSupportOffsets.Contains(pf)) { fieldMaxIndexOption = Math.Min(maxIndexOptionNoOffsets, maxIndexOption); } else { fieldMaxIndexOption = maxIndexOption; } // Randomly picked the IndexOptions to index this // field with: FieldInfo.IndexOptions indexOptions = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToArray()[alwaysTestMax ? fieldMaxIndexOption : Random().Next(1 + fieldMaxIndexOption)]; bool doPayloads = indexOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads; newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.Name, true, fieldUpto, false, false, doPayloads, indexOptions, null, DocValuesType.NUMERIC, null); } FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray); // Estimate that flushed segment size will be 25% of // what we use in RAM: long bytes = TotalPostings * 8 + TotalPayloadBytes; SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, newFieldInfos, 32, null, new IOContext(new FlushInfo(MaxDoc, bytes))); FieldsConsumer fieldsConsumer = codec.PostingsFormat().FieldsConsumer(writeState); foreach (KeyValuePair<string, SortedDictionary<BytesRef, long>> fieldEnt in Fields) { string field = fieldEnt.Key; IDictionary<BytesRef, long> terms = fieldEnt.Value; FieldInfo fieldInfo = newFieldInfos.FieldInfo(field); FieldInfo.IndexOptions? 
indexOptions = fieldInfo.FieldIndexOptions; if (VERBOSE) { Console.WriteLine("field=" + field + " indexOtions=" + indexOptions); } bool doFreq = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS; bool doPos = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; bool doPayloads = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && allowPayloads; bool doOffsets = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; TermsConsumer termsConsumer = fieldsConsumer.AddField(fieldInfo); long sumTotalTF = 0; long sumDF = 0; FixedBitSet seenDocs = new FixedBitSet(MaxDoc); foreach (KeyValuePair<BytesRef, long> termEnt in terms) { BytesRef term = termEnt.Key; SeedPostings postings = GetSeedPostings(term.Utf8ToString(), termEnt.Value, false, maxAllowed); if (VERBOSE) { Console.WriteLine(" term=" + field + ":" + term.Utf8ToString() + " docFreq=" + postings.DocFreq + " seed=" + termEnt.Value); } PostingsConsumer postingsConsumer = termsConsumer.StartTerm(term); long totalTF = 0; int docID = 0; while ((docID = postings.NextDoc()) != DocsEnum.NO_MORE_DOCS) { int freq = postings.Freq(); if (VERBOSE) { Console.WriteLine(" " + postings.Upto + ": docID=" + docID + " freq=" + postings.Freq_Renamed); } postingsConsumer.StartDoc(docID, doFreq ? postings.Freq_Renamed : -1); seenDocs.Set(docID); if (doPos) { totalTF += postings.Freq_Renamed; for (int posUpto = 0; posUpto < freq; posUpto++) { int pos = postings.NextPosition(); BytesRef payload = postings.Payload; if (VERBOSE) { if (doPayloads) { Console.WriteLine(" pos=" + pos + " payload=" + (payload == null ? "null" : payload.Length + " bytes")); } else { Console.WriteLine(" pos=" + pos); } } postingsConsumer.AddPosition(pos, doPayloads ? payload : null, doOffsets ? postings.StartOffset() : -1, doOffsets ? postings.EndOffset() : -1); } } else if (doFreq) { totalTF += freq; } else { totalTF++; } postingsConsumer.FinishDoc(); } termsConsumer.FinishTerm(term, new TermStats(postings.DocFreq, doFreq ? totalTF : -1)); sumTotalTF += totalTF; sumDF += postings.DocFreq; } termsConsumer.Finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.Cardinality()); } fieldsConsumer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: after indexing: files="); foreach (string file in dir.ListAll()) { Console.WriteLine(" " + file + ": " + dir.FileLength(file) + " bytes"); } } CurrentFieldInfos = newFieldInfos; SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1); return codec.PostingsFormat().FieldsProducer(readState); }
public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) { UninterruptableMonitor.Enter(this); try { if (Debugging.AssertsEnabled) { Debugging.Assert(UninterruptableMonitor.IsEntered(writer)); } //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates); if (Debugging.AssertsEnabled) { Debugging.Assert(dvUpdates.Any()); } // Do this so we can delete any created files on // exception; this saves all codecs from having to do // it: TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); FieldInfos fieldInfos = null; bool success = false; try { Codec codec = Info.Info.Codec; // reader could be null e.g. for a just merged segment (from // IndexWriter.commitMergedDeletes). SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE); try { // clone FieldInfos so that we can update their dvGen separately from // the reader's infos and write them to a new fieldInfos_gen file FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap); // cannot use builder.add(reader.getFieldInfos()) because it does not // clone FI.attributes as well FI.dvGen foreach (FieldInfo fi in reader.FieldInfos) { FieldInfo clone = builder.Add(fi); // copy the stuff FieldInfos.Builder doesn't copy if (fi.Attributes != null) { foreach (KeyValuePair <string, string> e in fi.Attributes) { clone.PutAttribute(e.Key, e.Value); } } clone.DocValuesGen = fi.DocValuesGen; } // create new fields or update existing ones to have NumericDV type foreach (string f in dvUpdates.numericDVUpdates.Keys) { builder.AddOrUpdate(f, NumericDocValuesField.TYPE); } // create new fields or update existing ones to have BinaryDV type foreach (string f in dvUpdates.binaryDVUpdates.Keys) { builder.AddOrUpdate(f, BinaryDocValuesField.TYPE); } fieldInfos = builder.Finish(); long nextFieldInfosGen = Info.NextFieldInfosGen; // LUCENENET specific: We created the segments names wrong in 4.8.0-beta00001 - 4.8.0-beta00015, // so we added a switch to be able to read these indexes in later versions. This logic as well as an // optimization on the first 100 segment values is implmeneted in SegmentInfos.SegmentNumberToString(). 
string segmentSuffix = SegmentInfos.SegmentNumberToString(nextFieldInfosGen); SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix); DocValuesFormat docValuesFormat = codec.DocValuesFormat; DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state); bool fieldsConsumerSuccess = false; try { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates); foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates) { string field = e.Key; NumericDocValuesFieldUpdates fieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); if (Debugging.AssertsEnabled) { Debugging.Assert(fieldInfo != null); } fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates)); } // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates); foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates) { string field = e.Key; BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); if (Debugging.AssertsEnabled) { Debugging.Assert(fieldInfo != null); } // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates)); } codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT); fieldsConsumerSuccess = true; } finally { if (fieldsConsumerSuccess) { fieldsConsumer.Dispose(); } else { IOUtils.DisposeWhileHandlingException(fieldsConsumer); } } } finally { if (reader != this.reader) { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader); reader.Dispose(); } } success = true; } finally { if (!success) { // Advance only the nextWriteDocValuesGen so that a 2nd // attempt to write will write to a new file Info.AdvanceNextWriteFieldInfosGen(); // Delete any partially created file(s): foreach (string fileName in trackingDir.CreatedFiles) { try { dir.DeleteFile(fileName); } catch (Exception t) when(t.IsThrowable()) { // Ignore so we throw only the first exc } } } } Info.AdvanceFieldInfosGen(); // copy all the updates to mergingUpdates, so they can later be applied to the merged segment if (isMerging) { foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates) { if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates)) { mergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates) { if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates)) { mergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } } // create a new map, keeping only the gens that are in use IDictionary <long, ISet <string> > genUpdatesFiles = Info.UpdatesFiles; 
IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >(); long fieldInfosGen = Info.FieldInfosGen; foreach (FieldInfo fi in fieldInfos) { long dvGen = fi.DocValuesGen; if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen)) { if (dvGen == fieldInfosGen) { newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles; } else { newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen]; } } } Info.SetGenUpdatesFiles(newGenUpdatesFiles); // wrote new files, should checkpoint() writer.Checkpoint(); // if there is a reader open, reopen it to reflect the updates if (reader != null) { SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount); bool reopened = false; try { reader.DecRef(); reader = newReader; reopened = true; } finally { if (!reopened) { newReader.DecRef(); } } } } finally { UninterruptableMonitor.Exit(this); } }
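A minimal sketch of where this method fits in practice, assuming the Lucene.NET 4.8 public API (IndexWriter.UpdateNumericDocValue, StandardAnalyzer from the analysis-common package, RAMDirectory); none of these names appear in the method above, so treat this purely as an illustration of the kind of in-place doc-values update that WriteFieldUpdates later persists as a new _X_N docvalues generation.
// Hypothetical usage sketch (assumed Lucene.NET 4.8 APIs, not taken from the code above).
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

var dir = new RAMDirectory();
var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
using (var writer = new IndexWriter(dir, new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)))
{
    var doc = new Document();
    doc.Add(new StringField("id", "1", Field.Store.YES));
    doc.Add(new NumericDocValuesField("price", 10L));
    writer.AddDocument(doc);
    writer.Commit();

    // Update the doc-values field without re-indexing the document; on the next
    // commit the affected segment gets a new docvalues generation written to disk.
    writer.UpdateNumericDocValue(new Term("id", "1"), "price", 15L);
    writer.Commit();
}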
public virtual void TestPositions() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // f1,f2,f3: docs only FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = IndexOptions.DOCS_ONLY; Field f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); Field f2 = NewField("f2", "this field has docs only", ft); d.Add(f2); Field f3 = NewField("f3", "this field has docs only", ft); d.Add(f3); FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED); ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS; // f4,f5,f6 docs and freqs Field f4 = NewField("f4", "this field has docs and freqs", ft2); d.Add(f4); Field f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); Field f6 = NewField("f6", "this field has docs and freqs", ft2); d.Add(f6); FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED); ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; // f7,f8,f9 docs/freqs/positions Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3); d.Add(f7); Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3); d.Add(f8); Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); writer.ForceMerge(1); // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8, // and docs/freqs/positions for f3, f6, f9 d = new Document(); // f1,f4,f7: docs only f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); f4 = NewField("f4", "this field has docs only", ft); d.Add(f4); f7 = NewField("f7", "this field has docs only", ft); d.Add(f7); // f2, f5, f8: docs and freqs f2 = NewField("f2", "this field has docs and freqs", ft2); d.Add(f2); f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); f8 = NewField("f8", "this field has docs and freqs", ft2); d.Add(f8); // f3, f6, f9: docs and freqs and positions f3 = NewField("f3", "this field has docs and freqs and positions", ft3); d.Add(f3); f6 = NewField("f6", "this field has docs and freqs and positions", ft3); d.Add(f6); f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; // docs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions); // docs + docs/freqs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions); // docs + docs/freqs/pos = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions); // docs/freqs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions); // docs/freqs + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions); // docs/freqs + docs/freqs/pos = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions); // docs/freqs/pos + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions); // docs/freqs/pos + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions); // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions); reader.Dispose(); 
ram.Dispose(); }
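The rule the test above exercises is that when the same field is indexed with different IndexOptions across documents, the merged segment keeps the weakest option (DOCS_ONLY < DOCS_AND_FREQS < DOCS_AND_FREQS_AND_POSITIONS). Below is a hedged, self-contained sketch of the same behavior using assumed Lucene.NET 4.8 public APIs (StandardAnalyzer instead of the test's MockAnalyzer, and MultiFields.GetMergedFieldInfos, which the test does not use).
// Hypothetical sketch of the IndexOptions downgrade rule (assumed 4.8 APIs).
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

var dir = new RAMDirectory();
var conf = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
using (var writer = new IndexWriter(dir, conf))
{
    var freqsType = new FieldType(TextField.TYPE_NOT_STORED) { IndexOptions = IndexOptions.DOCS_AND_FREQS };
    var docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED) { IndexOptions = IndexOptions.DOCS_ONLY };

    var d1 = new Document();
    d1.Add(new Field("f", "first doc indexed with docs and freqs", freqsType));
    writer.AddDocument(d1);

    var d2 = new Document();
    d2.Add(new Field("f", "second doc indexed with docs only", docsOnlyType));
    writer.AddDocument(d2);

    writer.ForceMerge(1); // merging the two docs downgrades "f" to the weakest option
}

using (var reader = DirectoryReader.Open(dir))
{
    // Expected to report DOCS_ONLY, mirroring the f1..f9 assertions in the test above.
    System.Console.WriteLine(MultiFields.GetMergedFieldInfos(reader).FieldInfo("f").IndexOptions);
}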
// Writes field updates (new _X_N updates files) to the directory public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) { lock (this) { //Debug.Assert(Thread.holdsLock(Writer)); //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates); Debug.Assert(dvUpdates.Any()); // Do this so we can delete any created files on // exception; this saves all codecs from having to do // it: TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); FieldInfos fieldInfos = null; bool success = false; try { Codec codec = Info.Info.Codec; // reader could be null e.g. for a just merged segment (from // IndexWriter.commitMergedDeletes). SegmentReader reader = this.Reader == null ? new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE) : this.Reader; try { // clone FieldInfos so that we can update their dvGen separately from // the reader's infos and write them to a new fieldInfos_gen file FieldInfos.Builder builder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap); // cannot use builder.add(reader.getFieldInfos()) because it does not // clone FI.attributes as well FI.dvGen foreach (FieldInfo fi in reader.FieldInfos) { FieldInfo clone = builder.Add(fi); // copy the stuff FieldInfos.Builder doesn't copy if (fi.Attributes() != null) { foreach (KeyValuePair <string, string> e in fi.Attributes()) { clone.PutAttribute(e.Key, e.Value); } } clone.DocValuesGen = fi.DocValuesGen; } // create new fields or update existing ones to have NumericDV type foreach (string f in dvUpdates.NumericDVUpdates.Keys) { builder.AddOrUpdate(f, NumericDocValuesField.TYPE); } // create new fields or update existing ones to have BinaryDV type foreach (string f in dvUpdates.BinaryDVUpdates.Keys) { builder.AddOrUpdate(f, BinaryDocValuesField.fType); } fieldInfos = builder.Finish(); long nextFieldInfosGen = Info.NextFieldInfosGen; string segmentSuffix = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX)); SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix); DocValuesFormat docValuesFormat = codec.DocValuesFormat(); DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state); bool fieldsConsumerSuccess = false; try { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates); foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates) { string field = e.Key; NumericDocValuesFieldUpdates fieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); Debug.Assert(fieldInfo != null); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddNumericField(fieldInfo, GetLongEnumerable(reader, field, fieldUpdates)); } // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates); foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates) { string field = e.Key; BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); Debug.Assert(fieldInfo != null); // System.out.println("[" + Thread.currentThread().getName() + "] 
RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates)); } codec.FieldInfosFormat().FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT); fieldsConsumerSuccess = true; } finally { if (fieldsConsumerSuccess) { fieldsConsumer.Dispose(); } else { IOUtils.CloseWhileHandlingException(fieldsConsumer); } } } finally { if (reader != this.Reader) { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader); reader.Dispose(); } } success = true; } finally { if (!success) { // Advance only the nextWriteDocValuesGen so that a 2nd // attempt to write will write to a new file Info.AdvanceNextWriteFieldInfosGen(); // Delete any partially created file(s): foreach (string fileName in trackingDir.CreatedFiles) { try { dir.DeleteFile(fileName); } catch (Exception) { // Ignore so we throw only the first exc } } } } Info.AdvanceFieldInfosGen(); // copy all the updates to mergingUpdates, so they can later be applied to the merged segment if (IsMerging) { foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates) { DocValuesFieldUpdates updates; if (!MergingDVUpdates.TryGetValue(e.Key, out updates)) { MergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates) { DocValuesFieldUpdates updates; if (!MergingDVUpdates.TryGetValue(e.Key, out updates)) { MergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } } // create a new map, keeping only the gens that are in use IDictionary <long, ISet <string> > genUpdatesFiles = Info.UpdatesFiles; IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >(); long fieldInfosGen = Info.FieldInfosGen; foreach (FieldInfo fi in fieldInfos) { long dvGen = fi.DocValuesGen; if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen)) { if (dvGen == fieldInfosGen) { newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles; } else { newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen]; } } } Info.GenUpdatesFiles = newGenUpdatesFiles; // wrote new files, should checkpoint() Writer.Checkpoint(); // if there is a reader open, reopen it to reflect the updates if (Reader != null) { SegmentReader newReader = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed); bool reopened = false; try { Reader.DecRef(); Reader = newReader; reopened = true; } finally { if (!reopened) { newReader.DecRef(); } } } } }
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) { segment = si.name; this.si = si; this.readBufferSize = readBufferSize; bool success = false; try { // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (si.GetUseCompoundFile()) { cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); cfsDir = cfsReader; } Directory storeDir; if (doOpenStores) { if (si.GetDocStoreOffset() != -1) { if (si.GetDocStoreIsCompoundFile()) { storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize); storeDir = storeCFSReader; } else { storeDir = Directory(); } } else { storeDir = cfsDir; } } else { storeDir = null; } fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); bool anyProx = false; int numFields = fieldInfos.Size(); for (int i = 0; !anyProx && i < numFields; i++) { if (!fieldInfos.FieldInfo(i).omitTf) { anyProx = true; } } System.String fieldsSegment; if (si.GetDocStoreOffset() != -1) { fieldsSegment = si.GetDocStoreSegment(); } else { fieldsSegment = segment; } if (doOpenStores) { fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); // Verify two sources of "maxDoc" agree: if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount) { throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount); } } tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize); LoadDeletedDocs(); // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize); if (anyProx) { proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize); } OpenNorms(cfsDir, readBufferSize); if (doOpenStores && fieldInfos.HasVectors()) { // open term vector files only as needed System.String vectorsSegment; if (si.GetDocStoreOffset() != -1) { vectorsSegment = si.GetDocStoreSegment(); } else { vectorsSegment = segment; } termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { DoClose(); } } }
internal virtual SegmentReader ReopenSegment(SegmentInfo si) { lock (this) { bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName())); bool normsUpToDate = true; bool[] fieldNormsChanged = new bool[fieldInfos.Size()]; if (normsUpToDate) { for (int i = 0; i < fieldInfos.Size(); i++) { if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i))) { normsUpToDate = false; fieldNormsChanged[i] = true; } } } if (normsUpToDate && deletionsUpToDate) { return(this); } // clone reader SegmentReader clone; if (readOnly) { clone = new ReadOnlySegmentReader(); } else { clone = new SegmentReader(); } bool success = false; try { clone.readOnly = readOnly; clone.directory = directory; clone.si = si; clone.segment = segment; clone.readBufferSize = readBufferSize; clone.cfsReader = cfsReader; clone.storeCFSReader = storeCFSReader; clone.fieldInfos = fieldInfos; clone.tis = tis; clone.freqStream = freqStream; clone.proxStream = proxStream; clone.termVectorsReaderOrig = termVectorsReaderOrig; // we have to open a new FieldsReader, because it is not thread-safe // and can thus not be shared among multiple SegmentReaders // TODO: Change this in case FieldsReader becomes thread-safe in the future System.String fieldsSegment; Directory storeDir = Directory(); if (si.GetDocStoreOffset() != -1) { fieldsSegment = si.GetDocStoreSegment(); if (storeCFSReader != null) { storeDir = storeCFSReader; } } else { fieldsSegment = segment; if (cfsReader != null) { storeDir = cfsReader; } } if (fieldsReader != null) { clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); } if (!deletionsUpToDate) { // load deleted docs clone.deletedDocs = null; clone.LoadDeletedDocs(); } else { clone.deletedDocs = this.deletedDocs; } clone.norms = new System.Collections.Hashtable(); if (!normsUpToDate) { // load norms for (int i = 0; i < fieldNormsChanged.Length; i++) { // copy unchanged norms to the cloned reader and incRef those norms if (!fieldNormsChanged[i]) { System.String curField = fieldInfos.FieldInfo(i).name; Norm norm = (Norm)this.norms[curField]; norm.IncRef(); clone.norms[curField] = norm; } } clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize); } else { System.Collections.IEnumerator it = norms.Keys.GetEnumerator(); while (it.MoveNext()) { System.String field = (System.String)it.Current; Norm norm = (Norm)norms[field]; norm.IncRef(); clone.norms[field] = norm; } } if (clone.singleNormStream == null) { for (int i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { Directory d = si.GetUseCompoundFile() ? cfsReader : Directory(); System.String fileName = si.GetNormFileName(fi.number); if (si.HasSeparateNorms(fi.number)) { continue; } if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)) { clone.singleNormStream = d.OpenInput(fileName, readBufferSize); break; } } } } success = true; } finally { if (this.referencedSegmentReader != null) { // this reader shares resources with another SegmentReader, // so we increment the other readers refCount. 
We don't // increment the refCount of the norms because we did // that already for the shared norms clone.referencedSegmentReader = this.referencedSegmentReader; referencedSegmentReader.IncRefReaderNotNorms(); } else { // this reader wasn't reopened, so we increment this // reader's refCount clone.referencedSegmentReader = this; IncRefReaderNotNorms(); } if (!success) { // An exception occurred during reopen, we have to decRef the norms // that we incRef'ed already and close singleNormStream and FieldsReader clone.DecRef(); } } return(clone); } }
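ReopenSegment is the per-segment mechanism behind reader reopening in this 2.x/3.x-era codebase: it returns the same instance when nothing changed and otherwise clones the reader, sharing every unchanged resource. A hedged sketch of the application-level counterpart follows; IndexReader.Open/Reopen/Close are assumed from the same era's public API, and 'dir' is a hypothetical Directory.
// Hypothetical usage sketch (2.x/3.x-era API assumed; 'dir' is hypothetical).
IndexReader reader = IndexReader.Open(dir, true); // open a read-only reader
// ... the index is modified elsewhere by an IndexWriter ...
IndexReader reopened = reader.Reopen();           // per segment, this ends up in ReopenSegment
if (reopened != reader)
{
    reader.Close();    // release the old reader; shared norms/segments stay ref-counted
    reader = reopened;
}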
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) { segment = si.name; this.si = si; this.readBufferSize = readBufferSize; bool success = false; try { // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (si.GetUseCompoundFile()) { cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); cfsDir = cfsReader; } Directory storeDir; if (doOpenStores) { if (si.GetDocStoreOffset() != - 1) { if (si.GetDocStoreIsCompoundFile()) { storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize); storeDir = storeCFSReader; } else { storeDir = Directory(); } } else { storeDir = cfsDir; } } else storeDir = null; fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); bool anyProx = false; int numFields = fieldInfos.Size(); for (int i = 0; !anyProx && i < numFields; i++) if (!fieldInfos.FieldInfo(i).omitTf) anyProx = true; System.String fieldsSegment; if (si.GetDocStoreOffset() != - 1) fieldsSegment = si.GetDocStoreSegment(); else fieldsSegment = segment; if (doOpenStores) { fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); // Verify two sources of "maxDoc" agree: if (si.GetDocStoreOffset() == - 1 && fieldsReader.Size() != si.docCount) { throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount); } } tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize); LoadDeletedDocs(); // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize); if (anyProx) proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize); OpenNorms(cfsDir, readBufferSize); if (doOpenStores && fieldInfos.HasVectors()) { // open term vector files only as needed System.String vectorsSegment; if (si.GetDocStoreOffset() != - 1) vectorsSegment = si.GetDocStoreSegment(); else vectorsSegment = segment; termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { DoClose(); } } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader)readers[readers.Count - 1]; fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } foreach (IndexReader reader in readers) { if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; FieldInfos readerFieldInfos = segmentReader.FieldInfos(); int numReaderFieldInfos = readerFieldInfos.Size(); for (int j = 0; j < numReaderFieldInfos; j++) { FieldInfo fi = readerFieldInfos.FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false); fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; SetMatchingSegmentReaders(); if (mergeDocStores) { // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { int idx = 0; foreach (IndexReader reader in readers) { SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; FieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); if (fieldsReader != null && fieldsReader.CanReadRawDocs()) { matchingFieldsReader = fieldsReader; } } if (reader.HasDeletions) { docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader); } else { docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader); } } } finally { fieldsWriter.Dispose(); } System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; long fdxFileLength = directory.FileLength(fileName); if (4 + ((long)docCount) * 8 != fdxFileLength) { // This is most likely a bug in Sun JRE 1.6.0_04/_05; // we detect that the bug has struck, here, and // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. 
throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption"); } } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else { foreach (IndexReader reader in readers) { docCount += reader.NumDocs(); } } return(docCount); }
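MergeFields runs inside SegmentMerger whenever segments are merged. A hedged sketch of how such a merge is typically triggered from application code in the 2.9/3.0-era API follows; 'dir' is a hypothetical Directory, and the IndexWriter/Field/Optimize calls are assumed from that era's public API rather than taken from the code above.
// Hypothetical trigger for a merge (2.9/3.0-era API assumed; 'dir' is hypothetical).
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
for (int i = 0; i < 100; i++)
{
    Document doc = new Document();
    doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "some text for document " + i, Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
}
writer.Optimize(); // merges all segments into one; field infos and stored fields are merged in MergeFields
writer.Close();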
public virtual void TestExactFileNames() { System.String outputDir = "lucene.backwardscompat0.index"; RmDir(outputDir); try { Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir))); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetRAMBufferSizeMB(16.0); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count"); writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float)1.5); reader.Close(); // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm"); int contentFieldIndex = -1; for (int i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.name_ForNUnit.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _0.cfs segment"); // Now verify file names: System.String[] expected; expected = new System.String[] { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" }; System.String[] actual = dir.ListAll(); System.Array.Sort(expected); System.Array.Sort(actual); if (!CollectionsHelper.Equals(expected, actual)) { Assert.Fail("incorrect filenames in index: expected:\n " + AsString(expected) + "\n actual:\n " + AsString(actual)); } dir.Close(); } finally { RmDir(outputDir); } }
/// <summary>Produce _X.nrm if any document had a field with norms /// not disabled /// </summary> public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary byField = new System.Collections.Hashtable(); // Typically, each thread will have encountered the same // field. So first we collate by field, ie, all // per-thread field instances that correspond to the // same FieldInfo System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.ArrayList fieldsToRemove = new System.Collections.ArrayList(); while (fieldsIt.MoveNext()) { NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; if (perField.upto > 0) { // It has some norms System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo]; if (l == null) { l = new System.Collections.ArrayList(); byField[perField.fieldInfo] = l; } l.Add(perField); } // Remove this field since we haven't seen it // since the previous flush else { fieldsToRemove.Add(perField); } } System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields; for (int i = 0; i < fieldsToRemove.Count; i++) { fieldsHT.Remove(fieldsToRemove[i]); } } System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION; state.flushedFiles[normsFileName] = normsFileName; IndexOutput normsOut = state.directory.CreateOutput(normsFileName); try { normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length); int numField = fieldInfos.Size(); int normCount = 0; for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++) { FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber); System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo]; int upto = 0; if (toMerge != null) { int numFields = toMerge.Count; normCount++; NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; int[] uptos = new int[numFields]; for (int j = 0; j < numFields; j++) { fields[j] = (NormsWriterPerField)toMerge[j]; } int numLeft = numFields; while (numLeft > 0) { System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" + (fields [0].docIDs.Length)); int minLoc = 0; int minDocID = fields[0].docIDs[uptos[0]]; for (int j = 1; j < numLeft; j++) { int docID = fields[j].docIDs[uptos[j]]; if (docID < minDocID) { minDocID = docID; minLoc = j; } } System.Diagnostics.Debug.Assert(minDocID < state.numDocs); // Fill hole for (; upto < minDocID; upto++) { normsOut.WriteByte(defaultNorm); } normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]); (uptos[minLoc])++; upto++; if (uptos[minLoc] == fields[minLoc].upto) { fields[minLoc].Reset(); if (minLoc != numLeft - 1) { fields[minLoc] = fields[numLeft - 1]; uptos[minLoc] = uptos[numLeft - 1]; } numLeft--; } } // Fill final hole with defaultNorm for (; upto < state.numDocs; upto++) { normsOut.WriteByte(defaultNorm); } } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) { normCount++; // Fill entire field with default norm: for (; upto < state.numDocs; upto++) { normsOut.WriteByte(defaultNorm); } } System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == 
normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer()); } } finally { normsOut.Close(); } }
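Flush above writes one norm byte per document for every indexed field that does not omit norms, filling gaps with defaultNorm; fields flagged omitNorms contribute nothing to the _X.nrm file. A hedged sketch of the indexing side follows; the Field.Index options are assumed from the 2.9/3.0-era public API and 'writer' is a hypothetical IndexWriter.
// Hypothetical indexing sketch (2.9/3.0-era API assumed; 'writer' is hypothetical).
Document doc = new Document();
doc.Add(new Field("title", "lucene in action", Field.Store.YES, Field.Index.ANALYZED));   // indexed with norms: Flush writes a norm byte
doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));       // omitNorms: skipped by Flush
writer.AddDocument(doc);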