/// <summary>
/// Constructs a taxonomy writer that stores the taxonomy directly in
/// <paramref name="directory"/> (not in a subdirectory of it).
/// </summary>
/// <remarks>
/// The index epoch is set to 1 when no index exists in the directory, or when the
/// existing commit data has no epoch entry; otherwise it is parsed from the commit
/// data as a hexadecimal number. When the effective open mode (taken from the writer
/// configuration, falling back to <c>CREATE_OR_APPEND</c> if unset) is <c>CREATE</c>,
/// the epoch is incremented by one.
/// <para>
/// If <c>OpenIndexWriter</c> returns <c>null</c>, the constructor returns right after
/// preparing its reusable fields, without initializing the cache or the next category id.
/// </para>
/// </remarks>
/// <param name="directory">
/// The <see cref="Directory"/> in which to store the taxonomy.
/// </param>
/// <param name="openMode">
/// Specifies how to open a taxonomy for writing: <c>APPEND</c> means open an existing
/// index for append (failing if the index does not yet exist). <c>CREATE</c> means
/// create a new index (first deleting the old one if it already existed).
/// <c>CREATE_OR_APPEND</c> appends to an existing index if there is one, otherwise
/// it creates a new index.
/// </param>
/// <param name="cache">
/// A <see cref="TaxonomyWriterCache"/> implementation which determines the in-memory
/// caching policy. See for example <see cref="LruTaxonomyWriterCache"/> and
/// <see cref="Cl2oTaxonomyWriterCache"/>. If <c>null</c>,
/// <see cref="DefaultTaxonomyWriterCache()"/> is used.
/// </param>
/// <exception cref="CorruptIndexException">
/// if the taxonomy is corrupted.
/// </exception>
/// <exception cref="LockObtainFailedException">
/// if the taxonomy is locked by another writer. If it is known that no other
/// concurrent writer is active, the lock might have been left around by an old dead
/// process, and should be removed by unlocking the <see cref="Directory"/>.
/// </exception>
/// <exception cref="IOException">
/// if another error occurred.
/// </exception>
public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache)
{
    dir = directory;
    IndexWriterConfig config = CreateIndexWriterConfig(openMode);
    indexWriter = OpenIndexWriter(dir, config);

    // Verify (to some extent) that the merge policy in effect would preserve category docids.
    if (indexWriter != null)
    {
        Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging none-adjacent segments is not allowed");
    }

    // After the writer is opened and the index is locked, it is safe to check
    // the commit data and read the index epoch. The configuration may have
    // changed the requested open mode, so use the configured value from here on.
    OpenMode effectiveOpenMode = config.OpenMode ?? OpenMode.CREATE_OR_APPEND;

    if (!DirectoryReader.IndexExists(directory))
    {
        indexEpoch = 1;
    }
    else
    {
        string epochStr = null;
        IDictionary<string, string> commitData = ReadCommitData(directory);
        if (commitData != null)
        {
            // Single lookup; leaves epochStr null when the key is absent.
            commitData.TryGetValue(INDEX_EPOCH, out epochStr);
        }

        // No commit data, or no epoch in it, means an old taxonomy, so set its
        // epoch to 1 for lack of a better value. The epoch is stored in hex.
        indexEpoch = epochStr == null ? 1 : Convert.ToInt64(epochStr, 16);
    }

    if (effectiveOpenMode == OpenMode.CREATE)
    {
        ++indexEpoch;
    }

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.OmitNorms = true;
    parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
    fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);

    // Without a writer there is nothing to read the document count from.
    if (indexWriter == null)
    {
        return;
    }

    nextID = indexWriter.MaxDoc;

    if (cache == null)
    {
        cache = DefaultTaxonomyWriterCache();
    }
    this.cache = cache;

    if (nextID == 0)
    {
        cacheIsComplete = true;
        // Make sure that the taxonomy always contains the root category
        // with category id 0.
        AddCategory(new FacetLabel());
    }
    else
    {
        // There are some categories on the disk, which we have not yet
        // read into the cache, and therefore the cache is incomplete.
        // We choose not to read all the categories into the cache now,
        // to avoid terrible performance when a taxonomy index is opened
        // to add just a single category. We will do it later, after we
        // notice a few cache misses.
        cacheIsComplete = false;
    }
}
/// <summary>
/// Constructs a taxonomy writer that stores the taxonomy directly in
/// <paramref name="directory"/> (not in a subdirectory of it).
/// </summary>
/// <remarks>
/// The index epoch is set to 1 when no index exists in the directory, or when the
/// existing commit data has no epoch entry; otherwise it is parsed from the commit
/// data as a hexadecimal number. When the effective open mode (taken from the writer
/// configuration, falling back to <c>CREATE_OR_APPEND</c> if unset) is <c>CREATE</c>,
/// the epoch is incremented by one.
/// <para>
/// If <c>OpenIndexWriter</c> returns <c>null</c>, the constructor returns right after
/// preparing its reusable fields, without initializing the cache or the next category id.
/// </para>
/// </remarks>
/// <param name="directory">
/// The <see cref="Directory"/> in which to store the taxonomy.
/// </param>
/// <param name="openMode">
/// Specifies how to open a taxonomy for writing: <c>APPEND</c> means open an existing
/// index for append (failing if the index does not yet exist). <c>CREATE</c> means
/// create a new index (first deleting the old one if it already existed).
/// <c>CREATE_OR_APPEND</c> appends to an existing index if there is one, otherwise
/// it creates a new index.
/// </param>
/// <param name="cache">
/// A <see cref="TaxonomyWriterCache"/> implementation which determines the in-memory
/// caching policy. See for example <see cref="LruTaxonomyWriterCache"/> and
/// <see cref="Cl2oTaxonomyWriterCache"/>. If <c>null</c>,
/// <see cref="DefaultTaxonomyWriterCache()"/> is used.
/// </param>
/// <exception cref="CorruptIndexException">
/// if the taxonomy is corrupted.
/// </exception>
/// <exception cref="LockObtainFailedException">
/// if the taxonomy is locked by another writer. If it is known that no other
/// concurrent writer is active, the lock might have been left around by an old dead
/// process, and should be removed by unlocking the <see cref="Directory"/>.
/// </exception>
/// <exception cref="IOException">
/// if another error occurred.
/// </exception>
public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache)
{
    dir = directory;
    IndexWriterConfig config = CreateIndexWriterConfig(openMode);
    indexWriter = OpenIndexWriter(dir, config);

    // Verify (to some extent) that the merge policy in effect would preserve category docids.
    if (indexWriter != null)
    {
        Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging none-adjacent segments is not allowed");
    }

    // After the writer is opened and the index is locked, it is safe to check
    // the commit data and read the index epoch. The configuration may have
    // changed the requested open mode, so use the configured value from here on.
    OpenMode effectiveOpenMode = config.OpenMode ?? OpenMode.CREATE_OR_APPEND;

    if (!DirectoryReader.IndexExists(directory))
    {
        indexEpoch = 1;
    }
    else
    {
        string epochStr = null;
        IDictionary<string, string> commitData = ReadCommitData(directory);
        if (commitData != null)
        {
            // Single lookup; leaves epochStr null when the key is absent.
            commitData.TryGetValue(INDEX_EPOCH, out epochStr);
        }

        // No commit data, or no epoch in it, means an old taxonomy, so set its
        // epoch to 1 for lack of a better value. The epoch is stored in hex.
        indexEpoch = epochStr == null ? 1 : Convert.ToInt64(epochStr, 16);
    }

    if (effectiveOpenMode == OpenMode.CREATE)
    {
        ++indexEpoch;
    }

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.OmitNorms = true;
    parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
    fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);

    // Without a writer there is nothing to read the document count from.
    if (indexWriter == null)
    {
        return;
    }

    nextID = indexWriter.MaxDoc;

    if (cache == null)
    {
        cache = DefaultTaxonomyWriterCache();
    }
    this.cache = cache;

    if (nextID == 0)
    {
        cacheIsComplete = true;
        // Make sure that the taxonomy always contains the root category
        // with category id 0.
        AddCategory(new FacetLabel());
    }
    else
    {
        // There are some categories on the disk, which we have not yet
        // read into the cache, and therefore the cache is incomplete.
        // We choose not to read all the categories into the cache now,
        // to avoid terrible performance when a taxonomy index is opened
        // to add just a single category. We will do it later, after we
        // notice a few cache misses.
        cacheIsComplete = false;
    }
}