コード例 #1
0
        /// <summary>
        /// Construct a Taxonomy writer.
        /// </summary>
        /// <remarks>
        /// The constructor opens the underlying index writer first and only then
        /// determines the index epoch: the epoch is 1 when no index exists in the
        /// directory, or when the commit data carries no <c>INDEX_EPOCH</c> entry;
        /// otherwise the stored value is parsed as a hexadecimal number. If the
        /// effective open mode (taken from the writer configuration, falling back to
        /// <c>CREATE_OR_APPEND</c> when unset) is <c>CREATE</c>, the epoch is
        /// incremented. If no index writer could be opened (it is <c>null</c>), the
        /// constructor returns before the next category id and the cache are set up.
        /// </remarks>
        /// <param name="directory">
        ///    The <see cref="Directory"/> in which to store the taxonomy. Note that
        ///    the taxonomy is written directly to that directory (not to a
        ///    subdirectory of it). </param>
        /// <param name="openMode">
        ///    Specifies how to open a taxonomy for writing: <c>APPEND</c>
        ///    means open an existing index for append (failing if the index does
        ///    not yet exist). <c>CREATE</c> means create a new index (first
        ///    deleting the old one if it already existed).
        ///    <c>CREATE_OR_APPEND</c> appends to an existing index if there
        ///    is one, otherwise it creates a new index. </param>
        /// <param name="cache">
        ///    A <see cref="TaxonomyWriterCache"/> implementation which determines
        ///    the in-memory caching policy. See for example
        ///    <see cref="LruTaxonomyWriterCache"/> and <see cref="Cl2oTaxonomyWriterCache"/>.
        ///    If <c>null</c>, <see cref="DefaultTaxonomyWriterCache()"/> is used. </param>
        /// <exception cref="CorruptIndexException">
        ///     if the taxonomy is corrupted. </exception>
        /// <exception cref="LockObtainFailedException">
        ///     if the taxonomy is locked by another writer. If it is known
        ///     that no other concurrent writer is active, the lock might
        ///     have been left around by an old dead process, and should be
        ///     removed using <c>unlock(Directory)</c>. </exception>
        /// <exception cref="IOException">
        ///     if another error occurred. </exception>
        public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache)
        {
            dir = directory;
            IndexWriterConfig config = CreateIndexWriterConfig(openMode);
            indexWriter = OpenIndexWriter(dir, config);

            // verify (to some extent) that merge policy in effect would preserve category docids
            if (indexWriter != null)
            {
                Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging none-adjacent segments is not allowed");
            }

            // after we opened the writer, and the index is locked, it's safe to check
            // the commit data and read the index epoch
            openMode = config.OpenMode.HasValue ? config.OpenMode.Value : OpenMode.CREATE_OR_APPEND;
            if (!DirectoryReader.IndexExists(directory))
            {
                indexEpoch = 1;
            }
            else
            {
                // Single lookup: TryGetValue leaves epochStr null when the key is absent.
                string epochStr = null;
                IDictionary<string, string> commitData = ReadCommitData(directory);
                if (commitData != null)
                {
                    commitData.TryGetValue(INDEX_EPOCH, out epochStr);
                }
                // no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack
                // of a better value. The stored epoch is a hexadecimal string.
                indexEpoch = epochStr == null ? 1 : Convert.ToInt64(epochStr, 16);
            }

            // (Re)creating the index starts a new epoch.
            if (openMode == OpenMode.CREATE)
            {
                ++indexEpoch;
            }

            // The reusable fields are initialized even when there is no writer,
            // i.e. before the early return below.
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.OmitNorms = true;
            parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
            fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);

            if (indexWriter == null)
            {
                return;
            }

            // The next category id continues from the number of documents already in the index.
            nextID = indexWriter.MaxDoc;

            if (cache == null)
            {
                cache = DefaultTaxonomyWriterCache();
            }
            this.cache = cache;

            if (nextID == 0)
            {
                cacheIsComplete = true;
                // Make sure that the taxonomy always contain the root category
                // with category id 0.
                AddCategory(new FacetLabel());
            }
            else
            {
                // There are some categories on the disk, which we have not yet
                // read into the cache, and therefore the cache is incomplete.
                // We choose not to read all the categories into the cache now,
                // to avoid terrible performance when a taxonomy index is opened
                // to add just a single category. We will do it later, after we
                // notice a few cache misses.
                cacheIsComplete = false;
            }
        }
コード例 #2
0
        /// <summary>
        /// Construct a Taxonomy writer.
        /// </summary>
        /// <remarks>
        /// Order of work: the index writer is opened, the index epoch is resolved
        /// (1 for a missing index or for commit data without an <c>INDEX_EPOCH</c>
        /// entry, otherwise the stored hexadecimal value), and the epoch is bumped
        /// when the writer configuration reports <c>CREATE</c> as its open mode
        /// (an unset mode is treated as <c>CREATE_OR_APPEND</c>). When the opened
        /// index writer is <c>null</c>, the constructor stops after creating its
        /// reusable fields and does not initialize the cache.
        /// </remarks>
        /// <param name="directory">
        ///    The <see cref="Directory"/> in which to store the taxonomy. Note that
        ///    the taxonomy is written directly to that directory (not to a
        ///    subdirectory of it). </param>
        /// <param name="openMode">
        ///    Specifies how to open a taxonomy for writing: <c>APPEND</c>
        ///    means open an existing index for append (failing if the index does
        ///    not yet exist). <c>CREATE</c> means create a new index (first
        ///    deleting the old one if it already existed).
        ///    <c>CREATE_OR_APPEND</c> appends to an existing index if there
        ///    is one, otherwise it creates a new index. </param>
        /// <param name="cache">
        ///    A <see cref="TaxonomyWriterCache"/> implementation which determines
        ///    the in-memory caching policy. See for example
        ///    <see cref="LruTaxonomyWriterCache"/> and <see cref="Cl2oTaxonomyWriterCache"/>.
        ///    If <c>null</c>, <see cref="DefaultTaxonomyWriterCache()"/> is used. </param>
        /// <exception cref="CorruptIndexException">
        ///     if the taxonomy is corrupted. </exception>
        /// <exception cref="LockObtainFailedException">
        ///     if the taxonomy is locked by another writer. If it is known
        ///     that no other concurrent writer is active, the lock might
        ///     have been left around by an old dead process, and should be
        ///     removed using <c>unlock(Directory)</c>. </exception>
        /// <exception cref="IOException">
        ///     if another error occurred. </exception>
        public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache)
        {
            dir = directory;
            IndexWriterConfig config = CreateIndexWriterConfig(openMode);
            indexWriter = OpenIndexWriter(dir, config);

            // verify (to some extent) that merge policy in effect would preserve category docids
            if (indexWriter != null)
            {
                Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging none-adjacent segments is not allowed");
            }

            // after we opened the writer, and the index is locked, it's safe to check
            // the commit data and read the index epoch.
            // The mode reported by the configuration wins over the requested one;
            // an unset mode is treated as CREATE_OR_APPEND.
            OpenMode effectiveMode = config.OpenMode.HasValue ? config.OpenMode.Value : OpenMode.CREATE_OR_APPEND;

            if (DirectoryReader.IndexExists(directory))
            {
                // One dictionary lookup instead of ContainsKey + indexer; a missing
                // key leaves storedEpoch as null.
                string storedEpoch = null;
                IDictionary<string, string> commitData = ReadCommitData(directory);
                if (commitData != null)
                {
                    commitData.TryGetValue(INDEX_EPOCH, out storedEpoch);
                }

                // no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack
                // of a better value. Otherwise parse the stored hexadecimal epoch.
                indexEpoch = storedEpoch == null ? 1 : Convert.ToInt64(storedEpoch, 16);
            }
            else
            {
                indexEpoch = 1;
            }

            // Creating the index anew advances the epoch.
            if (effectiveMode == OpenMode.CREATE)
            {
                ++indexEpoch;
            }

            // Reusable fields; these are set up regardless of whether a writer exists.
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.OmitNorms = true;
            parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
            fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);

            if (indexWriter == null)
            {
                return;
            }

            // Category ids continue from the number of documents already indexed.
            nextID = indexWriter.MaxDoc;

            // Fall back to the default cache when the caller supplied none.
            this.cache = cache ?? DefaultTaxonomyWriterCache();

            if (nextID == 0)
            {
                cacheIsComplete = true;
                // Make sure that the taxonomy always contain the root category
                // with category id 0.
                AddCategory(new FacetLabel());
            }
            else
            {
                // There are some categories on the disk, which we have not yet
                // read into the cache, and therefore the cache is incomplete.
                // We choose not to read all the categories into the cache now,
                // to avoid terrible performance when a taxonomy index is opened
                // to add just a single category. We will do it later, after we
                // notice a few cache misses.
                cacheIsComplete = false;
            }
        }