Esempio n. 1
0
        /// <summary>
        /// Divides a source index into several output indexes, one per target directory.
        /// </summary>
        /// <param name="version">Lucene compatibility version handed to the writer configuration of every part.</param>
        /// <param name="in">The index to split; it may contain deletions and may span
        /// several segments (or several readers).</param>
        /// <param name="outputs">Target directories, one per output part; at least two are required.</param>
        /// <param name="seq">When <c>true</c>, each part receives a contiguous, increasing range of
        /// document ids (the last part additionally takes whatever integer division left over).
        /// When <c>false</c>, document ids are dealt out to the parts in a deterministic
        /// round-robin order.</param>
        /// <exception cref="IOException">Thrown when fewer than two outputs are given, when the source
        /// is <c>null</c> or holds fewer than two documents, or when a low-level I/O error occurs.</exception>
        public virtual void Split(LuceneVersion version, IndexReader @in, Store.Directory[] outputs, bool seq)
        {
            if (outputs == null || outputs.Length < 2)
            {
                throw new IOException("Invalid number of outputs.");
            }
            if (@in == null || @in.NumDocs < 2)
            {
                throw new IOException("Not enough documents for splitting");
            }

            // Deletions are only ever applied to this wrapper, never to the caller's
            // (potentially read-only) reader, so the source's own deletions need no bookkeeping.
            FakeDeleteIndexReader wrapped = new FakeDeleteIndexReader(@in);
            int partCount = outputs.Length;
            int docLimit = wrapped.MaxDoc;
            int sliceSize = docLimit / partCount;

            for (int part = 0; part < partCount; part++)
            {
                // Start every part from a clean slate, then hide each document that belongs elsewhere.
                wrapped.UndeleteAll();

                if (!seq)
                {
                    // Round-robin: document ids congruent to `part` modulo `partCount` stay visible.
                    for (int doc = 0; doc < docLimit; doc++)
                    {
                        bool ownedByThisPart = (doc + partCount - part) % partCount == 0;
                        if (!ownedByThisPart)
                        {
                            wrapped.DeleteDocument(doc);
                        }
                    }
                }
                else
                {
                    // Sequential: keep [firstKept, pastLastKept); the final part keeps everything
                    // from firstKept onwards so ids lost to integer rounding are not dropped.
                    int firstKept = sliceSize * part;
                    int pastLastKept = firstKept + sliceSize;
                    bool isFinalPart = part >= partCount - 1;

                    for (int doc = 0; doc < docLimit; doc++)
                    {
                        bool belowRange = doc < firstKept;
                        bool aboveRange = !isFinalPart && doc >= pastLastKept;
                        if (belowRange || aboveRange)
                        {
                            wrapped.DeleteDocument(doc);
                        }
                    }
                }

                IndexWriterConfig config = new IndexWriterConfig(version, null)
                {
                    OpenMode = OpenMode.CREATE
                };
                using (IndexWriter writer = new IndexWriter(outputs[part], config))
                {
                    Console.Error.WriteLine("Writing part " + (part + 1) + " ...");
                    // Hand over the sub-readers rather than the wrapper itself: the wrapper's
                    // numDocs/hasDeletions are not kept up to date.
                    IList<IndexReader> subReaders = wrapped.GetSequentialSubReaders();
                    writer.AddIndexes(subReaders.ToArray()); // TODO: maybe take List<IR> here?
                }
            }

            Console.Error.WriteLine("Done.");
        }
Esempio n. 2
0
        /// <summary>
        /// Split source index into multiple parts. </summary>
        /// <param name="version"> lucene compatibility version, passed to the writer configuration
        /// of every output part. </param>
        /// <param name="in"> source index, can have deletions, can have
        /// multiple segments (or multiple readers). </param>
        /// <param name="outputs"> list of directories where the output parts will be stored;
        /// at least two are required. </param>
        /// <param name="seq"> if true, then the source index will be split into equal
        /// increasing ranges of document id-s. If false, source document id-s will be
        /// assigned in a deterministic round-robin fashion to one of the output splits. </param>
        /// <exception cref="IOException"> If fewer than two outputs are given, if the source is
        /// null or holds fewer than two documents, or if there is a low-level I/O error </exception>
        public virtual void Split(Version version, IndexReader @in, Directory[] outputs, bool seq)
        {
            if (outputs == null || outputs.Length < 2)
            {
                throw new IOException("Invalid number of outputs.");
            }
            if (@in == null || @in.numDocs() < 2)
            {
                throw new IOException("Not enough documents for splitting");
            }
            int numParts = outputs.Length;
            // wrap a potentially read-only input
            // this way we don't have to preserve original deletions because neither
            // deleteDocument(int) or undeleteAll() is applied to the wrapped input index.
            FakeDeleteIndexReader input = new FakeDeleteIndexReader(@in);
            int maxDoc  = input.maxDoc();
            int partLen = maxDoc / numParts;

            for (int i = 0; i < numParts; i++)
            {
                // reset the fake deletions left over from the previous part
                input.undeleteAll();
                if (seq)   // sequential range
                {
                    int lo = partLen * i;
                    int hi = lo + partLen;
                    // below range
                    for (int j = 0; j < lo; j++)
                    {
                        input.deleteDocument(j);
                    }
                    // above range - last part collects all id-s that remained due to
                    // integer rounding errors
                    if (i < numParts - 1)
                    {
                        for (int j = hi; j < maxDoc; j++)
                        {
                            input.deleteDocument(j);
                        }
                    }
                }
                else
                {
                    // round-robin: keep only the id-s congruent to i modulo numParts
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if ((j + numParts - i) % numParts != 0)
                        {
                            input.deleteDocument(j);
                        }
                    }
                }
                IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(version, null)
                                                .setOpenMode(OpenMode.CREATE));
                try
                {
                    Console.Error.WriteLine("Writing part " + (i + 1) + " ...");
                    // pass the subreaders directly, as our wrapper's numDocs/hasDeletetions are not up-to-date
                    // (the Java wildcard list type has no C# equivalent, so let the compiler infer it)
                    var sr = input.SequentialSubReaders;
                    w.addIndexes(sr.ToArray());   // TODO: maybe take List<IR> here?
                }
                finally
                {
                    // always close the writer, even when adding the indexes throws
                    w.close();
                }
            }
            Console.Error.WriteLine("Done.");
        }