/// <summary>
/// Splits a source index into several parts, writing each part to its own output directory.
/// </summary>
/// <param name="version">Lucene compatibility version handed to each <see cref="IndexWriterConfig"/>.</param>
/// <param name="in">Source index; it can have deletions and can consist of
/// multiple segments (or multiple readers).</param>
/// <param name="outputs">Directories where the output parts will be stored, one part per directory.</param>
/// <param name="seq">If <c>true</c>, the source index is split into equal, increasing ranges
/// of document ids (the last part also receives the ids left over by integer division).
/// If <c>false</c>, source document ids are assigned to the output parts in a
/// deterministic round-robin fashion.</param>
/// <exception cref="IOException">If fewer than two outputs are given, if the source is
/// <c>null</c> or holds fewer than two documents, or if there is a low-level I/O error.</exception>
public virtual void Split(LuceneVersion version, IndexReader @in, Store.Directory[] outputs, bool seq)
{
    bool enoughOutputs = outputs != null && outputs.Length >= 2;
    if (!enoughOutputs)
    {
        throw new IOException("Invalid number of outputs.");
    }

    bool enoughDocs = @in != null && @in.NumDocs >= 2;
    if (!enoughDocs)
    {
        throw new IOException("Not enough documents for splitting");
    }

    int partCount = outputs.Length;

    // The source may be read-only, so all deletions are applied to a wrapper instead.
    // Neither DeleteDocument(int) nor UndeleteAll() reaches the wrapped index, which
    // means the original deletions never have to be saved and restored.
    FakeDeleteIndexReader wrapped = new FakeDeleteIndexReader(@in);
    int docLimit = wrapped.MaxDoc;
    int docsPerPart = docLimit / partCount;

    for (int part = 0; part < partCount; part++)
    {
        // Start every pass from a clean slate, then mask out everything
        // that does not belong to the current part.
        wrapped.UndeleteAll();

        bool isLastPart = part == partCount - 1;
        int rangeStart = docsPerPart * part;
        int rangeEnd = rangeStart + docsPerPart;

        for (int doc = 0; doc < docLimit; doc++)
        {
            bool drop;
            if (seq)
            {
                // Sequential range: drop ids below the range, and ids at or above its
                // end - except for the last part, which keeps the rounding remainder.
                drop = doc < rangeStart || (!isLastPart && doc >= rangeEnd);
            }
            else
            {
                // Round-robin: keep only the ids that land on this part's slot.
                drop = (doc + partCount - part) % partCount != 0;
            }

            if (drop)
            {
                wrapped.DeleteDocument(doc);
            }
        }

        IndexWriterConfig config = new IndexWriterConfig(version, null);
        config.OpenMode = OpenMode.CREATE;

        using (IndexWriter writer = new IndexWriter(outputs[part], config))
        {
            Console.Error.WriteLine("Writing part " + (part + 1) + " ...");

            // Hand over the sub-readers directly: the wrapper's own
            // numDocs/hasDeletions values are not up to date.
            IList<IndexReader> subReaders = wrapped.GetSequentialSubReaders();
            writer.AddIndexes(subReaders.ToArray()); // TODO: maybe take List<IR> here?
        }
    }

    Console.Error.WriteLine("Done.");
}
/// <summary>
/// Split source index into multiple parts.
/// </summary>
/// <param name="version">Lucene compatibility version used to configure each output writer.</param>
/// <param name="in">Source index, can have deletions, can have
/// multiple segments (or multiple readers).</param>
/// <param name="outputs">List of directories where the output parts will be stored.</param>
/// <param name="seq">If true, then the source index will be split into equal
/// increasing ranges of document id-s. If false, source document id-s will be
/// assigned in a deterministic round-robin fashion to one of the output splits.</param>
/// <exception cref="IOException">If fewer than two outputs are supplied, if the source is
/// <c>null</c> or contains fewer than two documents, or if there is a low-level I/O error.</exception>
public virtual void Split(Version version, IndexReader @in, Directory[] outputs, bool seq)
{
    if (outputs == null || outputs.Length < 2)
    {
        throw new IOException("Invalid number of outputs.");
    }
    if (@in == null || @in.NumDocs < 2)
    {
        throw new IOException("Not enough documents for splitting");
    }

    int numParts = outputs.Length;

    // Wrap a potentially read-only input. This way we don't have to preserve the
    // original deletions, because neither DeleteDocument(int) nor UndeleteAll()
    // is applied to the wrapped input index.
    FakeDeleteIndexReader input = new FakeDeleteIndexReader(@in);
    int maxDoc = input.MaxDoc;
    int partLen = maxDoc / numParts;

    for (int i = 0; i < numParts; i++)
    {
        // Reset the fake deletions before selecting the documents of this part.
        input.UndeleteAll();

        if (seq)
        {
            // Sequential range.
            int lo = partLen * i;
            int hi = lo + partLen;

            // Below range.
            for (int j = 0; j < lo; j++)
            {
                input.DeleteDocument(j);
            }

            // Above range - the last part collects all id-s that remained due to
            // integer rounding errors.
            if (i < numParts - 1)
            {
                for (int j = hi; j < maxDoc; j++)
                {
                    input.DeleteDocument(j);
                }
            }
        }
        else
        {
            // Round-robin: keep only the documents whose id maps to part i.
            for (int j = 0; j < maxDoc; j++)
            {
                if ((j + numParts - i) % numParts != 0)
                {
                    input.DeleteDocument(j);
                }
            }
        }

        // The using statement disposes the writer even when AddIndexes throws,
        // so a failed part does not leave its writer open.
        using (IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(version, null) { OpenMode = OpenMode.CREATE }))
        {
            Console.Error.WriteLine("Writing part " + (i + 1) + " ...");

            // Pass the sub-readers directly, as our wrapper's numDocs/hasDeletions
            // are not up to date.
            IList<IndexReader> sr = input.GetSequentialSubReaders();
            w.AddIndexes(sr.ToArray()); // TODO: maybe take List<IR> here?
        }
    }

    Console.Error.WriteLine("Done.");
}