// Splits the shared input index into three fresh directories with the
// sequential flag off (round-robin mode) and then checks every part:
//   * its document count is within one of NUM_DOCS / 3,
//   * the stored "id" of its first document is the part's own number,
//   * ids that belong to other parts are absent from its "id" terms.
public void TestSplitRR()
{
    MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();
    Directory[] dirs = new Directory[] { NewDirectory(), NewDirectory(), NewDirectory() };
    try
    {
        splitter.Split(TEST_VERSION_CURRENT, input, dirs, false);

        using (IndexReader ir = DirectoryReader.Open(dirs[0]))
        {
            assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1); // rounding error
            Document doc = ir.Document(0);
            assertEquals("0", doc.Get("id"));

            TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
            assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("1")));
            // Compare by value: a reference check against a freshly built string
            // could never fail and therefore verified nothing.
            assertFalse("1".Equals(te.Term.Utf8ToString()));
        }

        using (IndexReader ir = DirectoryReader.Open(dirs[1]))
        {
            assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
            Document doc = ir.Document(0);
            assertEquals("1", doc.Get("id"));

            TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
            assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("0")));
            assertFalse("0".Equals(te.Term.Utf8ToString()));
        }

        using (IndexReader ir = DirectoryReader.Open(dirs[2]))
        {
            assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
            Document doc = ir.Document(0);
            assertEquals("2", doc.Get("id"));

            TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
            assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("1")));
            // Previously this compared the string with the raw term object;
            // decode the term first so the check matches its siblings.
            assertFalse("1".Equals(te.Term.Utf8ToString()));
            assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("0")));
            assertFalse("0".Equals(te.Term.Utf8ToString()));
        }
    }
    finally
    {
        // Always release the output directories, even when an assertion fails.
        foreach (Directory d in dirs)
        {
            d.Dispose();
        }
    }
}
// Splits the shared input index into three fresh directories with the
// sequential flag on and then checks every part:
//   * its document count is within one of NUM_DOCS / 3,
//   * the stored "id" of its first document equals the number of documents
//     held by the preceding parts (ids continue where the last part ended),
//   * the last part does not contain the id NUM_DOCS - 1.
public void TestSplitSeq()
{
    MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();
    Directory[] dirs = new Directory[] { NewDirectory(), NewDirectory(), NewDirectory() };
    try
    {
        splitter.Split(TEST_VERSION_CURRENT, input, dirs, true);

        // Running total of documents seen in earlier parts; doubles as the
        // expected id of the next part's first document.
        int start;

        using (IndexReader ir = DirectoryReader.Open(dirs[0]))
        {
            assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
            Document doc = ir.Document(0);
            assertEquals("0", doc.Get("id"));
            start = ir.NumDocs;
        }

        using (IndexReader ir = DirectoryReader.Open(dirs[1]))
        {
            assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
            Document doc = ir.Document(0);
            assertEquals(start + "", doc.Get("id"));
            start += ir.NumDocs;
        }

        using (IndexReader ir = DirectoryReader.Open(dirs[2]))
        {
            assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
            Document doc = ir.Document(0);
            assertEquals(start + "", doc.Get("id"));

            // make sure the deleted doc is not here
            // (NOTE(review): the deletion happens in setup code that is not part
            // of this method; the id below is the one the original test checked.)
            string deletedId = (NUM_DOCS - 1) + "";
            TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
            assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef(deletedId)));
            // Compare by value: a reference check against a freshly built string
            // could never fail and therefore verified nothing.
            assertFalse(deletedId.Equals(te.Term.Utf8ToString()));
        }
    }
    finally
    {
        // Always release the output directories, even when an assertion fails.
        foreach (Directory d in dirs)
        {
            d.Dispose();
        }
    }
}
// Command-line entry point.
//
// Recognised arguments:
//   -out <outputDir>   directory that receives the "part-N" sub-directories
//   -num <numParts>    number of parts to produce (must be at least 2)
//   -seq               pass "sequential" to the splitter (otherwise round-robin)
//   anything else      treated as the path of an input index; paths that do not
//                      exist or do not contain an index are reported on stderr
//                      and skipped
//
// With fewer than five arguments the usage text is printed and the process
// exits with code -1. Missing or invalid required arguments raise
// ArgumentException; failure to create the output directory raises IOException
// (both derive from Exception, which is what this method used to throw).
// Every reader and directory opened here is disposed before the method returns.
public static void Main(string[] args)
{
    if (args.Length < 5)
    {
        Console.Error.WriteLine("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2> ...]");
        Console.Error.WriteLine("\tinputIndex\tpath to input index, multiple values are ok");
        Console.Error.WriteLine("\t-out outputDir\tpath to output directory to contain partial indexes");
        Console.Error.WriteLine("\t-num numParts\tnumber of parts to produce");
        Console.Error.WriteLine("\t-seq\tsequential docid-range split (default is round-robin)");
        Environment.Exit(-1);
    }

    List<IndexReader> indexes = new List<IndexReader>();
    // Every directory this method opens (inputs and outputs), so that all of
    // them can be released in the finally block.
    List<Store.Directory> openedDirs = new List<Store.Directory>();
    IndexReader input = null;
    try
    {
        string outDir = null;
        int numParts = -1;
        bool seq = false;

        for (int i = 0; i < args.Length; i++)
        {
            string arg = args[i];
            if (arg.Equals("-out"))
            {
                // A trailing "-out" with no value leaves outDir null, which is
                // reported below instead of indexing past the end of args.
                if (++i < args.Length)
                {
                    outDir = args[i];
                }
            }
            else if (arg.Equals("-num"))
            {
                // A missing, non-numeric or out-of-range value is mapped to -1 so
                // that the range check below reports it as an invalid argument.
                if (++i >= args.Length
                    || !int.TryParse(args[i], System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out numParts))
                {
                    numParts = -1;
                }
            }
            else if (arg.Equals("-seq"))
            {
                seq = true;
            }
            else
            {
                DirectoryInfo file = new DirectoryInfo(arg);
                if (!file.Exists)
                {
                    Console.Error.WriteLine("Invalid input path - skipping: " + file);
                    continue;
                }

                Store.Directory dir = FSDirectory.Open(file);
                bool hasIndex;
                try
                {
                    hasIndex = DirectoryReader.IndexExists(dir);
                }
                catch (Exception)
                {
                    // Best effort: a directory that cannot be probed is skipped
                    // exactly like one that holds no index.
                    hasIndex = false;
                }

                if (!hasIndex)
                {
                    Console.Error.WriteLine("Invalid input index - skipping: " + file);
                    dir.Dispose(); // do not leak the handle of a rejected input
                    continue;
                }

                openedDirs.Add(dir);
                indexes.Add(DirectoryReader.Open(dir));
            }
        }

        if (outDir == null)
        {
            throw new ArgumentException("Required argument missing: -out outputDir");
        }
        if (numParts < 2)
        {
            throw new ArgumentException("Invalid value of required argument: -num numParts");
        }
        if (indexes.Count == 0)
        {
            throw new ArgumentException("No input indexes to process");
        }

        DirectoryInfo @out = new DirectoryInfo(outDir);
        @out.Create();
        // A fresh DirectoryInfo is used for the check so it does not rely on
        // whatever state the @out instance captured earlier.
        if (!new DirectoryInfo(outDir).Exists)
        {
            throw new IOException("Can't create output directory: " + @out);
        }

        Store.Directory[] dirs = new Store.Directory[numParts];
        for (int i = 0; i < numParts; i++)
        {
            dirs[i] = FSDirectory.Open(new DirectoryInfo(Path.Combine(@out.FullName, "part-" + i)));
            openedDirs.Add(dirs[i]);
        }

        MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();
        // A single input is used directly; several inputs are combined.
        if (indexes.Count == 1)
        {
            input = indexes[0];
        }
        else
        {
            input = new MultiReader(indexes.ToArray());
        }

#pragma warning disable 612, 618
        splitter.Split(LuceneVersion.LUCENE_CURRENT, input, dirs, seq);
#pragma warning restore 612, 618
    }
    finally
    {
        if (input != null)
        {
            // NOTE(review): when input is a MultiReader this relies on it
            // releasing its sub-readers on dispose (its documented default for
            // this constructor) — confirm against the MultiReader API.
            input.Dispose();
        }
        else
        {
            foreach (IndexReader reader in indexes)
            {
                reader.Dispose();
            }
        }

        foreach (Store.Directory opened in openedDirs)
        {
            opened.Dispose();
        }
    }
}