Ejemplo n.º 1
0
        /// <summary>
        /// Splits the shared <c>input</c> reader into three directories with
        /// <c>seq == false</c> (the round-robin mode) and checks each part:
        /// its live document count is at most one above <c>NUM_DOCS / 3</c>,
        /// its first document carries the expected id, and ids that belong to
        /// the other parts are not present in its "id" term dictionary.
        /// </summary>
        public void TestSplitRR()
        {
            MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();

            Directory[] dirs = new Directory[] {
                NewDirectory(),
                NewDirectory(),
                NewDirectory()
            };
            try
            {
                splitter.Split(TEST_VERSION_CURRENT, input, dirs, false);

                // Part 0: starts at id "0" and must not contain id "1".
                using (IndexReader ir = DirectoryReader.Open(dirs[0]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1); // rounding error
                    Document doc = ir.Document(0);
                    assertEquals("0", doc.Get("id"));

                    TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
                    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("1")));
                    // Compare by value: a reference comparison between a literal and a
                    // freshly decoded string can never fail and would verify nothing.
                    assertTrue(te.Term.Utf8ToString() != "1");
                }

                // Part 1: starts at id "1" and must not contain id "0".
                using (IndexReader ir = DirectoryReader.Open(dirs[1]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    Document doc = ir.Document(0);
                    assertEquals("1", doc.Get("id"));

                    TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
                    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("0")));
                    assertTrue(te.Term.Utf8ToString() != "0");
                }

                // Part 2: starts at id "2" and must contain neither id "1" nor id "0".
                using (IndexReader ir = DirectoryReader.Open(dirs[2]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    Document doc = ir.Document(0);
                    assertEquals("2", doc.Get("id"));

                    TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
                    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("1")));
                    // Decode the term before comparing; comparing the string against
                    // the BytesRef object itself is trivially "not the same".
                    assertTrue(te.Term.Utf8ToString() != "1");

                    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef("0")));
                    assertTrue(te.Term.Utf8ToString() != "0");
                }
            }
            finally
            {
                // Release the output directories even when an assertion fails.
                foreach (Directory d in dirs)
                {
                    d.Dispose();
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Splits the shared <c>input</c> reader into three directories with
        /// <c>seq == true</c> (sequential docid-range mode) and checks that each
        /// part's live document count is at most one above <c>NUM_DOCS / 3</c>,
        /// that each part begins with the id that follows the previous part's
        /// documents, and that the id of the deleted document
        /// (<c>NUM_DOCS - 1</c>) is absent from the last part.
        /// </summary>
        public void TestSplitSeq()
        {
            MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();

            Directory[] dirs = new Directory[] {
                NewDirectory(),
                NewDirectory(),
                NewDirectory()
            };
            try
            {
                splitter.Split(TEST_VERSION_CURRENT, input, dirs, true);

                // Running total of documents seen so far; it doubles as the id
                // expected on the first document of the next part.
                int start;

                using (IndexReader ir = DirectoryReader.Open(dirs[0]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    Document doc = ir.Document(0);
                    assertEquals("0", doc.Get("id"));
                    start = ir.NumDocs;
                }

                using (IndexReader ir = DirectoryReader.Open(dirs[1]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    Document doc = ir.Document(0);
                    assertEquals(start + "", doc.Get("id"));
                    start += ir.NumDocs;
                }

                using (IndexReader ir = DirectoryReader.Open(dirs[2]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    Document doc = ir.Document(0);
                    assertEquals(start + "", doc.Get("id"));

                    // make sure the deleted doc is not here
                    string deletedId = (NUM_DOCS - 1) + "";
                    TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
                    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef(deletedId)));
                    // Compare by value: a reference comparison against a freshly
                    // decoded string can never fail and would verify nothing.
                    assertTrue(te.Term.Utf8ToString() != deletedId);
                }
            }
            finally
            {
                // Release the output directories even when an assertion fails.
                foreach (Directory d in dirs)
                {
                    d.Dispose();
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Command-line entry point. Parses the arguments, opens every valid
        /// input index, creates <c>numParts</c> output directories named
        /// <c>part-N</c> under the output directory and splits the input(s)
        /// into them.
        /// </summary>
        /// <param name="args">
        /// <c>-out &lt;outputDir&gt; -num &lt;numParts&gt; [-seq] &lt;inputIndex&gt;...</c>;
        /// fewer than five arguments prints the usage text and exits with code -1.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when <c>-out</c> is missing, <c>-num</c> is missing or less than 2,
        /// no usable input index was given, or the output directory cannot be created.
        /// </exception>
        public static void Main(string[] args)
        {
            if (args.Length < 5)
            {
                Console.Error.WriteLine("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]");
                Console.Error.WriteLine("\tinputIndex\tpath to input index, multiple values are ok");
                Console.Error.WriteLine("\t-out ouputDir\tpath to output directory to contain partial indexes");
                Console.Error.WriteLine("\t-num numParts\tnumber of parts to produce");
                Console.Error.WriteLine("\t-seq\tsequential docid-range split (default is round-robin)");
                Environment.Exit(-1);
            }

            // Everything opened below is tracked so the finally block can release it,
            // whether the split succeeds or argument validation throws.
            List<IndexReader> indexes = new List<IndexReader>();
            List<Store.Directory> inputDirs = new List<Store.Directory>();
            Store.Directory[] dirs = null;
            IndexReader input = null;

            try
            {
                string outDir = null;
                int numParts = -1;
                bool seq = false;

                for (int i = 0; i < args.Length; i++)
                {
                    if (args[i].Equals("-out"))
                    {
                        // A trailing "-out" with no value leaves outDir null, so the
                        // "Required argument missing" check below reports it instead
                        // of an IndexOutOfRangeException being raised here.
                        if (i + 1 < args.Length)
                        {
                            outDir = args[++i];
                        }
                    }
                    else if (args[i].Equals("-num"))
                    {
                        // Same guard as for "-out"; numParts stays -1 and is rejected below.
                        if (i + 1 < args.Length)
                        {
                            // Parse culture-independently: this is a machine-oriented argument.
                            numParts = Convert.ToInt32(args[++i], System.Globalization.CultureInfo.InvariantCulture);
                        }
                    }
                    else if (args[i].Equals("-seq"))
                    {
                        seq = true;
                    }
                    else
                    {
                        DirectoryInfo file = new DirectoryInfo(args[i]);
                        if (!file.Exists)
                        {
                            Console.Error.WriteLine("Invalid input path - skipping: " + file);
                            continue;
                        }

                        Store.Directory dir = FSDirectory.Open(file);
                        bool indexExists;
                        try
                        {
                            indexExists = DirectoryReader.IndexExists(dir);
                        }
                        catch (Exception)
                        {
                            // Best effort: a directory that cannot be probed is
                            // treated as "not an index" and skipped.
                            indexExists = false;
                        }
                        if (!indexExists)
                        {
                            Console.Error.WriteLine("Invalid input index - skipping: " + file);
                            dir.Dispose(); // do not leak the directory of a rejected input
                            continue;
                        }

                        // Track the directory before opening the reader so it is
                        // released even if opening the reader throws.
                        inputDirs.Add(dir);
                        indexes.Add(DirectoryReader.Open(dir));
                    }
                }

                if (outDir == null)
                {
                    throw new Exception("Required argument missing: -out outputDir");
                }
                if (numParts < 2)
                {
                    throw new Exception("Invalid value of required argument: -num numParts");
                }
                if (indexes.Count == 0)
                {
                    throw new Exception("No input indexes to process");
                }

                DirectoryInfo @out = new DirectoryInfo(outDir);
                @out.Create();
                // Re-query the file system through a fresh DirectoryInfo rather than
                // relying on whatever existence state @out may have cached.
                if (!new DirectoryInfo(outDir).Exists)
                {
                    throw new Exception("Can't create output directory: " + @out);
                }

                dirs = new Store.Directory[numParts];
                for (int i = 0; i < numParts; i++)
                {
                    dirs[i] = FSDirectory.Open(new DirectoryInfo(Path.Combine(@out.FullName, "part-" + i)));
                }

                MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();

                // A single input is used directly; several are presented as one reader.
                if (indexes.Count == 1)
                {
                    input = indexes[0];
                }
                else
                {
                    input = new MultiReader(indexes.ToArray());
                }
#pragma warning disable 612, 618
                splitter.Split(LuceneVersion.LUCENE_CURRENT, input, dirs, seq);
#pragma warning restore 612, 618
            }
            finally
            {
                // Dispose the aggregating reader first (only when it is a separate
                // object), then the individual readers, then the directories.
                // NOTE(review): this assumes disposing a reader twice is harmless,
                // since a MultiReader may already dispose its sub-readers - confirm.
                if (input != null && !indexes.Contains(input))
                {
                    input.Dispose();
                }
                foreach (IndexReader reader in indexes)
                {
                    reader.Dispose();
                }
                foreach (Store.Directory inputDir in inputDirs)
                {
                    inputDir.Dispose();
                }
                if (dirs != null)
                {
                    foreach (Store.Directory outputDir in dirs)
                    {
                        // Slots stay null if opening an earlier output directory threw.
                        if (outputDir != null)
                        {
                            outputDir.Dispose();
                        }
                    }
                }
            }
        }