/// <summary>
/// Command-line entry point. Parses the options, configures an
/// <c>EnwikiContentSource</c> and a <c>DocMaker</c> from them, and runs
/// <c>ExtractWikipedia.Extract()</c> into the output directory.
/// </summary>
/// <remarks>
/// Recognized options (matched with ordinal, case-sensitive comparison):
/// <list type="bullet">
/// <item><description><c>--input</c> / <c>-i</c> FILE: the input file (required).</description></item>
/// <item><description><c>--output</c> / <c>-o</c> DIR: the output directory; defaults to <c>./enwiki</c>.</description></item>
/// <item><description><c>--discardImageOnlyDocs</c> / <c>-d</c>: sets <c>keep.image.only.docs</c> to false.</description></item>
/// </list>
/// Arguments that match none of the options are ignored.
/// </remarks>
/// <param name="args">The raw command-line arguments.</param>
/// <exception cref="ArgumentException">
/// An option that takes a value is the last argument, no input file was given,
/// or the input file does not exist.
/// </exception>
public static void Main(string[] args)
{
    FileInfo wikipedia = null;
    DirectoryInfo outputDir = new DirectoryInfo("./enwiki");
    bool keepImageOnlyDocs = true;

    for (int i = 0; i < args.Length; i++)
    {
        string arg = args[i];
        bool isInput = arg.Equals("--input", StringComparison.Ordinal) || arg.Equals("-i", StringComparison.Ordinal);
        bool isOutput = arg.Equals("--output", StringComparison.Ordinal) || arg.Equals("-o", StringComparison.Ordinal);

        if (isInput || isOutput)
        {
            // Both options consume the following argument. Reject a trailing option
            // explicitly instead of indexing past the end of the array.
            if (i + 1 >= args.Length)
            {
                throw new ArgumentException("Missing value for option: " + arg);
            }

            string value = args[++i];
            if (isInput)
            {
                wikipedia = new FileInfo(value);
            }
            else
            {
                outputDir = new DirectoryInfo(value);
            }
        }
        else if (arg.Equals("--discardImageOnlyDocs", StringComparison.Ordinal) || arg.Equals("-d", StringComparison.Ordinal))
        {
            keepImageOnlyDocs = false;
        }
    }

    // Without an input file the dereference of 'wikipedia' below would fail with a
    // NullReferenceException; report it as a usage error like the other bad-usage path.
    if (wikipedia == null)
    {
        throw new ArgumentException("No input file specified (use --input or -i).");
    }

    IDictionary<string, string> properties = new Dictionary<string, string>
    {
        ["docs.file"] = wikipedia.FullName,
        ["content.source.forever"] = "false",
        ["keep.image.only.docs"] = keepImageOnlyDocs.ToString()
    };
    Config config = new Config(properties);

    ContentSource source = new EnwikiContentSource();
    source.SetConfig(config);

    DocMaker docMaker = new DocMaker();
    docMaker.SetConfig(config, source);
    // NOTE(review): the existence check below runs after ResetInputs(); if that call
    // opens the input file, a missing file may fail here first - confirm.
    docMaker.ResetInputs();

    if (wikipedia.Exists)
    {
        Console.WriteLine("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
        outputDir.Create();
        ExtractWikipedia extractor = new ExtractWikipedia(docMaker, outputDir);
        extractor.Extract();
    }
    else
    {
        // LUCENENET specific - our wrapper console shows correct usage
        throw new ArgumentException();
        //PrintUsage();
    }
}
/// <summary>
/// Builds the run data from <paramref name="config"/>: creates the analyzer,
/// content source, doc maker and facet source named in the configuration,
/// records the query maker type, calls <c>Reinit(false)</c>, and creates the
/// statistics points.
/// </summary>
/// <remarks>
/// Each component type is read from the configuration as a type name; when the
/// key is absent the assembly-qualified name of the default type is used
/// (<c>StandardAnalyzer</c>, <c>SingleDocSource</c>, <c>DocMaker</c>,
/// <c>RandomFacetSource</c>, <c>SimpleQueryMaker</c>). When <c>log.queries</c>
/// parses as true, the queries of the <c>SearchTask</c> query maker are written
/// to the console.
/// </remarks>
/// <param name="config">The configuration that drives every component created here.</param>
public PerfRunData(Config config)
{
    this.config = config;

    // Analyzer (StandardAnalyzer unless "analyzer" is configured).
    string analyzerTypeName = config.Get("analyzer", typeof(Lucene.Net.Analysis.Standard.StandardAnalyzer).AssemblyQualifiedName);
    analyzer = NewAnalyzerTask.CreateAnalyzer(analyzerTypeName);

    // Content source: instantiated through its parameterless constructor, then configured.
    string contentSourceTypeName = config.Get("content.source", typeof(SingleDocSource).AssemblyQualifiedName);
    Type contentSourceType = Type.GetType(contentSourceTypeName);
    contentSource = (ContentSource)Activator.CreateInstance(contentSourceType);
    contentSource.SetConfig(config);

    // Doc maker: receives the content source created above.
    string docMakerTypeName = config.Get("doc.maker", typeof(DocMaker).AssemblyQualifiedName);
    Type docMakerType = Type.GetType(docMakerTypeName);
    docMaker = (DocMaker)Activator.CreateInstance(docMakerType);
    docMaker.SetConfig(config, contentSource);

    // Facet source.
    string facetSourceTypeName = config.Get("facet.source", typeof(RandomFacetSource).AssemblyQualifiedName);
    Type facetSourceType = Type.GetType(facetSourceTypeName);
    facetSource = (FacetSource)Activator.CreateInstance(facetSourceType);
    facetSource.SetConfig(config);

    // Query makers: start with an empty map and remember the configured maker type.
    readTaskQueryMaker = new Dictionary<Type, IQueryMaker>();
    string queryMakerTypeName = config.Get("query.maker", typeof(SimpleQueryMaker).AssemblyQualifiedName);
    qmkrClass = Type.GetType(queryMakerTypeName);

    // Index stuff.
    Reinit(false);

    // Statistic points.
    points = new Points(config);

    // Optional dump of the queries; nothing else happens after this point.
    bool logQueries = bool.Parse(config.Get("log.queries", "false"));
    if (!logQueries)
    {
        return;
    }

    Console.WriteLine("------------> queries:");
    var searchTask = new SearchTask(this);
    var queryMaker = GetQueryMaker(searchTask);
    Console.WriteLine(queryMaker.PrintQueries());
}