Пример #1
0
        public static void Main(string[] args)
        {
            FileInfo      wikipedia         = null;
            DirectoryInfo outputDir         = new DirectoryInfo("./enwiki");
            bool          keepImageOnlyDocs = true;

            for (int i = 0; i < args.Length; i++)
            {
                string arg = args[i];
                if (arg.Equals("--input", StringComparison.Ordinal) || arg.Equals("-i", StringComparison.Ordinal))
                {
                    wikipedia = new FileInfo(args[i + 1]);
                    i++;
                }
                else if (arg.Equals("--output", StringComparison.Ordinal) || arg.Equals("-o", StringComparison.Ordinal))
                {
                    outputDir = new DirectoryInfo(args[i + 1]);
                    i++;
                }
                else if (arg.Equals("--discardImageOnlyDocs", StringComparison.Ordinal) || arg.Equals("-d", StringComparison.Ordinal))
                {
                    keepImageOnlyDocs = false;
                }
            }

            IDictionary <string, string> properties = new Dictionary <string, string>
            {
                ["docs.file"] = wikipedia.FullName,
                ["content.source.forever"] = "false",
                ["keep.image.only.docs"]   = keepImageOnlyDocs.ToString()
            };
            Config config = new Config(properties);

            ContentSource source = new EnwikiContentSource();

            source.SetConfig(config);

            DocMaker docMaker = new DocMaker();

            docMaker.SetConfig(config, source);
            docMaker.ResetInputs();
            if (wikipedia.Exists)
            {
                Console.WriteLine("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
                outputDir.Create();
                ExtractWikipedia extractor = new ExtractWikipedia(docMaker, outputDir);
                extractor.Extract();
            }
            else
            {
                // LUCENENET specific - our wrapper console shows correct usage
                throw new ArgumentException();
                //PrintUsage();
            }
        }
Пример #2
0
        // constructor
        public PerfRunData(Config config)
        {
            this.config = config;
            // analyzer (default is standard analyzer)
            analyzer = NewAnalyzerTask.CreateAnalyzer(config.Get("analyzer",
                                                                 typeof(Lucene.Net.Analysis.Standard.StandardAnalyzer).AssemblyQualifiedName));

            // content source
            string sourceClass = config.Get("content.source", typeof(SingleDocSource).AssemblyQualifiedName);

            contentSource = (ContentSource)Activator.CreateInstance(Type.GetType(sourceClass)); //Class.forName(sourceClass).asSubclass(typeof(ContentSource)).newInstance();
            contentSource.SetConfig(config);

            // doc maker
            docMaker = (DocMaker)Activator.CreateInstance(Type.GetType(config.Get("doc.maker", typeof(DocMaker).AssemblyQualifiedName)));  // "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
            docMaker.SetConfig(config, contentSource);
            // facet source
            facetSource = (FacetSource)Activator.CreateInstance(Type.GetType(config.Get("facet.source",
                                                                                        typeof(RandomFacetSource).AssemblyQualifiedName))); // "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
            facetSource.SetConfig(config);
            // query makers
            readTaskQueryMaker = new Dictionary <Type, IQueryMaker>();
            qmkrClass          = Type.GetType(config.Get("query.maker", typeof(SimpleQueryMaker).AssemblyQualifiedName));

            // index stuff
            Reinit(false);

            // statistic points
            points = new Points(config);

            if (bool.Parse(config.Get("log.queries", "false")))
            {
                Console.WriteLine("------------> queries:");
                Console.WriteLine(GetQueryMaker(new SearchTask(this)).PrintQueries());
            }
        }