Exemplo n.º 1
0
        /// <summary>
        /// Creates the demo form: initializes the CorpusExplorer ecosystem, makes sure the
        /// local "corpus" working directory exists, builds the designer-generated controls
        /// and, when a corpus file is already present, loads the index and the document
        /// dictionary from disk. Progress is echoed to the console ("INIT..." / "OK!").
        /// </summary>
        public QuickDemo()
        {
            const string corpusDirectory = "corpus";

            Console.Write("INIT...");
            CorpusExplorerEcosystem.InitializeMinimal();

            // Ensure the working directory is there before anything tries to use it.
            var corpusDirectoryExists = Directory.Exists(corpusDirectory);
            if (!corpusDirectoryExists)
            {
                Directory.CreateDirectory(corpusDirectory);
            }

            InitializeComponent();

#if DEBUG
            // Debug builds only: if the corpus file is not found at the configured path,
            // retry with the path prefixed by the developer's drive location.
            var foundAtConfiguredPath = File.Exists(_corpusPath);
            if (!foundAtConfiguredPath)
            {
                _corpusPath = "W:/eBooks-MFB/" + _corpusPath;
            }
#endif

            // Index and dictionary are only loaded when a corpus file exists;
            // otherwise both fields keep whatever value they had before.
            var corpusAvailable = File.Exists(_corpusPath);
            if (corpusAvailable)
            {
                _quickIndex = new QuickIndex(_corpusPath);
                _dict       = Serializer.Deserialize<Dictionary<Guid, string>>("corpus/data.bin");
            }

            Console.WriteLine("OK!");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Background-worker job that rebuilds the corpus from all PDF files found
        /// (recursively) below <c>_rootPath</c>: scrapes the PDFs, tags the raw text, saves
        /// the resulting corpus to <c>_corpusPath</c>, stores the document dictionary in
        /// "corpus/data.bin" and finally recreates the <c>QuickIndex</c>.
        /// </summary>
        /// <remarks>
        /// The index buttons are disabled while the job runs and are always re-enabled
        /// afterwards, including when the job fails. Any exception is written to the
        /// console and not rethrown. All control access goes through
        /// <c>Control.Invoke</c> because this handler does not run on the UI thread.
        /// </remarks>
        /// <param name="sender">Source of the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
        {
            const int totalSteps = 5;

            // Enables/disables both index-related buttons on the UI thread.
            Action<bool> setButtonsEnabled = enabled =>
            {
                btn_index_delete.Invoke((MethodInvoker) delegate { btn_index_delete.Enabled = enabled; });
                btn_update.Invoke((MethodInvoker) delegate { btn_update.Enabled = enabled; });
            };

            // Moves the progress bar to the given step and echoes the message to the console.
            Action<int, string> reportProgress = (step, message) =>
            {
                progressBar1.Invoke((MethodInvoker) delegate
                {
                    progressBar1.Maximum = totalSteps;
                    progressBar1.Minimum = 0;
                    progressBar1.Value   = step;
                    Console.WriteLine(message);
                });
            };

            // Dumps an exception (message + stack trace) to the console.
            Action<Exception> logError = error =>
            {
                Console.WriteLine("########--------########");
                Console.WriteLine(error.Message);
                Console.WriteLine("--------########--------");
                Console.WriteLine(error.StackTrace);
                Console.WriteLine("########--------########");
                Console.WriteLine();
            };

            try
            {
                setButtonsEnabled(false);

                var files = Directory.GetFiles(_rootPath, "*.pdf", SearchOption.AllDirectories);
                reportProgress(1, $"1 / 5 = Read {files.Length} files");

                var scraper = new TextSharpPdfScraper {
                    Strategy = TextSharpPdfScraper.TextSharpPdfScraperStrategy.Location
                };
                scraper.Input.Enqueue(files);
                scraper.Execute();

                // The cleaning stage is intentionally empty; the step is still reported.
                reportProgress(2, "2 / 5 = Cleaning");

                reportProgress(3, "3 / 5 = Tagging");

                var tagger = new RawTextTagger();
                tagger.Input = scraper.Output;
                tagger.Execute();

                // Only the first corpus produced by the tagger is kept.
                var corpus = tagger.Output.First();
                corpus.Save(_corpusPath, false);

                // Document key -> value of the "Datei" metadata entry
                // (presumably the source file of the document - verify against the tagger).
                _dict = corpus.DocumentMetadata.ToDictionary(x => x.Key, x => (string)x.Value["Datei"]);
                Serializer.Serialize(_dict, "corpus/data.bin", false);

                reportProgress(4, "4 / 5 = Build QuickIndex");

                _quickIndex = new QuickIndex(_corpusPath);

                // Final step: the bar must actually reach its maximum.
                reportProgress(totalSteps, "5 / 5 = COMPLETE!");
            }
            catch (Exception ex)
            {
                logError(ex);
            }
            finally
            {
                // Re-enable BOTH buttons whether the job succeeded or failed, so the UI is
                // never left locked. Guarded because Invoke itself can fail (e.g. when the
                // form has been closed in the meantime) and this handler must not throw.
                try
                {
                    setButtonsEnabled(true);
                }
                catch (Exception ex)
                {
                    logError(ex);
                }
            }
        }