/// <summary>
        /// Scrapes html page for image and text data
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        public ActionResult Scrape(string url)
        {
            Scraper scraper = new Scraper(url);

            if (scraper.Loaded)
            {
                scraper.Execute();
            }

            return(scraper.DocInfo != null
                ? View("~/Views/UrlScraper/Results.cshtml", scraper.DocInfo)
                : null);
        }
Beispiel #2
0
        private static async Task Main(string[] args)
        {
            if (File.Exists(ConfigPath))
            {
                Xml.LoadConfig(ConfigPath);
            }

            string argInput;

            try
            {
                argInput = args[0];
            }
            catch { argInput = null; }

            string        dmPath;
            DirectoryInfo chatDir;

PathInput:
            Console.Clear();

            if (argInput == null)
            {
                Logger.Print("Path of files to be scraped: ", LogType.Info, false);
                dmPath = Console.ReadLine();
            }
            else
            {
                dmPath = argInput;
            }


            if (!Directory.Exists(dmPath))
            {
                Console.Clear();
                Logger.Print("Path was not found.", LogType.Error);
                Thread.Sleep(1000);
                goto PathInput;
            }
            else
            {
                chatDir = new DirectoryInfo(dmPath);
            }

            Console.Clear();

            Logger.Print("Press any key to start downloading...", LogType.Info);
            Console.Read();

            var downloader = new Downloader();

            _scraper = new Scraper(downloader, chatDir, dmPath);

            await _scraper.Execute();

            Console.WriteLine();

            Logger.Print($"{DateTime.Now} | Finished scraping and downloading all links and files!", LogType.Info);

            Console.ReadLine();
        }
Beispiel #3
0
        /// <summary>
        /// main console start
        /// </summary>
        /// <param name="args"></param>
        internal static void Main(string[] args)
        {
            //data map file
            string dataMapFile = "";

            //data output file
            string dataOutFile = "";

            //page download delay
            int delay = 0;

            Console.WriteLine("Welcome to Cidean's WebScraper.");
            Console.WriteLine(LineDivider);
            Console.WriteLine("Please use this application responsibly and respect all copyrighted material.");

            try
            {
                //Check for command arguments, required for continuing
                if (args.Length == 0)
                {
                    throw new ArgumentNullException("No arguments set.");
                }

                //argument index for looping
                int argsIndex = 0;

                //grab command arguments and parameters
                while (true)
                {
                    switch (args[argsIndex].ToLower())
                    {
                    //data map file command
                    case "-m":
                        if (((args.Length) > (argsIndex)) && (!args[argsIndex + 1].StartsWith("-")))
                        {
                            dataMapFile = args[argsIndex + 1];
                            Console.WriteLine("Data Map File: " + dataMapFile);
                            argsIndex++;
                        }
                        break;

                    //data output file command
                    case "-o":
                        if (((args.Length) > (argsIndex)) && (!args[argsIndex + 1].StartsWith("-")))
                        {
                            dataOutFile = args[argsIndex + 1];
                            Console.WriteLine("Data Output File: " + dataOutFile);
                            argsIndex++;
                        }
                        break;

                    //delay between page downloads
                    case "-d":
                        if (((args.Length) > (argsIndex)) && (!args[argsIndex + 1].StartsWith("-")))
                        {
                            if (!int.TryParse(args[argsIndex + 1], out delay))
                            {
                                throw new ArgumentException("Delay is not an number.");
                            }
                            Console.WriteLine("Data Output File: " + dataOutFile);
                            argsIndex++;
                        }
                        break;

                    case "-r":    //test: remove
                        break;

                    default:
                        //bad argument
                        throw new ArgumentException("Unknown argument " + args[argsIndex]);
                    }

                    argsIndex++;
                    //check if all arguments have been processed.
                    if (argsIndex >= args.Length)
                    {
                        break;
                    }
                }

                if (string.IsNullOrEmpty(dataMapFile) || string.IsNullOrEmpty(dataOutFile))
                {
                    throw new ArgumentNullException("Missing Datamap file or Output file.");
                }

                //set file to relative path
                dataMapFile = Path.Combine(baseDirectory, dataMapFile);

                dataOutFile = Path.Combine(baseDirectory, dataOutFile);

                //initialize web scraper and load data map
                Scraper scraper = new Scraper();
                scraper.LoggedEvent += Scraper_LoggedEvent;
                scraper.Delay        = delay;

                //create datamap from xml file
                DataMap map = DataMap.LoadFile(dataMapFile);
                Console.WriteLine("Data map {0} loaded successfully.", dataMapFile);

                //execute webscraping
                scraper.Execute(map, Path.Combine(baseDirectory, dataOutFile));
            }
            catch (Exception ex)
            {
                //arguments didn't work out
                Console.WriteLine(ex.Message);
                Console.WriteLine("Exiting WebScraper Runner.");
                Console.ReadKey();
                return;
            }
            finally
            {
                //exit application
                Console.WriteLine("Press any key to exit.");
                Console.ReadKey();
            }
        }