Code example #1
0
        /// <summary>
        /// Persists the database as indented JSON to <paramref name="location"/>, overwriting
        /// any existing file. Enum values are written as strings and null properties are omitted.
        /// </summary>
        /// <param name="location">Destination file path for the JSON output.</param>
        /// <param name="database">Database instance to serialize.</param>
        private static void SaveDatabase(string location, IpdbDatabase database)
        {
            var serializer = new JsonSerializer
            {
                NullValueHandling = NullValueHandling.Ignore,
                Formatting        = Formatting.Indented,
            };
            serializer.Converters.Add(new Newtonsoft.Json.Converters.StringEnumConverter());
            //serializer.Error += Serializer_Error; //Ignore errors

            using (var streamWriter = new StreamWriter(location, false))
            using (var jsonWriter = new JsonTextWriter(streamWriter))
            {
                serializer.Serialize(jsonWriter, database);
            }
        }
Code example #2
0
File: IpdbScraper.cs  Project: xantari/Ipdb.Database
        /// <summary>
        /// Resumes a full scrape over IPDB machine ids in [<paramref name="start"/>, <paramref name="end"/>),
        /// appending each successfully scraped entry to <paramref name="database"/>. Every 50th id the
        /// partial database is written to <paramref name="incrementalSaveLocation"/> so an interrupted
        /// run can be resumed. Scraping stops early after 300 consecutive ids with no result.
        /// </summary>
        /// <param name="database">Existing database to resume into; must not be null.</param>
        /// <param name="incrementalSaveLocation">Path for incremental saves; skipped when null or empty.</param>
        /// <param name="start">First machine id to scrape (inclusive).</param>
        /// <param name="end">Upper bound of machine ids (exclusive).</param>
        /// <returns>The same database instance, populated with the newly scraped entries.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="database"/> is null.</exception>
        public IpdbDatabase ScrapeAllResume(IpdbDatabase database, string incrementalSaveLocation, int start = 1, int end = 10000)
        {
            if (database == null)
            {
                // More specific than bare Exception; still caught by callers catching Exception.
                throw new ArgumentNullException(nameof(database), "You must resume from an existing database");
            }
            Log.Information("{Scraper}: Beginning Scrape All. Start: {start} End: {end}...", _scraperName, start, end);
            var model = database;
            int maxThresholdOfNullsBeforeQuit = 300;
            int thresholdBeforeQuitCounter    = 0;

            // The serializer configuration is loop-invariant; build it once instead of
            // reconstructing it on every 50th iteration as before.
            JsonSerializer serializer = new JsonSerializer();
            serializer.Converters.Add(new Newtonsoft.Json.Converters.StringEnumConverter());
            serializer.NullValueHandling = NullValueHandling.Ignore;
            serializer.Formatting        = Formatting.Indented;
            //serializer.Error += Serializer_Error; //Ignore errors

            for (int i = start; i < end; i++)
            {
                var result = Scrape(i);
                if (result != null)
                {
                    model.Data.Add(result);
                    thresholdBeforeQuitCounter = 0; //Reset since we finally found a valid machine id
                }
                else
                {
                    thresholdBeforeQuitCounter++;
                }

                if (thresholdBeforeQuitCounter > maxThresholdOfNullsBeforeQuit)
                {
                    Log.Information("{Scraper}: Reached maximum threshold of invalid machine id's not returning results. Quiting...", _scraperName);
                    break; //Reached too many consecutive invalid machine ids; assume we are past the end.
                }

                if (i % 50 == 0 && !string.IsNullOrEmpty(incrementalSaveLocation)) //Every 50 entries save where we are at so we can resume if errors occur
                {
                    Log.Information("{Scraper}: Reached incremental save threshold. Saving where we are at so far.", _scraperName);
                    using (StreamWriter sw = new StreamWriter(incrementalSaveLocation, false))
                    using (JsonWriter writer = new JsonTextWriter(sw))
                    {
                        serializer.Serialize(writer, model);
                    }
                }
            }
            Log.Information("{Scraper}: Finished Scrape All. Start: {start} End: {end}...", _scraperName, start, end);
            return model;
        }
Code example #3
0
        /// <summary>
        /// Entry point: configures logging, scrapes the IPDB site (optionally resuming from the
        /// temp file when "-resume" is passed), writes the final database, and cleans up the
        /// temp file on success.
        /// </summary>
        static void Main(string[] args)
        {
            //var env = Environment.GetEnvironmentVariable("NETCORE_ENVIRONMENT");
            var builder = new ConfigurationBuilder()
                          .AddJsonFile($"appsettings.json", true, true)
                          //.AddJsonFile($"appsettings.{env}.json", true, true)
                          .AddEnvironmentVariables();
            var config = builder.Build();

            Log.Logger = new LoggerConfiguration()
                         .WriteTo.File("Log\\Log-.txt", rollingInterval: RollingInterval.Day)
                         .WriteTo.Console()
                         .CreateLogger();

            Log.Information("Starting IPDB Database Creator.");

            UserAgentStrings.InitializeList("Data\\UserAgentStrings.txt");

            Log.Information("User Agent strings initialized.");

            var database = new IpdbDatabase();
            var scraper  = new IpdbScraper();

            scraper.EnableRandomSleepTime = false; //Try do it as fast as possible.

            var cfg = config.Get <AppSettings>();

            try
            {
                if (args.Any(p => p.Contains("-resume")))
                {
                    if (!File.Exists(cfg.TempFileLocation))
                    {
                        Log.Information("Temp file not found: {file}", cfg.TempFileLocation);
                        return;
                    }

                    database = JsonConvert.DeserializeObject<IpdbDatabase>(File.ReadAllText(cfg.TempFileLocation));
                    //Resume one past the highest machine id already captured.
                    var lastIpdb = database.Data.Max(c => c.IpdbId) + 1;
                    database = scraper.ScrapeAllResume(database, cfg.TempFileLocation, lastIpdb, 10000);
                }
                else //Full scraping
                {
                    database = scraper.ScrapeAll(cfg.TempFileLocation);
                }

                //Both paths finish the same way: persist the final database, then drop the temp file.
                SaveDatabase(cfg.FinalFileLocation, database);

                if (File.Exists(cfg.TempFileLocation))
                {
                    File.Delete(cfg.TempFileLocation);
                }
            }
            catch (Exception ex)
            {
                //Serilog convention: pass the exception as the first argument so it is captured fully.
                Log.Error(ex, "Unhandled error during scraping");
                throw; //"throw;" preserves the original stack trace; "throw ex;" would reset it.
            }

            Log.Information("Scraping Finished.");
        }