public string ScrapeURLToFile(string url, bool force_download = false, Dictionary <string, string> additional_headers = null)
        {
            string cache_key = StreamMD5.FromText(url);
            string directory = Path.GetFullPath(Path.Combine(base_directory, cache_key.Substring(0, 2)));
            string filename  = Path.GetFullPath(Path.Combine(directory, cache_key));

            if (!File.Exists(filename) || force_download)
            {
                Utilities.Files.DirectoryTools.CreateDirectory(directory);

                // Crude throttle
                if (true)
                {
                    while (DateTime.UtcNow.Subtract(last_scrape_time).TotalMilliseconds < throttle_ms)
                    {
                        Thread.Sleep(50);
                    }
                    last_scrape_time = DateTime.UtcNow;
                }

                Logging.Info("Downloading from {0}", url);
                using (WebClient client = new WebClient())
                {
                    if (null != additional_headers)
                    {
                        foreach (var pair in additional_headers)
                        {
                            client.Headers.Add(pair.Key, pair.Value);
                        }
                    }
                    if (!String.IsNullOrEmpty(userAgent))
                    {
                        client.Headers.Add("User-agent", userAgent);
                    }

                    string temp_filename = filename + ".tmp";
                    try
                    {
                        client.DownloadFile(url, temp_filename);
                    }
                    catch (WebException ex)
                    {
                        File.WriteAllText(temp_filename, ex.ToString());
                    }

                    File.Delete(filename);
                    File.Move(temp_filename, filename);
                }

                string filename_manifest = Path.GetFullPath(Path.Combine(base_directory, @"manifest.txt"));
                string manifest_line     = String.Format("{0}\t{1}", cache_key, url);
                using (StreamWriter sw = File.AppendText(filename_manifest))
                {
                    sw.WriteLine(manifest_line);
                }
            }

            return(filename);
        }
Beispiel #2
0
 public override StreamWriter AppendText(string path)
 {
     return(AfsFile.AppendText(path));
 }