public string ScrapeURLToFile(string url, bool force_download = false, Dictionary <string, string> additional_headers = null) { string cache_key = StreamMD5.FromText(url); string directory = Path.GetFullPath(Path.Combine(base_directory, cache_key.Substring(0, 2))); string filename = Path.GetFullPath(Path.Combine(directory, cache_key)); if (!File.Exists(filename) || force_download) { Utilities.Files.DirectoryTools.CreateDirectory(directory); // Crude throttle if (true) { while (DateTime.UtcNow.Subtract(last_scrape_time).TotalMilliseconds < throttle_ms) { Thread.Sleep(50); } last_scrape_time = DateTime.UtcNow; } Logging.Info("Downloading from {0}", url); using (WebClient client = new WebClient()) { if (null != additional_headers) { foreach (var pair in additional_headers) { client.Headers.Add(pair.Key, pair.Value); } } if (!String.IsNullOrEmpty(userAgent)) { client.Headers.Add("User-agent", userAgent); } string temp_filename = filename + ".tmp"; try { client.DownloadFile(url, temp_filename); } catch (WebException ex) { File.WriteAllText(temp_filename, ex.ToString()); } File.Delete(filename); File.Move(temp_filename, filename); } string filename_manifest = Path.GetFullPath(Path.Combine(base_directory, @"manifest.txt")); string manifest_line = String.Format("{0}\t{1}", cache_key, url); using (StreamWriter sw = File.AppendText(filename_manifest)) { sw.WriteLine(manifest_line); } } return(filename); }
public override StreamWriter AppendText(string path) { return(AfsFile.AppendText(path)); }