/// <summary>
/// Downloads a Google Scholar page through the supplied proxy and passes the parsed
/// HTML document to <c>ScrapeDoc</c>, which fills the returned list.
/// </summary>
/// <param name="proxy">Proxy handed to the downloader.</param>
/// <param name="url">Address of the page to download and scrape.</param>
/// <returns>
/// The list populated by <c>ScrapeDoc</c>. Any exception is logged and whatever was
/// collected up to that point (possibly nothing) is returned.
/// </returns>
public static List<GoogleScholarScrapePaper> ScrapeUrl(IWebProxy proxy, string url)
{
    List<GoogleScholarScrapePaper> gssps = new List<GoogleScholarScrapePaper>();

    // The downloader supplies both objects through its out parameters, so nothing is
    // allocated up front (a pre-allocated stream would be orphaned and never disposed).
    MemoryStream ms = null;
    try
    {
        WebHeaderCollection header_collection;
        UrlDownloader.DownloadWithBlocking(proxy, url, out ms, out header_collection);

        HtmlDocument doc = new HtmlDocument();
        doc.Load(ms);
        ScrapeDoc(doc, url, gssps);
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem parsing the GoogleScholar url");
    }
    finally
    {
        if (ms != null)
        {
            ms.Dispose();
        }
    }

    return gssps;
}
/// <summary>
/// Downloads <paramref name="url"/> through the configured proxy, feeds the response to
/// <c>ProcessDisplayResponse</c>, and then updates the chat controls on the dispatcher
/// to reflect whether the request succeeded.
/// </summary>
/// <param name="url">Chat endpoint to request.</param>
private void PerformRequest(string url)
{
    bool is_chat_available = false;
    MemoryStream ms = null;
    try
    {
        UrlDownloader.DownloadWithBlocking(ConfigurationManager.Instance.Proxy, url, out ms);
        ProcessDisplayResponse(ms);
        is_chat_available = true;
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "There was a problem communicating with chat.");

        // Back off: schedule the next automatic poll one minute from now
        next_autopoll_datetime = DateTime.UtcNow.AddMinutes(1);
        is_chat_available = false;
    }
    finally
    {
        // The response stream was previously never released
        if (ms != null)
        {
            ms.Dispose();
        }
    }

    // Application.Current can be null (e.g. while the application is going away);
    // dereferencing it unguarded would throw from this worker code path.
    Application application = Application.Current;
    if (application == null)
    {
        return;
    }

    application.Dispatcher.BeginInvoke(new Action(() =>
    {
        this.IsEnabled = is_chat_available;
        TxtChatUnavailable.Visibility = is_chat_available ? Visibility.Collapsed : Visibility.Visible;
    }));
}
/// <summary>
/// Creates a Yahoo scraper. Both arguments are forwarded to the base constructor; the
/// scraper then starts paging at 0, resets the radius to 0, allows searches without a
/// category, and tags its campaign with search engine id 4.
/// </summary>
/// <param name="downloader">Downloader passed to the base scraper.</param>
/// <param name="callback">Result callback passed to the base scraper.</param>
public YahooScraper(UrlDownloader downloader, ScrapeResultCallback callback)
    : base(downloader, callback)
{
    initialPageNum = 0;
    base.Radius = 0f;
    allowsCategorylessSearch = true;
    campaign.SearchEngine = 4;
}
/// <summary>
/// Requests <paramref name="url"/>, passes the response to <c>ProcessDisplayResponse</c>,
/// and then (unless the application is shutting down) updates the chat controls on the
/// dispatcher according to whether the request succeeded.
/// </summary>
/// <param name="url">Chat endpoint to request.</param>
private void PerformRequest(string url)
{
    bool is_chat_available;
    try
    {
        using (MemoryStream response = UrlDownloader.DownloadWithBlocking(url))
        {
            ProcessDisplayResponse(response);
        }
        is_chat_available = true;
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "There was a problem communicating with chat. URL: {0}", url);

        // Failed: try the automatic poll again in a minute
        next_autopoll_datetime = DateTime.UtcNow.AddMinutes(1);
        is_chat_available = false;
    }

    // Touching the dispatcher while Qiqqa is shutting down would crash, so bail out first
    bool shutting_down = (null == Application.Current)
        || Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown;
    if (shutting_down)
    {
        Logging.Warn("Chat: detected Qiqqa shutting down.");
        return;
    }

    Application.Current.Dispatcher.BeginInvoke(new Action(() =>
    {
        IsEnabled = is_chat_available;
        TxtChatUnavailable.Visibility = is_chat_available ? Visibility.Collapsed : Visibility.Visible;
    }));
}
/// <summary>
/// Downloads an Australian listing page and extracts headline, address parts,
/// coordinates, map link, website, phone and first e-mail from its markup.
/// </summary>
/// <param name="url">Listing page address; always stored as <c>AdUrl</c>.</param>
/// <returns>
/// The populated result, or one carrying only <c>AdUrl</c> when the scraper has been
/// terminated or the download fails.
/// </returns>
protected MapScrapeResult ParseDetailsAustralia(string url)
{
    MapScrapeResult listing = new MapScrapeResult();
    listing.AdUrl = url;

    string html;
    if (terminated || !downloader.DownloadUrl(url, out html))
    {
        return listing;
    }

    listing.Headline = base.ExtractValue(html, "<h1>(?<value>.*?)</h1>");

    // Address first holds the raw markup so the nested spans can be pulled out of it,
    // and is reduced to plain text afterwards.
    listing.Address = base.ExtractValue(html, "<div class=\"adr\">(?<value>.*?)</div>");
    listing.Region = base.ExtractValue(listing.Address, "<span class=\"region\".*?>(?<value>.*?)</span>");
    listing.City = base.ExtractValue(listing.Address, "<span.*?class=\"locality\".*?>(?<value>.*?)</span>");
    listing.ZipCode = base.ExtractValue(listing.Address, "<span.*?class=\"postal-code\".*?>(?<value>.*?)</span>");
    listing.Address = UrlDownloader.SkipHtmlTags(listing.Address)
        .Replace("\t", string.Empty)
        .Replace("\n", string.Empty)
        .Trim();

    listing.Latitude = base.ExtractValue(html, "<span class=\"latitude\">(?<value>.*?)</span>");
    listing.Longitude = base.ExtractValue(html, "<span class=\"longitude\">(?<value>.*?)</span>");
    listing.Map = string.Format(
        "http://maps.yahoo.com/maps_result?ard=1&lat={0}&lon={1}&zoom=18",
        listing.Latitude,
        listing.Longitude);

    // Website link and phone number both live inside the "pageTools" list
    string pageTools = base.ExtractValue(html, "<ul class=\"pageTools\">(?<value>.*?)</ul>");
    listing.Website = base.ExtractValue(pageTools, "href=\"(?<value>.*?)\"");
    listing.Phone = base.ExtractValue(pageTools, "<span class=\"tl-phone-full\">(?<value>.*?)</span>").Trim();

    listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();
    return listing;
}
/// <summary>
/// Downloads the bundle described by <paramref name="manifest"/>, reporting progress
/// through the status manager while polling, and offers to install it when the
/// download finishes successfully. Must not be called on the UI thread.
/// </summary>
/// <param name="manifest">Manifest supplying base URL, id, version, title and support e-mail.</param>
private void ManageDownload(BundleLibraryManifest manifest)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

    string url = manifest.BaseUrl + "/" + manifest.Id + Common.EXT_BUNDLE;

    using (UrlDownloader.DownloadAsyncTracker tracker = UrlDownloader.DownloadWithNonBlocking(url))
    {
        string STATUS_TOKEN = "BundleDownload-" + manifest.Version;
        StatusManager.Instance.ClearCancelled(STATUS_TOKEN);

        // Poll until the download finishes or somebody cancels it
        while (!tracker.DownloadComplete)
        {
            // An application shutdown is treated like a user cancel
            if (ShutdownableManager.Instance.IsShuttingDown)
            {
                Logging.Error("Canceling download of Bundle Library due to signaled application shutdown");
                StatusManager.Instance.SetCancelled(STATUS_TOKEN);
            }

            if (StatusManager.Instance.IsCancelled(STATUS_TOKEN))
            {
                tracker.Cancel();
                break;
            }

            StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Downloading Bundle Library...", tracker.ProgressPercentage, 100, true);
            ShutdownableManager.Sleep(3000);
        }

        // Work out why the loop ended
        if (tracker.DownloadDataCompletedEventArgs.Cancelled)
        {
            StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Cancelled download of Bundle Library.");
            return;
        }

        // An explicit error and a missing payload are reported the same way
        if (null != tracker.DownloadDataCompletedEventArgs.Error
            || null == tracker.DownloadDataCompletedEventArgs.Result)
        {
            MessageBoxes.Error(tracker.DownloadDataCompletedEventArgs.Error, "There was an error during the download of your Bundle Library. Please try again later or contact {0} for more information.", manifest.SupportEmail);
            StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Error during download of Bundle Library.");
            return;
        }

        StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Completed download of Bundle Library.");

        bool install_now = MessageBoxes.AskQuestion("The Bundle Library named '{0}' has been downloaded. Do you want to install it now?", manifest.Title);
        if (!install_now)
        {
            MessageBoxes.Warn("Not installing Bundle Library.");
            return;
        }

        LibraryBundleInstaller.Install(manifest, tracker.DownloadDataCompletedEventArgs.Result);
    }
}
/// <summary>
/// Follows a chain of dependent styles up to the style that is not dependent on any
/// other. When a parent style file is missing from the style's directory, the user is
/// asked whether to download it; declining opens the parent URL in the browser.
/// </summary>
/// <param name="style_xml_filename">Path of the style file to start from.</param>
/// <returns>
/// The path of the root style, or <c>null</c> when a required parent style is still
/// missing after the download attempt or browser hand-off.
/// </returns>
internal static string GetRootStyleFilename(string style_xml_filename)
{
    string parent_filename;
    string parent_url;

    // Not a dependent style, so this file is the root
    if (!IsDependentStyle(style_xml_filename, out parent_filename, out parent_url))
    {
        return style_xml_filename;
    }

    // The parent is expected to sit next to the dependent style
    string style_directory = Path.GetDirectoryName(style_xml_filename);
    string full_parent_filename = Path.GetFullPath(Path.Combine(style_directory, parent_filename));

    if (!File.Exists(full_parent_filename))
    {
        string message = String.Format(
            "Can't find parent style for this dependent style"
            + "\n\n"
            + "Your style depends on a parent style named {0}, which needs to be saved in the same directory.\n\n"
            + "It appears to be available from {1}.\n"
            + "Shall we try to download it automatically? If you choose NO, Qiqqa will open the website for you so you can download it manually.",
            parent_filename,
            parent_url
        );

        if (!MessageBoxes.AskQuestion(message))
        {
            MainWindowServiceDispatcher.Instance.OpenUrlInBrowser(parent_url, true);
        }
        else
        {
            const string permission_message = "You don't seem to have permission to write the new style to the directory '{0}'.\nPlease copy the original style file '{1}' to a folder where you can write (perhaps alongside your Word document), and try again.";
            try
            {
                using (MemoryStream downloaded = UrlDownloader.DownloadWithBlocking(parent_url))
                {
                    File.WriteAllBytes(full_parent_filename, downloaded.ToArray());
                }
            }
            catch (UnauthorizedAccessException ex)
            {
                Logging.Error(ex, permission_message, full_parent_filename, style_xml_filename);
                MessageBoxes.Warn(permission_message, full_parent_filename, style_xml_filename);
            }
        }
    }

    // Look again: if the parent is there now, continue up the chain from it
    if (!File.Exists(full_parent_filename))
    {
        // The parent style is needed but could not be obtained
        return null;
    }

    return GetRootStyleFilename(full_parent_filename);
}
/// <summary>
/// Checks, at most once an hour, whether a newer manifest exists for a bundle library
/// and queues an update notification when it does. Must not be called on the UI thread.
/// </summary>
/// <param name="web_library_detail">Library to check; non-bundle libraries are ignored.</param>
internal static void Check(WebLibraryDetail web_library_detail)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

    // Only bundle libraries can be updated this way
    if (!web_library_detail.IsBundleLibrary)
    {
        return;
    }

    // Throttle: skip when the last check was less than 60 minutes ago
    DateTime last_check = web_library_detail.LastBundleManifestDownloadTimestampUTC ?? DateTime.MinValue;
    if ((DateTime.UtcNow - last_check).TotalMinutes < 60)
    {
        return;
    }

    // Record that a check is happening now
    web_library_detail.LastBundleManifestDownloadTimestampUTC = DateTime.UtcNow;
    WebLibraryManager.Instance.NotifyOfChangeToWebLibraryDetail();

    // Fetch the latest manifest from the location named in the current one
    BundleLibraryManifest manifest_existing = BundleLibraryManifest.FromJSON(web_library_detail.BundleManifestJSON);
    string manifest_latest_url = manifest_existing.BaseUrl + "/" + manifest_existing.Id + Common.EXT_BUNDLE_MANIFEST;

    using (MemoryStream ms = UrlDownloader.DownloadWithBlocking(manifest_latest_url))
    {
        string manifest_latest_json = Encoding.UTF8.GetString(ms.ToArray());
        BundleLibraryManifest manifest_latest = BundleLibraryManifest.FromJSON(manifest_latest_json);

        // Nothing to do when the installed version is the same or newer (string comparison)
        bool already_current = String.Compare(manifest_existing.Version, manifest_latest.Version) >= 0;
        // ...or when the user asked to ignore exactly this version
        bool ignored_by_user = web_library_detail.LastBundleManifestIgnoreVersion == manifest_latest.Version;
        if (already_current || ignored_by_user)
        {
            return;
        }

        BundleLibraryUpdateNotification blun = new BundleLibraryUpdateNotification(web_library_detail, manifest_latest);
        string notification_text = String.Format(
            "An update is available for your Bundle Library '{0}', from version {1} to {2}.",
            manifest_latest.Title,
            manifest_existing.Version,
            manifest_latest.Version);

        NotificationManager.Notification notification = new NotificationManager.Notification(
            notification_text,
            "Bundle Library update available!",
            NotificationManager.NotificationType.Info,
            Icons.LibraryTypeBundle,
            "Download!", blun.Download,
            "No thanks!", blun.NoThanks
        );
        NotificationManager.Instance.AddPendingNotification(notification);
    }
}
/// <summary>
/// Handles a URL request by starting a <c>UrlDownloader</c> for it on a new thread.
/// The thread is recorded in <c>threads</c> before it is started.
/// </summary>
/// <param name="url">Requested address.</param>
/// <param name="handle">Stream handle handed to the downloader.</param>
protected override void OnUrlRequested(string url, HTMLStream handle)
{
    // The base implementation is deliberately not called: this override consumes the event
    UrlDownloader download = new UrlDownloader(url, handle);

    Thread worker = new Thread(new ThreadStart(download.Go));
    threads.Add(worker);
    worker.Start();
}
/// <summary>
/// Scans a US results page for listings, loads each listing's detail page, and adds
/// every acceptable result to <paramref name="result"/>, notifying the callback.
/// </summary>
/// <param name="text">HTML of the results page.</param>
/// <param name="result">Shared result list; locked while a result is added.</param>
/// <param name="Location">Not used by this method.</param>
/// <param name="Category">Category whose name is stamped on each result.</param>
/// <returns>
/// <c>false</c> as soon as the scraper is found to be terminated; otherwise whether the
/// page contains the next-page marker.
/// </returns>
protected bool ProcessUSA(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    const string listingPattern = "<h3 class=\"business-name fn org\">.*?<a.*?href=\"(?<href>.*?)\".*?</a>.*?</h3>.*?<div class=\"distance\">(?<radius>.*?)</div>";

    // The static Regex API reuses the cached compiled expression instead of
    // recompiling the same pattern for every page processed.
    foreach (Match match in Regex.Matches(text, listingPattern, options))
    {
        if (this.terminated)
        {
            return false;
        }

        MapScrapeResult mapScrapeResult = this.ParseDetailsUSA(match.Groups["href"].Value);
        mapScrapeResult.Radius = UrlDownloader.SkipHtmlTags(match.Groups["radius"].Value).Trim();
        mapScrapeResult.Category = Category.Name;

        if (!this.CanAddRes(result, mapScrapeResult))
        {
            continue;
        }
        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(mapScrapeResult.Website))
        {
            string website;
            base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
            mapScrapeResult.Website = website;
        }

        // Checked again on purpose: the result was modified after the first check
        if (!this.CanAddRes(result, mapScrapeResult))
        {
            continue;
        }

        lock (result)
        {
            result.Add(mapScrapeResult);
        }

        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(mapScrapeResult);
        }
    }

    // Markup is matched ordinally; a culture-sensitive search is not wanted for HTML
    return text.IndexOf("<li class=\"next\">", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Scans a Canadian results page for listings, loads each listing's detail page from
/// yellowpages.ca, and adds every acceptable result to <paramref name="result"/>,
/// notifying the callback.
/// </summary>
/// <param name="text">HTML of the results page.</param>
/// <param name="result">Shared result list; locked while a result is added.</param>
/// <param name="Location">Not used by this method.</param>
/// <param name="Category">Category whose name is stamped on each result.</param>
/// <returns>
/// <c>false</c> as soon as the scraper is found to be terminated; otherwise whether the
/// page contains a "Next" link.
/// </returns>
protected bool ProcessCanada(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    const string listingPattern = "<div class=\"listingDetail\".*?>.*?<h3 class=\"listingTitleLine\">.*?href=\"(?<href>.*?)\".*?</h3>.*?<h4 class=\"phoneLink\">(?<phone>.*?)</h4>";

    // The static Regex API reuses the cached compiled expression instead of
    // recompiling the same pattern for every page processed.
    foreach (Match match in Regex.Matches(text, listingPattern, options))
    {
        if (this.terminated)
        {
            return false;
        }

        MapScrapeResult mapScrapeResult = this.ParseDetailsCanada(string.Format("http://www.yellowpages.ca{0}", match.Groups["href"].Value));

        // The phone number from the results page replaces whatever the detail page gave
        mapScrapeResult.Phone = UrlDownloader.SkipHtmlTags(match.Groups["phone"].Value);
        mapScrapeResult.Category = Category.Name;

        if (!this.CanAddRes(result, mapScrapeResult))
        {
            continue;
        }
        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(mapScrapeResult.Website))
        {
            string website;
            base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
            mapScrapeResult.Website = website;
        }

        // Checked again on purpose: the result was modified after the first check
        if (!this.CanAddRes(result, mapScrapeResult))
        {
            continue;
        }

        lock (result)
        {
            result.Add(mapScrapeResult);
        }

        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(mapScrapeResult);
        }
    }

    // Markup is matched ordinally; a culture-sensitive search is not wanted for HTML
    return text.IndexOf(">Next</a>", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Checks, at most once an hour, whether a newer manifest exists for a bundle library
/// and queues an update notification when it does.
/// </summary>
/// <param name="library">Library to check; non-bundle libraries are ignored.</param>
internal static void Check(Library library)
{
    // We can operate only on bundle libs
    if (!library.WebLibraryDetail.IsBundleLibrary)
    {
        return;
    }

    // Only check every hour
    if (DateTime.UtcNow.Subtract(library.WebLibraryDetail.LastBundleManifestDownloadTimestampUTC ?? DateTime.MinValue).TotalMinutes < 60)
    {
        return;
    }

    // Flag that we are running this update check now
    library.WebLibraryDetail.LastBundleManifestDownloadTimestampUTC = DateTime.UtcNow;
    WebLibraryManager.Instance.NotifyOfChangeToWebLibraryDetail();

    // Download the new manifest
    BundleLibraryManifest manifest_existing = BundleLibraryManifest.FromJSON(library.WebLibraryDetail.BundleManifestJSON);
    string manifest_latest_url = manifest_existing.BaseUrl + "/" + manifest_existing.Id + ".qiqqa_bundle_manifest";

    MemoryStream ms;
    UrlDownloader.DownloadWithBlocking(ConfigurationManager.Instance.Proxy, manifest_latest_url, out ms);

    // Only the decoded text is needed afterwards, so the stream is released right away
    // (it was previously never disposed).
    string manifest_latest_json;
    using (ms)
    {
        manifest_latest_json = Encoding.UTF8.GetString(ms.ToArray());
    }
    BundleLibraryManifest manifest_latest = BundleLibraryManifest.FromJSON(manifest_latest_json);

    // It is an old version or we have this version
    if (0 <= String.Compare(manifest_existing.Version, manifest_latest.Version))
    {
        return;
    }

    // It is a version the user has chosen to ignore
    if (library.WebLibraryDetail.LastBundleManifestIgnoreVersion == manifest_latest.Version)
    {
        return;
    }

    BundleLibraryUpdateNotification blun = new BundleLibraryUpdateNotification(library, manifest_latest);
    NotificationManager.Instance.AddPendingNotification(
        new NotificationManager.Notification(
            String.Format("An update is available for your Bundle Library '{0}', from version {1} to {2}.", manifest_latest.Title, manifest_existing.Version, manifest_latest.Version),
            "Bundle Library update available!",
            NotificationManager.NotificationType.Info,
            Icons.LibraryTypeBundle,
            "Download!", blun.Download,
            "No thanks!", blun.NoThanks
        )
    );
}
/// <summary>
/// Returns the HTML-decoded content of the <c>value</c> group from the first match of
/// <paramref name="regexp"/> in <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to search; <c>null</c> is treated as "no match".</param>
/// <param name="regexp">
/// Pattern containing a named group <c>value</c>. It is applied case-insensitively and
/// in single-line mode.
/// </param>
/// <returns>The decoded group value, or an empty string when nothing matches.</returns>
protected string ExtractValue(string text, string regexp)
{
    // Callers feed the output of one extraction into the next; a missing input should
    // yield "nothing found" rather than an ArgumentNullException.
    if (text == null)
    {
        return string.Empty;
    }

    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

    // The static Regex API caches parsed/compiled patterns, so the same expression is
    // not rebuilt each time this helper is called.
    Match match = Regex.Match(text, regexp, options);
    if (!match.Success)
    {
        return string.Empty;
    }

    return UrlDownloader.HtmlDecode(match.Groups["value"].Value);
}
/// <summary>
/// Downloads the bundle described by <paramref name="manifest"/> through the configured
/// proxy, reporting progress through the status manager while polling, and offers to
/// install it when the download finishes successfully.
/// </summary>
/// <param name="manifest">Manifest supplying base URL, id, version, title and support e-mail.</param>
private void ManageDownload(BundleLibraryManifest manifest)
{
    string url = manifest.BaseUrl + "/" + manifest.Id + Common.EXT_BUNDLE;
    UrlDownloader.DownloadAsyncTracker tracker = UrlDownloader.DownloadWithNonBlocking(ConfigurationManager.Instance.Proxy, url);

    string STATUS_TOKEN = "BundleDownload-" + manifest.Version;
    StatusManager.Instance.ClearCancelled(STATUS_TOKEN);

    // Poll once a second until the download finishes or is cancelled
    while (!tracker.DownloadComplete)
    {
        if (StatusManager.Instance.IsCancelled(STATUS_TOKEN))
        {
            tracker.Cancel();
            break;
        }

        StatusManager.Instance.UpdateStatusBusy(STATUS_TOKEN, "Downloading Bundle Library...", tracker.ProgressPercentage, 100, true);
        Thread.Sleep(1000);
    }

    // Work out why the loop ended
    if (tracker.DownloadDataCompletedEventArgs.Cancelled)
    {
        StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Cancelled download of Bundle Library.");
        return;
    }

    // An explicit error and a missing payload are reported the same way
    if (null != tracker.DownloadDataCompletedEventArgs.Error
        || null == tracker.DownloadDataCompletedEventArgs.Result)
    {
        MessageBoxes.Error(tracker.DownloadDataCompletedEventArgs.Error, "There was an error during the download of your Bundle Library. Please try again later or contact {0} for more information.", manifest.SupportEmail);
        StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Error during download of Bundle Library.");
        return;
    }

    StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Completed download of Bundle Library.");

    bool install_now = MessageBoxes.AskQuestion("The Bundle Library named '{0}' has been downloaded. Do you want to install it now?", manifest.Title);
    if (!install_now)
    {
        MessageBoxes.Warn("Not installing Bundle Library.");
        return;
    }

    LibraryBundleInstaller.Install(manifest, tracker.DownloadDataCompletedEventArgs.Result);
}
/// <summary>
/// Reads the proxy list through a real downloader and expects more than five entries.
/// </summary>
public void DownloadsSomeProxies()
{
    var executor = new WebRequestExecutor();
    var requestFactory = new WebRequestFactory(DatabaseTest.ConfigurationReader);
    var urlDownloader = new UrlDownloader(executor, requestFactory);

    var proxies = new ProxyListReader(urlDownloader).Read().ToArray();

    proxies.Length.Should().BeGreaterThan(5);
}
/// <summary>
/// Builds the Yahoo Local search URL for a location, category, optional keyword and
/// page, choosing the site from <c>scLocation.Meta</c> ("canada", "australia", "uk";
/// anything else is treated as USA). Also records the chosen country in <c>country</c>.
/// </summary>
/// <param name="scLocation">Location; its <c>Url</c> is URL-encoded and its <c>Meta</c> selects the site.</param>
/// <param name="scCategory">Category; the last '/'-separated segment of its <c>Url</c> is the search term.</param>
/// <param name="keyword">Optional keyword appended (URL-encoded, preceded by a space) to the search term.</param>
/// <param name="pageNum">Page number; converted to an item offset for the Australian and UK sites.</param>
/// <returns>The search URL, with a <c>radius</c> parameter appended when the radius is positive.</returns>
protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
{
    // URLs are machine-readable: numbers must not pick up the UI culture's
    // decimal separator or sign (e.g. "1,5" for a radius of 1.5).
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string searchTerm = scCategory.Url.Split(new char[] { '/' }).LastOrDefault<string>();
    string location = UrlDownloader.UrlEncode(scLocation.Url).Replace("/", "+");

    if (!string.IsNullOrEmpty(keyword))
    {
        searchTerm += UrlDownloader.UrlEncode(string.Format(" {0}", keyword));
    }

    string searchUrl;
    switch (scLocation.Meta ?? string.Empty)
    {
        case "canada":
            this.country = AbstractScraper.Country.Canada;
            searchUrl = string.Format(invariant, "http://ca.local.yahoo.com/results?csz={0}&stx={1}&pg_nm={2}", location, searchTerm, pageNum);
            break;

        case "australia":
            this.country = AbstractScraper.Country.Australia;
            searchUrl = string.Format(invariant, "http://local.yahoo.com.au/search/{0}/{1}?search.offset={2}", searchTerm, location, (pageNum - 1) * 20);
            break;

        case "uk":
            this.country = AbstractScraper.Country.UK;
            searchUrl = string.Format(invariant, "http://uk.local.yahoo.com/search.html?poi={0}&p={1}&cb={2}&output=html", location, searchTerm, (pageNum - 1) * 10 + 1);
            break;

        default:
            this.country = AbstractScraper.Country.USA;
            searchUrl = string.Format(invariant, "http://local.yahoo.com/results?csz={0}&stx={1}&pg_nm={2}", location, searchTerm, pageNum);
            break;
    }

    if (base.Radius > 0f)
    {
        searchUrl += string.Format("&radius={0}", base.Radius.ToString("#0.##", invariant));
    }

    return searchUrl;
}
/// <summary>
/// Downloads a Canadian listing page and extracts website, map link, coordinates,
/// phone, e-mail, headline and address from it. Region, postal code and city are
/// derived from the comma-separated address.
/// </summary>
/// <param name="url">Listing page address; always stored as <c>AdUrl</c>.</param>
/// <returns>
/// The populated result, or one carrying only <c>AdUrl</c> when the scraper has been
/// terminated or the download fails.
/// </returns>
protected MapScrapeResult ParseDetailsCanada(string url)
{
    MapScrapeResult listing = new MapScrapeResult();
    listing.AdUrl = url;

    string html;
    if (terminated || !downloader.DownloadUrl(url, out html))
    {
        return listing;
    }

    // These values are taken from the whole page
    listing.Website = base.ExtractValue(html, "\"webURL\":{.*?\"href\":\"(?<value>.*?)\"").Trim();
    listing.Map = base.ExtractValue(html, "<a id=\"interactiveMapPrint\" href='(?<value>.*?)'.*?>").Trim();
    listing.Latitude = base.ExtractValue(html, "\"latitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");
    listing.Longitude = base.ExtractValue(html, "\"longitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");

    // Everything else is read from the "busCardLeft" block only
    string card = base.ExtractValue(html, "<div id=\"busCardLeft\">(?<value>.*?)</div>");
    listing.Phone = base.ExtractPhones(card).FirstOrDefault<string>();
    listing.Email = base.ExtractEmails(card).FirstOrDefault<string>();
    listing.Headline = UrlDownloader.SkipHtmlTags(base.ExtractValue(card, "<h1 id=\"ypBusCardBusName\">(?<value>.*?)</h1>")).Trim();
    listing.Address = base.ExtractValue(card, "<p>(?<value>.*?)</p>").Trim();

    string[] addressParts = listing.Address.Split(new char[] { ',' });
    int partCount = addressParts.Length;

    if (partCount > 0)
    {
        // Last part is split at its first space: before -> region, after -> postal code
        string tail = addressParts[partCount - 1].Trim();
        int firstSpace = tail.IndexOf(' ');
        if (firstSpace >= 0)
        {
            listing.Region = tail.Substring(0, firstSpace).Trim();
            listing.ZipCode = tail.Substring(firstSpace + 1).Trim();
        }
    }

    if (partCount > 1)
    {
        listing.City = addressParts[partCount - 2].Trim();
    }

    // The extracted map link is prefixed with the site host
    if (!string.IsNullOrEmpty(listing.Map))
    {
        listing.Map = string.Format("http://www.yellowpages.ca{0}", listing.Map);
    }

    return listing;
}
/// <summary>
/// Queries a BibTeX search server for <paramref name="title"/> and returns the raw
/// response decoded as UTF-8. Latency and failures are reported to the server manager.
/// </summary>
/// <param name="title">Title to search for.</param>
/// <returns>The response text, or <c>null</c> when anything goes wrong (the problem is logged).</returns>
private static string DoSearch(string title)
{
    try
    {
        string encoded_title = Convert.ToBase64String(Encoding.UTF8.GetBytes(title));

        // Key for the bibtexsearch.com authentication hash: the title wrapped in its own
        // first character (left untouched when the title is empty)
        string auth_key = title;
        if (auth_key.Length > 0)
        {
            auth_key = auth_key[0] + auth_key + auth_key[0];
        }
        string auth = StreamFingerprint.FromText(auth_key);

        string url_server = bibtex_search_server_manager.GetServerUrl();

        // NOTE(review): HtmlEncode leaves '+', '/' and '=' of the base64 text as they are;
        // presumably the server expects exactly this - confirm before switching to URL encoding.
        string url = String.Format("{0}/search?auth={1}&qe={2}", url_server, auth, WebUtility.HtmlEncode(encoded_title));

        try
        {
            WebHeaderCollection header_collection;
            Stopwatch clk = Stopwatch.StartNew();

            using (MemoryStream ms = UrlDownloader.DownloadWithBlocking(url, out header_collection))
            {
                bibtex_search_server_manager.ReportLatency(url_server, clk.ElapsedMilliseconds);
                Logging.Debug特("bibtex_search_server_manager: Download {0} took {1} ms", url, clk.ElapsedMilliseconds);

                return Encoding.UTF8.GetString(ms.ToArray());
            }
        }
        catch (Exception ex)
        {
            // Download-stage failures count against the chosen server
            bibtex_search_server_manager.ReportError(url_server);
            Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}' at server '{1}'.", title, url_server);
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}'.", title);
    }

    return null;
}
/// <summary>
/// Checks whether a new release is available.
/// </summary>
/// <remarks>
/// The index is either a JSON array of version entries or a single entry. The first
/// entry whose <c>FileName</c> equals the executing assembly's name (ignoring case) is
/// compared with the running version; any difference in <c>Release</c> counts as an update.
/// </remarks>
/// <param name="urlOfIndex">URL of the JSON index file.</param>
/// <returns>
/// <c>true</c> if an update is available; <c>false</c> if everything is up to date, the
/// URL or index is empty, or the check fails for any reason.
/// </returns>
public static bool CheckForUpdate(string urlOfIndex)
{
    if (string.IsNullOrWhiteSpace(urlOfIndex))
    {
        return false;
    }

    try
    {
        var assemblyName = Assembly.GetExecutingAssembly().GetName();
        FileVersionInfo info = new FileVersionInfo()
        {
            FileName = assemblyName.Name,
            Release = assemblyName.Version?.ToString()
        };

        IUrlDownloader downloader = new UrlDownloader("");
        string knownVersions = downloader.LoadFromUrl(urlOfIndex, true);
        if (string.IsNullOrWhiteSpace(knownVersions))
        {
            return false;
        }

        List<FileVersionInfo> loadedVersions = new List<FileVersionInfo>();
        try
        {
            // A JSON "null" deserializes to null; treat it as an empty index
            List<FileVersionInfo> entries = JsonConvert.DeserializeObject<List<FileVersionInfo>>(knownVersions);
            if (entries != null)
            {
                loadedVersions.AddRange(entries);
            }
        }
        catch
        {
            // Not an array: fall back to reading the index as a single entry
            FileVersionInfo entry = JsonConvert.DeserializeObject<FileVersionInfo>(knownVersions);
            if (entry != null)
            {
                loadedVersions.Add(entry);
            }
        }

        // Null entries and entries without a FileName are skipped; previously one such
        // entry threw inside the lambda and made the whole check report "no update".
        FileVersionInfo newestVersion = loadedVersions.FirstOrDefault(
            x => x != null && string.Equals(x.FileName, info.FileName, StringComparison.OrdinalIgnoreCase));

        return newestVersion != null && newestVersion.Release != info.Release;
    }
    catch
    {
        // Best effort by design: a failed check is reported as "no update".
        // maybe some logging in the future
    }

    return false;
}
/// <summary>
/// Queries a BibTeX search server for <paramref name="title"/> through the configured
/// proxy and returns the raw response decoded as UTF-8. Latency and failures are
/// reported to the server manager.
/// </summary>
/// <param name="title">Title to search for.</param>
/// <returns>The response text, or <c>null</c> when anything goes wrong (the problem is logged).</returns>
private static string DoSearch(string title)
{
    try
    {
        string title_encoded = Convert.ToBase64String(Encoding.UTF8.GetBytes(title));

        // The authentication key is the title wrapped in its own first character
        string auth = title;
        if (0 < auth.Length)
        {
            auth = auth[0] + auth + auth[0];
        }
        auth = StreamFingerprint.FromText(auth);

        string url_server = bibtex_search_server_manager.GetServerUrl();
        string url = String.Format("{0}/search?auth={1}&qe={2}", url_server, auth, WebUtility.HtmlEncode(title_encoded));

        MemoryStream ms = null;
        try
        {
            WebHeaderCollection header_collection;

            DateTime START = DateTime.UtcNow;
            UrlDownloader.DownloadWithBlocking(ConfigurationManager.Instance.Proxy, url, out ms, out header_collection);
            DateTime STOP = DateTime.UtcNow;
            bibtex_search_server_manager.ReportLatency(url_server, (STOP - START).TotalMilliseconds);

            string json = Encoding.UTF8.GetString(ms.ToArray());
            return json;
        }
        catch (Exception ex)
        {
            // Download-stage failures count against the chosen server
            bibtex_search_server_manager.ReportError(url_server);
            Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}' at server '{1}'.", title, url_server);
        }
        finally
        {
            // The response stream was previously never released
            if (ms != null)
            {
                ms.Dispose();
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}'.", title);
    }

    return null;
}
/// <summary>
/// Starts downloading the track selected in the playlist, unless a download is already
/// in progress or nothing is selected. The file is named "artist - title.mp3" when both
/// are known, otherwise after the track's <c>buferTitle</c>.
/// </summary>
private void buttonDownload_Click(object sender, EventArgs e)
{
    if (UrlDownloader.DownloadStatus || playlist.SelectedIndex == -1)
    {
        return;
    }

    string directory = Settings.Default.path;
    var track = SoundCloud.MyTracklist[playlist.SelectedIndex];

    bool hasArtistAndTitle = !string.IsNullOrEmpty(track.artist) && !string.IsNullOrEmpty(track.title);
    string filename = hasArtistAndTitle
        ? String.Format("{0} - {1}.mp3", track.artist, track.title)
        : String.Format("{0}.mp3", track.buferTitle);

    UrlDownloader.Downloader(track.stream_url + "?client_id=" + SoundCloud.clientID, directory, filename);

    // NOTE(review): the handler is attached after the download call and on every click;
    // whether that can miss the event or stack handlers depends on how Downloader
    // manages webClient - confirm.
    UrlDownloader.webClient.DownloadFileCompleted += WebClient_DownloadFileCompleted;
}
/// <summary>
/// Downloads a US listing page and extracts headline, address parts, e-mail, phone,
/// website, map link and coordinates from its markup.
/// </summary>
/// <param name="url">Listing page address; always stored as <c>AdUrl</c>.</param>
/// <returns>
/// The populated result, or one carrying only <c>AdUrl</c> when the scraper has been
/// terminated or the download fails.
/// </returns>
protected MapScrapeResult ParseDetailsUSA(string url)
{
    MapScrapeResult listing = new MapScrapeResult();
    listing.AdUrl = url;

    string html;
    if (terminated || !downloader.DownloadUrl(url, out html))
    {
        return listing;
    }

    listing.Headline = UrlDownloader.SkipHtmlTags(base.ExtractValue(html, "<h1 class=\"fn org\">(?<value>.*?)</h1>")).Trim();

    string primaryLocation = base.ExtractValue(html, "<p class=\"primary-location\">(?<value>.*?)</p>");
    if (!string.IsNullOrEmpty(primaryLocation))
    {
        // Structured address: street, city, region and postal code are separate spans
        listing.Address = base.ExtractValue(primaryLocation, "<span class=\"street-address\">(?<value>.*?)</span>").Trim();
        listing.City = base.ExtractValue(primaryLocation, "<span class=\"locality\">(?<value>.*?)</span>");
        listing.Region = base.ExtractValue(primaryLocation, "<span class=\"region\">(?<value>.*?)</span>");
        listing.ZipCode = base.ExtractValue(primaryLocation, "<span class=\"postal-code\">(?<value>.*?)</span>");
    }
    else
    {
        // No structured block: fall back to the single-span listing address
        listing.Address = base.ExtractValue(html, "<span class=\"listing-address adr\">(?<value>.*?)</span>").Trim();
    }

    listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();
    listing.Phone = base.ExtractValue(html, "<p class=\"phone\">(?<value>.*?)</p>");

    string featureLinks = base.ExtractValue(html, "<ul class=\"feature-links blue-arrow\">(?<value>.*?)</ul>").Trim();
    string website = base.ExtractValue(featureLinks, "href=\"(?<value>.*?)\".*?Visit Website</a>").Trim();
    listing.Website = website;
    if (listing.Website.StartsWith("mailto:"))
    {
        // A mail link is not a website
        listing.Website = string.Empty;
    }

    string mapPath = base.ExtractValue(html, "<div id=\"mip-minimap\">.*?href=\"(?<value>.*?)\".*?</div>").Trim();
    listing.Map = string.Format("http://www.yellowpages.com{0}", mapPath);

    listing.Latitude = base.ExtractValue(html, "<span class=\"latitude\" id=\"map-latitude\">(?<value>.*?)</span>");
    listing.Longitude = base.ExtractValue(html, "<span class=\"longitude\" id=\"map-longitude\">(?<value>.*?)</span>");

    return listing;
}
/// <summary>
/// Downloads a UK listing page and extracts headline, address, postal code, city,
/// coordinates, map link, e-mail and phone from its markup.
/// </summary>
/// <param name="url">Listing page address; always stored as <c>AdUrl</c>.</param>
/// <returns>
/// The populated result, or one carrying only <c>AdUrl</c> when the scraper has been
/// terminated or the download fails.
/// </returns>
protected MapScrapeResult ParseDetailsUK(string url)
{
    MapScrapeResult listing = new MapScrapeResult();
    listing.AdUrl = url;

    string html;
    if (terminated || !downloader.DownloadUrl(url, out html))
    {
        return listing;
    }

    listing.Headline = base.ExtractValue(html, "<h1 class=\"org fn n\">(?<value>.*?)</h1>");
    listing.Address = base.ExtractValue(html, "<p class=\"street-address\">(?<value>.*?)</p>");
    listing.ZipCode = base.ExtractValue(html, "<span class=\"postal-code\">(?<value>.*?)</span>");
    listing.Address = UrlDownloader.SkipHtmlTags(listing.Address).Trim();

    // Break the address into pieces before tabs/newlines are stripped from it,
    // dropping pieces that are only whitespace
    string[] separators = new string[] { "\t", "\n", "," };
    List<string> addressPieces = new List<string>(listing.Address.Split(separators, StringSplitOptions.RemoveEmptyEntries));
    addressPieces.RemoveAll(piece => piece.Trim() == string.Empty);

    listing.Address = listing.Address.Replace("\t", string.Empty);
    listing.Address = listing.Address.Replace("\n", string.Empty);

    // The second remaining piece is taken as the city
    if (addressPieces.Count > 1)
    {
        listing.City = addressPieces[1];
    }

    string geo = base.ExtractValue(html, "<p class=\"geo\">(?<value>.*?)</p>");
    listing.Latitude = base.ExtractValue(geo, "<span class=\"latitude\">(?<value>.*?)</span>");
    listing.Longitude = base.ExtractValue(geo, "<span class=\"longitude\">(?<value>.*?)</span>");

    listing.Map = base.ExtractValue(html, "<div id=\"ent-page-md-links\">.*?href=\"(?<value>.*?)\".*?</div>");
    listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();
    listing.Phone = base.ExtractValue(html, "<h2 class=\"tel\">.*?<span class=\"value\">(?<value>.*?)</span>").Trim();

    return listing;
}
/// <summary>
/// Downloads a Google Scholar page, loads it as UTF-8 HTML (byte-order-mark detection
/// off) and passes the document to <c>ScrapeDoc</c>, which fills the returned list.
/// </summary>
/// <param name="url">Address of the page to download and scrape.</param>
/// <returns>
/// The list populated by <c>ScrapeDoc</c>. Any exception is logged and whatever was
/// collected up to that point (possibly nothing) is returned.
/// </returns>
public static List<GoogleScholarScrapePaper> ScrapeUrl(string url)
{
    var papers = new List<GoogleScholarScrapePaper>();

    try
    {
        WebHeaderCollection response_headers;
        using (MemoryStream page = UrlDownloader.DownloadWithBlocking(url, out response_headers))
        {
            HtmlDocument html = new HtmlDocument();
            html.Load(page, System.Text.Encoding.UTF8, detectEncodingFromByteOrderMarks: false);

            ScrapeDoc(html, url, papers);
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem parsing the GoogleScholar url {0}", url);
    }

    return papers;
}
/// <summary>
/// Scans a UK (yell.com) results page for adverts, builds a result for each one
/// directly from the results-page markup, and adds every acceptable result to
/// <paramref name="result"/>, notifying the callback.
/// </summary>
/// <param name="text">HTML of the results page.</param>
/// <param name="result">Shared result list; locked while a result is added.</param>
/// <param name="Location">Not used by this method.</param>
/// <param name="Category">Category whose name is stamped on each result.</param>
/// <returns>
/// <c>false</c> as soon as the scraper is found to be terminated; otherwise whether the
/// page contains a "Next" link.
/// </returns>
protected bool ProcessUK(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    const string advertPattern = "<div class=\"advert-content\">(?<text>.*?)</div>.*?(?<cta><div class=\"advert-cta\">.*?</div>){1}.*?<ul class=\"tabbed\">(?<tab>.*?)</ul>{1}";

    // The static Regex API reuses the cached compiled expression instead of
    // recompiling the same pattern for every page processed.
    foreach (Match match in Regex.Matches(text, advertPattern, options))
    {
        if (this.terminated)
        {
            return false;
        }

        string content = match.Groups["text"].Value;
        string callToAction = match.Groups["cta"].Value;
        string tabs = match.Groups["tab"].Value;

        MapScrapeResult mapScrapeResult = new MapScrapeResult();
        mapScrapeResult.AdUrl = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"summaryTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
        mapScrapeResult.Map = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"mapTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
        mapScrapeResult.Phone = UrlDownloader.SkipHtmlTags(base.ExtractValue(callToAction, "<ul class=\"(tel-single|tel-multiple)\">.*?<strong>(?<value>.*?)</strong>.*?</ul>")).Trim();

        // The headline markup also carries the website link; read it before stripping tags
        mapScrapeResult.Headline = base.ExtractValue(content, "<h2 class=\"coName\">(?<value>.*?)</h2>");
        mapScrapeResult.Website = base.ExtractValue(mapScrapeResult.Headline, "href='(?<value>.*?)'").Trim();
        mapScrapeResult.Headline = UrlDownloader.SkipHtmlTags(mapScrapeResult.Headline).Trim();

        mapScrapeResult.Address = UrlDownloader.SkipHtmlTags(base.ExtractValue(content, "<p class=\"address\">(?<value>.*?)</p>")).Trim();
        mapScrapeResult.ZipCode = base.ExtractValue(mapScrapeResult.Address, "(?<value>[A-Z]{1,2}[0-9R][0-9A-Z]?\\s*?[0-9][ABD-HJLNP-UW-Z]{2})");

        // Region = last comma-separated address part, city = the one before it;
        // the postal code is removed from either when one was found
        string[] addressParts = mapScrapeResult.Address.Split(new char[] { ',' });
        if (addressParts.Length > 0)
        {
            mapScrapeResult.Region = addressParts[addressParts.Length - 1];
            if (mapScrapeResult.ZipCode.Length > 0)
            {
                mapScrapeResult.Region = mapScrapeResult.Region.Replace(mapScrapeResult.ZipCode, string.Empty).Trim();
            }
            else
            {
                mapScrapeResult.Region = mapScrapeResult.Region.Trim();
            }
        }
        if (addressParts.Length > 1)
        {
            if (string.IsNullOrEmpty(mapScrapeResult.ZipCode))
            {
                mapScrapeResult.City = addressParts[addressParts.Length - 2].Trim();
            }
            else
            {
                mapScrapeResult.City = addressParts[addressParts.Length - 2].Replace(mapScrapeResult.ZipCode, string.Empty).Trim();
            }
        }

        if (this.terminated)
        {
            return false;
        }
        if (!this.CanAddRes(result, mapScrapeResult))
        {
            continue;
        }

        if (!string.IsNullOrEmpty(mapScrapeResult.Website))
        {
            string website;
            base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
            mapScrapeResult.Website = website;
        }
        mapScrapeResult.Category = Category.Name;

        // Checked again on purpose: the result was modified after the first check
        if (!this.CanAddRes(result, mapScrapeResult))
        {
            continue;
        }

        lock (result)
        {
            result.Add(mapScrapeResult);
        }

        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(mapScrapeResult);
        }
    }

    // Markup is matched ordinally; a culture-sensitive search is not wanted for HTML
    return text.IndexOf(">Next</a>", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Entry point: creates a downloader for a fixed URL and target name and starts it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var downloader = new UrlDownloader("http://www.ynet.co.il", "myFile");
    downloader.Start();
}
/// <summary>
/// Builds the Yellow Pages search URL for a location, category, optional keyword and
/// page, choosing the site from <c>scLocation.Meta</c> ("canada", "australia", "uk";
/// anything else is treated as USA). Also records the chosen country in <c>country</c>.
/// </summary>
/// <param name="scLocation">Location; spaces in its <c>Url</c> become '+', and its <c>Meta</c> selects the site.</param>
/// <param name="scCategory">Category; spaces in its <c>Url</c> become '+'.</param>
/// <param name="keyword">Optional keyword; URL-encoded and added to the query when non-empty.</param>
/// <param name="pageNum">Page number (not part of the Canadian URL).</param>
/// <returns>The search URL, with a <c>refinements[radius]</c> parameter appended when the radius is positive.</returns>
protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
{
    // URLs are machine-readable: numbers must not pick up the UI culture's
    // decimal separator or sign (e.g. "1,5" for a radius of 1.5).
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string location = scLocation.Url.Replace(" ", "+");
    string category = scCategory.Url.Replace(" ", "+");
    keyword = UrlDownloader.UrlEncode(keyword);
    bool hasKeyword = !string.IsNullOrEmpty(keyword);

    string searchUrl;
    switch (scLocation.Meta ?? string.Empty)
    {
        case "canada":
            this.country = AbstractScraper.Country.Canada;
            // For this site dashes in the category become spaces and tags are stripped
            category = UrlDownloader.SkipHtmlTags(category.Replace('-', ' '));
            searchUrl = hasKeyword
                ? string.Format(invariant, "http://www.yellowpages.ca/search/?stype=si&what={0}+{1}&where={2}", category, keyword, location)
                : string.Format(invariant, "http://www.yellowpages.ca/search/?stype=si&what={0}&where={1}", category, location);
            break;

        case "australia":
            // Keep-alive is switched off on the downloader for this site
            if (base.Downloader != null)
            {
                base.Downloader.KeepAlive = false;
            }
            this.country = AbstractScraper.Country.Australia;
            searchUrl = hasKeyword
                ? string.Format(invariant, "http://www.yellowpages.com.au/search/listings?clue={1}+{3}&locationClue={0}&x=0&y=0&pageNumber={2}", location, category, pageNum, keyword)
                : string.Format(invariant, "http://www.yellowpages.com.au/search/listings?clue={1}&locationClue={0}&x=0&y=0&pageNumber={2}", location, category, pageNum);
            break;

        case "uk":
            this.country = AbstractScraper.Country.UK;
            searchUrl = hasKeyword
                ? string.Format(invariant, "http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}+{1}&location={2}&pageNum={3}", category, keyword, location, pageNum)
                : string.Format(invariant, "http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}&location={1}&pageNum={2}", category, location, pageNum);
            break;

        default:
            this.country = AbstractScraper.Country.USA;
            searchUrl = hasKeyword
                ? string.Format(invariant, "http://www.yellowpages.com/{0}/{1}?q={2}&page={3}", location, category, keyword, pageNum)
                : string.Format(invariant, "http://www.yellowpages.com/{0}/{1}?page={2}", location, category, pageNum);
            break;
    }

    if (base.Radius > 0f)
    {
        searchUrl += string.Format("&refinements[radius]={0}", base.Radius.ToString("#0.##", invariant));
    }

    return searchUrl;
}
/// <summary>
/// Builds the downloader under test from the fixture's request executor and request factory.
/// </summary>
public UrlDownloaderTest()
{
    _downloader = new UrlDownloader(
        _webRequestExecutor,
        _webRequestFactory
    );
}
/// <summary>
/// Creates a Craigslist scraper. Both arguments are forwarded to the base constructor;
/// the scraper then sets its initial page number to -1 and tags its campaign with
/// search engine id 0.
/// </summary>
/// <param name="downloader">Downloader passed to the base scraper.</param>
/// <param name="callback">Result callback passed to the base scraper.</param>
public CraigslistScraper(UrlDownloader downloader, ScrapeResultCallback callback)
    : base(downloader, callback)
{
    initialPageNum = -1;
    campaign.SearchEngine = 0;
}
/// <summary>
/// Downloads the content at <paramref name="url"/> and hands it to <c>LoadRaw</c>.
/// </summary>
/// <param name="url">Address to download.</param>
public void LoadFromUrl(Uri url)
{
    var downloaded = UrlDownloader.Download(url);
    LoadRaw(downloaded);
}