Beispiel #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, loads it into an <c>HtmlDocument</c> and
        /// hands it to <c>ScrapeDoc</c> to collect the papers found on it.
        /// </summary>
        /// <param name="proxy">Proxy passed through to <c>UrlDownloader.DownloadWithBlocking</c>.</param>
        /// <param name="url">Address of the page to download and scrape.</param>
        /// <returns>
        /// The list handed to <c>ScrapeDoc</c>. Any exception is logged and swallowed, so the
        /// list may be empty or incomplete, but it is never null.
        /// </returns>
        public static List<GoogleScholarScrapePaper> ScrapeUrl(IWebProxy proxy, string url)
        {
            List<GoogleScholarScrapePaper> gssps = new List<GoogleScholarScrapePaper>();

            // The downloader supplies the stream through an 'out' parameter, so a placeholder
            // instance allocated here would simply be overwritten and never disposed.
            MemoryStream ms = null;

            try
            {
                WebHeaderCollection header_collection;
                UrlDownloader.DownloadWithBlocking(proxy, url, out ms, out header_collection);

                HtmlDocument doc = new HtmlDocument();
                doc.Load(ms);

                ScrapeDoc(doc, url, gssps);
            }
            catch (Exception ex)
            {
                // Best effort: a failed download or parse is reported, not propagated.
                Logging.Error(ex, "There was a problem parsing the GoogleScholar url");
            }
            finally
            {
                // ms is still null when the download itself failed before assigning it.
                if (ms != null)
                {
                    ms.Dispose();
                }
            }

            return gssps;
        }
        /// <summary>
        /// Downloads <paramref name="url"/>, passes the response to <c>ProcessDisplayResponse</c>
        /// and then enables or disables the chat controls depending on whether that succeeded.
        /// </summary>
        /// <param name="url">Address to request.</param>
        private void PerformRequest(string url)
        {
            bool is_chat_available = false;
            MemoryStream ms = null;

            try
            {
                UrlDownloader.DownloadWithBlocking(ConfigurationManager.Instance.Proxy, url, out ms);
                ProcessDisplayResponse(ms);

                is_chat_available = true;
            }
            catch (Exception ex)
            {
                Logging.Warn(ex, "There was a problem communicating with chat.");

                // After a failure, move next_autopoll_datetime one minute into the future.
                next_autopoll_datetime = DateTime.UtcNow.AddMinutes(1);

                is_chat_available = false;
            }
            finally
            {
                // The downloaded stream is only needed for the call above; release it on every path.
                // NOTE(review): assumes ProcessDisplayResponse does not keep a reference to the stream.
                if (ms != null)
                {
                    ms.Dispose();
                }
            }

            // Application.Current is null once the WPF application object has gone away
            // (e.g. during shutdown); there is no UI left to update in that case.
            Application application = Application.Current;
            if (application == null)
            {
                return;
            }

            application.Dispatcher.BeginInvoke(new Action(() =>
            {
                this.IsEnabled = is_chat_available;
                TxtChatUnavailable.Visibility = is_chat_available ? Visibility.Collapsed : Visibility.Visible;
            }));
        }
 /// <summary>
 /// Creates the scraper, forwarding both arguments to the base constructor and then
 /// applying this scraper's initial settings.
 /// </summary>
 /// <param name="downloader">Forwarded unchanged to the base constructor.</param>
 /// <param name="callback">Forwarded unchanged to the base constructor.</param>
 public YahooScraper(UrlDownloader downloader, ScrapeResultCallback callback)
     : base(downloader, callback)
 {
     initialPageNum = 0;
     base.Radius = 0f;
     allowsCategorylessSearch = true;
     campaign.SearchEngine = 4;
 }
        /// <summary>
        /// Downloads <paramref name="url"/>, feeds the response to <c>ProcessDisplayResponse</c>
        /// and afterwards reflects success or failure in the chat controls.
        /// </summary>
        /// <param name="url">Address to request; also included in the warning logged on failure.</param>
        private void PerformRequest(string url)
        {
            bool chat_reachable;

            try
            {
                using (MemoryStream response = UrlDownloader.DownloadWithBlocking(url))
                {
                    ProcessDisplayResponse(response);
                }

                chat_reachable = true;
            }
            catch (Exception ex)
            {
                Logging.Warn(ex, "There was a problem communicating with chat. URL: {0}", url);

                // After a failure, move next_autopoll_datetime one minute into the future.
                next_autopoll_datetime = DateTime.UtcNow.AddMinutes(1);

                chat_reachable = false;
            }

            // Do not touch the dispatcher while Qiqqa is shutting down: doing so would crash.
            if (null == Application.Current || Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown)
            {
                Logging.Warn("Chat: detected Qiqqa shutting down.");
                return;
            }

            Action update_ui = () =>
            {
                IsEnabled = chat_reachable;
                TxtChatUnavailable.Visibility = chat_reachable ? Visibility.Collapsed : Visibility.Visible;
            };
            Application.Current.Dispatcher.BeginInvoke(update_ui);
        }
        /// <summary>
        /// Downloads a listing detail page and extracts its fields with regular expressions.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result with only <c>AdUrl</c> set when the scraper is terminated or the download
        /// fails; otherwise a result populated from the page markup.
        /// </returns>
        protected MapScrapeResult ParseDetailsAustralia(string url)
        {
            MapScrapeResult details = new MapScrapeResult();
            details.AdUrl = url;

            string html;
            if (this.terminated || !this.downloader.DownloadUrl(url, out html))
            {
                return details;
            }

            details.Headline = base.ExtractValue(html, "<h1>(?<value>.*?)</h1>");

            // Address first holds the raw markup of the address block so that the
            // region / city / postal code can be picked out of it ...
            details.Address = base.ExtractValue(html, "<div class=\"adr\">(?<value>.*?)</div>");
            details.Region  = base.ExtractValue(details.Address, "<span class=\"region\".*?>(?<value>.*?)</span>");
            details.City    = base.ExtractValue(details.Address, "<span.*?class=\"locality\".*?>(?<value>.*?)</span>");
            details.ZipCode = base.ExtractValue(details.Address, "<span.*?class=\"postal-code\".*?>(?<value>.*?)</span>");

            // ... and is then reduced to text without tags, tabs or line feeds.
            string addressText = UrlDownloader.SkipHtmlTags(details.Address);
            addressText = addressText.Replace("\t", string.Empty);
            addressText = addressText.Replace("\n", string.Empty);
            details.Address = addressText.Trim();

            details.Latitude  = base.ExtractValue(html, "<span class=\"latitude\">(?<value>.*?)</span>");
            details.Longitude = base.ExtractValue(html, "<span class=\"longitude\">(?<value>.*?)</span>");
            details.Map = string.Format(
                "http://maps.yahoo.com/maps_result?ard=1&lat={0}&lon={1}&zoom=18",
                details.Latitude,
                details.Longitude);

            // Website and phone are both taken from the "pageTools" list.
            string pageTools = base.ExtractValue(html, "<ul class=\"pageTools\">(?<value>.*?)</ul>");
            details.Website = base.ExtractValue(pageTools, "href=\"(?<value>.*?)\"");
            details.Phone   = base.ExtractValue(pageTools, "<span class=\"tl-phone-full\">(?<value>.*?)</span>").Trim();

            details.Email = base.ExtractEmails(html).FirstOrDefault<string>();

            return details;
        }
Beispiel #6
0
        /// <summary>
        /// Downloads the bundle described by <paramref name="manifest"/>, reporting progress through
        /// <c>StatusManager</c>, and offers to install it once the download has completed.
        /// </summary>
        /// <remarks>
        /// Asserts on entry that it is not running on the UI thread; the method polls the
        /// download tracker and sleeps between polls until the download ends or is cancelled.
        /// </remarks>
        /// <param name="manifest">Manifest supplying the download location, version, title and support contact.</param>
        private void ManageDownload(BundleLibraryManifest manifest)
        {
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

            string url = manifest.BaseUrl + @"/" + manifest.Id + Common.EXT_BUNDLE;

            using (UrlDownloader.DownloadAsyncTracker download_async_tracker = UrlDownloader.DownloadWithNonBlocking(url))
            {
                string STATUS_TOKEN = "BundleDownload-" + manifest.Version;

                StatusManager.Instance.ClearCancelled(STATUS_TOKEN);
                while (!download_async_tracker.DownloadComplete)
                {
                    // An application shutdown is treated as a cancellation request.
                    if (ShutdownableManager.Instance.IsShuttingDown)
                    {
                        Logging.Error("Canceling download of Bundle Library due to signaled application shutdown");
                        StatusManager.Instance.SetCancelled(STATUS_TOKEN);
                    }

                    if (StatusManager.Instance.IsCancelled(STATUS_TOKEN))
                    {
                        download_async_tracker.Cancel();
                        break;
                    }

                    StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Downloading Bundle Library...", download_async_tracker.ProgressPercentage, 100, true);

                    ShutdownableManager.Sleep(3000);
                }

                // Check the reason for exiting.
                // When the loop was left through the cancel path above, the completion arguments
                // may not have been published yet; treat that like a reported cancellation
                // instead of dereferencing null.
                var completed = download_async_tracker.DownloadDataCompletedEventArgs;

                if (null == completed || completed.Cancelled)
                {
                    StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Cancelled download of Bundle Library.");
                }
                else if (null != completed.Error || null == completed.Result)
                {
                    // Result is only inspected when no error was reported (short-circuit evaluation).
                    MessageBoxes.Error(completed.Error, "There was an error during the download of your Bundle Library.  Please try again later or contact {0} for more information.", manifest.SupportEmail);
                    StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Error during download of Bundle Library.");
                }
                else
                {
                    StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Completed download of Bundle Library.");
                    if (MessageBoxes.AskQuestion("The Bundle Library named '{0}' has been downloaded.  Do you want to install it now?", manifest.Title))
                    {
                        LibraryBundleInstaller.Install(manifest, completed.Result);
                    }
                    else
                    {
                        MessageBoxes.Warn("Not installing Bundle Library.");
                    }
                }
            }
        }
        /// <summary>
        /// Follows the chain of parent styles starting at <paramref name="style_xml_filename"/>
        /// until a style that is not dependent on another one is reached.
        /// </summary>
        /// <remarks>
        /// When a parent style file is missing, the user is asked whether it should be downloaded
        /// automatically; on refusal the parent's URL is opened in the browser instead.
        /// </remarks>
        /// <param name="style_xml_filename">Path of the style file to start from.</param>
        /// <returns>
        /// The filename of the root style, or null when a required parent style file does not
        /// exist after the prompt / download attempt.
        /// </returns>
        internal static string GetRootStyleFilename(string style_xml_filename)
        {
            const string NO_WRITE_PERMISSION_MESSAGE = "You don't seem to have permission to write the new style to the directory '{0}'.\nPlease copy the original style file '{1}' to a folder where you can write (perhaps alongside your Word document), and try again.";

            string parent_filename;
            string parent_url;

            // Not a dependent style, so this filename is the root.
            if (!IsDependentStyle(style_xml_filename, out parent_filename, out parent_url))
            {
                return style_xml_filename;
            }

            // The parent is expected in the same directory as the dependent style.
            string style_directory = Path.GetDirectoryName(style_xml_filename);
            string full_parent_filename = Path.GetFullPath(Path.Combine(style_directory, parent_filename));

            if (!File.Exists(full_parent_filename))
            {
                string message = String.Format(
                    "Can't find parent style for this dependent style" +
                    "\n\n" +
                    "Your style depends on a parent style named {0}, which needs to be saved in the same directory.\n\n" +
                    "It appears to be available from {1}.\n" +
                    "Shall we try to download it automatically?  If you choose NO, Qiqqa will open the website for you so you can download it manually.",
                    parent_filename, parent_url
                    );

                bool download_automatically = MessageBoxes.AskQuestion(message);
                if (!download_automatically)
                {
                    MainWindowServiceDispatcher.Instance.OpenUrlInBrowser(parent_url, true);
                }
                else
                {
                    try
                    {
                        using (MemoryStream downloaded = UrlDownloader.DownloadWithBlocking(parent_url))
                        {
                            byte[] style_bytes = downloaded.ToArray();
                            File.WriteAllBytes(full_parent_filename, style_bytes);
                        }
                    }
                    catch (UnauthorizedAccessException ex)
                    {
                        // Only the missing-write-permission case is handled here: log it and tell the user.
                        Logging.Error(ex, NO_WRITE_PERMISSION_MESSAGE, full_parent_filename, style_xml_filename);
                        MessageBoxes.Warn(NO_WRITE_PERMISSION_MESSAGE, full_parent_filename, style_xml_filename);
                    }
                }
            }

            // Check again: if the parent exists now, recurse into it; otherwise we need the
            // parent style but could not obtain it, so there is nothing to return.
            return File.Exists(full_parent_filename)
                ? GetRootStyleFilename(full_parent_filename)
                : null;
        }
        /// <summary>
        /// Checks whether a newer manifest is published for a bundle library and, if so, queues
        /// a notification offering the update.
        /// </summary>
        /// <remarks>
        /// Asserts on entry that it is not running on the UI thread. Does nothing for non-bundle
        /// libraries or when the previous check happened less than 60 minutes ago.
        /// </remarks>
        /// <param name="web_library_detail">Library whose bundle manifest is compared against the published one.</param>
        internal static void Check(WebLibraryDetail web_library_detail)
        {
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

            // We can operate only on bundle libs
            if (!web_library_detail.IsBundleLibrary)
            {
                return;
            }

            // Only check every hour
            DateTime previous_check_utc = web_library_detail.LastBundleManifestDownloadTimestampUTC ?? DateTime.MinValue;
            double minutes_since_previous_check = DateTime.UtcNow.Subtract(previous_check_utc).TotalMinutes;
            if (minutes_since_previous_check < 60)
            {
                return;
            }

            // Flag that we are running this update check now
            web_library_detail.LastBundleManifestDownloadTimestampUTC = DateTime.UtcNow;
            WebLibraryManager.Instance.NotifyOfChangeToWebLibraryDetail();

            // Download the new manifest
            BundleLibraryManifest manifest_existing = BundleLibraryManifest.FromJSON(web_library_detail.BundleManifestJSON);
            string manifest_latest_url = manifest_existing.BaseUrl + @"/" + manifest_existing.Id + Common.EXT_BUNDLE_MANIFEST;

            string manifest_latest_json;
            using (MemoryStream downloaded = UrlDownloader.DownloadWithBlocking(manifest_latest_url))
            {
                manifest_latest_json = Encoding.UTF8.GetString(downloaded.ToArray());
            }

            BundleLibraryManifest manifest_latest = BundleLibraryManifest.FromJSON(manifest_latest_json);

            // Nothing to offer when the published version is not newer than ours (string
            // comparison), or when it is the version the user has chosen to ignore.
            if (String.Compare(manifest_existing.Version, manifest_latest.Version) >= 0
                || web_library_detail.LastBundleManifestIgnoreVersion == manifest_latest.Version)
            {
                return;
            }

            BundleLibraryUpdateNotification blun = new BundleLibraryUpdateNotification(web_library_detail, manifest_latest);

            string notification_text = String.Format("An update is available for your Bundle Library '{0}', from version {1} to {2}.", manifest_latest.Title, manifest_existing.Version, manifest_latest.Version);

            NotificationManager.Notification notification = new NotificationManager.Notification(
                notification_text,
                "Bundle Library update available!",
                NotificationManager.NotificationType.Info,
                Icons.LibraryTypeBundle,
                "Download!",
                blun.Download,
                "No thanks!",
                blun.NoThanks
                );

            NotificationManager.Instance.AddPendingNotification(notification);
        }
Beispiel #9
0
        /// <summary>
        /// Handles a URL request by running a <c>UrlDownloader</c> for it on a new thread.
        /// The base implementation is deliberately not called, which gobbles up the
        /// UrlRequested event.
        /// </summary>
        /// <param name="url">Requested address, passed to the downloader.</param>
        /// <param name="handle">Stream handle passed to the downloader.</param>
        protected override void OnUrlRequested(string url, HTMLStream handle)
        {
            UrlDownloader downloader = new UrlDownloader(url, handle);
            ThreadStart entryPoint = downloader.Go;
            Thread worker = new Thread(entryPoint);

            // Record the worker in 'threads' before it is started.
            threads.Add(worker);
            worker.Start();
        }
Beispiel #10
0
        /// <summary>
        /// Scans a results page for listings, loads the details of each one and appends the
        /// accepted entries to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an entry is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose name is stamped on every scraped entry.</param>
        /// <returns>
        /// false as soon as the scraper is found to be terminated; otherwise true when the page
        /// contains the "next" list item marker, false when it does not.
        /// </returns>
        protected bool ProcessUSA(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex listingRegex = new Regex("<h3 class=\"business-name fn org\">.*?<a.*?href=\"(?<href>.*?)\".*?</a>.*?</h3>.*?<div class=\"distance\">(?<radius>.*?)</div>", options);

            foreach (Match match in listingRegex.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult mapScrapeResult = this.ParseDetailsUSA(match.Groups["href"].Value);
                mapScrapeResult.Radius   = UrlDownloader.SkipHtmlTags(match.Groups["radius"].Value).Trim();
                mapScrapeResult.Category = Category.Name;

                // First acceptance check, before the website is walked.
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // Second acceptance check, now that WalkWebsite may have changed the entry.
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            return text.IndexOf("<li class=\"next\">") >= 0;
        }
Beispiel #11
0
        /// <summary>
        /// Scans a results page for listings, loads the details of each one and appends the
        /// accepted entries to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an entry is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose name is stamped on every scraped entry.</param>
        /// <returns>
        /// false as soon as the scraper is found to be terminated; otherwise true when the page
        /// contains a "Next" link marker, false when it does not.
        /// </returns>
        protected bool ProcessCanada(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex listingRegex = new Regex("<div class=\"listingDetail\".*?>.*?<h3 class=\"listingTitleLine\">.*?href=\"(?<href>.*?)\".*?</h3>.*?<h4 class=\"phoneLink\">(?<phone>.*?)</h4>", options);

            foreach (Match match in listingRegex.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                // The captured href is appended to the site root to form the detail page address.
                MapScrapeResult mapScrapeResult = this.ParseDetailsCanada(string.Format("http://www.yellowpages.ca{0}", match.Groups["href"].Value));
                mapScrapeResult.Phone    = UrlDownloader.SkipHtmlTags(match.Groups["phone"].Value);
                mapScrapeResult.Category = Category.Name;

                // First acceptance check, before the website is walked.
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // Second acceptance check, now that WalkWebsite may have changed the entry.
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            return text.IndexOf(">Next</a>") >= 0;
        }
        /// <summary>
        /// Checks whether a newer manifest is published for a bundle library and, if so, queues
        /// a notification offering the update.
        /// </summary>
        /// <remarks>
        /// Does nothing for non-bundle libraries or when the previous check happened less than
        /// 60 minutes ago.
        /// </remarks>
        /// <param name="library">Library whose bundle manifest is compared against the published one.</param>
        internal static void Check(Library library)
        {
            // We can operate only on bundle libs
            if (!library.WebLibraryDetail.IsBundleLibrary)
            {
                return;
            }

            // Only check every hour
            if (DateTime.UtcNow.Subtract(library.WebLibraryDetail.LastBundleManifestDownloadTimestampUTC ?? DateTime.MinValue).TotalMinutes < 60)
            {
                return;
            }

            // Flag that we are running this update check now
            library.WebLibraryDetail.LastBundleManifestDownloadTimestampUTC = DateTime.UtcNow;
            WebLibraryManager.Instance.NotifyOfChangeToWebLibraryDetail();

            // Download the new manifest
            BundleLibraryManifest manifest_existing = BundleLibraryManifest.FromJSON(library.WebLibraryDetail.BundleManifestJSON);
            string manifest_latest_url = manifest_existing.BaseUrl + "/" + manifest_existing.Id + ".qiqqa_bundle_manifest";

            MemoryStream ms;
            UrlDownloader.DownloadWithBlocking(ConfigurationManager.Instance.Proxy, manifest_latest_url, out ms);

            // The stream is only needed to obtain the JSON text; dispose it right afterwards.
            string manifest_latest_json;
            using (ms)
            {
                manifest_latest_json = Encoding.UTF8.GetString(ms.ToArray());
            }

            BundleLibraryManifest manifest_latest = BundleLibraryManifest.FromJSON(manifest_latest_json);

            // It is an old version or we have this version
            if (0 <= String.Compare(manifest_existing.Version, manifest_latest.Version))
            {
                return;
            }

            // It is a version the user has chosen to ignore
            if (library.WebLibraryDetail.LastBundleManifestIgnoreVersion == manifest_latest.Version)
            {
                return;
            }

            BundleLibraryUpdateNotification blun = new BundleLibraryUpdateNotification(library, manifest_latest);

            NotificationManager.Instance.AddPendingNotification(
                new NotificationManager.Notification(
                    String.Format("An update is available for your Bundle Library '{0}', from version {1} to {2}.", manifest_latest.Title, manifest_existing.Version, manifest_latest.Version),
                    "Bundle Library update available!",
                    NotificationManager.NotificationType.Info,
                    Icons.LibraryTypeBundle,
                    "Download!",
                    blun.Download,
                    "No thanks!",
                    blun.NoThanks
                    )
                );
        }
Beispiel #13
0
        /// <summary>
        /// Returns the HTML-decoded content of the named group <c>value</c> from the first match
        /// of <paramref name="regexp"/> in <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Text to search; null is treated as "no match".</param>
        /// <param name="regexp">
        /// Pattern to apply (case-insensitive, single-line mode); it is expected to define a
        /// group named <c>value</c>.
        /// </param>
        /// <returns>The decoded group content, or an empty string when nothing matches.</returns>
        protected string ExtractValue(string text, string regexp)
        {
            if (text == null)
            {
                return string.Empty;
            }

            const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

            // The static Regex.Match goes through the framework's regex cache, so a pattern that
            // is used repeatedly is not rebuilt (and recompiled) on every single call.
            Match match = Regex.Match(text, regexp, options);

            return match.Success
                ? UrlDownloader.HtmlDecode(match.Groups["value"].Value)
                : string.Empty;
        }
Beispiel #14
0
        /// <summary>
        /// Downloads the bundle described by <paramref name="manifest"/>, reporting progress through
        /// <c>StatusManager</c>, and offers to install it once the download has completed.
        /// </summary>
        /// <remarks>
        /// Blocks the calling thread: the download tracker is polled once per second until the
        /// download ends or is cancelled through the status token.
        /// </remarks>
        /// <param name="manifest">Manifest supplying the download location, version, title and support contact.</param>
        private void ManageDownload(BundleLibraryManifest manifest)
        {
            string url = manifest.BaseUrl + "/" + manifest.Id + Common.EXT_BUNDLE;

            UrlDownloader.DownloadAsyncTracker download_async_tracker = UrlDownloader.DownloadWithNonBlocking(ConfigurationManager.Instance.Proxy, url);

            string STATUS_TOKEN = "BundleDownload-" + manifest.Version;

            StatusManager.Instance.ClearCancelled(STATUS_TOKEN);
            while (!download_async_tracker.DownloadComplete)
            {
                if (StatusManager.Instance.IsCancelled(STATUS_TOKEN))
                {
                    download_async_tracker.Cancel();
                    break;
                }

                StatusManager.Instance.UpdateStatusBusy(STATUS_TOKEN, "Downloading Bundle Library...", download_async_tracker.ProgressPercentage, 100, true);
                Thread.Sleep(1000);
            }

            // Check the reason for exiting.
            // When the loop was left through the cancel path above, the completion arguments
            // may not have been published yet; treat that like a reported cancellation
            // instead of dereferencing null.
            var completed = download_async_tracker.DownloadDataCompletedEventArgs;

            if (null == completed || completed.Cancelled)
            {
                StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Cancelled download of Bundle Library.");
            }
            else if (null != completed.Error || null == completed.Result)
            {
                // Result is only inspected when no error was reported (short-circuit evaluation).
                MessageBoxes.Error(completed.Error, "There was an error during the download of your Bundle Library.  Please try again later or contact {0} for more information.", manifest.SupportEmail);
                StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Error during download of Bundle Library.");
            }
            else
            {
                StatusManager.Instance.UpdateStatus(STATUS_TOKEN, "Completed download of Bundle Library.");
                if (MessageBoxes.AskQuestion("The Bundle Library named '{0}' has been downloaded.  Do you want to install it now?", manifest.Title))
                {
                    LibraryBundleInstaller.Install(manifest, completed.Result);
                }
                else
                {
                    MessageBoxes.Warn("Not installing Bundle Library.");
                }
            }
        }
        /// <summary>
        /// Reads the proxy list through a <c>UrlDownloader</c> built from a
        /// <c>WebRequestExecutor</c> and a <c>WebRequestFactory</c>, and expects more than
        /// five entries to come back.
        /// </summary>
        public void DownloadsSomeProxies()
        {
            var requestExecutor = new WebRequestExecutor();
            var requestFactory = new WebRequestFactory(DatabaseTest.ConfigurationReader);
            var urlDownloader = new UrlDownloader(requestExecutor, requestFactory);

            var proxyListReader = new ProxyListReader(urlDownloader);
            var proxies = proxyListReader.Read().ToArray();

            proxies.Length.Should().BeGreaterThan(5);
        }
        /// <summary>
        /// Builds the search URL for the given location / category / keyword and records the
        /// matching country in <c>this.country</c>.
        /// </summary>
        /// <param name="scLocation">
        /// Location category: its <c>Url</c> is URL-encoded into the query and its <c>Meta</c>
        /// ("canada", "australia", "uk"; anything else means USA) selects the site.
        /// </param>
        /// <param name="scCategory">Category; the last '/'-separated segment of its <c>Url</c> is the search term.</param>
        /// <param name="keyword">Optional keyword appended to the search term, separated by an encoded space.</param>
        /// <param name="pageNum">Page number; converted to a result offset for the Australian and UK sites.</param>
        /// <returns>The search URL, with a <c>radius</c> parameter appended when <c>Radius</c> is positive.</returns>
        protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
        {
            string locationUrl = scLocation.Url;
            string categoryUrl = scCategory.Url;
            string searchTerm  = categoryUrl.Split(new char[] { '/' }).LastOrDefault<string>();

            // Every site expects '/' in the encoded location to be written as '+'.
            string place = UrlDownloader.UrlEncode(locationUrl).Replace("/", "+");

            if (!string.IsNullOrEmpty(keyword))
            {
                searchTerm += UrlDownloader.UrlEncode(string.Format(" {0}", keyword));
            }

            string searchUrl;
            switch (scLocation.Meta ?? string.Empty)
            {
                case "canada":
                    this.country = AbstractScraper.Country.Canada;
                    searchUrl    = string.Format("http://ca.local.yahoo.com/results?csz={0}&stx={1}&pg_nm={2}", place, searchTerm, pageNum);
                    break;

                case "australia":
                    // This site pages by result offset, 20 results per page.
                    this.country = AbstractScraper.Country.Australia;
                    searchUrl    = string.Format("http://local.yahoo.com.au/search/{0}/{1}?search.offset={2}", searchTerm, place, (pageNum - 1) * 20);
                    break;

                case "uk":
                    // This site pages by 1-based result index, 10 results per page.
                    this.country = AbstractScraper.Country.UK;
                    searchUrl    = string.Format("http://uk.local.yahoo.com/search.html?poi={0}&p={1}&cb={2}&output=html", place, searchTerm, (pageNum - 1) * 10 + 1);
                    break;

                default:
                    searchUrl    = string.Format("http://local.yahoo.com/results?csz={0}&stx={1}&pg_nm={2}", place, searchTerm, pageNum);
                    this.country = AbstractScraper.Country.USA;
                    break;
            }

            if (base.Radius > 0f)
            {
                // Format with the invariant culture so the decimal separator in the URL is
                // always '.', regardless of the machine's regional settings.
                searchUrl += string.Format("&radius={0}", base.Radius.ToString("#0.##", System.Globalization.CultureInfo.InvariantCulture));
            }
            return searchUrl;
        }
Beispiel #17
0
        /// <summary>
        /// Downloads a listing detail page and extracts its fields with regular expressions.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result with only <c>AdUrl</c> set when the scraper is terminated or the download
        /// fails; otherwise a result populated from the page markup.
        /// </returns>
        protected MapScrapeResult ParseDetailsCanada(string url)
        {
            MapScrapeResult details = new MapScrapeResult();
            details.AdUrl = url;

            string page;
            if (this.terminated || !this.downloader.DownloadUrl(url, out page))
            {
                return details;
            }

            // These fields are read from the whole page.
            details.Website   = base.ExtractValue(page, "\"webURL\":{.*?\"href\":\"(?<value>.*?)\"").Trim();
            details.Map       = base.ExtractValue(page, "<a id=\"interactiveMapPrint\" href='(?<value>.*?)'.*?>").Trim();
            details.Latitude  = base.ExtractValue(page, "\"latitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");
            details.Longitude = base.ExtractValue(page, "\"longitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");

            // The remaining fields are read from the "busCardLeft" block only.
            string card = base.ExtractValue(page, "<div id=\"busCardLeft\">(?<value>.*?)</div>");
            details.Phone = base.ExtractPhones(card).FirstOrDefault<string>();
            details.Email = base.ExtractEmails(card).FirstOrDefault<string>();

            string headlineMarkup = base.ExtractValue(card, "<h1 id=\"ypBusCardBusName\">(?<value>.*?)</h1>");
            details.Headline = UrlDownloader.SkipHtmlTags(headlineMarkup).Trim();
            details.Address  = base.ExtractValue(card, "<p>(?<value>.*?)</p>").Trim();

            // Split the address on commas: the last part is read as "<region> <postal code>"
            // and the part before it as the city.
            string[] addressParts = details.Address.Split(',');
            int partCount = addressParts.Length;

            if (partCount > 0)
            {
                string regionAndZip = addressParts[partCount - 1].Trim();
                int firstSpace = regionAndZip.IndexOf(' ');
                if (firstSpace >= 0)
                {
                    details.Region  = regionAndZip.Substring(0, firstSpace).Trim();
                    details.ZipCode = regionAndZip.Substring(firstSpace + 1).Trim();
                }
            }
            if (partCount > 1)
            {
                details.City = addressParts[partCount - 2].Trim();
            }

            // Prefix the scraped map link with the site root.
            if (!string.IsNullOrEmpty(details.Map))
            {
                details.Map = string.Format("http://www.yellowpages.ca{0}", details.Map);
            }

            return details;
        }
        /// <summary>
        /// Queries a BibTeX search server for <paramref name="title"/> and returns the raw
        /// response body.
        /// </summary>
        /// <param name="title">Title to search for.</param>
        /// <returns>
        /// The response decoded as UTF-8 text, or null when anything goes wrong (every
        /// exception is logged as a warning and swallowed).
        /// </returns>
        private static string DoSearch(string title)
        {
            try
            {
                byte[] title_bytes   = Encoding.UTF8.GetBytes(title);
                string title_encoded = Convert.ToBase64String(title_bytes);

                // construct key for bibtexsearch.com authentication hash:
                // a non-empty title is wrapped in its own first character on both sides.
                string auth_key = title;
                if (auth_key.Length > 0)
                {
                    string edge = auth_key[0].ToString();
                    auth_key = edge + auth_key + edge;
                }
                string auth = StreamFingerprint.FromText(auth_key);

                string url_server = bibtex_search_server_manager.GetServerUrl();
                string query      = WebUtility.HtmlEncode(title_encoded);
                string url        = String.Format("{0}/search?auth={1}&qe={2}", url_server, auth, query);

                try
                {
                    WebHeaderCollection header_collection;
                    Stopwatch stopwatch = Stopwatch.StartNew();
                    string json;

                    using (MemoryStream response = UrlDownloader.DownloadWithBlocking(url, out header_collection))
                    {
                        // Report how long the download took back to the server manager.
                        bibtex_search_server_manager.ReportLatency(url_server, stopwatch.ElapsedMilliseconds);
                        Logging.Debug特("bibtex_search_server_manager: Download {0} took {1} ms", url, stopwatch.ElapsedMilliseconds);

                        json = Encoding.UTF8.GetString(response.ToArray());
                    }

                    return json;
                }
                catch (Exception ex)
                {
                    // Failures while talking to a specific server are reported against that server.
                    bibtex_search_server_manager.ReportError(url_server);
                    Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}' at server '{1}'.", title, url_server);
                }
            }
            catch (Exception ex)
            {
                // Anything that failed before a server was contacted ends up here.
                Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}'.", title);
            }

            return null;
        }
        /// <summary>
        /// Checks whether the release listed in a remote index differs from the version of the
        /// executing assembly.
        /// </summary>
        /// <param name="urlOfIndex">URL of the JSON index file. It may contain either a list of version entries or a single entry.</param>
        /// <returns>
        /// true if the index has an entry for this assembly whose release string differs from the
        /// running version; false if it matches, if no entry exists, or if the check could not be
        /// completed for any reason.
        /// </returns>
        public static bool CheckForUpdate(string urlOfIndex)
        {
            if (string.IsNullOrWhiteSpace(urlOfIndex))
            {
                return(false);
            }

            try
            {
                var assemblyName = Assembly.GetExecutingAssembly().GetName();

                FileVersionInfo info = new FileVersionInfo()
                {
                    FileName = assemblyName.Name,
                    Release  = assemblyName.Version?.ToString()
                };

                IUrlDownloader downloader = new UrlDownloader("");

                string knownVersions = downloader.LoadFromUrl(urlOfIndex, true);
                if (string.IsNullOrWhiteSpace(knownVersions))
                {
                    return(false);
                }

                List <FileVersionInfo> loadedVersions = new List <FileVersionInfo>();

                try
                {
                    // A JSON "null" document deserializes to a null list; treat it as empty.
                    List <FileVersionInfo> parsedList = JsonConvert.DeserializeObject <List <FileVersionInfo> >(knownVersions);
                    if (parsedList != null)
                    {
                        loadedVersions.AddRange(parsedList);
                    }
                }
                catch
                {
                    // Deliberate fallback: the index may hold a single entry instead of a list.
                    loadedVersions.Add(JsonConvert.DeserializeObject <FileVersionInfo>(knownVersions));
                }

                // Skip null entries and entries without a file name so one malformed element
                // cannot hide a valid match further down the list.
                FileVersionInfo matchingEntry = loadedVersions.FirstOrDefault(
                    x => x != null && string.Equals(x.FileName, info.FileName, StringComparison.OrdinalIgnoreCase));

                // Any difference in the release string counts as "update available".
                return(matchingEntry != null && matchingEntry.Release != info.Release);
            }
            catch (System.Exception ex)
            {
                // Best effort: a failed check must never break the caller, but leave a trace
                // for debugging instead of discarding the error entirely.
                System.Diagnostics.Debug.WriteLine("CheckForUpdate failed: " + ex);
            }

            return(false);
        }
        /// <summary>
        /// Sends a title query to a BibTeX search server (through the configured proxy) and
        /// returns the response body as text.
        /// </summary>
        /// <param name="title">The title to search for.</param>
        /// <returns>
        /// The response decoded as UTF-8, or null when building or executing the request failed
        /// (the failure is logged as a warning).
        /// </returns>
        private static string DoSearch(string title)
        {
            try
            {
                // The query travels as the base64 form of the UTF-8 encoded title.
                string title_encoded = Convert.ToBase64String(Encoding.UTF8.GetBytes(title));

                // The authentication value is the fingerprint of the title wrapped in its own
                // first character (an empty title is fingerprinted unchanged).
                string auth = title;
                if (0 < auth.Length)
                {
                    auth = auth[0] + auth + auth[0];
                }
                auth = StreamFingerprint.FromText(auth);

                string url_server = bibtex_search_server_manager.GetServerUrl();
                string url        = String.Format("{0}/search?auth={1}&qe={2}", url_server, auth, WebUtility.HtmlEncode(title_encoded));
                try
                {
                    MemoryStream        ms;
                    WebHeaderCollection header_collection;

                    // Stopwatch is monotonic, so the reported latency cannot be distorted by
                    // system clock adjustments the way a DateTime difference can.
                    System.Diagnostics.Stopwatch clk = System.Diagnostics.Stopwatch.StartNew();
                    UrlDownloader.DownloadWithBlocking(ConfigurationManager.Instance.Proxy, url, out ms, out header_collection);
                    double elapsed_ms = clk.Elapsed.TotalMilliseconds;

                    // Release the downloaded buffer as soon as the text has been extracted.
                    using (ms)
                    {
                        bibtex_search_server_manager.ReportLatency(url_server, elapsed_ms);

                        string json = Encoding.UTF8.GetString(ms.ToArray());
                        return(json);
                    }
                }

                catch (Exception ex)
                {
                    // A failure talking to this particular server is reported against it.
                    bibtex_search_server_manager.ReportError(url_server);
                    Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}' at server '{1}'.", title, url_server);
                }
            }

            catch (Exception ex)
            {
                Logging.Warn(ex, "There was a problem searching for BibTeX for title '{0}'.", title);
            }

            return(null);
        }
 /// <summary>
 /// Starts downloading the track currently selected in the playlist, unless a download is
 /// already in progress or nothing is selected.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void buttonDownload_Click(object sender, EventArgs e)
 {
     // Nothing to do while a download is running or when no track is selected.
     if (UrlDownloader.DownloadStatus || playlist.SelectedIndex == -1)
     {
         return;
     }

     string directory = Settings.Default.path;
     var track = SoundCloud.MyTracklist[playlist.SelectedIndex];

     // Prefer "artist - title"; fall back to the buffered title when either part is missing.
     bool hasArtistAndTitle = !string.IsNullOrEmpty(track.artist) && !string.IsNullOrEmpty(track.title);
     string filename = hasArtistAndTitle
         ? String.Format("{0} - {1}.mp3", track.artist, track.title)
         : String.Format("{0}.mp3", track.buferTitle);

     string address = track.stream_url + "?client_id=" + SoundCloud.clientID;
     UrlDownloader.Downloader(address, directory, filename);

     // NOTE(review): the completion handler is attached after the download was started and
     // on every click; confirm against UrlDownloader whether webClient is recreated per download.
     UrlDownloader.webClient.DownloadFileCompleted += WebClient_DownloadFileCompleted;
 }
Beispiel #22
0
        /// <summary>
        /// Downloads a yellowpages.com detail page and extracts the listing fields from its HTML.
        /// </summary>
        /// <param name="url">Address of the detail page; it is also stored as the result's AdUrl.</param>
        /// <returns>
        /// A result that always carries AdUrl. The remaining fields are filled only when the
        /// scraper has not been terminated and the download succeeded.
        /// </returns>
        protected MapScrapeResult ParseDetailsUSA(string url)
        {
            MapScrapeResult mapScrapeResult = new MapScrapeResult();

            mapScrapeResult.AdUrl = url;
            if (this.terminated)
            {
                return(mapScrapeResult);
            }
            string text;

            if (this.downloader.DownloadUrl(url, out text))
            {
                mapScrapeResult.Headline = UrlDownloader.SkipHtmlTags(base.ExtractValue(text, "<h1 class=\"fn org\">(?<value>.*?)</h1>")).Trim();

                // Newer pages carry a structured "primary-location" paragraph; older ones only
                // have a single address span.
                string text2 = base.ExtractValue(text, "<p class=\"primary-location\">(?<value>.*?)</p>");
                if (string.IsNullOrEmpty(text2))
                {
                    mapScrapeResult.Address = base.ExtractValue(text, "<span class=\"listing-address adr\">(?<value>.*?)</span>").Trim();
                }
                else
                {
                    mapScrapeResult.Address = base.ExtractValue(text2, "<span class=\"street-address\">(?<value>.*?)</span>").Trim();
                    mapScrapeResult.City    = base.ExtractValue(text2, "<span class=\"locality\">(?<value>.*?)</span>");
                    mapScrapeResult.Region  = base.ExtractValue(text2, "<span class=\"region\">(?<value>.*?)</span>");
                    mapScrapeResult.ZipCode = base.ExtractValue(text2, "<span class=\"postal-code\">(?<value>.*?)</span>");
                }
                mapScrapeResult.Email = base.ExtractEmails(text).FirstOrDefault <string>();
                mapScrapeResult.Phone = base.ExtractValue(text, "<p class=\"phone\">(?<value>.*?)</p>");

                string text3 = base.ExtractValue(text, "<ul class=\"feature-links blue-arrow\">(?<value>.*?)</ul>").Trim();
                mapScrapeResult.Website = base.ExtractValue(text3, "href=\"(?<value>.*?)\".*?Visit Website</a>").Trim();

                // A "Visit Website" link that is really a mail link is not a website.
                // Ordinal comparison: this is a protocol prefix, not linguistic text.
                if (mapScrapeResult.Website.StartsWith("mailto:", System.StringComparison.Ordinal))
                {
                    mapScrapeResult.Website = string.Empty;
                }

                // Only build an absolute map URL when a relative link was actually found;
                // otherwise the result would be the bare host, which is not a map link.
                string mapPath = base.ExtractValue(text, "<div id=\"mip-minimap\">.*?href=\"(?<value>.*?)\".*?</div>").Trim();
                mapScrapeResult.Map = string.IsNullOrEmpty(mapPath)
                    ? mapPath
                    : string.Format("http://www.yellowpages.com{0}", mapPath);

                mapScrapeResult.Latitude  = base.ExtractValue(text, "<span class=\"latitude\" id=\"map-latitude\">(?<value>.*?)</span>");
                mapScrapeResult.Longitude = base.ExtractValue(text, "<span class=\"longitude\" id=\"map-longitude\">(?<value>.*?)</span>");
            }
            return(mapScrapeResult);
        }
        /// <summary>
        /// Downloads a UK detail page and extracts the listing fields from its HTML.
        /// </summary>
        /// <param name="url">Address of the detail page; it is also stored as the result's AdUrl.</param>
        /// <returns>
        /// A result that always carries AdUrl. The remaining fields are filled only when the
        /// scraper has not been terminated and the download succeeded.
        /// </returns>
        protected MapScrapeResult ParseDetailsUK(string url)
        {
            MapScrapeResult details = new MapScrapeResult();
            details.AdUrl = url;

            string html;
            if (this.terminated || !this.downloader.DownloadUrl(url, out html))
            {
                return details;
            }

            details.Headline = base.ExtractValue(html, "<h1 class=\"org fn n\">(?<value>.*?)</h1>");
            string rawAddress = base.ExtractValue(html, "<p class=\"street-address\">(?<value>.*?)</p>");
            details.ZipCode = base.ExtractValue(html, "<span class=\"postal-code\">(?<value>.*?)</span>");

            // Tag-free address; still contains the tabs/newlines used below as separators.
            string address = UrlDownloader.SkipHtmlTags(rawAddress).Trim();

            // Break the address into its non-blank components before the separators are removed.
            string[] separators = new string[] { "\t", "\n", "," };
            List <string> components = new List <string>();
            foreach (string piece in address.Split(separators, StringSplitOptions.RemoveEmptyEntries))
            {
                if (piece.Trim().Length != 0)
                {
                    components.Add(piece);
                }
            }

            details.Address = address.Replace("\t", string.Empty).Replace("\n", string.Empty);

            // The second component of the address is taken as the city.
            if (components.Count > 1)
            {
                details.City = components[1];
            }

            string geo = base.ExtractValue(html, "<p class=\"geo\">(?<value>.*?)</p>");
            details.Latitude  = base.ExtractValue(geo, "<span class=\"latitude\">(?<value>.*?)</span>");
            details.Longitude = base.ExtractValue(geo, "<span class=\"longitude\">(?<value>.*?)</span>");
            details.Map       = base.ExtractValue(html, "<div id=\"ent-page-md-links\">.*?href=\"(?<value>.*?)\".*?</div>");
            details.Email     = base.ExtractEmails(html).FirstOrDefault <string>();
            details.Phone     = base.ExtractValue(html, "<h2 class=\"tel\">.*?<span class=\"value\">(?<value>.*?)</span>").Trim();

            return details;
        }
        /// <summary>
        /// Downloads a Google Scholar page and scrapes the papers it lists.
        /// </summary>
        /// <param name="url">Address of the page to download and parse.</param>
        /// <returns>
        /// The papers collected by ScrapeDoc. When downloading or parsing fails the error is
        /// logged and whatever was collected up to that point (possibly nothing) is returned.
        /// </returns>
        public static List <GoogleScholarScrapePaper> ScrapeUrl(string url)
        {
            var papers = new List <GoogleScholarScrapePaper>();

            try
            {
                var response_headers = new WebHeaderCollection();

                using (MemoryStream page = UrlDownloader.DownloadWithBlocking(url, out response_headers))
                {
                    // Parse the page as UTF-8 regardless of any byte order mark.
                    var html = new HtmlDocument();
                    html.Load(page, System.Text.Encoding.UTF8, detectEncodingFromByteOrderMarks: false);

                    ScrapeDoc(html, url, papers);
                }
            }
            catch (Exception e)
            {
                Logging.Error(e, "There was a problem parsing the GoogleScholar url {0}", url);
            }

            return papers;
        }
Beispiel #25
0
        /// <summary>
        /// Matches one advert on a UK result page and captures its content block ("text"),
        /// its call-to-action block ("cta") and its tab list ("tab"). Built once because
        /// constructing a compiled regex on every call is expensive.
        /// </summary>
        private static readonly Regex UkAdvertPattern = new Regex(
            "<div class=\"advert-content\">(?<text>.*?)</div>.*?(?<cta><div class=\"advert-cta\">.*?</div>){1}.*?<ul class=\"tabbed\">(?<tab>.*?)</ul>{1}",
            RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);

        /// <summary>
        /// Extracts every advert from a UK result page, adds the accepted ones to
        /// <paramref name="result"/> and reports them to the callback.
        /// </summary>
        /// <param name="text">HTML of the result page.</param>
        /// <param name="result">Shared result list; additions are made while holding a lock on it.</param>
        /// <param name="Location">Search location (not used by this method).</param>
        /// <param name="Category">Search category; its name is stored on every result.</param>
        /// <returns>
        /// false when the scraper was terminated while processing; otherwise true if the page
        /// contains a "Next" link, false if it does not.
        /// </returns>
        protected bool ProcessUK(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            foreach (Match match in UkAdvertPattern.Matches(text))
            {
                if (this.terminated)
                {
                    return(false);
                }

                string content      = match.Groups["text"].Value;
                string callToAction = match.Groups["cta"].Value;
                string tabs         = match.Groups["tab"].Value;

                MapScrapeResult mapScrapeResult = new MapScrapeResult();
                mapScrapeResult.AdUrl    = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"summaryTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
                mapScrapeResult.Map      = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"mapTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
                mapScrapeResult.Phone    = UrlDownloader.SkipHtmlTags(base.ExtractValue(callToAction, "<ul class=\"(tel-single|tel-multiple)\">.*?<strong>(?<value>.*?)</strong>.*?</ul>")).Trim();

                // The headline markup holds the website link, so read the link before
                // stripping the tags.
                mapScrapeResult.Headline = base.ExtractValue(content, "<h2 class=\"coName\">(?<value>.*?)</h2>");
                mapScrapeResult.Website  = base.ExtractValue(mapScrapeResult.Headline, "href='(?<value>.*?)'").Trim();
                mapScrapeResult.Headline = UrlDownloader.SkipHtmlTags(mapScrapeResult.Headline).Trim();

                mapScrapeResult.Address  = UrlDownloader.SkipHtmlTags(base.ExtractValue(content, "<p class=\"address\">(?<value>.*?)</p>")).Trim();
                mapScrapeResult.ZipCode  = base.ExtractValue(mapScrapeResult.Address, "(?<value>[A-Z]{1,2}[0-9R][0-9A-Z]?\\s*?[0-9][ABD-HJLNP-UW-Z]{2})");

                // Region is the last comma-separated part of the address and city the one
                // before it. The postcode is removed from both; the guard is required because
                // string.Replace rejects an empty search value.
                string[] addressParts = mapScrapeResult.Address.Split(',');
                string   zipCode      = mapScrapeResult.ZipCode;
                bool     hasZipCode   = !string.IsNullOrEmpty(zipCode);

                string region = addressParts[addressParts.Length - 1];
                mapScrapeResult.Region = (hasZipCode ? region.Replace(zipCode, string.Empty) : region).Trim();

                if (addressParts.Length > 1)
                {
                    string city = addressParts[addressParts.Length - 2];
                    mapScrapeResult.City = (hasZipCode ? city.Replace(zipCode, string.Empty) : city).Trim();
                }

                if (this.terminated)
                {
                    return(false);
                }

                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }
                mapScrapeResult.Category = Category.Name;

                // Checked again because walking the website may have changed the result.
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return(false);
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            // A "Next" link means there is another result page to fetch.
            return(text.IndexOf(">Next</a>") >= 0);
        }
        /// <summary>
        /// Program entry point: creates a downloader for a fixed address and the name "myFile"
        /// and starts it.
        /// </summary>
        /// <param name="args">Command-line arguments (ignored).</param>
        static void Main(string[] args)
        {
            new UrlDownloader("http://www.ynet.co.il", "myFile").Start();
        }
Beispiel #27
0
        /// <summary>
        /// Builds the search URL for one result page and records which country's site is used.
        /// </summary>
        /// <param name="scLocation">Search location; its Url is the location term and its Meta selects the site ("canada", "australia", "uk"; anything else means USA).</param>
        /// <param name="scCategory">Search category; its Url is the category term.</param>
        /// <param name="keyword">Optional extra keyword; it is URL-encoded before use.</param>
        /// <param name="pageNum">Page number to request (not part of the Canadian URL).</param>
        /// <returns>The search URL, with a radius refinement appended when Radius is positive.</returns>
        protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
        {
            string location = scLocation.Url.Replace(" ", "+");
            string category = scCategory.Url.Replace(" ", "+");
            string url;

            keyword = UrlDownloader.UrlEncode(keyword);
            bool hasKeyword = !string.IsNullOrEmpty(keyword);

            switch (scLocation.Meta ?? string.Empty)
            {
                case "canada":
                    this.country = AbstractScraper.Country.Canada;
                    category     = UrlDownloader.SkipHtmlTags(category.Replace('-', ' '));
                    url          = hasKeyword
                        ? string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}+{1}&where={2}", category, keyword, location)
                        : string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}&where={1}", category, location);
                    break;

                case "australia":
                    if (base.Downloader != null)
                    {
                        base.Downloader.KeepAlive = false;
                    }
                    this.country = AbstractScraper.Country.Australia;
                    url          = hasKeyword
                        ? string.Format("http://www.yellowpages.com.au/search/listings?clue={1}+{3}&locationClue={0}&x=0&y=0&pageNumber={2}", location, category, pageNum, keyword)
                        : string.Format("http://www.yellowpages.com.au/search/listings?clue={1}&locationClue={0}&x=0&y=0&pageNumber={2}", location, category, pageNum);
                    break;

                case "uk":
                    this.country = AbstractScraper.Country.UK;
                    url          = hasKeyword
                        ? string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}+{1}&location={2}&pageNum={3}", category, keyword, location, pageNum)
                        : string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}&location={1}&pageNum={2}", category, location, pageNum);
                    break;

                default:
                    url = hasKeyword
                        ? string.Format("http://www.yellowpages.com/{0}/{1}?q={2}&page={3}", location, category, keyword, pageNum)
                        : string.Format("http://www.yellowpages.com/{0}/{1}?page={2}", location, category, pageNum);
                    this.country = AbstractScraper.Country.USA;
                    break;
            }

            if (base.Radius > 0f)
            {
                // Format with the invariant culture so the decimal separator is always '.',
                // whatever the machine's regional settings are.
                url += string.Format("&refinements[radius]={0}", base.Radius.ToString("#0.##", System.Globalization.CultureInfo.InvariantCulture));
            }
            return(url);
        }
 /// <summary>
 /// Sets up the fixture by constructing a <c>UrlDownloader</c> from the
 /// <c>_webRequestExecutor</c> and <c>_webRequestFactory</c> members and storing it in
 /// <c>_downloader</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably the two collaborators are test doubles created by field
 /// initializers - confirm against the field declarations.
 /// </remarks>
 public UrlDownloaderTest()
 {
     _downloader = new UrlDownloader(_webRequestExecutor, _webRequestFactory);
 }
Beispiel #29
0
 /// <summary>
 /// Creates a Craigslist scraper on top of the given downloader and result callback.
 /// </summary>
 /// <param name="downloader">Downloader handed to the base scraper.</param>
 /// <param name="callback">Result callback handed to the base scraper.</param>
 public CraigslistScraper(UrlDownloader downloader, ScrapeResultCallback callback)
     : base(downloader, callback)
 {
     // Craigslist paging starts from -1.
     initialPageNum = -1;

     // The campaign's search engine is set to 0.
     campaign.SearchEngine = 0;
 }
 /// <summary>
 /// Downloads the content at the given address and loads it through <c>LoadRaw</c>.
 /// </summary>
 /// <param name="url">Address to download from.</param>
 public void LoadFromUrl(Uri url)
 {
     var downloaded = UrlDownloader.Download(url);
     LoadRaw(downloaded);
 }