/// <summary>
/// Downloads a single page and, when the response is HTTP 200, stores its content
/// as a warmup file. One report entry describing the outcome is added to
/// <paramref name="reportEntries"/> unless an exception interrupts the processing.
/// </summary>
/// <param name="baseUrl">Base url that is stripped from <paramref name="url"/> to build the reported relative url.</param>
/// <param name="url">Absolute url of the page to download.</param>
/// <param name="reportEntries">Collection receiving the report entry for this url.</param>
private void WarmupUrl(string baseUrl, string url, ICollection<ReportEntry> reportEntries) {
    // String.Replace throws on a null or empty search value, so only strip a usable base url
    var trimmedUrl = url.Trim();
    var relativeUrl = String.IsNullOrEmpty(baseUrl)
        ? trimmedUrl
        : trimmedUrl.Replace(baseUrl, string.Empty);

    try {
        var filename = WarmupUtility.EncodeUrl(url.TrimEnd('/'));
        var path = _appDataFolder.Combine(BaseFolder, filename);
        var download = _webDownloader.Download(url);

        if (download != null) {
            if (download.StatusCode == HttpStatusCode.OK) {
                // success
                _appDataFolder.CreateFile(path, download.Content);
                reportEntries.Add(new ReportEntry {
                    RelativeUrl = relativeUrl,
                    Filename = filename,
                    StatusCode = (int)download.StatusCode,
                    CreatedUtc = _clock.UtcNow
                });

                // if the url starts with http://www., then also store the content under the www-less url;
                // separate locals are used so that the catch block below still logs the requested url
                if (url.StartsWith("http://www.", StringComparison.OrdinalIgnoreCase)) {
                    var wwwLessUrl = "http://" + url.Substring("http://www.".Length);
                    var wwwLessFilename = WarmupUtility.EncodeUrl(wwwLessUrl.TrimEnd('/'));
                    var wwwLessPath = _appDataFolder.Combine(BaseFolder, wwwLessFilename);
                    _appDataFolder.CreateFile(wwwLessPath, download.Content);
                }
            }
            else {
                // the page answered with a non-OK status: report it without storing any content
                reportEntries.Add(new ReportEntry {
                    RelativeUrl = relativeUrl,
                    Filename = filename,
                    StatusCode = (int)download.StatusCode,
                    CreatedUtc = _clock.UtcNow
                });
            }
        }
        else {
            // download failed: reported with a status code of 0
            reportEntries.Add(new ReportEntry {
                RelativeUrl = relativeUrl,
                Filename = filename,
                StatusCode = 0,
                CreatedUtc = _clock.UtcNow
            });
        }
    }
    catch (Exception e) {
        // the {0} placeholder is required, otherwise the url argument never reaches the log
        Logger.Error(e, "Could not extract warmup page content for: {0}", url);
    }
}
/// <summary>
/// Sends the leading part of the crawled text to the Google AJAX language detection
/// endpoint and stores the detected language in the property bag.
/// </summary>
/// <param name="crawler">Crawler running this step; must not be null.</param>
/// <param name="propertyBag">
/// Source of the text to analyse; receives the "Language" and "LanguageCulture"
/// properties when detection succeeds. Must not be null.
/// </param>
/// <remarks>
/// Failures while building the request, downloading or parsing the response are
/// logged through m_Logger and are not rethrown.
/// </remarks>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    string content = propertyBag.Text;
    if (content.IsNullOrEmpty())
    {
        return;
    }

    string contentLookupText = content.Max(MaxPostSize);

    try
    {
        // The text goes into a query string: without escaping, characters such as '&', '#',
        // '+' or '%' in the page content would truncate or corrupt the q parameter.
        // Escaping happens inside the try block because EscapeDataString can throw on
        // very long input on some framework versions.
        string encodedRequestUrlFragment =
            "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}".FormatWith(
                Uri.EscapeDataString(contentLookupText));

        m_Logger.Verbose("Google language detection using: {0}", encodedRequestUrlFragment);

        IWebDownloader downloader = NCrawlerModule.Container.Resolve<IWebDownloader>();
        PropertyBag result = downloader.Download(new CrawlStep(new Uri(encodedRequestUrlFragment), 0), null, DownloadMethod.GET);
        if (result.IsNull())
        {
            return;
        }

        using (Stream responseReader = result.GetResponse())
        using (StreamReader reader = new StreamReader(responseReader))
        {
            // only the first line of the response is parsed as JSON
            string json = reader.ReadLine();
            using (MemoryStream ms = new MemoryStream(Encoding.Unicode.GetBytes(json)))
            {
                DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(LanguageDetector));
                LanguageDetector detector = ser.ReadObject(ms) as LanguageDetector;

                if (!detector.IsNull())
                {
                    // resolve the culture first so that neither property is set when the lookup throws
                    CultureInfo culture = CultureInfo.GetCultureInfo(detector.responseData.language);
                    propertyBag["Language"].Value = detector.responseData.language;
                    propertyBag["LanguageCulture"].Value = culture;
                }
            }
        }
    }
    catch (Exception ex)
    {
        m_Logger.Error("Error during google language detection, the error was: {0}", ex.ToString());
    }
}
/// <summary>
/// Fetches the content addressed by a crawl step
/// </summary>
/// <param name="step">Crawl step whose Uri is downloaded</param>
/// <returns>
/// The downloaded content, or null when the download threw and the
/// exception was handed to OnDownloadException
/// </returns>
private PropertyBag Download(CrawlStep step)
{
    PropertyBag downloaded = null;

    try
    {
        IWebDownloader downloader = m_DownloaderFactory.GetDownloader();
        m_Logger.Verbose("Downloading {0}", step.Uri);
        downloaded = downloader.Download(step, DownloadMethod.Get);
    }
    catch (Exception failure)
    {
        // the failure is reported through the handler; the caller receives null
        OnDownloadException(failure, step);
    }

    return downloaded;
}
/// <summary>
/// Sends the leading part of the crawled text to the Google AJAX language detection
/// endpoint and stores the detected language in the property bag.
/// </summary>
/// <param name="crawler">Crawler running this step; must not be null.</param>
/// <param name="propertyBag">
/// Source of the text to analyse; receives the "Language" and "LanguageCulture"
/// properties when detection succeeds. Must not be null.
/// </param>
/// <remarks>
/// Nothing is written to the property bag when the download yields no result, the
/// response is empty, or the response carries no detection data. Exceptions raised
/// by the download or by the JSON parsing are not caught here.
/// </remarks>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    string content = propertyBag.Text;
    if (content.IsNullOrEmpty())
    {
        return;
    }

    string contentLookupText = content.Length > MaxPostSize
        ? content.Substring(0, MaxPostSize).Trim()
        : content.Trim();

    // The text goes into a query string: without escaping, characters such as '&', '#',
    // '+' or '%' in the page content would truncate or corrupt the q parameter.
    string encodedRequestUrlFragment =
        "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}".FormatWith(
            Uri.EscapeDataString(contentLookupText));

    IWebDownloader downloader = m_DownloaderFactory.GetDownloader();
    PropertyBag result = downloader.Download(new CrawlStep(new Uri(encodedRequestUrlFragment), 0), DownloadMethod.Get);
    if (result.IsNull())
    {
        // nothing was downloaded, so there is no response to parse
        return;
    }

    using (MemoryStream responseReader = result.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseReader))
    {
        // only the first line of the response is parsed as JSON
        string json = reader.ReadLine();
        if (json.IsNullOrEmpty())
        {
            // empty response body: Encoding.GetBytes would throw on a null string
            return;
        }

        using (MemoryStream ms = new MemoryStream(Encoding.Unicode.GetBytes(json)))
        {
            DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(LanguageDetector));
            LanguageDetector detector = ser.ReadObject(ms) as LanguageDetector;

            // responseData is also checked so that a reply without detection data is skipped
            // instead of failing with a null reference
            if (!detector.IsNull() && !detector.responseData.IsNull())
            {
                // resolve the culture first so that neither property is set when the lookup throws
                CultureInfo culture = CultureInfo.GetCultureInfo(detector.responseData.language);
                propertyBag["Language"].Value = detector.responseData.language;
                propertyBag["LanguageCulture"].Value = culture;
            }
        }
    }
}
/// <summary>
/// Regenerates the static warmup pages when they are missing or when the configured
/// delay since the last generation has elapsed.
/// </summary>
/// <remarks>
/// Steps, in order: acquire the lock file, check the timestamp stored in the warmup
/// file, delete the files listed in the previous report, download every configured
/// url, write the new report and finally store the generation time. The method
/// returns without doing anything when the base url is not set or when the lock
/// cannot be acquired.
/// </remarks>
public void EnsureGenerate() {
    var baseUrl = _orchardServices.WorkContext.CurrentSite.BaseUrl;
    var part = _orchardServices.WorkContext.CurrentSite.As<WarmupSettingsPart>();

    // do nothing while the base url setting is not defined
    if (String.IsNullOrWhiteSpace(baseUrl)) {
        return;
    }

    // prevent multiple appdomains from rebuilding the static page concurrently (e.g., command line)
    ILockFile lockFile = null;
    if (!_lockFileManager.TryAcquireLock(_lockFilename, ref lockFile)) {
        return;
    }

    using (lockFile) {

        // check if we need to regenerate the pages by reading the last time it has been done
        // 1- if the warmup file doesn't exist, generate the pages
        // 2- otherwise, check if the delay is over
        if (_appDataFolder.FileExists(_warmupPath)) {
            try {
                var warmupContent = _appDataFolder.ReadFile(_warmupPath);
                var expired = XmlConvert.ToDateTimeOffset(warmupContent).AddMinutes(part.Delay);
                if (expired > _clock.UtcNow) {
                    return;
                }
            }
            catch {
                // invalid file, delete it and continue processing
                _appDataFolder.DeleteFile(_warmupPath);
            }
        }

        // delete previously generated pages, by reading the Warmup Report file
        try {
            var encodedPrefix = WarmupUtility.EncodeUrl("http://www.");

            foreach (var reportEntry in _reportManager.Read()) {
                try {
                    // use FileName as the SiteBaseUrl could have changed in the meantime
                    var path = _appDataFolder.Combine(BaseFolder, reportEntry.Filename);
                    _appDataFolder.DeleteFile(path);

                    // delete the www-less version too if it's available
                    if (reportEntry.Filename.StartsWith(encodedPrefix, StringComparison.OrdinalIgnoreCase)) {
                        var filename = WarmupUtility.EncodeUrl("http://") + reportEntry.Filename.Substring(encodedPrefix.Length);
                        path = _appDataFolder.Combine(BaseFolder, filename);
                        _appDataFolder.DeleteFile(path);
                    }
                }
                catch (Exception e) {
                    // the {0} placeholder is required, otherwise the filename never reaches the log
                    Logger.Error(e, "Could not delete specific warmup file: {0}", reportEntry.Filename);
                }
            }
        }
        catch (Exception e) {
            Logger.Error(e, "Could not read warmup report file");
        }

        var reportEntries = new List<ReportEntry>();

        if (!String.IsNullOrEmpty(part.Urls)) {
            // loop over every relative url to generate the contents
            using (var urlReader = new StringReader(part.Urls)) {
                string relativeUrl;
                while (null != (relativeUrl = urlReader.ReadLine())) {
                    if (String.IsNullOrWhiteSpace(relativeUrl)) {
                        continue;
                    }

                    string url = null;
                    relativeUrl = relativeUrl.Trim();

                    try {
                        url = VirtualPathUtility.RemoveTrailingSlash(baseUrl) + relativeUrl;
                        var filename = WarmupUtility.EncodeUrl(url.TrimEnd('/'));
                        var path = _appDataFolder.Combine(BaseFolder, filename);

                        var download = _webDownloader.Download(url);

                        if (download != null) {
                            if (download.StatusCode == HttpStatusCode.OK) {
                                // success
                                _appDataFolder.CreateFile(path, download.Content);
                                reportEntries.Add(new ReportEntry {
                                    RelativeUrl = relativeUrl,
                                    Filename = filename,
                                    StatusCode = (int)download.StatusCode,
                                    CreatedUtc = _clock.UtcNow
                                });

                                // if the url starts with http://www., then also store the content under the www-less url;
                                // separate locals are used so that the catch block below still logs the requested url
                                if (url.StartsWith("http://www.", StringComparison.OrdinalIgnoreCase)) {
                                    var wwwLessUrl = "http://" + url.Substring("http://www.".Length);
                                    var wwwLessFilename = WarmupUtility.EncodeUrl(wwwLessUrl.TrimEnd('/'));
                                    var wwwLessPath = _appDataFolder.Combine(BaseFolder, wwwLessFilename);
                                    _appDataFolder.CreateFile(wwwLessPath, download.Content);
                                }
                            }
                            else {
                                // the page answered with a non-OK status: report it without storing any content
                                reportEntries.Add(new ReportEntry {
                                    RelativeUrl = relativeUrl,
                                    Filename = filename,
                                    StatusCode = (int)download.StatusCode,
                                    CreatedUtc = _clock.UtcNow
                                });
                            }
                        }
                        else {
                            // download failed: reported with a status code of 0
                            reportEntries.Add(new ReportEntry {
                                RelativeUrl = relativeUrl,
                                Filename = filename,
                                StatusCode = 0,
                                CreatedUtc = _clock.UtcNow
                            });
                        }
                    }
                    catch (Exception e) {
                        // the {0} placeholder is required, otherwise the url never reaches the log
                        Logger.Error(e, "Could not extract warmup page content for: {0}", url);
                    }
                }
            }
        }

        _reportManager.Create(reportEntries);

        // finally write the time the generation has been executed
        _appDataFolder.CreateFile(_warmupPath, XmlConvert.ToString(_clock.UtcNow, XmlDateTimeSerializationMode.Utc));
    }
}
// Download is expected to yield null for a null, empty or blank url.
public void ShouldReturnNullWhenUrlIsEmpty() {
    string[] blankUrls = { null, "", " " };

    foreach (string blankUrl in blankUrls) {
        Assert.That(_webDownloader.Download(blankUrl), Is.Null);
    }
}