/// <summary>
        /// Downloads <paramref name="url"/> and, when the response status is 200 OK, stores the
        /// downloaded content in the app data folder under a file name derived from the url.
        /// A report entry is added for every outcome: it carries the HTTP status code of the
        /// response, or 0 when the downloader returned null.
        /// </summary>
        /// <param name="baseUrl">Text removed from <paramref name="url"/> to build the relative url stored in the report.</param>
        /// <param name="url">Url of the page to download.</param>
        /// <param name="reportEntries">Collection receiving the report entry for this url.</param>
        private void WarmupUrl(string baseUrl, string url, ICollection<ReportEntry> reportEntries)
        {
            var relativeUrl = url.Trim().Replace(baseUrl, string.Empty);

            try {
                var filename = WarmupUtility.EncodeUrl(url.TrimEnd('/'));
                var path     = _appDataFolder.Combine(BaseFolder, filename);

                var download  = _webDownloader.Download(url);
                var succeeded = download != null && download.StatusCode == HttpStatusCode.OK;

                if (succeeded)
                {
                    // write the page before reporting it, so a failed write produces no entry
                    _appDataFolder.CreateFile(path, download.Content);
                }

                reportEntries.Add(new ReportEntry {
                    RelativeUrl = relativeUrl,
                    Filename    = filename,
                    // 0 marks a download that produced no response at all
                    StatusCode  = download == null ? 0 : (int)download.StatusCode,
                    CreatedUtc  = _clock.UtcNow
                });

                if (!succeeded)
                {
                    return;
                }

                // if the url starts with http://www., then also render the www-less one
                if (url.StartsWith("http://www.", StringComparison.OrdinalIgnoreCase))
                {
                    // separate locals: 'url' must keep the requested value for the error log below
                    var wwwLessUrl      = "http://" + url.Substring("http://www.".Length);
                    var wwwLessFilename = WarmupUtility.EncodeUrl(wwwLessUrl.TrimEnd('/'));
                    var wwwLessPath     = _appDataFolder.Combine(BaseFolder, wwwLessFilename);
                    _appDataFolder.CreateFile(wwwLessPath, download.Content);
                }
            } catch (Exception e) {
                // the {0} placeholder is required, otherwise the url argument is never rendered
                Logger.Error(e, "Could not extract warmup page content for: {0}", url);
            }
        }
        /// <summary>
        /// Detects the language of the text held by <paramref name="propertyBag"/> by querying
        /// the Google AJAX language detection endpoint, and stores the result in the bag's
        /// "Language" and "LanguageCulture" properties.
        /// </summary>
        /// <param name="crawler">Crawler running this step; only checked for null here.</param>
        /// <param name="propertyBag">Bag whose Text is analysed and which receives the detected language.</param>
        /// <remarks>
        /// Nothing happens when the text is null or empty, or when the download returns null.
        /// Exceptions raised while building the request, downloading or parsing are logged
        /// and not rethrown.
        /// </remarks>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            AspectF.Define.
            NotNull(crawler, "crawler").
            NotNull(propertyBag, "propertyBag");

            string content = propertyBag.Text;

            if (content.IsNullOrEmpty())
            {
                return;
            }

            string contentLookupText = content.Max(MaxPostSize);

            try
            {
                // The text is arbitrary page content: escape it so characters such as
                // '&', '#' or '+' cannot truncate or alter the q parameter.
                string encodedRequestUrlFragment =
                    "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}".FormatWith(
                        Uri.EscapeDataString(contentLookupText));

                m_Logger.Verbose("Google language detection using: {0}", encodedRequestUrlFragment);

                IWebDownloader downloader = NCrawlerModule.Container.Resolve<IWebDownloader>();
                PropertyBag    result     = downloader.Download(new CrawlStep(new Uri(encodedRequestUrlFragment), 0), null, DownloadMethod.GET);
                if (result.IsNull())
                {
                    return;
                }

                using (Stream responseReader = result.GetResponse())
                    using (StreamReader reader = new StreamReader(responseReader))
                    {
                        // only the first line of the response is parsed as JSON
                        string json = reader.ReadLine();
                        using (MemoryStream ms = new MemoryStream(Encoding.Unicode.GetBytes(json)))
                        {
                            DataContractJsonSerializer ser =
                                new DataContractJsonSerializer(typeof(LanguageDetector));
                            LanguageDetector detector = ser.ReadObject(ms) as LanguageDetector;

                            if (!detector.IsNull())
                            {
                                // the culture lookup runs first: if it throws, neither property is set
                                CultureInfo culture = CultureInfo.GetCultureInfo(detector.responseData.language);
                                propertyBag["Language"].Value        = detector.responseData.language;
                                propertyBag["LanguageCulture"].Value = culture;
                            }
                        }
                    }
            }
            catch (Exception ex)
            {
                m_Logger.Error("Error during google language detection, the error was: {0}", ex.ToString());
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Fetches the content addressed by a crawl step using a downloader obtained
        /// from the downloader factory.
        /// </summary>
        /// <param name="step">Crawl step whose Uri is downloaded</param>
        /// <returns>
        /// The downloaded content, or null when an exception occurred and was handed
        /// to <c>OnDownloadException</c>
        /// </returns>
        private PropertyBag Download(CrawlStep step)
        {
            PropertyBag downloaded = null;

            try
            {
                IWebDownloader downloader = m_DownloaderFactory.GetDownloader();
                m_Logger.Verbose("Downloading {0}", step.Uri);
                downloaded = downloader.Download(step, DownloadMethod.Get);
            }
            catch (Exception downloadError)
            {
                // report the failure; the result stays null
                OnDownloadException(downloadError, step);
            }

            return downloaded;
        }
        /// <summary>
        /// Detects the language of the text held by <paramref name="propertyBag"/> by querying
        /// the Google AJAX language detection endpoint, and stores the result in the bag's
        /// "Language" and "LanguageCulture" properties.
        /// </summary>
        /// <param name="crawler">Crawler running this step; only checked for null here.</param>
        /// <param name="propertyBag">Bag whose Text is analysed and which receives the detected language.</param>
        /// <remarks>
        /// Nothing happens when the text is null or empty, when the download returns null,
        /// or when the response has no first line. Other failures propagate to the caller.
        /// </remarks>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            AspectF.Define.
            NotNull(crawler, "crawler").
            NotNull(propertyBag, "propertyBag");

            string content = propertyBag.Text;

            if (content.IsNullOrEmpty())
            {
                return;
            }

            // limit the amount of text sent to the service
            string contentLookupText = content.Length > MaxPostSize
                                ? content.Substring(0, MaxPostSize).Trim()
                                : content.Trim();

            // The text is arbitrary page content: escape it so characters such as
            // '&', '#' or '+' cannot truncate or alter the q parameter.
            string encodedRequestUrlFragment =
                "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}".FormatWith(
                    Uri.EscapeDataString(contentLookupText));

            IWebDownloader downloader = m_DownloaderFactory.GetDownloader();
            PropertyBag    result     = downloader.Download(new CrawlStep(new Uri(encodedRequestUrlFragment), 0), DownloadMethod.Get);

            // no result to read from: leave the bag untouched instead of dereferencing null
            if (result == null)
            {
                return;
            }

            using (MemoryStream responseReader = result.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseReader))
                {
                    // only the first line of the response is parsed as JSON
                    string json = reader.ReadLine();

                    // ReadLine returns null on an empty stream; GetBytes(null) would throw
                    if (string.IsNullOrEmpty(json))
                    {
                        return;
                    }

                    using (MemoryStream ms = new MemoryStream(Encoding.Unicode.GetBytes(json)))
                    {
                        DataContractJsonSerializer ser =
                            new DataContractJsonSerializer(typeof(LanguageDetector));
                        LanguageDetector detector = ser.ReadObject(ms) as LanguageDetector;

                        if (!detector.IsNull())
                        {
                            // the culture lookup runs first: if it throws, neither property is set
                            CultureInfo culture = CultureInfo.GetCultureInfo(detector.responseData.language);
                            propertyBag["Language"].Value        = detector.responseData.language;
                            propertyBag["LanguageCulture"].Value = culture;
                        }
                    }
                }
        }
        /// <summary>
        /// Regenerates the warmup pages when needed: deletes the files listed in the previous
        /// warmup report, downloads every url configured in the warmup settings, stores the
        /// pages that answered 200 OK, writes a new report and records the generation time.
        /// </summary>
        /// <remarks>
        /// Returns without doing anything when the site base url is blank, when the lock file
        /// cannot be acquired, or when the timestamp stored in the warmup file plus the
        /// configured delay is still in the future. Failures on individual files or urls are
        /// logged and do not stop the remaining work.
        /// </remarks>
        public void EnsureGenerate()
        {
            var baseUrl = _orchardServices.WorkContext.CurrentSite.BaseUrl;
            var part    = _orchardServices.WorkContext.CurrentSite.As<WarmupSettingsPart>();

            // do nothing while the base url setting is not defined
            if (String.IsNullOrWhiteSpace(baseUrl))
            {
                return;
            }

            // prevent multiple appdomains from rebuilding the static page concurrently (e.g., command line)
            ILockFile lockFile = null;

            if (!_lockFileManager.TryAcquireLock(_lockFilename, ref lockFile))
            {
                return;
            }

            using (lockFile) {
                // check if we need to regenerate the pages by reading the last time it has been done
                // 1- if the warmup file doesn't exist, generate the pages
                // 2- otherwise, regenerate only once the stored time plus the delay has passed
                if (_appDataFolder.FileExists(_warmupPath))
                {
                    try {
                        var warmupContent = _appDataFolder.ReadFile(_warmupPath);
                        var expired       = XmlConvert.ToDateTimeOffset(warmupContent).AddMinutes(part.Delay);
                        if (expired > _clock.UtcNow)
                        {
                            return;
                        }
                    }
                    catch {
                        // invalid file: delete it and continue processing
                        _appDataFolder.DeleteFile(_warmupPath);
                    }
                }

                // delete previously generated pages, by reading the Warmup Report file
                try {
                    var encodedPrefix = WarmupUtility.EncodeUrl("http://www.");

                    foreach (var reportEntry in _reportManager.Read())
                    {
                        try {
                            // use FileName as the SiteBaseUrl could have changed in the meantime
                            var path = _appDataFolder.Combine(BaseFolder, reportEntry.Filename);
                            _appDataFolder.DeleteFile(path);

                            // delete the www-less version too if it's available
                            if (reportEntry.Filename.StartsWith(encodedPrefix, StringComparison.OrdinalIgnoreCase))
                            {
                                var filename = WarmupUtility.EncodeUrl("http://") + reportEntry.Filename.Substring(encodedPrefix.Length);
                                path = _appDataFolder.Combine(BaseFolder, filename);
                                _appDataFolder.DeleteFile(path);
                            }
                        }
                        catch (Exception e) {
                            // the {0} placeholder is required, otherwise the file name is never rendered
                            Logger.Error(e, "Could not delete specific warmup file: {0}", reportEntry.Filename);
                        }
                    }
                }
                catch (Exception e) {
                    Logger.Error(e, "Could not read warmup report file");
                }

                var reportEntries = new List<ReportEntry>();

                if (!String.IsNullOrEmpty(part.Urls))
                {
                    // loop over every relative url (one per line) to generate the contents
                    using (var urlReader = new StringReader(part.Urls)) {
                        string relativeUrl;
                        while (null != (relativeUrl = urlReader.ReadLine()))
                        {
                            if (String.IsNullOrWhiteSpace(relativeUrl))
                            {
                                continue;
                            }

                            // declared outside the try so the catch block can log it
                            string url = null;
                            relativeUrl = relativeUrl.Trim();

                            try {
                                url = VirtualPathUtility.RemoveTrailingSlash(baseUrl) + relativeUrl;
                                var filename = WarmupUtility.EncodeUrl(url.TrimEnd('/'));
                                var path     = _appDataFolder.Combine(BaseFolder, filename);

                                var download = _webDownloader.Download(url);

                                if (download != null)
                                {
                                    if (download.StatusCode == HttpStatusCode.OK)
                                    {
                                        // success
                                        _appDataFolder.CreateFile(path, download.Content);

                                        reportEntries.Add(new ReportEntry {
                                            RelativeUrl = relativeUrl,
                                            Filename    = filename,
                                            StatusCode  = (int)download.StatusCode,
                                            CreatedUtc  = _clock.UtcNow
                                        });

                                        // if the url starts with http://www., then also render the www-less one
                                        if (url.StartsWith("http://www.", StringComparison.OrdinalIgnoreCase))
                                        {
                                            // separate locals: 'url' must keep the requested value for the error log below
                                            var wwwLessUrl      = "http://" + url.Substring("http://www.".Length);
                                            var wwwLessFilename = WarmupUtility.EncodeUrl(wwwLessUrl.TrimEnd('/'));
                                            var wwwLessPath     = _appDataFolder.Combine(BaseFolder, wwwLessFilename);
                                            _appDataFolder.CreateFile(wwwLessPath, download.Content);
                                        }
                                    }
                                    else
                                    {
                                        // the server answered with a non-OK status: report it, store nothing
                                        reportEntries.Add(new ReportEntry {
                                            RelativeUrl = relativeUrl,
                                            Filename    = filename,
                                            StatusCode  = (int)download.StatusCode,
                                            CreatedUtc  = _clock.UtcNow
                                        });
                                    }
                                }
                                else
                                {
                                    // download failed: 0 marks the absence of any response
                                    reportEntries.Add(new ReportEntry {
                                        RelativeUrl = relativeUrl,
                                        Filename    = filename,
                                        StatusCode  = 0,
                                        CreatedUtc  = _clock.UtcNow
                                    });
                                }
                            }
                            catch (Exception e) {
                                // the {0} placeholder is required, otherwise the url is never rendered
                                Logger.Error(e, "Could not extract warmup page content for: {0}", url);
                            }
                        }
                    }
                }

                _reportManager.Create(reportEntries);

                // finally write the time the generation has been executed
                _appDataFolder.CreateFile(_warmupPath, XmlConvert.ToString(_clock.UtcNow, XmlDateTimeSerializationMode.Utc));
            }
        }
Esempio n. 6
0
 public void ShouldReturnNullWhenUrlIsEmpty()
 {
     // a null, an empty and a whitespace-only url must each yield a null download
     string[] blankUrls = { null, "", " " };

     foreach (string blankUrl in blankUrls)
     {
         Assert.That(_webDownloader.Download(blankUrl), Is.Null);
     }
 }