Beispiel #1
0
        /// <summary>
        /// Reports a <see cref="BrokenLinkWarning"/> for every link of the page whose status is
        /// <see cref="HttpStatusCode.NotFound"/>.
        /// </summary>
        /// <param name="sqlRepository">Repository queried for the links of the page.</param>
        /// <param name="urlDto">Page to analyze; its links are only inspected when its status is OK.</param>
        /// <returns>One warning occurrence per broken link; an empty list when the page status is not OK.</returns>
        public override IEnumerable<WarningOccurrenceDto> Analyze(ISqlRepository sqlRepository, UrlDto urlDto)
        {
            var occurrences = new List<WarningOccurrenceDto>();

            // Links are only inspected for pages that answered with OK.
            if (urlDto.Status == HttpStatusCode.OK)
            {
                var pageLinks = sqlRepository.GetWebPageLinks(urlDto.UrlId.Value);

                foreach (var pageLink in pageLinks)
                {
                    if (pageLink.Status != HttpStatusCode.NotFound)
                    {
                        continue;
                    }

                    IWarning brokenLinkWarning = GetWarning(typeof(BrokenLinkWarning));

                    // The warning description is used as a format string that receives the link URL.
                    var occurrence = new WarningOccurrenceDto();
                    occurrence.UrlId = (long)urlDto.UrlId;
                    occurrence.Message = String.Format(brokenLinkWarning.Description, pageLink.Url);
                    occurrence.WarningType = brokenLinkWarning;

                    occurrences.Add(occurrence);
                }
            }

            return occurrences;
        }
Beispiel #2
0
        /// <summary>
        /// Checks the title of a page. Reports an <see cref="EmptyTitleWarning"/> when the page has no
        /// title or an empty one, and a <see cref="DuplicatedTitleWarning"/> when the repository holds the
        /// same title text for a different URL.
        /// </summary>
        /// <param name="sqlRepository">Repository used to look up contents with the same title and their URL.</param>
        /// <param name="urlDto">Page to analyze; skipped when it has no contents or its status is not OK.</param>
        /// <returns>The warning occurrences found; an empty list when the page is skipped or has a unique title.</returns>
        public override IEnumerable<WarningOccurrenceDto> Analyze(ISqlRepository sqlRepository, UrlDto urlDto)
        {
            var warningOccurrencesList = new List<WarningOccurrenceDto>();

            if (urlDto.Contents == null)
            {
                _log.Debug("urlDto is empty for url:" + urlDto.Url);
                return warningOccurrencesList;
            }

            if (urlDto.Status != HttpStatusCode.OK)
            {
                return warningOccurrencesList;
            }

            // SingleOrDefault throws when the contents hold more than one title element.
            var title = urlDto.Contents.SingleOrDefault(element => element.ContentType == ContentTypesEnum.Title);

            _log.Debug(String.Format("Analyze title '{0}' for url '{1}'",
                (title == null || title.Element == null) ? "" : title.Element,
                urlDto.Url));

            if (title == null || String.IsNullOrEmpty(title.Element))
            {
                IWarning emptyTitleWarning = GetWarning(typeof(EmptyTitleWarning));
                warningOccurrencesList.Add(new WarningOccurrenceDto()
                    {
                        UrlId = (long) urlDto.UrlId,
                        Message = emptyTitleWarning.Description,
                        WarningType = emptyTitleWarning
                    }
                );
                return warningOccurrencesList;
            }

            // FirstOrDefault already yields null for an empty result, so a separate Any() pass would
            // only enumerate the repository result a second time.
            ContentDto targetContentDto = sqlRepository
                .GetContentForElementText(ContentTypesEnum.Title, title.Element)
                .FirstOrDefault(x => x.UrlId != urlDto.UrlId);

            if (targetContentDto == null)
            {
                return warningOccurrencesList;
            }

            UrlDto targetUrlDto;
            bool targetFound = sqlRepository.FindUrl(targetContentDto.UrlId, out targetUrlDto);

            // The duplicate is still reported when its URL row cannot be resolved; the URL is then
            // left empty in the message instead of dereferencing a missing DTO.
            string targetUrl = String.Empty;
            if (targetFound && targetUrlDto != null)
            {
                targetUrl = targetUrlDto.Url;
            }
            else
            {
                _log.Debug("No url found for url id:" + targetContentDto.UrlId);
            }

            IWarning duplicatedTitleWarning = GetWarning(typeof(DuplicatedTitleWarning));
            warningOccurrencesList.Add(new WarningOccurrenceDto()
                {
                    UrlId = (long)urlDto.UrlId,
                    Message = String.Format(duplicatedTitleWarning.Description, title.Element, targetUrl),
                    WarningType = duplicatedTitleWarning
                }
            );

            return warningOccurrencesList;
        }
Beispiel #3
0
 /// <summary>
 /// Runs every rule in <c>_rules</c> against the given page and appends the warning occurrences
 /// each rule returns to the report.
 /// </summary>
 /// <param name="sqlRepository">Repository handed to each rule.</param>
 /// <param name="urlDto">Page handed to each rule.</param>
 /// <param name="report">Report whose <c>WarningOccurrences</c> collection receives the results.</param>
 public void ProcessPage(ISqlRepository sqlRepository, UrlDto urlDto, Report report)
 {
     foreach (var analysisRule in _rules)
     {
         // The rule is evaluated first; only then is the report collection touched.
         var foundOccurrences = analysisRule.Analyze(sqlRepository, urlDto);

         report.WarningOccurrences.AddRange(foundOccurrences);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Looks up a row of the Urls table by its url_id.
        /// </summary>
        /// <param name="urlId">Value matched against the url_id column.</param>
        /// <param name="urlDto">The mapped row when one was read; otherwise a new <see cref="UrlDto"/>.</param>
        /// <returns><c>true</c> when a row was read and mapped; otherwise <c>false</c>.</returns>
        internal bool FindUrl(long urlId, out UrlDto urlDto)
        {
            // The out value is assigned up front so it is set even when no row matches.
            urlDto = new UrlDto();
            string query = "SELECT * FROM Urls where url_id='" + urlId + "'";

            using (var command = new SQLiteCommand(query, _sqliteCon))
            using (var reader = command.ExecuteReader())
            {
                if (!reader.Read())
                {
                    return false;
                }

                MapDataReaderToUrlDto(reader, out urlDto);
                return true;
            }
        }
Beispiel #5
0
        /// <summary>
        /// Loads all rows of the Contents table whose url_id matches the given URL.
        /// </summary>
        /// <param name="urlDto">URL whose <c>UrlId</c> is matched against the url_id column.</param>
        /// <returns>One <see cref="ContentDto"/> per row read; an empty array when no row matches.</returns>
        internal ContentDto[] GetContentsForUrlDto(UrlDto urlDto)
        {
            var contents = new List<ContentDto>();
            var selectSql = "SELECT * FROM Contents where url_id='" + urlDto.UrlId + "'";

            using (var selectCommand = new SQLiteCommand(selectSql, _sqliteCon))
            {
                using (var dataReader = selectCommand.ExecuteReader())
                {
                    // Read() advances one row per call, so the loop must continue until it returns
                    // false; a single 'if' would map only the first row.
                    while (dataReader.Read())
                    {
                        ContentDto contentDto;
                        MapDataReaderToContentDto(dataReader, out contentDto);
                        contents.Add(contentDto);
                    }
                }
            }
            return contents.ToArray();
        }
Beispiel #6
0
        /// <summary>
        /// Stores a crawled page: updates the existing URL row (or creates a new one) with the
        /// response status, then attaches the page's links and contents and saves it.
        /// Pages whose absolute URI does not start with the crawl root URI are ignored.
        /// </summary>
        /// <param name="crawledPage">The crawled page; its <c>HttpWebResponse</c> is null when the page did not respond.</param>
        static void AddNewPage(CrawledPage crawledPage)
        {
            if (!crawledPage.Uri.AbsoluteUri.StartsWith(_uriToCrawl.AbsoluteUri))
            {
                return;
            }

            lock (_sqlRepository)
            {
                UrlDto urlDto;

                if (_sqlRepository.FindUrl(crawledPage.Uri.AbsoluteUri, out urlDto))
                {
                    // The response is null for pages that did not respond.
                    if (crawledPage.HttpWebResponse != null)
                    {
                        urlDto.Status = crawledPage.HttpWebResponse.StatusCode;

                        // This can be text/html or text/html;charset=utf-8
                        if (crawledPage.HttpWebResponse.ContentType.Contains("text/html"))
                        {
                            urlDto.IsWebPage = true;
                        }
                    }
                }
                else
                {
                    urlDto = new UrlDto
                        {
                            Url = crawledPage.Uri.AbsoluteUri,
                            IsWebPage = true
                        };

                    // Same guard as the branch above: without a response there is no status code
                    // to record, so Status keeps its default instead of throwing on a null response.
                    if (crawledPage.HttpWebResponse != null)
                    {
                        urlDto.Status = crawledPage.HttpWebResponse.StatusCode;
                    }
                }

                var webPageParsing = new WebPageParsing(crawledPage);
                var list = GetLinks(webPageParsing, urlDto);
                urlDto.Links = list.ToArray();
                urlDto.Contents = GetContent(webPageParsing);
                _sqlRepository.InsertOrUpdateUrl(urlDto);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Stores a link-target URL with the given status, then stores and returns a page with status OK
        /// that has exactly one link pointing at that target.
        /// </summary>
        /// <param name="repository">Repository both URLs are written to.</param>
        /// <param name="httpStatusCode">Status assigned to the link target.</param>
        /// <returns>The value returned by the repository when storing the linking page.</returns>
        static UrlDto InsertUrlDtoWithOneStatusCodeOKLink(ISqlRepository repository, HttpStatusCode httpStatusCode)
        {
            var linkTarget = new UrlDto();
            linkTarget.Status = httpStatusCode;

            // The return value is not used; the id is read back from the same instance below.
            repository.InsertOrUpdateUrl(linkTarget);

            var page = new UrlDto();
            page.Status = HttpStatusCode.OK;

            var outgoingLink = new LinkDto();
            outgoingLink.TargetUrlId = (long) linkTarget.UrlId;
            page.Links = new[] { outgoingLink };

            return repository.InsertOrUpdateUrl(page);
        }
Beispiel #8
0
 /// <summary>
 /// Analyzes a single URL and returns the warning occurrences found for it.
 /// </summary>
 /// <param name="sqlRepository">Repository made available to the analysis.</param>
 /// <param name="urlDto">The URL to analyze.</param>
 /// <returns>The warning occurrences produced by the analysis.</returns>
 public abstract IEnumerable<WarningOccurrenceDto> Analyze(ISqlRepository sqlRepository, UrlDto urlDto);
Beispiel #9
0
 /// <summary>
 /// Builds a <see cref="UrlDto"/> from the current row of the reader, using the
 /// <c>UrlsField</c> values as column ordinals.
 /// </summary>
 /// <param name="dataReader">Reader positioned on the row to map.</param>
 /// <param name="urlDto">Receives the mapped DTO; its Url holds the URL-decoded column value.</param>
 private static void MapDataReaderToUrlDto(SQLiteDataReader dataReader, out UrlDto urlDto)
 {
     urlDto = new UrlDto();

     urlDto.UrlId = dataReader.GetInt64((int)UrlsField.UrlId);

     // The column value is URL-decoded before it ends up in the DTO.
     string storedUrl = dataReader.GetString((int)UrlsField.Url);
     urlDto.Url = HttpUtility.UrlDecode(storedUrl);

     int statusCode = dataReader.GetInt32((int)UrlsField.Status);
     urlDto.Status = (HttpStatusCode) statusCode;

     // Any positive value in the column counts as "is a web page".
     int webPageFlag = dataReader.GetInt32((int)UrlsField.IsWebPage);
     urlDto.IsWebPage = webPageFlag > 0;
 }
Beispiel #10
0
        /// <summary>
        /// Writes the URL to the Urls table and then stores its contents and links.
        /// A DTO that already has a UrlId is written with INSERT OR REPLACE; otherwise a new row is
        /// inserted and the DTO's UrlId is set from the last inserted row id.
        /// </summary>
        /// <param name="urlDto">The URL to store; its UrlId, contents and links are updated in place.</param>
        /// <returns>The same <paramref name="urlDto"/> instance.</returns>
        internal UrlDto InsertOrUpdateUrl(UrlDto urlDto)
        {
            // NOTE(review): the values are formatted straight into the SQL text; confirm the
            // URL-encoded value can never contain a quote, or move to command parameters.
            string encodedUrl = HttpUtility.UrlEncode(urlDto.Url);

            if (!urlDto.UrlId.HasValue)
            {
                string insertSql = String.Format("INSERT INTO Urls (url, status, is_webpage) VALUES ('{0}', '{1}', '{2}')",
                                                 encodedUrl, urlDto.StatusRaw, urlDto.IsWebPage ? 1 : 0);

                ExecuteSqlCommand(insertSql);
                urlDto.UrlId = GetLastInsertedRowId();
            }
            else
            {
                string replaceSql = String.Format("INSERT OR REPLACE INTO Urls (url_id, url, status, is_webpage) VALUES ('{0}', '{1}', '{2}', '{3}')",
                                                  urlDto.UrlId, encodedUrl, urlDto.StatusRaw, urlDto.IsWebPage ? 1 : 0);

                ExecuteSqlCommand(replaceSql);
            }

            if (urlDto.Contents != null)
            {
                foreach (var pageContent in urlDto.Contents)
                {
                    // Falls back to 0 when the DTO still has no id.
                    pageContent.UrlId = urlDto.UrlId.GetValueOrDefault();
                    _contentDao.InsertOrUpdateContent(pageContent);
                }
            }

            if (urlDto.Links != null)
            {
                foreach (var pageLink in urlDto.Links)
                {
                    // Falls back to 0 when the DTO still has no id.
                    pageLink.SourceUrlId = urlDto.UrlId.GetValueOrDefault();
                    _linkDao.AddLink(pageLink);
                }
            }

            return urlDto;
        }
Beispiel #11
0
        /// <summary>
        /// Builds one <see cref="LinkDto"/> per link found on the parsed page. A link target that is
        /// not yet in the repository is inserted first so that it has an id to point at.
        /// </summary>
        /// <param name="webPageParsing">Parser that supplies the links of the page.</param>
        /// <param name="urlDto">Not read by this method.</param>
        /// <returns>The links, each carrying the id of its target URL (0 when the target has no id).</returns>
        private static List<LinkDto> GetLinks(WebPageParsing webPageParsing, UrlDto urlDto)
        {
            var discoveredLinks = webPageParsing.GetLinks();
            var result = new List<LinkDto>();

            foreach (Uri target in discoveredLinks)
            {
                UrlDto targetUrlDto;
                bool alreadyKnown = _sqlRepository.FindUrl(target.AbsoluteUri, out targetUrlDto);

                if (!alreadyKnown)
                {
                    var newTarget = new UrlDto {Url = target.AbsoluteUri};
                    targetUrlDto = _sqlRepository.InsertOrUpdateUrl(newTarget);
                }

                var linkDto = new LinkDto();
                linkDto.TargetUrlId = targetUrlDto.UrlId.GetValueOrDefault();
                result.Add(linkDto);
            }

            return result;
        }
Beispiel #12
0
 /// <summary>
 /// Looks up a URL by its id by delegating to the URL DAO.
 /// </summary>
 /// <param name="urlId">Id of the URL to look up.</param>
 /// <param name="urlDto">Receives whatever the DAO assigns for this lookup.</param>
 /// <returns>The result reported by the DAO.</returns>
 public bool FindUrl(long urlId, out UrlDto urlDto)
 {
     bool found = _urlDao.FindUrl(urlId, out urlDto);
     return found;
 }
Beispiel #13
0
 /// <summary>
 /// Looks up a URL by its address by delegating to the URL DAO.
 /// </summary>
 /// <param name="url">Address of the URL to look up.</param>
 /// <param name="urlDto">Receives whatever the DAO assigns for this lookup.</param>
 /// <returns>The result reported by the DAO.</returns>
 public bool FindUrl(string url, out UrlDto urlDto)
 {
     bool found = _urlDao.FindUrl(url, out urlDto);
     return found;
 }
Beispiel #14
0
 /// <summary>
 /// Stores the URL by delegating to the URL DAO.
 /// </summary>
 /// <param name="urlDto">The URL to store.</param>
 /// <returns>The DTO returned by the DAO.</returns>
 public UrlDto InsertOrUpdateUrl(UrlDto urlDto)
 {
     UrlDto stored = _urlDao.InsertOrUpdateUrl(urlDto);
     return stored;
 }
Beispiel #15
0
        /// <summary>
        /// Creates a <see cref="UrlDto"/> with status OK whose contents consist of a single title
        /// element holding the given text.
        /// </summary>
        /// <param name="title">Text stored as the title element.</param>
        /// <returns>The new, not yet stored DTO.</returns>
        static UrlDto GetValidTitle(string title)
        {
            var page = new UrlDto();
            page.Status = HttpStatusCode.OK;

            var titleContent = new ContentDto();
            titleContent.ContentType = ContentTypesEnum.Title;
            titleContent.Element = title;

            page.Contents = new[] { titleContent };
            return page;
        }