示例#1
0
        /// <summary>
        /// Collects the link targets of every anchor tag found in the html of the specified target.
        /// </summary>
        /// <remarks>
        /// An href that is already a well formed absolute uri is returned unchanged.  Any other
        /// href is resolved against <c>Root</c> using uri resolution rules; anchors with no href,
        /// with an href of "#", or whose href cannot be resolved to a well formed absolute uri
        /// are skipped.
        /// </remarks>
        /// <param name="target">The uri of the page whose anchor tags are read</param>
        /// <returns>The absolute link targets, in the order the anchors were visited</returns>
        public override string[] ExtractTargets(string target)
        {
            List<string> results = new List<string>();
            Url          url     = Url.FromUri(target);
            CQ           q       = CQ.Create(url.Html);

            // Relative hrefs are resolved against Root.  Root is treated as a directory (a
            // trailing slash is appended when missing) so that "page.html" still lands beneath
            // Root, as it did when the two strings were simply combined.
            Uri    baseUri = null;
            string root    = Root;
            if (!string.IsNullOrEmpty(root))
            {
                string rootDirectory = root.EndsWith("/", StringComparison.Ordinal) ? root : root + "/";
                if (!Uri.TryCreate(rootDirectory, UriKind.Absolute, out baseUri))
                {
                    baseUri = null;
                }
            }

            q["a"].Each((i, d) =>
            {
                string href = d.Attributes["href"];
                if (href == null || href.Equals("#"))
                {
                    return;
                }

                if (!Uri.IsWellFormedUriString(href, UriKind.Absolute))
                {
                    // Uri resolution (rather than System.IO.Path.Combine) keeps forward slashes
                    // on every platform and resolves root-relative hrefs such as "/about"
                    // against the host instead of discarding the base.
                    Uri resolved;
                    if (baseUri == null || !Uri.TryCreate(baseUri, href, out resolved))
                    {
                        return;
                    }

                    href = resolved.AbsoluteUri;
                    if (!Uri.IsWellFormedUriString(href, UriKind.Absolute))
                    {
                        return;
                    }
                }

                results.Add(href);
            });

            return results.ToArray();
        }
示例#2
0
        /// <summary>
        /// Determines whether an Image record exists for the specified target.
        /// </summary>
        /// <param name="target">The uri to check</param>
        /// <returns>
        /// false when the Url obtained for the target has no Id; otherwise true only if an
        /// Image whose UrlId equals that Id is found
        /// </returns>
        public override bool WasProcessed(string target = "")
        {
            Url resolved = Url.FromUri(target);

            // Without an Id there is nothing to look an Image up by.
            bool hasId = resolved.Id != null;

            return hasId && Image.OneWhere(c => c.UrlId == resolved.Id) != null;
        }
示例#3
0
        /// <summary>
        /// Reads the target and saves the url of any img tag it finds
        /// </summary>
        /// <remarks>
        /// The Url for the target is saved first if it has no Id.  For each img tag with a src,
        /// a relative src is resolved against the target, an Image record is created when none
        /// exists for the resulting url, and OnImageFound is invoked.  An exception raised while
        /// handling one img tag is logged and does not stop the remaining tags from being
        /// processed.
        /// </remarks>
        /// <param name="target">The uri of the page whose img tags are read</param>
        public override void ProcessTarget(string target)
        {
            Url url = Url.FromUri(target);

            if (url.Id == null)
            {
                url.Save();
            }

            // Base used to resolve relative src values; stays null when the target is not an
            // absolute uri, in which case the concatenation fallback below is used.
            Uri pageUri;
            if (!Uri.TryCreate(target, UriKind.Absolute, out pageUri))
            {
                pageUri = null;
            }

            CQ q = CQ.Create(url.Html);

            q["img"].Each((i, d) =>
            {
                try
                {
                    string imgUrl = d.Attributes["src"];

                    if (string.IsNullOrWhiteSpace(imgUrl))
                    {
                        // An img without a src has nothing to record; skip it instead of
                        // letting the Uri constructor throw and logging it as an error.
                        return;
                    }

                    if (Uri.IsWellFormedUriString(imgUrl, UriKind.Relative))
                    {
                        // Resolve against the page uri so that "img.png", "/img.png",
                        // "../img.png" and "//host/img.png" each produce the correct absolute
                        // url; plain concatenation onto the page path does not.
                        Uri resolved;
                        if (pageUri != null && Uri.TryCreate(pageUri, imgUrl, out resolved))
                        {
                            imgUrl = resolved.AbsoluteUri;
                        }
                        else
                        {
                            imgUrl = $"{url.ProtocolOfProtocolId.Value}://{url.DomainOfDomainId.Value}{url.PathOfPathId.Value}{imgUrl}";
                        }
                    }

                    Url   image = Url.FromUri(new Uri(imgUrl), true);
                    Image img   = Image.OneWhere(c => c.UrlId == image.Id);
                    if (img == null)
                    {
                        Crawler cr = Crawler.OneWhere(c => c.Name == this.Name);
                        // NOTE(review): RootUrl is assigned here but cr is not saved anywhere in
                        // this method - confirm whether the assignment is meant to be persisted.
                        cr.RootUrl    = target;
                        img           = new Image();
                        img.UrlId     = image.Id;
                        img.Date      = DateTime.UtcNow;
                        img.CrawlerId = cr.Id;
                        img.Save();
                    }

                    // ?.Invoke reads the delegate once, so a handler removed between a null
                    // check and the call cannot cause a NullReferenceException.
                    OnImageFound?.Invoke(url, imgUrl);
                }
                catch (Exception ex)
                {
                    Logging.Log.AddEntry("Error occurred in image crawler: {0}", ex, ex.Message);
                }
            });
        }