Exemplo n.º 1
0
        /// <summary>
        /// Writes the crawled response for a feed request to disk as an indented JSON snapshot.
        /// </summary>
        /// <param name="feedRequest">Feed request supplying the request URI, the extraction expression and the feed setting (id, delay).</param>
        /// <param name="response">Crawler response; when null an error is logged and nothing is written.</param>
        protected void Save(FeedRequest feedRequest, Response response)
        {
            if (response == null)
            {
                var errorLog = Logger.GetLogger(baseUrl);
                errorLog.Error(feedRequest.Request.Uri + " response save response is null.");
                return;
            }

            var source = feedRequest.Request;

            // Re-encode the response body from its declared charset into UTF-8.
            var rawBody        = response.Data.ToString();
            var sourceEncoding = Encoding.GetEncoding(response.Charset);
            var utf8Body       = Convert(rawBody, sourceEncoding, Encoding.UTF8);

            var snapshot = new FeedSnapshot();
            snapshot.Url             = source.Uri.ToString();
            snapshot.Content         = utf8Body;
            snapshot.RuiJiExpression = feedRequest.Expression;

            var serialized = JsonConvert.SerializeObject(snapshot, Formatting.Indented);

            // Default target: snapshot directory, file name stamped with the current tick count.
            var target = Path.Combine(snapshotPath, feedRequest.Setting.Id + "_" + DateTime.Now.Ticks + ".json");

            if (feedRequest.Setting.Delay > 0)
            {
                // Delayed feeds go to the delay directory; the stamp is "now + Delay minutes".
                var dueAt = DateTime.Now.AddMinutes(feedRequest.Setting.Delay);
                target = Path.Combine(delayPath, feedRequest.Setting.Id + "_" + dueAt.Ticks + ".json");
            }

            Logger.GetLogger(baseUrl).Info(source.Uri + " response save to " + target);
            File.WriteAllText(target, serialized, Encoding.UTF8);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Requests the feed address and wraps a successful (HTTP 200) response in a <see cref="FeedSnapshot"/>.
        /// </summary>
        /// <param name="feed">Feed definition supplying the address, headers, method, post data and extraction expressions.</param>
        /// <param name="persistence">When true, the snapshot is also written to disk as indented JSON.</param>
        /// <returns>
        /// The snapshot, or null when the response is null, its status is not 200 OK,
        /// or an exception occurred (the exception is traced, not rethrown).
        /// </returns>
        public FeedSnapshot DoTask(FeedModel feed, bool persistence = false)
        {
            try
            {
                var request = new Request(feed.Address);
                if (feed.Headers != null)
                {
                    // NOTE(review): request.Headers now refers to feed.Headers itself, so the
                    // Referer added below is presumably appended to the feed's own header
                    // collection as well — confirm that is intended.
                    request.Headers = feed.Headers;
                }
                request.Headers.Add(new WebHeader("Referer", request.Uri.AbsoluteUri));
                request.Method = feed.Method;
                if (feed.Method == "POST" && !string.IsNullOrEmpty(feed.PostParam))
                {
                    request.PostParam = feed.PostParam;
                }

                var response = new RuiJi.Net.NodeVisitor.Crawler().Request(request);

                if (response == null || response.StatusCode != HttpStatusCode.OK)
                {
                    return null;
                }

                // Re-encode the response body from its declared charset into UTF-8.
                var content = Convert(response.Data.ToString(), Encoding.GetEncoding(response.Charset), Encoding.UTF8);

                var snap = new FeedSnapshot
                {
                    Url             = feed.Address,
                    Content         = content,
                    Type            = feed.Type,
                    BlockExpression = feed.BlockExpression,
                    RuiJiExpression = feed.RuiJiExpression
                };

                if (persistence)
                {
                    var json = JsonConvert.SerializeObject(snap, Formatting.Indented);

                    // Immediate feeds go to "snapshot\"; delayed feeds go to "delay\" with a
                    // tick stamp of "now + Delay minutes".
                    var fileName = baseDir + @"snapshot\" + feed.Id + "_" + DateTime.Now.Ticks + ".json";
                    if (feed.Delay > 0)
                    {
                        fileName = baseDir + @"delay\" + feed.Id + "_" + DateTime.Now.AddMinutes(feed.Delay).Ticks + ".json";
                    }

                    File.WriteAllText(fileName, json, Encoding.UTF8);
                }

                return snap;
            }
            catch (Exception ex)
            {
                // Still best-effort (callers get null), but the failure is no longer silent.
                var address = feed == null ? "<null feed>" : feed.Address;
                System.Diagnostics.Trace.TraceError("DoTask failed for " + address + ": " + ex);
            }

            return null;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Extracts the distinct link addresses from a feed snapshot's content.
        /// </summary>
        /// <remarks>
        /// The default selector is the <c>href</c> attribute of <c>a</c> elements. When
        /// <c>feed.UseBlock</c> is set and a block expression is present, the block is
        /// deserialized from it; otherwise a non-empty RuiJi expression replaces the default
        /// selectors. URL fragments are stripped and relative links are resolved against
        /// <c>feed.Url</c>.
        /// </remarks>
        /// <param name="feed">Snapshot holding the content, its source URL and the extraction expressions.</param>
        /// <returns>Distinct addresses in first-seen order; empty when no tiles were extracted.</returns>
        public List<string> ExtractAddress(FeedSnapshot feed)
        {
            var block = new ExtractBlock();

            block.TileSelector.Selectors.Add(new CssSelector("a", "href"));

            if (feed.UseBlock)
            {
                if (!string.IsNullOrEmpty(feed.BlockExpression))
                {
                    block = JsonConvert.DeserializeObject<ExtractBlock>(feed.BlockExpression);
                }
            }
            else if (!string.IsNullOrEmpty(feed.RuiJiExpression))
            {
                block.TileSelector.Selectors.Clear();

                var selectors = RuiJiBlockParser.ParserBase(feed.RuiJiExpression).Selectors;
                block.TileSelector.Selectors.AddRange(selectors);
            }

            var result  = RuiJiExtractor.Extract(feed.Content, block);
            var results = new List<string>();

            if (result.Tiles == null)
            {
                return results;
            }

            // Created on first use so an invalid feed.Url only fails when a relative link
            // actually needs resolving (as before), and is parsed at most once.
            Uri baseUri = null;

            foreach (var item in result.Tiles)
            {
                if (item.Content == null)
                {
                    // A tile without content carries no address; skip it instead of throwing.
                    continue;
                }

                var href = item.Content.ToString();

                var fragmentStart = href.IndexOf('#');
                if (fragmentStart >= 0)
                {
                    href = href.Substring(0, fragmentStart);
                }

                if (Uri.IsWellFormedUriString(href, UriKind.Relative))
                {
                    if (baseUri == null)
                    {
                        baseUri = new Uri(feed.Url);
                    }
                    href = new Uri(baseUri, href).AbsoluteUri;
                }

                results.Add(href);
            }

            return results.Distinct().ToList();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Requests the feed address and wraps a successful (HTTP 200) response in a <see cref="FeedSnapshot"/>.
        /// </summary>
        /// <param name="feed">Feed definition supplying the address, headers, method, post data, JS flag and extraction expressions.</param>
        /// <param name="persistence">When true, the snapshot is also written to disk as indented JSON.</param>
        /// <returns>
        /// The snapshot, or null when the response is null, its status is not 200 OK,
        /// or an exception occurred (the exception is logged, not rethrown).
        /// </returns>
        public FeedSnapshot DoTask(FeedModel feed, bool persistence = false)
        {
            try
            {
                Logger.GetLogger(baseUrl).Info("do task -> request address " + feed.Address);

                var request = new Request(feed.Address);
                request.RunJS = (feed.RunJS == Status.ON);
                if (feed.Headers != null)
                {
                    // NOTE(review): request.Headers now refers to feed.Headers itself, so headers
                    // added below (Referer, User-Agent) are presumably appended to the feed's own
                    // collection too — confirm that is intended when a feed is reused.
                    request.Headers = feed.Headers;

                    // NOTE(review): the Referer default is only applied when the feed supplies
                    // headers; confirm that feeds without headers should go out without one.
                    if (!request.Headers.Any(m => m.Name == "Referer"))
                    {
                        request.Headers.Add(new WebHeader("Referer", request.Uri.AbsoluteUri));
                    }
                }

                request.Method = feed.Method;
                if (feed.Method == "POST" && !string.IsNullOrEmpty(feed.Data))
                {
                    request.Data = feed.Data;
                }

                // Add a User-Agent header when UALiteDb provides a non-empty value.
                var ua = UALiteDb.GetOne();
                if (!string.IsNullOrEmpty(ua))
                {
                    request.Headers.Add(new WebHeader("User-Agent", ua));
                }

                var response = NodeVisitor.Crawler.Request(request);

                if (response == null)
                {
                    Logger.GetLogger(baseUrl).Error("request " + feed.Address + " response is null");
                    return null;
                }

                Logger.GetLogger(baseUrl).Info("request " + feed.Address + " response code is " + response.StatusCode);

                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return null;
                }

                // Re-encode the response body from its declared charset into UTF-8.
                var content = Convert(response.Data.ToString(), Encoding.GetEncoding(response.Charset), Encoding.UTF8);

                var snap = new FeedSnapshot
                {
                    Url             = feed.Address,
                    Content         = content,
                    Type            = feed.Type,
                    BlockExpression = feed.BlockExpression,
                    RuiJiExpression = feed.RuiJiExpression
                };

                if (persistence)
                {
                    var json = JsonConvert.SerializeObject(snap, Formatting.Indented);

                    // Immediate feeds go to "snapshot\"; delayed feeds go to "delay\" with a
                    // tick stamp of "now + Delay minutes".
                    var fileName = baseDir + @"snapshot\" + feed.Id + "_" + DateTime.Now.Ticks + ".json";
                    if (feed.Delay > 0)
                    {
                        fileName = baseDir + @"delay\" + feed.Id + "_" + DateTime.Now.AddMinutes(feed.Delay).Ticks + ".json";
                    }

                    Logger.GetLogger(baseUrl).Info(feed.Address + " response save to " + fileName);
                    File.WriteAllText(fileName, json, Encoding.UTF8);
                }

                return snap;
            }
            catch (Exception ex)
            {
                // Failures are errors, not informational events; include the full exception
                // (type and stack trace) so the cause can be diagnosed from the log.
                Logger.GetLogger(baseUrl).Error("do task -> request address failed " + ex);
            }

            return null;
        }