Beispiel #1
0
        /// <summary>
        /// Requests <c>api/fp/rule</c> from a proxy elected with <c>ProxyTypeEnum.Feed</c>
        /// and deserializes the response body into a list of <c>ExtractBlock</c>.
        /// </summary>
        /// <param name="url">Address sent to the proxy as the <c>url</c> query-string value.</param>
        /// <returns>The deserialized response content.</returns>
        /// <exception cref="Exception">The election returned no proxy address.</exception>
        public static List<ExtractBlock> GetExtractBlock(string url)
        {
            var proxyUrl = ProxyManager.Instance.Elect(ProxyTypeEnum.Feed);

            if (string.IsNullOrEmpty(proxyUrl))
            {
                throw new Exception("no available extracter proxy servers");
            }

            proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

            var client = new RestClient("http://" + proxyUrl);

            // The value is itself an address and may contain '?', '&' or '#'. Escape it so it
            // reaches the proxy as one query-string value instead of being split or cut short.
            var restRequest = new RestRequest("api/fp/rule?url=" + Uri.EscapeDataString(url ?? string.Empty));

            restRequest.Method         = Method.GET;
            restRequest.JsonSerializer = new NewtonJsonSerializer();
            restRequest.Timeout        = 15000;

            var restResponse = client.Execute(restRequest);

            return JsonConvert.DeserializeObject<List<ExtractBlock>>(restResponse.Content);
        }
Beispiel #2
0
        /// <summary>
        /// Runs an extraction either in-process (standalone configuration) or by posting the
        /// request to <c>api/ep/extract</c> on an elected proxy.
        /// </summary>
        /// <param name="request">The extraction request to execute or forward.</param>
        /// <returns>The extraction results.</returns>
        /// <exception cref="Exception">Not standalone and the election returned no proxy address.</exception>
        public static List<ExtractResult> Extract(ExtractRequest request)
        {
            // Standalone configuration: extract locally, no HTTP round trip.
            if (NodeConfigurationSection.Standalone)
            {
                return RuiJiExtractor.Extract(request);
            }

            var electedProxy = ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);
            if (string.IsNullOrEmpty(electedProxy))
            {
                throw new Exception("no available Extractor proxy servers");
            }

            var restClient  = new RestClient("http://" + IPHelper.FixLocalUrl(electedProxy));
            var httpRequest = new RestRequest("api/ep/extract")
            {
                Method         = Method.POST,
                JsonSerializer = new NewtonJsonSerializer(),
                Timeout        = 15000
            };

            // NOTE(review): the request is turned into a JSON string first and that string is
            // then handed to AddJsonBody - confirm the receiving endpoint expects this shape.
            httpRequest.AddJsonBody(JsonConvert.SerializeObject(request));

            var httpResponse = restClient.Execute(httpRequest);

            return JsonConvert.DeserializeObject<List<ExtractResult>>(httpResponse.Content);
        }
Beispiel #3
0
        /// <summary>
        /// Issues a GET to <c>api/setting/ua/random</c> and returns the raw response content.
        /// </summary>
        /// <remarks>
        /// The target host is the <c>RuiJiServer</c> app setting in standalone configuration,
        /// otherwise the address returned by the proxy election.
        /// </remarks>
        /// <returns>The response body as received.</returns>
        /// <exception cref="Exception">No target address could be determined.</exception>
        public static string GetRandomSettingUA()
        {
            var host = NodeConfigurationSection.Standalone
                ? ConfigurationManager.AppSettings["RuiJiServer"]
                : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

            if (string.IsNullOrEmpty(host))
            {
                throw new Exception("get feedjobs: proxyUrl can't be null");
            }

            var restClient  = new RestClient("http://" + IPHelper.FixLocalUrl(host));
            var httpRequest = new RestRequest("api/setting/ua/random")
            {
                Method  = Method.GET,
                Timeout = 15000
            };

            return restClient.Execute(httpRequest).Content;
        }
Beispiel #4
0
 /// <summary>
 /// Initializes the node's addresses and records the construction time.
 /// </summary>
 /// <param name="baseUrl">Stored in <c>BaseUrl</c> after passing through <c>IPHelper.FixLocalUrl</c>.</param>
 /// <param name="zkServer">Stored in <c>ZkServer</c> after passing through <c>IPHelper.FixLocalUrl</c>.</param>
 /// <param name="proxyUrl">Stored in <c>ProxyUrl</c> after passing through <c>IPHelper.FixLocalUrl</c>; defaults to an empty string.</param>
 public NodeBase(string baseUrl, string zkServer, string proxyUrl = "")
 {
     // Every address goes through the same IPHelper normalization before it is stored.
     BaseUrl  = IPHelper.FixLocalUrl(baseUrl);
     ZkServer = IPHelper.FixLocalUrl(zkServer);
     ProxyUrl = IPHelper.FixLocalUrl(proxyUrl);

     StartTime = DateTime.Now;
 }
Beispiel #5
0
        /// <summary>
        /// Posts <paramref name="content"/> as a JSON body to <c>api/fp/content/save</c> and
        /// returns the boolean deserialized from the response.
        /// </summary>
        /// <remarks>
        /// The target host is the <c>RuiJiServer</c> app setting in standalone configuration,
        /// otherwise the address returned by the proxy election.
        /// </remarks>
        /// <param name="content">Object sent as the request body.</param>
        /// <returns>The response content deserialized as <see cref="bool"/>.</returns>
        /// <exception cref="Exception">No target address could be determined.</exception>
        public static bool SaveContent(object content)
        {
            var host = NodeConfigurationSection.Standalone
                ? ConfigurationManager.AppSettings["RuiJiServer"]
                : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

            if (string.IsNullOrEmpty(host))
            {
                throw new Exception("no available Extractor proxy servers");
            }

            var restClient  = new RestClient("http://" + IPHelper.FixLocalUrl(host));
            var httpRequest = new RestRequest("api/fp/content/save") { Method = Method.POST };

            httpRequest.AddJsonBody(content);
            httpRequest.Timeout = 15000;

            var httpResponse = restClient.Execute(httpRequest);

            return JsonConvert.DeserializeObject<bool>(httpResponse.Content);
        }
Beispiel #6
0
        /// <summary>
        /// Starts the web host on <paramref name="baseUrl"/>, creates and starts the node that
        /// matches <paramref name="nodeType"/>, then blocks the calling thread on
        /// <c>resetEvent</c>.
        /// </summary>
        /// <param name="baseUrl">Listen address in <c>host:port</c> form; the part after the first ':' is stored in <c>Port</c>.</param>
        /// <param name="nodeType">One of <c>c</c>, <c>cp</c>, <c>e</c>, <c>ep</c>, <c>f</c>, <c>fp</c>.</param>
        /// <param name="zkServer">Passed to the node constructor.</param>
        /// <param name="proxy">Passed to the <c>c</c>, <c>e</c> and <c>f</c> node constructors; not used by the proxy node types.</param>
        /// <exception cref="ArgumentException"><paramref name="nodeType"/> is not one of the supported codes.</exception>
        public void Start(string baseUrl, string nodeType, string zkServer, string proxy = "")
        {
            Running = true;

            this.Port = baseUrl.Split(':')[1];

            // Keep the arguments as supplied; only the local copy is normalized below.
            this.baseUrl  = baseUrl;
            this.nodeType = nodeType;
            this.zkServer = zkServer;
            this.proxy    = proxy;

            baseUrl = IPHelper.FixLocalUrl(baseUrl);

            app = WebApp.Start<Startup>("http://" + baseUrl);

            switch (nodeType)
            {
                case "c":
                    Node = new CrawlerNode(baseUrl, zkServer, proxy);
                    break;

                case "cp":
                    Node = new CrawlerProxyNode(baseUrl, zkServer);
                    break;

                case "e":
                    Node = new ExtractorNode(baseUrl, zkServer, proxy);
                    break;

                case "ep":
                    Node = new ExtractorProxyNode(baseUrl, zkServer);
                    break;

                case "f":
                    Node = new FeedNode(baseUrl, zkServer, proxy);
                    break;

                case "fp":
                    Node = new FeedProxyNode(baseUrl, zkServer);
                    break;

                default:
                    // Without this branch an unrecognized code fell through with Node left
                    // untouched, surfacing as a NullReferenceException (or a stale node being
                    // started again) at Node.Start() below.
                    throw new ArgumentException("unknown node type: " + nodeType, nameof(nodeType));
            }

            Node.Start();

            // Park the caller; nothing in this method signals the event.
            resetEvent = new ManualResetEvent(false);
            resetEvent.WaitOne();
        }
Beispiel #7
0
        /// <summary>
        /// Stores the address (after <c>IPHelper.FixLocalUrl</c>) and derives <c>Port</c> from it.
        /// </summary>
        /// <param name="baseUrl">Server address, optionally in <c>host:port</c> form.</param>
        public DocumentServer(string baseUrl)
        {
            BaseUrl = IPHelper.FixLocalUrl(baseUrl);

            // Port 80 unless the address carries a ':' separator, in which case the segment
            // right after the first ':' is parsed as the port.
            var segments = BaseUrl.Split(':');
            Port = segments.Length > 1 ? Convert.ToInt32(segments[1]) : 80;
        }
Beispiel #8
0
        /// <summary>
        /// Starts a web host with <c>DStartup</c> on the address in the <c>DocServer</c> app
        /// setting; does nothing when that setting is missing or empty.
        /// </summary>
        public static void StartDocServer()
        {
            var configured = ConfigurationManager.AppSettings["DocServer"];
            if (string.IsNullOrEmpty(configured))
            {
                return;
            }

            // The handle returned by WebApp.Start is not retained by this method.
            WebApp.Start<DStartup>("http://" + IPHelper.FixLocalUrl(configured));
        }
Beispiel #9
0
        /// <summary>
        /// Starts the web host on <paramref name="baseUrl"/> and then creates and starts a
        /// <c>StandaloneNode</c> for the same address.
        /// </summary>
        /// <param name="baseUrl">Listen address; passed through <c>IPHelper.FixLocalUrl</c> before use.</param>
        public void StartStandalone(string baseUrl)
        {
            var listenUrl = IPHelper.FixLocalUrl(baseUrl);

            app  = WebApp.Start<Startup>("http://" + listenUrl);
            Node = new StandaloneNode(listenUrl);

            Node.Start();
        }
Beispiel #10
0
        public static Response Request(Request request)
        {
            if (RuiJiConfiguration.Standalone)
            {
                if (string.IsNullOrEmpty(request.Ip))
                {
                    var e = CrawlerServerManager.Instance.ElectIP(request.Uri);
                    if (e != null)
                    {
                        request.Ip = e.ClientIp;
                    }
                    else
                    {
                        request.Ip = IPHelper.GetDefaultIPAddress().ToString();
                    }
                }

                var crawler  = new RuiJiCrawler();
                var response = crawler.Request(request);

                var    maxRefresh = 2;
                string refreshUrl;

                while (HasRefreshMeta(response, out refreshUrl) && maxRefresh > 0)
                {
                    crawler     = new RuiJiCrawler();
                    request.Uri = new Uri(refreshUrl);
                    response    = crawler.Request(request);

                    maxRefresh--;
                }

                return(response);
            }
            else
            {
                var proxyUrl = ProxyManager.Instance.Elect(NodeProxyTypeEnum.CRAWLERPROXY);
                if (string.IsNullOrEmpty(proxyUrl))
                {
                    throw new Exception("no available crawler proxy servers");
                }

                proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

                if (!request.Session)
                {
                    request = (Request)request.Clone();
                }

                var elect = Elect(new CrawlerElectRequest
                {
                    ElectIp    = string.IsNullOrEmpty(request.Ip),
                    ElectProxy = request.Proxy is null,
                    Uri        = request.Uri
                });
Beispiel #11
0
        /// <summary>
        /// Captures the server settings and derives <c>Port</c> from the address.
        /// </summary>
        /// <param name="baseUrl">Server address, optionally in <c>host:port</c> form; stored after <c>IPHelper.FixLocalUrl</c>.</param>
        /// <param name="nodeType">Stored in <c>NodeType</c> unchanged.</param>
        /// <param name="zkServer">Stored in <c>ZkServer</c> unchanged.</param>
        /// <param name="proxy">Stored in <c>Proxy</c> unchanged.</param>
        public WebApiServer(string baseUrl, string nodeType, string zkServer = "", string proxy = "")
        {
            BaseUrl  = IPHelper.FixLocalUrl(baseUrl);
            NodeType = nodeType;
            ZkServer = zkServer;
            Proxy    = proxy;

            // Port 80 unless the address carries a ':' separator, in which case the segment
            // right after the first ':' is parsed as the port.
            var segments = BaseUrl.Split(':');
            Port = segments.Length > 1 ? Convert.ToInt32(segments[1]) : 80;
        }
Beispiel #12
0
        /// <summary>
        /// Starts the web host, the <c>StandAloneNode</c> and both feed schedulers for
        /// <paramref name="baseUrl"/>, in that order.
        /// </summary>
        /// <param name="baseUrl">Listen address; passed through <c>IPHelper.FixLocalUrl</c> before use.</param>
        public void Start(string baseUrl)
        {
            var listenUrl = IPHelper.FixLocalUrl(baseUrl);

            app  = WebApp.Start<Startup>("http://" + listenUrl);
            Node = new StandAloneNode(listenUrl);
            Node.Start();

            // Schedulers are started only after the node itself is running.
            FeedScheduler.Start(listenUrl, "", null);
            FeedExtractScheduler.Start(listenUrl);
        }
Beispiel #13
0
        /// <summary>
        /// Issues a GET to <c>api/fp/feed/page</c> with a <c>pages</c> parameter and returns
        /// the raw response content.
        /// </summary>
        /// <remarks>
        /// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone
        /// configuration, otherwise the address returned by the proxy election.
        /// </remarks>
        /// <param name="pages">Value sent as the <c>pages</c> request parameter; must be non-empty.</param>
        /// <returns>The response body as received.</returns>
        /// <exception cref="Exception">No target address could be determined, or <paramref name="pages"/> is null or empty.</exception>
        public static string GetFeedJobs(string pages)
        {
            var host = RuiJiConfiguration.Standalone
                ? RuiJiConfiguration.RuiJiServer
                : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

            // The host is validated before the argument, so a missing host is reported first.
            if (string.IsNullOrEmpty(host))
            {
                throw new Exception("get feedjobs: proxyUrl can't be null");
            }

            if (string.IsNullOrEmpty(pages))
            {
                throw new Exception("get feedjobs: pages can't be null");
            }

            var restClient  = new RestClient("http://" + IPHelper.FixLocalUrl(host));
            var httpRequest = new RestRequest("api/fp/feed/page") { Method = Method.GET };

            httpRequest.AddParameter("pages", pages);
            httpRequest.Timeout = 15000;

            return restClient.Execute(httpRequest).Content;
        }
Beispiel #14
0
        /// <summary>
        /// Requests <c>api/fp/rule/match</c> for <paramref name="url"/> and deserializes the
        /// response body into a list of <c>ExtractFeatureBlock</c>.
        /// </summary>
        /// <remarks>
        /// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone
        /// configuration, otherwise the address returned by the proxy election.
        /// </remarks>
        /// <param name="url">Address sent as the <c>url</c> query-string value.</param>
        /// <param name="useBlock">Not read by this method.</param>
        /// <returns>The deserialized response content.</returns>
        /// <exception cref="Exception">No target address could be determined.</exception>
        public static List<ExtractFeatureBlock> GetExtractBlock(string url, bool useBlock = false)
        {
            var proxyUrl = RuiJiConfiguration.Standalone
                ? RuiJiConfiguration.RuiJiServer
                : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

            if (string.IsNullOrEmpty(proxyUrl))
            {
                throw new Exception("no available Extractor proxy servers");
            }

            proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

            // NOTE(review): hard-coded address rewrite - presumably a public-to-internal
            // mapping for one deployment; confirm and move to configuration.
            proxyUrl = proxyUrl.Replace("118.31.61.230", "172.16.50.52");

            var client = new RestClient("http://" + proxyUrl);

            // The value is itself an address and may contain '?', '&' or '#'. Escape it so it
            // reaches the server as one query-string value instead of being split or cut short.
            var restRequest = new RestRequest("api/fp/rule/match?url=" + Uri.EscapeDataString(url ?? string.Empty));

            restRequest.Method         = Method.GET;
            restRequest.JsonSerializer = new NewtonJsonSerializer();
            restRequest.Timeout        = 15000;

            var res = client.Execute(restRequest);

            return JsonConvert.DeserializeObject<List<ExtractFeatureBlock>>(res.Content);
        }
Beispiel #15
0
        /// <summary>
        /// Posts <paramref name="content"/> as a JSON body to <c>api/fp/content/save</c> and
        /// returns the boolean deserialized from the response.
        /// </summary>
        /// <remarks>
        /// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone
        /// configuration, otherwise the address returned by the proxy election.
        /// </remarks>
        /// <param name="content">Object sent as the request body.</param>
        /// <returns>The response content deserialized as <see cref="bool"/>.</returns>
        /// <exception cref="Exception">No target address could be determined.</exception>
        public static bool SaveContent(object content)
        {
            var host = RuiJiConfiguration.Standalone
                ? RuiJiConfiguration.RuiJiServer
                : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

            if (string.IsNullOrEmpty(host))
            {
                throw new Exception("no available Extractor proxy servers");
            }

            var restClient  = new RestClient("http://" + IPHelper.FixLocalUrl(host));
            var httpRequest = new RestRequest("api/fp/content/save") { Method = Method.POST };

            httpRequest.AddJsonBody(content);
            httpRequest.Timeout = 15000;

            var httpResponse = restClient.Execute(httpRequest);

            return JsonConvert.DeserializeObject<bool>(httpResponse.Content);
        }
Beispiel #16
0
        /// <summary>
        /// Rebuilds <c>proxys</c> from the children of <c>/live_nodes/proxy</c>, registering a
        /// <c>LiveProxyWatcher</c> on that path in the same call.
        /// </summary>
        /// <remarks>
        /// Best-effort: a failure is traced and not rethrown. Because the list is cleared
        /// first, it may be left empty or partially filled after an error.
        /// </remarks>
        protected void LoadLiveProxy()
        {
            proxys.Clear();
            try
            {
                var nodes = zooKeeper.GetChildren("/live_nodes/proxy", new LiveProxyWatcher(this));

                foreach (var node in nodes)
                {
                    // The child name is used as the proxy address; its data yields the type.
                    var d = GetData("/live_nodes/proxy/" + node);

                    proxys.Add(new LiveProxy
                    {
                        BaseUrl = IPHelper.FixLocalUrl(node),
                        Type    = LiveProxy.GetType(d)
                    });
                }
            }
            catch (Exception ex)
            {
                // Previously swallowed without a trace, which made an empty proxy list
                // impossible to diagnose. Still not rethrown, so callers are unaffected.
                System.Diagnostics.Trace.TraceError("LoadLiveProxy failed: " + ex);
            }
        }
Beispiel #17
0
        /// <summary>
        /// Issues a GET to <c>api/setting/ua/random</c> and returns the raw response content.
        /// </summary>
        /// <remarks>
        /// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone
        /// configuration, otherwise the address returned by the proxy election.
        /// </remarks>
        /// <returns>The response body as received.</returns>
        /// <exception cref="Exception">No target address could be determined.</exception>
        public static string GetRandomSettingUA()
        {
            var proxyUrl = RuiJiConfiguration.Standalone
                ? RuiJiConfiguration.RuiJiServer
                : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

            if (string.IsNullOrEmpty(proxyUrl))
            {
                throw new Exception("get feedjobs: proxyUrl can't be null");
            }

            proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

            var client      = new RestClient("http://" + proxyUrl);
            var restRequest = new RestRequest("api/setting/ua/random");

            restRequest.Method  = Method.GET;
            restRequest.Timeout = 15000;

            // The caller blocks until the response arrives either way, so use the synchronous
            // call directly. The earlier ExecuteAsync + ManualResetEvent pairing never disposed
            // the event and waited without a bound if the callback was not invoked.
            var restResponse = client.Execute(restRequest);

            return restResponse.Content;
        }
        /// <summary>
        /// Rebuilds <c>proxys</c> from the children of <c>/live_nodes/proxy</c>, registering a
        /// <c>LiveProxyWatcher</c> on that path in the same call.
        /// </summary>
        /// <remarks>
        /// Best-effort: a failure is logged and not rethrown. Because the list is cleared
        /// first, it may be left empty or partially filled after an error.
        /// </remarks>
        protected void LoadLiveProxy()
        {
            proxys.Clear();
            try
            {
                // GetAwaiter().GetResult() rethrows the original exception, whereas .Result
                // wraps it in an AggregateException whose Message is generic - which would make
                // the log entry below useless.
                var nodes = zooKeeper
                    .getChildrenAsync("/live_nodes/proxy", new LiveProxyWatcher(this))
                    .GetAwaiter()
                    .GetResult()
                    .Children;

                foreach (var node in nodes)
                {
                    // The child name is used as the proxy address; its data yields the type.
                    var d = GetData("/live_nodes/proxy/" + node);

                    proxys.Add(new LiveProxy
                    {
                        BaseUrl = IPHelper.FixLocalUrl(node),
                        Type    = LiveProxy.GetType(d)
                    });
                }
            }
            catch (Exception ex)
            {
                Logger.GetLogger("").Error(ex.Message);
            }
        }
Beispiel #19
0
        /// <summary>
        /// Requests <c>api/fp/rule</c> for <paramref name="url"/> and deserializes the response
        /// body into a list of <c>ExtractFeatureBlock</c>.
        /// </summary>
        /// <remarks>
        /// The target host is the <c>RuiJiServer</c> app setting in standalone configuration,
        /// otherwise the address returned by the proxy election.
        /// </remarks>
        /// <param name="url">Address sent as the <c>url</c> query-string value.</param>
        /// <param name="useBlock">Not read by this method.</param>
        /// <returns>The deserialized response content.</returns>
        /// <exception cref="Exception">No target address could be determined.</exception>
        public static List<ExtractFeatureBlock> GetExtractBlock(string url, bool useBlock = false)
        {
            var proxyUrl = NodeConfigurationSection.Standalone
                ? ConfigurationManager.AppSettings["RuiJiServer"]
                : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

            if (string.IsNullOrEmpty(proxyUrl))
            {
                throw new Exception("no available Extractor proxy servers");
            }

            proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

            // NOTE(review): hard-coded address rewrite - presumably a public-to-internal
            // mapping for one deployment; confirm and move to configuration.
            proxyUrl = proxyUrl.Replace("118.31.61.230", "172.16.50.52");

            var client = new RestClient("http://" + proxyUrl);

            // The value is itself an address and may contain '?', '&' or '#'. Escape it so it
            // reaches the server as one query-string value instead of being split or cut short.
            var restRequest = new RestRequest("api/fp/rule?url=" + Uri.EscapeDataString(url ?? string.Empty));

            restRequest.Method         = Method.GET;
            restRequest.JsonSerializer = new NewtonJsonSerializer();
            restRequest.Timeout        = 15000;

            var restResponse = client.Execute(restRequest);

            return JsonConvert.DeserializeObject<List<ExtractFeatureBlock>>(restResponse.Content);
        }
Beispiel #20
0
        public static Response Request(Request request, bool usecp = false)
        {
            if (NodeConfigurationSection.Standalone)
            {
                var crawler  = new RuiJiCrawler();
                var response = crawler.Request(request);

                if (string.IsNullOrEmpty(request.Ip))
                {
                    var e = CrawlerServerManager.Instance.ElectIP(request.Uri);
                    if (e != null)
                    {
                        request.Ip = e.ClientIp;
                    }
                }

                var    maxRefresh = 2;
                string refreshUrl;

                while (HasRefreshMeta(response, out refreshUrl) && maxRefresh > 0)
                {
                    crawler     = new RuiJiCrawler();
                    request.Uri = new Uri(refreshUrl);
                    response    = crawler.Request(request);

                    maxRefresh--;
                }

                return(response);
            }
            else
            {
                var proxyUrl = ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);
                if (string.IsNullOrEmpty(proxyUrl))
                {
                    throw new Exception("no available crawler proxy servers");
                }

                proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

                if (usecp)
                {
                    var client      = new RestClient("http://" + proxyUrl);
                    var restRequest = new RestRequest("api/cp/crawl");
                    restRequest.Method = Method.POST;
                    restRequest.AddJsonBody(request);
                    restRequest.Timeout = request.Timeout;

                    var restResponse = client.Execute(restRequest);

                    var response = JsonConvert.DeserializeObject <Response>(restResponse.Content);

                    return(response);
                }
                else
                {
                    var elect = Elect(new CrawlerElectRequest
                    {
                        ElectIp    = string.IsNullOrEmpty(request.Ip),
                        ElectProxy = request.Proxy is null,
                        Uri        = request.Uri
                    });