/// <summary>
/// Requests the extract blocks for <paramref name="url"/> from the <c>api/fp/rule</c>
/// endpoint of a proxy elected with <c>ProxyTypeEnum.Feed</c>.
/// </summary>
/// <param name="url">Address sent to the endpoint as the <c>url</c> query value.</param>
/// <returns>The response body deserialized as a list of <c>ExtractBlock</c>.</returns>
/// <exception cref="Exception">Thrown when no proxy address could be elected.</exception>
public static List<ExtractBlock> GetExtractBlock(string url)
{
    var proxyUrl = ProxyManager.Instance.Elect(ProxyTypeEnum.Feed);
    if (string.IsNullOrEmpty(proxyUrl))
    {
        throw new Exception("no available extracter proxy servers");
    }

    proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

    var client = new RestClient("http://" + proxyUrl);

    // Percent-encode the value: a raw URL containing '&', '#' or '?' would otherwise be
    // truncated or split into additional query parameters. A null url keeps producing an
    // empty value, as plain concatenation did.
    var restRequest = new RestRequest("api/fp/rule?url=" + Uri.EscapeDataString(url ?? string.Empty));
    restRequest.Method = Method.GET;
    restRequest.JsonSerializer = new NewtonJsonSerializer();
    restRequest.Timeout = 15000;

    var restResponse = client.Execute(restRequest);

    return JsonConvert.DeserializeObject<List<ExtractBlock>>(restResponse.Content);
}
/// <summary>
/// Runs an extraction: in-process through <c>RuiJiExtractor</c> when
/// <c>NodeConfigurationSection.Standalone</c> is set, otherwise by POSTing the request to
/// <c>api/ep/extract</c> on a proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </summary>
/// <param name="request">The extraction request to execute or forward.</param>
/// <returns>The extraction results, local or deserialized from the response body.</returns>
/// <exception cref="Exception">Thrown when no proxy address could be elected.</exception>
public static List<ExtractResult> Extract(ExtractRequest request)
{
    // Standalone mode never touches the network.
    if (NodeConfigurationSection.Standalone)
    {
        return RuiJiExtractor.Extract(request);
    }

    var elected = ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);
    if (string.IsNullOrEmpty(elected))
    {
        throw new Exception("no available Extractor proxy servers");
    }

    var restClient = new RestClient("http://" + IPHelper.FixLocalUrl(elected));
    var post = new RestRequest("api/ep/extract")
    {
        Method = Method.POST,
        JsonSerializer = new NewtonJsonSerializer()
    };

    // NOTE(review): the body handed to AddJsonBody is an already-serialized JSON string;
    // confirm that the endpoint expects that shape.
    var payload = JsonConvert.SerializeObject(request);
    post.AddJsonBody(payload);
    post.Timeout = 15000;

    var reply = restClient.Execute(post);
    return JsonConvert.DeserializeObject<List<ExtractResult>>(reply.Content);
}
/// <summary>
/// Calls <c>api/setting/ua/random</c> and returns the raw response content.
/// </summary>
/// <remarks>
/// The target host is the <c>RuiJiServer</c> app setting in standalone mode, otherwise a
/// proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </remarks>
/// <returns>The response body as received.</returns>
/// <exception cref="Exception">Thrown when the resolved host is null or empty.</exception>
public static string GetRandomSettingUA()
{
    var host = NodeConfigurationSection.Standalone
        ? ConfigurationManager.AppSettings["RuiJiServer"]
        : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

    if (string.IsNullOrEmpty(host))
    {
        throw new Exception("get feedjobs: proxyUrl can't be null");
    }

    var restClient = new RestClient("http://" + IPHelper.FixLocalUrl(host));
    var get = new RestRequest("api/setting/ua/random")
    {
        Method = Method.GET,
        Timeout = 15000
    };

    return restClient.Execute(get).Content;
}
/// <summary>
/// Stores the node's addresses, each passed through <c>IPHelper.FixLocalUrl</c>, and
/// records the current local time as the start time.
/// </summary>
/// <param name="baseUrl">Address of this node.</param>
/// <param name="zkServer">Address stored in <c>ZkServer</c>.</param>
/// <param name="proxyUrl">Optional address stored in <c>ProxyUrl</c>; defaults to an empty string.</param>
public NodeBase(string baseUrl, string zkServer, string proxyUrl = "")
{
    BaseUrl = IPHelper.FixLocalUrl(baseUrl);
    ZkServer = IPHelper.FixLocalUrl(zkServer);
    ProxyUrl = IPHelper.FixLocalUrl(proxyUrl);

    StartTime = DateTime.Now;
}
/// <summary>
/// POSTs <paramref name="content"/> as a JSON body to <c>api/fp/content/save</c> and returns
/// the boolean carried by the response body.
/// </summary>
/// <remarks>
/// The target host is the <c>RuiJiServer</c> app setting in standalone mode, otherwise a
/// proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </remarks>
/// <param name="content">Object serialized into the request body.</param>
/// <returns>The response content deserialized as <c>bool</c>.</returns>
/// <exception cref="Exception">Thrown when the resolved host is null or empty.</exception>
public static bool SaveContent(object content)
{
    var host = NodeConfigurationSection.Standalone
        ? ConfigurationManager.AppSettings["RuiJiServer"]
        : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

    if (string.IsNullOrEmpty(host))
    {
        throw new Exception("no available Extractor proxy servers");
    }

    var restClient = new RestClient("http://" + IPHelper.FixLocalUrl(host));

    var post = new RestRequest("api/fp/content/save") { Method = Method.POST };
    post.AddJsonBody(content);
    post.Timeout = 15000;

    var reply = restClient.Execute(post);
    return JsonConvert.DeserializeObject<bool>(reply.Content);
}
/// <summary>
/// Starts the web application on <paramref name="baseUrl"/>, creates and starts the node
/// that matches <paramref name="nodeType"/>, then blocks the calling thread on a reset event.
/// </summary>
/// <param name="baseUrl">Address to listen on, in <c>host</c> or <c>host:port</c> form.</param>
/// <param name="nodeType">One of <c>c</c>, <c>cp</c>, <c>e</c>, <c>ep</c>, <c>f</c>, <c>fp</c>.</param>
/// <param name="zkServer">Address handed to the node constructor.</param>
/// <param name="proxy">Optional proxy address; only passed to the <c>c</c>, <c>e</c> and <c>f</c> nodes.</param>
/// <exception cref="ArgumentException"><paramref name="nodeType"/> is not a supported code.</exception>
public void Start(string baseUrl, string nodeType, string zkServer, string proxy = "")
{
    Running = true;

    // An address without ':' used to fail with IndexOutOfRangeException; fall back to the
    // default HTTP port, which is what "http://host" listens on anyway.
    var addressParts = baseUrl.Split(':');
    this.Port = addressParts.Length > 1 ? addressParts[1] : "80";

    // Keep the arguments exactly as supplied (before FixLocalUrl) in the instance fields.
    this.baseUrl = baseUrl;
    this.nodeType = nodeType;
    this.zkServer = zkServer;
    this.proxy = proxy;

    baseUrl = IPHelper.FixLocalUrl(baseUrl);

    app = WebApp.Start<Startup>("http://" + baseUrl);

    switch (nodeType)
    {
        case "c":
            Node = new CrawlerNode(baseUrl, zkServer, proxy);
            break;
        case "cp":
            Node = new CrawlerProxyNode(baseUrl, zkServer);
            break;
        case "e":
            Node = new ExtractorNode(baseUrl, zkServer, proxy);
            break;
        case "ep":
            Node = new ExtractorProxyNode(baseUrl, zkServer);
            break;
        case "f":
            Node = new FeedNode(baseUrl, zkServer, proxy);
            break;
        case "fp":
            Node = new FeedProxyNode(baseUrl, zkServer);
            break;
        default:
            // Without this branch an unknown code left Node unassigned, so Node.Start()
            // either threw NullReferenceException or restarted a node from an earlier call.
            throw new ArgumentException("unsupported node type: " + nodeType, "nodeType");
    }

    Node.Start();

    // Park the calling thread; nothing in this method signals the event.
    resetEvent = new ManualResetEvent(false);
    resetEvent.WaitOne();
}
/// <summary>
/// Stores the address returned by <c>IPHelper.FixLocalUrl</c> and derives the port from it.
/// </summary>
/// <remarks>
/// The port is the second ':'-separated segment of the stored address, or 80 when the
/// address contains no ':'.
/// </remarks>
/// <param name="baseUrl">Address of the document server.</param>
public DocumentServer(string baseUrl)
{
    BaseUrl = IPHelper.FixLocalUrl(baseUrl);
    Port = 80;

    var segments = BaseUrl.Split(':');
    if (segments.Length > 1)
    {
        Port = Convert.ToInt32(segments[1]);
    }
}
/// <summary>
/// Starts the <c>DStartup</c> web application on the address configured under the
/// <c>DocServer</c> app setting; does nothing when the setting is missing or empty.
/// </summary>
public static void StartDocServer()
{
    var configured = ConfigurationManager.AppSettings["DocServer"];
    if (string.IsNullOrEmpty(configured))
    {
        return;
    }

    // The handle returned by WebApp.Start is not retained.
    WebApp.Start<DStartup>("http://" + IPHelper.FixLocalUrl(configured));
}
/// <summary>
/// Starts the web application on <paramref name="baseUrl"/> and then creates and starts a
/// <c>StandaloneNode</c> for the same address.
/// </summary>
/// <param name="baseUrl">Address to listen on; passed through <c>IPHelper.FixLocalUrl</c> first.</param>
public void StartStandalone(string baseUrl)
{
    var address = IPHelper.FixLocalUrl(baseUrl);

    app = WebApp.Start<Startup>("http://" + address);

    Node = new StandaloneNode(address);
    Node.Start();
}
public static Response Request(Request request) { if (RuiJiConfiguration.Standalone) { if (string.IsNullOrEmpty(request.Ip)) { var e = CrawlerServerManager.Instance.ElectIP(request.Uri); if (e != null) { request.Ip = e.ClientIp; } else { request.Ip = IPHelper.GetDefaultIPAddress().ToString(); } } var crawler = new RuiJiCrawler(); var response = crawler.Request(request); var maxRefresh = 2; string refreshUrl; while (HasRefreshMeta(response, out refreshUrl) && maxRefresh > 0) { crawler = new RuiJiCrawler(); request.Uri = new Uri(refreshUrl); response = crawler.Request(request); maxRefresh--; } return(response); } else { var proxyUrl = ProxyManager.Instance.Elect(NodeProxyTypeEnum.CRAWLERPROXY); if (string.IsNullOrEmpty(proxyUrl)) { throw new Exception("no available crawler proxy servers"); } proxyUrl = IPHelper.FixLocalUrl(proxyUrl); if (!request.Session) { request = (Request)request.Clone(); } var elect = Elect(new CrawlerElectRequest { ElectIp = string.IsNullOrEmpty(request.Ip), ElectProxy = request.Proxy is null, Uri = request.Uri });
/// <summary>
/// Captures the server settings and derives the port from the stored address.
/// </summary>
/// <remarks>
/// The port is the second ':'-separated segment of the stored address, or 80 when the
/// address contains no ':'.
/// </remarks>
/// <param name="baseUrl">Address of the server; stored after <c>IPHelper.FixLocalUrl</c>.</param>
/// <param name="nodeType">Value stored in <c>NodeType</c>.</param>
/// <param name="zkServer">Value stored in <c>ZkServer</c>; defaults to an empty string.</param>
/// <param name="proxy">Value stored in <c>Proxy</c>; defaults to an empty string.</param>
public WebApiServer(string baseUrl, string nodeType, string zkServer = "", string proxy = "")
{
    BaseUrl = IPHelper.FixLocalUrl(baseUrl);
    NodeType = nodeType;
    ZkServer = zkServer;
    Proxy = proxy;
    Port = 80;

    var segments = BaseUrl.Split(':');
    if (segments.Length > 1)
    {
        Port = Convert.ToInt32(segments[1]);
    }
}
/// <summary>
/// Starts the web application, a <c>StandAloneNode</c> and both feed schedulers on
/// <paramref name="baseUrl"/>.
/// </summary>
/// <param name="baseUrl">Address to listen on; passed through <c>IPHelper.FixLocalUrl</c> first.</param>
public void Start(string baseUrl)
{
    var address = IPHelper.FixLocalUrl(baseUrl);

    app = WebApp.Start<Startup>("http://" + address);

    Node = new StandAloneNode(address);
    Node.Start();

    // The schedulers are started only after the node itself.
    FeedScheduler.Start(address, "", null);
    FeedExtractScheduler.Start(address);
}
/// <summary>
/// Calls <c>api/fp/feed/page</c> with the <c>pages</c> parameter and returns the raw
/// response content.
/// </summary>
/// <remarks>
/// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone mode, otherwise a
/// proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </remarks>
/// <param name="pages">Value sent as the <c>pages</c> parameter; must not be null or empty.</param>
/// <returns>The response body as received.</returns>
/// <exception cref="Exception">Thrown when the host or <paramref name="pages"/> is null or empty.</exception>
public static string GetFeedJobs(string pages)
{
    var host = RuiJiConfiguration.Standalone
        ? RuiJiConfiguration.RuiJiServer
        : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

    // The host is validated before the argument, so a missing host is reported first.
    if (string.IsNullOrEmpty(host))
    {
        throw new Exception("get feedjobs: proxyUrl can't be null");
    }

    if (string.IsNullOrEmpty(pages))
    {
        throw new Exception("get feedjobs: pages can't be null");
    }

    var restClient = new RestClient("http://" + IPHelper.FixLocalUrl(host));

    var get = new RestRequest("api/fp/feed/page") { Method = Method.GET };
    get.AddParameter("pages", pages);
    get.Timeout = 15000;

    return restClient.Execute(get).Content;
}
/// <summary>
/// Requests the feature blocks matching <paramref name="url"/> from the
/// <c>api/fp/rule/match</c> endpoint.
/// </summary>
/// <remarks>
/// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone mode, otherwise a
/// proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </remarks>
/// <param name="url">Address sent to the endpoint as the <c>url</c> query value.</param>
/// <param name="useBlock">Not read by this method.</param>
/// <returns>The response body deserialized as a list of <c>ExtractFeatureBlock</c>.</returns>
/// <exception cref="Exception">Thrown when the resolved host is null or empty.</exception>
public static List<ExtractFeatureBlock> GetExtractBlock(string url, bool useBlock = false)
{
    var proxyUrl = RuiJiConfiguration.Standalone
        ? RuiJiConfiguration.RuiJiServer
        : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

    if (string.IsNullOrEmpty(proxyUrl))
    {
        throw new Exception("no available Extractor proxy servers");
    }

    proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

    // NOTE(review): hard-coded address rewrite kept as-is because callers may depend on it;
    // presumably an environment-specific mapping that belongs in configuration - confirm.
    proxyUrl = proxyUrl.Replace("118.31.61.230", "172.16.50.52");

    var client = new RestClient("http://" + proxyUrl);

    // Percent-encode the value: a raw URL containing '&', '#' or '?' would otherwise be
    // truncated or split into additional query parameters. A null url keeps producing an
    // empty value, as plain concatenation did.
    var restRequest = new RestRequest("api/fp/rule/match?url=" + Uri.EscapeDataString(url ?? string.Empty));
    restRequest.Method = Method.GET;
    restRequest.JsonSerializer = new NewtonJsonSerializer();
    restRequest.Timeout = 15000;

    var res = client.Execute(restRequest);

    return JsonConvert.DeserializeObject<List<ExtractFeatureBlock>>(res.Content);
}
/// <summary>
/// POSTs <paramref name="content"/> as a JSON body to <c>api/fp/content/save</c> and returns
/// the boolean carried by the response body.
/// </summary>
/// <remarks>
/// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone mode, otherwise a
/// proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </remarks>
/// <param name="content">Object serialized into the request body.</param>
/// <returns>The response content deserialized as <c>bool</c>.</returns>
/// <exception cref="Exception">Thrown when the resolved host is null or empty.</exception>
public static bool SaveContent(object content)
{
    var host = RuiJiConfiguration.Standalone
        ? RuiJiConfiguration.RuiJiServer
        : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

    if (string.IsNullOrEmpty(host))
    {
        throw new Exception("no available Extractor proxy servers");
    }

    var restClient = new RestClient("http://" + IPHelper.FixLocalUrl(host));

    var post = new RestRequest("api/fp/content/save") { Method = Method.POST };
    post.AddJsonBody(content);
    post.Timeout = 15000;

    var reply = restClient.Execute(post);
    return JsonConvert.DeserializeObject<bool>(reply.Content);
}
/// <summary>
/// Rebuilds <c>proxys</c> from the children of <c>/live_nodes/proxy</c>, registering a
/// <c>LiveProxyWatcher</c> on that path while reading it.
/// </summary>
/// <remarks>
/// Loading is best-effort: a failure is reported through <c>System.Diagnostics.Trace</c>
/// and the list keeps whatever entries were added before the failure.
/// </remarks>
protected void LoadLiveProxy()
{
    proxys.Clear();

    try
    {
        var nodes = zooKeeper.GetChildren("/live_nodes/proxy", new LiveProxyWatcher(this));
        foreach (var node in nodes)
        {
            var d = GetData("/live_nodes/proxy/" + node);

            proxys.Add(new LiveProxy
            {
                BaseUrl = IPHelper.FixLocalUrl(node),
                Type = LiveProxy.GetType(d)
            });
        }
    }
    catch (Exception ex)
    {
        // Previously swallowed silently, which made an empty proxy list impossible to
        // diagnose. Still not rethrown, so callers keep the best-effort behaviour.
        System.Diagnostics.Trace.TraceError("LoadLiveProxy failed: " + ex);
    }
}
/// <summary>
/// Calls <c>api/setting/ua/random</c> and returns the raw response content.
/// </summary>
/// <remarks>
/// The target host is <c>RuiJiConfiguration.RuiJiServer</c> in standalone mode, otherwise a
/// proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </remarks>
/// <returns>The response body as received.</returns>
/// <exception cref="Exception">Thrown when the resolved host is null or empty.</exception>
public static string GetRandomSettingUA()
{
    var proxyUrl = RuiJiConfiguration.Standalone
        ? RuiJiConfiguration.RuiJiServer
        : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

    if (string.IsNullOrEmpty(proxyUrl))
    {
        throw new Exception("get feedjobs: proxyUrl can't be null");
    }

    proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

    var client = new RestClient("http://" + proxyUrl);
    var restRequest = new RestRequest("api/setting/ua/random");
    restRequest.Method = Method.GET;
    restRequest.Timeout = 15000;

    // The caller blocks until the response arrives either way, so use the synchronous call
    // directly. The earlier ExecuteAsync + ManualResetEvent combination never disposed the
    // event and waited on it without a timeout.
    var restResponse = client.Execute(restRequest);

    return restResponse.Content;
}
/// <summary>
/// Rebuilds <c>proxys</c> from the children of <c>/live_nodes/proxy</c>, registering a
/// <c>LiveProxyWatcher</c> on that path while reading it.
/// </summary>
/// <remarks>
/// Loading is best-effort: a failure is logged and the list keeps whatever entries were
/// added before the failure.
/// </remarks>
protected void LoadLiveProxy()
{
    proxys.Clear();

    try
    {
        // GetAwaiter().GetResult() rethrows the original exception. Task.Result would wrap
        // it in an AggregateException whose message hides the actual cause.
        var children = zooKeeper
            .getChildrenAsync("/live_nodes/proxy", new LiveProxyWatcher(this))
            .GetAwaiter()
            .GetResult()
            .Children;

        foreach (var node in children)
        {
            var d = GetData("/live_nodes/proxy/" + node);

            proxys.Add(new LiveProxy
            {
                BaseUrl = IPHelper.FixLocalUrl(node),
                Type = LiveProxy.GetType(d)
            });
        }
    }
    catch (Exception ex)
    {
        // Log type, message and stack trace rather than the bare message.
        Logger.GetLogger("").Error(ex.ToString());
    }
}
/// <summary>
/// Requests the feature blocks for <paramref name="url"/> from the <c>api/fp/rule</c>
/// endpoint.
/// </summary>
/// <remarks>
/// The target host is the <c>RuiJiServer</c> app setting in standalone mode, otherwise a
/// proxy elected with <c>NodeProxyTypeEnum.FEEDPROXY</c>.
/// </remarks>
/// <param name="url">Address sent to the endpoint as the <c>url</c> query value.</param>
/// <param name="useBlock">Not read by this method.</param>
/// <returns>The response body deserialized as a list of <c>ExtractFeatureBlock</c>.</returns>
/// <exception cref="Exception">Thrown when the resolved host is null or empty.</exception>
public static List<ExtractFeatureBlock> GetExtractBlock(string url, bool useBlock = false)
{
    var proxyUrl = NodeConfigurationSection.Standalone
        ? ConfigurationManager.AppSettings["RuiJiServer"]
        : ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY);

    if (string.IsNullOrEmpty(proxyUrl))
    {
        throw new Exception("no available Extractor proxy servers");
    }

    proxyUrl = IPHelper.FixLocalUrl(proxyUrl);

    // NOTE(review): hard-coded address rewrite kept as-is because callers may depend on it;
    // presumably an environment-specific mapping that belongs in configuration - confirm.
    proxyUrl = proxyUrl.Replace("118.31.61.230", "172.16.50.52");

    var client = new RestClient("http://" + proxyUrl);

    // Percent-encode the value: a raw URL containing '&', '#' or '?' would otherwise be
    // truncated or split into additional query parameters. A null url keeps producing an
    // empty value, as plain concatenation did.
    var restRequest = new RestRequest("api/fp/rule?url=" + Uri.EscapeDataString(url ?? string.Empty));
    restRequest.Method = Method.GET;
    restRequest.JsonSerializer = new NewtonJsonSerializer();
    restRequest.Timeout = 15000;

    var restResponse = client.Execute(restRequest);

    return JsonConvert.DeserializeObject<List<ExtractFeatureBlock>>(restResponse.Content);
}
public static Response Request(Request request, bool usecp = false) { if (NodeConfigurationSection.Standalone) { var crawler = new RuiJiCrawler(); var response = crawler.Request(request); if (string.IsNullOrEmpty(request.Ip)) { var e = CrawlerServerManager.Instance.ElectIP(request.Uri); if (e != null) { request.Ip = e.ClientIp; } } var maxRefresh = 2; string refreshUrl; while (HasRefreshMeta(response, out refreshUrl) && maxRefresh > 0) { crawler = new RuiJiCrawler(); request.Uri = new Uri(refreshUrl); response = crawler.Request(request); maxRefresh--; } return(response); } else { var proxyUrl = ProxyManager.Instance.Elect(NodeProxyTypeEnum.FEEDPROXY); if (string.IsNullOrEmpty(proxyUrl)) { throw new Exception("no available crawler proxy servers"); } proxyUrl = IPHelper.FixLocalUrl(proxyUrl); if (usecp) { var client = new RestClient("http://" + proxyUrl); var restRequest = new RestRequest("api/cp/crawl"); restRequest.Method = Method.POST; restRequest.AddJsonBody(request); restRequest.Timeout = request.Timeout; var restResponse = client.Execute(restRequest); var response = JsonConvert.DeserializeObject <Response>(restResponse.Content); return(response); } else { var elect = Elect(new CrawlerElectRequest { ElectIp = string.IsNullOrEmpty(request.Ip), ElectProxy = request.Proxy is null, Uri = request.Uri });