/// <summary>
/// Matches the browser's current document URL against the resolved regular
/// expression and, on a match, copies each group's value into the context's
/// parameter provider under the group's id.
/// </summary>
/// <param name="context">Execution context providing the browser service, value resolution and the parameter provider.</param>
/// <returns>
/// A result whose <c>Succeed</c> flag tells whether the URL matched. On a match,
/// <c>Constant.RVCount</c> holds the number of group ids that were written.
/// </returns>
public override PhaseResult Run(Context context)
{
    GeckoWebBrowser browser = (GeckoWebBrowser)context.GetService(typeof(GeckoWebBrowser));
    Debug.Assert(browser != null, "browser is null");

    String regex = context.Resolve(RegularExpression);
    // The assertion must hold when the pattern is usable, i.e. NOT blank.
    Debug.Assert(!String.IsNullOrWhiteSpace(regex), "regular expression is null!");

    // A document without a URL cannot match; Regex.Match would throw on null input.
    String url = browser.Document.Uri;
    Match match = url == null ? Match.Empty : Regex.Match(url, regex);

    PhaseResult pr = new PhaseResult(this);
    if (match.Success)
    {
        // Ids of all groups declared in the expression.
        List<String> ids = RegexHelper.ParseGroupIndexNames(regex);

        // Publish every captured group under its own id.
        foreach (String id in ids)
        {
            context.ParameterProvider.SetString(id, match.Groups[id].Value);
        }

        pr.Succeed = true;
        pr.SetInt(Constant.RVCount, ids.Count);
    }
    else
    {
        pr.Succeed = false;
    }

    return pr;
}
/// <summary>
/// Returns a new result for this phase whose <c>ListResult</c> is this
/// phase's <c>List</c> member. The context is not consulted.
/// </summary>
/// <param name="context">Execution context (unused here).</param>
/// <returns>The result carrying the list.</returns>
public override PhaseResult Run(Context context)
{
    return new PhaseResult(this) { ListResult = List };
}
/// <summary>
/// Downloads the file referenced by the <c>UrlTagName</c> field of every bound
/// JSON record into <c>Directory</c>, retrying each failed download once.
/// </summary>
/// <param name="context">
/// Execution context. The records come from <c>context.JsonResults[Binding]</c>
/// when <c>Binding</c> is set, otherwise from the last result on the stack.
/// </param>
/// <returns>
/// A result with an empty <c>ListResult</c>. <c>Succeed</c> is false only when no
/// bound list could be found; otherwise the integers "download" (successful
/// downloads) and "total" (records examined) are set.
/// </returns>
public PhaseResult Run(Context context)
{
    List<String> bind = null;
    if (!String.IsNullOrWhiteSpace(Binding))
    {
        // An unknown binding name is treated as an empty list, not as a failure.
        if (context.JsonResults.ContainsKey(Binding))
        {
            bind = context.JsonResults[Binding];
        }
        else
        {
            bind = new List<string>();
        }
    }
    else
    {
        PhaseResult last = context.Stack.LastOrDefault();
        if (last != null)
        {
            bind = last.ListResult;
        }
    }

    PhaseResult pr = new PhaseResult(this);
    pr.ListResult = new List<string>();
    pr.Succeed = true;

    if (bind == null)
    {
        pr.Succeed = false;
        return pr;
    }

    Int32 downloadSucc = 0;
    foreach (String json in bind)
    {
        Dictionary<String, String> dict = JsonConvert.DeserializeObject<Dictionary<String, String>>(json);

        Boolean succ = false;
        String url;
        // The deserializer may yield null (e.g. for an empty record); count that as a failed download.
        if (dict != null && dict.TryGetValue(UrlTagName, out url))
        {
            succ = DownloadHelper.DownloadFile(url, Directory);
            if (!succ)
            {
                // One retry on failure.
                succ = DownloadHelper.DownloadFile(url, Directory);
            }
        }

        if (succ)
        {
            downloadSucc++;
        }
    }

    pr.SetInt("download", downloadSucc);
    pr.SetInt("total", bind.Count);
    return pr;
}
/// <summary>
/// Resolves the configured user name and password through the context and
/// stores them on the context as a new <c>Account</c>.
/// </summary>
/// <param name="context">Execution context that resolves the values and receives the account.</param>
/// <returns>A result with <c>Succeed</c> set to true.</returns>
public PhaseResult Run(Context context)
{
    context.Account = new Account
    {
        UserName = context.Resolve(Username),
        Password = context.Resolve(Password)
    };

    return new PhaseResult(this) { Succeed = true };
}
/// <summary>
/// Resolves this phase's locator expression, performs a click in the browser
/// through <c>RequestHelper.OperateBrowserClick</c>, and stores the browser
/// content afterwards as the context's last request content.
/// </summary>
/// <param name="context">Execution context providing the browser service and value resolution.</param>
/// <returns>A result whose <c>Succeed</c> flag is the value returned by the click helper.</returns>
public override PhaseResult Run(Context context)
{
    GeckoWebBrowser gecko = (GeckoWebBrowser)context.GetService(typeof(GeckoWebBrowser));
    Debug.Assert(gecko != null, "browser is null");

    // NOTE(review): the resolved value overwrites the locator expression in place,
    // so a later run would resolve the already-resolved text - confirm this is intended.
    Locator.Locator = context.Resolve(Locator.Locator);

    Boolean clicked = RequestHelper.OperateBrowserClick(gecko, Locator);
    PhaseResult outcome = new PhaseResult(this) { Succeed = clicked };

    // Never leave a null page content on the context.
    String page = RequestHelper.GetGeckoContent(gecko);
    context.LastRequestContent = page == null ? String.Empty : page;

    return outcome;
}
/// <summary>
/// Waits either for a fixed number of milliseconds or until the browser's URL
/// and then its content match the configured patterns, and records the
/// browser content on the context and in the result.
/// </summary>
/// <param name="context">Execution context providing the browser service and value resolution.</param>
/// <returns>
/// A result with <c>Succeed</c> set to true and the browser content stored under
/// <c>Constant.RVHttpRequestResult</c>.
/// </returns>
public override PhaseResult Run(Context context)
{
    PhaseResult pr = new PhaseResult(this);
    GeckoWebBrowser browser = (GeckoWebBrowser)context.GetService(typeof(GeckoWebBrowser));
    Debug.Assert(browser != null, "browser is null");

    if (WaitMilliseconds > 0)
    {
        // Fixed delay.
        Thread.Sleep(WaitMilliseconds);
    }
    else
    {
        String urlRegex = context.Resolve(UrlRegex);
        String contentRegex = context.Resolve(ContentRegex);

        // TODO: add a timeout; only a timeout should be reported as a failure.
        // Until then both loops below poll without an upper bound.

        // First wait for the URL pattern.
        if (!String.IsNullOrWhiteSpace(urlRegex))
        {
            while (browser.Document.Uri == null || !Regex.IsMatch(browser.Document.Uri, urlRegex))
            {
                Thread.Sleep(200);
            }
        }

        // Then wait for the content pattern.
        if (!String.IsNullOrWhiteSpace(contentRegex))
        {
            // Content may be null (it is null-coalesced below as well); Regex.IsMatch
            // throws on null input, so poll against an empty string in that case.
            while (!Regex.IsMatch(RequestHelper.GetGeckoContent(browser) ?? String.Empty, contentRegex))
            {
                Thread.Sleep(200);
            }
        }
    }

    string content = RequestHelper.GetGeckoContent(browser);
    context.LastRequestContent = content ?? String.Empty;
    pr.SetString(Constant.RVHttpRequestResult, content);
    pr.Succeed = true;
    return pr;
}
/// <summary>
/// Parses the context's last request content with <c>ParseByRegex</c> and then
/// <c>ParseByXPath</c>, and returns the combined records.
/// </summary>
/// <param name="context">
/// Execution context supplying <c>LastRequestContent</c>. It receives the records
/// in <c>JsonResult</c> when <c>Save</c> is set, and in <c>JsonResults[ListID]</c>
/// when <c>ListID</c> is not blank.
/// </param>
/// <returns><c>Succeed</c> indicates success; <c>ListResult</c> holds the extracted JSON data.</returns>
public override PhaseResult Run(Context context)
{
    Initialize(context);

    // NOTE(review): blank content is passed to the parsers unchanged; the original
    // had an empty placeholder branch for that case - confirm whether it should fail.
    String content = context.LastRequestContent;

    List<String> jsonResult = new List<String>();
    // Regular-expression based extraction.
    jsonResult.AddRange(ParseByRegex(content));
    // XPath based extraction.
    jsonResult.AddRange(ParseByXPath(content));

    PhaseResult pr = new PhaseResult(this);
    pr.Succeed = true;
    pr.ListResult = jsonResult;

    if (Save)
    {
        context.JsonResult.AddRange(jsonResult);
    }

    if (!String.IsNullOrWhiteSpace(ListID))
    {
        // The indexer setter adds or replaces, so no ContainsKey branch is needed.
        context.JsonResults[ListID] = pr.ListResult;
    }

    return pr;
}
/// <summary>
/// Runs the logout phases in order, pushing each phase's result onto the
/// context and stopping at the first phase that does not succeed.
/// </summary>
/// <param name="context">Execution context passed to every phase.</param>
/// <returns>A result whose <c>Succeed</c> flag is true only when every executed phase succeeded.</returns>
public PhaseResult Run(Context context)
{
    Boolean allSucceeded = true;

    foreach (IPhase step in _logoutPhases)
    {
        PhaseResult stepResult = step.Run(context);
        context.PushResult(stepResult);

        allSucceeded = stepResult.Succeed;
        if (!allSucceeded)
        {
            // Remaining phases are skipped after the first failure.
            break;
        }
    }

    return new PhaseResult(this) { Succeed = allSucceeded };
}
/// <summary>
/// Runs <c>MetaList</c>, then fetches the URL stored under <c>MetaListUrlKey</c>
/// in each returned record, parses the fetched content with <c>Parse</c>, and
/// collects all parsed records.
/// </summary>
/// <param name="context">
/// Execution context. Its <c>LastRequestContent</c> is overwritten for every
/// fetched page and every parse result is pushed onto it.
/// </param>
/// <returns>A result with <c>Succeed</c> set to true and <c>ListResult</c> holding the collected records.</returns>
public PhaseResult RunAsNestedList(Context context)
{
    // Host prepended to URLs that do not start with "http".
    // NOTE(review): hard-coded site; consider making this configurable.
    const String DefaultHost = "http://club.autohome.com.cn";

    PhaseResult list = MetaList.Run(context);
    PhaseResult pr = new PhaseResult(this);
    List<String> jsonResult = new List<string>();

    if (list.ListResult != null)
    {
        GeckoWebBrowser browser = (GeckoWebBrowser)context.GetService(typeof(GeckoWebBrowser));

        foreach (String item in list.ListResult)
        {
            Dictionary<String, String> itemDict = JsonConvert.DeserializeObject<Dictionary<String, String>>(item);

            // Skip records that are empty, lack the URL key, or carry a null URL
            // instead of failing with a NullReferenceException.
            String url;
            if (itemDict == null || !itemDict.TryGetValue(MetaListUrlKey, out url) || url == null)
            {
                continue;
            }

            // Ordinal comparison: a URL scheme is not linguistic text.
            if (!url.StartsWith("http", StringComparison.Ordinal))
            {
                url = String.Format("{0}/{1}", DefaultHost, url.TrimStart('/'));
            }

            // Use the browser when one is registered, otherwise a plain request.
            String content = browser != null
                ? RequestHelper.BrowserGet(browser, url)
                : RequestHelper.Get(url);

            context.LastRequestContent = content;
            PhaseResult parseResult = Parse.Run(context);
            context.PushResult(parseResult);

            if (parseResult.ListResult != null)
            {
                jsonResult.AddRange(parseResult.ListResult);
            }
        }
    }

    pr.ListResult = jsonResult;
    pr.Succeed = true;
    return pr;
}
/// <summary>
/// For every bound JSON record that has a <c>FileUrlTagName</c> field, extracts
/// the text of the referenced document (PDF, Word or Excel, chosen by
/// <c>Type</c>) and replaces the field's value with that text, with line breaks
/// removed.
/// </summary>
/// <param name="context">
/// Execution context. The records come from <c>context.JsonResults[Binding]</c>
/// when <c>Binding</c> is set, otherwise from the last result on the stack.
/// It receives the rewritten records in <c>JsonResult</c> when <c>Save</c> is set,
/// and in <c>JsonResults[ListID]</c> when <c>ListID</c> is not blank.
/// </param>
/// <returns>
/// A result whose <c>ListResult</c> holds the rewritten records. <c>Succeed</c>
/// is false only when no bound list could be found.
/// </returns>
public PhaseResult Run(Context context)
{
    // Each document is attempted at most twice (one retry on blank output).
    const Int32 MaxAttempts = 2;

    List<String> bind = null;
    if (!String.IsNullOrWhiteSpace(Binding))
    {
        // An unknown binding name is treated as an empty list, not as a failure.
        if (context.JsonResults.ContainsKey(Binding))
        {
            bind = context.JsonResults[Binding];
        }
        else
        {
            bind = new List<string>();
        }
    }
    else
    {
        PhaseResult last = context.Stack.LastOrDefault();
        if (last != null)
        {
            bind = last.ListResult;
        }
    }

    PhaseResult pr = new PhaseResult(this);
    pr.ListResult = new List<string>();
    pr.Succeed = true;

    if (bind == null)
    {
        pr.Succeed = false;
        return pr;
    }

    // Replace the document URL in each record with the document's extracted text.
    foreach (String json in bind)
    {
        Dictionary<String, String> dict = JsonConvert.DeserializeObject<Dictionary<String, String>>(json);

        // Records that are empty or lack the URL field are dropped from the output.
        String url;
        if (dict == null || !dict.TryGetValue(FileUrlTagName, out url))
        {
            continue;
        }

        String content = null;
        for (Int32 attempt = 0; attempt < MaxAttempts && String.IsNullOrWhiteSpace(content); attempt++)
        {
            switch (Type)
            {
                case DocumentType.PDF:
                    content = PdfParser.Extract(url, Directory);
                    break;
                case DocumentType.WORD:
                    content = WordParser.Extract(url, Directory);
                    break;
                case DocumentType.EXCEL:
                    content = ExcelParser.Extract(url, Directory);
                    break;
            }
        }

        // Extraction can still yield null (both attempts failed, or Type matched
        // no case); store an empty string rather than dereferencing null.
        dict[FileUrlTagName] = (content ?? String.Empty).Replace(Environment.NewLine, String.Empty);
        pr.ListResult.Add(JsonConvert.SerializeObject(dict));
    }

    // Persist the results.
    if (Save)
    {
        context.JsonResult.AddRange(pr.ListResult);
    }

    // Register the list under ListID so later phases can bind to it.
    if (!String.IsNullOrWhiteSpace(ListID))
    {
        context.JsonResults[ListID] = pr.ListResult;
    }

    return pr;
}
/// <summary>
/// Calls <c>GenerateOneBatch</c> once per bound JSON record, with that record
/// pushed as a runtime data provider, or once without any provider when
/// nothing is bound, and collects the generated records.
/// </summary>
/// <param name="context">
/// Execution context. The records come from <c>context.JsonResults[Binding]</c>
/// when <c>Binding</c> is set, otherwise from the last result on the stack.
/// It receives the generated records in <c>JsonResults[ListID]</c> when
/// <c>ListID</c> is not blank.
/// </param>
/// <returns>A result whose <c>ListResult</c> holds every generated record.</returns>
public override PhaseResult Run(Context context)
{
    List<String> bind = null;
    if (!String.IsNullOrWhiteSpace(Binding))
    {
        // An unknown binding name is treated as an empty list.
        if (context.JsonResults.ContainsKey(Binding))
        {
            bind = context.JsonResults[Binding];
        }
        else
        {
            bind = new List<string>();
        }
    }
    else
    {
        PhaseResult last = context.Stack.LastOrDefault();
        if (last != null)
        {
            bind = last.ListResult;
        }
    }

    PhaseResult pr = new PhaseResult(this);
    pr.ListResult = new List<string>();

    if (bind != null)
    {
        foreach (String json in bind)
        {
            // Parse and push BEFORE entering the try block: if parsing throws,
            // nothing was pushed, so the finally block must not pop.
            IDataProvider provider = ParameterResolver.ParseProvider(json);
            context.RuntimeProviders.Push(provider);
            try
            {
                pr.ListResult.AddRange(GenerateOneBatch(context));
            }
            finally
            {
                context.RuntimeProviders.Pop();
            }
        }
    }
    else
    {
        pr.ListResult.AddRange(GenerateOneBatch(context));
    }

    // Register the list under ListID so later phases can bind to it.
    if (!String.IsNullOrWhiteSpace(ListID))
    {
        context.JsonResults[ListID] = pr.ListResult;
    }

    return pr;
}
/// <summary>
/// Runs the <c>Request</c> and <c>Parse</c> phases once per bound JSON record,
/// with that record pushed as a runtime data provider, or once without any
/// provider when nothing is bound, and collects the parsed records.
/// </summary>
/// <param name="context">
/// Execution context. The records come from <c>context.JsonResults[Binding]</c>
/// when <c>Binding</c> is set, otherwise from the last result on the stack.
/// It receives the parsed records in <c>JsonResults[ListID]</c> when
/// <c>ListID</c> is not blank.
/// </param>
/// <returns>A result whose <c>ListResult</c> holds every parsed record.</returns>
public PhaseResult Run(Context context)
{
    List<String> bind = null;
    if (!String.IsNullOrWhiteSpace(Binding))
    {
        // An unknown binding name is treated as an empty list.
        if (context.JsonResults.ContainsKey(Binding))
        {
            bind = context.JsonResults[Binding];
        }
        else
        {
            bind = new List<string>();
        }
    }
    else
    {
        PhaseResult last = context.Stack.LastOrDefault();
        if (last != null)
        {
            bind = last.ListResult;
        }
    }

    PhaseResult pr = new PhaseResult(this);
    pr.ListResult = new List<string>();

    if (bind != null)
    {
        foreach (String json in bind)
        {
            // Create and push BEFORE entering the try block: if creation throws,
            // nothing was pushed, so the finally block must not pop.
            IDataProvider provider = BasicDataProvider.CreateFromJson(json);
            context.RuntimeProviders.Push(provider);
            try
            {
                RequestAndParseInto(context, pr);
            }
            finally
            {
                context.RuntimeProviders.Pop();
            }
        }
    }
    else
    {
        RequestAndParseInto(context, pr);
    }

    // Register the list under ListID so later phases can bind to it.
    if (!String.IsNullOrWhiteSpace(ListID))
    {
        context.JsonResults[ListID] = pr.ListResult;
    }

    return pr;
}

/// <summary>
/// Runs Request then Parse, pushes both results onto the context, and appends
/// any parsed records to <paramref name="target"/>'s list.
/// </summary>
private void RequestAndParseInto(Context context, PhaseResult target)
{
    PhaseResult result = Request.Run(context);
    context.PushResult(result);

    result = Parse.Run(context);
    context.PushResult(result);

    if (result.ListResult != null)
    {
        target.ListResult.AddRange(result.ListResult);
    }
}
/// <summary>
/// Iterates from <c>_from</c> towards <c>_to</c> in increments of <c>_step</c>.
/// For each value it updates the request, runs <c>Request</c> and <c>Parse</c>,
/// and collects the parsed records. Until the upper bound has been
/// initialized, it tries to read it from each response with the <c>To</c> pattern.
/// </summary>
/// <param name="context">Execution context. Every intermediate result and the final result are pushed onto it.</param>
/// <returns>A result with <c>Succeed</c> set to true and <c>ListResult</c> holding all parsed records.</returns>
public PhaseResult RunAsIteration(Context context)
{
    Initialize(context);

    // Pick the loop condition from the sign of the step.
    // NOTE(review): a zero step selects GreaterOrEqual and never advances i - confirm
    // that Initialize rules this out.
    Boundary bound = _step > 0 ? new Boundary(LessOrEqual) : new Boundary(GreaterOrEqual);
    List<String> jsonResult = new List<string>();

    for (int i = _from; bound(i, _to); i += _step)
    {
        _updatableRequest.Update(context, i);
        PhaseResult pr = Request.Run(context);
        context.PushResult(pr);

        // Try to discover the real upper bound from the response.
        if (!_toPageInitialized)
        {
            // Regex.Match throws on null input; treat missing content as "no match".
            Match match = Regex.Match(context.LastRequestContent ?? String.Empty, To);
            Int32 page;
            if (match.Success && Int32.TryParse(match.Groups[1].Value, out page))
            {
                _to = page;
                _toPageInitialized = true;
            }
        }

        pr = Parse.Run(context);
        context.PushResult(pr);
        if (pr.ListResult != null)
        {
            jsonResult.AddRange(pr.ListResult);
        }
    }

    PhaseResult result = new PhaseResult(this);
    result.Succeed = true;
    result.ListResult = jsonResult;
    context.PushResult(result);
    return result;
}
/// <summary>
/// Pushes the result of one phase run onto the result stack by appending it
/// to <c>Stack</c>.
/// </summary>
/// <param name="result">The phase result to record.</param>
public void PushResult(PhaseResult result)
{
    Stack.Add(result);
}