/// <summary>
/// Runs the whole pipeline for one seed row: collect the list-page URLs,
/// extract the person-page URLs from them, download the person pages and
/// finally export the extracted person information to an .xlsx file.
/// </summary>
/// <remarks>
/// Each stage stores its result in an intermediate ("middle") file. When the
/// system is not running in <c>SysExecuteType.Produce</c> mode, a stage first
/// tries to reuse the intermediate file of a previous run. As soon as one stage
/// had to be computed afresh, every later stage is recomputed as well, because
/// its cached result could be based on outdated input.
/// </remarks>
/// <param name="seedRow">
/// Seed row from which the key words, login name, login password and seed page
/// URL are read through the corresponding <c>Get*</c> helpers.
/// </param>
private void GetOneKeyWordsRelatedInfos(Dictionary<string, string> seedRow)
{
    // NOTE(review): the Get* helpers are assumed to be side-effect-free readers
    // of seedRow, so their results are read once and reused below.
    string keyWords = this.GetKeyWords(seedRow);
    string loginName = this.GetLoginName(seedRow);
    string middleFilePrefix = "_" + loginName + "_" + keyWords;

    bool isProduce = SysConfig.SysExecuteType == SysExecuteType.Produce;

    // True once any stage has been freshly computed; all following stages
    // must then be executed again instead of being read from their cache.
    bool isNewDo = false;

    // ---- Stage 1: list-page URLs -------------------------------------
    string listPageFileName = middleFilePrefix + "_listPageUrl";
    List<string> allListPageUrls = null;
    if (!isProduce)
    {
        // Outside production, try the list-page URLs saved by an earlier run.
        allListPageUrls = this.RunPage.TryGetInfoFromMiddleFile(listPageFileName, "listPageUrl");
    }

    if (allListPageUrls == null)
    {
        // Production mode, or nothing cached: crawl the list pages now.
        allListPageUrls = this.GetAllListPages(this.GetSeedPageUrl(seedRow), keyWords);
        this.RunPage.SaveInfoToMiddleFile(listPageFileName, "listPageUrl", allListPageUrls);
        isNewDo = true;
    }

    // ---- Stage 2: person-page URLs parsed from the list pages ---------
    string personUrlInfoFileName = middleFilePrefix + "_personPageUrlInfo";
    string[] personUrlInfoColumns = new string[] { "personUrl", "personName" };
    List<Dictionary<string, string>> allPersonPageUrlInfos = null;
    if (!isProduce && !isNewDo)
    {
        // Reuse the person-page URLs parsed by an earlier run.
        allPersonPageUrlInfos = this.RunPage.TryGetInfoFromMiddleFile(personUrlInfoFileName, personUrlInfoColumns);
    }

    if (allPersonPageUrlInfos == null)
    {
        allPersonPageUrlInfos = this.GetPersonPageUrlsFromListPages(this.RunPage.GetDetailSourceFileDir(), allListPageUrls);
        this.RunPage.SaveInfoToMiddleFile(personUrlInfoFileName, personUrlInfoColumns, allPersonPageUrlInfos);
        isNewDo = true;
    }

    // The login is performed unconditionally before the person-page stage,
    // exactly as before, even when that stage is served from its cache.
    LoginLinkedin.LoginByRandomUser(this.RunPage, this.LinkedinLoginPageUrl, this.LinkedinLoginSucceedCheckUrl);

    // ---- Stage 3: download the person pages ---------------------------
    string personPageFileName = middleFilePrefix + "_personPageUrl";

    // Fix: the column list used to contain "personUrl" twice. It now matches
    // the columns GetDataByOtherAccessType uses for the same GetAllPersonPages
    // result.
    string[] personPageColumns = new string[] { "personUrl", "personName", "isJustDownload" };
    List<Dictionary<string, string>> allPersonPageInfosWithJustDownloadMark = null;
    if (!isProduce && !isNewDo)
    {
        // Fix: read from the same file name the result is saved under. The
        // previous code looked for "<login>.<keyWords>.personPageUrl", a name
        // that is never written, so the cache could never be hit.
        allPersonPageInfosWithJustDownloadMark = this.RunPage.TryGetInfoFromMiddleFile(personPageFileName, personPageColumns);
    }

    if (allPersonPageInfosWithJustDownloadMark == null)
    {
        allPersonPageInfosWithJustDownloadMark = ProcessPersonPage.GetAllPersonPages(
            this.RunPage,
            allPersonPageUrlInfos,
            loginName,
            this.GetLoginPassword(seedRow));
        this.RunPage.SaveInfoToMiddleFile(personPageFileName, personPageColumns, allPersonPageInfosWithJustDownloadMark);
    }

    // ---- Stage 4: extract the person information and export it --------
    List<Dictionary<string, string>> personInfoList = ProcessPersonPage.GetPersonInfoFromLocalPages(
        this.RunPage,
        allPersonPageInfosWithJustDownloadMark,
        true,
        keyWords);

    string personInfosResultFilePath = this.RunPage.GetFilePath(
        "SearchResult_Google2Linkedin_" + loginName + "_" + keyWords + ".xlsx",
        this.RunPage.GetExportDir());
    ProcessPersonPage.SavePersonInfoToFile(this.RunPage, personInfoList, personInfosResultFilePath);
}
/// <summary>
/// Searches persons for one key word through the Android app, then fetches
/// their person pages and exports the extracted person information to an
/// .xlsx file in the export directory.
/// </summary>
/// <remarks>
/// The app search is skipped when its result is already available as an
/// intermediate file, and the page processing is skipped when the export file
/// already exists.
/// </remarks>
/// <param name="listRow">
/// Row that must contain the keys <c>keyWords</c>, <c>loginName</c> and
/// <c>loginPassword</c>.
/// </param>
/// <exception cref="Exception">
/// Thrown, wrapping the original error as inner exception, when a step of the
/// app session (logout check, logout, login, search) or the page processing fails.
/// </exception>
public override void GetDataByOtherAccessType(Dictionary<string, string> listRow)
{
    string keyWords = listRow["keyWords"];
    string loginName = listRow["loginName"];
    string loginPassword = listRow["loginPassword"];

    string fileName = "_" + keyWords + "_" + loginName;
    string[] personColumns = new string[] { "personName", "personUrl" };

    List<Dictionary<string, string>> personInfos = this.RunPage.TryGetInfoFromMiddleFile(fileName, personColumns);
    if (personInfos == null)
    {
        this.RunPage.InvokeAppendLogText("开始使用APP搜索相关人员, 关键词为'" + keyWords + "'", LogLevelType.System, true);

        AndroidAppAccess appAccess = null;
        try
        {
            this.RunPage.InvokeAppendLogText("开始连接手机APP", LogLevelType.System, true);
            appAccess = this.InitAppAccess();
            this.RunPage.InvokeAppendLogText("连接手机APP成功", LogLevelType.System, true);

            bool needLogout = false;
            try
            {
                this.RunPage.InvokeAppendLogText("检测是否需要登出", LogLevelType.System, true);
                needLogout = this.CheckNeedLogout(appAccess);
            }
            catch (Exception ex)
            {
                throw new Exception("检查是否需要登出出错.", ex);
            }

            if (needLogout)
            {
                try
                {
                    this.RunPage.InvokeAppendLogText("开始登出", LogLevelType.System, true);
                    this.DoLogout(appAccess);
                    this.RunPage.InvokeAppendLogText("已经登出", LogLevelType.System, true);
                }
                catch (Exception ex)
                {
                    throw new Exception("登出失败.", ex);
                }
            }

            try
            {
                this.RunPage.InvokeAppendLogText("开始登录", LogLevelType.System, true);
                this.DoLogin(appAccess, loginName, loginPassword);
                this.RunPage.InvokeAppendLogText("已经登录", LogLevelType.System, true);
            }
            catch (Exception ex)
            {
                throw new Exception("登录失败.", ex);
            }

            try
            {
                personInfos = this.SearchPersonByKeyWord(appAccess, keyWords);
            }
            catch (Exception ex)
            {
                // Fix: keep the root cause as inner exception; it used to be dropped.
                throw new Exception("在app中根据关键字抓取出错, keyWords = " + keyWords, ex);
            }

            this.RunPage.SaveInfoToMiddleFile(fileName, personColumns, personInfos);

            // Fix: the closing quote after the key word was missing in this message.
            this.RunPage.InvokeAppendLogText("完成使用手机APP搜索关键词'" + keyWords + "'", LogLevelType.System, true);
        }
        finally
        {
            // Fix: the former "catch (Exception ex) { throw ex; }" only reset the
            // stack trace; exceptions now propagate untouched while the app
            // connection is still closed here.
            if (appAccess != null)
            {
                this.CloseAppAccess(appAccess);
            }
        }
    }

    try
    {
        // The export path was previously computed twice with identical arguments.
        string personInfosFilePath = this.RunPage.GetFilePath(
            "_SearchResult_LinkedinApp_" + loginName + "_" + keyWords + ".xlsx",
            this.RunPage.GetExportDir());

        if (File.Exists(personInfosFilePath))
        {
            // The result was already exported; nothing left to do.
            return;
        }

        this.RunPage.InvokeAppendLogText("登录Linkedin系统", LogLevelType.System, true);
        LoginLinkedin.LoginByRandomUser(this.RunPage, this.LinkedinLoginPageUrl, this.LinkedinLoginSucceedCheckUrl);
        this.RunPage.InvokeAppendLogText("已登录Linkedin系统", LogLevelType.System, true);

        this.RunPage.InvokeAppendLogText("根据个人页面地址(人员列表是从手机APP搜索到的,关键词为'" + keyWords + "'),从Linkedin网页版获取个人信息", LogLevelType.System, true);

        string personPageInfosWithJustDownloadMarkFileName = "_" + loginName + "_" + keyWords + "_personPageInfosWithJustDownloadMark";
        string[] personPageColumns = new string[] { "personUrl", "personName", "isJustDownload" };

        // Reuse the person-page list written by an earlier run when available.
        List<Dictionary<string, string>> allPersonPageInfosWithJustDownloadMark =
            this.RunPage.TryGetInfoFromMiddleFile(personPageInfosWithJustDownloadMarkFileName, personPageColumns);
        if (allPersonPageInfosWithJustDownloadMark == null)
        {
            allPersonPageInfosWithJustDownloadMark = ProcessPersonPage.GetAllPersonPages(this.RunPage, personInfos, loginName, loginPassword);
            this.RunPage.SaveInfoToMiddleFile(personPageInfosWithJustDownloadMarkFileName, personPageColumns, allPersonPageInfosWithJustDownloadMark);
        }

        // Checked again on purpose: the original code re-tested the export file
        // after the page download, so that behavior is preserved.
        if (!File.Exists(personInfosFilePath))
        {
            List<Dictionary<string, string>> personInfoList = ProcessPersonPage.GetPersonInfoFromLocalPages(
                this.RunPage,
                allPersonPageInfosWithJustDownloadMark,
                false,
                null);
            ProcessPersonPage.SavePersonInfoToFile(this.RunPage, personInfoList, personInfosFilePath);
        }

        this.RunPage.InvokeAppendLogText("完成获取并处理个人页面(人员列表是从手机APP搜索到的,关键词为'" + keyWords + "')", LogLevelType.System, true);

        // NOTE(review): a recursive crawl of the "members who viewed this page
        // also viewed" section used to sit here as commented-out code and has
        // been removed. The log line below is kept so the log output stays the
        // same, although no recursive crawl is performed.
        this.RunPage.InvokeAppendLogText("完成递归爬取到关键词'" + keyWords + "'相关的所有的'看过本页的会员还看了'", LogLevelType.System, true);
    }
    catch (Exception ex)
    {
        throw new Exception("通过网页获取/处理个人信息出错", ex);
    }
}