/// <summary>
/// Builds a sample "Transactions" workbook containing one hard-coded charge row and saves it.
/// </summary>
/// <returns>The value returned by <c>SaveCloseAndGetFileName</c> on the writer.</returns>
public string Execute()
{
    using var sheet = new ExcelWriter();

    // Title block, the "Since" line (one month before today) and an empty spacer row.
    Helper.ApplyDefaultStyling(sheet, "Transactions", $"Charges for Sample Place");
    sheet.AddRow($"Since: {DateTime.Today.AddMonths(-1):yyyy-MM-dd}");
    sheet.AddRow();

    // Header cells. NOTE(review): the numeric argument is presumably a column width - confirm against AddHeader.
    sheet.AddHeaderRow()
        .AddHeader("Customer Id", 12)
        .AddHeader("First name", 30)
        .AddHeader("Last name", 30)
        .AddHeader("DOB", 30)
        .AddHeader("Trans. Date", 15)
        .AddHeader("Description", 90)
        .AddHeader("Amount", 15);

    // The single sample data row; both date cells are left-aligned.
    sheet.AddRow()
        .Add(123456, "@")
        .Add("Firstname", "@")
        .Add("Lastname", "@")
        .Add(new DateTime(1976, 8, 22), Helper.FormatDate, style: s => s.HorizontalAlignment = ExcelHorizontalAlignment.Left)
        .Add(DateTime.Today, Helper.FormatDate, style: s => s.HorizontalAlignment = ExcelHorizontalAlignment.Left)
        .Add("Sample Charge", "@")
        .Add(123.45, Helper.FormatMoney);

    Helper.ApplyDefaultReportSettings(sheet, DateTime.Now, eOrientation.Landscape);

    return sheet.SaveCloseAndGetFileName();
}
/// <summary>
/// Builds the list of "word" detail pages and saves it through the writer returned by
/// <c>CreateResultWriter</c>.
/// </summary>
/// <remarks>
/// Rows flagged as given up are copied through once per URL. For every other row the HTML
/// document returned by <c>RunPage.GetLocalHtmlDocument</c> is scanned and each matching link
/// becomes a new row. Every URL is written at most once.
/// </remarks>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
private void GetWordPageUrls(IListSheet listSheet)
{
    ExcelWriter resultEW = this.CreateResultWriter();

    // URLs already written, shared by both branches so nothing is emitted twice.
    HashSet<string> seenUrls = new HashSet<string>();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> listRow = listSheet.GetRow(i);
        bool giveUp = "Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]);

        if (giveUp)
        {
            string pageUrl = listRow["detailPageUrl"];
            if (seenUrls.Add(pageUrl))
            {
                resultEW.AddRow(new Dictionary<string, string>
                {
                    { "detailPageUrl", pageUrl },
                    { "detailPageName", listRow["detailPageName"] },
                    { "name", listRow["name"] },
                    { "pageType", listRow["pageType"] }
                });
            }

            continue;
        }

        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection linkNodes = htmlDoc.DocumentNode.SelectNodes("//ul[@class=\"ulLi120 fsc16\"]/li/a");

        // SelectNodes returns null (not an empty collection) when nothing matches;
        // such a page simply contributes no rows.
        if (linkNodes == null)
        {
            continue;
        }

        foreach (HtmlNode linkNode in linkNodes)
        {
            string fullLinkUrl = "http://www.yitang.org" + linkNode.GetAttributeValue("href", "");

            // Record the URL as seen. Previously it was only looked up and never stored,
            // so the same link found on several pages was written repeatedly.
            if (!seenUrls.Add(fullLinkUrl))
            {
                continue;
            }

            string linkName = CommonUtil.HtmlDecode(linkNode.InnerText).Trim();
            resultEW.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", fullLinkUrl },
                { "detailPageName", linkName + "_word" },
                { "giveUpGrab", "N" },
                { "name", linkName },
                { "pageType", "word" }
            });
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Writes every not-given-up page URL of <paramref name="listSheet"/>, plus every link on those
/// pages that starts with <c>http://114.xixik.com/</c>, to the writer returned by
/// <c>CreateNextPageUrlExcelWriter</c>. Each URL is written once.
/// </summary>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
private void GetNextPageUrls(IListSheet listSheet)
{
    ExcelWriter writer = this.CreateNextPageUrlExcelWriter();
    HashSet<string> knownUrls = new HashSet<string>();

    int total = listSheet.RowCount;
    for (int rowIndex = 0; rowIndex < total; rowIndex++)
    {
        Dictionary<string, string> sourceRow = listSheet.GetRow(rowIndex);
        if ("Y".Equals(sourceRow[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        // First re-emit the page itself (the URL doubles as the page name).
        string currentUrl = sourceRow[SysConfig.DetailPageUrlFieldName];
        if (!knownUrls.Contains(currentUrl))
        {
            writer.AddRow(new Dictionary<string, string>
            {
                { SysConfig.DetailPageUrlFieldName, currentUrl },
                { SysConfig.DetailPageNameFieldName, currentUrl },
                { "linkName", sourceRow["linkName"] }
            });
            knownUrls.Add(currentUrl);
        }

        // Then harvest the in-site anchors found on that page.
        HtmlAgilityPack.HtmlDocument document = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex, Encoding.GetEncoding("gb2312"));
        HtmlNodeCollection anchors = document.DocumentNode.SelectNodes("//a");
        if (anchors == null)
        {
            continue;
        }

        foreach (HtmlNode anchor in anchors)
        {
            string href = anchor.GetAttributeValue("href", "").Trim();
            if (!href.StartsWith("http://114.xixik.com/") || knownUrls.Contains(href))
            {
                continue;
            }

            string anchorText = CommonUtil.HtmlDecode(anchor.InnerText).Trim();
            writer.AddRow(new Dictionary<string, string>
            {
                { SysConfig.DetailPageUrlFieldName, href },
                { SysConfig.DetailPageNameFieldName, href },
                { "linkName", anchorText }
            });
            knownUrls.Add(href);
        }
    }

    writer.SaveToDisk();
}
/// <summary>
/// Collects the links under <c>div.cont</c> of every not-given-up page and saves them through
/// the writer returned by <c>CreateWriter</c>.
/// </summary>
/// <remarks>
/// Exceptions propagate to the caller with their original stack trace. The former
/// <c>catch (Exception ex) { throw ex; }</c> only reset that trace and has been removed.
/// </remarks>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
private void GetListPageUrls(IListSheet listSheet)
{
    ExcelWriter ew = this.CreateWriter();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> listRow = listSheet.GetRow(i);
        if ("Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection linkNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"cont\"]/a");

        // SelectNodes returns null when nothing matches; such a page contributes no rows.
        if (linkNodes == null)
        {
            continue;
        }

        foreach (HtmlNode linkNode in linkNodes)
        {
            string url = "http://www.lszj.com" + linkNode.GetAttributeValue("href", "");
            string name = CommonUtil.HtmlDecode(linkNode.InnerText).Trim();

            // The URL is used as the page name as well.
            ew.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", url },
                { "detailPageName", url },
                { "name", name }
            });
        }
    }

    ew.SaveToDisk();
}
/// <summary>
/// Extracts first-level category names and their second-level categories from the
/// <c>catDiv</c> element of each page and adds one row per second-level category.
/// </summary>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
/// <param name="pageSourceDir">Not used by this method; kept for signature compatibility.</param>
/// <param name="resultEW">Writer that receives the rows; it is not saved here.</param>
private void GetCats(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
{
    for (int i = 0; i < listSheet.RowCount; i++)
    {
        // Original note: listSheet contains only one record.
        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection allCat1Nodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"catDiv\"]/div/h5");
        HtmlNodeCollection allCat2GroupNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"catDiv\"]/div/ul");

        // SelectNodes returns null when nothing matches; without both lists there is nothing to pair up.
        if (allCat1Nodes == null || allCat2GroupNodes == null)
        {
            continue;
        }

        // Headings and lists are paired by position: the j-th <h5> names the j-th <ul>.
        for (int j = 0; j < allCat1Nodes.Count; j++)
        {
            string cat1Name = allCat1Nodes[j].InnerText.Trim();
            HtmlNodeCollection allCat2Nodes = allCat2GroupNodes[j].SelectNodes("./li");

            // A group without <li> children yields null; it has no second-level categories.
            if (allCat2Nodes == null)
            {
                continue;
            }

            foreach (HtmlNode cat2Node in allCat2Nodes)
            {
                string cat2Code = cat2Node.Attributes["catid"].Value;
                string cat2Name = cat2Node.InnerText.Trim();

                resultEW.AddRow(new Dictionary<string, string>
                {
                    { "cat1Name", cat1Name },
                    { "cat2Name", cat2Name },
                    { "cat2Code", cat2Code }
                });
            }
        }
    }
}
/// <summary>
/// For each keyword read from <paramref name="er"/> that contains no space and whose value
/// exceeds 0.0001, adds a row holding the keyword's share (value divided by summed value).
/// </summary>
/// <param name="resultEW">Writer that receives the rows; it is not saved here.</param>
/// <param name="categoryName">Written to the "category" column.</param>
/// <param name="subCategoryName">Written to the "subCategory" column.</param>
/// <param name="er">Reader whose rows supply the keyword column.</param>
/// <param name="keywordsSumValueDic">Denominator per keyword; must contain every processed keyword.</param>
/// <param name="keywordsValueDic">Numerator per keyword; must contain every processed keyword.</param>
private void GetSubCategoryKeywordPercents(ExcelWriter resultEW, string categoryName, string subCategoryName, ExcelReader er, Dictionary<string, double> keywordsSumValueDic, Dictionary<string, double> keywordsValueDic)
{
    int total = er.GetRowCount();
    for (int rowIndex = 0; rowIndex < total; rowIndex++)
    {
        string keyword = er.GetFieldValues(rowIndex)["词汇"];

        // Skip keywords that contain a space.
        if (keyword.Contains(" "))
        {
            continue;
        }

        // Both lookups happen before the threshold test, so a missing key throws either way.
        double value = keywordsValueDic[keyword];
        double sumValue = keywordsSumValueDic[keyword];

        if (value > 0.0001)
        {
            double percent = value / sumValue;
            resultEW.AddRow(new Dictionary<string, object>
            {
                { "category", categoryName },
                { "subCategory", subCategoryName },
                { "keyword", keyword },
                { "percent", percent }
            });
        }
    }
}
/// <summary>
/// Copies the first record of each grabbed result file into "翻译结果.xlsx" in the export directory.
/// </summary>
/// <remarks>
/// Exceptions propagate to the caller with their original stack trace. The former
/// <c>catch (Exception ex) { throw ex; }</c> only reset that trace and has been removed.
/// </remarks>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
private void GetList(IListSheet listSheet)
{
    string exportDir = this.RunPage.GetExportDir();
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>
    {
        { "fromName", 0 },
        { "toCode", 1 }
    };
    string resultFilePath = Path.Combine(exportDir, "翻译结果.xlsx");
    ExcelWriter resultEW = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailUrl = row["detailPageUrl"];
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        // The file returned by GetFilePath is read with CsvReader; only record 0 is used.
        string resultTextFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);
        CsvReader csvReader = new CsvReader(resultTextFilePath);
        resultEW.AddRow(csvReader.GetFieldValues(0));
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Extracts the shop links under <c>cityMapLeft</c> from each city page and adds one row per shop.
/// </summary>
/// <remarks>
/// The shop code is the last path segment of the shop URL, cut at its first '.'. If the segment
/// has no '.', the whole segment is used. Links with an empty URL are skipped.
/// </remarks>
/// <param name="listSheet">Sheet whose rows supply provinceName, cityName and cityCode.</param>
/// <param name="pageSourceDir">Not used by this method; kept for signature compatibility.</param>
/// <param name="resultEW">Writer that receives the rows; it is not saved here.</param>
private void GetShopList(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
{
    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string provinceName = row["provinceName"];
        string cityName = row["cityName"];
        string cityCode = row["cityCode"];

        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection allShopNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"cityMapLeft\"]/div/b/a");

        // SelectNodes returns null when nothing matches; a city without shops contributes no rows.
        if (allShopNodes == null)
        {
            continue;
        }

        foreach (HtmlNode shopNode in allShopNodes)
        {
            string shopUrl = shopNode.GetAttributeValue("href", "");
            string[] shopPieces = shopUrl.Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries);

            // A missing or empty href leaves no path segment to derive a code from.
            if (shopPieces.Length == 0)
            {
                continue;
            }

            string shopCodeStr = shopPieces[shopPieces.Length - 1];

            // Substring(0, -1) would throw when the segment has no extension.
            int dotIndex = shopCodeStr.IndexOf('.');
            string shopCode = dotIndex >= 0 ? shopCodeStr.Substring(0, dotIndex) : shopCodeStr;
            string shopName = shopNode.InnerText.Trim();

            resultEW.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", shopUrl },
                { "detailPageName", shopCode + shopName },
                { "provinceName", provinceName },
                { "cityCode", cityCode },
                { "cityName", cityName },
                { "shopCode", shopCode },
                { "shopName", shopName }
            });
        }
    }
}
/// <summary>
/// Reads the JSON stored in the <c>value</c> attribute of every <c>input.icheck</c> element and
/// adds one row per distinct <c>apt_code</c>, together with its <c>apt_scope</c>.
/// </summary>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
/// <param name="pageSourceDir">Not used by this method.</param>
/// <param name="resultEW">Writer that receives the rows; it is not saved here.</param>
private void GetQuals(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
{
    // Codes already written; spans all pages of the sheet.
    HashSet<string> writtenCodes = new HashSet<string>();

    for (int pageIndex = 0; pageIndex < listSheet.RowCount; pageIndex++)
    {
        string pageUrl = listSheet.PageUrlList[pageIndex];
        HtmlAgilityPack.HtmlDocument document = this.RunPage.GetLocalHtmlDocument(listSheet, pageIndex);
        HtmlNodeCollection inputs = document.DocumentNode.SelectNodes("//input[@class=\"icheck\"]");
        if (inputs == null)
        {
            continue;
        }

        foreach (HtmlNode input in inputs)
        {
            string jsonText = input.GetAttributeValue("value", "");
            JObject rootJo = JObject.Parse(jsonText);
            string aptCode = (rootJo.SelectToken("apt_code") as JValue).ToString().Trim();
            string aptScope = (rootJo.SelectToken("apt_scope") as JValue).ToString().Trim();

            if (writtenCodes.Contains(aptCode))
            {
                continue;
            }

            writtenCodes.Add(aptCode);
            resultEW.AddRow(new Dictionary<string, string>
            {
                { "aptCode", aptCode },
                { "aptScope", aptScope }
            });
        }
    }
}
/// <summary>
/// Writes "绿色建筑列表页.xlsx" to the export directory with one row per search page:
/// the URL from <c>GetShopSearchPageUrl</c>, the zero-based page index as the page name and
/// the one-based page number in the "pageNum" column.
/// </summary>
/// <returns>Always <c>true</c>.</returns>
private bool GenerateUrlListFile()
{
    string exportDir = this.RunPage.GetExportDir();
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    string[] columnNames = new string[] { "detailPageUrl", "detailPageName", "cookie", "grabStatus", "giveUpGrab", "pageNum" };
    Dictionary<string, int> columnDic = CommonUtil.InitStringIndexDic(columnNames);

    ExcelWriter writer = new ExcelWriter(Path.Combine(exportDir, "绿色建筑列表页.xlsx"), "List", columnDic, null);

    int pageNum = 0;
    while (pageNum < PageCount)
    {
        string pageUrl = this.GetShopSearchPageUrl(pageNum);
        writer.AddRow(new Dictionary<string, string>
        {
            { "detailPageUrl", pageUrl },
            { "detailPageName", pageNum.ToString() },
            { "pageNum", (pageNum + 1).ToString() }
        });
        pageNum++;
    }

    writer.SaveToDisk();
    return true;
}
/// <summary>
/// Merges the per-subcategory workbooks referenced by <paramref name="listSheet"/> into
/// "美食天下_分类与菜谱列表对照.xlsx", copying the category, subCategory, name and url columns.
/// </summary>
/// <param name="listSheet">Sheet whose rows point at the per-subcategory workbooks.</param>
private void GetCategoryToPageUrls(IListSheet listSheet)
{
    string exportDir = this.RunPage.GetExportDir();
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();
    ExcelWriter mapWriter = this.CreateSubCategoryMapWriter(Path.Combine(exportDir, "美食天下_分类与菜谱列表对照.xlsx"));

    for (int sheetRowIndex = 0; sheetRowIndex < listSheet.RowCount; sheetRowIndex++)
    {
        Dictionary<string, string> sheetRow = listSheet.GetRow(sheetRowIndex);
        string detailPageUrl = sheetRow[SysConfig.DetailPageUrlFieldName];
        string category = sheetRow["category"];
        string subCategory = sheetRow["subCategory"];

        // Locate and open the workbook that was produced for this list row.
        string sourceDir = this.RunPage.GetDetailSourceFileDir();
        ExcelReader reader = new ExcelReader(this.RunPage.GetFilePath(detailPageUrl, sourceDir));

        int subRowCount = reader.GetRowCount();
        for (int subRowIndex = 0; subRowIndex < subRowCount; subRowIndex++)
        {
            Dictionary<string, string> subRow = reader.GetFieldValues(subRowIndex);

            // Values come from the workbook row, not from the list row read above.
            mapWriter.AddRow(new Dictionary<string, string>
            {
                { "category", subRow["category"] },
                { "subCategory", subRow["subCategory"] },
                { "name", subRow["name"] },
                { "url", subRow["url"] }
            });
        }
    }

    mapWriter.SaveToDisk();
}
/// <summary>
/// Extracts every sub-category link from the <c>category_sub clear</c> blocks of each
/// not-given-up page and writes them to "美食天下_获取各小类菜谱列表页.xlsx" in the export directory.
/// </summary>
/// <remarks>
/// Exceptions propagate to the caller with their original stack trace. The former
/// <c>catch (Exception ex) { throw ex; }</c> only reset that trace and has been removed.
/// </remarks>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
private void GetListPageUrls(IListSheet listSheet)
{
    string exportDir = this.RunPage.GetExportDir();

    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>
    {
        { "detailPageUrl", 0 },
        { "detailPageName", 1 },
        { "cookie", 2 },
        { "grabStatus", 3 },
        { "giveUpGrab", 4 },
        { "category", 5 },
        { "subCategory", 6 }
    };
    string resultFilePath = Path.Combine(exportDir, "美食天下_获取各小类菜谱列表页.xlsx");
    ExcelWriter resultEW = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection categoryDivList = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"category_sub clear\"]");

        // SelectNodes returns null when nothing matches; such a page contributes no rows.
        if (categoryDivList == null)
        {
            continue;
        }

        foreach (HtmlNode categoryDiv in categoryDivList)
        {
            HtmlNode categoryNameNode = categoryDiv.SelectSingleNode("./h3");
            string categoryName = CommonUtil.HtmlDecode(categoryNameNode.InnerText).Trim();

            HtmlNodeCollection subCategoryNodeList = categoryDiv.SelectNodes("./ul/li/a");

            // A category block without links has no sub-categories to emit.
            if (subCategoryNodeList == null)
            {
                continue;
            }

            foreach (HtmlNode subCategoryNode in subCategoryNodeList)
            {
                string subCategoryName = subCategoryNode.GetAttributeValue("title", "");
                string subCategoryPageUrl = subCategoryNode.GetAttributeValue("href", "");

                // The URL is used as the page name as well.
                resultEW.AddRow(new Dictionary<string, string>
                {
                    { "detailPageUrl", subCategoryPageUrl },
                    { "detailPageName", subCategoryPageUrl },
                    { "category", categoryName },
                    { "subCategory", subCategoryName }
                });
            }
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Parses the JSON payload embedded in each downloaded level-1 area file and adds one row per
/// entry of its <c>data</c> array, including the URL that queries that entry's level-3 areas.
/// </summary>
/// <param name="listSheet">Sheet whose rows supply areaLevel1Code and areaLevel1Name.</param>
/// <param name="pageSourceDir">Directory passed to <c>RunPage.GetFilePath</c> to locate each file.</param>
/// <param name="resultEW">Writer that receives the rows; it is not saved here.</param>
private void GetCities(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
{
    for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
    {
        string pageUrl = listSheet.PageUrlList[rowIndex];
        Dictionary<string, string> parentRow = listSheet.GetRow(rowIndex);
        string areaLevel1Code = parentRow["areaLevel1Code"];
        string areaLevel1Name = parentRow["areaLevel1Name"];

        string fileText = FileHelper.GetTextFromFile(this.RunPage.GetFilePath(pageUrl, pageSourceDir));

        // Keep only the text from the first '{' to the last '}' so any wrapper around the
        // JSON object (the request URL carries a callback parameter) is dropped.
        int firstBrace = fileText.IndexOf("{");
        int lastBrace = fileText.LastIndexOf("}");
        JObject rootJo = JObject.Parse(fileText.Substring(firstBrace, lastBrace - firstBrace + 1));

        JArray areas = rootJo.SelectToken("data") as JArray;
        foreach (JToken areaToken in areas)
        {
            JObject area = areaToken as JObject;
            string areaLevel2Code = (area.SelectToken("id") as JValue).Value.ToString();
            string areaLevel2Name = (area.SelectToken("name") as JValue).Value.ToString();

            resultEW.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", "http://autobeta.jd.com/queryAreaList?area_lev=3&area_id=" + areaLevel2Code + "&callback=jQuery7711772&_=1469734421125" },
                { "detailPageName", areaLevel2Code + areaLevel2Name },
                { "areaLevel1Code", areaLevel1Code },
                { "areaLevel1Name", areaLevel1Name },
                { "areaLevel2Code", areaLevel2Code },
                { "areaLevel2Name", areaLevel2Name }
            });
        }
    }
}
/// <summary>
/// Extracts the province entries from the first list under <c>listTab</c> on each page and adds
/// one row per province, including the URL of that province's shop page.
/// </summary>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
/// <param name="pageSourceDir">Not used by this method; kept for signature compatibility.</param>
/// <param name="resultEW">Writer that receives the rows; it is not saved here.</param>
private void GetProvinces(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
{
    for (int i = 0; i < listSheet.RowCount; i++)
    {
        // Original note: listSheet contains only one record.
        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection allProvinceNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"listTab\"]/ul[1]/li");

        // SelectNodes returns null when nothing matches; such a page contributes no rows.
        if (allProvinceNodes == null)
        {
            continue;
        }

        foreach (HtmlNode provinceNode in allProvinceNodes)
        {
            string provinceCode = provinceNode.Attributes["data-value"].Value;
            string provinceName = provinceNode.InnerText;

            resultEW.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", "http://www.tuhu.cn/Shops/" + provinceCode + ".aspx" },
                { "detailPageName", provinceCode + provinceName },
                { "provinceCode", provinceCode },
                { "provinceName", provinceName }
            });
        }
    }
}
/// <summary>
/// Extracts the city links from the second list under <c>listTab</c> on each province page and
/// adds one row per city.
/// </summary>
/// <remarks>
/// The city code is the last path segment of the city URL, cut at its first '.'.
/// </remarks>
/// <param name="listSheet">Sheet whose rows supply provinceCode and provinceName.</param>
/// <param name="pageSourceDir">Not used by this method; kept for signature compatibility.</param>
/// <param name="resultEW">Writer that receives the rows; it is not saved here.</param>
private void ReadCityPages(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
{
    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string provinceCode = row["provinceCode"];
        string provinceName = row["provinceName"];

        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection allCityNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"listTab\"]/ul[2]/li/a");

        // SelectNodes returns null when nothing matches; a province without cities contributes no rows.
        if (allCityNodes == null)
        {
            continue;
        }

        foreach (HtmlNode cityNode in allCityNodes)
        {
            string cityUrl = cityNode.Attributes["href"].Value;
            string[] cityUrlPieces = cityUrl.Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries);
            string[] cityPageNamePieces = cityUrlPieces[cityUrlPieces.Length - 1].Split(new string[] { "." }, StringSplitOptions.RemoveEmptyEntries);
            string cityCode = cityPageNamePieces[0];
            string cityName = cityNode.InnerText;

            resultEW.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", cityUrl },
                { "detailPageName", cityCode + cityName },
                { "provinceCode", provinceCode },
                { "provinceName", provinceName },
                { "cityCode", cityCode },
                { "cityName", cityName }
            });
        }
    }
}
/// <summary>
/// Writes "Id详情页.xlsx" to the export directory with one row per search page, numbered from 1
/// to the value returned by <c>GetTotalPageCount</c>. The page index doubles as the page name.
/// </summary>
/// <returns>Always <c>true</c>.</returns>
private bool GenerateUrlListFile()
{
    string exportDir = this.RunPage.GetExportDir();
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    string[] columnNames = new string[] { "detailPageUrl", "detailPageName", "cookie", "grabStatus", "giveUpGrab" };
    Dictionary<string, int> columnDic = CommonUtil.InitStringIndexDic(columnNames);

    ExcelWriter writer = new ExcelWriter(Path.Combine(exportDir, "Id详情页.xlsx"), "List", columnDic, null);

    // The page count is read once, before any row is generated.
    int lastPage = this.GetTotalPageCount();
    for (int page = 1; page <= lastPage; page++)
    {
        string pageUrl = this.GetShopSearchPageUrl(page);
        writer.AddRow(new Dictionary<string, string>
        {
            { "detailPageUrl", pageUrl },
            { "detailPageName", page.ToString() }
        });
    }

    writer.SaveToDisk();
    return true;
}
/// <summary>
/// Adds one row per list node whose project URL has not been seen yet.
/// </summary>
/// <param name="listNodeList">Nodes whose first anchor holds the project name and whose second anchor holds its link.</param>
/// <param name="pageNum">Value written to the "pageNum" column of every row.</param>
/// <param name="projectUrlToNull">Seen-URL registry (only the keys matter); updated by this method.</param>
/// <param name="ew">Writer that receives the rows; it is not saved here.</param>
private void GetProjectItem(HtmlNodeCollection listNodeList, string pageNum, Dictionary<string, string> projectUrlToNull, ExcelWriter ew)
{
    foreach (HtmlNode item in listNodeList)
    {
        string projectName = item.SelectSingleNode("./a[1]").InnerText.Trim();
        string projectUrl = "http://www.gbmap.org" + item.SelectSingleNode("./a[2]").GetAttributeValue("href", "");

        if (projectUrlToNull.ContainsKey(projectUrl))
        {
            continue;
        }

        projectUrlToNull.Add(projectUrl, null);

        // The URL is used as the page name as well.
        ew.AddRow(new Dictionary<string, object>
        {
            { "detailPageUrl", projectUrl },
            { "detailPageName", projectUrl },
            { "projectName", projectName },
            { "pageNum", pageNum }
        });
    }
}
/// <summary>
/// Collects the distinct pager links (anchors inside <c>div.gclear.pp.bt.center.f14</c>) from
/// every page and saves them through the writer returned by <c>CreateResultWriter</c>.
/// </summary>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
private void GetListPageUrls(IListSheet listSheet)
{
    ExcelWriter resultEW = this.CreateResultWriter();
    HashSet<string> seenUrls = new HashSet<string>();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection pageUrlNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"gclear pp bt center f14\"]/a");

        // SelectNodes returns null when nothing matches, e.g. a page without pager links.
        // That used to crash the foreach; such a page now contributes no rows.
        if (pageUrlNodes == null)
        {
            continue;
        }

        foreach (HtmlNode pageUrlNode in pageUrlNodes)
        {
            string fullPageUrl = "https://chengyu.911cha.com/" + pageUrlNode.GetAttributeValue("href", "");
            if (!seenUrls.Add(fullPageUrl))
            {
                continue;
            }

            // The URL is used as the page name as well.
            resultEW.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", fullPageUrl },
                { "detailPageName", fullPageUrl }
            });
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Writes "企业数据_企业工商信息列表页.xlsx" to the export directory: one row per distinct
/// company name among the not-given-up rows, with a tianyancha.com search URL for that name.
/// </summary>
/// <remarks>
/// Before de-duplication the company name is trimmed and the substrings "造价企业" and "测试企业"
/// are removed from it.
/// </remarks>
/// <param name="listSheet">Sheet whose rows supply the company fields.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool AfterAllGrab(IListSheet listSheet)
{
    string exportDir = this.RunPage.GetExportDir();

    // The column index is the position in this array.
    string[] columnNames = new string[]
    {
        "detailPageUrl", "detailPageName", "cookie", "grabStatus", "giveUpGrab",
        "CompanyId", "企业名称", "统一社会信用代码", "企业法定代表人", "企业登记注册类型",
        "企业注册属地", "企业经营地址", "addressParts"
    };
    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
    for (int columnIndex = 0; columnIndex < columnNames.Length; columnIndex++)
    {
        resultColumnDic.Add(columnNames[columnIndex], columnIndex);
    }

    ExcelWriter writer = new ExcelWriter(Path.Combine(exportDir, "企业数据_企业工商信息列表页.xlsx"), "List", resultColumnDic, null);
    string detailPageUrlColumnName = SysConfig.DetailPageUrlFieldName;
    HashSet<string> writtenCompanies = new HashSet<string>();

    for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
    {
        Dictionary<string, string> row = listSheet.GetRow(rowIndex);
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        string companyName = row["企业名称"].Trim().Replace("造价企业", "").Replace("测试企业", "");
        if (writtenCompanies.Contains(companyName))
        {
            continue;
        }

        writtenCompanies.Add(companyName);

        Dictionary<string, string> outputRow = new Dictionary<string, string>
        {
            { "detailPageUrl", "https://www.tianyancha.com/search?key=" + companyName },
            { "detailPageName", row["CompanyId"] },
            { "CompanyId", row["CompanyId"] },
            { "企业名称", companyName },
            { "统一社会信用代码", row["统一社会信用代码"] },
            { "企业法定代表人", row["企业法定代表人"] },
            { "企业登记注册类型", row["企业登记注册类型"] },
            { "企业注册属地", row["企业注册属地"] },
            { "企业经营地址", row["企业经营地址"] }
        };
        outputRow.Add("addressParts", this.GetAddresParts(row));
        writer.AddRow(outputRow);
    }

    writer.SaveToDisk();
    return true;
}
/// <summary>
/// Reads company names from the source workbook and writes one Glassdoor review-search URL per
/// distinct name to the destination workbook.
/// </summary>
/// <remarks>
/// <c>Parameters</c> is split on ',': item 0 is the source file path, item 1 the destination path.
/// </remarks>
private void GenerateListPageUrls()
{
    // NOTE(review): a browser session cookie (including what looks like login tokens) is
    // hard-coded here. It should come from configuration and is likely expired - confirm.
    const string Cookie =
        "ARPNTS=1952819392.64288.0000; ARPNTS_AB=115; gdId=94517b85-9d89-47c1-a5ab-2a04b242c067; trs=direct:direct:direct:2018-07-15+23%3A50%3A22.919:undefined:undefined; _ga=GA1.2.216399378.1531723803; __qca=P0-1262345758-1531723804448; G_ENABLED_IDPS=google; __gads=ID=62251b7c5d596d61:T=1531723836:S=ALNI_MZk81H-OcTT9PjdVFK8PYIrVGTx1A; __gdpopuc=1; cto_lwid=8e5c6f44-854b-492e-be0f-09a9dc915819; rm=bGl4aW4xNTUzQGdtYWlsLmNvbToxNTYzMjkyNzgzNzgxOjVhMDQ1MWI1NjBiYjYzYzE3NjM3YmEzOThjNTJlM2Ix; uc=8F0D0CFA50133D96DAB3D34ABA1B873399807652C6C76982808553CADAB58BBB131EFE7DE1E6A4B95851EB3294212EB393007ED539985D9CDE873DE04D4FC71FEE18FB9F0BDE4138B3E34D8411CDEA90F25EDE93274F0D5D5FDED9B003FBA6F43CA9014AC0BB0289EB0204D279873038C3CF7E94AE6F099E0174A86BB3453633759C8511C218159EA514952BE5A78210E84BCCC56AAAAD09; _mibhv=anon-1531735166141-5684441656_6890; "
        + "JSESSIONID=E01A35E3A52310CD24E42EC5FF252052; _uac=00000164ca63c0a6a0bcb163ea7dc134; GSESSIONID=E01A35E3A52310CD24E42EC5FF252052; _gid=GA1.2.739342608.1532403870; ht=%7B%22quantcast%22%3A%5B%22D%22%5D%7D; JSESSIONID_KYWI_APP=B31D8DA6C274B6196C84875AE7D7942A; JSESSIONID_JX_APP=8E738CEAE7DF1A613C3E7B6006442DE4; cass=1; AWSALB=4plUYq9nqfzCEW/AJ4UDiC11DqFrHS0JteBY5hN5Ok2HoX9iLI04hye/Bpq8j7Syv8PnKkRAsMcWCNGXkxlGMnVvbn1nPp99yMD5TcSM4g+ORjkL9rbNGIoiSAN4qYGv/Ir11PRBDXxXJIp8E0TRnpuNo3fcuCImeBiC/rzpGYMOeTyaTw32g+C3rlEvLCmoohAKaUTrzpDwu1OXM6sBFG9S5jfl0NtU/cmqv5muVCjMBcqr8FoqD9WjlkUkNe0=";

    string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
    string sourceFilePath = parameters[0];
    string destFilePath = parameters[1];

    ExcelReader reader = new ExcelReader(sourceFilePath);
    ExcelWriter writer = this.GetExcelWriter(destFilePath);
    HashSet<string> writtenCompanies = new HashSet<string>();

    int total = reader.GetRowCount();
    for (int rowIndex = 0; rowIndex < total; rowIndex++)
    {
        string companyName = reader.GetFieldValues(rowIndex)["Company Name"];
        if (writtenCompanies.Contains(companyName))
        {
            continue;
        }

        writtenCompanies.Add(companyName);

        // The encoded name appears twice in the query string (typedKeyword and sc.keyword).
        string encodedName = CommonUtil.UrlEncode(companyName);
        string pageUrl =
            "https://www.glassdoor.com/Reviews/company-reviews.htm?suggestCount=0&suggestChosen=false&clickSource=searchBtn&typedKeyword="
            + encodedName
            + "&sc.keyword="
            + encodedName
            + "&locT=&locId=&jobType=";

        writer.AddRow(new Dictionary<string, string>
        {
            { "detailPageUrl", pageUrl },
            { "detailPageName", pageUrl },
            { "cookie", Cookie },
            { "Company_Name", companyName }
        });
    }

    writer.SaveToDisk();
}
/// <summary>
/// Extracts the title (<c>h2</c>) and optional description (<c>p</c>) from the
/// <c>info_txt2 clearfix</c> block of every page and saves one row per page through the writer
/// returned by <c>CreateRenWuResultWriter</c>.
/// </summary>
/// <remarks>
/// Exceptions propagate to the caller with their original stack trace. The former
/// <c>catch (Exception ex) { throw ex; }</c> only reset that trace and has been removed.
/// </remarks>
/// <param name="listSheet">Sheet whose rows supply renWu, shiDai and the page URL.</param>
private void GetRenWuInfos(IListSheet listSheet)
{
    ExcelWriter resultEW = this.CreateRenWuResultWriter();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> listRow = listSheet.GetRow(i);
        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

        HtmlNode mainInfoNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"info_txt2 clearfix\"]");
        HtmlNode titleNode = mainInfoNode.SelectSingleNode("./h2");
        string renWuTitle = CommonUtil.HtmlDecode(titleNode.InnerText).Trim();

        // The description paragraph is optional; a missing one becomes an empty string.
        HtmlNode descriptionNode = mainInfoNode.SelectSingleNode("./p");
        string description = descriptionNode == null ? "" : CommonUtil.HtmlDecode(descriptionNode.InnerText).Trim();

        resultEW.AddRow(new Dictionary<string, string>
        {
            { "人物", listRow["renWu"] },
            { "时代", listRow["shiDai"] },
            { "人物页面标题", renWuTitle },
            { "简介", description },
            { "url", listRow[SysConfig.DetailPageUrlFieldName] }
        });
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Writes a fixed list of domain names to the single-column ("word") workbook <c>f:\c.xlsx</c>.
/// </summary>
public void Test()
{
    Dictionary<string, int> columnNameToIndex = CommonUtil.InitStringIndexDic(new string[] { "word" });
    ExcelWriter writer = new ExcelWriter("f:\\c.xlsx", "List", columnNameToIndex);

    string[] domains = new string[]
    {
        "sina.com.cn", "xinhua.com", "twitter.com", "amazon.com", "baidu.com", "nytimes.com", "jd.com",
        "tmall.com", "sohu.com", "qq.com", "taobao.com", "tianya.com", "bustbuy.com"
    };
    string[] tabSeparator = new string[] { "\t" };

    // NOTE(review): iteration starts at index 1, so the first entry ("sina.com.cn") is never
    // written. Confirm whether that is intentional or an off-by-one.
    for (int index = 1; index < domains.Length; index++)
    {
        // Only the text before the first tab is kept.
        string[] parts = domains[index].Split(tabSeparator, StringSplitOptions.RemoveEmptyEntries);
        writer.AddRow(new Dictionary<string, string> { { "word", parts[0] } });
    }

    writer.SaveToDisk();
}
/// <summary>
/// Collects the links under <c>info_cate clearfix</c> from every page and saves one row per
/// link through the writer returned by <c>CreateResultWriter</c>.
/// </summary>
/// <param name="listSheet">Sheet whose rows supply shiShu and leiXing.</param>
private void GetShiShuDetailPageUrls(IListSheet listSheet)
{
    ExcelWriter resultEW = this.CreateResultWriter();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> listRow = listSheet.GetRow(i);
        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection linkNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"info_cate clearfix\"]/dl/dd/a");

        // SelectNodes returns null when nothing matches; such a page contributes no rows.
        if (linkNodes == null)
        {
            continue;
        }

        foreach (HtmlNode linkNode in linkNodes)
        {
            string juanName = CommonUtil.HtmlDecode(linkNode.InnerText).Trim();
            string fullJuanPageUrl = "http://www.guoxuedashi.com" + linkNode.GetAttributeValue("href", "");

            // The URL is used as the page name as well.
            resultEW.AddRow(new Dictionary<string, string>
            {
                { "detailPageUrl", fullJuanPageUrl },
                { "detailPageName", fullJuanPageUrl },
                { "shiShu", listRow["shiShu"] },
                { "leiXing", listRow["leiXing"] },
                { "juan", juanName }
            });
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Parses each not-given-up page file and saves one row per page that has a
/// <c>lemma-summary</c> block, through the writer returned by <c>CreateResultWriter</c>.
/// </summary>
/// <remarks>
/// Pages containing the "page does not exist" marker text, or lacking a summary block, are
/// logged and skipped. Any other failure is logged and rethrown with its original stack trace
/// (<c>throw;</c> rather than <c>throw ex;</c>).
/// </remarks>
/// <param name="listSheet">Sheet whose rows supply the page URL, yearValue and yearName.</param>
private void GetYearInfos(IListSheet listSheet)
{
    string sourceDir = this.RunPage.GetDetailSourceFileDir();
    ExcelWriter resultEW = this.CreateResultWriter();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> listRow = listSheet.GetRow(i);
        bool giveUp = "Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]);
        string detailPageUrl = listRow[SysConfig.DetailPageUrlFieldName];
        if (giveUp)
        {
            continue;
        }

        try
        {
            string localFilePath = this.RunPage.GetFilePath(detailPageUrl, sourceDir);
            string html = FileHelper.GetTextFromFile(localFilePath, Encoding.UTF8);

            // The site serves this text for entries that do not exist.
            if (html.Contains("您所访问的页面不存在"))
            {
                this.RunPage.InvokeAppendLogText("放弃解析此页, 所访问的页面不存在, pageUrl = " + detailPageUrl, LogLevelType.Error, true);
                continue;
            }

            HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.LoadHtml(html);

            HtmlNode mainInfoNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"lemma-summary\"]");
            if (mainInfoNode == null)
            {
                this.RunPage.InvokeAppendLogText("此词条不存在摘要信息, pageUrl = " + detailPageUrl, LogLevelType.Error, true);
                continue;
            }

            // The entry id and title are read from data-* attributes of this element.
            HtmlNode itemBaseInfoNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"lemmaWgt-promotion-rightPreciseAd\"]");
            string itemId = itemBaseInfoNode.GetAttributeValue("data-lemmaid", "");
            string itemName = itemBaseInfoNode.GetAttributeValue("data-lemmatitle", "");
            string mainInfo = CommonUtil.HtmlDecode(mainInfoNode.InnerText).Trim();

            resultEW.AddRow(new Dictionary<string, string>
            {
                { "url", detailPageUrl },
                { "yearValue", listRow["yearValue"] },
                { "yearName", listRow["yearName"] },
                { "itemId", itemId },
                { "itemName", itemName },
                { "mainInfo", mainInfo }
            });
        }
        catch (Exception ex)
        {
            this.RunPage.InvokeAppendLogText(ex.Message + ". 解析出错, pageUrl = " + detailPageUrl, LogLevelType.Error, true);

            // Rethrow without resetting the stack trace.
            throw;
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Extracts the distinct city links from every not-given-up page and writes their code, name
/// and URL to "安居客城市列表.xlsx" in the export directory.
/// </summary>
/// <remarks>
/// The city code is the URL text between <c>"com/"</c> and <c>"/commu"</c>; links lacking either
/// marker (or with the markers out of order) are skipped. Exceptions propagate to the caller
/// with their original stack trace; the former <c>throw ex;</c> wrapper has been removed.
/// </remarks>
/// <param name="listSheet">Sheet whose rows describe the pages to process.</param>
private void GetCityList(IListSheet listSheet)
{
    string exportDir = this.RunPage.GetExportDir();

    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>
    {
        { "code", 0 },
        { "name", 1 },
        { "url", 2 }
    };
    string resultFilePath = Path.Combine(exportDir, "安居客城市列表.xlsx");
    ExcelWriter resultEW = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);
    HashSet<string> seenUrls = new HashSet<string>();

    const string HostMarker = "com/";

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection allCityNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"cl-c-list\"]/ul[@class=\"cl-c-l-ul\"]/li[@class=\"cl-c-l-li\"]/a");

        // SelectNodes returns null when nothing matches; such a page contributes no rows.
        if (allCityNodes == null)
        {
            continue;
        }

        foreach (HtmlNode cityNode in allCityNodes)
        {
            string url = cityNode.GetAttributeValue("href", "");
            int hostMarkerIndex = url.IndexOf(HostMarker, StringComparison.Ordinal);
            int cityCodeEndIndex = url.IndexOf("/commu", StringComparison.Ordinal);

            // Previously a missing "com/" produced a start index of 3 and a garbage code
            // (or an out-of-range Substring); require both markers in the right order.
            if (hostMarkerIndex < 0)
            {
                continue;
            }

            int cityCodeFromIndex = hostMarkerIndex + HostMarker.Length;
            if (cityCodeEndIndex < cityCodeFromIndex)
            {
                continue;
            }

            if (!seenUrls.Add(url))
            {
                continue;
            }

            string code = url.Substring(cityCodeFromIndex, cityCodeEndIndex - cityCodeFromIndex);
            string name = CommonUtil.HtmlDecode(cityNode.InnerText.Trim()).Trim();

            resultEW.AddRow(new Dictionary<string, string>
            {
                { "code", code },
                { "name", name },
                { "url", url }
            });
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// For every list row that is not flagged as given up, extracts the number following
/// the <c>"$total":</c> marker in the locally stored page and exports it together with
/// the row's region and "apt" columns.
/// </summary>
/// <param name="listSheet">
/// Sheet describing the pages to parse. Each processed row must provide the detail page
/// URL, "regionId", "regionName", "regionFullName", "aptCode", "aptScope" and "cookie".
/// </param>
/// <returns>Always <c>true</c>.</returns>
private bool GetProvinceCompCountList(IListSheet listSheet)
{
    const string totalMarker = "\"$total\":";

    String outputDir = this.RunPage.GetExportDir();

    var columnIndexes = new Dictionary<string, int>
    {
        { "regionId", 0 },
        { "regionName", 1 },
        { "regionFullName", 2 },
        { "aptCode", 3 },
        { "aptScope", 4 },
        { "companyCount", 5 },
    };
    string outputPath = Path.Combine(outputDir, "各省企业个数.xlsx");
    var columnFormats = new Dictionary<string, string>
    {
        { "companyCount", "#,##0" },
    };
    ExcelWriter writer = new ExcelWriter(outputPath, "List", columnIndexes, columnFormats);

    string urlColumn = SysConfig.DetailPageUrlFieldName;
    int rowTotal = listSheet.RowCount;
    for (int rowIndex = 0; rowIndex < rowTotal; rowIndex++)
    {
        Dictionary<string, string> source = listSheet.GetRow(rowIndex);
        if ("Y".Equals(source[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        // The URL and cookie values are not used further; the lookups are kept so that
        // a missing column still fails at the same point.
        string pageUrl = source[urlColumn];
        string regionId = source["regionId"];
        string regionName = source["regionName"];
        string regionFullName = source["regionFullName"];
        string aptCode = source["aptCode"];
        string aptScope = source["aptScope"];
        string cookieValue = source["cookie"];

        HtmlAgilityPack.HtmlDocument document = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);
        HtmlNode pagingForm = document.DocumentNode.SelectSingleNode("//form[@class=\"pagingform\"]");
        // The text to scan is taken from the second sibling after the paging form.
        HtmlNode dataNode = pagingForm.NextSibling.NextSibling;
        string dataText = dataNode.InnerText;

        int valueStart = dataText.IndexOf(totalMarker) + totalMarker.Length;
        int valueEnd = dataText.IndexOf(",", valueStart);
        int companyCount = int.Parse(dataText.Substring(valueStart, valueEnd - valueStart));

        var output = new Dictionary<string, object>
        {
            { "regionId", regionId },
            { "regionName", regionName },
            { "regionFullName", regionFullName },
            { "aptCode", aptCode },
            { "aptScope", aptScope },
            { "companyCount", companyCount },
        };
        writer.AddRow(output);
    }

    writer.SaveToDisk();
    return true;
}
/// <summary>
/// Reads the locally stored JSON list pages for one point and writes every distinct
/// restaurant (by "id") found in their "items" arrays to the point's shop workbook.
/// </summary>
/// <param name="subCategoryFilePath">Passed to <c>CreatePointShopsWriter</c> to create the output writer.</param>
/// <param name="detailPageUrl">Not used by this method.</param>
/// <param name="pageCount">Number of list pages to read; page indexes run from 0 to <paramref name="pageCount"/> - 1.</param>
/// <param name="pointShopDir">Directory used to resolve the local file of each list page.</param>
/// <param name="urlFormat">Passed to <c>GetNextListPageUrl</c> to build each list page URL.</param>
/// <param name="lat">Passed to <c>GetNextListPageUrl</c>.</param>
/// <param name="lng">Passed to <c>GetNextListPageUrl</c>.</param>
/// <remarks>
/// Exceptions raised while reading an item propagate to the caller with their original
/// stack trace; in that case <c>SaveToDisk</c> is not reached.
/// </remarks>
private void SaveShopsToPointFile(string subCategoryFilePath, string detailPageUrl, int pageCount, string pointShopDir, string urlFormat, string lat, string lng)
{
    ExcelWriter pointShopsEW = this.CreatePointShopsWriter(subCategoryFilePath);

    // Keys are the restaurant ids already exported; used only for de-duplication.
    Dictionary<string, string> urlDic = new Dictionary<string, string>();

    for (int i = 0; i < pageCount; i++)
    {
        string nextListPageUrl = this.GetNextListPageUrl(urlFormat, lat, lng, i);
        string localPath = this.RunPage.GetFilePath(nextListPageUrl, pointShopDir);
        string pageText = FileHelper.GetTextFromFile(localPath);

        JObject rootJo = JObject.Parse(pageText);
        JArray itemArray = rootJo.GetValue("items") as JArray;

        for (int j = 0; j < itemArray.Count; j++)
        {
            // The former "catch (Exception ex) { throw ex; }" wrapper was removed: it
            // handled nothing and reset the stack trace of every exception.
            JObject itemJo = (itemArray[j] as JObject).GetValue("restaurant") as JObject;
            if (itemJo == null)
            {
                continue;
            }

            string address = itemJo.GetValue("address").ToString();
            string description = itemJo.GetValue("description").ToString();
            string id = itemJo.GetValue("id").ToString();
            string latitude = itemJo.GetValue("latitude").ToString();
            string longitude = itemJo.GetValue("longitude").ToString();
            string name = itemJo.GetValue("name").ToString();
            // "phone" and "promotion_info" are optional; a missing property becomes "".
            string phone = itemJo.GetValue("phone") == null ? "" : itemJo.GetValue("phone").ToString();
            string promotion_info = itemJo.GetValue("promotion_info") == null ? "" : itemJo.GetValue("promotion_info").ToString();

            if (!urlDic.ContainsKey(id))
            {
                urlDic.Add(id, null);

                Dictionary<string, string> f2vs = new Dictionary<string, string>();
                f2vs.Add("address", address);
                f2vs.Add("description", description);
                f2vs.Add("id", id);
                f2vs.Add("latitude", latitude);
                f2vs.Add("longitude", longitude);
                f2vs.Add("name", name);
                f2vs.Add("phone", phone);
                f2vs.Add("promotion_info", promotion_info);
                pointShopsEW.AddRow(f2vs);
            }
        }
    }

    pointShopsEW.SaveToDisk();
}
/// <summary>
/// Reads the locally stored JSON page of every list row that is not flagged as given
/// up and writes one result row per entry of its "data" array, pairing the entry's
/// name/id with the parent row's "name"/"id".
/// </summary>
/// <param name="listSheet">
/// Sheet describing the pages to parse. Each processed row must provide "detailPageUrl",
/// the give-up flag, "name" and "id".
/// </param>
/// <remarks>
/// The result sheet declares "cookie", "grabStatus" and "giveUpGrab" columns that this
/// method does not populate. Exceptions raised while parsing a page propagate to the
/// caller with their original stack trace; in that case <c>SaveToDisk</c> is not reached.
/// </remarks>
private void GetList(IListSheet listSheet)
{
    String exportDir = this.RunPage.GetExportDir();
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
    resultColumnDic.Add("detailPageUrl", 0);
    resultColumnDic.Add("detailPageName", 1);
    resultColumnDic.Add("cookie", 2);
    resultColumnDic.Add("grabStatus", 3);
    resultColumnDic.Add("giveUpGrab", 4);
    resultColumnDic.Add("学科", 5);
    resultColumnDic.Add("学科id", 6);
    resultColumnDic.Add("门类", 7);
    resultColumnDic.Add("门类id", 8);

    string resultFilePath = Path.Combine(exportDir, "教育_本科_专业_jhcee_com.xlsx");
    ExcelWriter resultEW = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailUrl = row["detailPageUrl"];
        bool giveUp = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
        if (giveUp)
        {
            continue;
        }

        string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

        // The former "catch (Exception ex) { throw ex; }" wrapper was removed: it handled
        // nothing and reset the stack trace of every exception passing through it.
        string pageFileText = FileHelper.GetTextFromFile(localFilePath);
        JObject rootJo = JObject.Parse(pageFileText);
        JArray itemJsons = rootJo["data"] as JArray;

        foreach (JObject itemJson in itemJsons)
        {
            string name = itemJson["name"].ToString();
            string id = itemJson["id"].ToString();
            // Value is not used further; the read is kept so that an entry without
            // "parentId" still fails as before.
            string parentId = itemJson["parentId"].ToString();

            Dictionary<string, string> f2vs = new Dictionary<string, string>();
            f2vs.Add("detailPageUrl", "http://www.jhcee.com/specialized/loadByParentId.json?parentId=" + id);
            f2vs.Add("detailPageName", id);
            f2vs.Add("门类", name);
            f2vs.Add("门类id", id);
            f2vs.Add("学科", row["name"]);
            f2vs.Add("学科id", row["id"]);
            resultEW.AddRow(f2vs);
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// For every list row that is not flagged as given up, reads the page count from the
/// <c>input#allpage</c> element of the locally stored page and emits one building-list
/// page URL per page (1-based) for that project.
/// </summary>
/// <param name="listSheet">
/// Sheet describing the pages to parse. Each processed row must provide the detail page
/// URL, "projectId" and "projectName".
/// </param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// Pages without an <c>input#allpage</c> element produce no rows. The result sheet
/// declares "cookie", "grabStatus" and "giveUpGrab" columns that this method does not
/// populate.
/// </remarks>
private bool GetBuildingListPageUrls(IListSheet listSheet)
{
    String outputDir = this.RunPage.GetExportDir();

    var columnIndexes = new Dictionary<string, int>
    {
        { "detailPageUrl", 0 },
        { "detailPageName", 1 },
        { "cookie", 2 },
        { "grabStatus", 3 },
        { "giveUpGrab", 4 },
        { "projectId", 5 },
        { "projectName", 6 },
        { "pageIndex", 7 },
    };
    string outputPath = Path.Combine(outputDir, "济南楼盘_楼列表页.xlsx");
    var columnFormats = new Dictionary<string, string>();
    ExcelWriter writer = new ExcelWriter(outputPath, "List", columnIndexes, columnFormats);

    string urlColumn = SysConfig.DetailPageUrlFieldName;
    for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
    {
        Dictionary<string, string> source = listSheet.GetRow(rowIndex);
        if ("Y".Equals(source[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        // The URL value is not used further; the lookup is kept so that a missing
        // column still fails at the same point.
        string sourceUrl = source[urlColumn];
        string projectId = source["projectId"];
        string projectName = source["projectName"];

        HtmlAgilityPack.HtmlDocument document = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);
        HtmlNode allPageInput = document.DocumentNode.SelectSingleNode("//input[@id=\"allpage\"]");
        if (allPageInput == null)
        {
            continue;
        }

        int pageCount = int.Parse(allPageInput.GetAttributeValue("value", ""));
        for (int offset = 0; offset < pageCount; offset++)
        {
            // Page numbers in the generated URLs start at 1.
            string pageNumber = (offset + 1).ToString();

            var output = new Dictionary<string, object>
            {
                { "detailPageUrl", string.Concat("http://www.jnfdc.gov.cn/onsaling/show_", pageNumber, ".shtml?prjno=", projectId) },
                { "detailPageName", string.Concat(projectId, "_", pageNumber) },
                { "projectId", projectId },
                { "projectName", projectName },
                { "pageIndex", pageNumber },
            };
            writer.AddRow(output);
        }
    }

    writer.SaveToDisk();
    return true;
}
/// <summary>
/// For every list row that is not flagged as given up, reads six cells from the
/// <c>table.message_table</c> of the locally stored page and exports them together
/// with the row's "projectId" and "sellable" values.
/// </summary>
/// <param name="listSheet">
/// Sheet describing the pages to parse. Each processed row must provide the detail page
/// URL, "projectId" and "sellable".
/// </param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// Values are taken from table rows 1 to 3 (zero-based), cells 1 and 3 of each row.
/// </remarks>
private bool GetLoupanDetailInfos(IListSheet listSheet)
{
    String outputDir = this.RunPage.GetExportDir();

    var columnIndexes = new Dictionary<string, int>
    {
        { "项目ID", 0 },
        { "项目名称", 1 },
        { "项目地址", 2 },
        { "企业名称", 3 },
        { "所在区县", 4 },
        { "项目规模", 5 },
        { "总栋数", 6 },
        { "可售套数", 7 },
    };
    string outputPath = Path.Combine(outputDir, "济南楼盘_楼盘详情.xlsx");
    var columnFormats = new Dictionary<string, string>();
    ExcelWriter writer = new ExcelWriter(outputPath, "List", columnIndexes, columnFormats);

    string urlColumn = SysConfig.DetailPageUrlFieldName;
    for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
    {
        Dictionary<string, string> source = listSheet.GetRow(rowIndex);
        if ("Y".Equals(source[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        // The URL value is not used further; the lookup is kept so that a missing
        // column still fails at the same point.
        string sourceUrl = source[urlColumn];
        string projectId = source["projectId"];
        string sellable = source["sellable"];

        HtmlAgilityPack.HtmlDocument document = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);
        HtmlNodeCollection tableRows = document.DocumentNode.SelectNodes("//table[@class=\"message_table\"]/tr");

        // Each table row is queried once for its cells; values are read in the same
        // order as before (row 1, then row 2, then row 3).
        HtmlNodeCollection firstRowCells = tableRows[1].SelectNodes("./td");
        string projectName = firstRowCells[1].InnerText.Trim();
        string address = firstRowCells[3].InnerText.Trim();

        HtmlNodeCollection secondRowCells = tableRows[2].SelectNodes("./td");
        string companyName = secondRowCells[1].InnerText.Trim();
        string scope = secondRowCells[3].InnerText.Trim();

        HtmlNodeCollection thirdRowCells = tableRows[3].SelectNodes("./td");
        string projectSize = thirdRowCells[1].InnerText.Trim();
        string buildingCount = thirdRowCells[3].InnerText.Trim();

        var output = new Dictionary<string, object>
        {
            { "项目ID", projectId },
            { "项目名称", projectName },
            { "项目地址", address },
            { "企业名称", companyName },
            { "所在区县", scope },
            { "项目规模", projectSize },
            { "总栋数", buildingCount },
            { "可售套数", sellable },
        };
        writer.AddRow(output);
    }

    writer.SaveToDisk();
    return true;
}