/// <summary>
/// Walks every row of the list sheet, parses the locally stored detail page and
/// writes one CSV row per qualification table row found on that page.
/// </summary>
/// <param name="listSheet">List sheet whose rows carry a "companyId" value and the give-up flag.</param>
private void GetAllInfos(IListSheet listSheet)
{
    CsvWriter cw = this.GetCsvExcelWriter();

    // Output column names, in the order of the table cells that follow the index cell.
    string[] cellColumnNames = { "资质类别", "资质证书号", "资质名称", "发证日期", "证件有效期", "发证机关" };

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            // Rows flagged as given up are not processed.
            continue;
        }

        string companyId = row["companyId"];
        HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        HtmlNodeCollection trNodeList = pageHtmlDoc.DocumentNode.SelectNodes("//table/tbody/tr");
        if (trNodeList == null)
        {
            continue;
        }

        foreach (HtmlNode trNode in trNodeList)
        {
            // SelectNodes returns null (not an empty collection) when the row has no <td>.
            HtmlNodeCollection tdNodeList = trNode.SelectNodes("./td");
            if (tdNodeList == null || tdNodeList.Count == 0)
            {
                continue;
            }

            // Only rows whose first cell is tagged data-header="序号" are exported.
            if (tdNodeList[0].GetAttributeValue("data-header", "") != "序号")
            {
                continue;
            }

            Dictionary<string, string> f2vs = new Dictionary<string, string>();
            f2vs.Add("CompanyId", companyId);
            for (int c = 0; c < cellColumnNames.Length; c++)
            {
                // Cell 0 is the index cell, so data column c lives in cell c + 1;
                // missing trailing cells are exported as empty strings.
                int cellIndex = c + 1;
                string cellText = cellIndex < tdNodeList.Count ? tdNodeList[cellIndex].InnerText.Trim() : "";
                f2vs.Add(cellColumnNames[c], cellText);
            }

            // No catch-and-"throw ex" wrapper: exceptions propagate with their original stack trace.
            cw.AddRow(f2vs);
        }
    }

    cw.SaveToDisk();
}
/// <summary>
/// Translates the comma-separated names in <c>listRow["fromName"]</c> through
/// <c>this.Dic</c> (looked up by lower-cased name) and saves the original names
/// and their translations as a single CSV row.
/// </summary>
/// <param name="listRow">List row; must contain the "fromName" key.</param>
/// <exception cref="Exception">Thrown when a name has no entry in <c>this.Dic</c>.</exception>
public override void GetDataByOtherAccessType(Dictionary<string, string> listRow)
{
    // The value is not used below; the call is kept exactly as in the original flow.
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    string joinedSourceNames = listRow["fromName"];
    List<string> translatedNames = new List<string>();
    List<string> sourceNames = new List<string>();

    string[] separators = new string[] { "," };
    foreach (string rawName in joinedSourceNames.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        string sourceName = rawName.Trim();
        string lookupKey = sourceName.ToLower();
        if (!this.Dic.ContainsKey(lookupKey))
        {
            throw new Exception("无法翻译, fromName = " + sourceName);
        }

        string translated = this.Dic[lookupKey];
        translatedNames.Add(translated);
        sourceNames.Add(sourceName);
    }

    CsvWriter rowWriter = this.GetCsvWriter(listRow);
    Dictionary<string, string> outputRow = new Dictionary<string, string>
    {
        { "fromName", CommonUtil.StringArrayToString(sourceNames.ToArray(), ", ") },
        { "toCode", CommonUtil.StringArrayToString(translatedNames.ToArray(), ", ") }
    };
    rowWriter.AddRow(outputRow);
    rowWriter.SaveToDisk();
}
/// <summary>
/// Builds an item/tag matrix: reads the source sheet, collects every distinct tag
/// found in the comma-separated "tags" column, and writes a CSV in which each tag
/// has its own column holding "1" for the items that carry it.
/// </summary>
/// <param name="listSheet">Not read by this method.</param>
/// <remarks>
/// <c>this.Parameters</c> is split on commas: element 0 is the source file path,
/// element 1 is the destination file path.
/// </remarks>
private void GetItemTagsTypes(IListSheet listSheet)
{
    string[] separators = new string[] { "," };
    string[] parameters = this.Parameters.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    string sourceFilePath = parameters[0];
    string destFilePath = parameters[1];

    ExcelReader er = new ExcelReader(sourceFilePath);
    int sourceRowCount = er.GetRowCount();

    // First pass: collect distinct tags in first-seen order.
    // The set gives O(1) membership checks; the list keeps column order stable.
    List<string> tagList = new List<string>();
    HashSet<string> seenTags = new HashSet<string>();
    for (int i = 0; i < sourceRowCount; i++)
    {
        Dictionary<string, string> sourceRow = er.GetFieldValues(i);
        foreach (string itemTag in sourceRow["tags"].Split(separators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (seenTags.Add(itemTag))
            {
                tagList.Add(itemTag);
            }
        }
    }

    // Fixed columns first, then one column per tag.
    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
    resultColumnDic.Add("url", 0);
    resultColumnDic.Add("itemId", 1);
    resultColumnDic.Add("itemName", 2);
    resultColumnDic.Add("tags", 3);
    for (int i = 0; i < tagList.Count; i++)
    {
        resultColumnDic.Add(tagList[i], i + 4);
    }

    // Second pass: emit one matrix row per source row.
    CsvWriter itemTagMatrixCW = new CsvWriter(destFilePath, resultColumnDic);
    for (int i = 0; i < sourceRowCount; i++)
    {
        Dictionary<string, string> sourceRow = er.GetFieldValues(i);
        Dictionary<string, string> resultRow = new Dictionary<string, string>();
        resultRow.Add("url", sourceRow["url"]);
        resultRow.Add("itemId", sourceRow["itemId"]);
        resultRow.Add("itemName", sourceRow["itemName"]);
        resultRow.Add("tags", sourceRow["tags"]);

        foreach (string itemTag in sourceRow["tags"].Split(separators, StringSplitOptions.RemoveEmptyEntries))
        {
            // Indexer instead of Add: a tag listed twice on the same item must not throw.
            resultRow[itemTag] = "1";
        }

        itemTagMatrixCW.AddRow(resultRow);
    }

    itemTagMatrixCW.SaveToDisk();
}
/// <summary>
/// Appends every point record to the given writer, in list order, and then
/// saves the writer to disk.
/// </summary>
/// <param name="points">Field-name-to-value records to write.</param>
/// <param name="resultEW">Writer that receives the rows and is saved at the end.</param>
private void SavePointsToFile(List<Dictionary<string, string>> points, CsvWriter resultEW)
{
    foreach (Dictionary<string, string> point in points)
    {
        resultEW.AddRow(point);
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Copies the rows of the source CSV whose "city" value equals the configured
/// city name into a per-city result file.
/// </summary>
/// <param name="listSheet">Not read by this method.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// <c>this.Parameters</c> is split on commas: element 0 is the source file path,
/// element 1 the export directory, element 2 the city name.
/// NOTE(review): the result file is named ".xlsx" but is written through CsvWriter — confirm this is intended.
/// </remarks>
public override bool AfterAllGrab(IListSheet listSheet)
{
    string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
    string sourceFilePath = parameters[0];
    string exportDir = parameters[1];
    string cityName = parameters[2];

    // Output column name -> column position.
    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>
    {
        { "city", 0 },
        { "distrctName", 1 },
        { "shopName", 2 },
        { "shopCode", 3 },
        { "address", 4 },
        { "tel", 5 },
        { "shopType", 6 },
        { "commentNum", 7 },
        { "lat", 8 },
        { "lng", 9 },
        { "人均", 10 },
        { "口味", 11 },
        { "环境", 12 },
        { "服务", 13 }
    };

    // The former column-format dictionary was built but never handed to the writer, so it is gone.
    string resultFilePath = Path.Combine(exportDir, "大众点评店铺信息" + cityName + ".xlsx");
    CsvWriter resultEW = new CsvWriter(resultFilePath, resultColumnDic);

    CsvReader cr = new CsvReader(sourceFilePath);
    int sourceRowCount = cr.GetRowCount();
    for (int i = 0; i < sourceRowCount; i++)
    {
        Dictionary<string, string> sourceRow = cr.GetFieldValues(i);
        if (sourceRow["city"] == cityName)
        {
            resultEW.AddRow(sourceRow);
        }
    }

    resultEW.SaveToDisk();
    return true;
}
/// <summary>
/// Merges the per-block CSV files under "&lt;exportDir&gt;/temp" into a single
/// "爬取结果.csv" and also writes a "爬取结果.txt" text file with one
/// JSON-like object per point, separated by a comma and a line break.
/// </summary>
/// <param name="exportDir">Directory holding the "temp" folder and receiving both result files.</param>
/// <param name="listSheet">List sheet whose rows carry "x", "y", "z" and "time".</param>
private void MergeInfoFile(string exportDir, IListSheet listSheet)
{
    string mergedCsvPath = Path.Combine(exportDir, "爬取结果.csv");
    StringBuilder pointText = new StringBuilder();

    Dictionary<string, int> mergedColumns = new Dictionary<string, int>
    {
        { "x", 0 },
        { "y", 1 },
        { "z", 2 },
        { "xp", 3 },
        { "yp", 4 },
        { "v", 5 },
        { "time", 6 }
    };
    CsvWriter mergedWriter = new CsvWriter(mergedCsvPath, mergedColumns);

    // The temp directory is the same for every row.
    string tempDir = Path.Combine(exportDir, "temp");

    for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
    {
        Dictionary<string, string> listRow = listSheet.GetRow(rowIndex);
        string x = listRow["x"];
        string y = listRow["y"];
        string z = listRow["z"];
        string time = listRow["time"];

        // Block files are named "<x>_<y>_<z>_<time>.csv".
        string blockFileName = x + "_" + y + "_" + z + "_" + time + ".csv";
        CsvReader blockReader = new CsvReader(Path.Combine(tempDir, blockFileName));

        int pointCount = blockReader.GetRowCount();
        for (int pointIndex = 0; pointIndex < pointCount; pointIndex++)
        {
            Dictionary<string, string> pointValues = blockReader.GetFieldValues(pointIndex);
            string xp = pointValues["xp"];
            string yp = pointValues["yp"];
            string v = pointValues["v"];

            mergedWriter.AddRow(pointValues);

            // Separate entries with "," followed by a line break.
            if (pointText.Length != 0)
            {
                pointText.Append(",");
                pointText.AppendLine();
            }

            var lng = this.ToBaiduX(int.Parse(x), int.Parse(xp), int.Parse(z));
            var lat = this.ToBaiduY(int.Parse(y), int.Parse(yp), int.Parse(z));
            string count = double.Parse(v).ToString();

            pointText.Append(
                " {\"x\":" + x + "." + xp
                + ",\"y\":" + y + "." + yp
                + ", \"lng\":" + lng
                + ",\"lat\":" + lat
                + ",\"count\":" + count + "}");
        }
    }

    mergedWriter.SaveToDisk();

    string mergedTextPath = Path.Combine(exportDir, "爬取结果.txt");
    FileHelper.SaveTextToFile(pointText.ToString(), mergedTextPath);
}
/// <summary>
/// Writes a single SKU record to a CSV file with a fixed six-column layout.
/// </summary>
/// <param name="filePath">Destination file path.</param>
/// <param name="skuInfo">Field-name-to-value record to write.</param>
private void SaveSkuInfoToLocalFile(string filePath, Dictionary<string, string> skuInfo)
{
    // Column name -> column position.
    Dictionary<string, int> columns = new Dictionary<string, int>
    {
        { "name", 0 },
        { "price", 1 },
        { "transportFee", 2 },
        { "monthSellCount", 3 },
        { "district", 4 },
        { "commentCount", 5 }
    };

    CsvWriter skuWriter = new CsvWriter(filePath, columns);
    skuWriter.AddRow(skuInfo);
    skuWriter.SaveToDisk();
}
/// <summary>
/// Exports the objects matching <paramref name="query"/> as a CSV file, unless
/// more objects match than the schema's download limit allows.
/// </summary>
/// <param name="objectIdentifier">Type identifier used to resolve the type, schema and connector.</param>
/// <param name="query">Search request supplying filters, sorts and the fields to exclude.</param>
/// <returns>
/// A result with <c>File = null</c> when the count exceeds the download limit;
/// otherwise a result carrying the CSV content.
/// </returns>
public async Task<DownloadResult> DownloadObjects([NotNull] string objectIdentifier, [NotNull][FromBody] ObjectSearchRequest query)
{
    var type = schemaRegistry.GetTypeByTypeIdentifier(objectIdentifier);
    var schema = schemaRegistry.GetSchemaByTypeIdentifier(objectIdentifier);
    var downloadLimit = schema.Description.DownloadLimit;

    // Count up to limit + 1 so that "more than the limit" is detectable.
    var count = await schemaRegistry.GetConnector(objectIdentifier).Count(query.GetFilters(), downloadLimit + 1).ConfigureAwait(false);
    if (count > downloadLimit)
    {
        return new DownloadResult
        {
            File = null,
            Count = (int)count,
            CountLimit = downloadLimit,
        };
    }

    var results = await schemaRegistry.GetConnector(objectIdentifier).Search(query.GetFilters(), query.GetSorts(), 0, downloadLimit).ConfigureAwait(false);

    var properties = new List<string>();
    var getters = new List<Func<object, object>>();
    PropertyHelpers.BuildGettersForProperties(type, "", x => x, properties, getters);

    // Positions of excluded properties, kept in a set for constant-time lookup.
    var excludedIndices = new HashSet<int>();
    for (var i = 0; i < properties.Count; i++)
    {
        if (query.ExcludedFields.Contains(properties[i]))
        {
            excludedIndices.Add(i);
        }
    }

    // The same positions are dropped from both lists so headers and getters stay aligned.
    var filteredProperties = properties.Where((x, i) => !excludedIndices.Contains(i)).ToArray();
    var filteredGetters = getters.Where((x, i) => !excludedIndices.Contains(i)).ToArray();

    var csvWriter = new CsvWriter(filteredProperties);
    foreach (var item in results)
    {
        csvWriter.AddRow(filteredGetters.Select(f => PropertyHelpers.ToString(f, item)).ToArray());
    }

    return new DownloadResult
    {
        Count = count ?? 0,
        CountLimit = downloadLimit,
        File = new FileInfo
        {
            Content = csvWriter.GetBytes(),
            ContentType = "text/csv",
            Name = $"{objectIdentifier}-{DateTime.UtcNow:yyyy-MM-dd-HHmm}.csv"
        }
    };
}
/// <summary>
/// Registers a text in the language tables, assigns it the next id and rewrites
/// the language file.
/// </summary>
/// <param name="text">Text to register.</param>
/// <returns>
/// 0 when <paramref name="text"/> is null or empty; the already assigned id when
/// the text is registered; otherwise the newly assigned id.
/// </returns>
public static int AddText(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return 0;
    }

    // Reuse the id of an already registered text. Previously the duplicate only
    // failed at langdic_id.Add, after max_id and langdic had already been changed.
    int existingId;
    if (langdic_id.TryGetValue(text, out existingId))
    {
        return existingId;
    }

    int id = ++max_id;
    langdic.Add(id, text);
    langdic_id.Add(text, id);
    writer.AddRow(new string[] { id.ToString(), text });

    // Persist the whole table after each addition.
    string contents = writer.Write();
    File.WriteAllText(Application.dataPath + filepath, contents, encoding);
    return id;
}
/// <summary>
/// Copies to the destination CSV every source row whose "announcementTitle"
/// contains all keywords of at least one keyword group.
/// </summary>
/// <remarks>
/// <c>this.Parameters</c> is split on commas: element 0 is the source file path,
/// element 1 the destination file path, element 2 the keyword groups.
/// Groups are separated by "$"; keywords inside a group are separated by "|".
/// </remarks>
private void Search()
{
    string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
    string sourceFilePath = parameters[0];
    string destFilePath = parameters[1];
    string groupsText = parameters[2];

    string[] groupTexts = groupsText.Split(new string[] { "$" }, StringSplitOptions.RemoveEmptyEntries);
    string[] keywordSeparators = new string[] { "|" };
    List<string[]> keywordGroups = new List<string[]>(
        Array.ConvertAll(groupTexts, g => g.Split(keywordSeparators, StringSplitOptions.RemoveEmptyEntries)));

    CsvWriter matchWriter = this.GetCsvWriter(destFilePath);
    CsvReader sourceReader = new CsvReader(sourceFilePath);
    int totalRows = sourceReader.GetRowCount();

    for (int rowIndex = 0; rowIndex < totalRows; rowIndex++)
    {
        Dictionary<string, string> row = sourceReader.GetFieldValues(rowIndex);
        string title = row["announcementTitle"];

        // A row is written once, as soon as one group has all of its keywords in the title.
        bool anyGroupMatches = keywordGroups.Exists(
            group => Array.TrueForAll(group, keyword => title.Contains(keyword)));
        if (anyGroupMatches)
        {
            matchWriter.AddRow(row);
        }
    }

    matchWriter.SaveToDisk();
}
/// <summary>
/// Writes the shops found for a keyword to a CSV file, stamping each record with
/// the keyword.
/// </summary>
/// <param name="filePath">Destination file path.</param>
/// <param name="allKeywordShops">Shop records; each gets its "keyword" entry set in place.</param>
/// <param name="keyword">Keyword stored in the "keyword" column of every row.</param>
private void SaveKeywordShopInfoToLocalFile(string filePath, List<Dictionary<string, string>> allKeywordShops, string keyword)
{
    // Column name -> column position.
    Dictionary<string, int> columnNameToIndex = new Dictionary<string, int>
    {
        { "name", 0 },
        { "subscribe", 1 },
        { "mark", 2 },
        { "jdSelf", 3 },
        { "keyword", 4 }
    };

    CsvWriter cw = new CsvWriter(filePath, columnNameToIndex);
    foreach (Dictionary<string, string> shopInfo in allKeywordShops)
    {
        // Indexer instead of Add: a record that already has a "keyword" entry
        // (e.g. the same list saved twice) is overwritten rather than throwing.
        shopInfo["keyword"] = keyword;
        cw.AddRow(shopInfo);
    }

    cw.SaveToDisk();
}
/// <summary>
/// Writes the products of a shop to a CSV file, stamping each record with the
/// shop name.
/// </summary>
/// <param name="filePath">Destination file path.</param>
/// <param name="allProducts">Product records; each gets its "shopName" entry set in place.</param>
/// <param name="shopName">Shop name stored in the "shopName" column of every row.</param>
private void SaveShopProductInfoToLocalFile(string filePath, List<Dictionary<string, string>> allProducts, string shopName)
{
    // Column name -> column position.
    Dictionary<string, int> columnNameToIndex = new Dictionary<string, int>
    {
        { "shopName", 0 },
        { "productName", 1 },
        { "price", 2 },
        { "commentNum", 3 },
        { "goodMark", 4 }
    };

    CsvWriter cw = new CsvWriter(filePath, columnNameToIndex);
    foreach (Dictionary<string, string> productInfo in allProducts)
    {
        // Indexer instead of Add: a record that already has a "shopName" entry
        // is overwritten rather than throwing.
        productInfo["shopName"] = shopName;
        cw.AddRow(productInfo);
    }

    cw.SaveToDisk();
}
/// <summary>
/// Builds the city-to-shop mapping file: for every list row, reads the locally
/// stored shop sheet for that row's detail page and writes one output row per
/// shop, tagged with the row's search coordinates and city.
/// </summary>
/// <param name="listSheet">List sheet whose rows carry the detail page URL, "elemeCity", "lat" and "lng".</param>
private void GetCityToShops(IListSheet listSheet)
{
    String exportDir = this.RunPage.GetExportDir();

    // Looked up once; it was previously fetched again for every row while this local went unused.
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    string resultFilePath = Path.Combine(exportDir, "饿了么_城市与店铺对照.csv");
    CsvWriter resultEW = this.CreateCityToShopsWriter(resultFilePath);

    // Fields copied unchanged from each shop row.
    string[] copiedFields = { "address", "description", "id", "latitude", "longitude", "name", "phone", "promotion_info" };

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailPageUrl = row[SysConfig.DetailPageUrlFieldName];
        string elemeCity = row["elemeCity"];
        string searchLat = row["lat"];
        string searchLng = row["lng"];

        string shopsFilePath = this.RunPage.GetFilePath(detailPageUrl, pageSourceDir);
        ExcelReader er = new ExcelReader(shopsFilePath);
        int rowCount = er.GetRowCount();
        for (int j = 0; j < rowCount; j++)
        {
            Dictionary<string, string> subRow = er.GetFieldValues(j);
            Dictionary<string, string> mapRow = new Dictionary<string, string>();
            foreach (string field in copiedFields)
            {
                mapRow.Add(field, subRow[field]);
            }

            mapRow.Add("searchLat", searchLat);
            mapRow.Add("searchLng", searchLng);
            mapRow.Add("elemeCity", elemeCity);
            resultEW.AddRow(mapRow);
        }
    }

    resultEW.SaveToDisk();
}
/// <summary>
/// Loads the language CSV into the id-to-text and text-to-id tables, mirrors
/// its rows into the shared writer and determines the highest id used in this
/// client's id range.
/// </summary>
/// <param name="client_id">
/// Lower bound (exclusive) of the client's id range; ids strictly between
/// <paramref name="client_id"/> and <paramref name="client_id"/> + 1000000 count towards <c>max_id</c>.
/// </param>
public static void Init(int client_id)
{
    encoding = GetFileEncodeType(Application.dataPath + filepath);
    string csv = File.ReadAllText(Application.dataPath + filepath, encoding);
    CsvReader reader = new CsvReader();
    langdic = new Dictionary<int, string>();
    langdic_id = new Dictionary<string, int>();
    writer = new CsvWriter();

    if (!string.IsNullOrEmpty(csv.Trim()))
    {
        foreach (var row in reader.Read(csv))
        {
            int id = int.Parse(row[0]);
            if (id > client_id && id < client_id + 1000000)
            {
                max_id = Mathf.Max(max_id, id);
            }

            // ContainsKey rather than "stored value is empty": an id whose first
            // text is empty is still a duplicate and previously made Add throw.
            if (langdic.ContainsKey(id))
            {
                Debug.LogError("language.csv 中有重复id:" + id);
                continue;
            }

            string text = row[1];
            langdic.Add(id, text);

            // The first id registered for a text wins in the reverse lookup;
            // a repeated text is reported instead of crashing on Add.
            if (langdic_id.ContainsKey(text))
            {
                Debug.LogError("language.csv 中有重复文本:" + text);
            }
            else
            {
                langdic_id.Add(text, id);
            }

            writer.AddRow(row);
        }
    }

    // No id found so far: start numbering from the client id.
    if (max_id == 0)
    {
        max_id = client_id;
    }
}
/// <summary>
/// Copies a contiguous range of rows from the reader <c>CR</c> into a new CSV
/// file that uses the same column layout.
/// </summary>
/// <param name="destFilePath">Destination file path.</param>
/// <param name="fromRowIndex">Index of the first row to copy.</param>
/// <param name="rowCount">Maximum number of rows to copy; capped at the rows remaining after <paramref name="fromRowIndex"/>.</param>
/// <exception cref="Exception">Thrown when <paramref name="fromRowIndex"/> is greater than the total row count.</exception>
public void GetPart(string destFilePath, int fromRowIndex, int rowCount)
{
    Dictionary<string, int> columnLayout = CR.GetColumnNameToIndex();
    CsvWriter partWriter = new CsvWriter(destFilePath, columnLayout);

    int rowsAvailable = CR.GetRowCount() - fromRowIndex;
    if (rowsAvailable < 0)
    {
        throw new Exception("获取csv部分数据时, 起始行超出总行数");
    }

    // Never read past the last row of the source.
    int rowsToCopy = Math.Min(rowCount, rowsAvailable);
    for (int offset = 0; offset < rowsToCopy; offset++)
    {
        Dictionary<string, string> sourceRow = CR.GetFieldValues(fromRowIndex + offset);
        partWriter.AddRow(sourceRow);
    }

    partWriter.SaveToDisk();
}
/// <summary>
/// Exports the community list: for every list row that was not given up, reads
/// the stored page count and each stored JSON page, and writes one CSV row per
/// community URL (each URL is written only once).
/// </summary>
/// <param name="listSheet">List sheet whose rows carry the detail URL plus city and area codes/names.</param>
private void GetCityXiaoquList(IListSheet listSheet)
{
    String exportDir = this.RunPage.GetExportDir();
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    // Output column name -> column position.
    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>
    {
        { "cityCode", 0 },
        { "cityName", 1 },
        { "level1AreaCode", 2 },
        { "level1AreaName", 3 },
        { "level2AreaCode", 4 },
        { "level2AreaName", 5 },
        { "name", 6 },
        { "address", 7 },
        { "sale_num", 8 },
        { "build_year", 9 },
        { "mid_price", 10 },
        { "url", 11 }
    };
    string resultFilePath = Path.Combine(exportDir, "安居客小区列表.csv");
    CsvWriter resultEW = new CsvWriter(resultFilePath, resultColumnDic);

    // URLs already written; only the keys matter.
    Dictionary<string, string> urlDic = new Dictionary<string, string>();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        if (i % 100 == 0)
        {
            // Progress report every 100 list rows.
            this.RunPage.InvokeAppendLogText("正在输出CSV文件... " + (i * 100.0 / listSheet.RowCount).ToString("0.00") + "%", LogLevelType.System, true);
        }

        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailUrl = row["detailPageUrl"];
        string cityName = row["cityName"];
        string cityCode = row["cityCode"];
        string level1AreaName = row["level1AreaName"];
        string level1AreaCode = row["level1AreaCode"];
        string level2AreaCode = row["level2AreaCode"];
        string level2AreaName = row["level2AreaName"];
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        // The file stored under the bare detail URL holds the number of pages.
        string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);
        int requestPageCount = int.Parse(FileHelper.GetTextFromFile(localFilePath));

        for (int j = 0; j < requestPageCount; j++)
        {
            // Pages are stored under "<detailUrl>?p=<1-based page number>".
            string pageLocalFilePath = this.RunPage.GetFilePath(detailUrl + "?p=" + (j + 1).ToString(), pageSourceDir);
            string pageFileText = FileHelper.GetTextFromFile(pageLocalFilePath);

            JObject rootJo = JObject.Parse(pageFileText);
            JArray xiaoquJsonArray = rootJo["data"] as JArray;
            for (int k = 0; k < xiaoquJsonArray.Count; k++)
            {
                JObject xiaoquJson = xiaoquJsonArray[k] as JObject;
                string name = CommonUtil.HtmlDecode(xiaoquJson["name"].ToString());
                string address = CommonUtil.HtmlDecode(xiaoquJson["address"].ToString());
                string sale_num = xiaoquJson["sale_num"].ToString();
                string build_year = CommonUtil.HtmlDecode(xiaoquJson["build_year"].ToString());
                string mid_price = xiaoquJson["mid_price"].ToString();
                string url = CommonUtil.HtmlDecode(xiaoquJson["url"].ToString());

                if (urlDic.ContainsKey(url))
                {
                    continue;
                }

                urlDic.Add(url, null);

                Dictionary<string, string> f2vs = new Dictionary<string, string>();
                f2vs.Add("cityCode", cityCode);
                f2vs.Add("cityName", cityName);
                // The area columns take the row's own area values; they used to
                // receive the city code/name by copy-paste mistake.
                f2vs.Add("level1AreaCode", level1AreaCode);
                f2vs.Add("level1AreaName", level1AreaName);
                f2vs.Add("level2AreaCode", level2AreaCode);
                f2vs.Add("level2AreaName", level2AreaName);
                f2vs.Add("name", name);
                f2vs.Add("address", address);
                f2vs.Add("sale_num", sale_num);
                f2vs.Add("build_year", build_year);
                f2vs.Add("mid_price", mid_price);
                f2vs.Add("url", url);
                resultEW.AddRow(f2vs);
            }
        }
    }

    resultEW.SaveToDisk();
    this.RunPage.InvokeAppendLogText("完成输出CSV文件... 100%", LogLevelType.System, true);
}
/// <summary>
/// Exports the announcements from every stored list page to CSV: for each list
/// row that was not given up, parses the stored JSON and writes one row per
/// announcement id (each id is written only once).
/// </summary>
/// <param name="listSheet">List sheet whose rows carry the detail URL and the company fields copied into each output row.</param>
private void GetGongGaoListAllPagesToCsv(IListSheet listSheet)
{
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();
    CsvWriter ew = this.GetCsvWriter();

    // Announcement ids already written; only the keys matter.
    Dictionary<string, string> announcementDic = new Dictionary<string, string>();

    // "announcementTime" is parsed as milliseconds added to this date.
    DateTime epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailUrl = row["detailPageUrl"];
        string orgId = row["orgId"];
        string pinyin = row["pinyin"];
        string code = row["code"];
        string zwjc = row["zwjc"];
        string category = row["category"];
        string stockExchange = row["stockExchange"];
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

        // ReadAllText opens, reads and closes the file, so no manual reader cleanup
        // is needed; exceptions propagate with their original stack trace.
        string js = File.ReadAllText(localFilePath, Encoding.UTF8);
        JObject rootJo = JObject.Parse(js);
        JArray itemArrayJsons = rootJo.SelectToken("announcements") as JArray;

        for (int j = 0; j < itemArrayJsons.Count; j++)
        {
            JObject itemJson = itemArrayJsons[j] as JObject;
            string announcementId = itemJson.GetValue("announcementId").ToString().Trim();
            string announcementTitle = CommonUtil.HtmlDecode(itemJson.GetValue("announcementTitle").ToString().Trim());
            string announcementTimeStr = itemJson.GetValue("announcementTime").ToString().Trim();
            string adjunctType = itemJson.GetValue("adjunctType").ToString().Trim();
            string adjunctUrl = itemJson.GetValue("adjunctUrl").ToString().Trim();

            DateTime announcementTime = epoch.AddMilliseconds(long.Parse(announcementTimeStr)).ToLocalTime();
            string outAnnouncementTimeStr = announcementTime.ToString("yyyy-MM-dd HH:mm:ss");

            if (announcementDic.ContainsKey(announcementId))
            {
                continue;
            }

            announcementDic.Add(announcementId, null);

            Dictionary<string, string> f2vs = new Dictionary<string, string>();
            f2vs.Add("fileUrl", "http://www.cninfo.com.cn/" + adjunctUrl);
            f2vs.Add("announcementId", announcementId);
            f2vs.Add("zwjc", zwjc);
            f2vs.Add("code", code);
            f2vs.Add("pinyin", pinyin);
            f2vs.Add("orgId", orgId);
            f2vs.Add("category", category);
            f2vs.Add("stockExchange", stockExchange);
            f2vs.Add("announcementTitle", announcementTitle);
            f2vs.Add("announcementTime", outAnnouncementTimeStr);
            f2vs.Add("adjunctType", adjunctType);
            ew.AddRow(f2vs);
        }
    }

    ew.SaveToDisk();
}
/// <summary>
/// Index pages of each periodical issue's table of contents: exports the paper
/// records found in the stored JSON pages to CSV, starting a new output file
/// once the current one has received 500000 papers.
/// </summary>
/// <param name="listSheet">List sheet whose rows carry the detail URL and the give-up flag.</param>
private void GetAllPerioIndexPageUrls(IListSheet listSheet)
{
    String exportDir = this.RunPage.GetExportDir();
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();
    int allListFileIndex = 1;
    CsvWriter ew = null;

    // Paper ids already written; only the keys matter.
    Dictionary<string, string> idDic = new Dictionary<string, string>();
    int paperCount = 0;

    // Attributes copied from each paper's JSON object, in the original order.
    string[] attributeNames =
    {
        "id", "publish_year", "fund_info02", "page_range", "keywords", "auto_keys", "page_cnt", "doc_num",
        "perio_id", "language", "refdoc_cnt", "abstract_url", "scholar_id", "auto_classcode", "authors_name",
        "share_num", "trans_column", "is_fulltext", "issue_num", "pro_pub_date", "hxkbj_pku", "perio_title02",
        "cite_num", "unit_name", "linkdoc_cnt", "issn", "unit_name02", "data_state", "random_id", "cited_cnt",
        "doi", "fund_info", "trans_authors", "literature_code", "data_sort", "new_org", "core_perio",
        "publish_year02", "auth_area", "article_id", "tag_num", "abstract_reading_num", "auto_classcode_level",
        "first_authors", "full_pubdate", "hxkbj_istic", "common_year", "authors_unit", "thirdparty_links_num",
        "abst_webdate", "article_seq", "import_num", "common_sort_time", "issue_id", "full_url", "orig_pub_date",
        "source_db", "column_name", "cn", "collection_num", "download_num", "orig_classcode", "service_model",
        "first_publish", "is_oa", "subject_class_codes", "fulltext_reading_num", "note_num", "updatetime",
        "head_words", "subject_classcode_level", "trans_title", "perio_title_en", "title", "summary",
        "perio_title", "class_type", "doct_collect"
    };

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        if (paperCount % 1000 == 0)
        {
            this.RunPage.InvokeAppendLogText("已处理到: fileIndex = " + allListFileIndex.ToString() + ", paperIndex = " + paperCount.ToString(), LogLevelType.System, true);
        }

        // Open the first writer lazily and roll over when the current file is full.
        // Without the null check no writer was ever created, so the first AddRow
        // dereferenced null.
        if (ew == null || paperCount >= 500000)
        {
            if (ew != null)
            {
                ew.SaveToDisk();
            }

            ew = this.GetAllPerioIndexPageCsvWriter(allListFileIndex);
            allListFileIndex++;
            paperCount = 0;
        }

        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailUrl = row["detailPageUrl"];
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);
        try
        {
            string pageFileText = FileHelper.GetTextFromFile(localFilePath);
            JArray itemJsonArray = JObject.Parse(pageFileText).GetValue("pageRow") as JArray;
            if (itemJsonArray == null || itemJsonArray.Count == 0)
            {
                continue;
            }

            for (int j = 0; j < itemJsonArray.Count; j++)
            {
                JObject itemJson = itemJsonArray[j] as JObject;
                string id = itemJson.GetValue("id").ToString();
                if (idDic.ContainsKey(id))
                {
                    continue;
                }

                idDic.Add(id, null);

                Dictionary<string, string> f2vs = new Dictionary<string, string>();
                foreach (string attributeName in attributeNames)
                {
                    this.GetAttributeValue(itemJson, attributeName, f2vs);
                }

                paperCount++;
                ew.AddRow(f2vs);
            }
        }
        catch (Exception ex)
        {
            this.RunPage.InvokeAppendLogText(ex.Message + ". detailUrl = " + detailUrl, LogLevelType.Error, true);

            // Bare rethrow keeps the original stack trace.
            throw;
        }
    }

    // No writer exists when the list sheet is empty.
    if (ew != null)
    {
        ew.SaveToDisk();
    }
}
/// <summary>
/// Converts the stored image of one block into a CSV of sample points: the
/// image is scanned in 16x16 pixel cells and, for every cell whose average
/// inverted hue lies strictly between 0 and 360, one row with the cell centre
/// and that average is written. Nothing is done when the block's CSV exists.
/// </summary>
/// <param name="exportDir">Export directory; the CSV goes into its "temp" sub-directory.</param>
/// <param name="listRow">List row carrying "detailPageUrl", "x", "y", "z" and "time".</param>
/// <param name="pageSourceDir">Directory used to resolve the local image file.</param>
private void GetBlockInfo(string exportDir, Dictionary<string, string> listRow, string pageSourceDir)
{
    string detailUrl = listRow["detailPageUrl"];
    string x = listRow["x"];
    string y = listRow["y"];
    string z = listRow["z"];
    string time = listRow["time"];

    string tempDir = Path.Combine(exportDir, "temp");
    string blockInfoPath = Path.Combine(tempDir, x + "_" + y + "_" + z + "_" + time + ".csv");
    if (File.Exists(blockInfoPath))
    {
        // Already converted.
        return;
    }

    string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

    // Output column name -> column position.
    Dictionary<string, int> blockInfoDic = new Dictionary<string, int>
    {
        { "x", 0 },
        { "y", 1 },
        { "z", 2 },
        { "xp", 3 },
        { "yp", 4 },
        { "v", 5 },
        { "time", 6 }
    };
    CsvWriter blockInfoCW = new CsvWriter(blockInfoPath, blockInfoDic);

    const int cellSize = 16;

    // Dispose the bitmap when done so the image file handle is released.
    using (Bitmap img = new Bitmap(localFilePath))
    {
        int width = img.Width;
        int height = img.Height;

        for (int xx = 0; xx < width; xx += cellSize)
        {
            // Rows are bounded by the image height (this used to test the width,
            // which skipped the lower part of images taller than they are wide).
            for (int yy = 0; yy < height; yy += cellSize)
            {
                float sumH = 0;
                for (int i = 0; i < cellSize && xx + i < width; i++)
                {
                    for (int j = 0; j < cellSize && yy + j < height; j++)
                    {
                        float h = 360 - img.GetPixel(xx + i, yy + j).GetHue();

                        // Values of exactly 0 or 360 are left out of the sum.
                        if (h > 0 && h < 360)
                        {
                            sumH += h;
                        }
                    }
                }

                // The average is always taken over a full cell, including cells cut off by the image edge.
                float avgH = sumH / (cellSize * cellSize);
                if (avgH <= 0 || avgH >= 360)
                {
                    continue;
                }

                // Each sample is reported at the centre of its cell.
                int centerX = xx + cellSize / 2;
                int centerY = yy + cellSize / 2;

                Dictionary<string, string> cityReport = new Dictionary<string, string>();
                cityReport.Add("x", x);
                cityReport.Add("y", y);
                cityReport.Add("z", z);
                cityReport.Add("xp", centerX.ToString());
                cityReport.Add("yp", centerY.ToString());
                cityReport.Add("v", avgH.ToString());
                cityReport.Add("time", time);
                blockInfoCW.AddRow(cityReport);
            }
        }
    }

    blockInfoCW.SaveToDisk();
}
/// <summary>
/// Runs after all pages are grabbed: reads every non-abandoned row's local detail page, extracts
/// the shop fields and writes one output row per shop.
/// </summary>
/// <remarks>
/// Values taken from the list row: city, rName, shopName, shopCode, gName, reviewNum.
/// Values parsed from the page: latitude/longitude (quoted values following "shopGlat:" and
/// "shopGlng:"), address, telephone numbers, and the "人均", "口味", "环境" and "服务" figures.
/// Any field that cannot be found is written as an empty string.
/// </remarks>
/// <param name="listSheet">List sheet providing the rows and their local HTML documents.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool AfterAllGrab(IListSheet listSheet)
{
    String exportDir = this.RunPage.GetExportDir();

    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
    resultColumnDic.Add("city", 0);
    resultColumnDic.Add("distrctName", 1);
    resultColumnDic.Add("shopName", 2);
    resultColumnDic.Add("shopCode", 3);
    resultColumnDic.Add("address", 4);
    resultColumnDic.Add("tel", 5);
    resultColumnDic.Add("shopType", 6);
    resultColumnDic.Add("commentNum", 7);
    resultColumnDic.Add("lat", 8);
    resultColumnDic.Add("lng", 9);
    resultColumnDic.Add("人均", 10);
    resultColumnDic.Add("口味", 11);
    resultColumnDic.Add("环境", 12);
    resultColumnDic.Add("服务", 13);

    // NOTE(review): the file is named .xlsx but is produced by CsvWriter - confirm the intended format.
    string resultFilePath = Path.Combine(exportDir, "大众点评店铺信息.xlsx");
    CsvWriter resultEW = new CsvWriter(resultFilePath, resultColumnDic);

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        string city = row["city"];
        string distrctName = row["rName"];
        string shopName = row["shopName"];
        string shopCode = row["shopCode"];
        string shopType = row["gName"];
        string commentNumStr = row["reviewNum"];
        Nullable<int> commentNum = string.IsNullOrEmpty(commentNumStr)
            ? (Nullable<int>)null
            : int.Parse(commentNumStr);

        string address = "";
        string tel = "";
        Nullable<decimal> renJun = null;
        Nullable<decimal> kouWei = null;
        Nullable<decimal> huanJing = null;
        Nullable<decimal> fuWu = null;

        HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
        string pageText = pageHtmlDoc.DocumentNode.InnerHtml;
        Nullable<decimal> lat = ExtractShopCoordinate(pageText, "shopGlat:");
        Nullable<decimal> lng = ExtractShopCoordinate(pageText, "shopGlng:");

        HtmlNode addressNode = pageHtmlDoc.DocumentNode.SelectSingleNode("//span[@itemprop=\"street-address\"]");
        if (addressNode != null)
        {
            // GetAttributeValue falls back to "" instead of throwing when "title" is missing.
            address = addressNode.GetAttributeValue("title", "");
        }

        HtmlNodeCollection allTelNodes = pageHtmlDoc.DocumentNode.SelectNodes("//span[@itemprop=\"tel\"]");
        if (allTelNodes != null)
        {
            StringBuilder tels = new StringBuilder();
            foreach (HtmlNode telNode in allTelNodes)
            {
                if (tels.Length > 0)
                {
                    tels.Append(",");
                }
                tels.Append(telNode.InnerText);
            }
            tel = tels.ToString();
        }

        // SelectNodes returns null when nothing matches, so guard before enumerating
        // (this query was the only one in the method that was not guarded).
        HtmlNodeCollection allBriefNodes = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"brief-info\"]/span");
        if (allBriefNodes != null)
        {
            foreach (HtmlNode briefNode in allBriefNodes)
            {
                string briefText = briefNode.InnerText;
                if (briefText.StartsWith("人均:", StringComparison.Ordinal))
                {
                    // Drop the 3-character label and the single trailing character.
                    // A bare label yields an empty value instead of an ArgumentOutOfRangeException.
                    string briefValue = briefText.Length > 3
                        ? briefText.Substring(3, briefText.Length - 4)
                        : "";
                    renJun = ParseOptionalShopDecimal(briefValue);
                }
            }
        }

        HtmlNodeCollection allScoreNodes = pageHtmlDoc.DocumentNode.SelectNodes("//span[@id=\"comment_score\"]/span");
        if (allScoreNodes != null)
        {
            foreach (HtmlNode scoreNode in allScoreNodes)
            {
                string scoreText = scoreNode.InnerText;
                if (scoreText.StartsWith("口味:", StringComparison.Ordinal))
                {
                    kouWei = ParseOptionalShopDecimal(scoreText.Substring(3));
                }
                else if (scoreText.StartsWith("环境:", StringComparison.Ordinal))
                {
                    huanJing = ParseOptionalShopDecimal(scoreText.Substring(3));
                }
                else if (scoreText.StartsWith("服务:", StringComparison.Ordinal))
                {
                    fuWu = ParseOptionalShopDecimal(scoreText.Substring(3));
                }
            }
        }

        // Nullable<T>.ToString() yields "" for a missing value.
        Dictionary<string, string> f2vs = new Dictionary<string, string>();
        f2vs.Add("city", city);
        f2vs.Add("distrctName", distrctName);
        f2vs.Add("shopName", shopName);
        f2vs.Add("shopCode", shopCode);
        f2vs.Add("address", address);
        f2vs.Add("shopType", shopType);
        f2vs.Add("commentNum", commentNum.ToString());
        f2vs.Add("lat", lat.ToString());
        f2vs.Add("lng", lng.ToString());
        f2vs.Add("人均", renJun.ToString());
        f2vs.Add("tel", tel);
        f2vs.Add("口味", kouWei.ToString());
        f2vs.Add("服务", fuWu.ToString());
        f2vs.Add("环境", huanJing.ToString());
        resultEW.AddRow(f2vs);
    }

    resultEW.SaveToDisk();
    return true;
}

/// <summary>
/// Finds <paramref name="marker"/> in the page text and parses the first double-quoted value after it.
/// </summary>
/// <returns>The parsed value, or <c>null</c> when the marker, the quotes or a valid number is missing.</returns>
private static Nullable<decimal> ExtractShopCoordinate(string pageText, string marker)
{
    int markerIndex = pageText.IndexOf(marker, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        return null;
    }

    int openQuoteIndex = pageText.IndexOf("\"", markerIndex, StringComparison.Ordinal);
    if (openQuoteIndex < 0)
    {
        return null;
    }

    int closeQuoteIndex = pageText.IndexOf("\"", openQuoteIndex + 1, StringComparison.Ordinal);
    if (closeQuoteIndex - openQuoteIndex <= 1)
    {
        // No closing quote, or an empty quoted value.
        return null;
    }

    decimal value;
    string quoted = pageText.Substring(openQuoteIndex + 1, closeQuoteIndex - openQuoteIndex - 1);
    if (decimal.TryParse(quoted, out value))
    {
        return value;
    }
    return null;
}

/// <summary>
/// Trims <paramref name="text"/> and parses it as a decimal; an empty value maps to <c>null</c>.
/// </summary>
/// <exception cref="FormatException">The trimmed text is non-empty but not a valid decimal.</exception>
private static Nullable<decimal> ParseOptionalShopDecimal(string text)
{
    string trimmed = text.Trim();
    if (trimmed.Length == 0)
    {
        return null;
    }
    return decimal.Parse(trimmed);
}
/// <summary>
/// Builds keyword co-occurrence matrices from the keyword sheet: one matrix per year plus one
/// matrix accumulated over all years.
/// </summary>
/// <remarks>
/// The input sheet must provide "source", "year" and "keyword" columns. Keywords are grouped by
/// (year, source) with duplicates removed; every pair of keywords sharing a group adds 1 to the
/// corresponding cell, and each keyword also adds 1 to its own diagonal cell. The total matrix is
/// written through <c>GetMatrixCsvWriter</c> with year 0.
/// <c>Parameters</c> is split on ","; element 0 is the keyword file path and element 2 is the
/// export directory (element 1 is not used here).
/// </remarks>
/// <param name="listSheet">Not used by this method.</param>
private void GetYearWordsMatrixCount(IListSheet listSheet)
{
    string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
    string allKeywordsFilePath = parameters[0];
    string exportDirPath = parameters[2];

    ExcelReader er = new ExcelReader(allKeywordsFilePath);
    int inputRowCount = er.GetRowCount();

    // keywordList keeps first-seen order; the set makes the duplicate check O(1).
    List<string> keywordList = new List<string>();
    HashSet<string> seenKeywords = new HashSet<string>();
    Dictionary<int, Dictionary<string, List<string>>> yearSourceWordList = new Dictionary<int, Dictionary<string, List<string>>>();
    for (int i = 0; i < inputRowCount; i++)
    {
        Dictionary<string, string> row = er.GetFieldValues(i);
        string source = row["source"];
        int year = int.Parse(row["year"]);
        string keyword = row["keyword"];

        if (seenKeywords.Add(keyword))
        {
            keywordList.Add(keyword);
        }

        Dictionary<string, List<string>> sourceWordList;
        if (!yearSourceWordList.TryGetValue(year, out sourceWordList))
        {
            sourceWordList = new Dictionary<string, List<string>>();
            yearSourceWordList.Add(year, sourceWordList);
        }

        List<string> wordList;
        if (!sourceWordList.TryGetValue(source, out wordList))
        {
            wordList = new List<string>();
            sourceWordList.Add(source, wordList);
        }

        if (!wordList.Contains(keyword))
        {
            wordList.Add(keyword);
        }
    }

    Dictionary<string, Dictionary<string, int>> totalYearMartixDataDic = new Dictionary<string, Dictionary<string, int>>();
    foreach (KeyValuePair<int, Dictionary<string, List<string>>> yearEntry in yearSourceWordList)
    {
        Dictionary<string, Dictionary<string, int>> yearMatrixDataDic = new Dictionary<string, Dictionary<string, int>>();
        foreach (List<string> kwList in yearEntry.Value.Values)
        {
            foreach (string kw_i in kwList)
            {
                Dictionary<string, int> iDic;
                if (!yearMatrixDataDic.TryGetValue(kw_i, out iDic))
                {
                    iDic = new Dictionary<string, int>();
                    yearMatrixDataDic.Add(kw_i, iDic);
                }

                // Diagonal cell: the keyword occurred in this source.
                AddKeywordCount(iDic, kw_i, 1);

                // Off-diagonal cells: every other keyword from the same source.
                foreach (string kw_j in kwList)
                {
                    if (kw_i != kw_j)
                    {
                        AddKeywordCount(iDic, kw_j, 1);
                    }
                }
            }
        }

        WriteKeywordMatrix(exportDirPath, yearEntry.Key, keywordList, yearMatrixDataDic);

        // Fold this year's counts into the all-years matrix.
        foreach (KeyValuePair<string, Dictionary<string, int>> rowEntry in yearMatrixDataDic)
        {
            Dictionary<string, int> iTotalDataDic;
            if (!totalYearMartixDataDic.TryGetValue(rowEntry.Key, out iTotalDataDic))
            {
                iTotalDataDic = new Dictionary<string, int>();
                totalYearMartixDataDic.Add(rowEntry.Key, iTotalDataDic);
            }

            foreach (KeyValuePair<string, int> cell in rowEntry.Value)
            {
                AddKeywordCount(iTotalDataDic, cell.Key, cell.Value);
            }
        }
    }

    WriteKeywordMatrix(exportDirPath, 0, keywordList, totalYearMartixDataDic);
}

/// <summary>Adds <paramref name="delta"/> to the count stored under <paramref name="key"/>, starting from 0.</summary>
private static void AddKeywordCount(Dictionary<string, int> counts, string key, int delta)
{
    int current;
    counts.TryGetValue(key, out current); // leaves current at 0 when the key is absent
    counts[key] = current + delta;
}

/// <summary>
/// Writes one full keyword x keyword matrix (missing cells as "0") through the writer returned by
/// <c>GetMatrixCsvWriter</c> for the given year.
/// </summary>
private void WriteKeywordMatrix(string exportDirPath, int year, List<string> keywordList, Dictionary<string, Dictionary<string, int>> matrixDataDic)
{
    CsvWriter resultWriter = this.GetMatrixCsvWriter(exportDirPath, year, keywordList);
    foreach (string kw_i in keywordList)
    {
        Dictionary<string, string> matrixRow = new Dictionary<string, string>();
        matrixRow["keywordMatrix"] = kw_i;

        Dictionary<string, int> rowCounts;
        if (!matrixDataDic.TryGetValue(kw_i, out rowCounts))
        {
            rowCounts = null;
        }

        foreach (string kw_j in keywordList)
        {
            int count = 0;
            if (rowCounts != null)
            {
                rowCounts.TryGetValue(kw_j, out count);
            }
            matrixRow.Add(kw_j, count.ToString());
        }
        resultWriter.AddRow(matrixRow);
    }
    resultWriter.SaveToDisk();
}
/// <summary>
/// Parses every non-abandoned project detail page and fills six CSV outputs: the project's basic
/// information plus one output per detail tab (tab_ztb, tab_sgtsc, tab_htba, tab_sgxk, tab_jgysba).
/// </summary>
/// <remarks>
/// A page that fails to parse is treated as incomplete: its source file is moved into a "deleted"
/// directory next to the detail source directory and an error is logged; processing then
/// continues with the next row.
/// NOTE(review): rows already added for a page before the failure are not rolled back.
/// </remarks>
/// <param name="listSheet">List sheet providing the rows and their local HTML documents.</param>
/// <returns>Always <c>true</c>.</returns>
private bool GetAllPages(IListSheet listSheet)
{
    CsvWriter mainCW = this.GetMainCsvWriter();
    CsvWriter ztbCW = this.GetZtbCsvWriter();
    CsvWriter sgtscCW = this.GetSgtscCsvWriter();
    CsvWriter htbaCW = this.GetHtbaCsvWriter();
    CsvWriter sgxkCW = this.GetSgxkCsvWriter();
    CsvWriter jgysbaCW = this.GetJgysbaCsvWriter();

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailPageUrl = row[SysConfig.DetailPageUrlFieldName];
        try
        {
            if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
            {
                continue;
            }

            HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

            // ---- Basic information ----
            string xmmc = "";
            string xmbh = "";
            string sjxmbh = "";
            string szqh = "";
            string jsdw = "";
            string jsdwzzjgdm = "";
            string xmfl = "";
            string jsxz = "";
            string gcyt = "";
            string ztz = "";
            string zmj = "";
            string lxjb = "";
            string lxwh = "";

            HtmlNode xmmcNode = pageHtmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"user_info spmtop\"]");
            if (xmmcNode == null)
            {
                throw new Exception("没有找到项目名称节点");
            }
            xmmc = CommonUtil.HtmlDecode(xmmcNode.InnerText.Trim()).Trim();

            HtmlNodeCollection projectFieldNodeList = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"query_info_box \"]/div/div[@class=\"activeTinyTabContent\"]/dl/dd");
            if (projectFieldNodeList == null)
            {
                throw new Exception("无法获取项目基本信息属性值");
            }

            foreach (HtmlNode projectFieldNode in projectFieldNodeList)
            {
                // Each <dd> reads "name:value"; an entry without ":" throws and marks the page incomplete.
                string fieldText = projectFieldNode.InnerText.Trim();
                int sIndex = fieldText.IndexOf(":");
                string fieldName = CommonUtil.HtmlDecode(fieldText.Substring(0, sIndex)).Trim();
                string fieldValue = CommonUtil.HtmlDecode(fieldText.Substring(sIndex + 1)).Trim();
                switch (fieldName)
                {
                    case "项目编号": xmbh = fieldValue; break;
                    case "省级项目编号": sjxmbh = fieldValue; break;
                    case "所在区划": szqh = fieldValue; break;
                    case "建设单位": jsdw = fieldValue; break;
                    case "建设单位组织机构代码(统一社会信用代码)": jsdwzzjgdm = fieldValue; break;
                    case "项目分类": xmfl = fieldValue; break;
                    case "建设性质": jsxz = fieldValue; break;
                    case "工程用途": gcyt = fieldValue; break;
                    case "总投资": ztz = fieldValue; break;
                    case "总面积": zmj = fieldValue; break;
                    case "立项级别": lxjb = fieldValue; break;
                    case "立项文号": lxwh = fieldValue; break;
                }
            }

            Dictionary<string, string> f2vs = new Dictionary<string, string>();
            f2vs.Add("项目编号", xmbh);
            f2vs.Add("省级项目编号", sjxmbh);
            f2vs.Add("项目名称", xmmc);
            f2vs.Add("所在区划", szqh);
            f2vs.Add("建设单位", jsdw);
            f2vs.Add("建设单位组织机构代码(统一社会信用代码)", jsdwzzjgdm);
            f2vs.Add("项目分类", xmfl);
            f2vs.Add("建设性质", jsxz);
            f2vs.Add("工程用途", gcyt);
            f2vs.Add("总投资", ztz);
            f2vs.Add("总面积", zmj);
            f2vs.Add("立项级别", lxjb);
            f2vs.Add("立项文号", lxwh);
            mainCW.AddRow(f2vs);

            // ---- Detail tabs: column names map to cells 1..n of each table row ----
            AddProjectTabRows(pageHtmlDoc, "tab_ztb", xmbh, ztbCW,
                new string[] { "招标类型", "招标方式", "中标单位名称", "中标日期", "中标金额(万元)", "中标通知书编号", "省级中标通知书编号" });
            AddProjectTabRows(pageHtmlDoc, "tab_sgtsc", xmbh, sgtscCW,
                new string[] { "施工图审查合格书编号", "省级施工图审查合格书编号", "勘察单位名称", "设计单位名称", "施工图审查机构名称", "审查完成日期" });
            AddProjectTabRows(pageHtmlDoc, "tab_htba", xmbh, htbaCW,
                new string[] { "合同类别", "合同备案编号", "省级合同备案编号", "合同金额(万元)", "合同签订日期" });
            AddProjectTabRows(pageHtmlDoc, "tab_sgxk", xmbh, sgxkCW,
                new string[] { "施工许可证编号", "省级施工许可证编号", "合同金额(万元)", "面积(平方米)", "发证日期" });
            AddProjectTabRows(pageHtmlDoc, "tab_jgysba", xmbh, jgysbaCW,
                new string[] { "竣工备案编号", "省级竣工备案编号", "实际造价(万元)", "实际面积(平方米)", "实际开工日期", "实际竣工验收日期" });
        }
        catch (Exception ex)
        {
            // Deliberate best-effort: set the unparsable page aside and keep going.
            string dir = this.RunPage.GetDetailSourceFileDir();
            string toDir = Path.Combine(Path.GetDirectoryName(dir), "deleted");
            string fileUrl = this.RunPage.GetFilePath(detailPageUrl, dir);
            string toFileUrl = this.RunPage.GetFilePath(detailPageUrl, toDir);

            // File.Move does not create the destination directory.
            string toFileDir = Path.GetDirectoryName(toFileUrl);
            if (!string.IsNullOrEmpty(toFileDir))
            {
                Directory.CreateDirectory(toFileDir);
            }
            File.Move(fileUrl, toFileUrl);

            // Include the page and the cause so the failure can be diagnosed from the log.
            this.RunPage.InvokeAppendLogText("文件不完整,删除: " + detailPageUrl + ", " + ex.Message, LogLevelType.Error, true);
        }
    }

    mainCW.SaveToDisk();
    ztbCW.SaveToDisk();
    sgtscCW.SaveToDisk();
    htbaCW.SaveToDisk();
    sgxkCW.SaveToDisk();
    jgysbaCW.SaveToDisk();
    return true;
}

/// <summary>
/// Adds one output row per <c>tr[@class="row"]</c> of the table inside the div with id
/// <paramref name="tabId"/>. Every row carries "项目编码" plus <paramref name="columnNames"/>,
/// which are filled from the row's cells starting at cell index 1.
/// </summary>
private static void AddProjectTabRows(HtmlAgilityPack.HtmlDocument pageHtmlDoc, string tabId, string xmbh, CsvWriter writer, string[] columnNames)
{
    HtmlNodeCollection rowNodeList = pageHtmlDoc.DocumentNode.SelectNodes("//div[@id=\"" + tabId + "\"]/table/tbody/tr[@class=\"row\"]");
    if (rowNodeList == null)
    {
        return;
    }

    foreach (HtmlNode rowNode in rowNodeList)
    {
        HtmlNodeCollection fieldNodeList = rowNode.SelectNodes("./td");
        Dictionary<string, string> tabF2vs = new Dictionary<string, string>();
        tabF2vs.Add("项目编码", xmbh);
        for (int c = 0; c < columnNames.Length; c++)
        {
            tabF2vs.Add(columnNames[c], CommonUtil.HtmlDecode(fieldNodeList[c + 1].InnerText.Trim()));
        }
        writer.AddRow(tabF2vs);
    }
}
/// <summary>
/// Builds a distance matrix between the "pts" values of the source sheet from how often two
/// values appear in the same row.
/// </summary>
/// <remarks>
/// <c>Parameters</c> is split on ","; element 0 is the source Excel file and element 1 is the
/// destination path. Values occurring <c>ignoreNum</c> times or fewer are left out of the matrix.
/// The distance is 0 on the diagonal, <c>maxTime / count</c> for values seen together, and
/// <c>2 * maxTime</c> for values never seen together, where <c>maxTime</c> is the largest
/// off-diagonal co-occurrence count (at least 1).
/// Outputs, in write order: the matrix CSV at the destination path, "_AllPTName.xlsx" (every
/// value with its count), "_PTName.xlsx" (the kept values) and "_Array.txt" (the matrix as
/// <c>arr = [[...], ...]</c> text).
/// </remarks>
/// <param name="listSheet">Not used by this method.</param>
private void GetPTsMatrix(IListSheet listSheet)
{
    string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
    string sourceFilePath = parameters[0];
    string destFilePath = parameters[1];

    ExcelReader er = new ExcelReader(sourceFilePath);
    int sourceRowCount = er.GetRowCount();

    // Pass 1: count every value and keep each row's split values so the sheet is read only once.
    Dictionary<string, int> allPTCountDic = new Dictionary<string, int>();
    List<string> allPTList = new List<string>();
    List<string[]> rowPTs = new List<string[]>();
    for (int i = 0; i < sourceRowCount; i++)
    {
        Dictionary<string, string> sourceRow = er.GetFieldValues(i);
        string[] itemPTs = sourceRow["pts"].Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
        rowPTs.Add(itemPTs);
        foreach (string itemPT in itemPTs)
        {
            int seen;
            if (allPTCountDic.TryGetValue(itemPT, out seen))
            {
                allPTCountDic[itemPT] = seen + 1;
            }
            else
            {
                allPTList.Add(itemPT);
                allPTCountDic.Add(itemPT, 1);
            }
        }
    }

    // Values that occur ignoreNum times or fewer are ignored.
    int ignoreNum = 10;
    List<string> ptList = new List<string>();
    HashSet<string> keptPTs = new HashSet<string>();
    foreach (string itemPT in allPTList)
    {
        if (allPTCountDic[itemPT] > ignoreNum)
        {
            ptList.Add(itemPT);
            keptPTs.Add(itemPT);
        }
    }

    // Pass 2: co-occurrence counts between kept values.
    int maxTime = 1;
    Dictionary<string, Dictionary<string, int>> ptToPTDic = new Dictionary<string, Dictionary<string, int>>();
    foreach (string[] itemPTs in rowPTs)
    {
        foreach (string fromItemPT in itemPTs)
        {
            if (!keptPTs.Contains(fromItemPT))
            {
                continue;
            }

            Dictionary<string, int> ptDic;
            if (!ptToPTDic.TryGetValue(fromItemPT, out ptDic))
            {
                ptDic = new Dictionary<string, int>();
                ptToPTDic.Add(fromItemPT, ptDic);
            }

            int selfCount;
            ptDic.TryGetValue(fromItemPT, out selfCount); // 0 when absent
            ptDic[fromItemPT] = selfCount + 1;

            foreach (string toItemPT in itemPTs)
            {
                if (fromItemPT == toItemPT || !keptPTs.Contains(toItemPT))
                {
                    continue;
                }

                int pairCount;
                if (ptDic.TryGetValue(toItemPT, out pairCount))
                {
                    pairCount++;
                    ptDic[toItemPT] = pairCount;
                    if (pairCount > maxTime)
                    {
                        maxTime = pairCount;
                    }
                }
                else
                {
                    ptDic.Add(toItemPT, 1);
                }
            }
        }
    }

    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
    resultColumnDic.Add("ptToPT", 0);
    for (int i = 0; i < ptList.Count; i++)
    {
        resultColumnDic.Add(ptList[i], i + 1);
    }
    CsvWriter ptMatrixCW = new CsvWriter(destFilePath, resultColumnDic);

    // The CSV rows and the array text hold the same values, so build both in one loop.
    // The old code recomputed the rows for the array and added them to the CSV writer again,
    // after that writer had already been saved.
    StringBuilder ptArrayStringBuilder = new StringBuilder();
    ptArrayStringBuilder.Append("arr = [");
    for (int i = 0; i < ptList.Count; i++)
    {
        string fromPT = ptList[i];
        Dictionary<string, string> resultRow = new Dictionary<string, string>();
        resultRow.Add("ptToPT", fromPT);

        Dictionary<string, int> propertyDic;
        if (!ptToPTDic.TryGetValue(fromPT, out propertyDic))
        {
            propertyDic = null;
        }

        ptArrayStringBuilder.Append((i == 0 ? "" : ", \r\n") + "[");
        for (int j = 0; j < ptList.Count; j++)
        {
            string toPT = ptList[j];
            int together = 0;
            double value;
            if (fromPT == toPT)
            {
                value = 0;
            }
            else if (propertyDic == null || !propertyDic.TryGetValue(toPT, out together) || together == 0)
            {
                value = 2 * (double)maxTime;
            }
            else
            {
                value = (double)maxTime / (double)together;
            }

            // Invariant culture keeps "." as the decimal separator; a "," would break both the
            // array literal and the CSV columns.
            string valueText = value.ToString(System.Globalization.CultureInfo.InvariantCulture);
            resultRow.Add(toPT, valueText);
            ptArrayStringBuilder.Append((j == 0 ? "" : ", ") + valueText);
        }
        ptMatrixCW.AddRow(resultRow);
        ptArrayStringBuilder.Append("]");
    }
    ptArrayStringBuilder.Append("]");
    ptMatrixCW.SaveToDisk();

    string allPTNameFilePath = destFilePath + "_AllPTName.xlsx";
    Dictionary<string, int> allPTNameColumnDic = new Dictionary<string, int>();
    allPTNameColumnDic.Add("name", 0);
    allPTNameColumnDic.Add("count", 1);
    Dictionary<string, string> allPTNameColumnFormats = new Dictionary<string, string>();
    allPTNameColumnFormats.Add("count", "#0");
    ExcelWriter allPTNameEW = new ExcelWriter(allPTNameFilePath, "List", allPTNameColumnDic, allPTNameColumnFormats);
    foreach (string ptName in allPTList)
    {
        Dictionary<string, object> resultRow = new Dictionary<string, object>();
        resultRow.Add("name", ptName);
        resultRow.Add("count", allPTCountDic[ptName]);
        allPTNameEW.AddRow(resultRow);
    }
    allPTNameEW.SaveToDisk();

    string ptNameFilePath = destFilePath + "_PTName.xlsx";
    Dictionary<string, int> ptNameColumnDic = new Dictionary<string, int>();
    ptNameColumnDic.Add("name", 0);
    ExcelWriter ptNameEW = new ExcelWriter(ptNameFilePath, "List", ptNameColumnDic);
    foreach (string ptName in ptList)
    {
        Dictionary<string, string> resultRow = new Dictionary<string, string>();
        resultRow.Add("name", ptName);
        ptNameEW.AddRow(resultRow);
    }
    ptNameEW.SaveToDisk();

    string ptArrayFilePath = destFilePath + "_Array.txt";
    FileHelper.SaveTextToFile(ptArrayStringBuilder.ToString(), ptArrayFilePath);
}
/// <summary>
/// Reads the locally stored JSON file of every non-abandoned row and writes one output row per
/// house, combining list-row columns with the areas, usage code and sale status from the JSON.
/// </summary>
/// <param name="listSheet">List sheet providing the rows and their page URLs.</param>
/// <returns>Always <c>true</c>.</returns>
private bool GetHouseListInfos(IListSheet listSheet)
{
    // Sale-status code -> label; codes not listed here fall back to "可售".
    Dictionary<string, string> statusLabels = new Dictionary<string, string>
    {
        { "15701", "可售" },
        { "15702", "已预订" },
        { "15703", "已备案" },
        { "15704", "已签约" },
        { "15705", "可租" },
        { "15707", "不可租售" },
        { "15709", "已预订" },
        { "15710", "查封" },
        { "15711", "冻结" }
    };

    string sourceDir = this.RunPage.GetDetailSourceFileDir();
    CsvWriter writer = this.CreateResultCsvWriter();

    int rowIndex = 0;
    while (rowIndex < listSheet.RowCount)
    {
        int current = rowIndex++;
        Dictionary<string, string> listRow = listSheet.GetRow(current);
        if ("Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        string jsonPath = this.RunPage.GetFilePath(listSheet.PageUrlList[current], sourceDir);
        JObject house = JObject.Parse(FileHelper.GetTextFromFile(jsonPath));

        string statusCode = house["housestatus"].ToString();
        // The value is unused, but the lookup is kept: it fails when "fid" is missing.
        house["fid"].ToString();
        string innerArea = house["unitarea"].ToString();
        string sharedArea = house["apportioarea"].ToString();
        string usageCode = house["usedtypeno"].ToString();
        string totalArea = house["housearea"].ToString();

        string statusLabel;
        if (statusCode == null || !statusLabels.TryGetValue(statusCode, out statusLabel))
        {
            statusLabel = "可售";
        }

        writer.AddRow(new Dictionary<string, string>
        {
            { "projectId", listRow["projectId"] },
            { "项目名称", listRow["项目名称"] },
            { "buildingId", listRow["buildingId"] },
            { "楼名称", listRow["楼名称"] },
            { "是否住宅房屋", listRow["是否住宅房屋"] },
            { "单元号", listRow["单元号"] },
            { "顺序号", listRow["顺序号"] },
            { "楼层", listRow["楼层"] },
            { "houseId", listRow["houseId"] },
            { "houseName", listRow["houseName"] },
            { "房屋面积", totalArea },
            { "套内面积", innerArea },
            { "公摊面积", sharedArea },
            { "房屋用途", usageCode },
            { "销售状态编码", statusCode },
            { "销售状态", statusLabel }
        });
    }

    writer.SaveToDisk();
    return true;
}
/// <summary>
/// Expands every list row into the next-level list: the row itself is copied (marked as not to
/// be grabbed), followed by one row per city, town, county and village table row found in its
/// local HTML page.
/// </summary>
/// <remarks>
/// Pages are read as gb2312. City, town and county rows get a detail URL built from the parent
/// URL's directory plus the link found in the code cell; a row without a link is marked with
/// giveUpGrab "Y". Village rows are always marked "Y" and also carry "城乡分类代码".
/// </remarks>
/// <param name="listSheet">List sheet providing the rows and their local HTML documents.</param>
private void GetCities(IListSheet listSheet)
{
    CsvWriter ew = this.GetCsvWriter();
    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailUrl = row["detailPageUrl"];
        string detailName = row["detailPageName"];
        string year = row["year"];
        string parentCode = row["code"];
        string parentName = row["name"];

        // Add the parent node itself to the next-level file.
        Dictionary<string, string> parentF2vs = new Dictionary<string, string>();
        parentF2vs.Add("detailPageUrl", detailUrl);
        parentF2vs.Add("detailPageName", detailName);
        parentF2vs.Add("year", year);
        parentF2vs.Add("code", parentCode);
        parentF2vs.Add("name", parentName);
        parentF2vs.Add("giveUpGrab", "Y");
        ew.AddRow(parentF2vs);

        if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        // Base URL = parent URL without its query string and without its last path segment.
        Uri uri = new Uri(detailUrl);
        string queryString = uri.Query;
        string baseUrl = detailUrl.Substring(0, detailUrl.Length - queryString.Length);
        baseUrl = baseUrl.Substring(0, baseUrl.Length - uri.Segments[uri.Segments.Length - 1].Length);

        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i, Encoding.GetEncoding("gb2312"));

        AddLinkedRegionRows(htmlDoc, "citytr", baseUrl, year, ew);
        AddLinkedRegionRows(htmlDoc, "towntr", baseUrl, year, ew);
        AddLinkedRegionRows(htmlDoc, "countytr", baseUrl, year, ew);

        HtmlNodeCollection villageNodeList = htmlDoc.DocumentNode.SelectNodes("//tr[@class=\"villagetr\"]");
        if (villageNodeList != null)
        {
            foreach (HtmlNode villageNode in villageNodeList)
            {
                // Village rows have three cells: code, classification code, name.
                HtmlNodeCollection villageFieldNodeList = villageNode.SelectNodes("./td");
                string villageCode = villageFieldNodeList[0].InnerText.Trim();
                string villageType = villageFieldNodeList[1].InnerText.Trim();
                string villageName = villageFieldNodeList[2].InnerText.Trim();

                Dictionary<string, string> f2vs = new Dictionary<string, string>();
                f2vs.Add("detailPageUrl", year + "_" + villageCode);
                f2vs.Add("detailPageName", year + "_" + villageCode);
                f2vs.Add("year", year);
                f2vs.Add("code", villageCode);
                f2vs.Add("name", villageName);
                f2vs.Add("giveUpGrab", "Y");
                f2vs.Add("城乡分类代码", villageType);
                ew.AddRow(f2vs);
            }
        }
    }
    ew.SaveToDisk();
}

/// <summary>
/// Adds one row per <c>tr</c> with class <paramref name="rowClass"/>: cell 0 is the code (and may
/// contain the link to the next level), cell 1 is the name.
/// </summary>
private static void AddLinkedRegionRows(HtmlAgilityPack.HtmlDocument htmlDoc, string rowClass, string baseUrl, string year, CsvWriter ew)
{
    HtmlNodeCollection regionNodeList = htmlDoc.DocumentNode.SelectNodes("//tr[@class=\"" + rowClass + "\"]");
    if (regionNodeList == null)
    {
        return;
    }

    foreach (HtmlNode regionNode in regionNodeList)
    {
        HtmlNodeCollection fieldNodeList = regionNode.SelectNodes("./td");
        HtmlNode codeNode = fieldNodeList[0];
        HtmlNode nameNode = fieldNodeList[1];
        string code = codeNode.InnerText.Trim();
        string name = nameNode.InnerText.Trim();

        HtmlNode linkNode = codeNode.SelectSingleNode("./a");
        string hrefValue = linkNode != null ? linkNode.GetAttributeValue("href", "") : "";

        Dictionary<string, string> f2vs = new Dictionary<string, string>();
        f2vs.Add("detailPageUrl", baseUrl + hrefValue);
        f2vs.Add("detailPageName", year + "_" + code);
        f2vs.Add("year", year);
        f2vs.Add("code", code);
        f2vs.Add("name", name);
        // Without a link there is no deeper page to fetch.
        f2vs.Add("giveUpGrab", hrefValue.Length == 0 ? "Y" : "");
        ew.AddRow(f2vs);
    }
}
/// <summary>
/// Builds a tag-to-tag distance matrix from the "tags" column of a source workbook and writes it out
/// in three forms: a CSV matrix, an Excel list of the tag names, and a text file containing the matrix
/// as a nested array literal ("arr = [[...], [...]]").
/// </summary>
/// <remarks>
/// <para>
/// <c>this.Parameters</c> is split on "," (empty entries removed); entry 0 is the source file path and
/// entry 1 is the destination file path. The tag-name workbook is written to destination + "_TagName.xlsx"
/// and the array text to destination + "_Array.txt".
/// </para>
/// <para>
/// Tags are taken verbatim from the comma-split "tags" cell (no trimming) and keep the order in which
/// they are first encountered. For every pair of different tags that share a row, a co-occurrence
/// counter is incremented; the largest counter seen is used to scale the distances
/// (see <see cref="ComputeTagDistance"/>).
/// </para>
/// </remarks>
/// <param name="listSheet">Not read by this method.</param>
/// <exception cref="InvalidOperationException">Fewer than two paths were supplied in <c>this.Parameters</c>.</exception>
private void GetTagsMatrix(IListSheet listSheet)
{
    string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
    if (parameters.Length < 2)
    {
        throw new InvalidOperationException("GetTagsMatrix expects Parameters in the form \"sourceFilePath,destFilePath\".");
    }

    string sourceFilePath = parameters[0];
    string destFilePath = parameters[1];

    // Numbers are written with the invariant culture so that a decimal comma can never
    // collide with the ", " separators of the array file or the CSV delimiter.
    IFormatProvider numberFormat = System.Globalization.CultureInfo.InvariantCulture;

    ExcelReader er = new ExcelReader(sourceFilePath);
    int sourceRowCount = er.GetRowCount();
    string[] tagSeparators = new string[] { "," };

    List<string> tagList = new List<string>();
    Dictionary<string, Dictionary<string, int>> tagToTagDic = new Dictionary<string, Dictionary<string, int>>();
    int maxTime = 1;

    // Single pass over the sheet: every tag of a row becomes a "from" tag, so a tag is new
    // exactly when it has no entry in tagToTagDic yet. That keeps first-seen order in tagList
    // without a linear List.Contains scan per tag.
    for (int i = 0; i < sourceRowCount; i++)
    {
        Dictionary<string, string> sourceRow = er.GetFieldValues(i);
        string[] itemTags = sourceRow["tags"].Split(tagSeparators, StringSplitOptions.RemoveEmptyEntries);

        foreach (string fromTag in itemTags)
        {
            Dictionary<string, int> tagDic;
            if (!tagToTagDic.TryGetValue(fromTag, out tagDic))
            {
                tagDic = new Dictionary<string, int>();
                tagToTagDic.Add(fromTag, tagDic);
                tagList.Add(fromTag);
            }

            // Occurrence count of the tag itself; it does not take part in maxTime.
            int selfCount;
            tagDic.TryGetValue(fromTag, out selfCount);
            tagDic[fromTag] = selfCount + 1;

            foreach (string toTag in itemTags)
            {
                if (fromTag == toTag)
                {
                    continue;
                }

                int pairCount;
                tagDic.TryGetValue(toTag, out pairCount);
                pairCount++;
                tagDic[toTag] = pairCount;
                if (pairCount > maxTime)
                {
                    maxTime = pairCount;
                }
            }
        }
    }

    // 1) CSV matrix: first column holds the row's tag, then one column per tag.
    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
    resultColumnDic.Add("tagToTag", 0);
    for (int i = 0; i < tagList.Count; i++)
    {
        resultColumnDic.Add(tagList[i], i + 1);
    }

    CsvWriter tagMatrixCW = new CsvWriter(destFilePath, resultColumnDic);
    foreach (string fromTag in tagList)
    {
        Dictionary<string, int> tagDic;
        tagToTagDic.TryGetValue(fromTag, out tagDic);

        Dictionary<string, string> resultRow = new Dictionary<string, string>();
        resultRow.Add("tagToTag", fromTag);
        foreach (string toTag in tagList)
        {
            double value = ComputeTagDistance(fromTag, toTag, tagDic, maxTime);
            resultRow.Add(toTag, value.ToString(numberFormat));
        }
        tagMatrixCW.AddRow(resultRow);
    }
    tagMatrixCW.SaveToDisk();

    // 2) Excel sheet "List" with a single "name" column: the tags in matrix order.
    string tagNameFilePath = destFilePath + "_TagName.xlsx";
    Dictionary<string, int> tagNameColumnDic = new Dictionary<string, int>();
    tagNameColumnDic.Add("name", 0);
    ExcelWriter tagNameEW = new ExcelWriter(tagNameFilePath, "List", tagNameColumnDic);
    foreach (string tagName in tagList)
    {
        Dictionary<string, string> nameRow = new Dictionary<string, string>();
        nameRow.Add("name", tagName);
        tagNameEW.AddRow(nameRow);
    }
    tagNameEW.SaveToDisk();

    // 3) Text file with the same matrix as a nested array literal.
    string tagArrayFilePath = destFilePath + "_Array.txt";
    StringBuilder tagArrayStringBuilder = new StringBuilder();
    tagArrayStringBuilder.Append("arr = [");
    for (int i = 0; i < tagList.Count; i++)
    {
        string fromTag = tagList[i];
        if (i > 0)
        {
            tagArrayStringBuilder.Append(", \r\n");
        }
        tagArrayStringBuilder.Append("[");

        Dictionary<string, int> tagDic;
        tagToTagDic.TryGetValue(fromTag, out tagDic);

        Dictionary<string, string> resultRow = new Dictionary<string, string>();
        resultRow.Add("tagToTag", fromTag);
        for (int j = 0; j < tagList.Count; j++)
        {
            string toTag = tagList[j];
            string valueText = ComputeTagDistance(fromTag, toTag, tagDic, maxTime).ToString(numberFormat);
            resultRow.Add(toTag, valueText);
            if (j > 0)
            {
                tagArrayStringBuilder.Append(", ");
            }
            tagArrayStringBuilder.Append(valueText);
        }

        // NOTE(review): this row is handed to tagMatrixCW after SaveToDisk() has already been
        // called on it above. Whether it ever reaches the CSV depends on CsvWriter, which is not
        // visible here; it looks like a copy/paste leftover. Kept to preserve existing behavior.
        // TODO confirm and remove.
        tagMatrixCW.AddRow(resultRow);

        tagArrayStringBuilder.Append("]");
    }
    tagArrayStringBuilder.Append("]");
    FileHelper.SaveTextToFile(tagArrayStringBuilder.ToString(), tagArrayFilePath);
}

/// <summary>
/// Converts the co-occurrence count of two tags into the distance stored in the tag matrix.
/// </summary>
/// <param name="fromTag">Row tag.</param>
/// <param name="toTag">Column tag.</param>
/// <param name="coOccurrences">Co-occurrence counters of <paramref name="fromTag"/>; may be null.</param>
/// <param name="maxTime">Largest co-occurrence count observed over all tag pairs.</param>
/// <returns>
/// 0 for a tag compared with itself; 2 * maxTime when the tags never occur together;
/// otherwise maxTime divided by the co-occurrence count.
/// </returns>
private static double ComputeTagDistance(string fromTag, string toTag, Dictionary<string, int> coOccurrences, int maxTime)
{
    if (fromTag == toTag)
    {
        return 0;
    }

    int count;
    if (coOccurrences == null || !coOccurrences.TryGetValue(toTag, out count) || count == 0)
    {
        return 2 * (double)maxTime;
    }

    return (double)maxTime / (double)count;
}
/// <summary>
/// Writes one CSV row per (shop, menu category, food) for the shops listed in <paramref name="cityEr"/>,
/// reading each shop's menu from its previously downloaded JSON file.
/// </summary>
/// <remarks>
/// A shop is processed only when its detail page URL is a key of <paramref name="allShopDic"/> and its
/// give-up flag there is not "Y". The JSON file is expected to contain a "menu" object whose "body"
/// holds a JSON array of categories, each with an optional "foods" array. Shops without a "menu"
/// object or with a missing/empty "body" produce no rows. Any exception raised while parsing a shop's
/// JSON is logged together with the file path and processing continues with the next shop; rows
/// already added for that shop are kept.
/// </remarks>
/// <param name="cityEr">Reader over the city's shop list; only the detail page URL column is read.</param>
/// <param name="city">Passed to <c>CreateCategoryMenuMapsFileWriter</c> to obtain the output writer.</param>
/// <param name="allShopDic">Shop rows keyed by detail page URL; supplies the shop columns of each output row.</param>
private void GetCategoryMenuMaps(ExcelReader cityEr, string city, Dictionary<string, Dictionary<string, string>> allShopDic)
{
    // Shop-level columns copied unchanged from the list row into every output row.
    string[] shopFieldNames = new string[]
    {
        "id", "name", "address", "description", "latitude", "longitude", "phone", "promotion_info"
    };

    CsvWriter cw = this.CreateCategoryMenuMapsFileWriter(city);
    string sourceDir = this.RunPage.GetDetailSourceFileDir();
    int rowCount = cityEr.GetRowCount();

    for (int i = 0; i < rowCount; i++)
    {
        Dictionary<string, string> cityShopRow = cityEr.GetFieldValues(i);
        string detailPageUrl = cityShopRow[SysConfig.DetailPageUrlFieldName];

        Dictionary<string, string> listRow;
        if (!allShopDic.TryGetValue(detailPageUrl, out listRow))
        {
            continue;
        }

        if ("Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]))
        {
            continue;
        }

        string filePath = this.RunPage.GetFilePath(detailPageUrl, sourceDir);
        string jsonText = FileHelper.GetTextFromFile(filePath);
        try
        {
            JObject rootJo = JObject.Parse(jsonText);
            JObject menuJo = rootJo.GetValue("menu") as JObject;
            if (menuJo == null)
            {
                continue;
            }

            // GetValue returns null when the property is absent; check the token before calling
            // ToString() so a shop without "body" is skipped instead of raising a
            // NullReferenceException that ends up in the log.
            JToken bodyToken = menuJo.GetValue("body");
            string bodyJson = bodyToken == null ? null : bodyToken.ToString();
            if (string.IsNullOrEmpty(bodyJson))
            {
                continue;
            }

            // Files without a usable menu are left on disk; an earlier cleanup step that deleted
            // them was already disabled and has been dropped.
            JArray categoryArray = JArray.Parse(bodyJson);
            for (int j = 0; j < categoryArray.Count; j++)
            {
                JObject categoryJo = categoryArray[j] as JObject;
                JArray foodArray = categoryJo.GetValue("foods") as JArray;
                if (foodArray == null)
                {
                    continue;
                }

                string categoryId = GetTokenTextOrEmpty(categoryJo, "id");
                string categoryName = GetTokenTextOrEmpty(categoryJo, "name");
                string categoryDescription = GetTokenTextOrEmpty(categoryJo, "description");

                for (int k = 0; k < foodArray.Count; k++)
                {
                    JObject foodJo = foodArray[k] as JObject;

                    Dictionary<string, string> categoryFoodRow = new Dictionary<string, string>();
                    foreach (string shopFieldName in shopFieldNames)
                    {
                        categoryFoodRow.Add(shopFieldName, listRow[shopFieldName]);
                    }

                    categoryFoodRow.Add("categoryId", categoryId);
                    categoryFoodRow.Add("categoryName", categoryName);
                    categoryFoodRow.Add("categoryDescription", categoryDescription);
                    categoryFoodRow.Add("foodId", GetTokenTextOrEmpty(foodJo, "item_id"));
                    categoryFoodRow.Add("foodName", GetTokenTextOrEmpty(foodJo, "name"));
                    categoryFoodRow.Add("rating", GetTokenTextOrEmpty(foodJo, "rating"));
                    categoryFoodRow.Add("monthSales", GetTokenTextOrEmpty(foodJo, "month_sales"));
                    categoryFoodRow.Add("ratingCount", GetTokenTextOrEmpty(foodJo, "rating_count"));
                    // "statisfy" is the spelling used by the JSON keys and the output columns.
                    categoryFoodRow.Add("statisfyCount", GetTokenTextOrEmpty(foodJo, "statisfy_count"));
                    categoryFoodRow.Add("statisfyRate", GetTokenTextOrEmpty(foodJo, "statisfy_rate"));
                    categoryFoodRow.Add("minPurchase", GetTokenTextOrEmpty(foodJo, "min_purchase"));
                    cw.AddRow(categoryFoodRow);
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: a malformed file must not stop the export of the remaining shops.
            this.RunPage.InvokeAppendLogText(ex.Message + ". FilePath = " + filePath, LogLevelType.System, true);
        }
    }

    cw.SaveToDisk();
}

/// <summary>
/// Returns the text of a property of a JSON object, or an empty string when the property is absent.
/// </summary>
/// <param name="jo">JSON object to read from; must not be null.</param>
/// <param name="propertyName">Name of the property to look up.</param>
private static string GetTokenTextOrEmpty(JObject jo, string propertyName)
{
    JToken token = jo.GetValue(propertyName);
    return token == null ? "" : token.ToString();
}
/// <summary>
/// Round-trip test: exports three records with <c>CsvWriter</c> to "Somefile.csv", reads them back with
/// <c>CsvFileReader</c> and verifies that every record comes back with the same region, sales and
/// opening date. The regions deliberately contain a comma, a line break, double quotes and single quotes.
/// </summary>
public void CsvWriterReaderAccuracyTestShouldHaveAccurateData()
{
    const string csvPath = "Somefile.csv";
    File.Delete(csvPath);

    var originalDataList = new List<TestData>
    {
        new TestData { Region = "Ealing,\n London", Sales = 10000, DateOpened = new DateTime(2012, 04, 23) },
        new TestData { Region = "\"\"Glasgow in Scotland\"\"", Sales = 50000, DateOpened = new DateTime(2012, 4, 1, 15, 31, 0) },
        new TestData { Region = "USA 'Gods own country'", Sales = 40000, DateOpened = new DateTime(2011, 12, 29, 9, 30, 0) }
    };

    var exportData = new CsvWriter();
    foreach (var original in originalDataList)
    {
        exportData.AddRow();
        exportData["Region"] = original.Region;
        exportData["Sales"] = Convert.ToString(original.Sales);
        exportData["Date Opened"] = Convert.ToString(original.DateOpened);
    }
    exportData.ExportToFile(csvPath);

    var newDataList = new List<TestData>();
    using (var reader = new CsvFileReader(csvPath))
    {
        var row = new CsvRow();
        bool readingHeader = true;
        while (reader.ReadRow(row))
        {
            if (readingHeader)
            {
                // The first row carries the column names; its content is not checked here.
                readingHeader = false;
                continue;
            }

            newDataList.Add(new TestData
            {
                Region = row[0],
                Sales = Convert.ToInt32(row[1]),
                DateOpened = Convert.ToDateTime(row[2])
            });
        }
    }

    Assert.IsTrue(
        newDataList.Count == originalDataList.Count,
        "The number of data rows read back differs from the number of rows written.");

    var matchedData = (from newData in newDataList
                       join originalData in originalDataList on newData.Region equals originalData.Region
                       select new { NewData = newData, OriginalData = originalData }).ToList();

    // Without this check a reader that loses or alters every region would produce an empty
    // join and the loop below would pass without asserting anything.
    Assert.IsTrue(
        matchedData.Count == originalDataList.Count,
        "Not every written region was read back unchanged.");

    foreach (var data in matchedData)
    {
        Assert.IsTrue(
            data.NewData.Sales == data.OriginalData.Sales &&
            data.NewData.Region == data.OriginalData.Region &&
            data.NewData.DateOpened == data.OriginalData.DateOpened);
    }
}