Example #1
0
        /// <summary>
        /// Builds a sample "Transactions" worksheet (title styling, a "Since" line, a blank
        /// spacer row, one header row and one hard-coded data row), applies the default
        /// report settings and saves the workbook.
        /// </summary>
        /// <returns>The value returned by <c>SaveCloseAndGetFileName</c> on the writer.</returns>
        public string Execute()
        {
            using var writer = new ExcelWriter();

            Helper.ApplyDefaultStyling(writer, "Transactions", $"Charges for Sample Place");

            // "Since" line dated one month before today, followed by an empty row.
            writer.AddRow($"Since: {DateTime.Today.AddMonths(-1):yyyy-MM-dd}");
            writer.AddRow();

            // Header captions; the second argument is presumably a column width (confirm against AddHeader).
            writer.AddHeaderRow()
                  .AddHeader("Customer Id", 12)
                  .AddHeader("First name", 30)
                  .AddHeader("Last name", 30)
                  .AddHeader("DOB", 30)
                  .AddHeader("Trans. Date", 15)
                  .AddHeader("Description", 90)
                  .AddHeader("Amount", 15);

            // Single sample data row; both date cells are left-aligned.
            writer.AddRow()
                  .Add(123456, "@")
                  .Add("Firstname", "@")
                  .Add("Lastname", "@")
                  .Add(new DateTime(1976, 8, 22), Helper.FormatDate, style: s => s.HorizontalAlignment = ExcelHorizontalAlignment.Left)
                  .Add(DateTime.Today, Helper.FormatDate, style: s => s.HorizontalAlignment = ExcelHorizontalAlignment.Left)
                  .Add("Sample Charge", "@")
                  .Add(123.45, Helper.FormatMoney);

            Helper.ApplyDefaultReportSettings(writer, DateTime.Now, eOrientation.Landscape);

            return writer.SaveCloseAndGetFileName();
        }
        /// <summary>
        /// Writes the list of detail pages to the result writer, emitting each URL at most once.
        /// Rows whose give-up field equals "Y" are copied through from the list sheet; for every
        /// other row the locally stored HTML document is scanned with the XPath
        /// <c>//ul[@class="ulLi120 fsc16"]/li/a</c> and each link becomes a new "word" row.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows are read and whose stored pages are parsed.</param>
        private void GetWordPageUrls(IListSheet listSheet)
        {
            ExcelWriter resultEW = this.CreateResultWriter();

            // Every URL already written; used to suppress duplicates.
            Dictionary<string, bool> pageDic = new Dictionary<string, bool>();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> listRow = listSheet.GetRow(i);

                bool giveUp = "Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    string pageUrl = listRow["detailPageUrl"];
                    if (!pageDic.ContainsKey(pageUrl))
                    {
                        pageDic.Add(pageUrl, true);
                        Dictionary<string, string> resultRow = new Dictionary<string, string>();
                        resultRow.Add("detailPageUrl", pageUrl);
                        resultRow.Add("detailPageName", listRow["detailPageName"]);
                        resultRow.Add("name", listRow["name"]);
                        resultRow.Add("pageType", listRow["pageType"]);
                        resultEW.AddRow(resultRow);
                    }
                    continue;
                }

                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection linkNodes = htmlDoc.DocumentNode.SelectNodes("//ul[@class=\"ulLi120 fsc16\"]/li/a");
                if (linkNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode linkNode in linkNodes)
                {
                    string linkUrl     = linkNode.GetAttributeValue("href", "");
                    string fullLinkUrl = "http://www.yitang.org" + linkUrl;

                    if (pageDic.ContainsKey(fullLinkUrl))
                    {
                        continue;
                    }

                    // Record the URL so a later occurrence of the same link is skipped;
                    // without this the ContainsKey check above could never filter a scraped link.
                    pageDic.Add(fullLinkUrl, true);

                    string linkName = CommonUtil.HtmlDecode(linkNode.InnerText).Trim();

                    Dictionary<string, string> resultRow = new Dictionary<string, string>();
                    resultRow.Add("detailPageUrl", fullLinkUrl);
                    resultRow.Add("detailPageName", linkName + "_word");
                    resultRow.Add("giveUpGrab", "N");
                    resultRow.Add("name", linkName);
                    resultRow.Add("pageType", "word");
                    resultEW.AddRow(resultRow);
                }
            }

            resultEW.SaveToDisk();
        }
Example #3
0
        /// <summary>
        /// Produces the next list of page URLs. For every row whose give-up field is not "Y",
        /// the row's own URL is written (once), then every anchor in the locally stored page
        /// (read as gb2312) whose href starts with <c>http://114.xixik.com/</c> is written
        /// (once). The result is saved to disk at the end.
        /// </summary>
        /// <param name="listSheet">Sheet providing the rows and their stored pages.</param>
        private void GetNextPageUrls(IListSheet listSheet)
        {
            ExcelWriter writer = this.CreateNextPageUrlExcelWriter();
            Dictionary<string, bool> seenUrls = new Dictionary<string, bool>();
            int total = listSheet.RowCount;

            for (int rowIndex = 0; rowIndex < total; rowIndex++)
            {
                Dictionary<string, string> sourceRow = listSheet.GetRow(rowIndex);
                if ("Y".Equals(sourceRow[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                // Carry the already-known page forward before looking at its links.
                string currentUrl = sourceRow[SysConfig.DetailPageUrlFieldName];
                if (!seenUrls.ContainsKey(currentUrl))
                {
                    Dictionary<string, string> carriedRow = new Dictionary<string, string>
                    {
                        { SysConfig.DetailPageUrlFieldName, currentUrl },
                        { SysConfig.DetailPageNameFieldName, currentUrl },
                        { "linkName", sourceRow["linkName"] }
                    };
                    writer.AddRow(carriedRow);
                    seenUrls.Add(currentUrl, true);
                }

                HtmlAgilityPack.HtmlDocument document = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex, Encoding.GetEncoding("gb2312"));
                HtmlNodeCollection anchors = document.DocumentNode.SelectNodes("//a");
                if (anchors == null)
                {
                    continue;
                }

                foreach (HtmlNode anchor in anchors)
                {
                    string href = anchor.GetAttributeValue("href", "").Trim();
                    bool isNewSiteLink = href.StartsWith("http://114.xixik.com/") && !seenUrls.ContainsKey(href);
                    if (!isNewSiteLink)
                    {
                        continue;
                    }

                    string anchorText = CommonUtil.HtmlDecode(anchor.InnerText).Trim();
                    Dictionary<string, string> discoveredRow = new Dictionary<string, string>
                    {
                        { SysConfig.DetailPageUrlFieldName, href },
                        { SysConfig.DetailPageNameFieldName, href },
                        { "linkName", anchorText }
                    };
                    writer.AddRow(discoveredRow);

                    seenUrls.Add(href, true);
                }
            }

            writer.SaveToDisk();
        }
Example #4
0
        /// <summary>
        /// For every row whose give-up field is not "Y", reads the locally stored page, takes
        /// each anchor matched by <c>//div[@class="cont"]/a</c> and writes one row containing
        /// the absolute URL (prefixed with <c>http://www.lszj.com</c>) and the decoded link text.
        /// The writer is saved to disk once all rows are processed.
        /// </summary>
        /// <param name="listSheet">Sheet providing the rows and their stored pages.</param>
        private void GetListPageUrls(IListSheet listSheet)
        {
            ExcelWriter ew = this.CreateWriter();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> listRow = listSheet.GetRow(i);
                bool giveUp = "Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                // No try/catch here: the previous "catch { throw ex; }" added nothing except
                // resetting the stack trace, so exceptions now propagate with full detail.
                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection linkNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"cont\"]/a");
                if (linkNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode linkNode in linkNodes)
                {
                    string url  = "http://www.lszj.com" + linkNode.GetAttributeValue("href", "");
                    string name = CommonUtil.HtmlDecode(linkNode.InnerText).Trim();

                    Dictionary<string, string> row = new Dictionary<string, string>();
                    row.Add("detailPageUrl", url);
                    row.Add("detailPageName", url);
                    row.Add("name", name);
                    ew.AddRow(row);
                }
            }

            ew.SaveToDisk();
        }
Example #5
0
        /// <summary>
        /// Reads the two-level category tree from each locally stored page and appends one row
        /// per second-level category to <paramref name="resultEW"/>. First-level names come from
        /// <c>//div[@id="catDiv"]/div/h5</c>; the matching group of second-level items comes
        /// from the <c>ul</c> at the same position in <c>//div[@id="catDiv"]/div/ul</c>.
        /// </summary>
        /// <param name="listSheet">Sheet identifying the stored pages to parse.</param>
        /// <param name="pageSourceDir">Not used by this method; retained so existing callers keep compiling.</param>
        /// <param name="resultEW">Writer receiving rows with cat1Name, cat2Name and cat2Code.</param>
        private void GetCats(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
        {
            // According to the original author's note, listSheet holds only one record.
            for (int i = 0; i < listSheet.RowCount; i++)
            {
                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                HtmlNodeCollection allCat1Nodes      = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"catDiv\"]/div/h5");
                HtmlNodeCollection allCat2GroupNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"catDiv\"]/div/ul");

                // SelectNodes returns null (not an empty collection) when nothing matches.
                if (allCat1Nodes == null || allCat2GroupNodes == null)
                {
                    continue;
                }

                // Headings and groups are paired by position; a heading without a group at the
                // same index still raises an out-of-range error, as before.
                for (int j = 0; j < allCat1Nodes.Count; j++)
                {
                    string   cat1Name      = allCat1Nodes[j].InnerText.Trim();
                    HtmlNode cat2GroupNode = allCat2GroupNodes[j];

                    HtmlNodeCollection allCat2Nodes = cat2GroupNode.SelectNodes("./li");
                    if (allCat2Nodes == null)
                    {
                        // A group with no list items contributes no rows.
                        continue;
                    }

                    foreach (HtmlNode cat2Node in allCat2Nodes)
                    {
                        string cat2Code = cat2Node.Attributes["catid"].Value;
                        string cat2Name = cat2Node.InnerText.Trim();

                        Dictionary<string, string> f2vs = new Dictionary<string, string>();
                        f2vs.Add("cat1Name", cat1Name);
                        f2vs.Add("cat2Name", cat2Name);
                        f2vs.Add("cat2Code", cat2Code);
                        resultEW.AddRow(f2vs);
                    }
                }
            }
        }
Example #6
0
        /// <summary>
        /// For each keyword read from <paramref name="er"/> (column "词汇") that contains no
        /// space, computes its value divided by its summed value and, when the value exceeds
        /// 0.0001, appends a row with category, subCategory, keyword and percent.
        /// </summary>
        /// <param name="resultEW">Writer receiving the result rows.</param>
        /// <param name="categoryName">Written to the "category" column of every row.</param>
        /// <param name="subCategoryName">Written to the "subCategory" column of every row.</param>
        /// <param name="er">Reader supplying the keyword rows.</param>
        /// <param name="keywordsSumValueDic">Divisor for each keyword; must contain every space-free keyword.</param>
        /// <param name="keywordsValueDic">Dividend for each keyword; must contain every space-free keyword.</param>
        private void GetSubCategoryKeywordPercents(ExcelWriter resultEW, string categoryName, string subCategoryName, ExcelReader er, Dictionary <string, double> keywordsSumValueDic, Dictionary <string, double> keywordsValueDic)
        {
            int total = er.GetRowCount();

            for (int index = 0; index < total; index++)
            {
                string keyword = er.GetFieldValues(index)["词汇"];

                // Keywords containing a space are dropped.
                if (keyword.Contains(" "))
                {
                    continue;
                }

                // Both lookups happen before the threshold test, so a missing key fails either way.
                double value    = keywordsValueDic[keyword];
                double sumValue = keywordsSumValueDic[keyword];
                double ratio    = value / sumValue;

                if (value > 0.0001)
                {
                    Dictionary<string, object> resultRow = new Dictionary<string, object>
                    {
                        { "category", categoryName },
                        { "subCategory", subCategoryName },
                        { "keyword", keyword },
                        { "percent", ratio }
                    };
                    resultEW.AddRow(resultRow);
                }
            }
        }
Example #7
0
        /// <summary>
        /// Creates "翻译结果.xlsx" in the export directory (sheet "List", columns fromName and
        /// toCode) and, for every row whose give-up field is not "Y", appends the first record
        /// of that row's stored CSV file. The workbook is saved once all rows are processed.
        /// </summary>
        /// <param name="listSheet">Sheet providing the detail page URL and give-up flag per row.</param>
        private void GetList(IListSheet listSheet)
        {
            string exportDir     = this.RunPage.GetExportDir();
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
            resultColumnDic.Add("fromName", 0);
            resultColumnDic.Add("toCode", 1);

            string      resultFilePath = Path.Combine(exportDir, "翻译结果.xlsx");
            ExcelWriter resultEW       = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string detailUrl = row["detailPageUrl"];
                bool   giveUp    = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                // No try/catch here: the previous "catch { throw ex; }" added nothing except
                // resetting the stack trace, so exceptions now propagate with full detail.
                string    resultTextFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);
                CsvReader csvReader          = new CsvReader(resultTextFilePath);

                Dictionary<string, string> f2vs = csvReader.GetFieldValues(0);
                resultEW.AddRow(f2vs);
            }

            resultEW.SaveToDisk();
        }
        /// <summary>
        /// Extracts the shop links from each locally stored page
        /// (<c>//div[@id="cityMapLeft"]/div/b/a</c>) and appends one row per shop to
        /// <paramref name="resultEW"/>. The shop code is the last path segment of the link,
        /// cut at its first dot.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply provinceName, cityName and cityCode and identify the stored pages.</param>
        /// <param name="pageSourceDir">Not used by this method; retained so existing callers keep compiling.</param>
        /// <param name="resultEW">Writer receiving one row per shop.</param>
        private void GetShopList(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
        {
            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string provinceName = row["provinceName"];
                string cityName     = row["cityName"];
                string cityCode     = row["cityCode"];

                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection allShopNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"cityMapLeft\"]/div/b/a");
                if (allShopNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode shopNode in allShopNodes)
                {
                    string   shopUrl     = shopNode.Attributes["href"].Value;
                    string[] shopPieces  = shopUrl.Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries);
                    string   shopCodeStr = shopPieces[shopPieces.Length - 1];

                    // Cut at the first dot; a segment without a dot is used whole instead of
                    // making Substring throw on a -1 index.
                    int    dotIndex = shopCodeStr.IndexOf('.');
                    string shopCode = dotIndex >= 0 ? shopCodeStr.Substring(0, dotIndex) : shopCodeStr;
                    string shopName = shopNode.InnerText.Trim();

                    Dictionary<string, string> f2vs = new Dictionary<string, string>();
                    f2vs.Add("detailPageUrl", shopUrl);
                    f2vs.Add("detailPageName", shopCode + shopName);
                    f2vs.Add("provinceName", provinceName);
                    f2vs.Add("cityCode", cityCode);
                    f2vs.Add("cityName", cityName);
                    f2vs.Add("shopCode", shopCode);
                    f2vs.Add("shopName", shopName);
                    resultEW.AddRow(f2vs);
                }
            }
        }
Example #9
0
        /// <summary>
        /// Scans each locally stored page for <c>input</c> elements with class "icheck",
        /// parses the JSON held in each element's <c>value</c> attribute and appends one row
        /// (aptCode, aptScope) per distinct <c>apt_code</c> to <paramref name="resultEW"/>.
        /// </summary>
        /// <param name="listSheet">Sheet identifying the stored pages to parse.</param>
        /// <param name="pageSourceDir">Not referenced in the method body.</param>
        /// <param name="resultEW">Writer receiving one row per distinct code.</param>
        private void GetQuals(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
        {
            // Keys are the codes already written; the values are never read.
            Dictionary<string, string> writtenCodes = new Dictionary<string, string>();

            for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
            {
                // NOTE(review): the URL is read but not used afterwards.
                string pageUrl = listSheet.PageUrlList[rowIndex];

                HtmlAgilityPack.HtmlDocument document = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);
                HtmlNodeCollection checkboxes = document.DocumentNode.SelectNodes("//input[@class=\"icheck\"]");
                if (checkboxes == null)
                {
                    continue;
                }

                foreach (HtmlNode checkbox in checkboxes)
                {
                    JObject payload = JObject.Parse(checkbox.GetAttributeValue("value", ""));

                    JValue codeValue = payload.SelectToken("apt_code") as JValue;
                    string aptCode   = codeValue.ToString().Trim();

                    JValue scopeValue = payload.SelectToken("apt_scope") as JValue;
                    string aptScope   = scopeValue.ToString().Trim();

                    if (writtenCodes.ContainsKey(aptCode))
                    {
                        continue;
                    }

                    writtenCodes.Add(aptCode, null);
                    Dictionary<string, string> qualRow = new Dictionary<string, string>
                    {
                        { "aptCode", aptCode },
                        { "aptScope", aptScope }
                    };
                    resultEW.AddRow(qualRow);
                }
            }
        }
        /// <summary>
        /// Writes "绿色建筑列表页.xlsx" to the export directory: one row per page number from
        /// 0 up to (but excluding) <c>PageCount</c>, holding the search page URL, the zero-based
        /// page number as the page name and the one-based page number in "pageNum".
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        private bool GenerateUrlListFile()
        {
            string exportFolder = this.RunPage.GetExportDir();

            // NOTE(review): this value is not used below; the call is retained as in the original.
            string detailSourceFolder = this.RunPage.GetDetailSourceFileDir();

            string[] columnNames = new string[]
            {
                "detailPageUrl",
                "detailPageName",
                "cookie",
                "grabStatus",
                "giveUpGrab",
                "pageNum"
            };
            Dictionary<string, int> columnIndexes = CommonUtil.InitStringIndexDic(columnNames);

            string      workbookPath = Path.Combine(exportFolder, "绿色建筑列表页.xlsx");
            ExcelWriter listWriter   = new ExcelWriter(workbookPath, "List", columnIndexes, null);

            int pageNumber = 0;
            while (pageNumber < PageCount)
            {
                Dictionary<string, string> pageRow = new Dictionary<string, string>
                {
                    { "detailPageUrl", this.GetShopSearchPageUrl(pageNumber) },
                    { "detailPageName", pageNumber.ToString() },
                    { "pageNum", (pageNumber + 1).ToString() }
                };
                listWriter.AddRow(pageRow);
                pageNumber++;
            }

            listWriter.SaveToDisk();
            return true;
        }
Example #11
0
        /// <summary>
        /// Merges the per-sub-category workbooks into "美食天下_分类与菜谱列表对照.xlsx" in the
        /// export directory. For each list row, the workbook stored for that row's detail page
        /// URL is opened and every record's category, subCategory, name and url are copied to
        /// the result writer, which is saved once all rows are processed.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows identify the stored workbooks.</param>
        private void GetCategoryToPageUrls(IListSheet listSheet)
        {
            string exportFolder = this.RunPage.GetExportDir();

            // NOTE(review): this value is not used below; the call is retained as in the original.
            string unusedSourceFolder = this.RunPage.GetDetailSourceFileDir();

            string      outputPath = Path.Combine(exportFolder, "美食天下_分类与菜谱列表对照.xlsx");
            ExcelWriter mapWriter  = this.CreateSubCategoryMapWriter(outputPath);

            for (int listIndex = 0; listIndex < listSheet.RowCount; listIndex++)
            {
                Dictionary<string, string> listRow = listSheet.GetRow(listIndex);
                string pageUrl = listRow[SysConfig.DetailPageUrlFieldName];

                // NOTE(review): these two values are read but never used; the lookups still
                // fail if the columns are absent, exactly as before.
                string listCategory    = listRow["category"];
                string listSubCategory = listRow["subCategory"];

                string sourceFolder = this.RunPage.GetDetailSourceFileDir();
                string workbookPath = this.RunPage.GetFilePath(pageUrl, sourceFolder);

                ExcelReader reader      = new ExcelReader(workbookPath);
                int         recordCount = reader.GetRowCount();
                for (int recordIndex = 0; recordIndex < recordCount; recordIndex++)
                {
                    Dictionary<string, string> record = reader.GetFieldValues(recordIndex);

                    Dictionary<string, string> mapRow = new Dictionary<string, string>
                    {
                        { "category", record["category"] },
                        { "subCategory", record["subCategory"] },
                        { "name", record["name"] },
                        { "url", record["url"] }
                    };
                    mapWriter.AddRow(mapRow);
                }
            }

            mapWriter.SaveToDisk();
        }
Example #12
0
        /// <summary>
        /// Creates "美食天下_获取各小类菜谱列表页.xlsx" in the export directory and, for every row
        /// whose give-up field is not "Y", reads the stored page, walks each
        /// <c>div.category_sub.clear</c> block and writes one row per sub-category link
        /// (<c>./ul/li/a</c>) with its href, the block's <c>h3</c> text as category and the
        /// link's <c>title</c> as subCategory.
        /// </summary>
        /// <param name="listSheet">Sheet providing the rows and their stored pages.</param>
        private void GetListPageUrls(IListSheet listSheet)
        {
            string exportDir = this.RunPage.GetExportDir();

            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
            resultColumnDic.Add("detailPageUrl", 0);
            resultColumnDic.Add("detailPageName", 1);
            resultColumnDic.Add("cookie", 2);
            resultColumnDic.Add("grabStatus", 3);
            resultColumnDic.Add("giveUpGrab", 4);
            resultColumnDic.Add("category", 5);
            resultColumnDic.Add("subCategory", 6);

            string      resultFilePath = Path.Combine(exportDir, "美食天下_获取各小类菜谱列表页.xlsx");
            ExcelWriter resultEW       = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);

                bool giveUp = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                // No try/catch here: the previous "catch { throw ex; }" added nothing except
                // resetting the stack trace, so exceptions now propagate with full detail.
                HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection categoryDivList = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"category_sub clear\"]");
                if (categoryDivList == null)
                {
                    continue;
                }

                foreach (HtmlNode categoryDiv in categoryDivList)
                {
                    HtmlNode categoryNameNode = categoryDiv.SelectSingleNode("./h3");
                    string   categoryName     = CommonUtil.HtmlDecode(categoryNameNode.InnerText).Trim();

                    HtmlNodeCollection subCategoryNodeList = categoryDiv.SelectNodes("./ul/li/a");
                    if (subCategoryNodeList == null)
                    {
                        // A category block without links contributes no rows.
                        continue;
                    }

                    foreach (HtmlNode subCategoryNode in subCategoryNodeList)
                    {
                        string subCategoryName    = subCategoryNode.GetAttributeValue("title", "");
                        string subCategoryPageUrl = subCategoryNode.GetAttributeValue("href", "");

                        Dictionary<string, string> f2vs = new Dictionary<string, string>();
                        f2vs.Add("detailPageUrl", subCategoryPageUrl);
                        f2vs.Add("detailPageName", subCategoryPageUrl);
                        f2vs.Add("category", categoryName);
                        f2vs.Add("subCategory", subCategoryName);

                        resultEW.AddRow(f2vs);
                    }
                }
            }

            resultEW.SaveToDisk();
        }
Example #13
0
        /// <summary>
        /// Reads each stored response file, takes the text from the first "{" through the last
        /// "}" as JSON, and appends one row per element of its <c>data</c> array to
        /// <paramref name="resultEW"/>, combining the element's id and name with the level-1
        /// area code and name from the list row.
        /// </summary>
        /// <param name="listSheet">Sheet supplying the page URL, areaLevel1Code and areaLevel1Name per row.</param>
        /// <param name="pageSourceDir">Directory passed to <c>GetFilePath</c> to locate each stored file.</param>
        /// <param name="resultEW">Writer receiving one row per level-2 area.</param>
        private void GetCities(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
        {
            for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
            {
                string url = listSheet.PageUrlList[rowIndex];
                Dictionary<string, string> listRow = listSheet.GetRow(rowIndex);
                string parentCode = listRow["areaLevel1Code"];
                string parentName = listRow["areaLevel1Name"];

                string storedPath = this.RunPage.GetFilePath(url, pageSourceDir);
                string rawText    = FileHelper.GetTextFromFile(storedPath);

                // The JSON object is embedded in surrounding text; keep only the outermost braces.
                int    openBrace  = rawText.IndexOf("{");
                int    closeBrace = rawText.LastIndexOf("}");
                string jsonText   = rawText.Substring(openBrace, closeBrace - openBrace + 1);

                JObject root  = JObject.Parse(jsonText);
                JArray  areas = root.SelectToken("data") as JArray;

                for (int areaIndex = 0; areaIndex < areas.Count; areaIndex++)
                {
                    JObject area      = areas[areaIndex] as JObject;
                    JValue  idToken   = area.SelectToken("id") as JValue;
                    string  childCode = idToken.Value.ToString();
                    JValue  nameToken = area.SelectToken("name") as JValue;
                    string  childName = nameToken.Value.ToString();

                    Dictionary<string, string> areaRow = new Dictionary<string, string>
                    {
                        { "detailPageUrl", string.Concat("http://autobeta.jd.com/queryAreaList?area_lev=3&area_id=", childCode, "&callback=jQuery7711772&_=1469734421125") },
                        { "detailPageName", childCode + childName },
                        { "areaLevel1Code", parentCode },
                        { "areaLevel1Name", parentName },
                        { "areaLevel2Code", childCode },
                        { "areaLevel2Name", childName }
                    };
                    resultEW.AddRow(areaRow);
                }
            }
        }
Example #14
0
        /// <summary>
        /// Reads the province list items (<c>//div[@id="listTab"]/ul[1]/li</c>) from each
        /// locally stored page and appends one row per province to
        /// <paramref name="resultEW"/>, including the province's shop page URL built from its
        /// <c>data-value</c> attribute.
        /// </summary>
        /// <param name="listSheet">Sheet identifying the stored pages to parse.</param>
        /// <param name="pageSourceDir">Not used by this method; retained so existing callers keep compiling.</param>
        /// <param name="resultEW">Writer receiving one row per province.</param>
        private void GetProvinces(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
        {
            // According to the original author's note, listSheet holds only one record.
            for (int i = 0; i < listSheet.RowCount; i++)
            {
                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection allProvinceNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"listTab\"]/ul[1]/li");
                if (allProvinceNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode provinceNode in allProvinceNodes)
                {
                    string provinceCode = provinceNode.Attributes["data-value"].Value;
                    string provinceName = provinceNode.InnerText;

                    Dictionary<string, string> f2vs = new Dictionary<string, string>();
                    f2vs.Add("detailPageUrl", "http://www.tuhu.cn/Shops/" + provinceCode + ".aspx");
                    f2vs.Add("detailPageName", provinceCode + provinceName);
                    f2vs.Add("provinceCode", provinceCode);
                    f2vs.Add("provinceName", provinceName);
                    resultEW.AddRow(f2vs);
                }
            }
        }
Example #15
0
        /// <summary>
        /// Reads the city links (<c>//div[@id="listTab"]/ul[2]/li/a</c>) from each locally
        /// stored province page and appends one row per city to <paramref name="resultEW"/>.
        /// The city code is the last path segment of the link up to its first dot.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply provinceCode and provinceName and identify the stored pages.</param>
        /// <param name="pageSourceDir">Not used by this method; retained so existing callers keep compiling.</param>
        /// <param name="resultEW">Writer receiving one row per city.</param>
        private void ReadCityPages(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
        {
            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string provinceCode = row["provinceCode"];
                string provinceName = row["provinceName"];

                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection allCityNodes = htmlDoc.DocumentNode.SelectNodes("//div[@id=\"listTab\"]/ul[2]/li/a");
                if (allCityNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode cityNode in allCityNodes)
                {
                    string   cityUrl            = cityNode.Attributes["href"].Value;
                    string[] cityUrlPieces      = cityUrl.Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries);
                    string[] cityPageNamePieces = cityUrlPieces[cityUrlPieces.Length - 1].Split(new string[] { "." }, StringSplitOptions.RemoveEmptyEntries);
                    string   cityCode           = cityPageNamePieces[0];
                    string   cityName           = cityNode.InnerText;

                    Dictionary<string, string> f2vs = new Dictionary<string, string>();
                    f2vs.Add("detailPageUrl", cityUrl);
                    f2vs.Add("detailPageName", cityCode + cityName);
                    f2vs.Add("provinceCode", provinceCode);
                    f2vs.Add("provinceName", provinceName);
                    f2vs.Add("cityCode", cityCode);
                    f2vs.Add("cityName", cityName);
                    resultEW.AddRow(f2vs);
                }
            }
        }
        /// <summary>
        /// Writes "Id详情页.xlsx" to the export directory: one row per page index from 1 through
        /// the value returned by <c>GetTotalPageCount</c>, holding the search page URL and the
        /// page index as the page name.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        private bool GenerateUrlListFile()
        {
            string exportFolder = this.RunPage.GetExportDir();

            // NOTE(review): this value is not used below; the call is retained as in the original.
            string detailSourceFolder = this.RunPage.GetDetailSourceFileDir();

            string[] columnNames = new string[]
            {
                "detailPageUrl",
                "detailPageName",
                "cookie",
                "grabStatus",
                "giveUpGrab"
            };
            Dictionary<string, int> columnIndexes = CommonUtil.InitStringIndexDic(columnNames);

            string      workbookPath = Path.Combine(exportFolder, "Id详情页.xlsx");
            ExcelWriter listWriter   = new ExcelWriter(workbookPath, "List", columnIndexes, null);

            // The page count is fetched once; page indexes are one-based and inclusive.
            int lastPage = this.GetTotalPageCount();
            for (int page = 1; page <= lastPage; page++)
            {
                Dictionary<string, string> pageRow = new Dictionary<string, string>
                {
                    { "detailPageUrl", this.GetShopSearchPageUrl(page) },
                    { "detailPageName", page.ToString() }
                };
                listWriter.AddRow(pageRow);
            }

            listWriter.SaveToDisk();
            return true;
        }
        /// <summary>
        /// Writes one row per not-yet-seen project found in <paramref name="listNodeList"/>.
        /// The project name is the trimmed text of each node's first child anchor; the project
        /// URL is <c>http://www.gbmap.org</c> plus the href of its second child anchor.
        /// </summary>
        /// <param name="listNodeList">Nodes that each contain the two anchors described above.</param>
        /// <param name="pageNum">Written to the "pageNum" column of every row.</param>
        /// <param name="projectUrlToNull">URLs already written; updated with each new URL (values are always null).</param>
        /// <param name="ew">Writer receiving the rows.</param>
        private void GetProjectItem(HtmlNodeCollection listNodeList, string pageNum, Dictionary <string, string> projectUrlToNull, ExcelWriter ew)
        {
            foreach (HtmlNode itemNode in listNodeList)
            {
                HtmlNode nameAnchor = itemNode.SelectSingleNode("./a[1]");
                string   name       = nameAnchor.InnerText.Trim();

                HtmlNode urlAnchor = itemNode.SelectSingleNode("./a[2]");
                string   url       = "http://www.gbmap.org" + urlAnchor.GetAttributeValue("href", "");

                // Skip projects that were already emitted.
                if (projectUrlToNull.ContainsKey(url))
                {
                    continue;
                }

                projectUrlToNull.Add(url, null);

                Dictionary<string, object> projectRow = new Dictionary<string, object>
                {
                    { "detailPageUrl", url },
                    { "detailPageName", url },
                    { "projectName", name },
                    { "pageNum", pageNum }
                };
                ew.AddRow(projectRow);
            }
        }
        /// <summary>
        /// Collects the paging links (<c>//div[@class="gclear pp bt center f14"]/a</c>) from
        /// every locally stored page, prefixes each href with
        /// <c>https://chengyu.911cha.com/</c> and writes each distinct resulting URL once.
        /// The writer is saved to disk once all rows are processed.
        /// </summary>
        /// <param name="listSheet">Sheet identifying the stored pages to parse.</param>
        private void GetListPageUrls(IListSheet listSheet)
        {
            ExcelWriter resultEW = this.CreateResultWriter();

            // Every URL already written; used to suppress duplicates.
            Dictionary<string, bool> pageUrlDic = new Dictionary<string, bool>();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection pageUrlNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"gclear pp bt center f14\"]/a");
                if (pageUrlNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode pageUrlNode in pageUrlNodes)
                {
                    string pageUrl     = pageUrlNode.GetAttributeValue("href", "");
                    string fullPageUrl = "https://chengyu.911cha.com/" + pageUrl;
                    if (pageUrlDic.ContainsKey(fullPageUrl))
                    {
                        continue;
                    }

                    pageUrlDic.Add(fullPageUrl, true);

                    Dictionary<string, string> resultRow = new Dictionary<string, string>();
                    resultRow.Add("detailPageUrl", fullPageUrl);
                    resultRow.Add("detailPageName", fullPageUrl);
                    resultEW.AddRow(resultRow);
                }
            }

            resultEW.SaveToDisk();
        }
        /// <summary>
        /// Writes "企业数据_企业工商信息列表页.xlsx" into the export directory: one search-page row per
        /// distinct company name found among the rows of <paramref name="listSheet"/> that are not
        /// marked as given up.
        /// </summary>
        /// <param name="listSheet">Grabbed list rows supplying the company fields.</param>
        /// <returns>Always <c>true</c>.</returns>
        public override bool AfterAllGrab(IListSheet listSheet)
        {
            String exportFolder = this.RunPage.GetExportDir();

            // Column name -> column position in the output sheet.
            var columns = new Dictionary<string, int>
            {
                { "detailPageUrl", 0 },
                { "detailPageName", 1 },
                { "cookie", 2 },
                { "grabStatus", 3 },
                { "giveUpGrab", 4 },
                { "CompanyId", 5 },
                { "企业名称", 6 },
                { "统一社会信用代码", 7 },
                { "企业法定代表人", 8 },
                { "企业登记注册类型", 9 },
                { "企业注册属地", 10 },
                { "企业经营地址", 11 },
                { "addressParts", 12 },
            };
            string      workbookPath = Path.Combine(exportFolder, "企业数据_企业工商信息列表页.xlsx");
            ExcelWriter writer       = new ExcelWriter(workbookPath, "List", columns, null);

            // Not used below; the read is retained from the original flow.
            string detailPageUrlColumnName = SysConfig.DetailPageUrlFieldName;
            var    seenCompanies           = new Dictionary<string, string>();

            for (int index = 0; index < listSheet.RowCount; index++)
            {
                Dictionary<string, string> source = listSheet.GetRow(index);
                if ("Y".Equals(source[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                // De-duplication key: trimmed name with the two marker substrings removed.
                string name = source["企业名称"].Trim().Replace("造价企业", "").Replace("测试企业", "");
                if (seenCompanies.ContainsKey(name))
                {
                    continue;
                }

                seenCompanies.Add(name, null);
                writer.AddRow(new Dictionary<string, string>
                {
                    { "detailPageUrl", "https://www.tianyancha.com/search?key=" + name },
                    { "detailPageName", source["CompanyId"] },
                    { "CompanyId", source["CompanyId"] },
                    { "企业名称", name },
                    { "统一社会信用代码", source["统一社会信用代码"] },
                    { "企业法定代表人", source["企业法定代表人"] },
                    { "企业登记注册类型", source["企业登记注册类型"] },
                    { "企业注册属地", source["企业注册属地"] },
                    { "企业经营地址", source["企业经营地址"] },
                    { "addressParts", this.GetAddresParts(source) },
                });
            }

            writer.SaveToDisk();

            return true;
        }
Example #20
0
        /// <summary>
        /// Reads company names from the source workbook and writes one Glassdoor review-search row per
        /// distinct name to the destination workbook. Both paths come from <c>this.Parameters</c>, a
        /// comma-separated string of the form "sourcePath,destPath".
        /// </summary>
        private void GenerateListPageUrls()
        {
            // NOTE(review): this is a hard-coded browser session cookie checked into source — confirm it
            // should live here rather than in configuration.
            const string sessionCookie = "ARPNTS=1952819392.64288.0000; ARPNTS_AB=115; gdId=94517b85-9d89-47c1-a5ab-2a04b242c067; trs=direct:direct:direct:2018-07-15+23%3A50%3A22.919:undefined:undefined; _ga=GA1.2.216399378.1531723803; __qca=P0-1262345758-1531723804448; G_ENABLED_IDPS=google; __gads=ID=62251b7c5d596d61:T=1531723836:S=ALNI_MZk81H-OcTT9PjdVFK8PYIrVGTx1A; __gdpopuc=1; cto_lwid=8e5c6f44-854b-492e-be0f-09a9dc915819; rm=bGl4aW4xNTUzQGdtYWlsLmNvbToxNTYzMjkyNzgzNzgxOjVhMDQ1MWI1NjBiYjYzYzE3NjM3YmEzOThjNTJlM2Ix; uc=8F0D0CFA50133D96DAB3D34ABA1B873399807652C6C76982808553CADAB58BBB131EFE7DE1E6A4B95851EB3294212EB393007ED539985D9CDE873DE04D4FC71FEE18FB9F0BDE4138B3E34D8411CDEA90F25EDE93274F0D5D5FDED9B003FBA6F43CA9014AC0BB0289EB0204D279873038C3CF7E94AE6F099E0174A86BB3453633759C8511C218159EA514952BE5A78210E84BCCC56AAAAD09; _mibhv=anon-1531735166141-5684441656_6890; JSESSIONID=E01A35E3A52310CD24E42EC5FF252052; _uac=00000164ca63c0a6a0bcb163ea7dc134; GSESSIONID=E01A35E3A52310CD24E42EC5FF252052; _gid=GA1.2.739342608.1532403870; ht=%7B%22quantcast%22%3A%5B%22D%22%5D%7D; JSESSIONID_KYWI_APP=B31D8DA6C274B6196C84875AE7D7942A; JSESSIONID_JX_APP=8E738CEAE7DF1A613C3E7B6006442DE4; cass=1; AWSALB=4plUYq9nqfzCEW/AJ4UDiC11DqFrHS0JteBY5hN5Ok2HoX9iLI04hye/Bpq8j7Syv8PnKkRAsMcWCNGXkxlGMnVvbn1nPp99yMD5TcSM4g+ORjkL9rbNGIoiSAN4qYGv/Ir11PRBDXxXJIp8E0TRnpuNo3fcuCImeBiC/rzpGYMOeTyaTw32g+C3rlEvLCmoohAKaUTrzpDwu1OXM6sBFG9S5jfl0NtU/cmqv5muVCjMBcqr8FoqD9WjlkUkNe0=";

            string[] parts = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            string   inputPath  = parts[0];
            string   outputPath = parts[1];

            ExcelReader reader = new ExcelReader(inputPath);
            ExcelWriter writer = this.GetExcelWriter(outputPath);

            var seenCompanies = new Dictionary<string, string>();

            int total = reader.GetRowCount();
            for (int index = 0; index < total; index++)
            {
                string company = reader.GetFieldValues(index)["Company Name"];
                if (seenCompanies.ContainsKey(company))
                {
                    continue;
                }

                seenCompanies.Add(company, null);

                // The encoded name is used for both the typedKeyword and sc.keyword query parameters.
                string encoded   = CommonUtil.UrlEncode(company);
                string searchUrl = "https://www.glassdoor.com/Reviews/company-reviews.htm?suggestCount=0&suggestChosen=false&clickSource=searchBtn&typedKeyword=" + encoded + "&sc.keyword=" + encoded + "&locT=&locId=&jobType=";

                writer.AddRow(new Dictionary<string, string>
                {
                    { "detailPageUrl", searchUrl },
                    { "detailPageName", searchUrl },
                    { "cookie", sessionCookie },
                    { "Company_Name", company },
                });
            }

            writer.SaveToDisk();
        }
        /// <summary>
        /// Extracts the title and the optional description paragraph from every locally stored person
        /// page and writes them, together with the person/era columns of the list row, to the result
        /// workbook.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply "renWu", "shiDai" and the detail page URL.</param>
        /// <remarks>
        /// Parsing failures propagate to the caller unchanged. The previous
        /// <c>catch (Exception ex) { throw ex; }</c> wrapper only reset the stack trace and has been removed.
        /// </remarks>
        private void GetRenWuInfos(IListSheet listSheet)
        {
            // NOTE(review): the value is unused here; the call is kept in case
            // GetDetailSourceFileDir has side effects — confirm and remove.
            string      sourceDir = this.RunPage.GetDetailSourceFileDir();
            ExcelWriter resultEW  = this.CreateRenWuResultWriter();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary <string, string>  listRow = listSheet.GetRow(i);
                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                HtmlNode mainInfoNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"info_txt2 clearfix\"]");
                HtmlNode titleNode    = mainInfoNode.SelectSingleNode("./h2");
                string   renWuTitle   = CommonUtil.HtmlDecode(titleNode.InnerText).Trim();

                // The description paragraph is optional; a missing one yields an empty string.
                HtmlNode descriptionNode = mainInfoNode.SelectSingleNode("./p");
                string   description     = descriptionNode == null ? "" : CommonUtil.HtmlDecode(descriptionNode.InnerText).Trim();

                Dictionary <string, string> resultRow = new Dictionary <string, string>();
                resultRow.Add("人物", listRow["renWu"]);
                resultRow.Add("时代", listRow["shiDai"]);
                resultRow.Add("人物页面标题", renWuTitle);
                resultRow.Add("简介", description);
                resultRow.Add("url", listRow[SysConfig.DetailPageUrlFieldName]);
                resultEW.AddRow(resultRow);
            }

            resultEW.SaveToDisk();
        }
Example #22
0
        /// <summary>
        /// Writes a single-column ("word") workbook to f:\c.xlsx from a fixed list of domain names.
        /// </summary>
        public void Test()
        {
            Dictionary<string, int> columns = CommonUtil.InitStringIndexDic(new string[] { "word" });
            ExcelWriter writer = new ExcelWriter("f:\\c.xlsx", "List", columns);

            string[] domains =
            {
                "sina.com.cn",
                "xinhua.com",
                "twitter.com",
                "amazon.com",
                "baidu.com",
                "nytimes.com",
                "jd.com",
                "tmall.com",
                "sohu.com",
                "qq.com",
                "taobao.com",
                "tianya.com",
                "bustbuy.com"
            };

            // NOTE(review): iteration starts at index 1, so the first entry is never written —
            // confirm whether skipping it is intended.
            int position = 1;
            while (position < domains.Length)
            {
                // Only the first tab-separated field of each entry is written.
                string[] fields = domains[position].Split(new string[] { "\t" }, StringSplitOptions.RemoveEmptyEntries);
                writer.AddRow(new Dictionary<string, string> { { "word", fields[0] } });
                position++;
            }

            writer.SaveToDisk();
        }
Example #23
0
        /// <summary>
        /// For every locally stored book page, writes one result row per volume link found in the
        /// "info_cate clearfix" block, carrying over the "shiShu" and "leiXing" columns of the list row.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows identify the book pages to scan.</param>
        private void GetShiShuDetailPageUrls(IListSheet listSheet)
        {
            // The directory value is not consumed below; the call is retained so the call sequence is unchanged.
            string      sourceDir = this.RunPage.GetDetailSourceFileDir();
            ExcelWriter writer    = this.CreateResultWriter();

            for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
            {
                Dictionary<string, string>   bookRow = listSheet.GetRow(rowIndex);
                HtmlAgilityPack.HtmlDocument page    = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);

                foreach (HtmlNode anchor in page.DocumentNode.SelectNodes("//div[@class=\"info_cate clearfix\"]/dl/dd/a"))
                {
                    string volumeName = CommonUtil.HtmlDecode(anchor.InnerText).Trim();
                    string volumeUrl  = "http://www.guoxuedashi.com" + anchor.GetAttributeValue("href", "");

                    writer.AddRow(new Dictionary<string, string>
                    {
                        { "detailPageUrl", volumeUrl },
                        { "detailPageName", volumeUrl },
                        { "shiShu", bookRow["shiShu"] },
                        { "leiXing", bookRow["leiXing"] },
                        { "juan", volumeName },
                    });
                }
            }

            writer.SaveToDisk();
        }
        /// <summary>
        /// Parses the locally stored page of every row not marked as given up, and writes the entry's
        /// id, title and summary text to the result workbook. Pages containing the "page does not exist"
        /// marker and pages without a summary block are logged and skipped.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply the detail page URL, "yearValue" and "yearName".</param>
        /// <remarks>
        /// Any exception raised while parsing a page is logged and then rethrown with <c>throw;</c>,
        /// so the original stack trace is preserved (the previous <c>throw ex;</c> reset it).
        /// </remarks>
        private void GetYearInfos(IListSheet listSheet)
        {
            string sourceDir = this.RunPage.GetDetailSourceFileDir();

            ExcelWriter resultEW = this.CreateResultWriter();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary <string, string> listRow = listSheet.GetRow(i);
                bool   giveUp        = "Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]);
                string detailPageUrl = listRow[SysConfig.DetailPageUrlFieldName];
                if (giveUp)
                {
                    continue;
                }

                try
                {
                    string localFilePath = this.RunPage.GetFilePath(detailPageUrl, sourceDir);
                    string html          = FileHelper.GetTextFromFile(localFilePath, Encoding.UTF8);
                    if (html.Contains("您所访问的页面不存在"))
                    {
                        this.RunPage.InvokeAppendLogText("放弃解析此页, 所访问的页面不存在, pageUrl = " + detailPageUrl, LogLevelType.Error, true);
                        continue;
                    }

                    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                    htmlDoc.LoadHtml(html);

                    HtmlNode mainInfoNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"lemma-summary\"]");
                    if (mainInfoNode == null)
                    {
                        this.RunPage.InvokeAppendLogText("此词条不存在摘要信息, pageUrl = " + detailPageUrl, LogLevelType.Error, true);
                        continue;
                    }

                    // Entry id and title are carried as data-* attributes on this node.
                    HtmlNode itemBaseInfoNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"lemmaWgt-promotion-rightPreciseAd\"]");
                    string   itemId           = itemBaseInfoNode.GetAttributeValue("data-lemmaid", "");
                    string   itemName         = itemBaseInfoNode.GetAttributeValue("data-lemmatitle", "");

                    string mainInfo = CommonUtil.HtmlDecode(mainInfoNode.InnerText).Trim();

                    Dictionary <string, string> newRow = new Dictionary <string, string>();
                    newRow.Add("url", detailPageUrl);
                    newRow.Add("yearValue", listRow["yearValue"]);
                    newRow.Add("yearName", listRow["yearName"]);
                    newRow.Add("itemId", itemId);
                    newRow.Add("itemName", itemName);
                    newRow.Add("mainInfo", mainInfo);
                    resultEW.AddRow(newRow);
                }
                catch (Exception ex)
                {
                    this.RunPage.InvokeAppendLogText(ex.Message + ". 解析出错, pageUrl = " + detailPageUrl, LogLevelType.Error, true);

                    // Rethrow without resetting the stack trace.
                    throw;
                }
            }

            resultEW.SaveToDisk();
        }
        /// <summary>
        /// Writes "安居客城市列表.xlsx" into the export directory: one row (code, name, url) per distinct
        /// city link found on the locally stored pages of rows not marked as given up.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows identify the pages to scan.</param>
        /// <remarks>
        /// The city code is the URL segment between "com/" and "/commu". Links where that segment
        /// cannot be located are skipped. Previously a missing "com/" produced a wrong code, and a
        /// "/commu" located before the segment start made <c>Substring</c> throw. Parsing failures
        /// propagate unchanged; the former <c>catch (Exception ex) { throw ex; }</c> wrapper only
        /// reset the stack trace and has been removed.
        /// </remarks>
        private void GetCityList(IListSheet listSheet)
        {
            String exportDir     = this.RunPage.GetExportDir();
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

            Dictionary <string, int> resultColumnDic = new Dictionary <string, int>();

            resultColumnDic.Add("code", 0);
            resultColumnDic.Add("name", 1);
            resultColumnDic.Add("url", 2);
            string      resultFilePath = Path.Combine(exportDir, "安居客城市列表.xlsx");
            ExcelWriter resultEW       = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

            Dictionary <string, string> urlDic = new Dictionary <string, string>();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary <string, string> row = listSheet.GetRow(i);
                string detailUrl = row["detailPageUrl"];
                bool   giveUp    = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                // NOTE(review): the path is not used below; the call is kept in case GetFilePath
                // has side effects — confirm and remove.
                string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

                HtmlAgilityPack.HtmlDocument htmlDoc      = this.RunPage.GetLocalHtmlDocument(listSheet, i);
                HtmlNodeCollection           allCityNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"cl-c-list\"]/ul[@class=\"cl-c-l-ul\"]/li[@class=\"cl-c-l-li\"]/a");

                for (int j = 0; j < allCityNodes.Count; j++)
                {
                    HtmlNode cityNode = allCityNodes[j];
                    string   url      = cityNode.GetAttributeValue("href", "");

                    int hostEndIndex     = url.IndexOf("com/", StringComparison.Ordinal);
                    int cityCodeEndIndex = url.IndexOf("/commu", StringComparison.Ordinal);
                    if (hostEndIndex < 0)
                    {
                        // No "com/" marker: there is no reliable start for the city code.
                        continue;
                    }

                    int cityCodeFromIndex = hostEndIndex + 4;
                    if (cityCodeEndIndex < cityCodeFromIndex)
                    {
                        // "/commu" is missing or sits before the code segment; Substring would fail.
                        continue;
                    }

                    string code = url.Substring(cityCodeFromIndex, cityCodeEndIndex - cityCodeFromIndex);
                    string name = CommonUtil.HtmlDecode(cityNode.InnerText.Trim()).Trim();
                    if (!urlDic.ContainsKey(url))
                    {
                        urlDic.Add(url, null);
                        Dictionary <string, string> f2vs = new Dictionary <string, string>();
                        f2vs.Add("code", code);
                        f2vs.Add("name", name);
                        f2vs.Add("url", url);
                        resultEW.AddRow(f2vs);
                    }
                }
            }
            resultEW.SaveToDisk();
        }
        /// <summary>
        /// Writes "各省企业个数.xlsx" into the export directory: for every row not marked as given up,
        /// the region/qualification columns of the row plus the company total read from the text that
        /// follows the paging form on the locally stored page.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply the region and qualification columns.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool GetProvinceCompCountList(IListSheet listSheet)
        {
            String exportFolder = this.RunPage.GetExportDir();

            // Column name -> column position in the output sheet.
            var columns = new Dictionary<string, int>
            {
                { "regionId", 0 },
                { "regionName", 1 },
                { "regionFullName", 2 },
                { "aptCode", 3 },
                { "aptScope", 4 },
                { "companyCount", 5 },
            };

            string workbookPath = Path.Combine(exportFolder, "各省企业个数.xlsx");

            var formats = new Dictionary<string, string> { { "companyCount", "#,##0" } };
            ExcelWriter writer = new ExcelWriter(workbookPath, "List", columns, formats);

            string urlColumn = SysConfig.DetailPageUrlFieldName;

            // The count is the text between this marker and the next comma.
            const string totalMarker = "\"$total\":";

            for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
            {
                Dictionary<string, string> source = listSheet.GetRow(rowIndex);
                if ("Y".Equals(source[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                // url and cookie are read but not used further; the lookups are retained as in the original flow.
                string url            = source[urlColumn];
                string regionId       = source["regionId"];
                string regionName     = source["regionName"];
                string regionFullName = source["regionFullName"];
                string aptCode        = source["aptCode"];
                string aptScope       = source["aptScope"];
                string cookie         = source["cookie"];

                HtmlAgilityPack.HtmlDocument page = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);

                // The total lives in the second sibling after the paging form.
                HtmlNode pagingForm = page.DocumentNode.SelectSingleNode("//form[@class=\"pagingform\"]");
                string   payload    = pagingForm.NextSibling.NextSibling.InnerText;

                int countStart   = payload.IndexOf(totalMarker) + totalMarker.Length;
                int countEnd     = payload.IndexOf(",", countStart);
                int companyCount = int.Parse(payload.Substring(countStart, countEnd - countStart));

                writer.AddRow(new Dictionary<string, object>
                {
                    { "regionId", regionId },
                    { "regionName", regionName },
                    { "regionFullName", regionFullName },
                    { "aptCode", aptCode },
                    { "aptScope", aptScope },
                    { "companyCount", companyCount },
                });
            }

            writer.SaveToDisk();

            return true;
        }
Example #27
0
        /// <summary>
        /// Reads the locally stored JSON list pages for one point and writes every distinct restaurant
        /// (keyed by its "id") to the point's shop workbook.
        /// </summary>
        /// <param name="subCategoryFilePath">Passed to <c>CreatePointShopsWriter</c> to obtain the output writer.</param>
        /// <param name="detailPageUrl">Not used by this method.</param>
        /// <param name="pageCount">Number of list pages to read; page indexes run from 0 to <paramref name="pageCount"/> - 1.</param>
        /// <param name="pointShopDir">Directory in which the downloaded list pages are looked up.</param>
        /// <param name="urlFormat">Passed to <c>GetNextListPageUrl</c> to build each list page URL.</param>
        /// <param name="lat">Passed to <c>GetNextListPageUrl</c>.</param>
        /// <param name="lng">Passed to <c>GetNextListPageUrl</c>.</param>
        /// <remarks>
        /// Parsing failures propagate to the caller unchanged. The previous
        /// <c>catch (Exception ex) { throw ex; }</c> wrapper only reset the stack trace and has been removed.
        /// </remarks>
        private void SaveShopsToPointFile(string subCategoryFilePath, string detailPageUrl, int pageCount, string pointShopDir, string urlFormat, string lat, string lng)
        {
            ExcelWriter pointShopsEW           = this.CreatePointShopsWriter(subCategoryFilePath);
            Dictionary <string, string> urlDic = new Dictionary <string, string>();

            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
            {
                string nextListPageUrl = this.GetNextListPageUrl(urlFormat, lat, lng, pageIndex);
                string localPath       = this.RunPage.GetFilePath(nextListPageUrl, pointShopDir);
                string pageText        = FileHelper.GetTextFromFile(localPath);

                JObject rootJo    = JObject.Parse(pageText);
                JArray  itemArray = rootJo.GetValue("items") as JArray;

                for (int j = 0; j < itemArray.Count; j++)
                {
                    // Items without a "restaurant" object are skipped.
                    JObject itemJo = (itemArray[j] as JObject).GetValue("restaurant") as JObject;
                    if (itemJo == null)
                    {
                        continue;
                    }

                    string address     = itemJo.GetValue("address").ToString();
                    string description = itemJo.GetValue("description").ToString();
                    string id          = itemJo.GetValue("id").ToString();
                    string latitude    = itemJo.GetValue("latitude").ToString();
                    string longitude   = itemJo.GetValue("longitude").ToString();
                    string name        = itemJo.GetValue("name").ToString();

                    // "phone" and "promotion_info" are optional; an absent property becomes an empty string.
                    JToken phoneToken     = itemJo.GetValue("phone");
                    string phone          = phoneToken == null ? "" : phoneToken.ToString();
                    JToken promotionToken = itemJo.GetValue("promotion_info");
                    string promotion_info = promotionToken == null ? "" : promotionToken.ToString();

                    if (!urlDic.ContainsKey(id))
                    {
                        urlDic.Add(id, null);

                        Dictionary <string, string> f2vs = new Dictionary <string, string>();
                        f2vs.Add("address", address);
                        f2vs.Add("description", description);
                        f2vs.Add("id", id);
                        f2vs.Add("latitude", latitude);
                        f2vs.Add("longitude", longitude);
                        f2vs.Add("name", name);
                        f2vs.Add("phone", phone);
                        f2vs.Add("promotion_info", promotion_info);

                        pointShopsEW.AddRow(f2vs);
                    }
                }
            }
            pointShopsEW.SaveToDisk();
        }
Example #28
0
        /// <summary>
        /// Writes "教育_本科_专业_jhcee_com.xlsx" into the export directory: for every row not marked as
        /// given up, one row per element of the "data" array in the locally stored JSON page, pointing
        /// at the loadByParentId URL for that element's id.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply the detail page URL plus the "name" and "id" columns.</param>
        /// <remarks>
        /// Parsing failures propagate to the caller unchanged. The previous
        /// <c>catch (Exception ex) { throw ex; }</c> wrapper only reset the stack trace and has been removed.
        /// </remarks>
        private void GetList(IListSheet listSheet)
        {
            String exportDir     = this.RunPage.GetExportDir();
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

            Dictionary <string, int> resultColumnDic = new Dictionary <string, int>();

            resultColumnDic.Add("detailPageUrl", 0);
            resultColumnDic.Add("detailPageName", 1);
            resultColumnDic.Add("cookie", 2);
            resultColumnDic.Add("grabStatus", 3);
            resultColumnDic.Add("giveUpGrab", 4);
            resultColumnDic.Add("学科", 5);
            resultColumnDic.Add("学科id", 6);
            resultColumnDic.Add("门类", 7);
            resultColumnDic.Add("门类id", 8);
            string      resultFilePath = Path.Combine(exportDir, "教育_本科_专业_jhcee_com.xlsx");
            ExcelWriter resultEW       = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary <string, string> row = listSheet.GetRow(i);
                string detailUrl = row["detailPageUrl"];
                bool   giveUp    = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                string  localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);
                string  pageFileText  = FileHelper.GetTextFromFile(localFilePath);
                JObject rootJo        = JObject.Parse(pageFileText);

                JArray itemJsons = rootJo["data"] as JArray;
                foreach (JObject itemJson in itemJsons)
                {
                    string name = itemJson["name"].ToString();
                    string id   = itemJson["id"].ToString();

                    // Read although unused, so a page missing "parentId" still fails as before.
                    string parentId = itemJson["parentId"].ToString();

                    Dictionary <string, string> f2vs = new Dictionary <string, string>();
                    f2vs.Add("detailPageUrl", "http://www.jhcee.com/specialized/loadByParentId.json?parentId=" + id);
                    f2vs.Add("detailPageName", id);
                    f2vs.Add("门类", name);
                    f2vs.Add("门类id", id);
                    f2vs.Add("学科", row["name"]);
                    f2vs.Add("学科id", row["id"]);
                    resultEW.AddRow(f2vs);
                }
            }
            resultEW.SaveToDisk();
        }
Example #29
0
        /// <summary>
        /// Writes "济南楼盘_楼列表页.xlsx" into the export directory: for every row not marked as given up
        /// whose locally stored page has an "allpage" input, one row per building-list page
        /// (1 through the page count read from that input).
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply the detail page URL, "projectId" and "projectName".</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool GetBuildingListPageUrls(IListSheet listSheet)
        {
            String exportFolder = this.RunPage.GetExportDir();

            // Column name -> column position in the output sheet.
            var columns = new Dictionary<string, int>
            {
                { "detailPageUrl", 0 },
                { "detailPageName", 1 },
                { "cookie", 2 },
                { "grabStatus", 3 },
                { "giveUpGrab", 4 },
                { "projectId", 5 },
                { "projectName", 6 },
                { "pageIndex", 7 },
            };
            string workbookPath = Path.Combine(exportFolder, "济南楼盘_楼列表页.xlsx");
            var    formats      = new Dictionary<string, string>();
            ExcelWriter writer  = new ExcelWriter(workbookPath, "List", columns, formats);

            string urlColumn = SysConfig.DetailPageUrlFieldName;

            for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
            {
                Dictionary<string, string> source = listSheet.GetRow(rowIndex);
                if ("Y".Equals(source[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                // url is read but not used further; the lookup is retained as in the original flow.
                string url         = source[urlColumn];
                string projectId   = source["projectId"];
                string projectName = source["projectName"];

                HtmlAgilityPack.HtmlDocument page = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);
                HtmlNode pageCountInput = page.DocumentNode.SelectSingleNode("//input[@id=\"allpage\"]");
                if (pageCountInput == null)
                {
                    // Pages without the page-count input produce no rows.
                    continue;
                }

                int pageCount = int.Parse(pageCountInput.GetAttributeValue("value", ""));
                for (int offset = 0; offset < pageCount; offset++)
                {
                    // Page numbers in the URL are 1-based.
                    string pageNumber = (offset + 1).ToString();

                    writer.AddRow(new Dictionary<string, object>
                    {
                        { "detailPageUrl", "http://www.jnfdc.gov.cn/onsaling/show_" + pageNumber + ".shtml?prjno=" + projectId },
                        { "detailPageName", projectId + "_" + pageNumber },
                        { "projectId", projectId },
                        { "projectName", projectName },
                        { "pageIndex", pageNumber },
                    });
                }
            }

            writer.SaveToDisk();

            return true;
        }
Example #30
0
        /// <summary>
        /// Writes "济南楼盘_楼盘详情.xlsx" into the export directory: for every row not marked as given up,
        /// the project details read from fixed cells of the "message_table" table on the locally stored
        /// page, plus the "projectId" and "sellable" columns of the list row.
        /// </summary>
        /// <param name="listSheet">Sheet whose rows supply the detail page URL, "projectId" and "sellable".</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool GetLoupanDetailInfos(IListSheet listSheet)
        {
            String exportFolder = this.RunPage.GetExportDir();

            // Column name -> column position in the output sheet.
            var columns = new Dictionary<string, int>
            {
                { "项目ID", 0 },
                { "项目名称", 1 },
                { "项目地址", 2 },
                { "企业名称", 3 },
                { "所在区县", 4 },
                { "项目规模", 5 },
                { "总栋数", 6 },
                { "可售套数", 7 },
            };
            string workbookPath = Path.Combine(exportFolder, "济南楼盘_楼盘详情.xlsx");
            var    formats      = new Dictionary<string, string>();
            ExcelWriter writer  = new ExcelWriter(workbookPath, "List", columns, formats);

            string urlColumn = SysConfig.DetailPageUrlFieldName;

            for (int rowIndex = 0; rowIndex < listSheet.RowCount; rowIndex++)
            {
                Dictionary<string, string> source = listSheet.GetRow(rowIndex);
                if ("Y".Equals(source[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                // url is read but not used further; the lookup is retained as in the original flow.
                string url       = source[urlColumn];
                string projectId = source["projectId"];
                string sellable  = source["sellable"];

                HtmlAgilityPack.HtmlDocument page      = this.RunPage.GetLocalHtmlDocument(listSheet, rowIndex);
                HtmlNodeCollection           tableRows = page.DocumentNode.SelectNodes("//table[@class=\"message_table\"]/tr");

                // Each value sits in cell 1 or cell 3 of table rows 1 through 3.
                HtmlNodeCollection firstCells  = tableRows[1].SelectNodes("./td");
                string             projectName = firstCells[1].InnerText.Trim();
                string             address     = firstCells[3].InnerText.Trim();

                HtmlNodeCollection secondCells = tableRows[2].SelectNodes("./td");
                string             companyName = secondCells[1].InnerText.Trim();
                string             district    = secondCells[3].InnerText.Trim();

                HtmlNodeCollection thirdCells    = tableRows[3].SelectNodes("./td");
                string             projectSize   = thirdCells[1].InnerText.Trim();
                string             buildingCount = thirdCells[3].InnerText.Trim();

                writer.AddRow(new Dictionary<string, object>
                {
                    { "项目ID", projectId },
                    { "项目名称", projectName },
                    { "项目地址", address },
                    { "企业名称", companyName },
                    { "所在区县", district },
                    { "项目规模", projectSize },
                    { "总栋数", buildingCount },
                    { "可售套数", sellable },
                });
            }

            writer.SaveToDisk();

            return true;
        }