Пример #1
0
        /// <summary>
        /// Extracts qualification records from every locally saved detail page and writes them to a CSV file.
        /// </summary>
        /// <remarks>
        /// List rows whose give-up column equals "Y" are skipped. Within a page only table rows whose first
        /// cell has <c>data-header="序号"</c> are exported; cells missing at the end of a row are written as
        /// empty strings. Exceptions propagate to the caller with their original stack trace.
        /// </remarks>
        /// <param name="listSheet">List sheet whose rows identify the downloaded detail pages.</param>
        private void GetAllInfos(IListSheet listSheet)
        {
            CsvWriter cw = this.GetCsvExcelWriter();

            // Output columns, in the order of the cells that follow the index cell (td[1]..td[6]).
            string[] cellColumnNames = { "资质类别", "资质证书号", "资质名称", "发证日期", "证件有效期", "发证机关" };

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string companyId = row["companyId"];

                if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
                HtmlNodeCollection trNodeList = pageHtmlDoc.DocumentNode.SelectNodes("//table/tbody/tr");
                if (trNodeList == null)
                {
                    continue;
                }

                foreach (HtmlNode trNode in trNodeList)
                {
                    // SelectNodes yields null when nothing matches, so a row without <td> cells is skipped
                    // instead of failing with a NullReferenceException.
                    HtmlNodeCollection tdNodeList = trNode.SelectNodes("./td");
                    if (tdNodeList == null || tdNodeList.Count == 0)
                    {
                        continue;
                    }

                    if (tdNodeList[0].GetAttributeValue("data-header", "") != "序号")
                    {
                        continue;
                    }

                    Dictionary<string, string> f2vs = new Dictionary<string, string>();
                    f2vs.Add("CompanyId", companyId);
                    for (int c = 0; c < cellColumnNames.Length; c++)
                    {
                        int cellIndex = c + 1;
                        f2vs.Add(cellColumnNames[c], tdNodeList.Count <= cellIndex ? "" : tdNodeList[cellIndex].InnerText.Trim());
                    }
                    cw.AddRow(f2vs);
                }
            }

            cw.SaveToDisk();
        }
Пример #2
0
        /// <summary>
        /// Translates the comma-separated names found in the "fromName" column through <c>this.Dic</c>
        /// and saves the source names together with their translations as one CSV row.
        /// </summary>
        /// <param name="listRow">List row supplying "fromName"; it is also handed to <c>GetCsvWriter</c>.</param>
        /// <exception cref="Exception">A name (compared in lower case) has no entry in <c>this.Dic</c>.</exception>
        public override void GetDataByOtherAccessType(Dictionary<string, string> listRow)
        {
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();
            string joinedSourceNames = listRow["fromName"];
            List<string> translated = new List<string>();
            List<string> originals = new List<string>();

            foreach (string rawName in joinedSourceNames.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries))
            {
                string fromName = rawName.Trim();
                string lookupKey = fromName.ToLower();

                // Stop at the first name that cannot be translated.
                if (!this.Dic.ContainsKey(lookupKey))
                {
                    throw new Exception("无法翻译, fromName = " + fromName);
                }

                translated.Add(this.Dic[lookupKey]);
                originals.Add(fromName);
            }

            CsvWriter tempCsvWriter = this.GetCsvWriter(listRow);

            Dictionary<string, string> row = new Dictionary<string, string>
            {
                { "fromName", CommonUtil.StringArrayToString(originals.ToArray(), ", ") },
                { "toCode", CommonUtil.StringArrayToString(translated.ToArray(), ", ") }
            };

            tempCsvWriter.AddRow(row);
            tempCsvWriter.SaveToDisk();
        }
Пример #3
0
        /// <summary>
        /// Builds an item/tag matrix: one output column per distinct tag, with "1" in the cells of the tags
        /// an item carries.
        /// </summary>
        /// <remarks>
        /// <c>Parameters</c> is split on ',' into the source Excel path and the destination CSV path.
        /// The source is read twice: once to collect the distinct tags (in first-seen order) and once to
        /// write the rows. Tags are compared exactly as they appear between commas (no trimming).
        /// </remarks>
        /// <param name="listSheet">Not used by this method.</param>
        private void GetItemTagsTypes(IListSheet listSheet)
        {
            string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            string sourceFilePath = parameters[0];
            string destFilePath = parameters[1];
            string[] tagSeparators = new string[] { "," };

            ExcelReader er = new ExcelReader(sourceFilePath);
            int sourceRowCount = er.GetRowCount();

            // First pass: distinct tags in first-seen order. The set gives O(1) membership tests,
            // the list keeps the column order.
            List<string> tagList = new List<string>();
            HashSet<string> seenTags = new HashSet<string>();

            for (int i = 0; i < sourceRowCount; i++)
            {
                Dictionary<string, string> sourceRow = er.GetFieldValues(i);
                foreach (string itemTag in sourceRow["tags"].Split(tagSeparators, StringSplitOptions.RemoveEmptyEntries))
                {
                    if (seenTags.Add(itemTag))
                    {
                        tagList.Add(itemTag);
                    }
                }
            }

            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
            resultColumnDic.Add("url", 0);
            resultColumnDic.Add("itemId", 1);
            resultColumnDic.Add("itemName", 2);
            resultColumnDic.Add("tags", 3);
            for (int i = 0; i < tagList.Count; i++)
            {
                // Add (not the indexer) on purpose: a tag named like a fixed column is still rejected here.
                resultColumnDic.Add(tagList[i], i + 4);
            }

            CsvWriter itemTagMatrixCW = new CsvWriter(destFilePath, resultColumnDic);

            // Second pass: one output row per source row.
            for (int i = 0; i < sourceRowCount; i++)
            {
                Dictionary<string, string> sourceRow = er.GetFieldValues(i);

                Dictionary<string, string> resultRow = new Dictionary<string, string>();
                resultRow.Add("url", sourceRow["url"]);
                resultRow.Add("itemId", sourceRow["itemId"]);
                resultRow.Add("itemName", sourceRow["itemName"]);
                resultRow.Add("tags", sourceRow["tags"]);

                foreach (string itemTag in sourceRow["tags"].Split(tagSeparators, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Indexer instead of Add: a tag listed twice on the same item must not throw.
                    resultRow[itemTag] = "1";
                }
                itemTagMatrixCW.AddRow(resultRow);
            }

            itemTagMatrixCW.SaveToDisk();
        }
Пример #4
0
 /// <summary>
 /// Appends every point record to the given writer, in list order, and then saves the writer to disk.
 /// </summary>
 /// <param name="points">Records to write, one dictionary per row.</param>
 /// <param name="resultEW">Writer that receives the rows and is saved afterwards.</param>
 private void SavePointsToFile(List<Dictionary<string, string>> points, CsvWriter resultEW)
 {
     foreach (Dictionary<string, string> point in points)
     {
         resultEW.AddRow(point);
     }

     resultEW.SaveToDisk();
 }
Пример #5
0
        /// <summary>
        /// Copies the shop rows belonging to one city from the source CSV into a per-city result file.
        /// </summary>
        /// <remarks>
        /// <c>Parameters</c> is split on ',' into: source file path, export directory, city name.
        /// A source row is exported when its "city" value equals the city name exactly.
        /// </remarks>
        /// <param name="listSheet">Not used by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public override bool AfterAllGrab(IListSheet listSheet)
        {
            string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            string sourceFilePath = parameters[0];
            string exportDir = parameters[1];
            string cityName = parameters[2];

            // Output column name -> column position.
            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>
            {
                { "city", 0 },
                { "distrctName", 1 },
                { "shopName", 2 },
                { "shopCode", 3 },
                { "address", 4 },
                { "tel", 5 },
                { "shopType", 6 },
                { "commentNum", 7 },
                { "lat", 8 },
                { "lng", 9 },
                { "人均", 10 },
                { "口味", 11 },
                { "环境", 12 },
                { "服务", 13 }
            };

            // NOTE(review): the file is produced through CsvWriter but named ".xlsx" - confirm the intended format.
            string resultFilePath = Path.Combine(exportDir, "大众点评店铺信息" + cityName + ".xlsx");
            CsvWriter resultEW = new CsvWriter(resultFilePath, resultColumnDic);

            CsvReader cr = new CsvReader(sourceFilePath);
            int sourceRowCount = cr.GetRowCount();

            for (int i = 0; i < sourceRowCount; i++)
            {
                Dictionary<string, string> sourceRow = cr.GetFieldValues(i);
                if (sourceRow["city"] == cityName)
                {
                    resultEW.AddRow(sourceRow);
                }
            }

            resultEW.SaveToDisk();

            return true;
        }
Пример #6
0
        /// <summary>
        /// Merges the per-block CSV files under "&lt;exportDir&gt;/temp" into one CSV file and one text file
        /// holding a comma-separated list of JSON-style point objects.
        /// </summary>
        /// <remarks>
        /// For each list row the block file "x_y_z_time.csv" is read; every point row is appended to
        /// "爬取结果.csv" and rendered as one line of "爬取结果.txt".
        /// </remarks>
        /// <param name="exportDir">Directory containing the "temp" folder; also receives the merged files.</param>
        /// <param name="listSheet">List sheet providing the x, y, z and time of each block.</param>
        private void MergeInfoFile(string exportDir, IListSheet listSheet)
        {
            string allBlockInfoPath = Path.Combine(exportDir, "爬取结果.csv");
            // Same for every row, so it is computed once instead of inside the loop.
            string tempDir = Path.Combine(exportDir, "temp");
            StringBuilder ss = new StringBuilder();

            Dictionary<string, int> allBlockInfoDic = new Dictionary<string, int>();
            allBlockInfoDic.Add("x", 0);
            allBlockInfoDic.Add("y", 1);
            allBlockInfoDic.Add("z", 2);
            allBlockInfoDic.Add("xp", 3);
            allBlockInfoDic.Add("yp", 4);
            allBlockInfoDic.Add("v", 5);
            allBlockInfoDic.Add("time", 6);
            CsvWriter allBlockInfoCW = new CsvWriter(allBlockInfoPath, allBlockInfoDic);

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> listRow = listSheet.GetRow(i);
                string x = listRow["x"];
                string y = listRow["y"];
                string z = listRow["z"];
                string time = listRow["time"];
                string blockInfoPath = Path.Combine(tempDir, x + "_" + y + "_" + z + "_" + time + ".csv");
                CsvReader csvReader = new CsvReader(blockInfoPath);
                int pCount = csvReader.GetRowCount();

                for (int pIndex = 0; pIndex < pCount; pIndex++)
                {
                    Dictionary<string, string> pValues = csvReader.GetFieldValues(pIndex);
                    string xp = pValues["xp"];
                    string yp = pValues["yp"];
                    string v = pValues["v"];
                    allBlockInfoCW.AddRow(pValues);

                    // Entries are separated by ",<newline>"; nothing is emitted before the first one.
                    if (ss.Length != 0)
                    {
                        ss.Append(",");
                        ss.AppendLine();
                    }

                    int zoom = int.Parse(z);
                    var lng = this.ToBaiduX(int.Parse(x), int.Parse(xp), zoom);
                    var lat = this.ToBaiduY(int.Parse(y), int.Parse(yp), zoom);

                    // NOTE(review): number parsing/formatting here uses the current culture; in a culture
                    // with ',' as decimal separator the output would not be valid JSON - confirm.
                    ss.Append("    {\"x\":" + x + "." + xp + ",\"y\":" + y + "." + yp + ", \"lng\":" + lng + ",\"lat\":" + lat + ",\"count\":" + double.Parse(v).ToString() + "}");
                }
            }
            allBlockInfoCW.SaveToDisk();

            string allBlockInfoTextPath = Path.Combine(exportDir, "爬取结果.txt");

            FileHelper.SaveTextToFile(ss.ToString(), allBlockInfoTextPath);
        }
Пример #7
0
        /// <summary>
        /// Writes a single SKU record to a CSV file with a fixed six-column layout.
        /// </summary>
        /// <param name="filePath">Destination path passed to the CSV writer.</param>
        /// <param name="skuInfo">Field values of the SKU; written as the only row.</param>
        private void SaveSkuInfoToLocalFile(string filePath, Dictionary<string, string> skuInfo)
        {
            // Column name -> column position.
            Dictionary<string, int> layout = new Dictionary<string, int>
            {
                { "name", 0 },
                { "price", 1 },
                { "transportFee", 2 },
                { "monthSellCount", 3 },
                { "district", 4 },
                { "commentCount", 5 }
            };

            CsvWriter csv = new CsvWriter(filePath, layout);
            csv.AddRow(skuInfo);
            csv.SaveToDisk();
        }
Пример #8
0
        public async Task<DownloadResult> DownloadObjects([NotNull] string objectIdentifier, [NotNull][FromBody] ObjectSearchRequest query)
        {
            // Exports the objects matching the query as a CSV file, unless more objects match than the
            // schema's download limit allows; in that case only the counts are returned and File is null.
            var type = schemaRegistry.GetTypeByTypeIdentifier(objectIdentifier);
            var schema = schemaRegistry.GetSchemaByTypeIdentifier(objectIdentifier);
            var downloadLimit = schema.Description.DownloadLimit;
            var connector = schemaRegistry.GetConnector(objectIdentifier);

            // downloadLimit + 1 is passed as the second argument - presumably a cap on how far to count,
            // just enough to detect that the limit is exceeded (TODO confirm against the connector).
            var count = await connector.Count(query.GetFilters(), downloadLimit + 1).ConfigureAwait(false);

            if (count > downloadLimit)
            {
                return new DownloadResult
                {
                    File = null,
                    Count = (int)count,
                    CountLimit = downloadLimit,
                };
            }

            var results = await connector.Search(query.GetFilters(), query.GetSorts(), 0, downloadLimit).ConfigureAwait(false);

            // properties[i] and getters[i] are filled side by side by the same call and are filtered
            // by the same index below.
            var properties = new List<string>();
            var getters = new List<Func<object, object>>();

            PropertyHelpers.BuildGettersForProperties(type, "", x => x, properties, getters);

            // Indices of the excluded columns, kept in a set for O(1) lookups while filtering.
            var excludedIndices = new HashSet<int>(
                properties.Select((x, i) => (x, i))
                          .Where(x => query.ExcludedFields.Contains(x.x))
                          .Select(x => x.i));
            var filteredProperties = properties.Where((x, i) => !excludedIndices.Contains(i)).ToArray();
            var filteredGetters = getters.Where((x, i) => !excludedIndices.Contains(i)).ToArray();

            var csvWriter = new CsvWriter(filteredProperties);

            foreach (var item in results)
            {
                csvWriter.AddRow(filteredGetters.Select(f => PropertyHelpers.ToString(f, item)).ToArray());
            }

            return new DownloadResult
            {
                Count = count ?? 0,
                CountLimit = downloadLimit,
                File = new FileInfo
                {
                    Content = csvWriter.GetBytes(),
                    ContentType = "text/csv",
                    Name = $"{objectIdentifier}-{DateTime.UtcNow:yyyy-MM-dd-HHmm}.csv"
                }
            };
        }
Пример #9
0
        /// <summary>
        /// Registers a text in the language table, persists the table to the CSV file and returns the text's id.
        /// </summary>
        /// <param name="text">Text to register.</param>
        /// <returns>
        /// 0 when <paramref name="text"/> is null or empty; the existing id when the text is already
        /// registered; otherwise the newly allocated id.
        /// </returns>
        public static int AddText(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return 0;
            }

            // A text that is already registered would make langdic_id.Add throw after max_id and langdic
            // had been changed; reuse its id instead so the tables stay consistent.
            int existingId;
            if (langdic_id.TryGetValue(text, out existingId))
            {
                return existingId;
            }

            int id = ++max_id;

            langdic.Add(id, text);
            langdic_id.Add(text, id);
            writer.AddRow(new string[] { id.ToString(), text });

            // The whole table is rewritten on every addition.
            string contents = writer.Write();
            File.WriteAllText(Application.dataPath + filepath, contents, encoding);
            return id;
        }
Пример #10
0
        /// <summary>
        /// Filters the source CSV by announcement title and writes the matching rows to the destination file.
        /// </summary>
        /// <remarks>
        /// <c>Parameters</c> is split on ',' into: source path, destination path, keyword groups.
        /// Groups are separated by '$' and the keywords inside a group by '|'. A row is kept when its
        /// "announcementTitle" contains every keyword of at least one group; it is written once.
        /// </remarks>
        private void Search()
        {
            string[] parameters = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            string sourceFilePath = parameters[0];
            string destFilePath = parameters[1];
            string keywordsGroupStr = parameters[2];

            List<string[]> keywordGroups = new List<string[]>();
            foreach (string groupText in keywordsGroupStr.Split(new string[] { "$" }, StringSplitOptions.RemoveEmptyEntries))
            {
                keywordGroups.Add(groupText.Split(new string[] { "|" }, StringSplitOptions.RemoveEmptyEntries));
            }

            CsvWriter cw = this.GetCsvWriter(destFilePath);

            CsvReader cr = new CsvReader(sourceFilePath);
            int rowCount = cr.GetRowCount();

            for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
            {
                Dictionary<string, string> row = cr.GetFieldValues(rowIndex);
                string announcementTitle = row["announcementTitle"];

                // Both Exists and TrueForAll stop at the first decisive element.
                bool anyGroupMatches = keywordGroups.Exists(
                    group => Array.TrueForAll(group, keyword => announcementTitle.Contains(keyword)));

                if (anyGroupMatches)
                {
                    cw.AddRow(row);
                }
            }

            cw.SaveToDisk();
        }
Пример #11
0
        /// <summary>
        /// Tags every shop record with the search keyword and writes all records to a CSV file.
        /// </summary>
        /// <param name="filePath">Destination path passed to the CSV writer.</param>
        /// <param name="allKeywordShops">Shop records; each dictionary gets its "keyword" entry set in place.</param>
        /// <param name="keyword">Keyword stored in the "keyword" column of every row.</param>
        private void SaveKeywordShopInfoToLocalFile(string filePath, List<Dictionary<string, string>> allKeywordShops, string keyword)
        {
            Dictionary<string, int> columnNameToIndex = new Dictionary<string, int>();

            columnNameToIndex.Add("name", 0);
            columnNameToIndex.Add("subscribe", 1);
            columnNameToIndex.Add("mark", 2);
            columnNameToIndex.Add("jdSelf", 3);
            columnNameToIndex.Add("keyword", 4);
            CsvWriter cw = new CsvWriter(filePath, columnNameToIndex);

            foreach (Dictionary<string, string> skuInfo in allKeywordShops)
            {
                // Indexer instead of Add: a record that already has a "keyword" entry (for example when the
                // same list is saved twice) is overwritten rather than causing an ArgumentException.
                skuInfo["keyword"] = keyword;
                cw.AddRow(skuInfo);
            }
            cw.SaveToDisk();
        }
Пример #12
0
        /// <summary>
        /// Tags every product record with the shop name and writes all records to a CSV file.
        /// </summary>
        /// <param name="filePath">Destination path passed to the CSV writer.</param>
        /// <param name="allProducts">Product records; each dictionary gets its "shopName" entry set in place.</param>
        /// <param name="shopName">Shop name stored in the "shopName" column of every row.</param>
        private void SaveShopProductInfoToLocalFile(string filePath, List<Dictionary<string, string>> allProducts, string shopName)
        {
            Dictionary<string, int> columnNameToIndex = new Dictionary<string, int>();

            columnNameToIndex.Add("shopName", 0);
            columnNameToIndex.Add("productName", 1);
            columnNameToIndex.Add("price", 2);
            columnNameToIndex.Add("commentNum", 3);
            columnNameToIndex.Add("goodMark", 4);
            CsvWriter cw = new CsvWriter(filePath, columnNameToIndex);

            foreach (Dictionary<string, string> skuInfo in allProducts)
            {
                // Indexer instead of Add: a record that already has a "shopName" entry (for example when the
                // same list is saved twice) is overwritten rather than causing an ArgumentException.
                skuInfo["shopName"] = shopName;
                cw.AddRow(skuInfo);
            }
            cw.SaveToDisk();
        }
Пример #13
0
        /// <summary>
        /// Builds the city-to-shop mapping file from the shop lists downloaded for each search location.
        /// </summary>
        /// <remarks>
        /// For every list row the downloaded shop file is opened with <c>ExcelReader</c>; each shop is
        /// written to "饿了么_城市与店铺对照.csv" together with the row's search latitude/longitude and city.
        /// </remarks>
        /// <param name="listSheet">List sheet with one row per search location.</param>
        private void GetCityToShops(IListSheet listSheet)
        {
            String exportDir = this.RunPage.GetExportDir();
            // Queried once here and reused for every row instead of being looked up again inside the loop.
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();
            string resultFilePath = Path.Combine(exportDir, "饿了么_城市与店铺对照.csv");

            CsvWriter resultEW = this.CreateCityToShopsWriter(resultFilePath);

            // Shop fields copied unchanged from the downloaded file into the mapping row.
            string[] copiedFields = { "address", "description", "id", "latitude", "longitude", "name", "phone", "promotion_info" };

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string detailPageUrl = row[SysConfig.DetailPageUrlFieldName];
                string elemeCity = row["elemeCity"];
                string searchLat = row["lat"];
                string searchLng = row["lng"];

                string shopsFilePath = this.RunPage.GetFilePath(detailPageUrl, pageSourceDir);

                ExcelReader er = new ExcelReader(shopsFilePath);
                int rowCount = er.GetRowCount();
                for (int j = 0; j < rowCount; j++)
                {
                    Dictionary<string, string> subRow = er.GetFieldValues(j);

                    Dictionary<string, string> mapRow = new Dictionary<string, string>();
                    foreach (string field in copiedFields)
                    {
                        mapRow.Add(field, subRow[field]);
                    }
                    mapRow.Add("searchLat", searchLat);
                    mapRow.Add("searchLng", searchLng);
                    mapRow.Add("elemeCity", elemeCity);
                    resultEW.AddRow(mapRow);
                }
            }
            resultEW.SaveToDisk();
        }
Пример #14
0
        /// <summary>
        /// Loads the language CSV file into the in-memory lookup tables and prepares the writer used to
        /// persist later additions.
        /// </summary>
        /// <remarks>
        /// Each row is read as (id, text). <c>max_id</c> is raised to the largest id lying strictly between
        /// <paramref name="client_id"/> and <paramref name="client_id"/> + 1000000; when it is still 0
        /// afterwards it is set to <paramref name="client_id"/>. Rows with an id that was already loaded
        /// are reported through <c>Debug.LogError</c> and skipped.
        /// </remarks>
        /// <param name="client_id">Lower bound (exclusive) of the id range considered for <c>max_id</c>.</param>
        public static void Init(int client_id)
        {
            encoding = GetFileEncodeType(Application.dataPath + filepath);
            string csv = File.ReadAllText(Application.dataPath + filepath, encoding);
            CsvReader reader = new CsvReader();

            langdic = new Dictionary<int, string>();
            langdic_id = new Dictionary<string, int>();
            writer = new CsvWriter();
            if (!string.IsNullOrEmpty(csv.Trim()))
            {
                foreach (var row in reader.Read(csv))
                {
                    int id = int.Parse(row[0]);
                    if (id > client_id && id < client_id + 1000000)
                    {
                        max_id = Mathf.Max(max_id, id);
                    }

                    // Test for the key itself: checking the stored text for emptiness would let a duplicate
                    // of an id with empty text reach Add and throw.
                    if (langdic.ContainsKey(id))
                    {
                        Debug.LogError("language.csv 中有重复id:" + id);
                        continue;
                    }

                    langdic.Add(id, row[1]);

                    // The same text may appear under several ids; the reverse lookup keeps the first id
                    // instead of throwing on the second occurrence.
                    if (!langdic_id.ContainsKey(row[1]))
                    {
                        langdic_id.Add(row[1], id);
                    }
                    writer.AddRow(row);
                }
            }
            if (max_id == 0)
            {
                max_id = client_id;
            }
        }
Пример #15
0
        /// <summary>
        /// Copies a contiguous range of rows from the reader <c>CR</c> into a new CSV file.
        /// </summary>
        /// <param name="destFilePath">Path of the CSV file to write.</param>
        /// <param name="fromRowIndex">Index of the first row to copy.</param>
        /// <param name="rowCount">Number of rows requested; reduced to the rows available from <paramref name="fromRowIndex"/>.</param>
        /// <exception cref="Exception"><paramref name="fromRowIndex"/> is greater than the total row count.</exception>
        public void GetPart(string destFilePath, int fromRowIndex, int rowCount)
        {
            Dictionary<string, int> columns = CR.GetColumnNameToIndex();
            CsvWriter output = new CsvWriter(destFilePath, columns);

            int available = CR.GetRowCount() - fromRowIndex;
            if (available < 0)
            {
                throw new Exception("获取csv部分数据时, 起始行超出总行数");
            }

            // Never read past the last row of the source.
            rowCount = Math.Min(rowCount, available);

            int lastRowIndex = rowCount + fromRowIndex - 1;
            int current = fromRowIndex;
            while (current <= lastRowIndex)
            {
                output.AddRow(CR.GetFieldValues(current));
                current++;
            }

            output.SaveToDisk();
        }
Пример #16
0
        /// <summary>
        /// Exports the residential-community list of every city/area to "安居客小区列表.csv".
        /// </summary>
        /// <remarks>
        /// For each list row that is not flagged as given up, the locally saved file of the detail URL
        /// is parsed as the number of result pages; each page ("?p=1", "?p=2", ...) is parsed as JSON and
        /// the entries of its "data" array are written out. A community URL is exported only once.
        /// Progress is logged every 100 list rows. Exceptions propagate with their original stack trace.
        /// </remarks>
        /// <param name="listSheet">List sheet with one row per city / level-1 / level-2 area.</param>
        private void GetCityXiaoquList(IListSheet listSheet)
        {
            String exportDir = this.RunPage.GetExportDir();
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();

            resultColumnDic.Add("cityCode", 0);
            resultColumnDic.Add("cityName", 1);
            resultColumnDic.Add("level1AreaCode", 2);
            resultColumnDic.Add("level1AreaName", 3);
            resultColumnDic.Add("level2AreaCode", 4);
            resultColumnDic.Add("level2AreaName", 5);
            resultColumnDic.Add("name", 6);
            resultColumnDic.Add("address", 7);
            resultColumnDic.Add("sale_num", 8);
            resultColumnDic.Add("build_year", 9);
            resultColumnDic.Add("mid_price", 10);
            resultColumnDic.Add("url", 11);

            string resultFilePath = Path.Combine(exportDir, "安居客小区列表.csv");
            CsvWriter resultEW = new CsvWriter(resultFilePath, resultColumnDic);

            // URLs already exported, so a community reachable through several areas is written once.
            HashSet<string> seenUrls = new HashSet<string>();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                if (i % 100 == 0)
                {
                    this.RunPage.InvokeAppendLogText("正在输出CSV文件... " + ((double)(i * 100) / (double)listSheet.RowCount).ToString("0.00") + "%", LogLevelType.System, true);
                }

                Dictionary<string, string> row = listSheet.GetRow(i);
                string detailUrl = row["detailPageUrl"];
                string cityName = row["cityName"];
                string cityCode = row["cityCode"];
                string level1AreaName = row["level1AreaName"];
                string level1AreaCode = row["level1AreaCode"];
                string level2AreaCode = row["level2AreaCode"];
                string level2AreaName = row["level2AreaName"];

                if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);
                string fileText = FileHelper.GetTextFromFile(localFilePath);
                int requestPageCount = int.Parse(fileText);

                for (int j = 0; j < requestPageCount; j++)
                {
                    string pageLocalFilePath = this.RunPage.GetFilePath(detailUrl + "?p=" + (j + 1).ToString(), pageSourceDir);
                    string pageFileText = FileHelper.GetTextFromFile(pageLocalFilePath);

                    JObject rootJo = JObject.Parse(pageFileText);
                    JArray xiaoquJsonArray = rootJo["data"] as JArray;
                    for (int k = 0; k < xiaoquJsonArray.Count; k++)
                    {
                        JObject xiaoquJson = xiaoquJsonArray[k] as JObject;
                        string name = CommonUtil.HtmlDecode(xiaoquJson["name"].ToString());
                        string address = CommonUtil.HtmlDecode(xiaoquJson["address"].ToString());
                        string sale_num = xiaoquJson["sale_num"].ToString();
                        string build_year = CommonUtil.HtmlDecode(xiaoquJson["build_year"].ToString());
                        string mid_price = xiaoquJson["mid_price"].ToString();
                        string url = CommonUtil.HtmlDecode(xiaoquJson["url"].ToString());

                        if (!seenUrls.Add(url))
                        {
                            continue;
                        }

                        Dictionary<string, string> f2vs = new Dictionary<string, string>();
                        f2vs.Add("cityCode", cityCode);
                        f2vs.Add("cityName", cityName);
                        // The area columns carry the row's own level-1 / level-2 values
                        // (they used to be filled with the city code and name).
                        f2vs.Add("level1AreaCode", level1AreaCode);
                        f2vs.Add("level1AreaName", level1AreaName);
                        f2vs.Add("level2AreaCode", level2AreaCode);
                        f2vs.Add("level2AreaName", level2AreaName);
                        f2vs.Add("name", name);
                        f2vs.Add("address", address);
                        f2vs.Add("sale_num", sale_num);
                        f2vs.Add("build_year", build_year);
                        f2vs.Add("mid_price", mid_price);
                        f2vs.Add("url", url);
                        resultEW.AddRow(f2vs);
                    }
                }
            }
            resultEW.SaveToDisk();
            this.RunPage.InvokeAppendLogText("完成输出CSV文件... 100%", LogLevelType.System, true);
        }
Пример #17
0
        /// <summary>
        /// Collects the announcements from every downloaded list page and writes them to a CSV file.
        /// </summary>
        /// <remarks>
        /// For each list row that is not flagged as given up, the locally saved page is read as UTF-8 JSON
        /// and the entries of its "announcements" array are exported together with the row's company
        /// fields. An announcement id is exported only once. "announcementTime" is read as milliseconds
        /// since 1970-01-01 and written as local time in "yyyy-MM-dd HH:mm:ss" format.
        /// Exceptions propagate with their original stack trace.
        /// </remarks>
        /// <param name="listSheet">List sheet with one row per downloaded announcement list page.</param>
        private void GetGongGaoListAllPagesToCsv(IListSheet listSheet)
        {
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

            CsvWriter ew = this.GetCsvWriter();

            // Ids already exported, so an announcement listed on several pages is written once.
            HashSet<string> seenAnnouncementIds = new HashSet<string>();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string detailUrl = row["detailPageUrl"];
                string orgId = row["orgId"];
                string pinyin = row["pinyin"];
                string code = row["code"];
                string zwjc = row["zwjc"];
                string category = row["category"];
                string stockExchange = row["stockExchange"];

                if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

                // The using statement closes the reader even when reading fails.
                string js;
                using (TextReader tr = new StreamReader(localFilePath, Encoding.UTF8))
                {
                    js = tr.ReadToEnd();
                }

                JObject rootJo = JObject.Parse(js);
                JArray itemArrayJsons = rootJo.SelectToken("announcements") as JArray;
                for (int j = 0; j < itemArrayJsons.Count; j++)
                {
                    JObject itemJson = itemArrayJsons[j] as JObject;
                    string announcementId = itemJson.GetValue("announcementId").ToString().Trim();
                    string announcementTitle = CommonUtil.HtmlDecode(itemJson.GetValue("announcementTitle").ToString().Trim());
                    string announcementTimeStr = itemJson.GetValue("announcementTime").ToString().Trim();
                    string adjunctType = itemJson.GetValue("adjunctType").ToString().Trim();
                    string adjunctUrl = itemJson.GetValue("adjunctUrl").ToString().Trim();

                    DateTime announcementTime = (new DateTime(1970, 1, 1)).AddMilliseconds(long.Parse(announcementTimeStr)).ToLocalTime();
                    string outAnnouncementTimeStr = announcementTime.ToString("yyyy-MM-dd HH:mm:ss");

                    if (!seenAnnouncementIds.Add(announcementId))
                    {
                        continue;
                    }

                    Dictionary<string, string> f2vs = new Dictionary<string, string>();
                    f2vs.Add("fileUrl", "http://www.cninfo.com.cn/" + adjunctUrl);
                    f2vs.Add("announcementId", announcementId);
                    f2vs.Add("zwjc", zwjc);
                    f2vs.Add("code", code);
                    f2vs.Add("pinyin", pinyin);
                    f2vs.Add("orgId", orgId);
                    f2vs.Add("category", category);
                    f2vs.Add("stockExchange", stockExchange);
                    f2vs.Add("announcementTitle", announcementTitle);
                    f2vs.Add("announcementTime", outAnnouncementTimeStr);
                    f2vs.Add("adjunctType", adjunctType);
                    ew.AddRow(f2vs);
                }
            }
            ew.SaveToDisk();
        }
        /// <summary>
        /// Periodical per-issue table-of-contents index pages: reads the downloaded page of every
        /// row in <paramref name="listSheet"/>, parses it as JSON, and writes one CSV row for each
        /// entry of its "pageRow" array whose "id" has not been seen before.
        /// </summary>
        /// <remarks>
        /// Output is split across several CSV files obtained from
        /// <c>GetAllPerioIndexPageCsvWriter</c>. A new file is started once the current one has
        /// received at least 500000 rows; the limit is only checked between list rows, so a file
        /// can exceed it by the number of entries on the last page written to it.
        /// Any failure while processing a page is logged together with its URL and rethrown.
        /// </remarks>
        /// <param name="listSheet">List sheet whose rows provide "detailPageUrl" and the give-up flag.</param>
        private void GetAllPerioIndexPageUrls(IListSheet listSheet)
        {
            const int maxRowsPerFile = 500000;

            // JSON properties copied into every output row, in the order they are handed to
            // GetAttributeValue.
            string[] fieldNames =
            {
                "id", "publish_year", "fund_info02", "page_range", "keywords", "auto_keys",
                "page_cnt", "doc_num", "perio_id", "language", "refdoc_cnt", "abstract_url",
                "scholar_id", "auto_classcode", "authors_name", "share_num", "trans_column",
                "is_fulltext", "issue_num", "pro_pub_date", "hxkbj_pku", "perio_title02",
                "cite_num", "unit_name", "linkdoc_cnt", "issn", "unit_name02", "data_state",
                "random_id", "cited_cnt", "doi", "fund_info", "trans_authors", "literature_code",
                "data_sort", "new_org", "core_perio", "publish_year02", "auth_area", "article_id",
                "tag_num", "abstract_reading_num", "auto_classcode_level", "first_authors",
                "full_pubdate", "hxkbj_istic", "common_year", "authors_unit",
                "thirdparty_links_num", "abst_webdate", "article_seq", "import_num",
                "common_sort_time", "issue_id", "full_url", "orig_pub_date", "source_db",
                "column_name", "cn", "collection_num", "download_num", "orig_classcode",
                "service_model", "first_publish", "is_oa", "subject_class_codes",
                "fulltext_reading_num", "note_num", "updatetime", "head_words",
                "subject_classcode_level", "trans_title", "perio_title_en", "title", "summary",
                "perio_title", "class_type", "doct_collect"
            };

            // NOTE(review): the result is not used in this method; the call is kept in case
            // GetExportDir has side effects (e.g. creating the directory) - confirm and remove.
            String    exportDir        = this.RunPage.GetExportDir();
            string    pageSourceDir    = this.RunPage.GetDetailSourceFileDir();
            int       allListFileIndex = 1;
            CsvWriter ew               = null;
            Dictionary<string, string> idDic = new Dictionary<string, string>();
            int paperCount = 0;

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                if (paperCount % 1000 == 0)
                {
                    this.RunPage.InvokeAppendLogText("已处理到: fileIndex = " + allListFileIndex.ToString() + ", paperIndex = " + paperCount.ToString(), LogLevelType.System, true);
                }

                // Open the first output file before anything is written (previously the writer
                // stayed null until the row limit was hit, so the first AddRow crashed), and
                // roll over to the next file once the current one is full.
                if (ew == null || paperCount >= maxRowsPerFile)
                {
                    if (ew != null)
                    {
                        ew.SaveToDisk();
                    }
                    ew = this.GetAllPerioIndexPageCsvWriter(allListFileIndex);
                    allListFileIndex++;
                    paperCount = 0;
                }

                Dictionary<string, string> row = listSheet.GetRow(i);
                string detailUrl = row["detailPageUrl"];

                bool giveUp = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

                try
                {
                    string pageFileText  = FileHelper.GetTextFromFile(localFilePath);
                    JArray itemJsonArray = JObject.Parse(pageFileText).GetValue("pageRow") as JArray;
                    if (itemJsonArray == null)
                    {
                        continue;
                    }

                    for (int j = 0; j < itemJsonArray.Count; j++)
                    {
                        JObject itemJson = itemJsonArray[j] as JObject;
                        string  id       = itemJson.GetValue("id").ToString();
                        if (idDic.ContainsKey(id))
                        {
                            // Same entry already exported from an earlier page.
                            continue;
                        }
                        idDic.Add(id, null);

                        Dictionary<string, string> f2vs = new Dictionary<string, string>();
                        foreach (string fieldName in fieldNames)
                        {
                            this.GetAttributeValue(itemJson, fieldName, f2vs);
                        }

                        paperCount++;

                        ew.AddRow(f2vs);
                    }
                }
                catch (Exception ex)
                {
                    this.RunPage.InvokeAppendLogText(ex.Message + ". detailUrl = " + detailUrl, LogLevelType.Error, true);
                    // Rethrow without resetting the stack trace.
                    throw;
                }
            }

            // Nothing to flush when the list sheet had no rows at all.
            if (ew != null)
            {
                ew.SaveToDisk();
            }
        }
Пример #19
0
        /// <summary>
        /// Samples the downloaded image of one list row in 16x16 pixel blocks and writes one CSV
        /// row per block to <c>temp/{x}_{y}_{z}_{time}.csv</c> under <paramref name="exportDir"/>.
        /// Does nothing when that CSV file already exists.
        /// </summary>
        /// <remarks>
        /// For every pixel the value <c>360 - hue</c> is computed; values strictly between 0 and
        /// 360 are summed per block and divided by the full block area (256), also for partial
        /// blocks at the right/bottom edge. A block is exported only when that average is
        /// strictly between 0 and 360. The exported point is the nominal block centre, which can
        /// lie outside the image for partial edge blocks.
        /// </remarks>
        /// <param name="exportDir">Export root directory; the CSV is written into its "temp" subdirectory.</param>
        /// <param name="listRow">List row providing "detailPageUrl", "x", "y", "z" and "time".</param>
        /// <param name="pageSourceDir">Directory holding the downloaded source files.</param>
        private void GetBlockInfo(string exportDir, Dictionary <string, string> listRow, string pageSourceDir)
        {
            string detailUrl = listRow["detailPageUrl"];
            string x         = listRow["x"];
            string y         = listRow["y"];
            string z         = listRow["z"];
            string time      = listRow["time"];

            string tempDir       = Path.Combine(exportDir, "temp");
            string blockInfoPath = Path.Combine(tempDir, x + "_" + y + "_" + z + "_" + time + ".csv");

            if (File.Exists(blockInfoPath))
            {
                // Already exported by an earlier run.
                return;
            }

            string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

            Dictionary<string, int> blockInfoDic = new Dictionary<string, int>();
            blockInfoDic.Add("x", 0);
            blockInfoDic.Add("y", 1);
            blockInfoDic.Add("z", 2);
            blockInfoDic.Add("xp", 3);
            blockInfoDic.Add("yp", 4);
            blockInfoDic.Add("v", 5);
            blockInfoDic.Add("time", 6);

            CsvWriter blockInfoCW = new CsvWriter(blockInfoPath, blockInfoDic);

            const int blockSize = 16;
            List<Point>              allPoints = new List<Point>();
            Dictionary<Point, float> p2hs      = new Dictionary<Point, float>();

            // Dispose the bitmap as soon as sampling is done so the image file is not kept locked.
            using (Bitmap img = new Bitmap(localFilePath))
            {
                for (int xx = 0; xx < img.Width; xx += blockSize)
                {
                    // Bounded by Height (was Width), so non-square images are covered completely.
                    for (int yy = 0; yy < img.Height; yy += blockSize)
                    {
                        float sumH = 0;

                        for (int i = 0; i < blockSize && xx + i < img.Width; i++)
                        {
                            for (int j = 0; j < blockSize && yy + j < img.Height; j++)
                            {
                                Color c = img.GetPixel(xx + i, yy + j);
                                float h = 360 - c.GetHue();
                                // Hue 0 maps to 360 and is excluded from the sum.
                                if (h > 0 && h < 360)
                                {
                                    sumH += h;
                                }
                            }
                        }

                        // Always averaged over the full block area, even for clipped edge blocks.
                        float avgH = sumH / (blockSize * blockSize);
                        if (avgH < 360 && avgH > 0)
                        {
                            Point p = new Point(xx + blockSize / 2, yy + blockSize / 2);
                            allPoints.Add(p);
                            p2hs.Add(p, avgH);
                        }
                    }
                }
            }

            foreach (Point p in allPoints)
            {
                float h = p2hs[p];
                Dictionary<string, string> cityReport = new Dictionary<string, string>();
                cityReport.Add("x", x);
                cityReport.Add("y", y);
                cityReport.Add("z", z);
                cityReport.Add("xp", p.X.ToString());
                cityReport.Add("yp", p.Y.ToString());
                cityReport.Add("v", h.ToString());
                cityReport.Add("time", time);
                blockInfoCW.AddRow(cityReport);
            }

            blockInfoCW.SaveToDisk();
        }
Пример #20
0
        /// <summary>
        /// Builds the shop-information export: for every list row that was not given up, combines
        /// the row's own columns with values scraped from the locally stored detail page
        /// (coordinates, address, phone numbers, average spend and the three scores) and writes
        /// one output row.
        /// </summary>
        /// <remarks>
        /// Any exception raised while processing a row propagates to the caller and aborts the
        /// export. NOTE(review): the output file has an ".xlsx" extension but is produced with
        /// <c>CsvWriter</c> - confirm that this is intended.
        /// </remarks>
        /// <param name="listSheet">List sheet with the crawled rows and their local detail pages.</param>
        /// <returns>Always <c>true</c>.</returns>
        public override bool AfterAllGrab(IListSheet listSheet)
        {
            String exportDir = this.RunPage.GetExportDir();

            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
            resultColumnDic.Add("city", 0);
            resultColumnDic.Add("distrctName", 1);
            resultColumnDic.Add("shopName", 2);
            resultColumnDic.Add("shopCode", 3);
            resultColumnDic.Add("address", 4);
            resultColumnDic.Add("tel", 5);
            resultColumnDic.Add("shopType", 6);
            resultColumnDic.Add("commentNum", 7);
            resultColumnDic.Add("lat", 8);
            resultColumnDic.Add("lng", 9);
            resultColumnDic.Add("人均", 10);
            resultColumnDic.Add("口味", 11);
            resultColumnDic.Add("环境", 12);
            resultColumnDic.Add("服务", 13);

            string    resultFilePath = Path.Combine(exportDir, "大众点评店铺信息.xlsx");
            CsvWriter resultEW       = new CsvWriter(resultFilePath, resultColumnDic);

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                bool giveUp = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                string city          = row["city"];
                string distrctName   = row["rName"];
                string shopName      = row["shopName"];
                string shopCode      = row["shopCode"];
                string shopType      = row["gName"];
                string commentNumStr = row["reviewNum"];
                Nullable<int> commentNum = string.IsNullOrEmpty(commentNumStr) ? (Nullable<int>)null : int.Parse(commentNumStr);

                string            address  = "";
                string            tel      = "";
                Nullable<decimal> renJun   = null;
                Nullable<decimal> kouWei   = null;
                Nullable<decimal> huanJing = null;
                Nullable<decimal> fuWu     = null;

                HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);
                string pageText = pageHtmlDoc.DocumentNode.InnerHtml;

                // Coordinates are embedded in the page text as  shopGlat: "..."  /  shopGlng: "..."
                Nullable<decimal> lat = ReadQuotedDecimalAfter(pageText, "shopGlat:");
                Nullable<decimal> lng = ReadQuotedDecimalAfter(pageText, "shopGlng:");

                HtmlNode addressNode = pageHtmlDoc.DocumentNode.SelectSingleNode("//span[@itemprop=\"street-address\"]");
                if (addressNode != null)
                {
                    // Falls back to an empty address when the node has no title attribute.
                    address = addressNode.GetAttributeValue("title", "");
                }

                HtmlNodeCollection allTelNodes = pageHtmlDoc.DocumentNode.SelectNodes("//span[@itemprop=\"tel\"]");
                if (allTelNodes != null)
                {
                    StringBuilder tels = new StringBuilder();
                    foreach (HtmlNode telNode in allTelNodes)
                    {
                        tels.Append((tels.Length == 0 ? "" : ",") + telNode.InnerText);
                    }
                    tel = tels.ToString();
                }

                // SelectNodes yields null when nothing matches, so guard it like the other lookups.
                HtmlNodeCollection allBriefNodes = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"brief-info\"]/span");
                if (allBriefNodes != null)
                {
                    foreach (HtmlNode briefNode in allBriefNodes)
                    {
                        string briefText = briefNode.InnerText;
                        if (briefText.StartsWith("人均:"))
                        {
                            // Skip the 3-character label and drop the final character
                            // (presumably a currency unit - confirm). A text consisting of the
                            // label alone used to make Substring throw.
                            int    valueLength = briefText.Length - 4;
                            string briefValue  = valueLength > 0 ? briefText.Substring(3, valueLength).Trim() : "";
                            renJun = briefValue.Length == 0 ? (Nullable<decimal>)null : decimal.Parse(briefValue);
                        }
                    }
                }

                HtmlNodeCollection allScoreNodes = pageHtmlDoc.DocumentNode.SelectNodes("//span[@id=\"comment_score\"]/span");
                if (allScoreNodes != null)
                {
                    foreach (HtmlNode scoreNode in allScoreNodes)
                    {
                        string scoreText = scoreNode.InnerText;
                        if (scoreText.StartsWith("口味:"))
                        {
                            string scoreValue = scoreText.Substring(3).Trim();
                            kouWei = scoreValue.Length == 0 ? (Nullable<decimal>)null : decimal.Parse(scoreValue);
                        }
                        else if (scoreText.StartsWith("环境:"))
                        {
                            string scoreValue = scoreText.Substring(3).Trim();
                            huanJing = scoreValue.Length == 0 ? (Nullable<decimal>)null : decimal.Parse(scoreValue);
                        }
                        else if (scoreText.StartsWith("服务:"))
                        {
                            string scoreValue = scoreText.Substring(3).Trim();
                            fuWu = scoreValue.Length == 0 ? (Nullable<decimal>)null : decimal.Parse(scoreValue);
                        }
                    }
                }

                // Nullable values render as an empty string when they have no value.
                Dictionary<string, string> f2vs = new Dictionary<string, string>();
                f2vs.Add("city", city);
                f2vs.Add("distrctName", distrctName);
                f2vs.Add("shopName", shopName);
                f2vs.Add("shopCode", shopCode);
                f2vs.Add("address", address);
                f2vs.Add("shopType", shopType);
                f2vs.Add("commentNum", commentNum.ToString());
                f2vs.Add("lat", lat.ToString());
                f2vs.Add("lng", lng.ToString());
                f2vs.Add("人均", renJun.ToString());
                f2vs.Add("tel", tel);
                f2vs.Add("口味", kouWei.ToString());
                f2vs.Add("服务", fuWu.ToString());
                f2vs.Add("环境", huanJing.ToString());
                resultEW.AddRow(f2vs);
            }

            resultEW.SaveToDisk();

            return true;
        }

        /// <summary>
        /// Finds <paramref name="marker"/> in <paramref name="pageText"/> and parses the text
        /// between the next pair of double quotes after it as a decimal.
        /// </summary>
        /// <param name="pageText">Text to search.</param>
        /// <param name="marker">Literal that precedes the quoted value.</param>
        /// <returns>
        /// The parsed value, or <c>null</c> when the marker or the quotes are missing, the quoted
        /// text is empty, or it is not a valid decimal.
        /// </returns>
        private static Nullable<decimal> ReadQuotedDecimalAfter(string pageText, string marker)
        {
            int markerIndex = pageText.IndexOf(marker);
            if (markerIndex < 0)
            {
                return null;
            }

            int openQuoteIndex = pageText.IndexOf("\"", markerIndex);
            if (openQuoteIndex < 0)
            {
                return null;
            }

            int closeQuoteIndex = pageText.IndexOf("\"", openQuoteIndex + 1);
            if (closeQuoteIndex - openQuoteIndex <= 1)
            {
                // No closing quote, or nothing between the quotes.
                return null;
            }

            decimal value;
            string  quotedText = pageText.Substring(openQuoteIndex + 1, closeQuoteIndex - openQuoteIndex - 1);
            if (decimal.TryParse(quotedText, out value))
            {
                return value;
            }
            return null;
        }
Пример #21
0
        private void GetYearWordsMatrixCount(IListSheet listSheet)
        {
            try
            {
                string[] parameters          = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
                string   allKeywordsFilePath = parameters[0];
                string   exportDirPath       = parameters[2];


                ExcelReader er            = new ExcelReader(allKeywordsFilePath);
                int         inputRowCount = er.GetRowCount();


                List <string> keywordList = new List <string>();
                Dictionary <int, Dictionary <string, List <string> > > yearSourceWordList = new Dictionary <int, Dictionary <string, List <string> > >();
                for (int i = 0; i < inputRowCount; i++)
                {
                    Dictionary <string, string> row = er.GetFieldValues(i);
                    string source  = row["source"];
                    int    year    = int.Parse(row["year"]);
                    string keyword = row["keyword"];

                    if (!keywordList.Contains(keyword))
                    {
                        keywordList.Add(keyword);
                    }

                    if (!yearSourceWordList.ContainsKey(year))
                    {
                        yearSourceWordList.Add(year, new Dictionary <string, List <string> >());
                    }
                    Dictionary <string, List <string> > sourceWordList = yearSourceWordList[year];
                    if (!sourceWordList.ContainsKey(source))
                    {
                        sourceWordList.Add(source, new List <string>());
                    }
                    List <string> wordList = sourceWordList[source];
                    if (!wordList.Contains(keyword))
                    {
                        wordList.Add(keyword);
                    }
                }

                Dictionary <string, Dictionary <string, int> > totalYearMartixDataDic = new Dictionary <string, Dictionary <string, int> >();

                foreach (int year in yearSourceWordList.Keys)
                {
                    Dictionary <string, Dictionary <string, int> > yearMatrixDataDic = new Dictionary <string, Dictionary <string, int> >();
                    Dictionary <string, List <string> >            sourceWordList    = yearSourceWordList[year];
                    foreach (string source in sourceWordList.Keys)
                    {
                        List <string> kwList = sourceWordList[source];
                        for (int i = 0; i < kwList.Count; i++)
                        {
                            string kw_i = kwList[i];
                            if (!yearMatrixDataDic.ContainsKey(kw_i))
                            {
                                yearMatrixDataDic.Add(kw_i, new Dictionary <string, int>());
                            }
                            Dictionary <string, int> iDic = yearMatrixDataDic[kw_i];

                            if (!iDic.ContainsKey(kw_i))
                            {
                                iDic.Add(kw_i, 1);
                            }
                            else
                            {
                                iDic[kw_i] = iDic[kw_i] + 1;
                            }

                            /*
                             * if (!totalYearMartixDataDic.ContainsKey(kw_i))
                             * {
                             *  totalYearMartixDataDic.Add(kw_i, new Dictionary<string, int>());
                             * }
                             * Dictionary<string, int> iTotalDic = totalYearMartixDataDic[kw_i];
                             * if (!iTotalDic.ContainsKey(kw_i))
                             * {
                             *  iTotalDic.Add(kw_i, 1);
                             * }
                             * else
                             * {
                             *  iTotalDic[kw_i] = iTotalDic[kw_i] + 1;
                             * }
                             * */

                            for (int j = 0; j < kwList.Count; j++)
                            {
                                string kw_j = kwList[j];
                                if (kw_i != kw_j)
                                {
                                    if (!iDic.ContainsKey(kw_j))
                                    {
                                        iDic.Add(kw_j, 1);
                                    }
                                    else
                                    {
                                        iDic[kw_j] = iDic[kw_j] + 1;
                                    }

                                    /*
                                     * if (!iTotalDic.ContainsKey(kw_j))
                                     * {
                                     *  iTotalDic.Add(kw_j, 1);
                                     * }
                                     * else
                                     * {
                                     *  iTotalDic[kw_i] = iTotalDic[kw_j] + 1;
                                     * }
                                     */
                                }
                            }
                        }
                    }

                    CsvWriter resultWriter = this.GetMatrixCsvWriter(exportDirPath, year, keywordList);

                    for (int i = 0; i < keywordList.Count; i++)
                    {
                        Dictionary <string, string> matrixRow = new Dictionary <string, string>();
                        string kw_i = keywordList[i];
                        matrixRow["keywordMatrix"] = kw_i;
                        Dictionary <string, int> iMatrixDataDic = yearMatrixDataDic.ContainsKey(kw_i) ? yearMatrixDataDic[kw_i] : null;
                        for (int j = 0; j < keywordList.Count; j++)
                        {
                            string kw_j = keywordList[j];
                            if (iMatrixDataDic == null)
                            {
                                matrixRow.Add(kw_j, "0");
                            }
                            else
                            {
                                matrixRow.Add(kw_j, iMatrixDataDic.ContainsKey(kw_j) ? iMatrixDataDic[kw_j].ToString() : "0");
                            }
                        }
                        resultWriter.AddRow(matrixRow);
                    }
                    resultWriter.SaveToDisk();

                    foreach (string kw_i in yearMatrixDataDic.Keys)
                    {
                        if (!totalYearMartixDataDic.ContainsKey(kw_i))
                        {
                            totalYearMartixDataDic.Add(kw_i, new Dictionary <string, int>());
                        }
                        Dictionary <string, int> iTotalDataDic = totalYearMartixDataDic[kw_i];
                        Dictionary <string, int> iDataDic      = yearMatrixDataDic[kw_i];
                        foreach (string kw_j in iDataDic.Keys)
                        {
                            if (!iTotalDataDic.ContainsKey(kw_j))
                            {
                                iTotalDataDic.Add(kw_j, iDataDic[kw_j]);
                            }
                            else
                            {
                                iTotalDataDic[kw_j] = iTotalDataDic[kw_j] + iDataDic[kw_j];
                            }
                        }
                    }
                }

                CsvWriter totalRresultWriter = this.GetMatrixCsvWriter(exportDirPath, 0, keywordList);

                for (int i = 0; i < keywordList.Count; i++)
                {
                    Dictionary <string, string> matrixRow = new Dictionary <string, string>();
                    string kw_i = keywordList[i];
                    matrixRow["keywordMatrix"] = kw_i;
                    Dictionary <string, int> iMatrixDataDic = totalYearMartixDataDic.ContainsKey(kw_i) ? totalYearMartixDataDic[kw_i] : null;
                    for (int j = 0; j < keywordList.Count; j++)
                    {
                        string kw_j = keywordList[j];
                        if (iMatrixDataDic == null)
                        {
                            matrixRow.Add(kw_j, "0");
                        }
                        else
                        {
                            matrixRow.Add(kw_j, iMatrixDataDic.ContainsKey(kw_j) ? iMatrixDataDic[kw_j].ToString() : "0");
                        }
                    }
                    totalRresultWriter.AddRow(matrixRow);
                }
                totalRresultWriter.SaveToDisk();
            }
            catch (Exception ex)
            {
                throw ex;
            }
        }
Пример #22
0
        /// <summary>
        /// Parses every downloaded project detail page and exports the project's base info plus its
        /// five sub-tables (bidding, construction drawing review, contract filing, construction permit,
        /// completion acceptance filing) through the corresponding CSV writers.
        /// </summary>
        /// <remarks>
        /// A page is parsed completely before any of its rows are handed to the writers, so a page
        /// that fails half-way contributes nothing to the output. A failing page is moved to a
        /// sibling "deleted" directory and logged; processing then continues with the next row.
        /// </remarks>
        /// <param name="listSheet">Sheet listing the detail pages; rows whose give-up column equals "Y" are skipped.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool GetAllPages(IListSheet listSheet)
        {
            CsvWriter mainCW   = this.GetMainCsvWriter();
            CsvWriter ztbCW    = this.GetZtbCsvWriter();
            CsvWriter sgtscCW  = this.GetSgtscCsvWriter();
            CsvWriter htbaCW   = this.GetHtbaCsvWriter();
            CsvWriter sgxkCW   = this.GetSgxkCsvWriter();
            CsvWriter jgysbaCW = this.GetJgysbaCsvWriter();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string detailPageUrl = row[SysConfig.DetailPageUrlFieldName];
                try
                {
                    if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                    {
                        continue;
                    }

                    HtmlAgilityPack.HtmlDocument pageHtmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                    // Parse the whole page first; nothing is written until every section succeeded.
                    Dictionary<string, string> mainRow = ParseProjectBaseInfo(pageHtmlDoc);
                    string xmbh = mainRow["项目编号"];

                    // Bidding
                    List<Dictionary<string, string>> ztbRows = ParseProjectTabRows(pageHtmlDoc, "tab_ztb", xmbh,
                        new string[] { "招标类型", "招标方式", "中标单位名称", "中标日期", "中标金额(万元)", "中标通知书编号", "省级中标通知书编号" });

                    // Construction drawing review
                    List<Dictionary<string, string>> sgtscRows = ParseProjectTabRows(pageHtmlDoc, "tab_sgtsc", xmbh,
                        new string[] { "施工图审查合格书编号", "省级施工图审查合格书编号", "勘察单位名称", "设计单位名称", "施工图审查机构名称", "审查完成日期" });

                    // Contract filing
                    List<Dictionary<string, string>> htbaRows = ParseProjectTabRows(pageHtmlDoc, "tab_htba", xmbh,
                        new string[] { "合同类别", "合同备案编号", "省级合同备案编号", "合同金额(万元)", "合同签订日期" });

                    // Construction permit
                    List<Dictionary<string, string>> sgxkRows = ParseProjectTabRows(pageHtmlDoc, "tab_sgxk", xmbh,
                        new string[] { "施工许可证编号", "省级施工许可证编号", "合同金额(万元)", "面积(平方米)", "发证日期" });

                    // Completion acceptance filing
                    List<Dictionary<string, string>> jgysbaRows = ParseProjectTabRows(pageHtmlDoc, "tab_jgysba", xmbh,
                        new string[] { "竣工备案编号", "省级竣工备案编号", "实际造价(万元)", "实际面积(平方米)", "实际开工日期", "实际竣工验收日期" });

                    // Commit: the page parsed cleanly, so all of its rows go to the writers together.
                    mainCW.AddRow(mainRow);
                    AppendProjectRows(ztbCW, ztbRows);
                    AppendProjectRows(sgtscCW, sgtscRows);
                    AppendProjectRows(htbaCW, htbaRows);
                    AppendProjectRows(sgxkCW, sgxkRows);
                    AppendProjectRows(jgysbaCW, jgysbaRows);
                }
                catch (Exception ex)
                {
                    // Best effort by design: a broken page must not stop the export of the others.
                    this.MoveIncompletePageToDeleted(detailPageUrl, ex);
                }
            }

            mainCW.SaveToDisk();
            ztbCW.SaveToDisk();
            sgtscCW.SaveToDisk();
            htbaCW.SaveToDisk();
            sgxkCW.SaveToDisk();
            jgysbaCW.SaveToDisk();
            return true;
        }

        /// <summary>
        /// Reads the project name and the "name:value" attribute list of a detail page and returns
        /// them as one row for the main CSV. Attributes missing from the page are exported as "".
        /// </summary>
        /// <exception cref="Exception">
        /// The project-name node or the attribute list is missing, or an attribute has no ":" separator.
        /// </exception>
        private static Dictionary<string, string> ParseProjectBaseInfo(HtmlAgilityPack.HtmlDocument pageHtmlDoc)
        {
            HtmlNode xmmcNode = pageHtmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"user_info spmtop\"]");
            if (xmmcNode == null)
            {
                throw new Exception("没有找到项目名称节点");
            }
            string xmmc = CommonUtil.HtmlDecode(xmmcNode.InnerText.Trim()).Trim();

            HtmlNodeCollection projectFieldNodeList = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"query_info_box \"]/div/div[@class=\"activeTinyTabContent\"]/dl/dd");
            if (projectFieldNodeList == null)
            {
                throw new Exception("无法获取项目基本信息属性值");
            }

            // Collect every "name:value" pair; when a name repeats, the last occurrence wins.
            Dictionary<string, string> parsedFields = new Dictionary<string, string>();
            foreach (HtmlNode projectFieldNode in projectFieldNodeList)
            {
                string fieldText = projectFieldNode.InnerText.Trim();
                int    sIndex    = fieldText.IndexOf(':');
                if (sIndex < 0)
                {
                    // Previously this surfaced as an accidental ArgumentOutOfRangeException from
                    // Substring; the page is still rejected, but now with an explicit reason.
                    throw new Exception("无法获取项目基本信息属性值");
                }
                string fieldName  = CommonUtil.HtmlDecode(fieldText.Substring(0, sIndex)).Trim();
                string fieldValue = CommonUtil.HtmlDecode(fieldText.Substring(sIndex + 1)).Trim();
                parsedFields[fieldName] = fieldValue;
            }

            // Output columns in their fixed order. The project name comes from its own node,
            // never from the attribute list.
            string[] columnNames = new string[]
            {
                "项目编号", "省级项目编号", "项目名称", "所在区划", "建设单位",
                "建设单位组织机构代码(统一社会信用代码)", "项目分类", "建设性质",
                "工程用途", "总投资", "总面积", "立项级别", "立项文号"
            };

            Dictionary<string, string> f2vs = new Dictionary<string, string>();
            foreach (string columnName in columnNames)
            {
                string value;
                if (columnName == "项目名称")
                {
                    value = xmmc;
                }
                else if (!parsedFields.TryGetValue(columnName, out value))
                {
                    value = "";
                }
                f2vs.Add(columnName, value);
            }
            return f2vs;
        }

        /// <summary>
        /// Extracts the data rows of one tab table (div with id <paramref name="tabId"/>).
        /// Each returned row carries the project number under "项目编码" followed by the cells at
        /// td index 1..n mapped to <paramref name="columnNames"/>; td index 0 is not exported.
        /// Returns an empty list when the tab has no matching rows. A row with too few cells throws,
        /// which makes the caller reject the whole page.
        /// </summary>
        private static List<Dictionary<string, string>> ParseProjectTabRows(HtmlAgilityPack.HtmlDocument pageHtmlDoc, string tabId, string xmbh, string[] columnNames)
        {
            List<Dictionary<string, string>> rows = new List<Dictionary<string, string>>();

            HtmlNodeCollection rowNodeList = pageHtmlDoc.DocumentNode.SelectNodes("//div[@id=\"" + tabId + "\"]/table/tbody/tr[@class=\"row\"]");
            if (rowNodeList == null)
            {
                return rows;
            }

            foreach (HtmlNode rowNode in rowNodeList)
            {
                HtmlNodeCollection          fieldNodeList = rowNode.SelectNodes("./td");
                Dictionary<string, string>  f2vs          = new Dictionary<string, string>();
                f2vs.Add("项目编码", xmbh);
                for (int k = 0; k < columnNames.Length; k++)
                {
                    f2vs.Add(columnNames[k], CommonUtil.HtmlDecode(fieldNodeList[k + 1].InnerText.Trim()));
                }
                rows.Add(f2vs);
            }
            return rows;
        }

        /// <summary>Adds every staged row to the given writer, preserving order.</summary>
        private static void AppendProjectRows(CsvWriter writer, List<Dictionary<string, string>> rows)
        {
            foreach (Dictionary<string, string> f2vs in rows)
            {
                writer.AddRow(f2vs);
            }
        }

        /// <summary>
        /// Moves the local file of a page that could not be parsed into the sibling "deleted"
        /// directory and logs the page URL together with the failure reason.
        /// </summary>
        private void MoveIncompletePageToDeleted(string detailPageUrl, Exception ex)
        {
            string dir       = this.RunPage.GetDetailSourceFileDir();
            string toDir     = Path.Combine(Path.GetDirectoryName(dir), "deleted");
            string fileUrl   = this.RunPage.GetFilePath(detailPageUrl, dir);
            string toFileUrl = this.RunPage.GetFilePath(detailPageUrl, toDir);

            // File.Move throws when the source is missing, the target directory does not exist
            // or the target file already exists. Any of those would escape the caller's loop
            // before SaveToDisk and lose all rows collected so far.
            if (File.Exists(fileUrl))
            {
                string toFileDir = Path.GetDirectoryName(toFileUrl);
                if (!string.IsNullOrEmpty(toFileDir))
                {
                    Directory.CreateDirectory(toFileDir);
                }
                if (File.Exists(toFileUrl))
                {
                    File.Delete(toFileUrl);
                }
                File.Move(fileUrl, toFileUrl);
            }

            this.RunPage.InvokeAppendLogText("文件不完整,删除" + ": " + detailPageUrl + " (" + ex.Message + ")", LogLevelType.Error, true);
        }
        /// <summary>
        /// Builds a co-occurrence based distance matrix between the items found in the "pts" column
        /// of an Excel sheet and writes it out in several forms.
        /// </summary>
        /// <remarks>
        /// <c>this.Parameters</c> is a comma separated pair "sourceFilePath,destFilePath".
        /// Each source row's "pts" cell is a comma separated list of items. Output files:
        /// <list type="bullet">
        /// <item><description>destFilePath: CSV matrix over the retained items.</description></item>
        /// <item><description>destFilePath + "_AllPTName.xlsx": every item with its occurrence count.</description></item>
        /// <item><description>destFilePath + "_PTName.xlsx": names of the retained items.</description></item>
        /// <item><description>destFilePath + "_Array.txt": the same matrix as "arr = [[...], ...]" text.</description></item>
        /// </list>
        /// A matrix cell is 0 on the diagonal, maxTime / coOccurrenceCount for items seen together,
        /// and 2 * maxTime for items never seen together, where maxTime is the largest
        /// co-occurrence count found. Numbers are written with the invariant culture so the decimal
        /// separator can never clash with the "," element separator.
        /// </remarks>
        /// <param name="listSheet">Not used by this method.</param>
        private void GetPTsMatrix(IListSheet listSheet)
        {
            string[] parameters     = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            string   sourceFilePath = parameters[0];
            string   destFilePath   = parameters[1];

            ExcelReader er             = new ExcelReader(sourceFilePath);
            int         sourceRowCount = er.GetRowCount();

            // Pass 1: count how often every item occurs; allPTList keeps first-seen order.
            Dictionary<string, int> allPTCountDic = new Dictionary<string, int>();
            List<string>            allPTList     = new List<string>();

            for (int i = 0; i < sourceRowCount; i++)
            {
                Dictionary<string, string> sourceRow = er.GetFieldValues(i);
                string[] itemPTs = sourceRow["pts"].Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string itemPT in itemPTs)
                {
                    int count;
                    if (allPTCountDic.TryGetValue(itemPT, out count))
                    {
                        allPTCountDic[itemPT] = count + 1;
                    }
                    else
                    {
                        allPTList.Add(itemPT);
                        allPTCountDic.Add(itemPT, 1);
                    }
                }
            }

            // Items occurring ignoreNum times or fewer are left out of the matrix
            // (they are still listed in the _AllPTName file).
            int                      ignoreNum = 10;
            List<string>             ptList    = new List<string>();
            Dictionary<string, bool> ptListDic = new Dictionary<string, bool>();

            foreach (string itemPT in allPTList)
            {
                if (allPTCountDic[itemPT] > ignoreNum)
                {
                    ptList.Add(itemPT);
                    ptListDic.Add(itemPT, true);
                }
            }

            // Pass 2: count, per retained item, how often each other retained item shares a row with it.
            int maxTime = 1;
            Dictionary<string, Dictionary<string, int>> ptToPTDic = new Dictionary<string, Dictionary<string, int>>();

            for (int i = 0; i < sourceRowCount; i++)
            {
                Dictionary<string, string> sourceRow = er.GetFieldValues(i);
                string[] itemPTs = sourceRow["pts"].Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string fromItemPT in itemPTs)
                {
                    if (!ptListDic.ContainsKey(fromItemPT))
                    {
                        continue;
                    }

                    Dictionary<string, int> ptDic;
                    if (!ptToPTDic.TryGetValue(fromItemPT, out ptDic))
                    {
                        ptDic = new Dictionary<string, int>();
                        ptToPTDic.Add(fromItemPT, ptDic);
                    }

                    // Self count; the outputs below always write 0 on the diagonal, so this value
                    // does not influence maxTime or any exported cell.
                    int selfCount;
                    ptDic.TryGetValue(fromItemPT, out selfCount);
                    ptDic[fromItemPT] = selfCount + 1;

                    foreach (string toItemPT in itemPTs)
                    {
                        if (fromItemPT == toItemPT || !ptListDic.ContainsKey(toItemPT))
                        {
                            continue;
                        }

                        int coCount;
                        if (!ptDic.TryGetValue(toItemPT, out coCount))
                        {
                            ptDic.Add(toItemPT, 1);
                        }
                        else
                        {
                            coCount++;
                            ptDic[toItemPT] = coCount;
                            if (coCount > maxTime)
                            {
                                maxTime = coCount;
                            }
                        }
                    }
                }
            }

            // CSV matrix: first column holds the row item, followed by one column per retained item.
            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
            resultColumnDic.Add("ptToPT", 0);
            for (int i = 0; i < ptList.Count; i++)
            {
                resultColumnDic.Add(ptList[i], i + 1);
            }

            CsvWriter ptMatrixCW = new CsvWriter(destFilePath, resultColumnDic);

            foreach (string fromPT in ptList)
            {
                Dictionary<string, string> resultRow = new Dictionary<string, string>();
                resultRow.Add("ptToPT", fromPT);

                Dictionary<string, int> propertyDic;
                ptToPTDic.TryGetValue(fromPT, out propertyDic);

                foreach (string toPT in ptList)
                {
                    double value = CalcPTDistance(fromPT, toPT, propertyDic, maxTime);
                    resultRow.Add(toPT, value.ToString(System.Globalization.CultureInfo.InvariantCulture));
                }
                ptMatrixCW.AddRow(resultRow);
            }

            ptMatrixCW.SaveToDisk();

            // Every item with its occurrence count.
            string allPTNameFilePath = destFilePath + "_AllPTName.xlsx";
            Dictionary<string, int> allPTNameColumnDic = new Dictionary<string, int>();
            allPTNameColumnDic.Add("name", 0);
            allPTNameColumnDic.Add("count", 1);
            Dictionary<string, string> allPTNameColumnFormats = new Dictionary<string, string>();
            allPTNameColumnFormats.Add("count", "#0");
            ExcelWriter allPTNameEW = new ExcelWriter(allPTNameFilePath, "List", allPTNameColumnDic, allPTNameColumnFormats);

            for (int i = 0; i < allPTList.Count; i++)
            {
                string fromPT = allPTList[i];
                Dictionary<string, object> resultRow = new Dictionary<string, object>();
                resultRow.Add("name", fromPT);
                resultRow.Add("count", allPTCountDic[fromPT]);
                allPTNameEW.AddRow(resultRow);
            }
            allPTNameEW.SaveToDisk();

            // Names of the retained items, in matrix order.
            string ptNameFilePath = destFilePath + "_PTName.xlsx";
            Dictionary<string, int> ptNameColumnDic = new Dictionary<string, int>();
            ptNameColumnDic.Add("name", 0);
            ExcelWriter ptNameEW = new ExcelWriter(ptNameFilePath, "List", ptNameColumnDic);

            for (int i = 0; i < ptList.Count; i++)
            {
                Dictionary<string, string> resultRow = new Dictionary<string, string>();
                resultRow.Add("name", ptList[i]);
                ptNameEW.AddRow(resultRow);
            }
            ptNameEW.SaveToDisk();

            // The same matrix as array-literal text. Earlier code also re-added every row to
            // ptMatrixCW here, after that writer had already been saved; that leftover is gone.
            string        ptArrayFilePath      = destFilePath + "_Array.txt";
            StringBuilder ptArrayStringBuilder = new StringBuilder();

            ptArrayStringBuilder.Append("arr = [");
            for (int i = 0; i < ptList.Count; i++)
            {
                string fromPT = ptList[i];
                ptArrayStringBuilder.Append((i == 0 ? "" : ", \r\n") + "[");

                Dictionary<string, int> ptDic;
                ptToPTDic.TryGetValue(fromPT, out ptDic);

                for (int j = 0; j < ptList.Count; j++)
                {
                    double value = CalcPTDistance(fromPT, ptList[j], ptDic, maxTime);
                    ptArrayStringBuilder.Append((j == 0 ? "" : ", ") + value.ToString(System.Globalization.CultureInfo.InvariantCulture));
                }
                ptArrayStringBuilder.Append("]");
            }
            ptArrayStringBuilder.Append("]");
            FileHelper.SaveTextToFile(ptArrayStringBuilder.ToString(), ptArrayFilePath);
        }

        /// <summary>
        /// Distance between two items: 0 for the same item, maxTime / co-occurrence count when the
        /// items were seen together, and 2 * maxTime when there is no recorded co-occurrence.
        /// </summary>
        /// <param name="fromPT">Row item.</param>
        /// <param name="toPT">Column item.</param>
        /// <param name="coCountDic">Co-occurrence counts of <paramref name="fromPT"/>; may be null.</param>
        /// <param name="maxTime">Largest co-occurrence count over all pairs.</param>
        private static double CalcPTDistance(string fromPT, string toPT, Dictionary<string, int> coCountDic, int maxTime)
        {
            if (fromPT == toPT)
            {
                return 0;
            }

            int coCount;
            if (coCountDic == null || !coCountDic.TryGetValue(toPT, out coCount) || coCount == 0)
            {
                return 2 * (double)maxTime;
            }
            return (double)maxTime / (double)coCount;
        }
Пример #24
0
        /// <summary>
        /// Reads the locally saved JSON detail file of every house in the list sheet and exports one
        /// CSV row per house, combining identifying columns from the sheet row with the areas, usage
        /// code and sale status taken from the JSON.
        /// </summary>
        /// <param name="listSheet">Sheet listing the house pages; rows whose give-up column equals "Y" are skipped.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool GetHouseListInfos(IListSheet listSheet)
        {
            string    pageSourceDir = this.RunPage.GetDetailSourceFileDir();
            CsvWriter resultEW      = this.CreateResultCsvWriter();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                string  pageUrl       = listSheet.PageUrlList[i];
                string  localFilePath = this.RunPage.GetFilePath(pageUrl, pageSourceDir);
                string  fileText      = FileHelper.GetTextFromFile(localFilePath);
                JObject rootJo        = JObject.Parse(fileText);

                // Only the fields that are exported are read. The former unused "fid" lookup made
                // pages without that field fail for no benefit.
                string houseStatusNo = rootJo["housestatus"].ToString();
                string unitArea      = rootJo["unitarea"].ToString();
                string apportioArea  = rootJo["apportioarea"].ToString();
                string usedTypeNo    = rootJo["usedtypeno"].ToString();
                string houseArea     = rootJo["housearea"].ToString();

                Dictionary<string, string> f2vs = new Dictionary<string, string>();
                f2vs.Add("projectId", row["projectId"]);
                f2vs.Add("项目名称", row["项目名称"]);
                f2vs.Add("buildingId", row["buildingId"]);
                f2vs.Add("楼名称", row["楼名称"]);
                f2vs.Add("是否住宅房屋", row["是否住宅房屋"]);
                f2vs.Add("单元号", row["单元号"]);
                f2vs.Add("顺序号", row["顺序号"]);
                f2vs.Add("楼层", row["楼层"]);
                f2vs.Add("houseId", row["houseId"]);
                f2vs.Add("houseName", row["houseName"]);
                f2vs.Add("房屋面积", houseArea);
                f2vs.Add("套内面积", unitArea);
                f2vs.Add("公摊面积", apportioArea);
                f2vs.Add("房屋用途", usedTypeNo);       // raw usage-type code as found in the JSON
                f2vs.Add("销售状态编码", houseStatusNo);
                f2vs.Add("销售状态", GetHouseStatusName(houseStatusNo));
                resultEW.AddRow(f2vs);
            }

            resultEW.SaveToDisk();

            return true;
        }

        /// <summary>
        /// Maps a sale-status code to its display name. Code "15701" and every unrecognized code
        /// map to "可售"; codes "15702" and "15709" both map to "已预订".
        /// </summary>
        private static string GetHouseStatusName(string houseStatusNo)
        {
            switch (houseStatusNo)
            {
                case "15702":
                case "15709":
                    return "已预订";

                case "15703":
                    return "已备案";

                case "15704":
                    return "已签约";

                case "15705":
                    return "可租";

                case "15707":
                    return "不可租售";

                case "15710":
                    return "查封";

                case "15711":
                    return "冻结";

                default:
                    // Includes "15701".
                    return "可售";
            }
        }
Пример #25
0
        /// <summary>
        /// Writes, for every row of <paramref name="listSheet"/>, the row itself (as an already-processed
        /// parent) followed by one row per child region found in that row's locally stored HTML page.
        /// </summary>
        /// <remarks>
        /// Child regions are read from <c>tr</c> elements with the classes <c>citytr</c>, <c>towntr</c>,
        /// <c>countytr</c> and <c>villagetr</c>, in that order. Exceptions raised while parsing a page
        /// propagate to the caller unchanged.
        /// </remarks>
        /// <param name="listSheet">
        /// Sheet whose rows supply <c>detailPageUrl</c>, <c>detailPageName</c>, <c>year</c>, <c>code</c>,
        /// <c>name</c> and the give-up flag column.
        /// </param>
        private void GetCities(IListSheet listSheet)
        {
            // NOTE(review): the returned directory is not used in this method; the call is retained
            // only in case it has side effects - confirm and remove if it is a pure getter.
            this.RunPage.GetDetailSourceFileDir();

            CsvWriter ew = this.GetCsvWriter();

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                string detailUrl  = row["detailPageUrl"];
                string detailName = row["detailPageName"];
                string year       = row["year"];
                string parentCode = row["code"];
                string parentName = row["name"];

                // Add the parent node to the next-level file, flagged so it is not grabbed again.
                Dictionary<string, string> parentF2vs = new Dictionary<string, string>();
                parentF2vs.Add("detailPageUrl", detailUrl);
                parentF2vs.Add("detailPageName", detailName);
                parentF2vs.Add("year", year);
                parentF2vs.Add("code", parentCode);
                parentF2vs.Add("name", parentName);
                parentF2vs.Add("giveUpGrab", "Y");
                ew.AddRow(parentF2vs);

                bool giveUp = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
                if (giveUp)
                {
                    continue;
                }

                // Strip the query string and the last path segment so that the relative hrefs
                // found in the page can simply be appended.
                Uri    uri         = new Uri(detailUrl);
                string queryString = uri.Query;
                string baseUrl     = detailUrl.Substring(0, detailUrl.Length - queryString.Length);
                baseUrl = baseUrl.Substring(0, baseUrl.Length - uri.Segments[uri.Segments.Length - 1].Length);

                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i, Encoding.GetEncoding("gb2312"));

                AppendLinkedRegionRows(ew, htmlDoc, "citytr", baseUrl, year);
                AppendLinkedRegionRows(ew, htmlDoc, "towntr", baseUrl, year);
                AppendLinkedRegionRows(ew, htmlDoc, "countytr", baseUrl, year);

                // Village rows have three cells (code, type, name) and no link to follow.
                HtmlNodeCollection villageNodeList = htmlDoc.DocumentNode.SelectNodes("//tr[@class=\"villagetr\"]");
                if (villageNodeList != null)
                {
                    foreach (HtmlNode villageNode in villageNodeList)
                    {
                        HtmlNodeCollection villageFieldNodeList = villageNode.SelectNodes("./td");
                        string villageCode = villageFieldNodeList[0].InnerText.Trim();
                        string villageType = villageFieldNodeList[1].InnerText.Trim();
                        string villageName = villageFieldNodeList[2].InnerText.Trim();

                        Dictionary<string, string> f2vs = new Dictionary<string, string>();
                        // There is no page for a village, so the year/code key doubles as its URL value.
                        f2vs.Add("detailPageUrl", year + "_" + villageCode);
                        f2vs.Add("detailPageName", year + "_" + villageCode);
                        f2vs.Add("year", year);
                        f2vs.Add("code", villageCode);
                        f2vs.Add("name", villageName);
                        f2vs.Add("giveUpGrab", "Y");
                        f2vs.Add("城乡分类代码", villageType);
                        ew.AddRow(f2vs);
                    }
                }
            }

            ew.SaveToDisk();
        }

        /// <summary>
        /// Appends one row per <c>tr</c> element of class <paramref name="rowClass"/> whose first cell
        /// holds the region code (optionally wrapped in a link) and whose second cell holds the name.
        /// </summary>
        /// <param name="ew">Writer that receives the rows.</param>
        /// <param name="htmlDoc">Parsed page to search.</param>
        /// <param name="rowClass">Exact value of the <c>class</c> attribute to match.</param>
        /// <param name="baseUrl">Prefix prepended to the link's <c>href</c> value.</param>
        /// <param name="year">Year value copied into each row and into its page name.</param>
        private static void AppendLinkedRegionRows(CsvWriter ew, HtmlAgilityPack.HtmlDocument htmlDoc, string rowClass, string baseUrl, string year)
        {
            HtmlNodeCollection regionNodeList = htmlDoc.DocumentNode.SelectNodes("//tr[@class=\"" + rowClass + "\"]");
            if (regionNodeList == null)
            {
                return;
            }

            foreach (HtmlNode regionNode in regionNodeList)
            {
                HtmlNodeCollection fieldNodeList = regionNode.SelectNodes("./td");
                HtmlNode           codeNode      = fieldNodeList[0];
                HtmlNode           nameNode      = fieldNodeList[1];
                string             regionCode    = codeNode.InnerText.Trim();
                string             regionName    = nameNode.InnerText.Trim();
                HtmlNode           linkNode      = codeNode.SelectSingleNode("./a");

                string hrefValue = linkNode == null ? "" : linkNode.GetAttributeValue("href", "");

                Dictionary<string, string> f2vs = new Dictionary<string, string>();
                f2vs.Add("detailPageUrl", baseUrl + hrefValue);
                f2vs.Add("detailPageName", year + "_" + regionCode);
                f2vs.Add("year", year);
                f2vs.Add("code", regionCode);
                f2vs.Add("name", regionName);
                // A region without a link has no page to follow, so it is flagged as given up.
                f2vs.Add("giveUpGrab", hrefValue.Length == 0 ? "Y" : "");
                ew.AddRow(f2vs);
            }
        }
Пример #26
0
        /// <summary>
        /// Builds a tag-by-tag matrix from the comma-separated <c>tags</c> column of a source workbook
        /// and writes it as a CSV file, a one-column workbook of tag names, and a text file containing
        /// the matrix as an <c>arr = [[...], ...]</c> literal.
        /// </summary>
        /// <remarks>
        /// <c>this.Parameters</c> is split on commas; the first entry is the source workbook path and the
        /// second is the destination path (the CSV is written there, the other two files next to it with
        /// the suffixes <c>_TagName.xlsx</c> and <c>_Array.txt</c>). Numbers are formatted with the
        /// invariant culture so the decimal separator never collides with the list separator.
        /// </remarks>
        /// <param name="listSheet">Not used by this method.</param>
        private void GetTagsMatrix(IListSheet listSheet)
        {
            string[] parameters     = this.Parameters.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            string   sourceFilePath = parameters[0];
            string   destFilePath   = parameters[1];

            ExcelReader er             = new ExcelReader(sourceFilePath);
            int         sourceRowCount = er.GetRowCount();

            // Tags in order of first appearance; this fixes the row and column order of the matrix.
            List<string> tagList = new List<string>();

            // tagToTagDic[from][to] = number of times "to" was seen alongside "from".
            Dictionary<string, Dictionary<string, int>> tagToTagDic = new Dictionary<string, Dictionary<string, int>>();

            // Largest pair count seen so far (never below 1).
            int maxTime = 1;

            for (int i = 0; i < sourceRowCount; i++)
            {
                Dictionary<string, string> sourceRow = er.GetFieldValues(i);
                string[] itemTags = sourceRow["tags"].Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string fromTag in itemTags)
                {
                    Dictionary<string, int> tagDic;
                    if (!tagToTagDic.TryGetValue(fromTag, out tagDic))
                    {
                        // First time this tag is seen: register it in both structures at once.
                        tagDic = new Dictionary<string, int>();
                        tagToTagDic.Add(fromTag, tagDic);
                        tagList.Add(fromTag);
                    }

                    int selfCount;
                    tagDic.TryGetValue(fromTag, out selfCount);
                    tagDic[fromTag] = selfCount + 1;

                    foreach (string toTag in itemTags)
                    {
                        if (fromTag == toTag)
                        {
                            continue;
                        }

                        int pairCount;
                        if (tagDic.TryGetValue(toTag, out pairCount))
                        {
                            pairCount++;
                            tagDic[toTag] = pairCount;
                            if (pairCount > maxTime)
                            {
                                maxTime = pairCount;
                            }
                        }
                        else
                        {
                            tagDic.Add(toTag, 1);
                        }
                    }
                }
            }

            Dictionary<string, int> resultColumnDic = new Dictionary<string, int>();
            resultColumnDic.Add("tagToTag", 0);
            for (int i = 0; i < tagList.Count; i++)
            {
                resultColumnDic.Add(tagList[i], i + 1);
            }

            CsvWriter     tagMatrixCW           = new CsvWriter(destFilePath, resultColumnDic);
            StringBuilder tagArrayStringBuilder = new StringBuilder();

            // Each matrix value is computed once and emitted to both the CSV row and the array text.
            tagArrayStringBuilder.Append("arr = [");
            for (int i = 0; i < tagList.Count; i++)
            {
                string fromTag = tagList[i];

                Dictionary<string, int> tagDic;
                tagToTagDic.TryGetValue(fromTag, out tagDic);

                Dictionary<string, string> resultRow = new Dictionary<string, string>();
                resultRow.Add("tagToTag", fromTag);

                tagArrayStringBuilder.Append(i == 0 ? "[" : ", \r\n[");
                for (int j = 0; j < tagList.Count; j++)
                {
                    string toTag     = tagList[j];
                    string valueText = GetTagMatrixValue(tagDic, fromTag, toTag, maxTime)
                                       .ToString(System.Globalization.CultureInfo.InvariantCulture);

                    resultRow.Add(toTag, valueText);
                    if (j > 0)
                    {
                        tagArrayStringBuilder.Append(", ");
                    }
                    tagArrayStringBuilder.Append(valueText);
                }
                tagArrayStringBuilder.Append("]");

                tagMatrixCW.AddRow(resultRow);
            }
            tagArrayStringBuilder.Append("]");

            tagMatrixCW.SaveToDisk();

            string tagNameFilePath = destFilePath + "_TagName.xlsx";
            Dictionary<string, int> tagNameColumnDic = new Dictionary<string, int>();
            tagNameColumnDic.Add("name", 0);
            ExcelWriter tagNameEW = new ExcelWriter(tagNameFilePath, "List", tagNameColumnDic);
            foreach (string tagName in tagList)
            {
                Dictionary<string, string> nameRow = new Dictionary<string, string>();
                nameRow.Add("name", tagName);
                tagNameEW.AddRow(nameRow);
            }
            tagNameEW.SaveToDisk();

            string tagArrayFilePath = destFilePath + "_Array.txt";
            FileHelper.SaveTextToFile(tagArrayStringBuilder.ToString(), tagArrayFilePath);
        }

        /// <summary>
        /// Computes one matrix cell: 0 on the diagonal, <c>maxTime / count</c> when the two tags were
        /// seen together <c>count</c> times, and <c>2 * maxTime</c> when they were never seen together.
        /// </summary>
        /// <param name="tagDic">Pair counts for <paramref name="fromTag"/>; may be null.</param>
        /// <param name="fromTag">Row tag.</param>
        /// <param name="toTag">Column tag.</param>
        /// <param name="maxTime">Largest pair count in the whole data set.</param>
        /// <returns>The cell value.</returns>
        private static double GetTagMatrixValue(Dictionary<string, int> tagDic, string fromTag, string toTag, int maxTime)
        {
            if (fromTag == toTag)
            {
                return 0;
            }

            int count;
            if (tagDic == null || !tagDic.TryGetValue(toTag, out count) || count == 0)
            {
                return 2 * (double)maxTime;
            }

            return (double)maxTime / (double)count;
        }
Пример #27
0
        /// <summary>
        /// Writes one CSV row per (shop, menu category, food) combination for the shops listed in
        /// <paramref name="cityEr"/>, reading each shop's menu from its locally stored JSON file.
        /// </summary>
        /// <remarks>
        /// Shops that are absent from <paramref name="allShopDic"/>, flagged as given up, or whose JSON
        /// has no <c>menu</c> object or no non-empty <c>body</c> are skipped. Any exception raised while
        /// parsing a shop's JSON is logged together with the file path and processing continues with
        /// the next shop. Source files that yield no menu are left on disk.
        /// </remarks>
        /// <param name="cityEr">Reader over the city's shop list; each row supplies the detail page URL.</param>
        /// <param name="city">City passed to the writer factory.</param>
        /// <param name="allShopDic">Shop rows keyed by detail page URL.</param>
        private void GetCategoryMenuMaps(ExcelReader cityEr, string city, Dictionary<string, Dictionary<string, string>> allShopDic)
        {
            CsvWriter cw        = this.CreateCategoryMenuMapsFileWriter(city);
            string    sourceDir = this.RunPage.GetDetailSourceFileDir();
            int       rowCount  = cityEr.GetRowCount();

            for (int i = 0; i < rowCount; i++)
            {
                Dictionary<string, string> cityShopRow = cityEr.GetFieldValues(i);
                string detailPageUrl = cityShopRow[SysConfig.DetailPageUrlFieldName];

                Dictionary<string, string> listRow;
                if (!allShopDic.TryGetValue(detailPageUrl, out listRow))
                {
                    continue;
                }

                if ("Y".Equals(listRow[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                string filePath = this.RunPage.GetFilePath(detailPageUrl, sourceDir);
                string jsonText = FileHelper.GetTextFromFile(filePath);
                try
                {
                    JObject rootJo = JObject.Parse(jsonText);
                    JObject menuJo = rootJo.GetValue("menu") as JObject;
                    if (menuJo == null)
                    {
                        continue;
                    }

                    // "body" is turned into text and parsed again, so it is accepted both as an
                    // embedded array and as a string that contains JSON. A missing "body" is
                    // treated the same as an empty one.
                    JToken bodyToken = menuJo.GetValue("body");
                    string bodyText  = bodyToken == null ? "" : bodyToken.ToString();
                    if (bodyText.Length == 0)
                    {
                        continue;
                    }

                    JArray categoryArray = JArray.Parse(bodyText);
                    for (int j = 0; j < categoryArray.Count; j++)
                    {
                        JObject categoryJo          = categoryArray[j] as JObject;
                        JArray  foodArray           = categoryJo.GetValue("foods") as JArray;
                        string  categoryId          = GetJsonText(categoryJo, "id");
                        string  categoryName        = GetJsonText(categoryJo, "name");
                        string  categoryDescription = GetJsonText(categoryJo, "description");
                        if (foodArray == null)
                        {
                            continue;
                        }

                        for (int k = 0; k < foodArray.Count; k++)
                        {
                            JObject foodJo = foodArray[k] as JObject;

                            Dictionary<string, string> categoryFoodRow = new Dictionary<string, string>();
                            categoryFoodRow.Add("id", listRow["id"]);
                            categoryFoodRow.Add("name", listRow["name"]);
                            categoryFoodRow.Add("address", listRow["address"]);
                            categoryFoodRow.Add("description", listRow["description"]);
                            categoryFoodRow.Add("latitude", listRow["latitude"]);
                            categoryFoodRow.Add("longitude", listRow["longitude"]);
                            categoryFoodRow.Add("phone", listRow["phone"]);
                            categoryFoodRow.Add("promotion_info", listRow["promotion_info"]);

                            categoryFoodRow.Add("categoryId", categoryId);
                            categoryFoodRow.Add("categoryName", categoryName);
                            categoryFoodRow.Add("categoryDescription", categoryDescription);
                            categoryFoodRow.Add("foodId", GetJsonText(foodJo, "item_id"));
                            categoryFoodRow.Add("foodName", GetJsonText(foodJo, "name"));
                            categoryFoodRow.Add("rating", GetJsonText(foodJo, "rating"));
                            categoryFoodRow.Add("monthSales", GetJsonText(foodJo, "month_sales"));
                            categoryFoodRow.Add("ratingCount", GetJsonText(foodJo, "rating_count"));
                            categoryFoodRow.Add("statisfyCount", GetJsonText(foodJo, "statisfy_count"));
                            categoryFoodRow.Add("statisfyRate", GetJsonText(foodJo, "statisfy_rate"));
                            categoryFoodRow.Add("minPurchase", GetJsonText(foodJo, "min_purchase"));
                            cw.AddRow(categoryFoodRow);
                        }
                    }
                }
                catch (Exception ex)
                {
                    this.RunPage.InvokeAppendLogText(ex.Message + ". FilePath = " + filePath, LogLevelType.System, true);
                }
            }
            cw.SaveToDisk();
        }

        /// <summary>
        /// Returns the text of property <paramref name="propertyName"/> of <paramref name="jo"/>,
        /// or an empty string when the property is absent.
        /// </summary>
        /// <param name="jo">Object to read from; must not be null.</param>
        /// <param name="propertyName">Name of the property to read.</param>
        /// <returns>The property's <c>ToString()</c> value, or <c>""</c>.</returns>
        private static string GetJsonText(JObject jo, string propertyName)
        {
            JToken token = jo.GetValue(propertyName);
            return token == null ? "" : token.ToString();
        }
Пример #28
0
        /// <summary>
        /// Round-trip test: writes three records with <c>CsvWriter</c>, reads them back with
        /// <c>CsvFileReader</c>, and verifies that every record comes back with identical field values.
        /// </summary>
        /// <remarks>
        /// The sample regions contain a comma with an embedded newline, doubled double quotes and
        /// single quotes. Records are matched on <c>Region</c>, and the test fails when any original
        /// record has no match, so it cannot pass vacuously.
        /// </remarks>
        public void CsvWriterReaderAccuracyTestShouldHaveAccurateData()
        {
            const string csvPath = "Somefile.csv";

            File.Delete(csvPath);

            var originalDataList = new List<TestData>
            {
                new TestData { Region = "Ealing,\n London", Sales = 10000, DateOpened = new DateTime(2012, 04, 23) },
                new TestData { Region = "\"\"Glasgow in Scotland\"\"", Sales = 50000, DateOpened = new DateTime(2012, 4, 1, 15, 31, 0) },
                new TestData { Region = "USA 'Gods own country'", Sales = 40000, DateOpened = new DateTime(2011, 12, 29, 9, 30, 0) }
            };

            var exportData = new CsvWriter();
            foreach (var original in originalDataList)
            {
                exportData.AddRow();
                exportData["Region"]      = original.Region;
                exportData["Sales"]       = Convert.ToString(original.Sales);
                exportData["Date Opened"] = Convert.ToString(original.DateOpened);
            }

            exportData.ExportToFile(csvPath);

            var newDataList = new List<TestData>();
            using (var reader = new CsvFileReader(csvPath))
            {
                var  row           = new CsvRow();
                bool readingHeader = true;

                while (reader.ReadRow(row))
                {
                    if (readingHeader)
                    {
                        // The first row holds the column names, not data.
                        readingHeader = false;
                        continue;
                    }

                    newDataList.Add(new TestData
                    {
                        Region     = row[0],
                        Sales      = Convert.ToInt32(row[1]),
                        DateOpened = Convert.ToDateTime(row[2])
                    });
                }
            }

            var resultData = (from newData in newDataList
                              join originalData in originalDataList on newData.Region equals originalData.Region
                              select new { NewData = newData, OriginalData = originalData }).ToList();

            // Without this check a reader that returned nothing, or mangled every Region,
            // would produce an empty join and the loop below would assert nothing.
            Assert.IsTrue(
                resultData.Count == originalDataList.Count,
                "Expected " + originalDataList.Count + " matching rows but found " + resultData.Count + ".");

            foreach (var data in resultData)
            {
                Assert.IsTrue(
                    data.NewData.Sales == data.OriginalData.Sales &&
                    data.NewData.Region == data.OriginalData.Region &&
                    data.NewData.DateOpened == data.OriginalData.DateOpened);
            }
        }