/// <summary>
/// Looks up every RIC in <paramref name="listRic"/> through
/// <c>GetELWExtractEntityStepOne</c> and collects the entities that were extracted.
/// RICs that are null/blank, or whose lookup returns <c>null</c>, are logged and skipped.
/// </summary>
/// <param name="listRic">RICs to query (per the log messages, read from a txt file).</param>
/// <returns>
/// The extracted entities (possibly an empty list), or <c>null</c> when
/// <paramref name="listRic"/> is null, empty, or consists of a single null/blank entry.
/// </returns>
private List<ELWExtractEntity> ExtractDataFromWebsite(List<string> listRic)
{
    List<ELWExtractEntity> listELWEntity = new List<ELWExtractEntity>();

    // The start date is sent as an empty string; only the end date is populated.
    string strStartDate = "";
    string strEndDate = endDate.ToString("yyyyMMdd");

    if (listRic == null || listRic.Count == 0)
    {
        Logger.Log("can't get ric from txt file. please check txt file.");
        return null;
    }

    // Null-safe: a null entry is treated the same as a blank one instead of throwing.
    if (listRic.Count == 1 && (listRic[0] == null || listRic[0].Trim().Length == 0))
    {
        Logger.Log("txt file is empty.");
        return null;
    }

    foreach (string strRic in listRic)
    {
        // Null-safe: skip null or whitespace-only entries rather than aborting the whole run.
        if (strRic == null || strRic.Trim().Length == 0)
        {
            Logger.Log("This ric is invalid.");
            continue;
        }

        ELWExtractEntity elwTmp = GetELWExtractEntityStepOne(strStartDate, strEndDate, strRic);
        if (elwTmp == null)
        {
            Logger.Log(string.Format("error when search Ric{0} from {1} to {2}.", strRic, strStartDate, strEndDate));
            continue;
        }

        listELWEntity.Add(elwTmp);
    }

    return listELWEntity;
}
/// <summary>
/// Posts a list search to the ISIN site with <paramref name="strRic"/> as the
/// <c>isur_nm</c> field and, when the second table of the response has at least two rows,
/// hands the code and name from row index 1 to <c>GetGetELWExtractEntityStepTwo</c>.
/// </summary>
/// <param name="strStartDate">Value posted as <c>std_cd_grnt_start_dd</c>.</param>
/// <param name="strEndDate">Value posted as <c>std_cd_grnt_end_dd</c>.</param>
/// <param name="strRic">Value posted as <c>isur_nm</c>; also the key recorded in the failure lists.</param>
/// <returns>
/// The result of step two; <c>null</c> when the response is empty, the table has exactly one
/// row, or an exception is caught. If the row count is zero the empty placeholder entity
/// is returned unchanged.
/// </returns>
private ELWExtractEntity GetELWExtractEntityStepOne(string strStartDate, string strEndDate, string strRic)
{
    string searchUrl = @"http://isin.krx.co.kr/srch/srch.do?method=srchList";
    string postBody = string.Format("std_cd_grnt_start_dd={0}&std_cd_grnt_end_dd={1}&std_cd_grnt=1&searchRadio2=11&searchRadio1=11&searchRadio=11&list_start_dd=&list_end_dd=&listRadio=1&isu_start_dd=&isu_end_dd=&isuRadio=1&isur_cd=&isur_nm={2}&com_nm=", strStartDate, strEndDate, strRic);
    string html = string.Empty;
    ELWExtractEntity result = new ELWExtractEntity();

    try
    {
        AdvancedWebClient client = new AdvancedWebClient();
        HtmlDocument document = new HtmlDocument();

        // 300000 is passed straight to the helper; presumably a timeout in ms - confirm against WebClientUtil.
        html = WebClientUtil.GetPageSource(client, searchUrl, 300000, postBody);
        if (string.IsNullOrEmpty(html))
        {
            Logger.Log(string.Format("return response is null,when query ric:{0}", strRic));
            if (!listRicNoResponse.Contains(strRic))
            {
                listRicNoResponse.Add(strRic);
            }
            return null;
        }

        document.LoadHtml(html);

        // The search results are read from the second table in the document.
        HtmlNode resultTable = document.DocumentNode.SelectNodes(".//table")[1];
        HtmlNodeCollection rows = resultTable.SelectNodes(".//tr");
        int rowCount = rows.Count;

        // Exactly one row is treated as "no match found".
        if (rowCount == 1)
        {
            Logger.Log(string.Format("can't get ric in strPageSource,when query ric:{0}", strRic));
            if (!listRicNoResponse.Contains(strRic))
            {
                listRicNoResponse.Add(strRic);
            }
            return null;
        }

        if (rowCount >= 2)
        {
            // Only row index 1 is used; cell 1 supplies std_cd and cell 3 supplies isu_nm.
            HtmlNodeCollection cells = rows[1].SelectNodes(".//td");
            string stdCode = StripCellWhitespace(cells[1].InnerText);
            string issueName = StripCellWhitespace(cells[3].InnerText);
            result = GetGetELWExtractEntityStepTwo(stdCode, issueName);
        }

        return result;
    }
    catch (Exception ex)
    {
        if (!listRicError.Contains(strRic))
        {
            listRicError.Add(strRic);
        }
        Logger.Log(string.Format("Error found in function: {0}. Exception message: {1}", "GetELWExtractEntityStepOne", ex.Message));
        return null;
    }
}

/// <summary>Removes every space, line feed, carriage return and tab from a cell's text, then trims it.</summary>
private static string StripCellWhitespace(string text)
{
    return text.Replace(" ", "").Replace("\n", "").Replace("\r", "").Replace("\t", "").Trim();
}
/// <summary>
/// Posts the detail-popup request for one search result and copies the cells of the
/// response's tables 1-4 into a new <see cref="ELWExtractEntity"/>.
/// </summary>
/// <param name="std_cd">Value posted as <c>std_cd</c>; also the key used for the empty-response and error lists.</param>
/// <param name="isu_nm">Value posted as <c>isu_nm</c>; also the key recorded when tables or rows are missing.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the response is empty, fewer tables/rows than
/// expected are present, or an exception is caught.
/// </returns>
private ELWExtractEntity GetGetELWExtractEntityStepTwo(string std_cd, string isu_nm)
{
    string strUrl = @"http://isin.krx.co.kr/srch/srch.do?method=srchPopup11";
    string stdcd_type = "11";
    string mod_del_cd = "";
    string pershr_isu_prc = "";
    string isu_shrs = "";
    string strPostData = string.Format("stdcd_type={0}&std_cd={1}&mod_del_cd={2}&isu_nm={3}&pershr_isu_prc={4}&isu_shrs={5}", stdcd_type, std_cd, mod_del_cd, isu_nm, pershr_isu_prc, isu_shrs);
    string strPageSource = string.Empty;
    ELWExtractEntity tmp = new ELWExtractEntity();

    try
    {
        AdvancedWebClient wc = new AdvancedWebClient();
        HtmlDocument htc = new HtmlDocument();
        strPageSource = WebClientUtil.GetPageSource(wc, strUrl, 300000, strPostData);
        if (string.IsNullOrEmpty(strPageSource))
        {
            Logger.Log(string.Format("return response is null,when query ric:{0}", std_cd));
            if (!listRicNoResponse.Contains(std_cd))
            {
                listRicNoResponse.Add(std_cd);
            }
            return null;
        }

        htc.LoadHtml(strPageSource);
        HtmlNodeCollection tables = htc.DocumentNode.SelectNodes(".//table");

        // SelectNodes yields null (not an empty collection) when nothing matches, so a page
        // without any table is handled as missing data rather than as an exception.
        if (tables == null || tables.Count < 5)
        {
            // The format string now carries {0}, so the identifier actually appears in the log.
            Logger.Log(string.Format("tables.count<5,so missing data in the pageSource ,current ric:{0}", isu_nm));
            if (!listRicNoResponse.Contains(isu_nm))
            {
                listRicNoResponse.Add(isu_nm);
            }
            return null;
        }

        HtmlNodeCollection trs1 = tables[1].SelectNodes(".//tr"); // 11 rows * 2 cells = 22 fields
        HtmlNodeCollection trs2 = tables[2].SelectNodes(".//tr"); // 2 + 1 + 1 = 4 fields
        HtmlNodeCollection trs3 = tables[3].SelectNodes(".//tr"); // 3 fields
        HtmlNodeCollection trs4 = tables[4].SelectNodes(".//tr"); // 1 field

        bool rowsMissing =
            trs1 == null || trs1.Count < 11 ||
            trs2 == null || trs2.Count < 3 ||
            trs3 == null || trs3.Count < 3 ||
            trs4 == null || trs4.Count < 1;
        if (rowsMissing)
        {
            Logger.Log(string.Format("trs1.count is too small,so missing data in the pageSource ,current ric:{0}", isu_nm));
            if (!listRicNoResponse.Contains(isu_nm))
            {
                listRicNoResponse.Add(isu_nm);
            }
            return null;
        }

        // First table, left column (cell 0 of each row).
        tmp.IssuingAuthorityCongenial = GetDetailCellText(trs1, 0, 0);
        tmp.StandardCongenial = GetDetailCellText(trs1, 1, 0);
        tmp.KoreanProjectName = GetDetailCellText(trs1, 2, 0);
        tmp.FinancialProducts = GetDetailCellText(trs1, 3, 0);
        tmp.ListedOrNot = GetDetailCellText(trs1, 4, 0);
        tmp.Listed = GetDetailCellText(trs1, 5, 0);
        tmp.IssueNumber = GetDetailCellText(trs1, 6, 0);
        tmp.ReleaseTheUnitPrice = GetDetailCellText(trs1, 7, 0);
        tmp.Money = GetDetailCellText(trs1, 8, 0);
        tmp.ReleaseForm = GetDetailCellText(trs1, 9, 0);
        tmp.StandardNonStandard = GetDetailCellText(trs1, 10, 0);

        // First table, right column (cell 1 of each row).
        tmp.IssuingAuthority = GetDetailCellText(trs1, 0, 1);
        tmp.ShortenTheCongenial = GetDetailCellText(trs1, 1, 1);
        tmp.TheProjectNameEnglish = GetDetailCellText(trs1, 2, 1);
        tmp.ToIssue = GetDetailCellText(trs1, 3, 1);
        tmp.WhetherTheActivity = GetDetailCellText(trs1, 4, 1);
        tmp.ListedUntil = GetDetailCellText(trs1, 5, 1);
        tmp.TheDate = GetDetailCellText(trs1, 6, 1);
        tmp.TheExpirationOfThe = GetDetailCellText(trs1, 7, 1);
        tmp.IssueToDistinguish = GetDetailCellText(trs1, 8, 1);
        tmp.ConversionRatio = GetDetailCellText(trs1, 9, 1);
        tmp.ThePowerForm = GetDetailCellText(trs1, 10, 1);

        // Second table.
        tmp.UnderlyingAssetTypes = GetDetailCellText(trs2, 0, 0);
        tmp.SovereignIssuingAuthority = GetDetailCellText(trs2, 0, 1);
        tmp.StockIndexTypes = GetDetailCellText(trs2, 1, 0);
        tmp.UnderlyingAssetGuitar = GetDetailCellText(trs2, 2, 0);

        // Third table.
        tmp.TheRightType = GetDetailCellText(trs3, 0, 0);
        tmp.TheIssueOfTheSpecialConditions = GetDetailCellText(trs3, 0, 1);
        tmp.TheExerciseOfTheRightWay = GetDetailCellText(trs3, 1, 0);

        // Fourth table.
        tmp.CFI = GetDetailCellText(trs4, 0, 0);

        return tmp;
    }
    catch (Exception ex)
    {
        if (!listRicError.Contains(std_cd))
        {
            listRicError.Add(std_cd);
        }
        // Report this method's own name so the log points at the right step.
        Logger.Log(string.Format("Error found in function: {0}. Exception message: {1}", "GetGetELWExtractEntityStepTwo", ex.Message));
        return null;
    }
}

/// <summary>
/// Returns <c>FormatInnerText</c> applied to the inner text of the td at
/// <paramref name="cellIndex"/> within the row at <paramref name="rowIndex"/>.
/// Throws if the row has no such cell; the caller's catch block handles that.
/// </summary>
private string GetDetailCellText(HtmlNodeCollection rows, int rowIndex, int cellIndex)
{
    return FormatInnerText(rows[rowIndex].SelectNodes(".//td")[cellIndex].InnerText);
}