/// <summary>
/// Collects the Excel download links published on the page at <paramref name="baseUrl"/>.
/// The daily link is always collected. The weekly link is added only when today equals the
/// date returned by MiscUtil.GetNextWeeklyTradingDay (or specialCase is "yes") and the weekly
/// title passes IsLastWeekInfo (or specialCase is "yes").
/// </summary>
/// <param name="baseUrl">Address of the page that lists the source files.</param>
/// <returns>Absolute links: the daily file first, then the weekly file when applicable.</returns>
private List<string> GetSourceFileLinks(string baseUrl)
{
    const string siteRoot = "http://www.tse.or.jp";

    // Reload holidayList; it is handed to the trading-day calculation below.
    holidayList = HolidayManager.SelectHoliday(5);

    HtmlDocument page = new HtmlDocument();
    page.LoadHtml(WebClientUtil.GetPageSource(baseUrl, 1800000));
    var shiryoTables = page.DocumentNode.SelectNodes("//table[@class = 'styleShiryo']");

    List<string> links = new List<string>();

    // Second matching table: the third cell carries the anchor of the daily file.
    var dailyCells = shiryoTables[1].SelectNodes("tr/td");
    links.Add(siteRoot + MiscUtil.GetCleanTextFromHtml(dailyCells[2].SelectSingleNode("a").Attributes["href"].Value));

    bool weeklyDue = DateTime.Today == MiscUtil.GetNextWeeklyTradingDay(DateTime.Today, holidayList, 2)
                     || specialCase.Equals("yes");
    if (!weeklyDue)
    {
        return links;
    }

    // Third matching table: first cell is the title, third cell carries the weekly anchor.
    var weeklyCells = shiryoTables[2].SelectNodes("tr/td");
    string weeklyTitle = MiscUtil.GetCleanTextFromHtml(weeklyCells[0].InnerText);
    if (IsLastWeekInfo(weeklyTitle) || specialCase.Equals("yes"))
    {
        links.Add(siteRoot + MiscUtil.GetCleanTextFromHtml(weeklyCells[2].SelectSingleNode("a").Attributes["href"].Value));
    }

    return links;
}
/// <summary>
/// Queries the HKEx news advanced search and returns the first link whose href contains ".pdf".
/// </summary>
/// <param name="id">Identifier handed to getPostData to build the search request body.</param>
/// <returns>
/// The absolute PDF URL; <c>null</c> when the result page contains no PDF link;
/// an empty string when the lookup fails with an exception (the failure is logged).
/// </returns>
public string GetPDFUrl(string id)
{
    const string siteRoot = "http://www.hkexnews.hk";
    const string searchUrl = siteRoot + "/listedco/listconews/advancedsearch/search_active_main.aspx";

    string postData = getPostData(id);

    try
    {
        string pageSource = WebClientUtil.GetPageSource(searchUrl, 24000, postData);
        HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
        htmlDoc.LoadHtml(pageSource);

        HtmlAgilityPack.HtmlNode pdfLinkNode = htmlDoc.DocumentNode.SelectSingleNode("//a[contains(@href, '.pdf')]");
        if (pdfLinkNode == null)
        {
            Logger.Log("There's no PDF file for ric " + id);
            return null;
        }

        // The href on the page is site-relative, so prefix the host.
        return siteRoot + pdfLinkNode.Attributes["href"].Value;
    }
    catch (Exception ex)
    {
        // Previously the exception text was stored in an unused local and lost.
        Logger.Log("Failed to get the PDF url for ric " + id + ". " + ex.ToString());
        return string.Empty;
    }
}
/// <summary>
/// Fetches the BA_VW021.jsp detail page for an ISIN and returns the text of one fixed table
/// cell (tr[6]/td[4]) — presumably the maturity date, going by the method name.
/// </summary>
/// <param name="isin">ISIN placed in the isu_cd query parameter.</param>
/// <returns>
/// The trimmed cell text, or an empty string when the page is empty, the cell is missing,
/// or an exception occurred (the exception is logged).
/// </returns>
private String GetMatDate(String isin)
{
    try
    {
        // Example: http://isin.krx.co.kr/jsp/BA_VW021.jsp?isu_cd=KRA631193158&modi=f&req_no=201105240094
        String detailUrl = String.Format("http://isin.krx.co.kr/jsp/BA_VW021.jsp?isu_cd={0}&modi=f&req_no=", isin);
        String html = WebClientUtil.GetPageSource(detailUrl, 300000);
        if (String.IsNullOrEmpty(html))
        {
            return String.Empty;
        }

        HtmlDocument detailPage = new HtmlDocument();
        detailPage.LoadHtml(html);

        HtmlNode dateCell = detailPage.DocumentNode.SelectSingleNode(".//tr[6]/td[4]");
        return dateCell == null ? String.Empty : dateCell.InnerText.Trim();
    }
    catch (Exception ex)
    {
        Logger.Log("Error found in GetMatDate() : \r\n" + ex.ToString(), Logger.LogType.Error);
        return String.Empty;
    }
}
/// <summary>
/// Downloads the KIND "pubofrprogcom" result list for the last six months and parses it.
/// </summary>
/// <remarks>
/// Two browser-like GET requests (the main page, then the tracker URL) are sent first with the
/// shared <c>cookies</c> container; their bodies are discarded.
/// NOTE(review): presumably the site needs these to establish a session before the POST —
/// confirm, because the POST below does not visibly reuse <c>cookies</c>.
/// </remarks>
/// <returns>The rows extracted by GetHtmlDataNeeded from the result page.</returns>
private List<List<string>> GrabTodayData()
{
    const string mainPageUrl = @"http://kind.krx.co.kr/listinvstg/pubofrprogcom.do?method=searchPubofrProgComMain";
    const string trackerUrl = @"http://203.235.1.91/tracker.jsp?u=4&XU=&EXEN=5&dr=&XDR=&dt=%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD%20%EB%8C%80%ED%91%9C%20%EA%B8%B0%EC%97%85%EA%B3%B5%EC%8B%9C%EC%B1%84%EB%84%90%20KIND&du=http%3A%2F%2Fkind.krx.co.kr%2Flistinvstg%2Fpubofrprogcom.do%3Fmethod=searchPubofrProgComMain&SX=M&js=Y&ss=1280x1024&cd=32&ce=Y&je=Y&tzo=-480&tye=2014&tmo=7&tda=8&tho=16&tmi=4&tse=34";

    SendKindPrimingGet(mainPageUrl, @"kind.krx.co.kr", @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", null);
    SendKindPrimingGet(trackerUrl, @"203.235.1.91", @"image/webp,*/*;q=0.8", mainPageUrl);

    // Read the clock once so both ends of the range come from the same instant, and format
    // with the invariant culture so a non-Gregorian default calendar cannot change the digits.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // The original literal read "...ProgComSub¤tPageSize=50": "&curren" had been decoded as an
    // HTML entity, which fused two parameters into one. Restored to "&currentPageSize".
    string postData = string.Format(
        "method=searchPubofrProgComSub&currentPageSize=50&pageIndex=1&orderMode=1&orderStat=D&searchMode=1&searchCodeType=&isurCd=&repIsuSrtCd=&bzProcsNo=&detailMarket=&forward=pubofrprogcom_sub&marketType=&searchCorpName=&fromDate={0}&toDate={1}",
        now.AddMonths(-6).ToString("yyyy-MM-dd", invariant),
        now.ToString("yyyy-MM-dd", invariant));

    string source = WebClientUtil.GetPageSource(null, "http://kind.krx.co.kr/listinvstg/pubofrprogcom.do", 40000, postData, Encoding.UTF8);

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(source);
    return GetHtmlDataNeeded(document.DocumentNode);
}

/// <summary>
/// Sends one GET with browser-like headers and the shared cookie container, drains the
/// response and disposes it. The body is intentionally ignored.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="host">Value for the Host header.</param>
/// <param name="accept">Value for the Accept header.</param>
/// <param name="referer">Value for the Referer header, or null to leave it unset.</param>
private void SendKindPrimingGet(string url, string host, string accept, string referer)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Timeout = 300000;
    request.Method = "GET";
    request.CookieContainer = cookies;
    request.Host = host;
    request.Accept = accept;
    request.Headers["Accept-Encoding"] = @"gzip,deflate,sdch";
    request.Headers["Accept-Language"] = @"en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4";
    request.Headers["Cache-Control"] = @"max-age=0";
    request.KeepAlive = true;
    if (referer != null)
    {
        request.Referer = referer;
    }
    request.UserAgent = @"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36";

    // Dispose the response and reader; the original leaked both on every call.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        reader.ReadToEnd();
    }
}
/// <summary>
/// Finds the link to the SZSE index source file on the index page, passes its full address to
/// downloadAndParseIndexFile, and then calls updateSZSEIndexListWithRic.
/// </summary>
private void getSZSEIndexList()
{
    // The base page is requested first; its content is not used afterwards.
    WebClientUtil.GetPageSource(configObj.SzseBaseUri, 180000, "");

    string indexPageUrl = string.Format("{0}/main/marketdata/hqcx/zsybg/", configObj.SzseBaseUri);
    var indexPage = WebClientUtil.GetHtmlDocument(indexPageUrl, 180000, "", Encoding.GetEncoding("gb2312"));

    // The first anchor inside a right-aligned cell holds the file path; it is appended to the base URI.
    var fileAnchor = indexPage.DocumentNode.SelectNodes("//td[@align='right']/a")[0];
    string filePath = MiscUtil.GetCleanTextFromHtml(fileAnchor.Attributes["href"].Value);

    downloadAndParseIndexFile(string.Format("{0}{1}", configObj.SzseBaseUri, filePath));
    updateSZSEIndexListWithRic();
}
/// <summary>
/// Requests the TWSE ISIN public page (strMode=6), passing "strMode=6" as the request data and
/// the Big5 encoding, and returns the root node of the parsed HTML.
/// </summary>
/// <returns>The document node of the downloaded page.</returns>
private HtmlNode GetHtmlSource()
{
    const string pageUrl = "http://isin.twse.com.tw/isin/C_public.jsp?strMode=6";
    Encoding big5 = Encoding.GetEncoding("big5");

    string html = WebClientUtil.GetPageSource(null, pageUrl, 180000, "strMode=6", big5);

    HtmlDocument parsed = new HtmlDocument();
    parsed.LoadHtml(html);
    return parsed.DocumentNode;
}
/// <summary>
/// Requests one result page with a fresh AdvancedWebClient and hands the page source to the
/// single-argument overload of this method for parsing.
/// </summary>
/// <param name="uri">Address of the result page.</param>
/// <param name="postData">Data sent with the request.</param>
/// <returns>The ISIN entries produced by the page-source overload.</returns>
private static List<ISINTemp> getISINListFromSinglePage(string uri, string postData)
{
    AdvancedWebClient client = new AdvancedWebClient();
    string html = WebClientUtil.GetPageSource(client, uri, 300000, postData);
    return getISINListFromSinglePage(html);
}
/// <summary>
/// Requests the KIND "pubofrprogcom" result list for the last six months and parses it.
/// </summary>
/// <returns>The rows extracted by GetHtmlDataNeeded from the result page.</returns>
private List<List<string>> GrabTodayData()
{
    // Read the clock once so both ends of the range come from the same instant, and format
    // with the invariant culture so a non-Gregorian default calendar cannot change the digits.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // The original literal read "...ProgComSub¤tPageSize=50": "&curren" had been decoded as an
    // HTML entity, which fused two parameters into one. Restored to "&currentPageSize".
    string postData = string.Format(
        "method=searchPubofrProgComSub&currentPageSize=50&pageIndex=1&orderMode=1&orderStat=D&searchMode=1&searchCodeType=&isurCd=&repIsuSrtCd=&bzProcsNo=&detailMarket=&forward=pubofrprogcom_sub&marketType=&searchCorpName=&fromDate={0}&toDate={1}",
        now.AddMonths(-6).ToString("yyyy-MM-dd", invariant),
        now.ToString("yyyy-MM-dd", invariant));

    string source = WebClientUtil.GetPageSource(null, "http://kind.krx.co.kr/listinvstg/pubofrprogcom.do", 40000, postData, Encoding.UTF8);

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(source);
    return GetHtmlDataNeeded(document.DocumentNode);
}
/// <summary>
/// Step one of the ELW extraction: searches isin.krx.co.kr for <paramref name="strRic"/> within
/// a date range, reads two cells from the first result row and passes them to
/// GetGetELWExtractEntityStepTwo.
/// </summary>
/// <param name="strStartDate">Value for std_cd_grnt_start_dd.</param>
/// <param name="strEndDate">Value for std_cd_grnt_end_dd.</param>
/// <param name="strRic">Value for isur_nm; also the key recorded in the failure lists.</param>
/// <returns>
/// The entity from step two; <c>null</c> when the response is empty, when the result table has
/// only one row, or when an exception occurred. An empty response or single-row table records
/// the ric in listRicNoResponse; an exception records it in listRicError.
/// </returns>
private ELWExtractEntity GetELWExtractEntityStepOne(string strStartDate, string strEndDate, string strRic)
{
    string searchUrl = @"http://isin.krx.co.kr/srch/srch.do?method=srchList";
    string formBody = string.Format("std_cd_grnt_start_dd={0}&std_cd_grnt_end_dd={1}&std_cd_grnt=1&searchRadio2=11&searchRadio1=11&searchRadio=11&list_start_dd=&list_end_dd=&listRadio=1&isu_start_dd=&isu_end_dd=&isuRadio=1&isur_cd=&isur_nm={2}&com_nm=", strStartDate, strEndDate, strRic);
    ELWExtractEntity fallback = new ELWExtractEntity();

    try
    {
        AdvancedWebClient client = new AdvancedWebClient();
        string html = WebClientUtil.GetPageSource(client, searchUrl, 300000, formBody);
        if (string.IsNullOrEmpty(html))
        {
            Logger.Log(string.Format("return response is null,when query ric:{0}", strRic));
            if (!listRicNoResponse.Contains(strRic))
            {
                listRicNoResponse.Add(strRic);
            }
            return null;
        }

        HtmlDocument page = new HtmlDocument();
        page.LoadHtml(html);

        // The second table on the page is the result grid.
        HtmlNodeCollection rows = page.DocumentNode.SelectNodes(".//table")[1].SelectNodes(".//tr");

        if (rows.Count == 1)
        {
            Logger.Log(string.Format("can't get ric in strPageSource,when query ric:{0}", strRic));
            if (!listRicNoResponse.Contains(strRic))
            {
                listRicNoResponse.Add(strRic);
            }
            return null;
        }

        if (rows.Count >= 2)
        {
            HtmlNodeCollection cells = rows[1].SelectNodes(".//td");
            string stdCd = cells[1].InnerText;
            string isuNm = cells[3].InnerText;

            // Strip spaces and line/tab characters from both values, then trim.
            string[] removable = { " ", "\n", "\r", "\t" };
            foreach (string token in removable)
            {
                stdCd = stdCd.Replace(token, "");
                isuNm = isuNm.Replace(token, "");
            }

            return GetGetELWExtractEntityStepTwo(stdCd.Trim(), isuNm.Trim());
        }

        return fallback;
    }
    catch (Exception ex)
    {
        if (!listRicError.Contains(strRic))
        {
            listRicError.Add(strRic);
        }
        Logger.Log(string.Format("Error found in function: {0}. Exception message: {1}", "GetELWExtractEntityStepOne", ex.Message));
        return null;
    }
}
/// <summary>
/// Downloads the page addressed by the <c>url</c> member.
/// </summary>
/// <returns>The page source, or <c>null</c> when the request throws (the failure is logged).</returns>
private string LandWebSite()
{
    string landed = null;
    try
    {
        landed = WebClientUtil.GetPageSource(url, 300000);
    }
    catch (System.Exception ex)
    {
        Logger.Log(string.Format("Failed to land the website.{0}", ex.Message));
    }
    return landed;
}
/// <summary>
/// Searches the ISIN site (http://isin.krx.co.kr/jsp/realBoard{Category}.jsp) with the values
/// carried by <paramref name="query"/> and collects the ISIN entries from every result page.
/// </summary>
/// <param name="query">
/// Search criteria: IssueCompany, Code, Category, StartDate and EndDate are read.
/// </param>
/// <returns>
/// The collected entries. When an exception occurs the entries gathered so far are returned
/// and the error is written to the trace output.
/// </returns>
public static List<ISINTemp> getISINListFromISINWebPage(ISINQuery query)
{
    List<ISINTemp> isinList = new List<ISINTemp>();

    try
    {
        Encoding eucKr = Encoding.GetEncoding("euc-kr");
        string issuerName = HttpUtility.UrlEncode(query.IssueCompany, eucKr);

        // Append the "호" suffix when the caller left it off. The original passed query.Code as
        // the FORMAT string to string.Format, which never appended anything.
        string code = query.Code.EndsWith("호", StringComparison.Ordinal) ? query.Code : query.Code + "호";
        string issueName = HttpUtility.UrlEncode(code, eucKr);

        string uri = string.Format("http://isin.krx.co.kr/jsp/realBoard{0}.jsp", query.Category);

        // A hard-coded debug query used to overwrite this value right after it was built, so
        // the search ignored the caller's criteria. The override has been removed.
        string postData = string.Format("kind=&ef_std_cd_grnt_dt_from={0}&ef_std_cd_grnt_dt_to={1}&secuGubun={2}&lst_yn_all=on&lst_yn1=Y&lst_yn2=N&lst_yn3=D&els_dls_all=on&els_dls1=els&els_dls2=dls&so_gb_all=on&so_gb1=s&so_gb2=o&jp_gb_all=on&jp_gb1=c&jp_gb2=t&jp_gb3=r&jp_gb4=i&hg_gb_all=on&hg_gb1=h&hg_gb2=g&tg_gb_all=on&tg_gb1=x&tg_gb2=z&df_gb_all=on&df_gb1=df1&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word={3}&ef_iss_inst_cd=&ef_isu_nm={4}&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst_dt_from=&ef_lst_dt_to=", query.StartDate, query.EndDate, query.Category, issuerName, issueName);

        AdvancedWebClient wc = new AdvancedWebClient();
        string pageSource = WebClientUtil.GetPageSource(wc, uri, 300000, postData);
        int pageCount = GetTotalPageCount(postData);
        isinList.AddRange(getISINListFromSinglePage(pageSource));

        // Page 1 is already collected; fetch the remaining pages.
        // NOTE(review): these requests do not reuse wc, unlike the first one — confirm intended.
        for (int pageNo = 2; pageNo <= pageCount; pageNo++)
        {
            postData = string.Format("pg_no={0}&lst_yn1=Y&lst_yn2=N&lst_yn3=D&df_gb1=df1&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word={1}&ef_isu_nm={2}&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst_dt_from=&ef_lst_dt_to=&ef_std_cd_grnt_dt_from={3}&ef_std_cd_grnt_dt_to={4}", pageNo.ToString("D2"), issuerName, issueName, query.StartDate, query.EndDate);
            pageSource = WebClientUtil.GetPageSource(uri, 300000, postData);
            isinList.AddRange(getISINListFromSinglePage(pageSource));
        }
    }
    catch (Exception ex)
    {
        // Best effort is kept (partial results are returned), but the error is no longer lost.
        System.Diagnostics.Trace.TraceError("Error found in getISINListFromISINWebPage() : \r\n" + ex.ToString());
    }

    return isinList;
}
/// <summary>
/// Searches the ISIN site by company name and returns the result rows without the header row.
/// </summary>
/// <param name="companyName">Company name; URL-encoded as EUC-KR before it is sent.</param>
/// <param name="onlyEquity">True to search security scope "01", false for scope "99".</param>
/// <param name="onlyNonListing">
/// True to restrict the search to non-listed issues. Only honoured when
/// <paramref name="onlyEquity"/> is also true.
/// </param>
/// <returns>
/// The data rows of the second table on the result page, or <c>null</c> when the page is empty,
/// has fewer than two tables, or the table holds no data rows.
/// </returns>
public static HtmlNodeCollection SearchISIN(string companyName, bool onlyEquity, bool onlyNonListing)
{
    string securityScope = onlyEquity ? "01" : "99";

    // The all-securities scope offers no listing filter, so the flag applies to equity only.
    onlyNonListing = onlyEquity && onlyNonListing;
    string listScope = onlyNonListing ? "lst_yn2=N" : "lst_yn_all=on&lst_yn1=Y&lst_yn2=N&lst_yn3=D";

    Encoding eucKr = Encoding.GetEncoding("euc-kr");
    companyName = HttpUtility.UrlEncode(companyName, eucKr);

    string postData = string.Format("kind=&ef_std_cd_grnt_dt_from=&ef_std_cd_grnt_dt_to=&secuGubun={0}" +
                                    "&{1}&els_dls_all=on&els_dls1=els&els_dls2=dls&so_gb_all=on&so_gb1=s&so_gb2=o&jp_gb_all=on" +
                                    "&jp_gb1=c&jp_gb2=t&jp_gb3=r&jp_gb4=i&hg_gb_all=on&hg_gb1=h&hg_gb2=g&tg_gb_all=on&tg_gb1=x&tg_gb2=z&df_gb_all=on&df_gb1=df1" +
                                    "&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word={2}" +
                                    "&ef_iss_inst_cd=&ef_isu_nm=&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst_dt_from=&ef_lst_dt_to=",
                                    securityScope, listScope, companyName);

    AdvancedWebClient wc = new AdvancedWebClient();
    string pageSource = WebClientUtil.GetPageSource(wc, queryURL, 300000, postData, eucKr);
    if (string.IsNullOrEmpty(pageSource))
    {
        return null;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageSource);

    // SelectNodes yields null (not an empty collection) when nothing matches, so test for null
    // before Count; the original dereferenced the result directly.
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
    if (tables == null || tables.Count < 2)
    {
        return null;
    }

    HtmlNodeCollection records = tables[1].SelectNodes(".//tr");
    if (records == null || records.Count <= 1)
    {
        return null;
    }

    // Drop the header row.
    records.RemoveAt(0);
    return records;
}
/// <summary>
/// Gets all the newly generated ISINs for government bonds from chinaclear.cn.
/// Entry page: http://www.chinaclear.cn/isin/user/userApplyLogin.do?m=enter
/// Query page: http://www.chinaclear.cn/isin/user/userApplyLogin.do?m=queryFast
/// </summary>
/// <returns>
/// One entry per result row (tr with class "td5"), or an empty list when the result page
/// contains no such rows.
/// </returns>
public List<RicISINInfo> GetAllISINForGoverBond()
{
    AdvancedWebClient wc = new AdvancedWebClient();

    // The first request goes to the entry page; its body is not used. The same client is then
    // used for the fast query, whose response is parsed.
    string postData = "loginName=&password=&securitiesName=%BC%C7%D5%CB%CA%BD%B8%BD%CF%A2&securitiesCode=";
    WebClientUtil.GetPageSource(wc, "http://www.chinaclear.cn/isin/user/userApplyLogin.do?m=enter", 18000, postData);
    string pageSource = WebClientUtil.GetPageSource(wc, "http://www.chinaclear.cn/isin/user/userApplyLogin.do?m=queryFast", 18000, "", Encoding.GetEncoding("gb2312"));

    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(pageSource);

    // SelectNodes yields null when no row matches; the original passed that null to Select(),
    // which throws ArgumentNullException.
    var nodeList = htmlDoc.DocumentNode.SelectNodes("//tr[@class='td5']");
    if (nodeList == null)
    {
        return new List<RicISINInfo>();
    }

    // Child indexes 3 and 5 are read for ISIN and name.
    // NOTE(review): presumably the even positions are whitespace text nodes between cells — confirm.
    return nodeList.Select(node => new RicISINInfo
    {
        ISIN = MiscUtil.GetCleanTextFromHtml(node.ChildNodes[1 * 2 + 1].InnerText),
        Name = MiscUtil.GetCleanTextFromHtml(node.ChildNodes[2 * 2 + 1].InnerText)
    }).ToList();
}
/// <summary>
/// ISIN Website. Get target url by isin.
/// Searches realBoard99.jsp for the ISIN, reads the type token from the href of the first
/// result row and builds the address of the matching detail page.
/// </summary>
/// <param name="isin">isin</param>
/// <returns>
/// The detail page url, or <c>null</c> when the search page could not be downloaded or does
/// not contain the expected result row, link, or type token.
/// </returns>
private static string GetUrlByIsin(string isin)
{
    string postData = string.Format("kind=&ef_std_cd_grnt_dt_from=&ef_std_cd_grnt_dt_to=&secuGubun=99&lst_yn_all=on&lst_yn1=Y&lst_yn2=N&lst_yn3=D&els_dls_all=on&els_dls1=els&els_dls2=dls&so_gb_all=on&so_gb1=s&so_gb2=o&jp_gb_all=on&jp_gb1=c&jp_gb2=t&jp_gb3=r&jp_gb4=i&hg_gb_all=on&hg_gb1=h&hg_gb2=g&tg_gb_all=on&tg_gb1=x&tg_gb2=z&df_gb_all=on&df_gb1=df1&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word=&ef_iss_inst_cd=&ef_isu_nm={0}&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst_dt_from=&ef_lst_dt_to=", isin);
    string uri = "http://isin.krx.co.kr/jsp/realBoard99.jsp";

    // Up to three attempts; wait five seconds after an attempt that throws.
    string pageSource = null;
    int retries = 3;
    while (pageSource == null && retries-- > 0)
    {
        try
        {
            AdvancedWebClient wc = new AdvancedWebClient();
            pageSource = WebClientUtil.GetPageSource(wc, uri, 180000, postData, Encoding.GetEncoding("EUC-KR"));
        }
        catch
        {
            System.Threading.Thread.Sleep(5000);
        }
    }

    // All attempts failed: the original went on to call LoadHtml(null), which throws.
    if (pageSource == null)
    {
        return null;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageSource);

    // SelectNodes reports "no match" by returning null, so check explicitly; the original
    // try/catch around the call did not protect the indexing that followed.
    HtmlNodeCollection trs = doc.DocumentNode.SelectNodes("/html[1]/body[1]/table[1]/tr[2]/td[1]/table[1]/tr");
    if (trs == null || trs.Count < 2)
    {
        return null;
    }

    HtmlNodeCollection tds = trs[1].SelectNodes("./td");
    if (tds == null || tds.Count < 2)
    {
        return null;
    }

    HtmlNode aNode = tds[1].SelectSingleNode(".//a");
    if (aNode == null || aNode.Attributes["href"] == null)
    {
        return null;
    }

    // The third comma-separated argument in the href is the type token, wrapped in quote,
    // parenthesis and semicolon characters that are trimmed off.
    string[] hrefParts = aNode.Attributes["href"].Value.Trim().Split(',');
    if (hrefParts.Length < 3)
    {
        return null;
    }

    string type = hrefParts[2].Trim(new char[] { '\'', ')', ';' });
    string targetPage = GetTargetPageCode(type);
    return string.Format("http://isin.krx.co.kr/jsp/{0}?isu_cd={1}&modi=f&req_no=", targetPage, isin);
}
/// <summary>
/// Looks up the ISIN of every entry in dropList on realBoard07.jsp (searching by issuer name
/// and number) and stores it in the entry's ISIN property when one is found.
/// </summary>
/// <remarks>
/// An entry whose search returns an empty page or no result row is logged and skipped, so the
/// remaining entries are still processed. Any exception still ends the run and is logged.
/// </remarks>
private void GetISINFromWebpage()
{
    try
    {
        // Former address: http://isin.krx.co.kr/jsp/BA_LT113.jsp
        string uri = "http://isin.krx.co.kr/jsp/realBoard07.jsp";
        Encoding eucKr = Encoding.GetEncoding("euc-kr");

        foreach (var item in dropList)
        {
            string issuername = HttpUtility.UrlEncode(item.Issuername, eucKr);
            string num = HttpUtility.UrlEncode(item.Num, eucKr);

            // NOTE(review): this body ends in "&ef_lst", whereas sibling queries end in
            // "&ef_lst_dt_from=&ef_lst_dt_to=" — possibly truncated; left unchanged.
            string postData = string.Format("kind=&ef_std_cd_grnt_dt_from=&ef_std_cd_grnt_dt_to=&secuGubun=07&lst_yn_all=on&lst_yn1=Y&lst_yn2=N&lst_yn3=D&els_dls_all=on&els_dls1=els&els_dls2=dls&so_gb_all=on&so_gb1=s&so_gb2=o&jp_gb_all=on&jp_gb1=c&jp_gb2=t&jp_gb3=r&jp_gb4=i&hg_gb_all=on&hg_gb1=h&hg_gb2=g&tg_gb_all=on&tg_gb1=x&tg_gb2=z&df_gb_all=on&df_gb1=df1&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word={0}&ef_iss_inst_cd=&ef_isu_nm={1}&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst", issuername, num);

            AdvancedWebClient wc = new AdvancedWebClient();
            string pageSource = WebClientUtil.GetPageSource(wc, uri, 300000, postData);
            if (string.IsNullOrEmpty(pageSource))
            {
                Logger.Log("GetISINFromWebpage(): empty response for " + item.Issuername + " " + item.Num, Logger.LogType.Warning);
                continue;
            }

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(pageSource);

            // SelectNodes yields null when nothing matches. The original indexed the result
            // directly, so one missing row threw and aborted the rest of the batch.
            HtmlNodeCollection rows = doc.DocumentNode.SelectNodes("//table/tr/td/table/tr");
            HtmlNodeCollection cells = (rows == null || rows.Count < 2) ? null : rows[1].SelectNodes("td");
            if (cells == null || cells.Count < 2)
            {
                Logger.Log("GetISINFromWebpage(): no result row for " + item.Issuername + " " + item.Num, Logger.LogType.Warning);
                continue;
            }

            string isin = cells[1].InnerText.Trim();
            if (!string.IsNullOrEmpty(isin))
            {
                item.ISIN = isin;
            }
        }
    }
    catch (Exception ex)
    {
        string msg = "Error found in GetISINFromWebpage() : \r\n" + ex.ToString();
        Logger.Log(msg, Logger.LogType.Error);
    }
}
/// <summary>
/// ISIN Website. Downloads the page at <paramref name="url"/> (up to three attempts, EUC-KR)
/// and returns the table found at /html[1]/body[1]/table[1]/tr[3]/td[1]/table[1].
/// </summary>
/// <param name="url">url</param>
/// <returns>
/// The table html node, or <c>null</c> when the url is empty, the download never succeeded,
/// or the table is not present.
/// </returns>
public static HtmlNode GetTargetTableByUrl(string url)
{
    if (string.IsNullOrEmpty(url))
    {
        return null;
    }

    string html = null;
    for (int attemptsLeft = 3; html == null && attemptsLeft > 0; attemptsLeft--)
    {
        try
        {
            html = WebClientUtil.GetPageSource(null, url, 18000, "", Encoding.GetEncoding("EUC-KR"));
        }
        catch
        {
            // A failed attempt is ignored; the loop tries again while attempts remain.
        }
    }

    if (html == null)
    {
        return null;
    }

    HtmlDocument page = new HtmlDocument();
    page.LoadHtml(html);

    // SelectSingleNode already yields null when the table is absent.
    return page.DocumentNode.SelectSingleNode("/html[1]/body[1]/table[1]/tr[3]/td[1]/table[1]");
}
/// <summary>
/// Runs the document search at <paramref name="url"/> for the given tier-2 selection and
/// release-date range, walks through every result page and collects the FIDs found.
/// </summary>
/// <param name="url">Address of the search form.</param>
/// <param name="startDate">First release date of the range.</param>
/// <param name="endDate">Last release date of the range.</param>
/// <param name="tier2GroupNum">Value for the sel_tier_2_group form field.</param>
/// <param name="tier2Num">Value for the sel_tier_2 form field.</param>
/// <param name="type">
/// Passed to getPerPageFIDList; "CBBC" or "Warrant" additionally selects which download
/// result is registered through AddResult.
/// </param>
/// <returns>The FIDs collected from all result pages.</returns>
private List<FID> getFidList(string url, DateTime startDate, DateTime endDate, int tier2GroupNum, int tier2Num, string type)
{
    const string nextButtonId = "ctl00_gvMain_ctl24_btnNext";
    // Safety cap so a page that always shows a "next" button cannot loop forever.
    const int maxPages = 1000;

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    List<FID> FIDList = new List<FID>();
    AdvancedWebClient wc = new AdvancedWebClient();

    // NOTE(review): txt_today is hard-coded to 20130221 — confirm the site ignores it.
    string postData = string.Format("__VIEWSTATEENCRYPTED=&ctl00%24txt_today=20130221&ctl00%24hfStatus=AEM&ctl00%24hfAlert=&ctl00%24txt_stock_code=&ctl00%24txt_stock_name=&ctl00%24rdo_SelectDocType=rbAfter2006&ctl00%24sel_tier_1=5&ctl00%24sel_DocTypePrior2006=-1&ctl00%24sel_tier_2_group={0}&ctl00%24sel_tier_2={1}&ctl00%24ddlTierTwo=23%2C1%2C3&ctl00%24ddlTierTwoGroup=10%2C2&ctl00%24txtKeyWord=&ctl00%24rdo_SelectDateOfRelease=rbManualRange&ctl00%24sel_DateOfReleaseFrom_d={2}&ctl00%24sel_DateOfReleaseFrom_m={3}&ctl00%24sel_DateOfReleaseFrom_y={4}&ctl00%24sel_DateOfReleaseTo_d={5}&ctl00%24sel_DateOfReleaseTo_m={6}&ctl00%24sel_DateOfReleaseTo_y={7}&ctl00%24sel_defaultDateRange=SevenDays&ctl00%24rdo_SelectSortBy=rbDateTime",
        tier2GroupNum, tier2Num,
        startDate.ToString("dd", invariant), startDate.ToString("MM", invariant), startDate.ToString("yyyy", invariant),
        endDate.ToString("dd", invariant), endDate.ToString("MM", invariant), endDate.ToString("yyyy", invariant));

    // The first request only obtains the view state; the second performs the actual search.
    string viewState = getViewStateValue(WebClientUtil.GetPageSource(wc, url, 1800000, postData));
    postData = string.Format("__VIEWSTATE={0}&{1}", viewState, postData);
    string pageSource = WebClientUtil.GetPageSource(wc, url, 1800000, postData);
    FIDList.AddRange(getPerPageFIDList(pageSource, type));

    // Follow the "next" button while the current page offers one. The original wrapped this in
    // while (true) with an unconditional break, so it never went past the second page.
    int pagesFetched = 1;
    while (pageSource != null && pageSource.Contains(nextButtonId) && pagesFetched < maxPages)
    {
        viewState = getViewStateValue(pageSource);
        postData = string.Format("__VIEWSTATE={0}&{1}", viewState, "__VIEWSTATEENCRYPTED=&ctl00%24gvMain%24ctl24%24btnNext.x=15&ctl00%24gvMain%24ctl24%24btnNext.y=12");
        pageSource = WebClientUtil.GetPageSource(wc, url, 1800000, postData);
        FIDList.AddRange(getPerPageFIDList(pageSource, type));
        pagesFetched++;
    }

    if (type == "CBBC")
    {
        AddResult("CBBC_" + downloadTime, Path.Combine(configObj.CBBC_DOWNLOAD_FOLDER, ("CBBC_" + downloadTime)), "Have Finished Download CBBC Files");
    }
    if (type == "Warrant")
    {
        AddResult("Warrant_" + downloadTime, Path.Combine(configObj.WARRANT_DOWNLOAD_FOLDER, ("Warrant_" + downloadTime)), "Have Finished Download Warrant Files");
    }

    return FIDList;
}
/// <summary>
/// ISIN Website. Resolves the detail page address for an ISIN through GetUrlByIsin and
/// downloads that page (up to three attempts, EUC-KR).
/// </summary>
/// <param name="isin">isin</param>
/// <returns>
/// The page source, or <c>null</c> when no address could be resolved or every download
/// attempt failed.
/// </returns>
public static string GetTargetPageSourceByIsin(string isin)
{
    string detailUrl = GetUrlByIsin(isin);
    if (string.IsNullOrEmpty(detailUrl))
    {
        return null;
    }

    string html = null;
    for (int attemptsLeft = 3; html == null && attemptsLeft > 0; attemptsLeft--)
    {
        try
        {
            html = WebClientUtil.GetPageSource(null, detailUrl, 18000, "", Encoding.GetEncoding("EUC-KR"));
        }
        catch
        {
            // A failed attempt is ignored; the loop tries again while attempts remain.
        }
    }

    return html;
}
/// <summary>
/// Downloads the name abbreviation file and turns it into a lookup of name to abbreviation.
/// </summary>
/// <remarks>
/// The download is tried up to three times. The text inside the page's xmp element is split
/// into lines; parsing starts after the line containing both "MEANING" and "ABBREVATION" (or at
/// the first line when no such header exists). Lines that do not split into exactly two parts,
/// and names that were already seen, are skipped.
/// </remarks>
/// <param name="logger">Receives download errors and warnings.</param>
/// <returns>
/// The name-to-abbreviation map. It is empty when the file could not be downloaded or holds no
/// xmp element.
/// </returns>
public static Dictionary<string, string> DownloadNameRules(Logger logger)
{
    Dictionary<string, string> namesAbbs = new Dictionary<string, string>();
    string url = @"http://dataops.datastream.com/cgi-bin/readfile.pl?filename=H:/Production/Loaders/Global/DataStream/Tools/Abbreviation/Mload/abbreviations.rep&warnold=1";

    HtmlDocument doc = null;
    int retry = 3;
    while (doc == null && retry-- > 0)
    {
        try
        {
            string pageSource = WebClientUtil.GetPageSource(null, url, 180000, null, Encoding.GetEncoding("ISO-8859-1"));
            if (pageSource != null)
            {
                doc = new HtmlDocument();
                doc.LoadHtml(pageSource);
            }
        }
        catch (Exception ex)
        {
            string msg = "At DownloadNameRules(). Error found when downloading the name abbreviations file. " + ex.Message;
            logger.Log(msg);
        }
    }

    // Every attempt failed: the original dereferenced the null document here.
    if (doc == null)
    {
        logger.Log("At DownloadNameRules(). Can not download the name abbreviations file.", Logger.LogType.Warning);
        return namesAbbs;
    }

    HtmlNode xmpNode = doc.DocumentNode.SelectSingleNode("//xmp");
    if (xmpNode == null)
    {
        string msg = string.Format("At DownloadNameRules(). Can not get the name abbreviation in file.");
        logger.Log(msg, Logger.LogType.Warning);
        return namesAbbs;
    }

    string[] lines = xmpNode.InnerText.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

    // Skip everything up to and including the column header line.
    int startLine = 0;
    for (int i = 0; i < lines.Length; i++)
    {
        if (lines[i].Contains("MEANING") && lines[i].Contains("ABBREVATION"))
        {
            startLine = i + 1;
            break;
        }
    }

    // NOTE(review): the separator is reproduced as it appears in this file (one space); confirm
    // it was not a tab or a run of spaces before the file was reformatted.
    const string columnSeparator = " ";
    for (int i = startLine; i < lines.Length; i++)
    {
        string singleLine = lines[i];
        if (!singleLine.Contains(columnSeparator))
        {
            continue;
        }

        string[] names = singleLine.Split(new string[] { columnSeparator }, StringSplitOptions.RemoveEmptyEntries);
        if (names.Length != 2)
        {
            // Irregular name/abbreviation line: ignored.
            continue;
        }

        // The first occurrence of a name wins; repeated names are ignored.
        string name = names[0].Trim();
        if (!namesAbbs.ContainsKey(name))
        {
            namesAbbs.Add(name, names[1].Trim());
        }
    }

    return namesAbbs;
}
/// <summary>
/// Grab new underlying info with korean name.
/// Downloads the BA_VW010.jsp detail page for <paramref name="isin"/> and maps fixed table
/// cells onto a new KoreaUnderlyingInfo.
/// </summary>
/// <param name="koreaName">korean name; copied to KoreaName, KoreaNameFM2 and KoreaNameDrop.</param>
/// <param name="isin">ISIN used to query the detail page and stored on the result.</param>
/// <returns>
/// new underlying info, or <c>null</c> when <paramref name="isin"/> is empty, the page could
/// not be downloaded, or the page does not have the expected table layout.
/// </returns>
public static KoreaUnderlyingInfo GrabNewUnderlyingInfo(string koreaName, string isin)
{
    if (string.IsNullOrEmpty(isin))
    {
        return null;
    }

    string uri = string.Format("http://isin.krx.co.kr/jsp/BA_VW010.jsp?isu_cd={0}&modi=f&req_no=", isin);

    // Up to three download attempts; a failed attempt is simply retried.
    string pageSource = null;
    int retry = 3;
    while (pageSource == null && retry-- > 0)
    {
        try
        {
            pageSource = WebClientUtil.GetPageSource(null, uri, 6000, "", Encoding.GetEncoding("EUC-KR"));
        }
        catch
        {
            continue;
        }
    }

    if (pageSource == null)
    {
        // This message used to be built and then dropped.
        System.Diagnostics.Trace.TraceWarning("Can not get the New Underlying infos in ISIN webpage. For ISIN:" + isin + ". please check if the webpage can be accessed!");
        return null;
    }

    HtmlDocument isinRoot = new HtmlDocument();
    isinRoot.LoadHtml(pageSource);

    // Both selectors yield null when nothing matches; rows up to index 11 are read below.
    HtmlNode isinTable = isinRoot.DocumentNode.SelectSingleNode("/html[1]/body[1]/table[1]/tr[3]/td[1]/table[1]");
    HtmlNodeCollection isinTrs = isinTable == null ? null : isinTable.SelectNodes("./tr");
    if (isinTrs == null || isinTrs.Count < 12)
    {
        System.Diagnostics.Trace.TraceWarning("Unexpected ISIN webpage layout for ISIN:" + isin + ".");
        return null;
    }

    string ric = isinTrs[2].SelectNodes("./td")[3].InnerText.Trim();
    if (ric.Length < 6)
    {
        System.Diagnostics.Trace.TraceWarning("Code shorter than six characters in ISIN webpage for ISIN:" + isin + ".");
        return null;
    }

    string sixDigit = ric.Substring(ric.Length - 6);
    string underEngName = isinTrs[10].SelectNodes("./td")[1].InnerText.Trim();
    // An empty cell in row 11 selects the "KQ" suffix, anything else "KS".
    string suffix = string.IsNullOrEmpty(isinTrs[11].SelectNodes("./td")[2].InnerText.Trim()) ? "KQ" : "KS";
    string usName = isinTrs[10].SelectNodes("./td")[3].InnerText.Trim();
    string symbol = isinTrs[3].SelectNodes("./td")[1].InnerText.Trim();

    // The company name is the first bracketed section of the cell, with spaces removed.
    string companyName = isinTrs[0].SelectNodes("./td")[1].InnerText.Trim();
    Match bracketed = new Regex(@"\[.+?]").Match(companyName);
    companyName = bracketed.Value.Trim(new[] { ' ', '[', ']' });
    companyName = Regex.Replace(companyName, " ", "");

    KoreaUnderlyingInfo newUnderlying = new KoreaUnderlyingInfo();
    newUnderlying.UnderlyingRIC = sixDigit + "." + suffix;

    // BND underlying: drop a trailing "0", strip leading zeros, prefix "kr;" and append "K"
    // for the "KQ" suffix.
    if (sixDigit.Substring(5) == "0")
    {
        sixDigit = sixDigit.Substring(0, 5);
    }
    sixDigit = "kr;" + sixDigit.TrimStart('0');
    if (suffix == "KQ")
    {
        sixDigit += "K";
    }

    string ndaTc = ClearCoLtdForName(underEngName.ToUpper());
    newUnderlying.QACommonNamePart = ndaTc;
    newUnderlying.NDATCUnderlyingTitle = ndaTc;
    newUnderlying.BNDUnderlying = sixDigit;
    newUnderlying.KoreaName = koreaName;
    newUnderlying.KoreaNameFM2 = koreaName;
    newUnderlying.KoreaNameDrop = koreaName;
    newUnderlying.IDNDisplayNamePart = GetIDNDisplayName(symbol, usName, ndaTc);
    newUnderlying.ISIN = isin;
    newUnderlying.CompanyName = companyName;
    return newUnderlying;
}
/// <summary>
/// Get PEO Add type by ISIN. ORD/KDR/PRF
/// Searches realBoard99.jsp for the ISIN and classifies the first result row.
/// </summary>
/// <param name="isin">ISIN</param>
/// <returns>
/// PEO type: "ORD" when the name cell contains "보통주" (common stock), "PRF" when it contains
/// "우선주" (preferred stock), "KDR" when the instrument type cell equals "예탁증서" (depositary
/// receipt), otherwise "#N/A". Returns <c>null</c> when the search page could not be downloaded
/// and an empty string when the page lacks the expected row or cells.
/// </returns>
public static string GetPeoTypeByISIN(string isin)
{
    string uri = "http://isin.krx.co.kr/jsp/realBoard99.jsp";
    string postData = string.Format("kind=&ef_std_cd_grnt_dt_from=&ef_std_cd_grnt_dt_to=&secuGubun=99&lst_yn_all=on&lst_yn1=Y&lst_yn2=N&lst_yn3=D&els_dls_all=on&els_dls1=els&els_dls2=dls&so_gb_all=on&so_gb1=s&so_gb2=o&jp_gb_all=on&jp_gb1=c&jp_gb2=t&jp_gb3=r&jp_gb4=i&hg_gb_all=on&hg_gb1=h&hg_gb2=g&tg_gb_all=on&tg_gb1=x&tg_gb2=z&df_gb_all=on&df_gb1=df1&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word=&ef_iss_inst_cd=&ef_isu_nm={0}&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst_dt_from=&ef_lst_dt_to=", isin);

    // Up to three attempts; wait five seconds after an attempt that throws.
    string pageSource = null;
    int retries = 3;
    while (pageSource == null && retries-- > 0)
    {
        try
        {
            pageSource = WebClientUtil.GetPageSource(null, uri, 180000, postData, Encoding.GetEncoding("EUC-KR"));
        }
        catch
        {
            System.Threading.Thread.Sleep(5000);
        }
    }

    if (pageSource == null)
    {
        return null;
    }

    try
    {
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(pageSource);

        // Missing rows or cells are checked explicitly instead of relying on an exception
        // being thrown and swallowed; the outcome (an empty string) is unchanged.
        HtmlNodeCollection trs = doc.DocumentNode.SelectNodes("/html[1]/body[1]/table[1]/tr[2]/td[1]/table[1]/tr");
        if (trs == null || trs.Count < 2)
        {
            return string.Empty;
        }

        HtmlNodeCollection tds = trs[1].SelectNodes("./td");
        if (tds == null || tds.Count < 3)
        {
            return string.Empty;
        }

        string companyName = tds[2].InnerText.Trim();
        if (companyName.Contains("보통주"))
        {
            return "ORD";
        }
        if (companyName.Contains("우선주"))
        {
            return "PRF";
        }

        if (tds.Count < 4)
        {
            return string.Empty;
        }

        string instrumentType = tds[3].InnerText.Trim();
        return instrumentType.Equals("예탁증서") ? "KDR" : @"#N/A";
    }
    catch (Exception ex)
    {
        // Keep the no-throw contract, but do not lose the error as the original did.
        System.Diagnostics.Trace.TraceError("At GetPeoTypeByISIN(string isin). Error found in searching peo type infos for " + isin + ". " + ex.ToString());
        return string.Empty;
    }
}
/// <summary>
/// Step two of the FM ELW extraction: opens the srchPopup11 detail page for the entity's ISIN
/// and issuing authority and copies fixed table cells into <paramref name="fm"/>.
/// </summary>
/// <param name="fm">
/// Entity to fill. ISIN and IssuingAuthority are read; ReleaseForm, QuanityOfWarrants,
/// IssuePrice, Ticker, Issuer, IssueDate, MatDate, ConversionRatio, XSovereignIssuingAuthority,
/// YStockIndexTypes and KoreaWarrantName are written.
/// </param>
/// <remarks>
/// An empty response records the ISIN in listNoResponse; a page with too few tables or rows
/// records the issuing authority there. An exception records the ISIN in listError. In all of
/// these cases <paramref name="fm"/> may be left partly or wholly unfilled.
/// </remarks>
private void ExtractDataStepTwo(FMELWEntity fm)
{
    string isu_nm = fm.IssuingAuthority.Trim();
    string std_cd = fm.ISIN.Trim();
    string strUrl = @"http://isin.krx.co.kr/srch/srch.do?method=srchPopup11";
    string stdcd_type = "11";
    string mod_del_cd = "";
    string pershr_isu_prc = "";
    string isu_shrs = "";
    string strPostData = string.Format("stdcd_type={0}&std_cd={1}&mod_del_cd={2}&isu_nm={3}&pershr_isu_prc={4}&isu_shrs={5}", stdcd_type, std_cd, mod_del_cd, isu_nm, pershr_isu_prc, isu_shrs);

    try
    {
        AdvancedWebClient wc = new AdvancedWebClient();
        string strPageSource = WebClientUtil.GetPageSource(wc, strUrl, 300000, strPostData);
        if (string.IsNullOrEmpty(strPageSource))
        {
            Logger.Log(string.Format("return response is null,when query ric:{0}", std_cd));
            if (!listNoResponse.Contains(std_cd))
            {
                listNoResponse.Add(std_cd);
            }
            return;
        }

        HtmlDocument htc = new HtmlDocument();
        htc.LoadHtml(strPageSource);

        // SelectNodes yields null when the page has no table at all; treat that as missing data.
        HtmlNodeCollection tables = htc.DocumentNode.SelectNodes(".//table");
        if (tables == null || tables.Count < 5)
        {
            // The format string previously had no {0}, so the identifier was never logged.
            Logger.Log(string.Format("tables.count<5,so missing data in the pageSource ,current ric:{0}", isu_nm));
            if (!listNoResponse.Contains(isu_nm))
            {
                listNoResponse.Add(isu_nm);
            }
            return;
        }

        HtmlNodeCollection trs1 = tables[1].SelectNodes(".//tr"); // 11 rows, two value cells each
        HtmlNodeCollection trs2 = tables[2].SelectNodes(".//tr");
        HtmlNodeCollection trs3 = tables[3].SelectNodes(".//tr");

        if (trs1.Count < 11 || trs2.Count < 3 || trs3.Count < 3)
        {
            // Same missing-placeholder fix as above.
            Logger.Log(string.Format("trs1.count is too small,so missing data in the pageSource ,current ric:{0}", isu_nm));
            if (!listNoResponse.Contains(isu_nm))
            {
                listNoResponse.Add(isu_nm);
            }
            return;
        }

        // NOTE(review): ReleaseForm is read from row 10 here, while the sibling
        // GetGetELWExtractEntityStepTwo reads it from row 9 — confirm which row is correct.
        fm.ReleaseForm = FormatInnerText(trs1[10].SelectNodes(".//td")[0].InnerText);

        // first table left on ELW website
        fm.QuanityOfWarrants = FormatInnerText(trs1[6].SelectNodes(".//td")[0].InnerText); // quantity of warrants
        fm.IssuePrice = FormatInnerText(trs1[7].SelectNodes(".//td")[0].InnerText);        // issue price

        // first table right on ELW website
        fm.Ticker = FormatInnerText(trs1[1].SelectNodes(".//td")[1].InnerText);          // ticker
        fm.Issuer = FormatInnerText(trs1[2].SelectNodes(".//td")[1].InnerText);          // issuer
        fm.IssueDate = FormatInnerText(trs1[6].SelectNodes(".//td")[1].InnerText);       // issue date
        fm.MatDate = FormatInnerText(trs1[7].SelectNodes(".//td")[1].InnerText);         // mat date
        fm.ConversionRatio = FormatInnerText(trs1[9].SelectNodes(".//td")[1].InnerText); // conversion ratio

        // second table on ELW website
        fm.XSovereignIssuingAuthority = FormatInnerText(trs2[0].SelectNodes(".//td")[1].InnerText); // X
        fm.YStockIndexTypes = FormatInnerText(trs2[1].SelectNodes(".//td")[0].InnerText);           // Y

        // third table on ELW website
        fm.KoreaWarrantName = FormatInnerText(trs3[0].SelectNodes(".//td")[0].InnerText); // korea warrant name
    }
    catch (Exception ex)
    {
        if (!listError.Contains(std_cd))
        {
            listError.Add(std_cd);
        }
        // The log used to name GetELWExtractEntityStepOne, which pointed at the wrong method.
        Logger.Log(string.Format("Error found in function: {0}. Exception message: {1}", "ExtractDataStepTwo", ex.Message));
    }
}
/// <summary>
/// Posts the given ISIN and issuer name to the isin.krx.co.kr "srchPopup11"
/// page and maps the cells of the returned HTML tables onto a new
/// <c>ELWExtractEntity</c>.
/// </summary>
/// <remarks>
/// Failure handling (the method never throws for errors raised inside the try block):
/// <list type="bullet">
/// <item>Empty response: <paramref name="std_cd"/> is added to <c>listRicNoResponse</c>.</item>
/// <item>Fewer tables/rows than expected (or none at all): <paramref name="isu_nm"/> is added to <c>listRicNoResponse</c>.</item>
/// <item>Any exception: <paramref name="std_cd"/> is added to <c>listRicError</c>.</item>
/// </list>
/// </remarks>
/// <param name="std_cd">Value sent as the <c>std_cd</c> form field (logged as the "ric").</param>
/// <param name="isu_nm">Value sent as the <c>isu_nm</c> form field.</param>
/// <returns>The populated entity, or <c>null</c> in any of the failure cases above.</returns>
private ELWExtractEntity GetGetELWExtractEntityStepTwo(string std_cd, string isu_nm)
{
    string strUrl = @"http://isin.krx.co.kr/srch/srch.do?method=srchPopup11";
    string stdcd_type = "11";
    string mod_del_cd = "";
    string pershr_isu_prc = "";
    string isu_shrs = "";
    // NOTE(review): values are sent without URL encoding; confirm the endpoint expects raw text.
    string strPostData = string.Format(
        "stdcd_type={0}&std_cd={1}&mod_del_cd={2}&isu_nm={3}&pershr_isu_prc={4}&isu_shrs={5}",
        stdcd_type, std_cd, mod_del_cd, isu_nm, pershr_isu_prc, isu_shrs);
    string strPageSource = string.Empty;
    ELWExtractEntity tmp = new ELWExtractEntity();

    try
    {
        AdvancedWebClient wc = new AdvancedWebClient();
        HtmlDocument htc = new HtmlDocument();

        // 300000 is presumably a timeout in milliseconds - confirm against WebClientUtil.
        strPageSource = WebClientUtil.GetPageSource(wc, strUrl, 300000, strPostData);
        if (string.IsNullOrEmpty(strPageSource))
        {
            Logger.Log(string.Format("return response is null,when query ric:{0}", std_cd));
            if (!listRicNoResponse.Contains(std_cd))
            {
                listRicNoResponse.Add(std_cd);
            }
            return null;
        }

        htc.LoadHtml(strPageSource);

        // SelectNodes returns null (not an empty collection) when nothing matches,
        // so a page without tables is handled as "missing data" too.
        HtmlNodeCollection tables = htc.DocumentNode.SelectNodes(".//table");
        if (tables == null || tables.Count < 5)
        {
            Logger.Log(string.Format("tables.count<5,so missing data in the pageSource ,current ric:{0}", isu_nm));
            // NOTE(review): this branch keys the list by isu_nm while the
            // empty-response branch above uses std_cd - confirm which is intended.
            if (!listRicNoResponse.Contains(isu_nm))
            {
                listRicNoResponse.Add(isu_nm);
            }
            return null;
        }

        HtmlNodeCollection trs1 = tables[1].SelectNodes(".//tr"); // at least 11 rows required
        HtmlNodeCollection trs2 = tables[2].SelectNodes(".//tr"); // at least 3 rows required
        HtmlNodeCollection trs3 = tables[3].SelectNodes(".//tr"); // at least 3 rows required
        HtmlNodeCollection trs4 = tables[4].SelectNodes(".//tr"); // at least 1 row required

        if (trs1 == null || trs2 == null || trs3 == null || trs4 == null
            || trs1.Count < 11 || trs2.Count < 3 || trs3.Count < 3 || trs4.Count < 1)
        {
            Logger.Log(string.Format("trs1.count is too small,so missing data in the pageSource ,current ric:{0}", isu_nm));
            if (!listRicNoResponse.Contains(isu_nm))
            {
                listRicNoResponse.Add(isu_nm);
            }
            return null;
        }

        // First table, left column on the ELW website.
        tmp.IssuingAuthorityCongenial = FormatInnerText(trs1[0].SelectNodes(".//td")[0].InnerText);
        tmp.StandardCongenial = FormatInnerText(trs1[1].SelectNodes(".//td")[0].InnerText);
        tmp.KoreanProjectName = FormatInnerText(trs1[2].SelectNodes(".//td")[0].InnerText);
        tmp.FinancialProducts = FormatInnerText(trs1[3].SelectNodes(".//td")[0].InnerText);
        tmp.ListedOrNot = FormatInnerText(trs1[4].SelectNodes(".//td")[0].InnerText);
        tmp.Listed = FormatInnerText(trs1[5].SelectNodes(".//td")[0].InnerText);
        tmp.IssueNumber = FormatInnerText(trs1[6].SelectNodes(".//td")[0].InnerText);
        tmp.ReleaseTheUnitPrice = FormatInnerText(trs1[7].SelectNodes(".//td")[0].InnerText);
        tmp.Money = FormatInnerText(trs1[8].SelectNodes(".//td")[0].InnerText);
        tmp.ReleaseForm = FormatInnerText(trs1[9].SelectNodes(".//td")[0].InnerText);
        tmp.StandardNonStandard = FormatInnerText(trs1[10].SelectNodes(".//td")[0].InnerText);

        // First table, right column on the ELW website.
        tmp.IssuingAuthority = FormatInnerText(trs1[0].SelectNodes(".//td")[1].InnerText);
        tmp.ShortenTheCongenial = FormatInnerText(trs1[1].SelectNodes(".//td")[1].InnerText);
        tmp.TheProjectNameEnglish = FormatInnerText(trs1[2].SelectNodes(".//td")[1].InnerText);
        tmp.ToIssue = FormatInnerText(trs1[3].SelectNodes(".//td")[1].InnerText);
        tmp.WhetherTheActivity = FormatInnerText(trs1[4].SelectNodes(".//td")[1].InnerText);
        tmp.ListedUntil = FormatInnerText(trs1[5].SelectNodes(".//td")[1].InnerText);
        tmp.TheDate = FormatInnerText(trs1[6].SelectNodes(".//td")[1].InnerText);
        tmp.TheExpirationOfThe = FormatInnerText(trs1[7].SelectNodes(".//td")[1].InnerText);
        tmp.IssueToDistinguish = FormatInnerText(trs1[8].SelectNodes(".//td")[1].InnerText);
        tmp.ConversionRatio = FormatInnerText(trs1[9].SelectNodes(".//td")[1].InnerText);
        tmp.ThePowerForm = FormatInnerText(trs1[10].SelectNodes(".//td")[1].InnerText);

        // Second table on the ELW website.
        tmp.UnderlyingAssetTypes = FormatInnerText(trs2[0].SelectNodes(".//td")[0].InnerText);
        tmp.SovereignIssuingAuthority = FormatInnerText(trs2[0].SelectNodes(".//td")[1].InnerText);
        tmp.StockIndexTypes = FormatInnerText(trs2[1].SelectNodes(".//td")[0].InnerText);
        tmp.UnderlyingAssetGuitar = FormatInnerText(trs2[2].SelectNodes(".//td")[0].InnerText);

        // Third table on the ELW website.
        tmp.TheRightType = FormatInnerText(trs3[0].SelectNodes(".//td")[0].InnerText);
        tmp.TheIssueOfTheSpecialConditions = FormatInnerText(trs3[0].SelectNodes(".//td")[1].InnerText);
        tmp.TheExerciseOfTheRightWay = FormatInnerText(trs3[1].SelectNodes(".//td")[0].InnerText);

        // Fourth table on the ELW website.
        tmp.CFI = FormatInnerText(trs4[0].SelectNodes(".//td")[0].InnerText);

        return tmp;
    }
    catch (Exception ex)
    {
        if (!listRicError.Contains(std_cd))
        {
            listRicError.Add(std_cd);
        }
        // Report this method's own name (the original logged a copy-pasted one).
        Logger.Log(string.Format("Error found in function: {0}. Exception message: {1}", "GetGetELWExtractEntityStepTwo", ex.Message));
        return null;
    }
}