/// <summary>
/// Finds the series name under which the fund's class/contract id is listed on a parsed page.
/// </summary>
/// <param name="BodyNodeS">Root node of the parsed page; all <c>td</c> cells below it are scanned.</param>
/// <param name="fund">
/// Fund whose <c>classcontract</c> is compared with the link text of <c>classContract</c> cells,
/// and whose <c>nameFromEdgar</c> is used as the fallback result.
/// </param>
/// <returns>
/// The text of the latest <c>seriesCell</c> (non-empty after <c>tools.ClearSpace</c>) seen before the
/// matching <c>classContract</c> link. If no link matches: <c>fund.nameFromEdgar</c> when it is set,
/// otherwise the last series name seen, or <c>null</c> when none was seen.
/// </returns>
public string FindFullNameClc(HtmlNode BodyNodeS, Fund fund)
{
    bool seriesSeen = false;
    string name = null;

    HtmlNode tableFile = BodyNodeS.SelectSingleNode(".//table[@class='tableFile']");

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so a 'tableFile' table without rows must not be enumerated.
    HtmlNodeCollection rows = tableFile != null ? tableFile.SelectNodes(".//tr") : null;
    if (rows != null)
    {
        // The cell query is page-wide and does not depend on the current row, so it is
        // evaluated once instead of once per row.
        // NOTE(review): the original scanned every <td> of the page once per 'tableFile' row,
        // carrying 'seriesSeen'/'name' over between passes. That is preserved here on purpose;
        // confirm whether a single pass (or a row-scoped query) was intended.
        HtmlNodeCollection tds = BodyNodeS.SelectNodes(".//td");
        if (tds != null)
        {
            for (int pass = 0; pass < rows.Count; pass++)
            {
                foreach (HtmlNode td in tds)
                {
                    string cellClass = td.GetAttributeValue("class", "");

                    if (cellClass == "seriesCell" && tools.ClearSpace(td.InnerText) != "")
                    {
                        name = td.InnerText;
                        seriesSeen = true;
                    }

                    if (seriesSeen && cellClass == "classContract" && name != null)
                    {
                        HtmlNode a = td.SelectSingleNode(".//a");
                        if (a != null && a.InnerText == fund.classcontract)
                        {
                            return name;
                        }
                    }
                }
            }
        }
    }

    // No class/contract match: prefer the name already stored on the fund.
    if (fund.nameFromEdgar != null)
    {
        name = fund.nameFromEdgar;
    }
    return name;
}
/// <summary>
/// Loads the EDGAR filing list for <paramref name="fund"/> and passes the filings found by
/// <c>FindNQ</c> (ordinals 1 through <c>numberForms</c>) to <c>LoadFormAll</c>.
/// </summary>
/// <param name="fund">
/// Fund to process. Its <c>ticker</c> is placed in the query URL; <c>classcontract</c>,
/// <c>companyName</c> and (on failure) <c>status</c> are written here.
/// </param>
public void MakeFund(Fund fund)
{
    _I++;

    string urlForms = baseLink + "/cgi-bin/browse-edgar?CIK=" + fund.ticker + "&owner=exclude&action=getcompany&count=200&Find=Search";

    // Open the EDGAR page that lists the reports for this fund.
    HtmlNode body = tools.BodyNodeS(tools.Load(urlForms), htmlMakeAll);
    if (body == null)
    {
        Console.WriteLine("{0} - NOT FOUND IN EDGAR", fund.ticker);
        fund.makeFailXml("NOT FOUND IN EDGAR", null, xmlPathR);
        fund.status = 1;
        return;
    }

    fund.classcontract = takeClassContract(body);

    // do/while on purpose: the first form is attempted even when numberForms is below 1.
    int formIndex = 1;
    do
    {
        // Look for the required form.
        string urlForm = FindNQ(fund, body, formIndex);
        fund.companyName = FindCompanyName(body);

        try
        {
            body = LoadFormAll(fund, body, urlForm, true);
        }
        catch (OutOfMemoryException)
        {
            Console.WriteLine("{0} - OutOfMemory", fund.ticker);
        }

        formIndex++;
    }
    while (formIndex <= numberForms);
}
/// <summary>
/// Finds the tickers for which new reports have appeared: every known fund (from <c>funds</c>)
/// whose ticker equals the text of a 4th cell in a <c>contractRow</c> inside <c>seriesDiv</c>.
/// </summary>
/// <param name="body">Root node of the parsed page to search.</param>
/// <returns>The matching funds in page order; an empty list when no such cells exist.</returns>
public List<Fund> selectUTickers(HtmlNode body)
{
    List<Fund> tickers = new List<Fund>();

    HtmlNodeCollection tds = body.SelectNodes(".//div[@id='seriesDiv']//tr[@class='contractRow']//td[position() = 4]");
    if (tds == null)
    {
        return tickers;
    }

    foreach (HtmlNode td in tds)
    {
        string tval = td.InnerText.Trim();

        // Ordinal, case-insensitive comparison: does not depend on the current culture
        // and does not throw when a fund has no ticker.
        Fund fund = funds.Find(x => string.Equals(x.ticker, tval, StringComparison.OrdinalIgnoreCase));
        if (fund != null)
        {
            tickers.Add(fund);
        }
    }

    return tickers;
}
/// <summary>
/// Reads a text file line by line and hands each line to an optional callback,
/// then waits for a line on standard input before returning.
/// </summary>
/// <param name="_path">File path; a relative path is resolved against the current directory.</param>
/// <param name="fund">Value passed through unchanged as the callback's second argument.</param>
/// <param name="lineMake">Callback invoked for every line; its return value is ignored. May be null.</param>
public void openFile(string _path, Fund fund = null, Func<string, Fund, string> lineMake = null)
{
    // The path is used verbatim. It was previously run through String.Format, which throws
    // FormatException for any path that contains '{' or '}'.
    string fullPath = System.IO.Path.Combine(Directory.GetCurrentDirectory(), _path);

    // 'using' closes the reader even when the callback throws.
    using (System.IO.StreamReader file = new System.IO.StreamReader(fullPath))
    {
        string line;
        while ((line = file.ReadLine()) != null)
        {
            if (lineMake != null)
            {
                lineMake(line, fund);
            }
        }
    }

    // Pause until the user presses Enter (behaviour kept from the original).
    System.Console.ReadLine();
}
/// <summary>
/// Finds the names of all funds on the page (non-blank <c>seriesCell</c> cells) and fills the
/// list of names and the list of captions.
/// </summary>
/// <param name="BodyNodeS">Root node of the parsed page to search.</param>
/// <param name="fund">Fund whose <c>nameFromEdgar</c> decides which caption is flagged.</param>
/// <param name="names">Receives the raw text of every cell found.</param>
/// <param name="captions">
/// Receives one <c>Caption</c> per cell, built from the raw text, the lower-cased result of
/// <c>tools.ClearSpaceCaption</c>, and a flag that is true when the text equals <c>fund.nameFromEdgar</c>.
/// </param>
public void GetFullNames(HtmlNode BodyNodeS, Fund fund, List<string> names, List<Caption> captions)
{
    // normalize-space() strips leading/trailing whitespace, so a blank cell yields '' and
    // can never equal ' '. Comparing against '' is what actually filters blank cells out.
    HtmlNodeCollection nms = BodyNodeS.SelectNodes(".//td[@class='seriesCell'][normalize-space()!='']");
    if (nms == null)
    {
        return;
    }

    foreach (HtmlNode n in nms)
    {
        string fullName = n.InnerText;
        bool first = fund.nameFromEdgar == fullName;

        names.Add(fullName);

        // Add the caption.
        captions.Add(new Caption(fullName, tools.ClearSpaceCaption(fullName).ToLower(), first));
    }
}
/// <summary>
/// Trims the part that is identical in all captions and strips a trailing "fund" / "inc"
/// from each caption's <c>compressedname</c>.
/// </summary>
/// <param name="captions">Captions to shorten in place. Nothing is done when null or empty.</param>
/// <param name="fund">
/// Fund whose <c>companyName</c> is passed to <c>zNamesFindI2</c> when <c>zNamesFindI</c> returns 0.
/// </param>
public void zNames(List<Caption> captions, Fund fund)
{
    // Nothing to compress; First() on an empty list would throw InvalidOperationException.
    if (captions == null || captions.Count == 0)
    {
        return;
    }

    int i = zNamesFindI(captions, captions[0].compressednamenozip);
    if (i == 0)
    {
        zNamesFindI2(captions, tools.ClearSpaceCaption(fund.companyName).ToLower());
    }

    foreach (Caption caption in captions)
    {
        // Drop the first 'izip' characters when a cut position is set on the caption.
        if (caption.izip != 0)
        {
            caption.compressedname = caption.compressedname.Substring(caption.izip);
        }

        // A trailing "fund" is removed only from names longer than 10 characters.
        if (caption.compressedname.Length > 10)
        {
            caption.compressedname = Regex.Replace(caption.compressedname, @"fund$", @"", RegexOptions.IgnoreCase);
        }

        caption.compressedname = Regex.Replace(caption.compressedname, @"inc$", @"", RegexOptions.IgnoreCase);
    }
}
/// <summary>
/// Sets <c>fund.name</c> to "<paramref name="name"/>;suffix", where the suffix is
/// <paramref name="_suffix"/> with the words "class" and "shares", the name itself and hyphens removed.
/// </summary>
/// <param name="fund">Fund whose <c>name</c> is overwritten.</param>
/// <param name="name">Base name placed before the ';' separator.</param>
/// <param name="_suffix">Raw text from which the suffix is derived.</param>
public void setName(Fund fund, string name, string _suffix)
{
    // The name is escaped so that regex metacharacters in it ('.', '(', '+', ...) are matched
    // literally instead of changing the pattern or making it invalid.
    string pattern = @"class|shares|" + Regex.Escape(name);
    string suffix = Regex.Replace(_suffix, pattern, @"", RegexOptions.IgnoreCase).Trim();

    // Collapse the runs of spaces left behind by the removals.
    suffix = Regex.Replace(suffix, @"[ ]{2,}", @" ").Trim();

    List<string> nameWords = name.Split(' ').ToList();
    List<string> keptWords = new List<string>();

    foreach (string word in suffix.Split(' '))
    {
        if (nameWords.Contains(word))
        {
            // A word that also occurs in the name discards everything collected so far,
            // so only the words after the last such word survive.
            keptWords.Clear();
        }
        else
        {
            keptWords.Add(word);
        }
    }

    fund.name = name + ";" + string.Join(" ", keptWords).Replace("-", "").Trim();
}
/// <summary>
/// Requests the full fund list from the remote web service (GETALLFUNDS) and converts the
/// JSON answer into <c>Fund</c> objects.
/// </summary>
/// <returns>
/// The funds that have both a ticker and a name; <c>null</c> when the answer has no
/// <c>FCT.fund</c> element or cannot be processed.
/// </returns>
/// <exception cref="Exception">The response text does not contain "success" (the text is the message).</exception>
public List<Fund> getFundsFromServer()
{
    List<Fund> result = new List<Fund>();
    DateTime now = DateTime.Now;

    // SECURITY(review): the service login and password are hard-coded in source.
    // They should be moved to configuration / a secret store.
    string postData = "Action=GETALLFUNDS&Login=maximgrishkov%40yandex.ru&Password=3333";

    HttpWebRequest wr = (HttpWebRequest)WebRequest.Create("https://rixtrema.net/RixtremaWS401k/AJAXFCT.aspx");
    wr.Timeout = int.MaxValue;
    wr.Method = "POST";
    wr.ContentType = "application/x-www-form-urlencoded";

    // Content-Length is a byte count, so it is taken from the encoded payload
    // rather than from the number of characters in the string.
    byte[] payload = System.Text.Encoding.UTF8.GetBytes(postData);
    wr.ContentLength = payload.Length;

    using (Stream requestStream = wr.GetRequestStream())
    {
        requestStream.Write(payload, 0, payload.Length);
    }

    // The response and its reader are disposed even if reading fails.
    string respText;
    using (HttpWebResponse response = (HttpWebResponse)wr.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        respText = reader.ReadToEnd();
    }

    double seconds = (DateTime.Now - now).TotalSeconds;
    Console.WriteLine(seconds);

    if (!respText.ToLower().Contains("success"))
    {
        throw new Exception(respText);
    }

    try
    {
        dynamic js = JsonConvert.DeserializeObject(respText);
        if (js.FCT == null)
        {
            return null;
        }
        if (js.FCT.fund == null)
        {
            return null;
        }

        int i = 0;
        foreach (dynamic jsfund in js.FCT.fund)
        {
            i++;
            if (jsfund.Ticker != null && jsfund.Name != null)
            {
                Fund fund = new Fund();
                fund.ticker = jsfund.Ticker;
                fund.name = jsfund.Name;
                fund.fname = jsfund.FamilyName;
                result.Add(fund);
            }
        }

        // Number of entries received, including the ones skipped above.
        Console.WriteLine(i);
        return result;
    }
    catch (Exception ex)
    {
        // Callers still get null for an unusable answer, but the reason is no longer lost.
        Console.WriteLine("getFundsFromServer: cannot process response - {0}", ex.Message);
        return null;
    }
}
/// <summary>
/// Finds the required form and returns the URL of its "Documents" link.
/// <paramref name="num"/> is the ordinal of the form counted from the start of the table of forms;
/// only the wanted report types (N-Q, N-CSRS, N-CSR) are counted.
/// </summary>
/// <param name="fund">
/// Fund being processed. <c>formName</c> is set to each wanted form type as it is encountered,
/// and <c>forms</c> is incremented when a URL is returned.
/// </param>
/// <param name="BodyNodeS">Root node of the parsed page.</param>
/// <param name="num">1-based ordinal of the wanted form.</param>
/// <returns><c>baseLink</c> plus the link's href, or <c>null</c> when no such form is found.</returns>
public string FindNQ(Fund fund, HtmlNode BodyNodeS, int num = 1)
{
    int _num = 1;
    bool next = false;
    string url = null;

    HtmlNode series = BodyNodeS.SelectSingleNode(".//div[@id='seriesDiv']");
    if (series == null)
    {
        return null;
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection tables = series.SelectNodes(".//table");
    if (tables == null)
    {
        return null;
    }

    // The cell query is page-wide and independent of the current row, so it is evaluated once.
    // NOTE(review): the original scanned every <td> of the page once per row of every table in
    // 'seriesDiv', carrying '_num', 'next' and 'url' over between passes. That is preserved here;
    // confirm whether a row-scoped query (tr.SelectNodes) was intended.
    HtmlNodeCollection tds = BodyNodeS.SelectNodes(".//td");
    if (tds == null)
    {
        return null;
    }

    foreach (HtmlNode table in tables)
    {
        HtmlNodeCollection rows = table.SelectNodes(".//tr");
        if (rows == null)
        {
            continue;
        }

        for (int pass = 0; pass < rows.Count; pass++)
        {
            foreach (HtmlNode td in tds)
            {
                if (next)
                {
                    // Once the link is known, the first following cell without a class ends the search.
                    if (url != null && td.GetAttributeValue("class", "") == "")
                    {
                        fund.forms++;
                        return url;
                    }

                    HtmlNode a = td.SelectSingleNode(".//a");
                    if (a != null && tools.ClearStrNameNew(a.InnerText) == "Documents")
                    {
                        url = baseLink + a.GetAttributeValue("href", "");
                    }
                }
                else
                {
                    string formType = td.InnerText;
                    if (formType == "N-Q" || formType == "N-CSRS" || formType == "N-CSR")
                    {
                        fund.formName = formType;
                        if (_num == num)
                        {
                            next = true;
                        }
                        _num++;
                    }
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Opens the EDGAR page of one report, copies its dates, document URL and fund name onto
/// <paramref name="fund"/>, then downloads and parses the report document and saves the result
/// (or a failure record) under <c>xmlPathR</c>.
/// </summary>
/// <param name="fund">Fund being processed; updated in place.</param>
/// <param name="body">Returned unchanged on every path.</param>
/// <param name="urlForm">URL of the report page.</param>
/// <param name="makeXmlAll">Not read by this method.</param>
/// <returns>The <paramref name="body"/> argument.</returns>
public HtmlNode LoadFormAll(Fund fund, HtmlNode body, string urlForm, bool makeXmlAll = false)
{
    fund.netAssetsFromEdgar = 0;
    List<string> names = new List<string>();
    List<Caption> captions = new List<Caption>();

    // Second Edgar page: the one for this specific report.
    HtmlNode bodyForm = tools.BodyNodeS(tools.Load(urlForm), htmlMakeAll);
    if (bodyForm == null)
    {
        return body;
    }

    // The dates are always taken; they are needed for saving.
    fund.periodOfReport = FindPeriodOfReport(bodyForm);
    fund.fillingDate = FindFillingDate(bodyForm);
    fund.dateEffectiveness = FindDateEffectiveness(bodyForm);
    fund.source = FindUrlDocument(bodyForm, fund.formName);
    fund.nameFromEdgar = FindFullNameClc(bodyForm, fund);

    Console.WriteLine("FUND : {0}", fund.ticker);

    // Names of the funds that are also present in this report.
    GetFullNames(bodyForm, fund, names, captions);

    if (fund.source == null)
    {
        Console.Write("0013");
        fund.status = 2;
        return body;
    }

    // Without a fund name nothing further is done (and nothing is reported).
    if (fund.nameFromEdgar == null)
    {
        return body;
    }

    // The document that has to be parsed.
    string doc = tools.Load(fund.source);

    // Determine the file type from the text after the last '.' of the last URL segment.
    string[] urlSegments = fund.source.Split('/');
    string[] fileNameParts = urlSegments[urlSegments.Length - 1].Split('.');
    string ext = fileNameParts[fileNameParts.Length - 1];

    bool isHtml = ext == "html" || ext == "htm";
    if (!isHtml)
    {
        // NOTE: the file (doc) is txt; no txt parser is implemented.
        Console.WriteLine("{0} - TXT FAIL", fund.ticker);
        fund.makeFailXml("TXT FAIL", captions, xmlPathR);
        return body;
    }

    HtmlNode bodyDocument = tools.BodyNodeS(doc, htmlMakeDocs);
    if (bodyDocument == null)
    {
        // The html could not be turned into a body node; no fallback parser is implemented.
        Console.WriteLine("{0} - HTML FAIL", fund.ticker);
        fund.makeFailXml("HTML FAIL", captions, xmlPathR);
        return body;
    }

    // Original author's note: sum of all positions from the morningstar site.
    fund.TotalAssetsMorning();

    Document document = new Document(bodyDocument);
    document.Parse(fund, captions);

    // Second attempt: compress the captions and parse again.
    if (document.holdings.Count == 0)
    {
        zNames(captions, fund);
        document.Parse(fund, captions);
    }

    if (document.holdings.Count > 0)
    {
        // Transform the set of positions from a dictionary into a list and assign it to the fund.
        tools.transformToList(fund, document.holdings);
        fund.sumHoldings();

        // Save.
        fund.MakeXml(xmlPathR);
    }
    else
    {
        Console.WriteLine("{0} - HOLDINGS FAIL", fund.ticker);
        fund.makeFailXml("HOLDINGS FAIL", captions, xmlPathR);
    }

    fund.remove();
    return body;
}
/// <summary>
/// Loads the EDGAR page for <paramref name="fund"/>, follows the second link of the
/// <c>identInfo</c> paragraph and uses the table rows found there to set the name of this fund
/// and of any other known fund whose ticker appears in the fifth cell of a row.
/// </summary>
/// <param name="fund">
/// Fund to process. <c>classcontract</c> is written here, <c>status</c> is set to 1 when the
/// first page cannot be loaded, and <c>name</c> is set through <c>setName</c> on a match.
/// </param>
public void MakeFamily(Fund fund)
{
    string urlForms = baseLink + "/cgi-bin/browse-edgar?CIK=" + fund.ticker + "&owner=exclude&action=getcompany&count=200&Find=Search";

    HtmlNode body = tools.BodyNodeS(tools.Load(urlForms), htmlMakeAll);
    if (body == null)
    {
        Console.WriteLine("{0} - NOT FOUND IN EDGAR", fund.ticker);
        fund.makeFailXml("NOT FOUND IN EDGAR", null, xmlPathR);
        fund.status = 1;
        return;
    }

    fund.classcontract = takeClassContract(body);

    HtmlNode a = body.SelectSingleNode(".//p[@class='identInfo']/a[position()=2]");
    if (a == null)
    {
        return;
    }

    string link = baseLink + a.GetAttributeValue("href", "");
    Console.WriteLine(link);

    HtmlNode bodyL = tools.BodyNodeS(tools.LoadWeb(link), htmlMakeAll);
    if (bodyL == null)
    {
        return;
    }

    HtmlNodeCollection trs = bodyL.SelectNodes(".//table[@summary]//tr");
    if (trs == null)
    {
        return;
    }

    // Both values are carried across rows: the family name comes from an earlier row
    // than the class/contract rows that use it.
    string familyName = "";
    bool found = false;

    foreach (HtmlNode tr in trs)
    {
        HtmlNodeCollection tds = tr.SelectNodes(".//td");
        if (tds == null)
        {
            continue;
        }

        for (int t = 0; t < tds.Count; t++)
        {
            HtmlNode td = tds[t];

            if (!found)
            {
                // A direct link without a class attribute supplies the current family name.
                HtmlNode aa = td.SelectSingleNode("./a");
                if (aa != null && aa.GetAttributeValue("class", "") == "")
                {
                    familyName = aa.InnerText;
                }

                if (td.InnerText == fund.classcontract)
                {
                    found = true;

                    // Bounds check instead of 'tds[t + 1] != null': the indexer throws when
                    // the class/contract id sits in the last cell of the row.
                    if (familyName != "" && t + 1 < tds.Count)
                    {
                        setName(fund, familyName, tds[t + 1].InnerText);
                    }
                }
            }

            // The fifth cell is compared with the tickers of the known funds; the cell
            // before it is passed to setName as the suffix text.
            if (t == 4)
            {
                // Read the cell once; ordinal comparison is culture-independent and null-safe.
                string cellTicker = td.InnerText.Trim();
                Fund fundBonus = funds.Find(x => string.Equals(x.ticker, cellTicker, StringComparison.OrdinalIgnoreCase));
                if (fundBonus != null && familyName != "")
                {
                    setName(fundBonus, familyName, tds[t - 1].InnerText);
                }
            }
        }
    }
}