/// <summary>
/// Loads the EDGAR filing-list page for <paramref name="fund"/> and runs
/// <c>LoadFormAll</c> for form numbers 1 through <c>numberForms</c>.
/// </summary>
/// <remarks>
/// When the filing list cannot be loaded, a "NOT FOUND IN EDGAR" fail XML is written and
/// <c>fund.status</c> is set to 1. The form loop always runs at least once, even when
/// <c>numberForms</c> is less than 1.
/// </remarks>
/// <param name="fund">Fund whose <c>ticker</c> is used as the EDGAR CIK query value.</param>
public void MakeFund(Fund fund) {
    _I++;

    string urlForms = baseLink + "/cgi-bin/browse-edgar?CIK=" + fund.ticker + "&owner=exclude&action=getcompany&count=200&Find=Search";

    // Open the EDGAR page that lists the filings for this fund.
    HtmlNode body = tools.BodyNodeS(tools.Load(urlForms), htmlMakeAll);
    if (body == null) {
        Console.WriteLine("{0} - NOT FOUND IN EDGAR", fund.ticker);
        fund.makeFailXml("NOT FOUND IN EDGAR", null, xmlPathR);
        fund.status = 1;
        return;
    }

    fund.classcontract = takeClassContract(body);

    int currentNum = 1;
    do {
        // Look up the required form (the currentNum-th one) on the filing list.
        string urlForm = FindNQ(fund, body, currentNum);
        fund.companyName = FindCompanyName(body);
        try {
            body = LoadFormAll(fund, body, urlForm, true);
        } catch (OutOfMemoryException) {
            // Best effort: report the failure and carry on with the next form.
            Console.WriteLine("{0} - OutOfMemory", fund.ticker);
        }
        currentNum++;
    } while (currentNum <= numberForms);
}
/// <summary>
/// Loads the EDGAR page of a single filing, copies the filing metadata onto
/// <paramref name="fund"/>, and, when the filing's document is an HTML file, parses its
/// holdings and writes the result (or a fail XML) to <c>xmlPathR</c>.
/// </summary>
/// <param name="fund">Fund being processed; its date, source and name fields are overwritten.</param>
/// <param name="body">Node of the filing-list page; returned unchanged on every path.</param>
/// <param name="urlForm">URL of the filing page to load.</param>
/// <param name="makeXmlAll">Not read by this method; kept so existing callers still compile.</param>
/// <returns>The <paramref name="body"/> argument, unchanged.</returns>
public HtmlNode LoadFormAll(Fund fund, HtmlNode body, string urlForm, bool makeXmlAll = false) {
    fund.netAssetsFromEdgar = 0;
    List<string> names = new List<string>();
    List<Caption> captions = new List<Caption>();

    // Second EDGAR page: the one for the specific filing.
    HtmlNode bodyForm = tools.BodyNodeS(tools.Load(urlForm), htmlMakeAll);
    if (bodyForm == null) {
        return body;
    }

    // The dates are always collected; per the original note they are needed for saving.
    fund.periodOfReport = FindPeriodOfReport(bodyForm);
    fund.fillingDate = FindFillingDate(bodyForm);
    fund.dateEffectiveness = FindDateEffectiveness(bodyForm);
    fund.source = FindUrlDocument(bodyForm, fund.formName);
    fund.nameFromEdgar = FindFullNameClc(bodyForm, fund);

    Console.WriteLine("FUND : {0}", fund.ticker);

    // Collect the names of the other funds that are also covered by this filing.
    GetFullNames(bodyForm, fund, names, captions);

    if (fund.source == null) {
        Console.Write("0013");
        fund.status = 2;
        return body;
    }

    if (fund.nameFromEdgar == null) {
        // Nothing is reported or saved for this case (the original branch was empty).
        return body;
    }

    // The document that has to be parsed.
    string doc = tools.Load(fund.source);

    // Determine the file type: text after the last '.' of the last path segment.
    // If the segment has no '.', the whole segment is used, as before.
    string fileName = fund.source.Substring(fund.source.LastIndexOf('/') + 1);
    string ext = fileName.Substring(fileName.LastIndexOf('.') + 1);

    // Case-insensitive so that ".HTM" / ".HTML" are not misreported as TXT.
    bool isHtml = string.Equals(ext, "html", StringComparison.OrdinalIgnoreCase)
               || string.Equals(ext, "htm", StringComparison.OrdinalIgnoreCase);
    if (!isHtml) {
        // NOTE: the document is a text file; no txt parser is wired in here yet.
        Console.WriteLine("{0} - TXT FAIL", fund.ticker);
        fund.makeFailXml("TXT FAIL", captions, xmlPathR);
        return body;
    }

    HtmlNode bodyDocument = tools.BodyNodeS(doc, htmlMakeDocs);
    if (bodyDocument == null) {
        // Malformed HTML; no fallback parser is wired in here yet.
        // captions holds the fund names gathered from the filing page above.
        Console.WriteLine("{0} - HTML FAIL", fund.ticker);
        fund.makeFailXml("HTML FAIL", captions, xmlPathR);
        return body;
    }

    // Per the original note: sum of all positions from the morningstar site.
    fund.TotalAssetsMorning();

    Document document = new Document(bodyDocument);
    document.Parse(fund, captions);

    // Second attempt: compress the captions via zNames and parse again.
    if (document.holdings.Count == 0) {
        zNames(captions, fund);
        document.Parse(fund, captions);
    }

    if (document.holdings.Count > 0) {
        // Convert the holdings collection into a list and assign it to the fund.
        tools.transformToList(fund, document.holdings);
        fund.sumHoldings();
        // Save.
        fund.MakeXml(xmlPathR);
    } else {
        Console.WriteLine("{0} - HOLDINGS FAIL", fund.ticker);
        fund.makeFailXml("HOLDINGS FAIL", captions, xmlPathR);
    }

    fund.remove();
    return body;
}
/// <summary>
/// Looks up the fund on EDGAR, follows the second link of the <c>identInfo</c> paragraph,
/// and scans the rows of the summary table on that page to call <c>setName</c> for
/// <paramref name="fund"/> and for any other fund in <c>funds</c> whose ticker appears
/// in the fifth cell of a row.
/// </summary>
/// <remarks>
/// When the filing list cannot be loaded, a "NOT FOUND IN EDGAR" fail XML is written and
/// <c>fund.status</c> is set to 1. Missing links, pages or tables end the method silently.
/// </remarks>
/// <param name="fund">Fund whose <c>ticker</c> is used as the EDGAR CIK query value.</param>
public void MakeFamily(Fund fund) {
    string urlForms = baseLink + "/cgi-bin/browse-edgar?CIK=" + fund.ticker + "&owner=exclude&action=getcompany&count=200&Find=Search";
    HtmlNode body = tools.BodyNodeS(tools.Load(urlForms), htmlMakeAll);
    if (body == null) {
        Console.WriteLine("{0} - NOT FOUND IN EDGAR", fund.ticker);
        fund.makeFailXml("NOT FOUND IN EDGAR", null, xmlPathR);
        fund.status = 1;
        return;
    }

    fund.classcontract = takeClassContract(body);

    HtmlNode a = body.SelectSingleNode(".//p[@class='identInfo']/a[position()=2]");
    if (a == null) {
        return;
    }

    string link = baseLink + a.GetAttributeValue("href", "");
    Console.WriteLine(link);

    HtmlNode bodyL = tools.BodyNodeS(tools.LoadWeb(link), htmlMakeAll);
    if (bodyL == null) {
        return;
    }

    HtmlNodeCollection trs = bodyL.SelectNodes(".//table[@summary]//tr");
    if (trs == null) {
        return;
    }

    // Text of the most recent class-less link seen so far; carried across rows.
    string familyName = "";
    // Becomes true once the cell equal to fund.classcontract has been seen.
    bool found = false;

    foreach (HtmlNode tr in trs) {
        HtmlNodeCollection tds = tr.SelectNodes(".//td");
        if (tds == null) {
            continue;
        }

        for (int t = 0; t < tds.Count; t++) {
            HtmlNode td = tds[t];

            if (!found) {
                HtmlNode anchor = td.SelectSingleNode("./a");
                if (anchor != null && anchor.GetAttributeValue("class", "") == "") {
                    familyName = anchor.InnerText;
                }

                if (td.InnerText == fund.classcontract) {
                    found = true;
                    // Bounds check: the matching cell may be the last one in the row,
                    // in which case there is no following cell to read.
                    if (familyName != "" && t + 1 < tds.Count) {
                        setName(fund, familyName, tds[t + 1].InnerText);
                    }
                }
            }

            if (t == 4) {
                // The fifth cell is matched against fund tickers; the fourth cell's
                // text is what gets passed to setName for the matching fund.
                string cellTicker = td.InnerText.Trim();
                Fund fundBonus = funds.Find(x => string.Equals(x.ticker, cellTicker, StringComparison.OrdinalIgnoreCase));
                if (fundBonus != null && familyName != "") {
                    setName(fundBonus, familyName, tds[t - 1].InnerText);
                }
            }
        }
    }
}