/// <summary>
/// Parses the HTML index page of a submission and extracts the form type,
/// the "Accepted" / "Period of Report" dates and the report file name(s).
/// </summary>
/// <param name="submissionIndexFile">Index file whose <c>Content</c> holds the HTML bytes.</param>
/// <returns>
/// A <see cref="SECSourceSubmissionInfo"/> instance (never null). Its <c>Type</c> is only
/// assigned when the form name found in the page is one of the recognised ones
/// (10-Q, 10-K, Form 424B2, Form 4, 13F-HR); for any other page the freshly
/// constructed object is returned untouched.
/// </returns>
/// <exception cref="FormatException">
/// Thrown by <see cref="DateTime.Parse(string, IFormatProvider)"/> when a date cell cannot be parsed.
/// </exception>
private SECSourceSubmissionInfo ExtractReportDetailsIndexHTML(SubmissionFile submissionIndexFile)
{
    SECSourceSubmissionInfo subInfo = new SECSourceSubmissionInfo();

    string txtContent = System.Text.Encoding.Default.GetString(submissionIndexFile.Content.ToArray());

    var doc = new HtmlDocument();
    doc.LoadHtml(txtContent);

    HtmlNode nodeType = doc.DocumentNode.SelectSingleNode("//div[@id='formDiv']/div[@id='formHeader']/div[@id='formName']/strong");
    if (nodeType == null)
    {
        return subInfo;
    }

    // Map the form name displayed in the page header to the internal type code.
    switch (nodeType.InnerText)
    {
        case "10-Q":
            subInfo.Type = "10-Q";
            break;
        case "10-K":
            subInfo.Type = "10-K";
            break;
        case "Form 424B2":
            subInfo.Type = "424B2";
            break;
        case "Form 4":
            subInfo.Type = "4";
            break;
        case "13F-HR":
            subInfo.Type = "13F-HR";
            break;
    }

    if (string.IsNullOrEmpty(subInfo.Type))
    {
        return subInfo;
    }

    // extracting dates
    var nodesMetadata = doc.DocumentNode.SelectNodes("//div[@id='formDiv']/div/div/div[@class='infoHead']");
    if (nodesMetadata != null)
    {
        foreach (HtmlNode node in nodesMetadata)
        {
            // The value lives in the sibling 'info' div next to the 'infoHead' label.
            HtmlNode nodeDate = node.SelectSingleNode("../div[@class='info']");
            if (nodeDate == null)
            {
                continue;
            }

            // The page is machine-generated, so parse with the invariant culture
            // instead of whatever culture the current thread happens to use.
            if (node.InnerText == "Accepted")
            {
                subInfo.Submitted = DateTime.Parse(nodeDate.InnerText, System.Globalization.CultureInfo.InvariantCulture);
            }
            else if (node.InnerText == "Period of Report")
            {
                subInfo.PeriodEnd = DateTime.Parse(nodeDate.InnerText, System.Globalization.CultureInfo.InvariantCulture);
            }
        }
    }

    // extracting report file name: prefer the table row marked 'EX-101.INS',
    // otherwise fall back to a lookup that depends on the form type.
    HtmlNode nodeFilingData = doc.DocumentNode.SelectSingleNode("//div[@id='formDiv']/div/table/tr/td[text()='EX-101.INS']/..");
    if (nodeFilingData == null)
    {
        switch (subInfo.Type)
        {
            case "10-Q":
            case "10-K":
                nodeFilingData = doc.DocumentNode.SelectSingleNode("//div[@id='formDiv']/div/table/tr/td[text()='XML']/..");
                break;

            case "4":
                {
                    var fileNodes = doc.DocumentNode.SelectNodes("//div[@id='formDiv']/div/table/tr/td[text()='4']/..");

                    // SelectNodes yields null (not an empty collection) when nothing matches.
                    if (fileNodes != null)
                    {
                        nodeFilingData = fileNodes.FirstOrDefault(row =>
                        {
                            HtmlNode link = row.SelectSingleNode("td/a");
                            return link != null && link.InnerText.IndexOf(".xml", StringComparison.Ordinal) >= 0;
                        });
                    }
                }
                break;

            case "13F-HR":
                // No table lookup for this type: two fixed file names are registered.
                subInfo.Report.Add("primary_doc.xml");
                subInfo.Report.Add("form13fInfoTable.xml");
                break;
        }
    }

    if (nodeFilingData != null)
    {
        // A matched row is not guaranteed to contain a link cell; skip it in that case.
        HtmlNode nodeFileName = nodeFilingData.SelectSingleNode("td/a");
        if (nodeFileName != null)
        {
            subInfo.Report.Add(nodeFileName.InnerText.Trim());
        }
    }

    return subInfo;
}
/// <summary>
/// Collects type, date and report-file details for every submission listed in
/// <paramref name="infoParams"/> by loading and parsing each submission's "-index.html" file.
/// </summary>
/// <param name="infoParams">
/// Request parameters; must be a <see cref="SECSourceSubmissionsInfoParams"/> instance,
/// otherwise an <c>InvalidSourceParams</c> error is returned.
/// </param>
/// <returns>
/// A result whose <c>Submissions</c> holds one entry per submission with a recognised type.
/// <c>Success</c> is <c>false</c> only when the parameters are of the wrong type; problems with
/// individual submissions are recorded in the result's errors while <c>Success</c> stays <c>true</c>.
/// </returns>
public ISourceSubmissionsInfoResult GetSubmissionsInfo(ISourceSubmissionsInfoParams infoParams)
{
    ISourceSubmissionsInfoResult result = new SECSourceSubmissionsInfoResult();

    SECSourceSubmissionsInfoParams secInfoParams = infoParams as SECSourceSubmissionsInfoParams;
    if (secInfoParams == null)
    {
        result.Success = false;
        result.Errors.Add(new Error() { Code = EErrorCodes.InvalidSourceParams, Type = EErrorType.Error, Message = "Invalid parameter provided" });
        return result;
    }

    string cik = _dictionary.LookupRegulatorCompanyCode(infoParams.RegulatorCode, infoParams.CompanyCode); // TODO: lookup in dictionary

    // for each submission - extracting content and checking type
    foreach (var item in secInfoParams.Items)
    {
        // The _extractFromStorage flag selects between local storage and the remote API.
        Submission submission = !_extractFromStorage
            ? GetSubmissionFromApi(cik, item.Name)
            : GetSubmissionFromStorage(infoParams.RegulatorCode, infoParams.CompanyCode, item.Name);

        if (submission == null)
        {
            result.Errors.Add(new Error() { Code = EErrorCodes.SubmissionNotFound, Type = EErrorType.Warning, Message = string.Format("Submission '{0}' was not found", item.Name) });
            continue;
        }

        try
        {
            // extracting txt index file
            SubmissionFileInfo subFileInfo = submission.Files.FirstOrDefault(s => s.Name.Contains("-index.html"));
            if (subFileInfo == null)
            {
                continue;
            }

            SubmissionFile indexFile = !_extractFromStorage
                ? _secApi.ArchivesEdgarDataCIKSubmissionFile(cik, item.Name, subFileInfo.Name)
                : LoadFromStorage(infoParams.RegulatorCode, infoParams.CompanyCode, item.Name, subFileInfo.Name);
            if (indexFile == null)
            {
                continue;
            }

            SECSourceSubmissionInfo submissionInfo = ExtractReportDetailsIndexHTML(indexFile);
            if (submissionInfo != null && !string.IsNullOrEmpty(submissionInfo.Type))
            {
                // Index files fetched from the API are written to storage;
                // ones that were read from storage are not written back.
                if (!_extractFromStorage)
                {
                    PutToStorage(infoParams.RegulatorCode, infoParams.CompanyCode, submission.Name, indexFile);
                }

                submissionInfo.Name = item.Name;
                result.Submissions.Add(submissionInfo);
            }
        }
        catch (Exception ex)
        {
            // A failure on one submission is recorded and must not abort the remaining ones.
            result.AddError(new Error() { Code = EErrorCodes.ImporterError, Message = string.Format("Report'{0}', Error: {1}", item.Name, ex.Message) });
        }
    }

    result.Success = true;
    return result;
}