예제 #1
0
        /// <summary>
        /// Parses a submission index HTML page and extracts the form type, the
        /// "Accepted" / "Period of Report" dates and the report file name(s).
        /// </summary>
        /// <param name="submissionIndexFile">Index file whose <c>Content</c> holds the raw HTML bytes.</param>
        /// <returns>
        /// A new <see cref="SECSourceSubmissionInfo"/>. When the form name is missing or is not one of
        /// the recognised values (10-Q, 10-K, Form 424B2, Form 4, 13F-HR) the object is returned
        /// without dates or report files being filled in.
        /// </returns>
        /// <exception cref="FormatException">A date cell on the page cannot be parsed.</exception>
        private SECSourceSubmissionInfo ExtractReportDetailsIndexHTML(SubmissionFile submissionIndexFile)
        {
            SECSourceSubmissionInfo subInfo = new SECSourceSubmissionInfo();

            // NOTE(review): Encoding.Default differs between .NET Framework and .NET Core - confirm the intended encoding.
            string txtContent = System.Text.Encoding.Default.GetString(submissionIndexFile.Content.ToArray());

            var doc = new HtmlDocument();
            doc.LoadHtml(txtContent);

            HtmlNode nodeType = doc.DocumentNode.SelectSingleNode("//div[@id='formDiv']/div[@id='formHeader']/div[@id='formName']/strong");
            if (nodeType == null)
            {
                return(subInfo);
            }

            // mapping the form name shown on the page to the internal type code
            switch (nodeType.InnerText)
            {
            case "10-Q":
                subInfo.Type = "10-Q";
                break;

            case "10-K":
                subInfo.Type = "10-K";
                break;

            case "Form 424B2":
                subInfo.Type = "424B2";
                break;

            case "Form 4":
                subInfo.Type = "4";
                break;

            case "13F-HR":
                subInfo.Type = "13F-HR";
                break;
            }

            if (string.IsNullOrEmpty(subInfo.Type))
            {
                return(subInfo);
            }

            // extracting dates
            var nodesMetadata = doc.DocumentNode.SelectNodes("//div[@id='formDiv']/div/div/div[@class='infoHead']");
            if (nodesMetadata != null)
            {
                foreach (HtmlNode node in nodesMetadata)
                {
                    HtmlNode nodeDate = node.SelectSingleNode("../div[@class='info']");
                    if (nodeDate == null)
                    {
                        continue;
                    }

                    // the page is machine-generated, so parsing must not depend on the current culture
                    if (node.InnerText == "Accepted")
                    {
                        subInfo.Submitted = DateTime.Parse(nodeDate.InnerText, System.Globalization.CultureInfo.InvariantCulture);
                    }
                    else if (node.InnerText == "Period of Report")
                    {
                        subInfo.PeriodEnd = DateTime.Parse(nodeDate.InnerText, System.Globalization.CultureInfo.InvariantCulture);
                    }
                }
            }

            // extracting report file name: the EX-101.INS row is preferred, otherwise a per-type fallback is used
            HtmlNode nodeFilingData = doc.DocumentNode.SelectSingleNode("//div[@id='formDiv']/div/table/tr/td[text()='EX-101.INS']/..");
            if (nodeFilingData == null)
            {
                switch (subInfo.Type)
                {
                case "10-Q":
                case "10-K":
                    nodeFilingData = doc.DocumentNode.SelectSingleNode("//div[@id='formDiv']/div/table/tr/td[text()='XML']/..");
                    break;

                case "4":
                {
                    // SelectNodes yields null (not an empty collection) when nothing matches
                    var fileNodes = doc.DocumentNode.SelectNodes("//div[@id='formDiv']/div/table/tr/td[text()='4']/..");
                    if (fileNodes != null)
                    {
                        nodeFilingData = fileNodes.FirstOrDefault(row =>
                        {
                            HtmlNode link = row.SelectSingleNode("td/a");
                            return link != null && link.InnerText.IndexOf(".xml", StringComparison.Ordinal) >= 0;
                        });
                    }
                }
                break;

                case "13F-HR":
                    subInfo.Report.Add("primary_doc.xml");
                    subInfo.Report.Add("form13fInfoTable.xml");
                    break;
                }
            }

            if (nodeFilingData != null)
            {
                // a matched row without a link carries no file name to record
                HtmlNode nodeFileName = nodeFilingData.SelectSingleNode("td/a");
                if (nodeFileName != null)
                {
                    subInfo.Report.Add(nodeFileName.InnerText.Trim());
                }
            }

            return(subInfo);
        }
예제 #2
0
        /// <summary>
        /// Collects type, date and report-file details for every submission listed in
        /// <paramref name="infoParams"/> by loading and parsing each submission's "-index.html" file.
        /// </summary>
        /// <param name="infoParams">
        /// Request parameters; must be a <see cref="SECSourceSubmissionsInfoParams"/> instance,
        /// otherwise an <c>InvalidSourceParams</c> error is returned.
        /// </param>
        /// <returns>
        /// A result whose <c>Submissions</c> holds one entry per submission with a recognised type.
        /// Per-submission failures are recorded as errors and do not stop processing; <c>Success</c>
        /// is <c>true</c> whenever the parameter type was valid, and <c>false</c> otherwise.
        /// </returns>
        public ISourceSubmissionsInfoResult GetSubmissionsInfo(ISourceSubmissionsInfoParams infoParams)
        {
            ISourceSubmissionsInfoResult result = new SECSourceSubmissionsInfoResult();

            SECSourceSubmissionsInfoParams secInfoParams = infoParams as SECSourceSubmissionsInfoParams;

            if (secInfoParams == null)
            {
                result.Success = false;
                result.Errors.Add(new Error()
                {
                    Code = EErrorCodes.InvalidSourceParams, Type = EErrorType.Error, Message = "Invalid parameter provided"
                });

                return(result);
            }

            // resolving the regulator-specific company code through the dictionary
            string cik = _dictionary.LookupRegulatorCompanyCode(infoParams.RegulatorCode, infoParams.CompanyCode);

            // for each submission - extracting content and checking type
            foreach (var item in secInfoParams.Items)
            {
                Submission submission = _extractFromStorage?
                                        GetSubmissionFromStorage(infoParams.RegulatorCode, infoParams.CompanyCode, item.Name) :
                                            GetSubmissionFromApi(cik, item.Name);

                if (submission == null)
                {
                    result.Errors.Add(new Error()
                    {
                        Code = EErrorCodes.SubmissionNotFound, Type = EErrorType.Warning, Message = string.Format("Submission '{0}' was not found", item.Name)
                    });
                    continue;
                }

                try
                {
                    // locating the index file among the submission's files
                    SubmissionFileInfo subFileInfo = submission.Files.FirstOrDefault(s => s.Name.Contains("-index.html"));
                    SubmissionFile     indexFile   = null;

                    if (subFileInfo != null)
                    {
                        indexFile = _extractFromStorage?
                                    LoadFromStorage(infoParams.RegulatorCode, infoParams.CompanyCode, item.Name, subFileInfo.Name) :
                                        _secApi.ArchivesEdgarDataCIKSubmissionFile(cik, item.Name, subFileInfo.Name);
                    }

                    if (indexFile != null)
                    {
                        SECSourceSubmissionInfo submissionInfo = ExtractReportDetailsIndexHTML(indexFile);
                        if (submissionInfo != null && !string.IsNullOrEmpty(submissionInfo.Type))
                        {
                            // only files fetched from the API are written to storage
                            if (!_extractFromStorage)
                            {
                                PutToStorage(infoParams.RegulatorCode, infoParams.CompanyCode, submission.Name, indexFile);
                            }

                            submissionInfo.Name = item.Name;
                            result.Submissions.Add(submissionInfo);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // a failure on one submission is reported and the loop moves on to the next one
                    result.AddError(new Error()
                    {
                        Code = EErrorCodes.ImporterError, Message = string.Format("Report'{0}', Error: {1}", item.Name, ex.Message)
                    });
                }
            }

            result.Success = true;

            return(result);
        }