/// <summary>
/// Parses a filing: validates the input and, when validation succeeds and
/// <c>OpenDocument</c> returns a document, extracts contexts, company data,
/// filing data and statement values into the result.
/// </summary>
/// <param name="parserParams">Parser parameters; must be a <c>SECParserParams</c> instance.</param>
/// <returns>
/// The parse result. Any exception raised while parsing is caught and recorded as a
/// <c>ParserError</c> entry, with <c>Success</c> set to <c>false</c>.
/// </returns>
public virtual IFilingParserResult Parse(IFilingParserParams parserParams)
{
    ResetState();

    SECParserResult result = new SECParserResult();

    // Fail with an explicit message instead of relying on a NullReferenceException
    // being thrown (and caught) further down when the argument is null or of another type.
    SECParserParams secParams = parserParams as SECParserParams;
    if (secParams == null)
    {
        result.Success = false;
        result.AddError(EErrorCodes.ParserError, EErrorType.Error, "Parser parameters are missing or are not of type SECParserParams");
        return result;
    }

    try
    {
        ValidateFile(secParams, result);
        if (result.Success)
        {
            var doc = OpenDocument(secParams);
            if (doc != null)
            {
                InitNsManager(doc);

                // Contexts must be extracted before filing data and values: both read result.Contexts.
                ExtractContexts(doc, result);
                ExtractCompanyData(doc, result);
                ExtractFilingData(doc, result);
                ExtractValues(doc, secParams, result);
            }
        }
    }
    catch (Exception ex)
    {
        result.Success = false;
        result.AddError(EErrorCodes.ParserError, EErrorType.Error, ex.Message);
    }

    return result;
}
/// <summary>
/// Calls <c>ParseStatementSection</c> once for each value held in <c>_sections</c>.
/// </summary>
/// <param name="doc">Document the values are read from.</param>
/// <param name="secParams">Parser parameters forwarded to the section parser.</param>
/// <param name="secResult">Result forwarded to the section parser.</param>
protected void ExtractValues(XmlDocument doc, SECParserParams secParams, SECParserResult secResult)
{
    foreach (var currentSection in _sections.Values)
    {
        ParseStatementSection(doc, secParams, secResult, currentSection);
    }
}
/// <summary>
/// Copies the document-level <c>dei:</c> tags into <c>secResult.FilingData</c> and, when both
/// the period end date and the document type are present, derives
/// <c>DocumentPeriodStartDate</c> from the first context whose start date lies in the
/// expected window before the period end date.
/// </summary>
/// <param name="doc">Document the tags are read from.</param>
/// <param name="secResult">Result whose <c>FilingData</c> is filled; its <c>Contexts</c> are read.</param>
/// <exception cref="FormatException">The period end date text is not a valid date.</exception>
protected void ExtractFilingData(XmlDocument doc, SECParserResult secResult)
{
    string[][] tags =
    {
        new string[] { "dei:DocumentPeriodEndDate", "DocumentPeriodEndDate" },
        new string[] { "dei:DocumentType", "DocumentType" },
        new string[] { "dei:DocumentFiscalYearFocus", "DocumentFiscalYearFocus" },
        new string[] { "dei:DocumentFiscalPeriodFocus", "DocumentFiscalPeriodFocus" },
    };

    ExtractXmlData(doc, secResult, tags, secResult.FilingData);

    // The period start date is not read from a tag; it is derived from the contexts below.
    string endDateText;
    string type;
    if (!secResult.FilingData.TryGetValue("DocumentPeriodEndDate", out endDateText) ||
        !secResult.FilingData.TryGetValue("DocumentType", out type))
    {
        return;
    }

    // The value comes from the document, not from a user: parse it independently of the
    // current thread culture.
    DateTime endDate = DateTime.Parse(endDateText, System.Globalization.CultureInfo.InvariantCulture);

    // TODO: WARNING! this supports only 10-K and 10-Q report types - need to change in future
    // for other types of reports. Every type other than "10-Q" uses the 10-K window.
    bool isQuarterly = type == "10-Q";
    int minDays = isQuarterly ? 80 : 350;
    int maxDays = isQuarterly ? 100 : 370;

    foreach (var ctx in secResult.Contexts)
    {
        // DateTime.MinValue is the value stored when a context has no start date.
        if (ctx.StartDate == DateTime.MinValue)
        {
            continue;
        }

        int periodDays = (endDate - ctx.StartDate).Days;
        if (periodDays >= minDays && periodDays <= maxDays)
        {
            // NOTE(review): ToShortDateString() is culture-dependent; kept because consumers
            // of FilingData may rely on the current format - confirm before changing.
            secResult.FilingData["DocumentPeriodStartDate"] = ctx.StartDate.ToShortDateString();
            break;
        }
    }
}
/// <summary>
/// Checks that the first entry of <c>secParams.FileContent</c> can be read. When it cannot,
/// marks the result as failed and records a <c>FileNotFound</c> error.
/// </summary>
/// <param name="secParams">Parameters carrying the file content to validate.</param>
/// <param name="secResult">Result that receives the failure flag and error entry.</param>
protected void ValidateFile(SECParserParams secParams, SECParserResult secResult)
{
    bool readable = false;

    if (secParams != null && secParams.FileContent != null)
    {
        // FirstOrDefault instead of ElementAt(0): an empty collection must be reported as
        // an unreadable input, not surface as an ArgumentOutOfRangeException.
        var firstStream = secParams.FileContent.Values.FirstOrDefault();
        readable = firstStream != null && firstStream.CanRead;
    }

    if (!readable)
    {
        secResult.Success = false;
        secResult.AddError(EErrorCodes.FileNotFound, EErrorType.Error, "Stream is unaccessable");
    }
}
/// <summary>
/// For each <c>{ tag name, key }</c> pair, looks up elements with that tag name in the
/// document and, when at least one exists, adds the inner text of the first one to
/// <paramref name="values"/> under the given key.
/// </summary>
/// <param name="doc">Document to search.</param>
/// <param name="secResult">Not used by this method.</param>
/// <param name="tags">Pairs of element tag name (index 0) and dictionary key (index 1).</param>
/// <param name="values">Dictionary that receives the extracted texts.</param>
protected void ExtractXmlData(XmlDocument doc, SECParserResult secResult, string[][] tags, Dictionary<string, string> values)
{
    for (int i = 0; i < tags.Length; i++)
    {
        XmlNodeList matches = doc.GetElementsByTagName(tags[i][0]);
        if (matches == null || matches.Count == 0)
        {
            // Tag is absent from the document: leave the key unset.
            continue;
        }

        values.Add(tags[i][1], matches[0].InnerText);
    }
}
/// <summary>
/// Copies the registrant name, trading symbol and central index key tags from the
/// document into <c>secResult.CompanyData</c>.
/// </summary>
/// <param name="doc">Document the tags are read from.</param>
/// <param name="secResult">Result whose <c>CompanyData</c> is filled.</param>
protected void ExtractCompanyData(XmlDocument doc, SECParserResult secResult)
{
    // Each entry is { element tag name, key used in CompanyData }.
    string[][] companyTags = new string[][]
    {
        new[] { "dei:EntityRegistrantName", "EntityRegistrantName" },
        new[] { "dei:TradingSymbol", "TradingSymbol" },
        new[] { "dei:EntityCentralIndexKey", "EntityCentralIndexKey" }
    };

    ExtractXmlData(doc, secResult, companyTags, secResult.CompanyData);
}
/// <summary>
/// Builds a <c>Statement</c> for the given section: for every value tag and every context
/// in the result, selects the first element matching the tag and context reference,
/// converts its text to a decimal, date or string, and records it. The statement is
/// appended to <c>result.Statements</c>.
/// </summary>
/// <param name="doc">Document the values are read from.</param>
/// <param name="secParams">Parameters whose <c>ExtractDates</c> / <c>ExtractStrings</c> flags gate the conversions.</param>
/// <param name="result">Result supplying the contexts and receiving the statement.</param>
/// <param name="section">Section describing which value tags to read.</param>
protected void ParseStatementSection(XmlDocument doc, SECParserParams secParams, SECParserResult result, Section section)
{
    // Fact text comes from the document, so it is parsed independently of the current
    // thread culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // preparing statements
    Statement statementSection = new Statement(section.Name);

    foreach (var value in section.ValueTags.Values)
    {
        // The suffix does not depend on the context, so resolve it once per value tag.
        string suffix = string.IsNullOrEmpty(value.Suffix) ? string.Empty : value.Suffix;

        foreach (var context in result.Contexts)
        {
            string xpath = "//" + value.Tag + "[@contextRef='" + context.ID + suffix + "']";
            XmlNode valueTag = doc.SelectSingleNode(xpath, _nsmgr);
            if (valueTag == null)
            {
                continue;
            }

            string innerText = valueTag.InnerText.Trim();
            object valObject = null;
            Decimal valDecimal;
            DateTime valDateTime;

            // NOTE(review): the numeric branch is gated on ExtractDates, the same flag as the
            // date branch. This looks like a copy/paste slip - confirm which flag should gate
            // numbers. Behavior is kept as-is.
            if (Decimal.TryParse(innerText, System.Globalization.NumberStyles.Number, invariant, out valDecimal) && secParams.ExtractDates)
            {
                valObject = valDecimal;
            }
            else if (DateTime.TryParse(innerText, invariant, System.Globalization.DateTimeStyles.None, out valDateTime) && secParams.ExtractDates)
            {
                valObject = valDateTime;
            }
            else if (secParams.ExtractStrings)
            {
                valObject = innerText;
            }

            if (valObject == null)
            {
                continue;
            }

            XmlAttribute unitRef = valueTag.Attributes["unitRef"];
            XmlAttribute id = valueTag.Attributes["id"];

            StatementRecord record = new StatementRecord(
                value.Code,
                valObject,
                unitRef != null ? unitRef.Value : null,
                context.StartDate,
                context.EndDate,
                context.Instant,
                id != null ? id.Value : null);

            if (!statementSection.Records.Contains(record))
            {
                statementSection.Records.Add(record);
            }
        }
    }

    result.Statements.Add(statementSection);
}
/// <summary>
/// Collects the contexts declared in the document into <c>secResult.Contexts</c>. Both
/// <c>xbrli:context</c> elements and unprefixed <c>context</c> elements (resolved against the
/// root element's namespace) are read, in that order. Contexts that contain an
/// <c>entity/segment</c> element are skipped.
/// </summary>
/// <param name="doc">Document the contexts are read from.</param>
/// <param name="secResult">Result whose <c>Contexts</c> collection is filled.</param>
/// <exception cref="FormatException">A start, end or instant date text is not a valid date.</exception>
protected void ExtractContexts(XmlDocument doc, SECParserResult secResult)
{
    XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
    nsmgr.AddNamespace("xbrli", "http://www.xbrl.org/2003/instance");
    nsmgr.AddNamespace("df", doc.DocumentElement.NamespaceURI);

    // { context element tag name, namespace prefix used for its children }.
    // An array is used so that the processing order is explicit rather than depending on
    // dictionary enumeration order.
    string[][] variants =
    {
        new string[] { "xbrli:context", "xbrli" },
        new string[] { "context", "df" }
    };

    foreach (string[] variant in variants)
    {
        string prefix = variant[1];
        string segmentPath = prefix + ":entity/" + prefix + ":segment";
        string periodPath = prefix + ":period/" + prefix + ":";

        // TODO: This needs to be rewritten with XPath requests over the whole document -
        // XPath expressions like period/startDate reportedly did not work, for an unknown reason.
        foreach (XmlNode contextTag in doc.GetElementsByTagName(variant[0]))
        {
            if (contextTag.SelectSingleNode(segmentPath, nsmgr) != null)
            {
                continue;
            }

            // A context element without an id attribute throws NullReferenceException here.
            string id = contextTag.Attributes["id"].Value;

            // DateTime.MinValue marks a date that the context does not declare.
            DateTime startDate = ReadContextDate(contextTag, periodPath + "startDate", nsmgr);
            DateTime endDate = ReadContextDate(contextTag, periodPath + "endDate", nsmgr);
            DateTime instant = ReadContextDate(contextTag, periodPath + "instant", nsmgr);

            secResult.Contexts.Add(new FilingContex(id, startDate, endDate, instant));
        }
    }
}

/// <summary>
/// Returns the date held by the node selected with <paramref name="xpath"/>, parsed with the
/// invariant culture, or <see cref="DateTime.MinValue"/> when no such node exists.
/// </summary>
private static DateTime ReadContextDate(XmlNode contextTag, string xpath, XmlNamespaceManager nsmgr)
{
    XmlNode dateNode = contextTag.SelectSingleNode(xpath, nsmgr);
    if (dateNode == null)
    {
        return DateTime.MinValue;
    }

    return DateTime.Parse(dateNode.InnerText, System.Globalization.CultureInfo.InvariantCulture);
}