/// <summary>
/// Builds a <see cref="FinanceReport"/> for the given company code from the raw report lines.
/// </summary>
/// <param name="code">Company code; passed to the header parser and stored on the report.</param>
/// <param name="lines">All lines of the report text.</param>
/// <returns>
/// The populated report, or <c>null</c> (after logging to the error writer) when the header
/// cannot be parsed.
/// </returns>
private FinanceReport ParseReport(string code, string[] lines)
{
    FinanceReport result = new FinanceReport();
    int cursor = 0;
    string parsedCompanyName;

    bool headerParsed = ParseHeader(code, lines, ref cursor, out parsedCompanyName);
    if (!headerParsed)
    {
        _errorWriter.WriteLine("Failed to parse report header");
        return null;
    }

    result.CompanyCode = code;
    result.CompanyName = parsedCompanyName;

    // Consume tables one after another until the input is exhausted or a table
    // cannot be kept.
    while (cursor < lines.Length)
    {
        // Remember where this table began so that the tail can be preserved verbatim.
        int tableStart = cursor;
        FinanceReportTable parsed = ParseTable(lines, ref cursor);

        bool keepTable = parsed != null && parsed.Name != "环比分析";
        if (!keepTable)
        {
            // Parsing failed or the "环比分析" table was reached: everything from the
            // start of this table to the end of the input becomes the annotations.
            result.Annotations = string.Join(Environment.NewLine, lines.Skip(tableStart));
            break;
        }

        result.AddTable(parsed);
    }

    return result;
}
/// <summary>
/// Parses a single table starting at <paramref name="lineIndex"/>: the table name, the
/// row/column definition, and then the data rows.
/// </summary>
/// <param name="lines">All lines of the report text.</param>
/// <param name="lineIndex">
/// Current read position. It is advanced by the name parser, the definition parser and the
/// row scan as they consume lines.
/// </param>
/// <returns>
/// The parsed table (without rows when no row lines follow the definition), or <c>null</c>
/// when the table definition cannot be parsed or when the table has no name and a unique
/// name cannot be inferred from its row names.
/// </returns>
private FinanceReportTable ParseTable(string[] lines, ref int lineIndex)
{
    const string UnknownTableName = "<unknown table name>";

    // parse table name; a missing name is tolerated and resolved from row names later
    string tableName;
    int startLineIndex = lineIndex;
    if (!ParseTableName(lines, ref lineIndex, out tableName))
    {
        _errorWriter.WriteLine("failed to find table name between line {0}~{1}", startLineIndex, lineIndex);
        tableName = UnknownTableName;
    }

    if (tableName != UnknownTableName)
    {
        // get normalized table name
        tableName = _dataDictionary.GetNormalizedTableName(tableName);
    }

    // parse table definition, including row definition and column definitions
    string rowDefinition;
    string[] columnDefinitions;
    startLineIndex = lineIndex;
    if (!ParseTableDefinition(lines, ref lineIndex, out rowDefinition, out columnDefinitions))
    {
        _errorWriter.WriteLine("failed to parse table definition from line {0}", startLineIndex);
        return null;
    }

    // get normalized column definitions
    columnDefinitions = columnDefinitions
        .Select(s => _dataDictionary.GetNormalizedColumnName(tableName, s))
        .ToArray();

    FinanceReportTable table = new FinanceReportTable(tableName, rowDefinition, columnDefinitions);

    // find out all possible rows: advance until the closing row has been consumed or a
    // line that does not belong to the table is reached
    startLineIndex = lineIndex;
    while (lineIndex < lines.Length)
    {
        string currentLine = lines[lineIndex];

        if (currentLine.StartsWith(TableLastRowStartPattern))
        {
            // end of table; the closing line itself is consumed
            lineIndex++;
            break;
        }

        if (currentLine.StartsWith(EffectiveRowStartPattern)
            || currentLine.StartsWith(TableSectionSeparatorStartPattern))
        {
            lineIndex++;
            continue;
        }

        // not a valid line in table
        break;
    }

    int endLineIndex = lineIndex;
    if (startLineIndex == endLineIndex)
    {
        // empty table
        return table;
    }

    // get all cells: drop everything before the first effective row, then split each
    // remaining line into trimmed, non-empty cells
    string[][] cells = Enumerable
        .Range(startLineIndex, endLineIndex - startLineIndex)
        .Select(i => lines[i])
        .SkipWhile(s => !s.StartsWith(EffectiveRowStartPattern))
        .Select(s => s
            .Split(new string[] { RowCellSeparator }, StringSplitOptions.RemoveEmptyEntries)
            .Select(x => x.Trim())
            .ToArray())
        .ToArray();

    // clean up cells
    // NOTE(review): presumably this yields rows shaped as [row name, one cell per column];
    // the loops below index rc[0] and rowCells[i + 1] without bounds checks - confirm.
    cells = CleanUpCells(cells, table.ColumnCount);

    FinanceReportColumnDefinition[] tableColumnDefinitions = table.ColumnDefinitions.ToArray();

    // NOTE(review): this flag is evaluated before the name may be reset below, so a table
    // whose name is only inferred from its rows is never treated as "环比分析" here.
    // Confirm whether it should be re-evaluated after ResetTableName.
    bool isHbfxTable = (table.Name == "环比分析");

    // last chance of getting table name according to row names.
    if (table.Name == UnknownTableName)
    {
        // isHbfxTable is necessarily false in this branch, so the raw first cell is the
        // row name used for the lookup.
        var candidateNameSets = cells
            .Select(rc => _dataDictionary.GetPossibleNormalizedTableNameByRowNameAlias(GetCleanedRowName(rc[0])))
            .ToList();

        // Only a table name that is possible for every row is acceptable.
        IEnumerable<string> commonNames = new List<string>();
        if (candidateNameSets.Count > 0)
        {
            commonNames = candidateNameSets[0];
            for (int i = 1; i < candidateNameSets.Count; ++i)
            {
                commonNames = commonNames.Intersect(candidateNameSets[i]);
            }
        }

        // Materialize once: the Intersect chain is deferred and would otherwise be
        // re-evaluated by every Count()/First() call.
        List<string> candidates = commonNames.ToList();

        if (candidates.Count == 0)
        {
            _errorWriter.WriteLine("failed to guess table name from row names, no table contains all row names");
            return null;
        }

        if (candidates.Count > 1)
        {
            _errorWriter.WriteLine("failed to guess table name from row names, more than one tables contain all row names");
            return null;
        }

        // now we can set the table name to the unique possibility
        table.ResetTableName(candidates[0]);
        _errorWriter.WriteLine("find table name {0} from row names", table.Name);
    }

    // create rows and parse cell values, using the column's unit when it has one and the
    // row's unit otherwise
    foreach (var rowCells in cells)
    {
        string rowName = isHbfxTable ? table.RowDefinition + rowCells[0] : rowCells[0];

        // get normalized row name
        rowName = _dataDictionary.GetNormalizedRowName(table.Name, rowName);

        int rowIndex = table.AddRow(rowName);
        FinanceReportRow row = table[rowIndex];

        for (int i = 0; i < row.Length; ++i)
        {
            // rowCells[0] is the row name, so data cells start at index 1
            row[i].Parse(
                rowCells[i + 1],
                tableColumnDefinitions[i].HasUnit ? tableColumnDefinitions[i].Unit : row.Unit);
        }
    }

    return table;
}
/// <summary>
/// For every report that contains a "利润表" table, adds a single-column "跨年度利润表"
/// table whose column is the most recent date column that has data.
/// </summary>
/// <remarks>
/// When that column is dated in December the values are copied as they are. Otherwise each
/// value is computed as: latest value + previous year's December value - previous year's
/// value for the same month. Reports without a "利润表" table, without date columns,
/// without a usable latest column, or without the required previous-year columns are
/// skipped and the remaining reports are still processed.
/// </remarks>
/// <param name="reports">Reports to extend; each matching report gets at most one new table.</param>
/// <exception cref="InvalidOperationException">
/// A report contains more than one "利润表" table.
/// </exception>
private static void CreateRevenueTableForLast12Months(IEnumerable<FinanceReport> reports)
{
    foreach (var report in reports)
    {
        // assume the tables in report have been expanded and merged.
        // Materialized up front because the report's table list is modified further down.
        var revenueTables = report.Tables.Where(t => t.Name == "利润表").ToArray();

        if (revenueTables.Length == 0)
        {
            continue;
        }

        if (revenueTables.Length > 1)
        {
            throw new InvalidOperationException(
                string.Format("there are more than one revenue table in the report for company {0}", report.CompanyCode));
        }

        var table = revenueTables[0];

        // Tag every column with its position so the cell index survives the sorting below.
        var columns = table.ColumnDefinitions.ToArray();
        for (var i = 0; i < columns.Length; ++i)
        {
            columns[i].Tag = i;
        }

        var dateColumns = columns
            .Where(c => c.Type == FinanceReportColumnDefinition.ColumnType.Date)
            .OrderByDescending(c => c.Date)
            .ToArray();

        if (dateColumns.Length == 0)
        {
            continue;
        }

        // find first non-empty column (it is important to avoid outliers): the newest
        // column that has at least one decimal cell and is dated before the current time
        FinanceReportColumnDefinition firstActiveColumn = null;
        foreach (var candidate in dateColumns)
        {
            var candidateIndex = candidate.Tag;
            if (table.Rows.Any(r => r[candidateIndex].Type == FinanceReportCell.CellType.Decimal)
                && candidate.Date < DateTime.Now)
            {
                firstActiveColumn = candidate;
                break;
            }
        }

        if (firstActiveColumn == null)
        {
            // Nothing usable in this report; move on to the next one instead of
            // abandoning all remaining reports.
            continue;
        }

        var newColumns = new[] { firstActiveColumn };
        var newRevenueTable = new FinanceReportTable(
            "跨年度利润表",
            table.RowDefinition,
            table.Unit,
            newColumns);

        if (firstActiveColumn.Date.Month == 12) // annual report
        {
            // just copy rows
            foreach (var row in table.Rows)
            {
                var rowIndex = newRevenueTable.AddRow(row.Name);
                newRevenueTable[rowIndex][0].Copy(row[firstActiveColumn.Tag]);
            }
        }
        else
        {
            // not a yearly report, it could be a seasonal or half-year report,
            // so the last 12 months of data are derived from 3 values:
            // last 12 months = latest data + latest annual report data - last year's data
            // for the corresponding month.
            // for example:
            // data(2012/9~2013/9) = data(2013/9) + data(2012/12) - data(2012/9)
            var previousYear = firstActiveColumn.Date.Year - 1;
            var latestMonth = firstActiveColumn.Date.Month;

            var previousAnnualColumn = dateColumns.FirstOrDefault(
                c => c.Date.Year == previousYear && c.Date.Month == 12);
            var previousSameMonthColumn = dateColumns.FirstOrDefault(
                c => c.Date.Year == previousYear && c.Date.Month == latestMonth);

            if (previousAnnualColumn == null || previousSameMonthColumn == null)
            {
                // Required comparison data is missing: skip the new table for this
                // report only and keep processing the others.
                continue;
            }

            var firstColumnIndex = firstActiveColumn.Tag;
            var secondColumnIndex = previousAnnualColumn.Tag;
            var thirdColumnIndex = previousSameMonthColumn.Tag;

            foreach (var row in table.Rows)
            {
                var v1 = GetCellDecimalValue(row[firstColumnIndex]);
                var v2 = GetCellDecimalValue(row[secondColumnIndex]);
                var v3 = GetCellDecimalValue(row[thirdColumnIndex]);

                var rowIndex = newRevenueTable.AddRow(row.Name);
                newRevenueTable[rowIndex][0].DecimalValue = v1 + v2 - v3;
            }
        }

        report.AddTable(newRevenueTable);
    }
}