/// <summary>
/// Builds a <see cref="FinanceReport"/> for one company from the text lines of its report.
/// </summary>
/// <param name="code">Company code; handed to the header parser and stored on the report.</param>
/// <param name="lines">The lines of the report text.</param>
/// <returns>
/// The populated report, or <c>null</c> (after logging to the error writer) when the header
/// cannot be parsed.
/// </returns>
private FinanceReport ParseReport(string code, string[] lines)
        {
            FinanceReport result = new FinanceReport();

            int cursor = 0;
            string parsedCompanyName;

            bool headerParsed = ParseHeader(code, lines, ref cursor, out parsedCompanyName);
            if (!headerParsed)
            {
                _errorWriter.WriteLine("Failed to parse report header");
                return null;
            }

            result.CompanyCode = code;
            result.CompanyName = parsedCompanyName;

            while (cursor < lines.Length)
            {
                // remember where this table attempt began; ParseTable advances the cursor
                int tableStart = cursor;

                FinanceReportTable parsed = ParseTable(lines, ref cursor);

                bool keepTable = parsed != null && parsed.Name != "环比分析";
                if (keepTable)
                {
                    result.AddTable(parsed);
                    continue;
                }

                // Either no table could be parsed here or the "环比分析" table was reached:
                // everything from the start of this attempt to the end of the input is kept
                // verbatim as the report's annotations, and table parsing stops.
                result.Annotations = string.Join(
                    Environment.NewLine,
                    lines,
                    tableStart,
                    lines.Length - tableStart);

                break;
            }

            return result;
        }
        /// <summary>
        /// Parses a single table starting at <paramref name="lineIndex"/>: its name, its
        /// row/column definition and the row lines that follow.
        /// </summary>
        /// <param name="lines">The lines of the report text.</param>
        /// <param name="lineIndex">
        /// Current position in <paramref name="lines"/>; advanced past whatever was consumed.
        /// </param>
        /// <returns>
        /// The parsed table (without rows when no row lines follow the definition), or
        /// <c>null</c> when the table definition cannot be parsed, or when the table name is
        /// missing and cannot be determined uniquely from the row names.
        /// </returns>
        private FinanceReportTable ParseTable(string[] lines, ref int lineIndex)
        {
            const string UnknownTableName = "<unknown table name>";

            string tableName;

            // parse table name
            int startLineIndex = lineIndex;

            if (!ParseTableName(lines, ref lineIndex, out tableName))
            {
                _errorWriter.WriteLine("failed to find table name between line {0}~{1}", startLineIndex, lineIndex);
                tableName = UnknownTableName;
            }

            if (tableName != UnknownTableName)
            {
                // get normalized table name
                tableName = _dataDictionary.GetNormalizedTableName(tableName);
            }

            // parse table definition, including row definition and column definitions
            string rowDefinition;

            string[] columnDefinitions;

            startLineIndex = lineIndex;
            if (!ParseTableDefinition(lines, ref lineIndex, out rowDefinition, out columnDefinitions))
            {
                _errorWriter.WriteLine("failed to parse table definition from line {0}", startLineIndex);

                return null;
            }

            // get normalized column definitions
            columnDefinitions = columnDefinitions
                                .Select(s => _dataDictionary.GetNormalizedColumnName(tableName, s))
                                .ToArray();

            FinanceReportTable table = new FinanceReportTable(tableName, rowDefinition, columnDefinitions);

            // find out all possible rows
            startLineIndex = lineIndex;
            while (lineIndex < lines.Length)
            {
                string currentLine = lines[lineIndex];

                if (currentLine.StartsWith(TableLastRowStartPattern)) // end of table
                {
                    // the closing line belongs to the table, so consume it before stopping
                    lineIndex++;
                    break;
                }

                if (currentLine.StartsWith(EffectiveRowStartPattern) || currentLine.StartsWith(TableSectionSeparatorStartPattern))
                {
                    lineIndex++;
                    continue;
                }

                // not a valid line in table; leave it for the caller
                break;
            }

            int endLineIndex = lineIndex;

            if (startLineIndex == endLineIndex)
            {
                // empty table
                return table;
            }

            // get all cells: drop everything before the first effective row, then split each
            // remaining line on the cell separator and trim the pieces
            string[][] cells =
                Enumerable
                .Range(startLineIndex, endLineIndex - startLineIndex)
                .Select(i => lines[i])
                .SkipWhile(s => !s.StartsWith(EffectiveRowStartPattern))
                .Select(s => s.Split(new string[] { RowCellSeparator }, StringSplitOptions.RemoveEmptyEntries).Select(x => x.Trim()).ToArray())
                .ToArray();

            // clean up cells
            cells = CleanUpCells(cells, table.ColumnCount);

            FinanceReportColumnDefinition[] tableColumnDefinitions = table.ColumnDefinitions.ToArray();

            // Rows of the "环比分析" table get the row definition prepended to their name.
            // NOTE(review): this flag is evaluated before the name-guessing step below, so a
            // name recovered from the row names never switches the prefixing on. Confirm that
            // this is intended.
            bool isHbfxTable = (table.Name == "环比分析");

            // last chance of getting table name according to row names.
            if (table.Name == UnknownTableName)
            {
                // isHbfxTable is necessarily false in this branch, so the raw first cell is
                // the row name used for the lookup.
                var tableNamesList = cells
                                     .Select(rc => rc[0])
                                     .Select(rowName => _dataDictionary.GetPossibleNormalizedTableNameByRowNameAlias(GetCleanedRowName(rowName)))
                                     .ToList();

                // Candidate names are those offered by every row. The intersection chain is
                // lazy, so it is materialized once instead of being re-run for each
                // Count()/First() call.
                List <string> tableNames;

                if (tableNamesList.Count == 0)
                {
                    tableNames = new List <string>();
                }
                else
                {
                    IEnumerable <string> common = tableNamesList[0];

                    for (int i = 1; i < tableNamesList.Count; ++i)
                    {
                        common = common.Intersect(tableNamesList[i]);
                    }

                    tableNames = common.ToList();
                }

                if (tableNames.Count == 0)
                {
                    _errorWriter.WriteLine("failed to guess table name from row names, no table contains all row names");

                    return null;
                }

                if (tableNames.Count > 1)
                {
                    _errorWriter.WriteLine("failed to guess table name from row names, more than one tables contain all row names");

                    return null;
                }

                // now we can set the table name to the unique possibility
                table.ResetTableName(tableNames[0]);
                _errorWriter.WriteLine("find table name {0} from row names", table.Name);
            }

            // create rows and adjust cell values according to unit.
            foreach (var rowCells in cells)
            {
                string rowName = isHbfxTable ? table.RowDefinition + rowCells[0] : rowCells[0];

                // get normalized row name
                rowName = _dataDictionary.GetNormalizedRowName(table.Name, rowName);

                int rowIndex = table.AddRow(rowName);

                FinanceReportRow row = table[rowIndex];

                for (int i = 0; i < row.Length; ++i)
                {
                    // cell 0 is the row name, so the value for column i sits at i + 1;
                    // a column's own unit takes precedence over the row's unit
                    row[i].Parse(rowCells[i + 1], tableColumnDefinitions[i].HasUnit ? tableColumnDefinitions[i].Unit : row.Unit);
                }
            }

            return table;
        }
        // Example no. 3
        /// <summary>
        /// For every report that has a "利润表" table, adds a derived "跨年度利润表" table with a
        /// single column holding the figures for the latest reported period.
        /// </summary>
        /// <remarks>
        /// When the latest active column is a December column, its cells are copied. Otherwise
        /// each value is computed as latest + previous December - same month of the previous
        /// year, e.g. data(2012/9~2013/9) = data(2013/9) + data(2012/12) - data(2012/9).
        /// A report that lacks the table, the date columns or one of the required columns is
        /// skipped; the remaining reports are still processed. As a side effect the
        /// <c>Tag</c> of every column definition of the source table is set to its index.
        /// </remarks>
        /// <param name="reports">
        /// Reports to extend; the tables are assumed to have been expanded and merged already.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// A report contains more than one "利润表" table.
        /// </exception>
        private static void CreateRevenueTableForLast12Months(IEnumerable <FinanceReport> reports)
        {
            // read the clock once so every report is compared against the same instant
            DateTime now = DateTime.Now;

            foreach (var report in reports)
            {
                // assume the tables in report has been expanded and merged.
                FinanceReportTable[] financeReportTables = report.Tables.Where(t => t.Name == "利润表").ToArray();
                if (financeReportTables.Length == 0)
                {
                    continue;
                }

                if (financeReportTables.Length > 1)
                {
                    throw new InvalidOperationException(
                              string.Format("there are more than one revenue table in the report for company {0}", report.CompanyCode));
                }

                var table = financeReportTables[0];

                // tag each column with its position so it can still be located after sorting
                var columns = table.ColumnDefinitions.ToArray();
                for (var i = 0; i < columns.Length; ++i)
                {
                    columns[i].Tag = i;
                }

                var dateColumns = columns
                                  .Where(c => c.Type == FinanceReportColumnDefinition.ColumnType.Date)
                                  .OrderByDescending(c => c.Date)
                                  .ToArray();

                if (dateColumns.Length == 0)
                {
                    continue;
                }

                // find first non-empty column (it is important to avoid outliers):
                // the newest column dated before now that has at least one decimal cell
                FinanceReportColumnDefinition firstActiveColumn = null;
                foreach (FinanceReportColumnDefinition candidate in dateColumns)
                {
                    int candidateIndex = candidate.Tag;
                    if (candidate.Date < now &&
                        table.Rows.Any(r => r[candidateIndex].Type == FinanceReportCell.CellType.Decimal))
                    {
                        firstActiveColumn = candidate;
                        break;
                    }
                }

                if (firstActiveColumn == null)
                {
                    // nothing usable in this report; move on to the next one
                    continue;
                }

                var newColumns = new[] { firstActiveColumn };

                var newRevenueTable
                    = new FinanceReportTable(
                          "跨年度利润表",
                          table.RowDefinition,
                          table.Unit,
                          newColumns);

                if (firstActiveColumn.Date.Month == 12) // annual report
                {
                    // just copy rows
                    foreach (var row in table.Rows)
                    {
                        var rowIndex = newRevenueTable.AddRow(row.Name);
                        newRevenueTable[rowIndex][0].Copy(row[firstActiveColumn.Tag]);
                    }

                    report.AddTable(newRevenueTable);
                    continue;
                }

                // not yearly report, it could be seasonal report or half year report
                // so we need to get last 12 month revenue data by 3 data:
                // Last 12 month data = latest data + latest annual report data - last year corresponding month data.
                // for example:
                //    data(2012/9~2013/9) = data(2013/9)+data(2012/12)-data(2012/9)
                int previousYear = firstActiveColumn.Date.Year - 1;

                int firstColumnIndex = firstActiveColumn.Tag;
                int secondColumnIndex = FindDateColumnTag(dateColumns, previousYear, 12);
                if (secondColumnIndex < 0)
                {
                    // no previous-year annual column: skip the new table for this report only
                    continue;
                }

                int thirdColumnIndex = FindDateColumnTag(dateColumns, previousYear, firstActiveColumn.Date.Month);
                if (thirdColumnIndex < 0)
                {
                    // no previous-year same-month column: skip the new table for this report only
                    continue;
                }

                foreach (var row in table.Rows)
                {
                    var v1 = GetCellDecimalValue(row[firstColumnIndex]);
                    var v2 = GetCellDecimalValue(row[secondColumnIndex]);
                    var v3 = GetCellDecimalValue(row[thirdColumnIndex]);

                    var rowIndex = newRevenueTable.AddRow(row.Name);
                    newRevenueTable[rowIndex][0].DecimalValue = v1 + v2 - v3;
                }

                report.AddTable(newRevenueTable);
            }
        }

        /// <summary>
        /// Returns the <c>Tag</c> of the first column in <paramref name="dateColumns"/> whose
        /// date falls in the given year and month, or -1 when there is none.
        /// </summary>
        private static int FindDateColumnTag(FinanceReportColumnDefinition[] dateColumns, int year, int month)
        {
            foreach (FinanceReportColumnDefinition column in dateColumns)
            {
                if (column.Date.Year == year && column.Date.Month == month)
                {
                    return column.Tag;
                }
            }

            return -1;
        }