/// <summary>
/// Opens the workbook at <paramref name="filepath"/>, scans every worksheet whose name does not
/// contain '#', and appends the raw data of each table found to
/// <c>_table_raw_data_store.RawDataList</c>.
/// </summary>
/// <param name="filepath">Path handed to <c>Workbooks.Open</c>.</param>
/// <param name="filename">File name recorded on every parsed table.</param>
/// <param name="hash">Hash recorded on every parsed table.</param>
/// <exception cref="ApplicationException">
/// A sheet's used range has more than 100000 rows or columns, or table detection/parsing fails.
/// </exception>
private void Parse(string filepath, string filename, string hash)
{
    // open
    var wb = _excel_app.Workbooks.Open(filepath);
    try
    {
        var worksheets = wb.Worksheets;
        try
        {
            Trace.WriteLine($"Worksheets.Count={worksheets.Count}");

            // load all cells
            foreach (Excel.Worksheet current_sheet in worksheets)
            {
                try
                {
                    // Sheets whose name contains '#' are skipped.
                    if (current_sheet.Name.IndexOf('#') != -1)
                    {
                        continue;
                    }

                    ParseTablesInSheet(current_sheet, filename, hash);
                }
                finally
                {
                    // Released for skipped sheets and on failure as well.
                    Marshal.FinalReleaseComObject(current_sheet);
                }
            }
        }
        finally
        {
            Marshal.FinalReleaseComObject(worksheets);
        }
    }
    finally
    {
        // close - runs on the failure path too, so the workbook is never left open.
        wb.Close(SaveChanges: false);
        Marshal.FinalReleaseComObject(wb);
    }
}

/// <summary>
/// Scans the used range of one worksheet for <c>LABEL_TABLE</c> cells and stores the raw data of
/// every table that starts at such a cell.
/// </summary>
private void ParseTablesInSheet(Excel.Worksheet sheet, string filename, string hash)
{
    var range = sheet.UsedRange;
    try
    {
        int row_count = range.Rows.Count;
        int column_count = range.Columns.Count;
        Trace.WriteLine($"ActiveSheet.UsedRange Rows={row_count}, Columns={column_count} <{sheet.Name}>");

        if (row_count > 100000 || column_count > 100000)
        {
            throw new ApplicationException($"COUNT OVERFLOW ; ActiveSheet.UsedRange Rows={row_count}, Columns={column_count} <{sheet.Name}>");
        }

        // UsedRange is not guaranteed to start at A1, so translate its size into absolute
        // sheet coordinates instead of assuming rows/columns 1..count.
        int first_row = range.Row;
        int first_column = range.Column;
        int last_row = first_row + row_count - 1;
        int last_column = first_column + column_count - 1;

        for (int i = first_row; i <= last_row; ++i)
        {
            for (int j = first_column; j <= last_column; ++j)
            {
                // Read Value2 once per cell; every access is a COM round trip.
                object cell_value = sheet.Cells[i, j].Value2;
                if (cell_value == null)
                {
                    continue;
                }

                if (!cell_value.ToString().Equals(LABEL_TABLE))
                {
                    continue;
                }

                // detect the table bounds
                TableRange table_range = DetectTable(sheet, i, j);

                // build the raw data
                TableRawData raw_data = ParseRawTable(sheet, ref table_range, filename, hash);
                _table_raw_data_store.RawDataList.Add(raw_data);
                Trace.WriteLine($"Table<{raw_data.TableName}> SR:{table_range.start_row} SC:{table_range.start_column} ER:{table_range.end_row} EC:{table_range.end_column}");
            }
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(range);
    }
}
/// <summary>
/// Reads the table bounded by <paramref name="range"/> from <paramref name="worksheet"/> into a
/// <c>TableRawData</c>: table name, field names/types and all records as strings.
/// </summary>
/// <param name="worksheet">Worksheet that contains the table.</param>
/// <param name="range">Table bounds; only read here.</param>
/// <param name="filename">Stored in <c>FileName</c>.</param>
/// <param name="hash">Stored in <c>Hash</c>.</param>
/// <returns>The populated raw table data.</returns>
/// <exception cref="ApplicationException">
/// The table name, a field name or a field type cell is empty, or the table has no field columns
/// or no record rows.
/// </exception>
private TableRawData ParseRawTable(Excel.Worksheet worksheet, ref TableRange range, string filename, string hash)
{
    // Layout relative to the table label at (start_row, start_column):
    //   (start_row,     start_column + 1)          table name
    //   (start_row + 1, field columns)             field names
    //   (start_row + 2, field columns)             field types
    //   (start_row + 3 .. end_row - 1, field cols) records
    // with field columns = start_column + 1 .. end_column - 1.
    int first_field_column = range.start_column + 1;
    int last_field_column = range.end_column - 1;
    int first_record_row = range.start_row + 3;
    int last_record_row = range.end_row - 1;

    object table_name_value = worksheet.Cells[range.start_row, first_field_column].Value2;
    if (table_name_value == null)
    {
        throw new ApplicationException("테이블 이름이 없습니다");
    }

    TableRawData raw_data = new TableRawData();

    // table name
    raw_data.TableName = table_name_value.ToString().Trim();

    // file name and hash
    raw_data.FileName = filename;
    raw_data.Hash = hash;

    int field_capacity = last_field_column - first_field_column + 1;
    if (field_capacity < 1)
    {
        // Without this guard the list constructors or the record loop below fail with an
        // unrelated ArgumentOutOfRange/IndexOutOfRange exception.
        throw new ApplicationException("테이블 필드가 없습니다: " + raw_data.TableName);
    }

    // field names / types
    raw_data.FieldNames = new List<string>(field_capacity);
    raw_data.FieldNameCounts = new List<TableRawFieldNameCount>(field_capacity);
    raw_data.FieldTypes = new List<TableFieldType>(field_capacity);
    raw_data.FieldTypeNames = new List<string>(field_capacity);

    // Sheet columns whose field name contains '#'; looked up once per record cell.
    HashSet<int> skipped_columns = new HashSet<int>();

    for (int column = first_field_column; column <= last_field_column; ++column)
    {
        object field_name_value = worksheet.Cells[range.start_row + 1, column].Value2;
        if (field_name_value == null)
        {
            throw new ApplicationException(string.Format("필드 이름 없음 - Table={0}, Column={1}", raw_data.TableName, column));
        }

        string field_name = field_name_value.ToString().Trim();
        if (field_name.IndexOf('#') != -1)
        {
            skipped_columns.Add(column);
            continue;
        }

        raw_data.FieldNames.Add(field_name);

        // Count how many times each field name occurs.
        var field_name_counts = raw_data.FieldNameCounts.Find(x => x.FieldName == field_name);
        if (field_name_counts == null)
        {
            field_name_counts = new TableRawFieldNameCount() { FieldName = field_name, Count = 0 };
            raw_data.FieldNameCounts.Add(field_name_counts);
        }
        field_name_counts.Count += 1;

        object field_type_value = worksheet.Cells[range.start_row + 2, column].Value2;
        if (field_type_value == null)
        {
            // It is the field *type* cell that is empty here, so say so.
            throw new ApplicationException(string.Format("필드 타입 없음 - Table={0}, FieldName={1}", raw_data.TableName, field_name));
        }

        string field_type_name = field_type_value.ToString().Trim();
        raw_data.FieldTypes.Add(GetFieldType(field_type_name));
        raw_data.FieldTypeNames.Add(field_type_name);
    }

    // records
    if (last_record_row < first_record_row)
    {
        // Must be checked before building the range: get_Range with inverted corners still
        // returns a non-empty range (covering header rows), so an array-length check never fires.
        throw new ApplicationException("테이블 내용이 없습니다: " + raw_data.TableName);
    }

    raw_data.Records = new List<TableRawRecords>(last_record_row - first_record_row + 1);

    var start_cell = (Excel.Range)worksheet.Cells[first_record_row, first_field_column];
    var end_cell = (Excel.Range)worksheet.Cells[last_record_row, last_field_column];
    var range_cell = worksheet.get_Range(start_cell, end_cell);

    // Value2 is a 1-based object[,] for a multi-cell range but a bare scalar for a single cell
    // (one record row and one field column), so a blind cast to object[,] is not safe.
    object raw_values = range_cell.Value2;
    object[,] range_values = raw_values as object[,];
    int row_count = range_values != null ? range_values.GetLength(0) : 1;
    int col_count = range_values != null ? range_values.GetLength(1) : 1;

    for (int row = 1; row <= row_count; ++row)
    {
        string[] record_array = new string[raw_data.FieldNames.Count];
        int index = 0;

        for (int col = 1; col <= col_count; ++col)
        {
            // Array column 'col' corresponds to sheet column start_column + col.
            if (skipped_columns.Contains(range.start_column + col))
            {
                continue;
            }

            object cell = range_values != null ? range_values[row, col] : raw_values;
            record_array[index++] = cell != null ? Convert.ToString(cell) : null;
        }

        raw_data.Records.Add(new TableRawRecords(record_array));
    }

    return raw_data;
}
/// <summary>
/// Finds the bounds of the table whose label cell is at
/// (<paramref name="table_start_row"/>, <paramref name="table_start_column"/>), reading the
/// search area with a single bulk <c>Value2</c> call.
/// </summary>
/// <param name="worksheet">Worksheet that contains the table.</param>
/// <param name="table_start_row">Row of the table label cell.</param>
/// <param name="table_start_column">Column of the table label cell.</param>
/// <returns>
/// Range whose <c>end_row</c> is the row of the <c>LABEL_EOT</c> cell and whose
/// <c>end_column</c> is the column of the <c>LABEL_EOF</c> cell.
/// </returns>
/// <exception cref="ApplicationException">
/// Another table label or a second end-of-table label is found inside the table, or the
/// end-of-table / end-of-fields labels are missing.
/// </exception>
private TableRange DetectTable2(Excel.Worksheet worksheet, int table_start_row, int table_start_column)
{
    var used_range = worksheet.UsedRange;

    // Search up to one row/column past the used range. UsedRange need not start at A1, so the
    // limits are derived from its absolute position rather than from its size alone.
    int max_row = used_range.Row + used_range.Rows.Count;
    int max_column = used_range.Column + used_range.Columns.Count;
    bool detect_eot = false;

    TableRange table_range = new TableRange();
    table_range.start_row = table_start_row;
    table_range.start_column = table_start_column;

    // Bulk-read everything from the row below the label down to the search limits.
    var start_cell = (Excel.Range)worksheet.Cells[table_start_row + 1, table_start_column];
    var end_cell = (Excel.Range)worksheet.Cells[max_row, max_column];
    var range_cell = worksheet.get_Range(start_cell, end_cell);
    var range_values = (object[,])range_cell.Value2;

    for (int column = table_start_column; column <= max_column; ++column)
    {
        for (int row = table_start_row + 1; row <= max_row; ++row)
        {
            // Translate sheet coordinates into the 1-based indices of the bulk-read array.
            int cell_row = row - table_start_row;
            int cell_col = column - table_start_column + 1;

            var cell = range_values[cell_row, cell_col];
            if (cell == null)
            {
                continue;
            }

            string cell_string = Convert.ToString(cell);

            if (cell_string.Equals(LABEL_TABLE))
            {
                // A table-start label must never appear inside a table that is still open.
                throw new ApplicationException("테이블이 종료되지 않은 채로 테이블이 또 시작");
            }

            if (cell_string.Equals(LABEL_EOT))
            {
                if (detect_eot)
                {
                    throw new ApplicationException("테이블이 종료되었는데 또 테이블 종료가 발견");
                }

                // End of table found: clamp the row limit and move on to the next column.
                detect_eot = true;
                table_range.end_row = row;
                max_row = row;
                break;
            }

            if (cell_string.Equals(LABEL_EOF))
            {
                if (!detect_eot)
                {
                    // Returning now would hand back a range whose end_row was never set.
                    throw new ApplicationException("테이블/필드 종료 라벨을 발견 못함");
                }

                // End of fields found: detection is complete.
                table_range.end_column = column;
                return table_range;
            }
        }
    }

    throw new ApplicationException("테이블/필드 종료 라벨을 발견 못함");
}
/// <summary>
/// Finds the bounds of the table whose label cell is at
/// (<paramref name="table_start_row"/>, <paramref name="table_start_column"/>) by walking the
/// cells column by column, starting in the label's column on the row below the label.
/// </summary>
/// <param name="worksheet">Worksheet that contains the table.</param>
/// <param name="table_start_row">Row of the table label cell.</param>
/// <param name="table_start_column">Column of the table label cell.</param>
/// <returns>
/// Range whose <c>end_row</c> is the row of the <c>LABEL_EOT</c> cell and whose
/// <c>end_column</c> is the column of the <c>LABEL_EOF</c> cell.
/// </returns>
/// <exception cref="ApplicationException">
/// Another table label or a second end-of-table label is found inside the table, or the
/// end-of-table / end-of-fields labels are missing.
/// </exception>
private TableRange DetectTable(Excel.Worksheet worksheet, int table_start_row, int table_start_column)
{
    var used_range = worksheet.UsedRange;

    // Search up to one row/column past the used range. UsedRange need not start at A1, so the
    // limits are derived from its absolute position rather than from its size alone.
    int max_row = used_range.Row + used_range.Rows.Count;
    int max_column = used_range.Column + used_range.Columns.Count;
    bool detect_eot = false;

    TableRange table_range = new TableRange();
    table_range.start_row = table_start_row;
    table_range.start_column = table_start_column;

    for (int column = table_start_column; column <= max_column; ++column)
    {
        for (int row = table_start_row + 1; row <= max_row; ++row)
        {
            // Read Value2 once per cell; every access is a COM round trip.
            object cell_value = worksheet.Cells[row, column].Value2;
            if (cell_value == null)
            {
                continue;
            }

            string cell_string = cell_value.ToString();

            if (cell_string.Equals(LABEL_TABLE))
            {
                // A table-start label must never appear inside a table that is still open.
                throw new ApplicationException("테이블이 종료되지 않은 채로 테이블이 또 시작");
            }

            if (cell_string.Equals(LABEL_EOT))
            {
                if (detect_eot)
                {
                    throw new ApplicationException("테이블이 종료되었는데 또 테이블 종료가 발견");
                }

                // End of table found: clamp the row limit and move on to the next column.
                detect_eot = true;
                table_range.end_row = row;
                max_row = row;
                break;
            }

            if (cell_string.Equals(LABEL_EOF))
            {
                if (!detect_eot)
                {
                    // Returning now would hand back a range whose end_row was never set.
                    throw new ApplicationException("테이블/필드 종료 라벨을 발견 못함");
                }

                // End of fields found: detection is complete.
                table_range.end_column = column;
                return table_range;
            }
        }
    }

    throw new ApplicationException("테이블/필드 종료 라벨을 발견 못함");
}