/// <summary>
/// Splits one space-separated text line into an entity name followed by <paramref name="noCols"/>
/// value cells and produces one raw report row per value cell.
/// </summary>
/// <param name="textLine">The line to split on single spaces.</param>
/// <param name="noCols">Number of trailing tokens that are treated as value cells.</param>
/// <returns>
/// One row per value cell, each carrying the same name (the leading tokens re-joined with a space);
/// <c>null</c> when no token is left over for the name or when a cell cannot be parsed as a number.
/// </returns>
private static List<WorldOfficialGoldHoldingReportRow_Raw> TryGetCells(string textLine, int noCols)
{
    var tokens = textLine.Split(' ');

    // Everything in front of the last noCols tokens belongs to the (possibly multi-word) name.
    var nameTokenCount = tokens.Length - noCols;
    if (nameTokenCount <= 0)
    {
        return(null);
    }

    var entityName = string.Join(" ", tokens.Take(nameTokenCount));
    var rows = new List<WorldOfficialGoldHoldingReportRow_Raw>();

    foreach (var token in tokens.Skip(nameTokenCount))
    {
        // A match of s_regexNote cuts the cell short: only the text in front of the match is the value.
        var noteMatch = s_regexNote.Match(token);
        int valueLength = noteMatch.Success ? noteMatch.Index : token.Length;

        var row = new WorldOfficialGoldHoldingReportRow_Raw { Name = entityName };

        // In table 1 "0" means less than 0.01 tons and "-" means zero.
        bool isDash = valueLength == 1 && token[0] == '-';
        bool isNotAvailable = valueLength == 3 && string.CompareOrdinal(token, 0, "n/a", 0, 3) == 0;

        if (isDash)
        {
            row.Tons = 0M;
        }
        else if (!isNotAvailable)
        {
            // s_regexEstimate is consulted only when the note regex did not already shorten the value.
            if (valueLength == token.Length)
            {
                var estimateMatch = s_regexEstimate.Match(token);
                if (estimateMatch.Success)
                {
                    valueLength = estimateMatch.Index;
                }
            }

            decimal tons;
            bool parsed = decimal.TryParse(
                token.Substring(0, valueLength),
                NumberStyles.AllowDecimalPoint | NumberStyles.AllowThousands,
                NumberFormatInfo.InvariantInfo,
                out tons);
            if (!parsed)
            {
                return(null);
            }

            row.Tons = tons;
        }
        // For "n/a" the Tons member is deliberately left unassigned.

        rows.Add(row);
    }

    return(rows);
}
/// <summary>
/// Loads the embedded workbook "quarterly_gold_and_fx_reserves_2000_2015.xlsx", picks the single
/// worksheet whose name contains both "gold" and "ton" (case-insensitive) and converts it into one
/// report per quarter, from 2000 Q1 through 2015 Q4.
/// </summary>
/// <returns>
/// 64 reports (16 years x 4 quarters) in chronological order; each report holds one row per
/// worksheet data row. A cell containing "-" yields a row whose Tons member is left unassigned.
/// </returns>
/// <exception cref="Exception">
/// No row contains a cell matching the year/quarter header pattern, or a header cell does not
/// carry the quarter and year expected at its position.
/// </exception>
/// <exception cref="ArgumentException">
/// An entity name ends with digits followed by ')'.
/// </exception>
internal List<WorldOfficialGoldHoldingReport_Raw> Run()
{
    // 16 years (2000..2015) times 4 quarters.
    const int quarterCount = 4 * 16;

    var t = typeof(WorldOfficialGoldHoldingReportsScraper_2000To2015);
    Workbook wb;
    using (var stream = t.Assembly.GetManifestResourceStream(t, "quarterly_gold_and_fx_reserves_2000_2015.xlsx"))
    {
        wb = Workbook.LoadExcel(stream);
    }

    var sheet = wb.Worksheets.Single(
        ws => 0 <= ws.Name.IndexOf("gold", StringComparison.OrdinalIgnoreCase) &&
              0 <= ws.Name.IndexOf("ton", StringComparison.OrdinalIgnoreCase));

    // Locate the header row: the first row containing a cell that matches the year/quarter pattern.
    // The search is bounded by the row count so a missing header produces a clear error.
    int headerRowIdx = 0;
    int firstQuarterColIdx = -1;
    for (; headerRowIdx < sheet.Rows.Count; headerRowIdx++)
    {
        firstQuarterColIdx = sheet.Rows[headerRowIdx].Cells_NonEmpty.FindIndex(
            c => c.ValueUnformatted != null && s_regexYearQuarter.IsMatch(c.ValueUnformatted));
        if (0 <= firstQuarterColIdx)
        {
            break;
        }
    }
    if (firstQuarterColIdx < 0)
    {
        throw new Exception("No header row with a year/quarter cell was found in worksheet '" + sheet.Name + "'.");
    }

    // Verify that the header holds 64 consecutive quarters starting at 2000 Q1 and create one
    // empty report per quarter.
    var reports = new List<WorldOfficialGoldHoldingReport_Raw>();
    var headerCells = sheet.Rows[headerRowIdx].Cells_NonEmpty;
    for (int q = 0; q < quarterCount; q++)
    {
        int expectedQuarter = 1 + (q % 4);
        int expectedYear = 2000 + (q / 4);

        string headerText = headerCells[q + firstQuarterColIdx].ValueUnformatted;
        var m = s_regexYearQuarter.Match(headerText);
        bool headerOk = false;
        if (m.Success)
        {
            // Group 1 is checked against the quarter number, group 2 against the year.
            var quarterText = m.Groups[1].Captures.Cast<Capture>().Single().Value;
            int quarter;
            if (int.TryParse(quarterText, NumberStyles.None, NumberFormatInfo.InvariantInfo, out quarter) &&
                quarter == expectedQuarter)
            {
                var yearText = m.Groups[2].Captures.Cast<Capture>().Single().Value;
                int year;
                if (int.TryParse(yearText, NumberStyles.None, NumberFormatInfo.InvariantInfo, out year) &&
                    year == expectedYear)
                {
                    headerOk = true;
                }
            }
        }
        if (!headerOk)
        {
            throw new Exception(
                "Unexpected column header '" + headerText + "' at quarter offset " + q +
                "; expected quarter " + expectedQuarter + " of " + expectedYear + ".");
        }

        var report = new WorldOfficialGoldHoldingReport_Raw();
        report.PublishTimePoint = new DateTime(2016, 3, 11, 0, 0, 0, DateTimeKind.Unspecified);
        // The data time point is the first day of the quarter.
        report.DataTimePoint = new DateTime(expectedYear, 1 + (q % 4) * 3, 1, 0, 0, 0, DateTimeKind.Unspecified);
        report.Rows = new List<WorldOfficialGoldHoldingReportRow_Raw>();
        reports.Add(report);
    }

    // Built once per call instead of once per data row.
    var trailingNoteRefRegex = new Regex("\\d+\\)$");

    // Data rows follow the header row directly. Reading stops at the end of the sheet or at the
    // first row that has too few non-empty cells to cover all quarter columns.
    int rowIdx = headerRowIdx + 1;
    do
    {
        var cells = sheet.Rows[rowIdx].Cells_NonEmpty;
        string politicalEntityName = cells[0].ValueUnformatted;
        if (trailingNoteRefRegex.IsMatch(politicalEntityName))
        {
            throw new ArgumentException(
                "Entity name '" + politicalEntityName +
                "' ends with digits followed by ')', which is not handled by this scraper.");
        }

        for (int q = 0; q < quarterCount; q++)
        {
            var tonsStr = cells[q + firstQuarterColIdx].ValueUnformatted;
            var row = new WorldOfficialGoldHoldingReportRow_Raw()
            {
                Name = politicalEntityName,
            };
            // "-" leaves Tons unassigned; anything else must parse as a number.
            if (tonsStr != "-")
            {
                row.Tons = decimal.Parse(
                    tonsStr,
                    NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
                    NumberFormatInfo.InvariantInfo);
            }
            reports[q].Rows.Add(row);
        }

        rowIdx += 1;
    }
    while (rowIdx < sheet.Rows.Count &&
           quarterCount + firstQuarterColIdx <= sheet.Rows[rowIdx].Cells_NonEmpty.Count);

    return(reports);
}
/// <summary>
/// Tries to read one report entry (rank, name, tons) from four adjacent non-empty cells of
/// <paramref name="wbRow"/> starting at column <paramref name="firstColumnIndex"/>.
/// </summary>
/// <param name="wbRow">The workbook row to read from.</param>
/// <param name="firstColumnIndex">Column index of the rank cell; passed to FindColumn.</param>
/// <returns>
/// The parsed row, or <c>null</c> when the first column is not found, the four columns are not
/// all occupied, the rank is not a plain integer, the name is missing, or the tons value cannot
/// be parsed.
/// </returns>
/// <exception cref="NotImplementedException">
/// The name carries a note reference but no note lookup function (m_getNoteFunc) is set.
/// </exception>
private WorldOfficialGoldHoldingReportRow_Raw TryParseEntry(Row wbRow, int firstColumnIndex)
{
    int firstCellIndex = FindColumn(wbRow.Cells_NonEmpty, firstColumnIndex);
    int lastCellIndex = FindColumn(wbRow.Cells_NonEmpty, firstColumnIndex + 4);

    // A negative FindColumn result means "not found". Without this guard a negative first index
    // could still satisfy the distance check below and then be used to index the cell list.
    if (firstCellIndex < 0)
    {
        return(null);
    }

    // For the end position a miss is acceptable: the bitwise complement is used as the position.
    // NOTE(review): presumably FindColumn follows the List.BinarySearch convention - confirm.
    if (lastCellIndex < 0)
    {
        lastCellIndex = ~lastCellIndex;
    }

    // Exactly four non-empty cells must lie between the two positions.
    if (lastCellIndex - firstCellIndex != 4)
    {
        return(null);
    }

    // The rank is parsed only to validate the row; its value is not stored.
    int rank;
    if (!int.TryParse(
            wbRow.Cells_NonEmpty[firstCellIndex].ValueUnformatted,
            NumberStyles.None,
            NumberFormatInfo.InvariantInfo,
            out rank))
    {
        return(null);
    }

    var reportRow = new WorldOfficialGoldHoldingReportRow_Raw();
    var reportRow_nameRaw = wbRow.Cells_NonEmpty[firstCellIndex + 1].ValueUnformatted;
    if (reportRow_nameRaw == null)
    {
        return(null);
    }

    var m = noteRefSuffixRegex.Match(reportRow_nameRaw);
    if (m.Success)
    {
        // Group 1 holds the one-based note number; the name is the text in front of it.
        var noteRefCapture = m.Groups[1].Captures.Cast<Capture>().Single();
        var noteIndex_oneBased = int.Parse(
            noteRefCapture.Value,
            NumberStyles.None,
            NumberFormatInfo.InvariantInfo);
        reportRow.Name = reportRow_nameRaw.Substring(0, noteRefCapture.Index);
        if (m_getNoteFunc == null)
        {
            throw new NotImplementedException();
        }
        reportRow.Note = m_getNoteFunc(noteIndex_oneBased);
    }
    else
    {
        reportRow.Name = reportRow_nameRaw;
    }

    decimal t;
    if (!decimal.TryParse(
            wbRow.Cells_NonEmpty[firstCellIndex + 2].ValueUnformatted,
            NumberStyles.AllowDecimalPoint,
            NumberFormatInfo.InvariantInfo,
            out t))
    {
        return(null);
    }
    reportRow.Tons = t;

    // The fourth cell (firstCellIndex + 3) is intentionally not read; an earlier, disabled
    // version parsed it into PortionOfReserves.
    return(reportRow);
}