Exemple #1
0
        /// <summary>
        /// Splits a space-separated table line into a leading name and <paramref name="noCols"/>
        /// trailing value cells, and builds one raw report row per value cell.
        /// </summary>
        /// <param name="textLine">The table line; tokens are separated by single spaces.</param>
        /// <param name="noCols">Number of value cells expected at the end of the line.</param>
        /// <returns>
        /// One row per value cell, each carrying the same name, or <c>null</c> when no token is left
        /// over for the name or when a value cell is neither "-", "n/a" nor a parsable number.
        /// </returns>
        private static List<WorldOfficialGoldHoldingReportRow_Raw> TryGetCells(string textLine, int noCols)
        {
            var tokens         = textLine.Split(' ');
            var nameTokenCount = tokens.Length - noCols;

            // At least one token must remain for the name.
            if (nameTokenCount <= 0)
            {
                return null;
            }

            var entityName = string.Join(" ", tokens.Take(nameTokenCount));
            var rows       = new List<WorldOfficialGoldHoldingReportRow_Raw>();

            // Everything after the name tokens is a value cell.
            for (int tokenIdx = nameTokenCount; tokenIdx < tokens.Length; tokenIdx++)
            {
                var token = tokens[tokenIdx];

                // A note marker, when present, ends the value part of the cell.
                var noteMatch   = s_regexNote.Match(token);
                int valueLength = noteMatch.Success ? noteMatch.Index : token.Length;

                var row = new WorldOfficialGoldHoldingReportRow_Raw();
                row.Name = entityName;

                // In table 1 "0" means less than 0.01 tons and "-" means zero.
                bool isDash = valueLength == 1 && string.CompareOrdinal(token, 0, "-", 0, 1) == 0;
                if (isDash)
                {
                    row.Tons = 0M;
                }
                else
                {
                    // "n/a" cells leave Tons unassigned.
                    bool isNotAvailable = valueLength == 3 && string.CompareOrdinal(token, 0, "n/a", 0, 3) == 0;
                    if (!isNotAvailable)
                    {
                        // The estimate marker is only looked for when no note marker shortened the cell.
                        if (valueLength == token.Length)
                        {
                            var estimateMatch = s_regexEstimate.Match(token);
                            if (estimateMatch.Success)
                            {
                                valueLength = estimateMatch.Index;
                            }
                        }

                        decimal tons;
                        bool    parsed = decimal.TryParse(
                            token.Substring(0, valueLength),
                            NumberStyles.AllowDecimalPoint | NumberStyles.AllowThousands,
                            NumberFormatInfo.InvariantInfo,
                            out tons);
                        if (!parsed)
                        {
                            return null;
                        }
                        row.Tons = tons;
                    }
                }

                rows.Add(row);
            }

            return rows;
        }
Exemple #2
0
        /// <summary>
        /// Loads the embedded workbook "quarterly_gold_and_fx_reserves_2000_2015.xlsx" and converts the
        /// worksheet whose name contains both "gold" and "ton" into one raw report per quarter,
        /// from 2000 Q1 through 2015 Q4.
        /// </summary>
        /// <returns>
        /// 64 reports ordered by quarter. Each report holds one row per worksheet data row; a cell
        /// containing "-" yields a row whose <c>Tons</c> is left unassigned.
        /// </returns>
        /// <exception cref="Exception">
        /// No row contains a quarter header, or the header row does not list the 64 expected
        /// consecutive quarters.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// A name in the first column ends with digits followed by ")".
        /// </exception>
        internal List<WorldOfficialGoldHoldingReport_Raw> Run()
        {
            // 16 years (2000..2015) times 4 quarters.
            const int quarterCount = 4 * 16;

            var      scraperType = typeof(WorldOfficialGoldHoldingReportsScraper_2000To2015);
            Workbook workbook;

            using (var stream = scraperType.Assembly.GetManifestResourceStream(scraperType, "quarterly_gold_and_fx_reserves_2000_2015.xlsx"))
            {
                workbook = Workbook.LoadExcel(stream);
            }

            var sheet = workbook.Worksheets.Single(ws =>
                                                   0 <= ws.Name.IndexOf("gold", StringComparison.OrdinalIgnoreCase) &&
                                                   0 <= ws.Name.IndexOf("ton", StringComparison.OrdinalIgnoreCase));

            // Find the first row that contains a quarter header; the search is bounded so that a
            // workbook without such a row fails with a clear message instead of running off the end.
            int headerRowIdx        = 0;
            int firstQuarterCellIdx = -1;

            for (; headerRowIdx < sheet.Rows.Count; headerRowIdx++)
            {
                firstQuarterCellIdx = sheet.Rows[headerRowIdx].Cells_NonEmpty.FindIndex(
                    c => c.ValueUnformatted != null && s_regexYearQuarter.IsMatch(c.ValueUnformatted));
                if (0 <= firstQuarterCellIdx)
                {
                    break;
                }
            }
            if (firstQuarterCellIdx < 0)
            {
                throw new Exception("No row with a quarter header was found in the worksheet.");
            }

            var headerCells = sheet.Rows[headerRowIdx].Cells_NonEmpty;

            if (headerCells.Count < firstQuarterCellIdx + quarterCount)
            {
                throw new Exception(string.Format(
                    NumberFormatInfo.InvariantInfo,
                    "The header row holds fewer than {0} quarter cells.",
                    quarterCount));
            }

            var reports = new List<WorldOfficialGoldHoldingReport_Raw>(quarterCount);

            for (int q = 0; q < quarterCount; q++)
            {
                int expectedQuarter = (q & 3) + 1;
                int expectedYear    = 2000 + (q >> 2);

                // Every header must name exactly the quarter and year implied by its position.
                string headerText = headerCells[firstQuarterCellIdx + q].ValueUnformatted;
                bool   headerOk   = false;
                if (headerText != null)
                {
                    var headerMatch = s_regexYearQuarter.Match(headerText);
                    int quarter;
                    int year;
                    headerOk = headerMatch.Success &&
                               int.TryParse(headerMatch.Groups[1].Captures.Cast<Capture>().Single().Value, NumberStyles.None, NumberFormatInfo.InvariantInfo, out quarter) &&
                               quarter == expectedQuarter &&
                               int.TryParse(headerMatch.Groups[2].Captures.Cast<Capture>().Single().Value, NumberStyles.None, NumberFormatInfo.InvariantInfo, out year) &&
                               year == expectedYear;
                }
                if (!headerOk)
                {
                    throw new Exception(string.Format(
                        NumberFormatInfo.InvariantInfo,
                        "Unexpected header \"{0}\"; expected quarter {1} of {2}.",
                        headerText,
                        expectedQuarter,
                        expectedYear));
                }

                var report = new WorldOfficialGoldHoldingReport_Raw();
                report.PublishTimePoint = new DateTime(2016, 3, 11, 0, 0, 0, DateTimeKind.Unspecified);
                report.DataTimePoint    = new DateTime(expectedYear, 1 + (q & 3) * 3, 1, 0, 0, 0, DateTimeKind.Unspecified);
                report.Rows             = new List<WorldOfficialGoldHoldingReportRow_Raw>();
                reports.Add(report);
            }

            // Built once instead of once per data row.
            var trailingNoteRefRegex = new Regex("\\d+\\)$");
            int rowIdx               = headerRowIdx + 1;

            // The row right below the header is always consumed; reading stops at the end of the
            // sheet or at the first following row that has too few cells to cover every quarter.
            do
            {
                var    dataCells           = sheet.Rows[rowIdx].Cells_NonEmpty;
                string politicalEntityName = dataCells[0].ValueUnformatted;

                if (trailingNoteRefRegex.IsMatch(politicalEntityName))
                {
                    throw new ArgumentException("Name ends with an unhandled note reference: " + politicalEntityName);
                }

                for (int q = 0; q < quarterCount; q++)
                {
                    string tonsStr = dataCells[firstQuarterCellIdx + q].ValueUnformatted;
                    var    row     = new WorldOfficialGoldHoldingReportRow_Raw()
                    {
                        Name = politicalEntityName,
                    };
                    if (tonsStr != "-")
                    {
                        row.Tons = decimal.Parse(tonsStr, NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, NumberFormatInfo.InvariantInfo);
                    }
                    reports[q].Rows.Add(row);
                }

                rowIdx += 1;
            }
            while (rowIdx < sheet.Rows.Count &&
                   firstQuarterCellIdx + quarterCount <= sheet.Rows[rowIdx].Cells_NonEmpty.Count);

            return reports;
        }
        /// <summary>
        /// Tries to read one report entry (rank, name with optional note reference, tons) from four
        /// consecutive non-empty cells of <paramref name="wbRow"/> starting at column
        /// <paramref name="firstColumnIndex"/>.
        /// </summary>
        /// <param name="wbRow">The workbook row to read.</param>
        /// <param name="firstColumnIndex">Column index passed to <c>FindColumn</c> to locate the rank cell.</param>
        /// <returns>
        /// The parsed row, or <c>null</c> when the rank column is missing, the four cells are not all
        /// present, the rank is not an integer, the name is null, or the tons value does not parse.
        /// </returns>
        /// <exception cref="NotImplementedException">
        /// The name carries a note reference but no note lookup function is set.
        /// </exception>
        private WorldOfficialGoldHoldingReportRow_Raw TryParseEntry(Row wbRow, int firstColumnIndex)
        {
            int firstCellIndex = FindColumn(wbRow.Cells_NonEmpty, firstColumnIndex);
            int lastCellIndex  = FindColumn(wbRow.Cells_NonEmpty, firstColumnIndex + 4);

            // NOTE(review): a negative FindColumn result is treated below as the bitwise complement
            // of an insertion position (binary-search convention) - confirm against FindColumn.
            // A missing first column means the row has no rank cell; without this guard a negative
            // index could still pass the span check and be used to index Cells_NonEmpty.
            if (firstCellIndex < 0)
            {
                return null;
            }
            if (lastCellIndex < 0)
            {
                lastCellIndex = ~lastCellIndex;
            }
            // All four cells between the two columns must be non-empty.
            if (lastCellIndex - firstCellIndex != 4)
            {
                return null;
            }

            int rank;

            // The rank value itself is not stored; it only identifies the row as an entry.
            if (!int.TryParse(wbRow.Cells_NonEmpty[firstCellIndex].ValueUnformatted, NumberStyles.None, NumberFormatInfo.InvariantInfo, out rank))
            {
                return null;
            }

            var reportRow_nameRaw = wbRow.Cells_NonEmpty[firstCellIndex + 1].ValueUnformatted;

            if (reportRow_nameRaw == null)
            {
                return null;
            }

            var reportRow    = new WorldOfficialGoldHoldingReportRow_Raw();
            var noteRefMatch = noteRefSuffixRegex.Match(reportRow_nameRaw);

            if (noteRefMatch.Success)
            {
                // Strip the note reference from the name and resolve it through the lookup function.
                var noteRefCapture     = noteRefMatch.Groups[1].Captures.Cast<Capture>().Single();
                var noteIndex_oneBased = int.Parse(
                    noteRefCapture.Value,
                    NumberStyles.None,
                    NumberFormatInfo.InvariantInfo);
                reportRow.Name = reportRow_nameRaw.Substring(0, noteRefCapture.Index);
                if (m_getNoteFunc == null)
                {
                    throw new NotImplementedException();
                }
                reportRow.Note = m_getNoteFunc(noteIndex_oneBased);
            }
            else
            {
                reportRow.Name = reportRow_nameRaw;
            }

            decimal tons;

            if (!decimal.TryParse(
                    wbRow.Cells_NonEmpty[firstCellIndex + 2].ValueUnformatted,
                    NumberStyles.AllowDecimalPoint,
                    NumberFormatInfo.InvariantInfo, out tons))
            {
                return null;
            }
            reportRow.Tons = tons;

            // The fourth cell (firstCellIndex + 3) is deliberately not read; earlier, now removed,
            // code parsed it into PortionOfReserves.
            return reportRow;
        }