} // constructor

/// <summary>
/// Parses a PDF VAT return: opens the file with a PDF reader, creates fresh
/// VAT return seeds and runs the three parsing stages (VAT period, business
/// details, return details) in that order.
/// </summary>
/// <param name="oFileID">Identifier of the file being parsed; used in log messages.</param>
/// <param name="oFile">Raw PDF content.</param>
/// <returns>The seeds created for this file, or null as soon as one stage fails.</returns>
public override ISeeds Run(SheafMetaData oFileID, byte[] oFile) {
    Info("Parsing {0}...", oFileID);

    m_oReader = new PdfReader(oFile);
    Seeds = new VatReturnSeeds(this);

    // Short-circuit evaluation: a failed stage prevents the following ones from running.
    bool bAllStagesPassed = VatPeriod() && BusinessDetails() && ReturnDetails();

    if (!bAllStagesPassed) {
        return null;
    } // if

    Info("Parsing {0} complete.", oFileID);

    return Seeds;
} // Run
} // Add

/// <summary>
/// Stores parsed data under the data type and base file name of the file it
/// came from. Thread safe.
/// </summary>
/// <param name="fi">File identifier.</param>
/// <param name="oData">Parsed data to store; null is silently ignored.</param>
public void Add(SheafMetaData fi, ISeeds oData) {
    if (oData != null) {
        lock (this) {
            var oByFileName = Seeds[fi.DataType];
            oByFileName[fi.BaseFileName] = oData;
        } // lock
    } // if
} // Add
} // constructor

/// <summary>
/// Records an HTTP error for a file and increments the error counter. Thread safe.
/// </summary>
/// <param name="fi">Identifier of the file where the error occurred.</param>
/// <param name="response">HTTP response with an error; its status code and reason phrase are stored.</param>
public void Add(SheafMetaData fi, HttpResponseMessage response) {
    lock (this) {
        var oByFileName = Errors[fi.DataType][fi.FileType];

        oByFileName[fi.BaseFileName] = new HarvesterError {
            Code = response.StatusCode,
            Message = response.ReasonPhrase
        };

        ErrorCount += 1;
    } // lock
} // Add
/// <summary>
/// Parses an HTML VAT return: decodes the file as UTF-8, locates the data
/// sections and runs the three parsing stages (VAT period, business details,
/// return details) on the first three of them.
/// </summary>
/// <param name="oFileID">Identifier of the file being parsed; used in log messages.</param>
/// <param name="oFile">Raw HTML content.</param>
/// <returns>
/// The VAT return seeds, or null when the data sections are not found or
/// one of the stages fails.
/// </returns>
public override ISeeds Run(SheafMetaData oFileID, byte[] oFile) {
    Info("Parsing {0}...", oFileID);

    var oDocument = new HtmlDocument();
    oDocument.LoadHtml(Encoding.UTF8.GetString(oFile));

    // In the page itself the sections are located at
    //     //*[@id="VAT0012"]/div[2]/form/div
    // but HtmlAgilityPack parses the file wrong: DIVs that should be children
    // of the FORM end up as its siblings, hence no "form" step in the query.
    HtmlNodeCollection oSections = oDocument.DocumentNode.SelectNodes("//*[@id=\"VAT0012\"]/div[2]/div");

    // Exactly four sections are required although only the first three are read.
    bool bSectionsFound = (oSections != null) && (oSections.Count == 4);

    if (!bSectionsFound) {
        Warn("Data sections not found in {0}.", oFileID);
        return null;
    } // if

    var oVatSeeds = (VatReturnSeeds)Seeds;

    // Short-circuit evaluation: a failed stage prevents the following ones from running.
    bool bAllStagesPassed =
        VatPeriod(oSections[0], oVatSeeds) &&
        BusinessDetails(oSections[1], oVatSeeds) &&
        ReturnDetails(oSections[2], oVatSeeds);

    if (!bAllStagesPassed) {
        return null;
    } // if

    Info("Parsing {0} complete.", oFileID);

    return oVatSeeds;
} // Run
} // Add

/// <summary>
/// Stores the raw content of a file under its data type, file type and base
/// file name. Thread safe.
/// </summary>
/// <param name="fi">File identifier.</param>
/// <param name="oFileData">File content to store.</param>
public void Add(SheafMetaData fi, byte[] oFileData) {
    lock (this) {
        var oByFileName = Files[fi.DataType][fi.FileType];
        oByFileName[fi.BaseFileName] = oFileData;
    } // lock
} // Add
} // ParseGBP

/// <summary>
/// Processes the content of one file and produces the data extracted from it.
/// </summary>
/// <param name="oFileID">Identifier of the file to process.</param>
/// <param name="oFile">Raw content of the file.</param>
/// <returns>
/// The extracted data.
/// NOTE(review): implementations presumably return null when the file cannot
/// be parsed - confirm against the overrides and their callers.
/// </returns>
public abstract ISeeds Run(
    SheafMetaData oFileID,
    byte[] oFile
);
} // ExtractTaxOfficeNumber

/// <summary>
/// Fetches the PAYE account page of the current tax office number, stores the
/// raw page in the hopper, then parses the RTI tax years table of that page
/// into month seeds and stores those in the hopper as well.
///
/// Does nothing when the tax office number is empty. Stops after storing the
/// raw page when the table head is missing or its column names are not the
/// expected ones.
///
/// Table rows are read top to bottom: the first row must be a caption cell
/// spanning three columns ("Current tax year YYYY-YY"); reading stops at a
/// "Previous tax years" caption or at a "Total" row. Collected rows are then
/// processed bottom to top, with the year switching from the first to the
/// last year of the tax year after the row that starts in December.
/// </summary>
/// <exception cref="HarvesterException">
/// The page could not be fetched or the table does not have the expected structure.
/// </exception>
private void FetchRtiTaxYears() {
    if (string.IsNullOrWhiteSpace(TaxOfficeNumber)) {
        Debug("Not fetching RTI Tax Years: Tax Office number is empty.");
        return;
    } // if

    Debug("Fetching RTI Tax Years started...");

    HtmlDocument doc = GetPage("/paye/org/" + TaxOfficeNumber + "/account");

    if ((doc == null) || (doc.DocumentNode == null)) {
        throw new HarvesterException("Failed to fetch PAYE account page.");
    }

    var smd = new SheafMetaData {
        BaseFileName = "PAYE RTI Tax Year",
        DataType = DataType.PayeRtiTaxYears,
        FileType = FileType.Html,
        Thrasher = null
    };

    // The raw page is stored before any parsing so it is kept even when parsing fails.
    using (var oOutput = new MemoryStream()) {
        doc.Save(oOutput);
        Hopper.Add(smd, oOutput.ToArray());
    } // using

    HtmlNode oTHead = doc.DocumentNode.SelectSingleNode("//*[@id=\"top\"]/div[3]/div[2]/div/div[2]/table[1]/thead");

    if (oTHead == null) {
        Info("RTI tax years table head not found.");
        return;
    } // if

    HtmlNodeCollection oHeadRows = oTHead.SelectNodes("tr");

    if ((oHeadRows == null) || (oHeadRows.Count != 1)) {
        throw new HarvesterException("RTI tax years table head is empty.");
    }

    HtmlNodeCollection oHeadCells = oHeadRows[0].SelectNodes("th | td");

    string[] aryExpectedColumnHeaders = new [] {
        "Date",
        "Amount paid in period",
        "Amount due in period",
    };

    if ((oHeadCells == null) || (oHeadCells.Count != aryExpectedColumnHeaders.Length)) {
        throw new HarvesterException("Failed to fetch RTI tax years: no cells in header row");
    }

    for (int i = 0; i < aryExpectedColumnHeaders.Length; i++) {
        // Ordinal comparison: the header names are fixed markup text, not culture-dependent content.
        if (!oHeadCells[i].InnerText.Trim().StartsWith(aryExpectedColumnHeaders[i], StringComparison.Ordinal)) {
            Info(
                "Not fetching RTI tax years: unexpected column {0} name: {1} (expected: {2})",
                i, oHeadCells[i].InnerText, aryExpectedColumnHeaders[i]
            );
            return;
        } // if
    } // for

    HtmlNode oTBody = doc.DocumentNode.SelectSingleNode("//*[@id=\"top\"]/div[3]/div[2]/div/div[2]/table[1]/tbody");

    if (oTBody == null) {
        throw new HarvesterException("RTI tax years table body not found.");
    }

    HtmlNodeCollection oRows = oTBody.SelectNodes("tr");

    if ((oRows == null) || (oRows.Count < 1)) {
        throw new HarvesterException("RTI tax years data not found.");
    }

    int nFirstYear = 0;
    int nLastYear = 0;

    var data = new List<RtiTaxYearRowData>();

    for (int nRowNum = 0; nRowNum < oRows.Count; nRowNum++) {
        HtmlNodeCollection oCells = oRows[nRowNum].SelectNodes("th | td");

        if ((oCells == null) || (oCells.Count < 1)) {
            throw new HarvesterException(string.Format(
                "Failed to fetch RTI tax years: no cells in row {0}.",
                nRowNum
            ));
        } // if

        string sFirstCell = oCells[0].InnerText.Trim();

        if (nRowNum == 0) {
            // The first row must be a caption spanning the whole table.
            HtmlNode oCell = oCells[0];

            if (!oCell.Attributes.Contains("colspan") || (oCell.Attributes["colspan"].Value != "3")) {
                throw new HarvesterException(string.Format(
                    "Failed to fetch RTI tax years: incorrect format in row {0}",
                    nRowNum
                ));
            } // if

            // No current tax year section at all: nothing to collect.
            if (sFirstCell == "Previous tax years") {
                break;
            }

            Match match = Regex.Match(sFirstCell, @"^Current tax year (\d\d)(\d\d)-(\d\d)$");

            if (!match.Success) {
                throw new HarvesterException(string.Format(
                    "Failed to fetch RTI tax years: incorrect content in row {0}.",
                    nRowNum
                ));
            } // if

            GroupCollection grp = match.Groups;

            int nCentury = Convert.ToInt32(grp[1].Value) * 100;

            nFirstYear = nCentury + Convert.ToInt32(grp[2].Value);
            nLastYear = nCentury + Convert.ToInt32(grp[3].Value);

            // The caption carries only two digits of the last year: "1999-00" means 1999 - 2000.
            if (nLastYear < nFirstYear) {
                nLastYear += 100;
            }

            Info("Current tax year: {0} - {1}", nFirstYear, nLastYear);

            continue;
        } // if first row

        if (oCells.Count != 3) {
            // A single-cell caption marks the end of the current tax year section.
            if ((oCells.Count == 1) && (sFirstCell == "Previous tax years")) {
                break;
            }

            throw new HarvesterException(string.Format(
                "Failed to fetch RTI tax years: unexpected number of cells in row {0}.",
                nRowNum
            ));
        } // if

        if (sFirstCell == "Total") {
            break;
        }

        try {
            data.Add(new RtiTaxYearRowData(sFirstCell, oCells[1].InnerText.Trim(), oCells[2].InnerText.Trim()));
        }
        catch (Exception e) {
            throw new HarvesterException(
                string.Format(
                    "Failed to fetch RTI tax years: unexpected format in row {0}.",
                    nRowNum
                ),
                e
            );
        } // try
    } // for each row

    int nCurYear = nFirstYear;

    var rtys = new RtiTaxYearSeeds();

    // Rows are processed in reverse order of their appearance in the table.
    for (int i = data.Count - 1; i >= 0; i--) {
        RtiTaxYearRowData rd = data[i];

        // A period that starts in a later month than it ends crosses New Year,
        // so its end date belongs to the last year of the tax year.
        int nEndYear = (rd.MonthEnd < rd.MonthStart) ? nLastYear : nCurYear;

        rtys.Months.Add(new RtiTaxMonthSeed {
            DateStart = new DateTime(nCurYear, rd.MonthStart, rd.DayStart),
            DateEnd = new DateTime(nEndYear, rd.MonthEnd, rd.DayEnd),
            AmountPaid = new Coin(rd.AmountPaid, "GBP"),
            AmountDue = new Coin(rd.AmountDue, "GBP")
        });

        // Every period after the one starting in December starts in the last year.
        if (rd.MonthStart == 12) {
            nCurYear = nLastYear;
        }
    } // for each

    Hopper.Add(smd, rtys);

    Debug("Fetching RTI Tax Years complete.");
} // FetchRtiTaxYears