/// <summary>
/// Posts to the precinct's landing URL to obtain the archive HTML, builds a report for
/// every link parsed from it, and hands the reports that pass the date/event filter to
/// <c>ExtractAndInsert</c> before submitting the changes.
/// </summary>
/// <param name="precinct">
/// Precinct whose <c>LandingUrl()</c> is fetched and whose <c>Cutoff</c> date is the
/// exclusive lower bound for report dates. <c>Cutoff.Value</c> is read without a
/// <c>HasValue</c> check, so a precinct without a cutoff fails once a dated report is filtered.
/// </param>
/// <remarks>
/// A report is processed only when it has a report date, that date is strictly after the
/// cutoff date and not after today, and the report has no events yet.
/// </remarks>
public static void GetArchiveFromArchiveHtml(this Precinct precinct)
{
    // Nothing created here escapes the method, so the context can be released
    // deterministically as soon as the changes have been submitted.
    using (var db = new DatabaseDataContext())
    {
        // Get links from the archive HTML.
        var links = OverviewParser.GetlinksByHtml(Doc.GetArchiveHtmlByPost(precinct.LandingUrl()));
        var reportManager = new ReportManager(precinct);

        // Create the reports in parallel; links for which GetReport yields null are dropped.
        var reports = links
            .AsParallel()
            .Select(pr => reportManager.GetReport(pr.Item1, pr.Item2, pr.Item3))
            .Where(x => x != null);

        // Read the clock once so every report is compared against the same upper bound.
        var today = DateTime.Now.Date;

        // Keep dated reports inside (cutoff, today] that do not have any events yet.
        var relevantReports = reports.Where(r =>
            r.ReportDate.HasValue
            && r.ReportDate.Value.Date > precinct.Cutoff.Value.Date
            && r.ReportDate.Value.Date <= today
            && !r.Events.Any());

        // The query above is deferred: it actually runs when ExtractAndInsert enumerates it.
        ExtractAndInsert(relevantReports, db);
        db.SubmitChanges();
    }
}
/// <summary>
/// Parses the report links from the precinct's landing URL, builds a report for each link
/// that passes the database filter, passes those reports to <c>ExtractAndInsert</c>,
/// submits the changes and returns the reports.
/// </summary>
/// <param name="precinct">Precinct whose <c>LandingUrl()</c> is parsed and whose <c>PrecinctId</c> is matched against stored reports.</param>
/// <returns>
/// The non-null reports that were built during this call, as an already materialised list;
/// enumerating the result does not query the database or build reports again.
/// </returns>
public static IEnumerable<Report> GetIncremental(this Precinct precinct)
{
    var links = OverviewParser.GetLinksByUrl(precinct.LandingUrl());

    // NOTE(review): the context is not disposed here because the returned Report entities
    // may still rely on it (e.g. deferred loading) — confirm before adding a using block.
    var db = new DatabaseDataContext();
    var reportManager = new ReportManager(precinct);

    // Materialise exactly once. A deferred query would be executed by ExtractAndInsert and
    // then executed again (database lookups and GetReport calls included) by every caller
    // that enumerates the return value.
    var reports = links
        // Skip a link when the database holds a report with the same Uri and precinct that
        // has no events.
        // NOTE(review): a stored report that already has events does NOT exclude its link,
        // which looks at odds with "filter out ones that are in db already" — confirm intent.
        .Where(l => !db.Reports.Any(r =>
            r.Uri == l.Item1
            && precinct.PrecinctId == r.PrecinctId
            && !r.Events.Any()))
        .Select(l => reportManager.GetReport(l.Item1, l.Item2, l.Item3))
        .Where(x => x != null)
        .ToList();

    ExtractAndInsert(reports, db);
    db.SubmitChanges();

    return reports;
}