/// <summary>
/// Runs every configuration returned by <c>GetConfiguration()</c> against the
/// extracted pages and then calls <c>PersistConfigResults()</c> once.
/// </summary>
/// <remarks>
/// For each configuration the JSON held in its <c>Configuration</c> property is
/// parsed, a sheet-name key ending in "$" is built from the first entry under
/// "Sheets", and the first page in <c>_pagesToProcess</c> whose upper-cased name
/// ends with that key is passed to <c>ProcessZone</c>. Configurations without a
/// matching page are skipped.
/// </remarks>
public virtual void Go()
{
    // Get the configurations stored for the file.
    IList<BankConfig> lenderSheetZoneConfigs = GetConfiguration();

    foreach (BankConfig lenderSheetZoneConfig in lenderSheetZoneConfigs)
    {
        // NOTE(review): this list is replaced on every iteration while
        // PersistConfigResults() runs only once after the loop — confirm that
        // results of earlier configurations are not meant to be persisted here.
        _parserResultsToPersist = new List<ZoneProcessorState>();

        // Parse the JSON configuration of the current entry.
        _jsonconfig = JObject.Parse(lenderSheetZoneConfig.Configuration);

        // Build the sheet-name key: the first sheet's name followed by "$".
        // Presumably Replace() overwrites the builder with the upper-cased
        // name — verify against Utilities.StringFunctions.
        Utilities.StringFunctions.Replace(
            ref _builderConfigSheetName,
            _jsonconfig["Sheets"][0]["Name"],
            Const.Parser.UPPER_CASE_VALUE);
        _builderConfigSheetName.Append("$");

        // Materialize the key once instead of on every predicate call.
        string sheetNameSuffix = _builderConfigSheetName.ToString();

        ProcessedPage extractedPage = _pagesToProcess.Find(
            x => x.Name.ToUpper().EndsWith(sheetNameSuffix));

        if (extractedPage != null)
        {
            ProcessZone(new ZoneProcessorState(
                (JArray)_jsonconfig["Sheets"][0]["Zones"],
                extractedPage.Json,
                extractedPage.Name,
                lenderSheetZoneConfig.Id));
        }
    }

    PersistConfigResults();
}
/// <summary>
/// Replaces the entry stored in <c>ProcessedPages</c> under <c>result.Id</c>
/// with <paramref name="result"/>. Does nothing when no entry with that id exists.
/// </summary>
/// <param name="result">The page whose stored entry should be overwritten.</param>
public void UpdateProcessedPage(ProcessedPage result)
{
    // Unknown ids are ignored; this method never adds new entries.
    if (!ProcessedPages.ContainsKey(result.Id))
    {
        return;
    }

    ProcessedPages[result.Id] = result;
}
/// <summary>
/// Opens a session from <c>_sessionFactory</c>, and inside a transaction calls
/// <c>Update</c> with <paramref name="result"/> and commits.
/// </summary>
/// <param name="result">The page passed to the session's <c>Update</c> call.</param>
public void UpdateProcessedPage(ProcessedPage result)
{
    // Both the session and the transaction are disposed when the block exits,
    // whether or not the commit was reached.
    using (var dbSession = _sessionFactory.OpenSession())
    using (var tx = dbSession.BeginTransaction())
    {
        dbSession.Update(result);
        tx.Commit();
    }
}
/// <summary>
/// Creates a <c>ProcessedPage</c> for <paramref name="url"/> filled with fixed
/// placeholder values for every other property set here.
/// </summary>
/// <param name="url">Value assigned to <c>PageUrl</c>.</param>
/// <returns>A new <c>ProcessedPage</c> instance.</returns>
public static ProcessedPage GetProcessedPage(string url)
{
    // Properties are assigned in the same order as before.
    return new ProcessedPage
    {
        SessionId = 77,
        CrawlerId = 34,
        PageUrl = url,
        Title = "Blah 1",
        Description = "Blah blah and blah",
        KeyWords = "blue, red",
        StatusCode = System.Net.HttpStatusCode.Ambiguous
    };
}
/// <summary>
/// Converts a crawled page into a <c>ProcessedPage</c> that can be stored.
/// </summary>
/// <remarks>
/// Copies the session and crawler ids from the page bag, the absolute URI and
/// the HTTP status code. As written, this method always returns a new instance;
/// it has no code path that returns null.
/// </remarks>
/// <param name="page">The results of a crawled url.</param>
/// <returns>A new <c>ProcessedPage</c> populated from <paramref name="page"/>.</returns>
public ProcessedPage ProcessPage(Abot.Poco.CrawledPage page)
{
    // TODO extract data
    // NOTE(review): page.HttpWebResponse is dereferenced without a null check —
    // confirm the crawler always supplies a response for pages reaching here.
    var processedPage = new ProcessedPage
    {
        SessionId = page.PageBag.SessionId,
        CrawlerId = page.PageBag.CrawlerId,
        PageUrl = page.Uri.AbsoluteUri,
        StatusCode = page.HttpWebResponse.StatusCode
    };

    // TODO store cookies (read here but not used yet)
    var cookies = page.HttpWebResponse.Cookies;

    return processedPage;
}
/// <summary>
/// Assigns the current value of <c>NextId</c> to <paramref name="result"/> and
/// adds it to <c>ProcessedPages</c> keyed by that id.
/// </summary>
/// <param name="result">The page to register; its <c>Id</c> is overwritten.</param>
public void AddProcessedPage(ProcessedPage result)
{
    // The id is assigned first so the page is stored under its own Id.
    result.Id = NextId;

    ProcessedPages.Add(result.Id, result);
}