/// <summary>
/// Stamps the record with the current UTC time, updates its entry in the job
/// schedule, saves it to the HTML collection and then sets <c>processEvent</c>.
/// </summary>
/// <param name="record">The record to timestamp, reschedule and save.</param>
public void UpdateEntry(HtmlRecord record)
{
    // Refresh the timestamp before anything else looks at the record.
    record.timeStamp = DateTime.UtcNow;

    // The schedule is keyed by the record's id.
    jobSchedule.UpdateSchedule(record.recordid);

    var collection = Database.Instance.htmlCollection;
    collection.Save(typeof(HtmlRecord), record);

    // Set the event only after the save call has returned.
    processEvent.Set();
}
/// <summary>
/// Looks up a single record in the HTML collection by its <c>_id</c> field.
/// </summary>
/// <param name="id">The id to match against <c>_id</c>.</param>
/// <returns>Whatever <c>FindOne</c> yields for the query.</returns>
public HtmlRecord RetrieveEntryById(ObjectId id)
{
    return Database.Instance.htmlCollection.FindOne(Query.EQ("_id", id));
}
/// <summary>
/// Returns the stored record whose <c>url</c> field equals the absolute URI of
/// <paramref name="domain"/>, or a newly constructed <see cref="HtmlRecord"/>
/// for that URI when no stored record is found.
/// </summary>
/// <param name="domain">The URI to look up; its <c>AbsoluteUri</c> is the query value.</param>
/// <returns>The existing record if one was found, otherwise a new one (not saved by this method).</returns>
public IHtmlRecord CreateHtmlRecord(Uri domain)
{
    IHtmlRecord existing =
        Database.Instance.htmlCollection.FindOneAs<HtmlRecord>(Query.EQ("url", domain.AbsoluteUri));

    if (existing != null)
    {
        return existing;
    }

    return new HtmlRecord(domain);
}
/// <summary>
/// Assigns a record to one crawler node and enqueues it there.
/// </summary>
/// <remarks>
/// Blocks until <c>crawlerNodes</c> contains at least one entry. The chosen node is the
/// one with the fewest queued records for the same host as <paramref name="record"/>;
/// ties are broken by the shortest overall queue. The assignment is recorded in
/// <c>jobSet</c> under the record's id before the record is enqueued.
/// </remarks>
/// <param name="record">The record to hand to a crawler node.</param>
public void DistributeWork(HtmlRecord record)
{
    // Wait for a node without pinning a core: SpinUntil spins briefly and then
    // yields/sleeps between checks instead of running an empty hot loop.
    System.Threading.SpinWait.SpinUntil(() => crawlerNodes.Count != 0);

    // Primary key: same-host backlog. Secondary key: total backlog.
    // (Equivalent to the previous stable OrderBy(total).OrderBy(sameHost) chain,
    // but states the precedence explicitly.)
    var ranked = crawlerNodes
        .OrderBy(n => n.Value.messageQueue.Count(m => m.domain.Host == record.domain.Host))
        .ThenBy(n => n.Value.messageQueue.Count);

    CrawlerNode node = ranked.First().Value;

    jobSet[record.recordid] = node;
    node.EnqueueHtmlRecord(record);
}
/// <summary>
/// Accepts a finished job: unzips the payload, deserializes it into an
/// <see cref="HtmlRecord"/>, removes the job from the crawler manager and
/// updates the entry through the data manager.
/// </summary>
/// <param name="zippedBytes">The zipped, BSON-serialized record.</param>
/// <returns>
/// <c>ServerResponse.Success</c> when every step completes;
/// <c>ServerResponse.ServerError</c> when any step throws (the exception is
/// written to the debug output).
/// </returns>
public ServerResponse ReturnFinishedJob(byte[] zippedBytes)
{
    try
    {
        byte[] unzipped = RestAPI.Unzip(zippedBytes);
        HtmlRecord finished = BSON.Deserialize<HtmlRecord>(unzipped);

        CrawlerManager.Instance.RemoveJob(finished.recordid);
        DataManager.Instance.UpdateEntry(finished);

        return ServerResponse.Success;
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.Print(ex.ToString());
        return ServerResponse.ServerError;
    }
}
/// <summary>
/// Sender loop: takes records from <c>messageQueue</c>, clears the <c>links</c> of
/// every result, serializes the record to BSON and submits it through
/// <c>RestAPI.EnqueueJob</c>. Runs until <c>processDestroyed</c> is set.
/// </summary>
/// <remarks>
/// A record is put back on the queue when the reply's <c>Successful</c> flag is false,
/// when the parsed response is not <c>ServerResponse.Success</c>, or when a
/// <see cref="WebException"/> is thrown. Other exception types are not caught here.
/// </remarks>
public void Send()
{
    RestAPI api = new RestAPI();

    while (!processDestroyed)
    {
        HtmlRecord record = messageQueue.Take();

        // Clear the links on each result before the record is serialized.
        foreach (HtmlResults entry in record.results.Values)
        {
            entry.links = null;
        }

        byte[] payload = record.ToBson<HtmlRecord>();
        bool requeue = false;

        try
        {
            JObject reply = api.EnqueueJob(payload);
            bool received = reply.GetValue("Successful").Value<bool>();

            if (!received || api.ParseResponse(reply) != ServerResponse.Success)
            {
                requeue = true;
            }
            else
            {
                System.Diagnostics.Debug.Print("[" + DateTime.Now.ToString() + "] Sent: " + record.domain.AbsoluteUri);
            }
        }
        catch (WebException ex)
        {
            System.Diagnostics.Debug.Print(ex.ToString());
            requeue = true;
        }

        // Single requeue point for every failure path above.
        if (requeue)
        {
            messageQueue.Add(record);
        }
    }
}
/// <summary>
/// Scheduler loop: takes the next job from <c>jobSchedule</c>, waits until its due
/// time (UTC), loads the record by id, hands it to the crawler manager and resets
/// <c>processEvent</c>.
/// </summary>
/// <remarks>
/// When the thread is interrupted, the loop ends and a new thread running this same
/// method is created, stored in <c>scheduleJobsProc</c> and started.
/// </remarks>
private void ScheduleJobs()
{
    // Thread.Sleep rejects timeouts above int.MaxValue milliseconds, so long waits
    // are taken in slices of at most this length.
    TimeSpan maxSlice = TimeSpan.FromMilliseconds(int.MaxValue);

    try
    {
        while (true)
        {
            KeyValuePair<DateTime, ObjectId> jobPair = jobSchedule.GetJob();

            // Re-evaluate the remaining time from a single clock read per iteration.
            // The previous code compared against one UtcNow and subtracted another,
            // then cast to int: a due time passing in between could produce -1
            // (an infinite sleep) or a value Thread.Sleep rejects, and very long
            // delays overflowed the cast.
            TimeSpan remaining = jobPair.Key - DateTime.UtcNow;
            while (remaining > TimeSpan.Zero)
            {
                Thread.Sleep(remaining < maxSlice ? remaining : maxSlice);
                remaining = jobPair.Key - DateTime.UtcNow;
            }

            // NOTE(review): the lookup result is passed on unchecked; confirm whether
            // a missing record is possible here and how it should be handled.
            HtmlRecord record = RetrieveEntryById(jobPair.Value);
            CrawlerManager.Instance.DistributeWork(record);
            processEvent.Reset();
        }
    }
    catch (ThreadInterruptedException)
    {
        // Restart the scheduler on a fresh thread after an interrupt.
        scheduleJobsProc = new Thread(ScheduleJobs);
        scheduleJobsProc.Start();
    }
}
/// <summary>
/// Adds a record to this instance's <c>messageQueue</c>.
/// </summary>
/// <param name="record">The record to enqueue.</param>
public void EnqueueHtmlRecord(HtmlRecord record)
{
    messageQueue.Add(record);
}