/// <summary>
/// Adds <paramref name="newUpdate"/> for <paramref name="userid"/> to the HtmlRecord stored
/// under the update's domain URL, creating the record when none exists yet.
/// </summary>
/// <param name="newUpdate">Results to add; its <c>domain.AbsoluteUri</c> is the lookup key.</param>
/// <param name="userid">Id of the user the results are added for.</param>
/// <returns>The LinkOwner returned by <c>HtmlRecord.AddResults</c>.</returns>
/// <exception cref="MongoWriteConcernException">
/// Rethrown when the acknowledged save still fails after the last retry.
/// </exception>
public LinkOwner CreateEntry(IHtmlResults newUpdate, ObjectId userid)
{
    // The previous implementation retried by calling itself recursively with no bound,
    // so a persistent write failure ended in a stack overflow. Retry a fixed number of
    // times instead and surface the driver exception if the write keeps failing.
    const int maxAttempts = 5;

    for (int attempt = 1; ; attempt++)
    {
        try
        {
            // Re-read on every attempt so a record stored by a concurrent writer is picked up.
            HtmlRecord record = Database.Instance.htmlCollection.FindOneAs<HtmlRecord>(
                Query.EQ("url", newUpdate.domain.AbsoluteUri));

            bool isNewRecord = record == null;
            if (isNewRecord)
            {
                record = new HtmlRecord(newUpdate.domain);
            }

            LinkOwner ownerResult = record.AddResults(newUpdate, userid);
            Database.Instance.htmlCollection.Save(record, WriteConcern.Acknowledged);

            if (isNewRecord)
            {
                // Only a freshly created record is registered with the job schedule.
                jobSchedule.AddNewJob(record.id, record.timeStamp);
                processJobs.Set();
            }

            return ownerResult;
        }
        catch (MongoWriteConcernException ex)
        {
            Console.WriteLine(ex.ToString());
            if (attempt >= maxAttempts)
            {
                throw;
            }
        }
    }
}
/// <summary>
/// Wraps <paramref name="record"/> in a RecordMessage, appends it to <c>messageQueue</c>
/// and sets <c>allowSend</c>, all while holding the lock on <c>messageQueue</c>.
/// </summary>
/// <param name="record">The record to queue.</param>
public void EnqueueHtmlRecord(HtmlRecord record)
{
    lock (messageQueue)
    {
        messageQueue.Add(new RecordMessage(record));
        allowSend.Set();
    }
}
/// <summary>
/// Hands <paramref name="record"/> to the crawler node whose message queue currently holds
/// the fewest messages and remembers the assignment in <c>jobSet</c> under the record's id.
/// Blocks until at least one crawler node is present.
/// </summary>
/// <param name="record">The record to hand to a crawler node.</param>
public void DistributeWork(HtmlRecord record)
{
    // Poll with a short sleep rather than spinning in an empty loop, which pinned a CPU
    // core for as long as no crawler node was connected.
    while (crawlerNodes.Count == 0)
    {
        System.Threading.Thread.Sleep(10);
    }

    // Descending sort + Last() selects the smallest queue.
    // NOTE(review): if crawlerNodes can be emptied by another thread between the wait
    // above and this query, Last() throws InvalidOperationException - confirm whether
    // nodes can disconnect concurrently.
    CrawlerNode node = crawlerNodes.OrderByDescending(x => x.Value.messageQueue.Count).Last().Value;
    jobSet[record.id] = node;
    node.EnqueueHtmlRecord(record);
}
/// <summary>
/// For every results entry of <paramref name="record"/>, queries the user collection for
/// users matching <c>Links.v._id</c> equal to that entry's id and iterates over them.
/// The per-user notification itself is not implemented yet.
/// </summary>
/// <param name="record">The record whose results entries are looked up.</param>
public void UpdateUsersByRecord(HtmlRecord record)
{
    foreach (HtmlResults entry in record.results.Values)
    {
        MongoCursor<User> subscribers =
            Database.Instance.userCollection.FindAs<User>(Query.EQ("Links.v._id", entry.id));

        foreach (User subscriber in subscribers)
        {
            // Send SyncData notification to all users associated with HtmlRecord object
        }
    }
}
/// <summary>
/// Looks up a single HtmlRecord in the html collection by its <c>_id</c>.
/// </summary>
/// <param name="id">The <c>_id</c> value to query for.</param>
/// <returns>The result of <c>FindOne</c> for that id.</returns>
/// <exception cref="Exception">Any failure is written to the console and rethrown.</exception>
public HtmlRecord RetrieveEntryById(ObjectId id)
{
    try
    {
        IMongoQuery queryId = Query.EQ("_id", id);
        return Database.Instance.htmlCollection.FindOne(queryId);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
        // Plain "throw" keeps the original stack trace; "throw ex" would reset it.
        throw;
    }
}
/// <summary>
/// Finds the HtmlRecord whose <c>results.k</c> matches <c>owner.resultsid</c> and returns
/// what that results entry's <c>RetrieveResults</c> yields for <c>owner.userid</c>.
/// </summary>
/// <param name="owner">Supplies the results id to look up and the user id to retrieve for.</param>
/// <returns>The LinkOwner returned by <c>RetrieveResults</c>.</returns>
/// <exception cref="Exception">Any failure is written to the console and rethrown.</exception>
public LinkOwner ManualRequest(LinkOwner owner)
{
    try
    {
        IMongoQuery query = Query.EQ("results.k", owner.resultsid);
        HtmlRecord record = Database.Instance.htmlCollection.FindOne(query);

        // NOTE(review): presumably FindOne yields null when nothing matches, in which case
        // the dereference below fails and is logged and rethrown like any other error.
        return record.results[owner.resultsid].RetrieveResults(owner.userid);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
        // Plain "throw" keeps the original stack trace; "throw ex" would reset it.
        throw;
    }
}
/// <summary>
/// Stamps <paramref name="record"/> with the current UTC time, updates its schedule entry,
/// saves it to the html collection, passes it to
/// <c>UserManager.Instance.UpdateUsersByRecord</c> and finally sets <c>processJobs</c>.
/// </summary>
/// <param name="sender">Not used by this method.</param>
/// <param name="record">The record to stamp, reschedule and save.</param>
/// <exception cref="Exception">Any failure is written to the console and rethrown.</exception>
public void UpdateEntry(object sender, HtmlRecord record)
{
    try
    {
        record.timeStamp = DateTime.UtcNow;
        jobSchedule.UpdateSchedule(record.id);
        Database.Instance.htmlCollection.Save(typeof(HtmlRecord), record);
        UserManager.Instance.UpdateUsersByRecord(record);
        processJobs.Set();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
        // Plain "throw" keeps the original stack trace; "throw ex" would reset it.
        throw;
    }
}
/// <summary>
/// Endless scheduling loop: waits on <c>processJobs</c> while the schedule is empty, then
/// takes the next job id, loads its record, hands it to
/// <c>CrawlerManager.Instance.DistributeWork</c> and resets <c>processJobs</c>.
/// </summary>
private void ScheduleJobs()
{
    for (;;)
    {
        bool nothingScheduled = jobSchedule.jobSchedule.Count == 0;
        if (nothingScheduled)
        {
            processJobs.WaitOne();
        }

        ObjectId nextJobId = jobSchedule.GetJob();
        HtmlRecord pending = RetrieveEntryById(nextJobId);
        CrawlerManager.Instance.DistributeWork(pending);

        // Reset happens last, after the job has been handed off.
        processJobs.Reset();
    }
}
// User is unsubscribing altogether
/// <summary>
/// Loads the HtmlRecord identified by <c>entryToRemove.resultsid</c>, removes the given
/// owner from it and saves the record with an acknowledged write.
/// </summary>
/// <param name="entryToRemove">The owner entry to remove; its resultsid selects the record.</param>
/// <exception cref="Exception">
/// Any failure (including MongoWriteConcernException) is written to the console and rethrown.
/// </exception>
public void RemoveResultsOwner(LinkOwner entryToRemove)
{
    try
    {
        HtmlRecord record = RetrieveEntryById(entryToRemove.resultsid);
        record.RemoveResultsOwner(entryToRemove);
        Database.Instance.htmlCollection.Save(record, WriteConcern.Acknowledged);
    }
    catch (Exception ex)
    {
        // A separate MongoWriteConcernException clause with the same body was redundant.
        Console.WriteLine(ex.ToString());
        // Plain "throw" keeps the original stack trace; "throw ex" would reset it.
        throw;
    }
}
// replicates CreateEntry in the event of record duplication during a parallel "Save"
//

/*public LinkOwner ModifyHtmlResults(IHtmlResults modifiedEntry, LinkOwner owner)
 * {
 *  try
 *  {
 *      HtmlRecord record = Database.Instance.htmlCollection.FindOneAs<HtmlRecord>(Query.EQ("url", modifiedEntry.domain.AbsoluteUri));
 *      LinkOwner ownerResult = record.ModifyResults(modifiedEntry, owner);
 *
 *      Database.Instance.htmlCollection.Save(record, WriteConcern.Acknowledged);
 *      return ownerResult;
 *  }
 *  catch(MongoWriteConcernException ex)
 *  {
 *      Console.WriteLine(ex.ToString());
 *      throw ex;
 *  }
 *  catch(Exception ex)
 *  {
 *      Console.WriteLine(ex.ToString());
 *      throw ex;
 *  }
 * }*/

/*public bool Edit(HtmlRecord record)
 * {
 *  var result = Database.Instance.htmlCollection.FindAndModify(Query.And(Query.EQ("url", record.domain.AbsoluteUri),
 *                                                                         Query.EQ("Version", record.version)),
 *                                                               null,
 *                                                               Update.Set("_id", record.id)
 *                                                                   .Set("url", record.url)
 *                                                                   .Set("domain", record.domain.ToBson())
 *                                                                   .Set("timeStamp", record.timeStamp)
 *                                                                   .Set("results", record.results.ToBson())
 *                                                                   .Set("serverResponse", record.serverResponse)
 *                                                                   .Inc("version", 1));
 *  return result.ModifiedDocument != null;
 * }*/

/// <summary>
/// Loads the HtmlRecord identified by <c>newOwnerResults.resultsid</c>, applies
/// <c>ModifyOwner</c> to it and saves the record with an acknowledged write.
/// </summary>
/// <param name="newOwnerResults">The owner data to apply; its resultsid selects the record.</param>
/// <returns>The LinkOwner returned by <c>HtmlRecord.ModifyOwner</c>.</returns>
/// <exception cref="Exception">
/// Any failure (including MongoWriteConcernException) is written to the console and rethrown.
/// </exception>
public LinkOwner ModifyOwnership(LinkOwner newOwnerResults)
{
    try
    {
        HtmlRecord record = RetrieveEntryById(newOwnerResults.resultsid);
        LinkOwner linkOwner = record.ModifyOwner(newOwnerResults);
        Database.Instance.htmlCollection.Save(record, WriteConcern.Acknowledged);
        return linkOwner;
    }
    catch (Exception ex)
    {
        // A separate MongoWriteConcernException clause with the same body was redundant.
        Console.WriteLine(ex.ToString());
        // Plain "throw" keeps the original stack trace; "throw ex" would reset it.
        throw;
    }
}
/// <summary>
/// Removes the entry keyed by <c>args.id</c> from <c>jobSet</c>.
/// </summary>
/// <param name="sender">Not used by this method.</param>
/// <param name="args">The record whose id is removed from <c>jobSet</c>.</param>
public void OnUpdateReceived(object sender, HtmlRecord args)
{
    var finishedJobId = args.id;
    jobSet.Remove(finishedJobId);
}
/// <summary>
/// Creates a message that carries <paramref name="record"/> in <c>htmlRecord</c>.
/// </summary>
/// <param name="record">The record stored in the message.</param>
public RecordMessage(HtmlRecord record)
{
    this.htmlRecord = record;
}
/// <summary>
/// Downloads the page at <c>Uri</c>, decodes it with the charset matched by
/// <c>codingRegex</c> in the Content-Type header, and turns the <c>tr</c> rows with class
/// <c>r1</c> into HtmlRecord entries (date, home team, away team, time/result, link).
/// </summary>
/// <returns>
/// At most 36 records; an empty list when the response is not 200 OK, the charset cannot
/// be determined, or the expected table or rows are missing.
/// </returns>
private async Task<List<HtmlRecord>> LoadData()
{
    // Upper bound on the number of rows collected (previously an inline literal).
    const int maxRecords = 36;

    var records = new List<HtmlRecord>();

    // Dispose the client and the response so the connection and buffers are released.
    using (var httpClient = new HttpClient())
    using (var response = await httpClient.GetAsync(Uri))
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return records;
        }

        // A missing Content-Type header used to throw from First(); treat it as "no data".
        IEnumerable<string> contentTypeValues;
        if (!response.Content.Headers.TryGetValues("Content-Type", out contentTypeValues))
        {
            return records;
        }

        var rawencoding = contentTypeValues.FirstOrDefault();
        if (rawencoding == null)
        {
            return records;
        }

        var codingRegexMatch = codingRegex.Match(rawencoding);
        if (!codingRegexMatch.Success)
        {
            //hopefully not :D
            return records;
        }

        var codingName = codingRegexMatch.Groups[1].Value;
        var bytes = await response.Content.ReadAsByteArrayAsync();
        var html = Encoding.GetEncoding(codingName).GetString(bytes);

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        var htmlNode = htmlDocument.GetElementbyId("table-los");
        if (htmlNode == null)
        {
            // Page layout changed or the table is absent: nothing to parse.
            return records;
        }

        // NOTE(review): the leading "//" makes this XPath search the whole document, not
        // just the descendants of table-los - confirm that is intended.
        var groups = htmlNode.SelectNodes("//tr[@class='r1']");
        if (groups == null)
        {
            // No matching rows; guard against a null result before iterating.
            return records;
        }

        foreach (var group in groups)
        {
            var record = new HtmlRecord();

            // Position among the row's td children only; other child nodes are skipped.
            var index = 0;
            foreach (var item in group.ChildNodes)
            {
                if (item.Name != "td")
                {
                    continue;
                }

                switch (index)
                {
                    case 0:
                        if (item.HasClass("tal"))
                        {
                            record.Date = item.InnerHtml;
                        }
                        break;
                    case 1:
                        if (item.HasClass("tac"))
                        {
                            record.HomeTeam = item.InnerText;
                        }
                        break;
                    case 2:
                        if (item.HasClass("tac"))
                        {
                            record.AwayTeam = item.InnerText;
                        }
                        break;
                    case 3:
                        if (item.HasClass("tac"))
                        {
                            record.TimeOrResult = item.InnerText;

                            // The link is the first attribute of the cell's first child;
                            // leave Link unset when the cell has no child or attribute
                            // instead of throwing.
                            var firstChild = item.ChildNodes.FirstOrDefault();
                            var firstAttribute = firstChild == null
                                ? null
                                : firstChild.Attributes.FirstOrDefault();
                            if (firstAttribute != null)
                            {
                                record.Link = firstAttribute.Value;
                            }
                        }
                        break;
                }

                index++;
            }

            records.Add(record);
            if (records.Count == maxRecords)
            {
                break;
            }
        }
    }

    return records;
}