/// <summary>
/// Parses the raw HTML of a single event page into <paramref name="evt"/>: fills in the basic event info,
/// links the units listed in the attendee table and creates or updates a <see cref="PersonEvent"/>
/// for every listed member.
/// </summary>
/// <param name="log">Log scope of the caller; not used by this method.</param>
/// <param name="data">Database context; new <see cref="PersonEvent"/> rows are added to it, it is not saved here.</param>
/// <param name="evt">Event entity to update.</param>
/// <param name="htmlText">Raw HTML text of the event page.</param>
/// <param name="eventTawId">Id of the event on the site; used only in the error message.</param>
/// <exception cref="Exception">An attendee row carries a link that is neither a member link nor a unit link.</exception>
async Task ParseEventData_2(ILogEnd log, MyDbContext data, Event evt, string htmlText, long eventTawId) {
    // this page is so badly coded the HTML is invalid, chrome shows it correctly though, kudos to it
    // but HtmlAgilityPack just fails on it, so each table is cut out of the raw text and parsed on its own
    var eventInfoText = htmlText.TakeStringBetween("<table cellpadding=\"20\" cellspacing=\"5\">", "</table>");
    var eventInfoDoc = new HtmlDocument();
    eventInfoDoc.LoadHtml(eventInfoText);
    var eventInfo = new HtmlTwoColsStringTable(eventInfoDoc.DocumentNode);

    /*
     * Name         GRAW Practice -- Saber Squad Thursday Night (NA-SA)
     * Description  GRAW Practice -- Saber Squad Thursday Night (NA-SA)
     * Type         Practice
     * Unit         Ghost Recon
     * When         From: 6/3/2016 04:00:00 +02:00 to: 6/3/2016 05:00:00 +02:00
     * Mandatory    Yes
     * Cancelled    No
     */
    evt.Name = eventInfo["Name"];
    evt.Description = eventInfo["Description"];
    evt.Type = eventInfo["Type"];
    evt.Mandatory = eventInfo["Mandatory"] == "Yes";
    evt.Cancelled = eventInfo["Cancelled"] == "Yes";

    var when = eventInfo["When"];

    // Trim only when something was extracted, so the null checks below can actually take effect.
    var strFrom = when.TakeStringBetween("from:", "to:", StringComparison.InvariantCultureIgnoreCase)?.Trim();
    if (strFrom != null) {
        evt.From = ParseUSDateTime(strFrom);
    }

    var strTo = when.TakeStringAfter("to:", StringComparison.InvariantCultureIgnoreCase)?.Trim();
    if (strTo != null) {
        evt.To = ParseUSDateTime(strTo);
    }

    var attendeesText = htmlText.TakeStringBetween("<table width=100%>", "</table>");
    var attendeesDoc = new HtmlDocument();
    attendeesDoc.LoadHtml(attendeesText);
    var attendeesTable = new HtmlTable(attendeesDoc.DocumentNode);

    var newPersonEvents = new List<PersonEvent>();
    var personEvents = evt.Attended.ToList();

    foreach (var row in attendeesTable) {
        var name = row[0]?.InnerText?.Trim();
        // null when the cell has no <a> child, "" when the <a> has no href
        var nameHref = row[0]?.SelectSingleNode("a")?.GetAttributeValue("href", "");

        if (nameHref == null) {
            // event with no unit
            // http://taw.net/event/66327.aspx last row, unit name has no link
            continue;
        }

        if (nameHref.StartsWith("/member", StringComparison.Ordinal)) {
            if (name.IsNullOrWhiteSpace()) {
                // a deleted member attended event, so there is a row of event attendee with empty name
                continue;
            }

            var person = await GetPersonFromName(data, name);

            var personToEvent = personEvents.FirstOrDefault(p => p.EventId == evt.EventId && p.PersonId == person.PersonId);
            if (personToEvent == null) {
                personToEvent = new PersonEvent();
                personToEvent.EventId = evt.EventId;
                personToEvent.PersonId = person.PersonId;
                personEvents.Add(personToEvent);
                newPersonEvents.Add(personToEvent);
            }

            var attendanceStr = row[1]?.InnerText?.Trim();
            AttendanceType attendanceType = AttendanceType.Unknown;
            if (attendanceStr != null && Enum.TryParse(attendanceStr.ToLowerInvariant(), true, out attendanceType)) {
                personToEvent.AttendanceType = attendanceType;
            }

            // a missing cell yields null; a timestamp containing "--" is left unparsed
            var timestampStr = row[2]?.InnerText?.Trim();
            if (timestampStr != null && !timestampStr.Contains("--")) {
                personToEvent.TimeStamp = ParseUSDateTime(timestampStr);
            }
        } else if (nameHref.StartsWith("/unit", StringComparison.Ordinal)) {
            // the last path segment is "<unit id>.aspx"
            var unitTawIdStr = nameHref.Split('/', '\\').Last().RemoveFromEnd(".aspx".Length);
            var unitTawId = int.Parse(unitTawIdStr);
            var unit = await GetUnit(data, unitTawId, name);
            evt.Units.Add(unit);
        } else {
            throw new Exception("something is wrong, found unexpected data, taw event id:" + eventTawId);
        }
    }

    foreach (var personEvent in newPersonEvents) {
        data.PersonEvents.Add(personEvent);
    }
}
/// <summary>
/// Updates the stored data of one person from their profile page and from the "dossier movements" service.
/// "Dossier movements" DO NOT show demotions, so the movements are parsed first and then, if the resulting
/// rank differs from the rank shown in the "dossier next to picture" section, that rank is added as well.
/// </summary>
/// <param name="logParent">Parent log scope; a child scope is started on it for this person.</param>
/// <param name="personName">Name of the person; also sent as the "callsign" to the movements service.</param>
/// <returns>A task that completes once the profile page and the movements have been processed.</returns>
public async Task UpdateInfoFromProfilePage(ILogEnd logParent, string personName) {
    var logPerson = logParent.ProfileStart("updating profile of " + personName);

    var url = Person.GetPersonProfilePageUrl(personName);
    var response = await sessionManager.GetUrlAsync(url, logPerson.Profile("getting html"));
    var html = response.HtmlDocument;

    // rank shown in the main profile section, compared against the movements further below
    string realRankNameLong = null;

    using (var data = db.NewContext) {
        var person = data.People.FirstOrDefault(p => p.Name == personName);
        if (person == null) {
            logPerson.Error("person not found in database");
            return;
        }

        // steam profile id
        var steamProfileLinkPrefix = "http://steamcommunity.com/profiles/";
        var steamProfileLinkElement = html.GetElementbyId("hfSteam");
        if (steamProfileLinkElement != null) {
            // a missing href falls back to "<prefix>-1", i.e. a steam id of -1
            var steamProfileLink = steamProfileLinkElement.GetAttributeValue("href", steamProfileLinkPrefix + "-1");
            long steamId;
            if (steamProfileLink.StartsWith(steamProfileLinkPrefix, StringComparison.Ordinal)
                && long.TryParse(steamProfileLink.Substring(steamProfileLinkPrefix.Length), out steamId)) {
                person.SteamId = steamId;
            } else {
                // a link of another shape must not abort the whole profile update
                logPerson.Error("unexpected steam profile link: " + steamProfileLink);
            }
        }

        // avatar image
        var avatarElement = html.DocumentNode.SelectSingleNode("//*[@class='dossieravatar']/img");
        if (avatarElement != null) {
            var avatarImageLink = avatarElement.GetAttributeValue("src", null);
            if (avatarImageLink != null) {
                person.AvatarImageUrl = "http://taw.net" + avatarImageLink;
            }
        }

        // bio
        var biographyElement = html.DocumentNode.SelectSingleNode("//*[@id='dossierbio']");
        if (biographyElement != null) {
            var biography = biographyElement.InnerText.Trim();
            var bioTextHeader = "Bio:";
            if (biography.StartsWith(bioTextHeader, StringComparison.Ordinal)) {
                biography = biography.Substring(bioTextHeader.Length);
            }
            person.BiographyContents = biography;
        }

        var table = new HtmlTwoColsStringTable(html.DocumentNode.SelectNodes("//*[@class='dossiernexttopicture']/table//tr"));

        // country and status; the stored value is the fallback and may itself be null
        person.CountryName = table.GetValue("Location:", person.CountryName)?.Trim();
        person.Status = table.GetValue("Status:", person.Status)?.Trim().ToLowerInvariant();

        // joined
        var joined = table.GetValue("Joined:", "01-01-0001");
        person.DateJoinedTaw = ParseUSDateTime(joined);

        // real rank
        var rank = table.GetValue("Rank:", null);
        if (!rank.IsNullOrEmpty()) {
            realRankNameLong = rank.TakeStringBefore("(").Trim();
        }

        person.LastProfileDataUpdatedDate = DateTime.UtcNow;
        await data.SaveChangesAsync();
    }

    // dossier movements
    // rank in time
    // position in unit in time
    var stringData = await sessionManager.PostJsonAsync("http://taw.net/services/JSONFactory.asmx/GetMovement", new { callsign = personName }, logPerson.Profile("getting movements"));

    var log = logPerson.ProfileStart("parsing movements");
    try {
        // the response wraps the actual JSON payload as a string in property "d"
        var jsonData = (string)JObject.Parse(stringData)["d"];
        var dossierMovements = JsonConvert.DeserializeObject<DossierMovements>(jsonData);
        if (dossierMovements?.Movements == null) {
            log.Fatal("no movements parsed, data: " + stringData);
            return;
        }

        using (var data = db.NewContext) {
            var person = data.People.FirstOrDefault(p => p.Name == personName);
            if (person == null) {
                log.Fatal("person not found in database");
                return;
            }

            var wasDischarged = false;

            foreach (var dossierMovement in dossierMovements.Movements) {
                var timestamp = ParseUSDateTime(dossierMovement.timestamp);
                var tawId = long.Parse(dossierMovement.id);
                var description = dossierMovement.description;

                if (description.Contains("was admitted to TAW")) {
                    person.AdmittedToTaw = timestamp;
                } else if (description.Contains("was promoted to") || description.Contains("applied for TAW")) {
                    // only add ranks that are not stored yet
                    if (!person.Ranks.Any(r => r.TawId == tawId)) {
                        string rankNameLong = "unknown";
                        string byWho = null;

                        if (description.Contains("applied for TAW")) {
                            person.AppliedForTaw = timestamp;
                            rankNameLong = "Recruit";
                        } else {
                            // aeroson was promoted to Sergeant by <a href="/member/Samblues.aspx">Samblues</a>.
                            // aeroson was promoted to Private First Class by <a href="/member/MaverickSabre.aspx">MaverickSabre</a>.
                            var rankByWho = description.TakeStringAfter("was promoted to").Trim();
                            byWho = description.TakeStringAfter(" by ")?.TakeStringBetween(">", "</a>")?.Trim();
                            while (byWho != null && byWho.EndsWith(".", StringComparison.Ordinal)) {
                                byWho = byWho.RemoveFromEnd(1).Trim();
                            }
                            rankNameLong = rankByWho.TakeStringBefore("by").Trim();
                        }

                        var personRank = new PersonRank();
                        personRank.NameLong = rankNameLong;
                        personRank.ValidFrom = timestamp;
                        personRank.Person = person;
                        if (!byWho.IsNullOrWhiteSpace()) {
                            personRank.PromotedBy = await GetPersonFromName(data, byWho);
                        }
                        personRank.TawId = tawId;
                        person.Ranks.Add(personRank);
                    }
                } else if (description.Contains("was joined to units")) {
                    // aeroson was joined to units AM2 Charlie Squad by MaverickSabre.
                    // aeroson was joined to units AM2 Charlie FT by Samblues.
                    // <a href="/member/aeroson.aspx">aeroson</a> was joined to units <a href="/unit/3617.aspx">AM2 Charlie FT</a> by <a href="/member/Samblues.aspx">Samblues</a>.
                } else if (description.Contains("was removed from units")) {
                    // aeroson was removed from units AM2 TI Office by MaverickSabre.
                    // <a href="/member/aeroson.aspx">aeroson</a> was removed from units <a href="/unit/1549.aspx">AM2 TI Office</a> by <a href="/member/MaverickSabre.aspx">MaverickSabre</a>.
                } else if (description.Contains("was assigned to position")) {
                    // aeroson was assigned to position Training Instructor in unit AM2 TI Office by MaverickSabre.
                    // aeroson was assigned to position Squad Leader in unit AM2 Charlie Squad by MaverickSabre.
                    // <a href="/member/aeroson.aspx">aeroson</a> was assigned to position Squad Leader in unit <a href="/unit/1505.aspx">AM2 Charlie Squad</a> by <a href="/member/MaverickSabre.aspx">MaverickSabre</a>.
                } else if (description.Contains("was removed from position")) {
                    // aeroson was removed from position Training Instructor in unit AM2 TI Office by MaverickSabre.
                    // <a href="/member/aeroson.aspx">aeroson</a> was removed from position Training Instructor in unit <a href="/unit/1549.aspx">AM2 TI Office</a> by MaverickSabre.
                } else if (description.Contains("was returned to active duty by")) {
                    // <a href="/member/MaverickSabre.aspx">MaverickSabre</a> was returned to active duty by <a href="/member/Lucky.aspx">Lucky</a>.
                    wasDischarged = false;
                } else if (description.Contains("was put on leave by")) {
                    // <a href="/member/MaverickSabre.aspx">MaverickSabre</a> was put on leave by <a href="/member/Juvenis.aspx">Juvenis</a>.
                } else if (description.Contains("was reinstated by")) {
                    // <a href="/member/Dackey.aspx">Dackey</a> was reinstated by <a href="/member/Phenom.aspx">Phenom</a>
                    wasDischarged = false;
                } else if (description.Contains("was discharged by")) {
                    // http://taw.net/member/gravedigger.aspx
                    // leave from unit that is before this
                    // <a href="/member/MaverickSabre.aspx">MaverickSabre</a> was discharged by <a href="/member/Lucid.aspx">Lucid</a>.
                    wasDischarged = true;
                } else if (description.Contains("was discharged honorable by")) {
                    // <a href="/member/Xsage.aspx">Xsage</a> was discharged honorable by <a href="/member/TexasHillbilly.aspx">TexasHillbilly</a>.
                    wasDischarged = true;
                } else if (description.Contains("was discharged dishonorable by")) {
                    // <a href="/member/Dackey.aspx">Dackey</a> was discharged dishonorable by <a href="/member/Juvenis.aspx">Juvenis</a>.
                    wasDischarged = true;
                } else if (description.Contains("Unknown was removed from unit Unknown by")) {
                    // removed person from removed unit
                } else {
                    log.Warn("unexpected dossier row: " + description);
                }
            }

            if (wasDischarged) {
                // make sure all units relations are marked as not valid, so we are seen as not active
                var utcNow = DateTime.UtcNow;
                foreach (var u in person.Units.Where(u => u.Removed > utcNow).ToArray()) {
                    u.Removed = utcNow;
                }
            }

            if (!realRankNameLong.IsNullOrEmpty() && person.Rank?.NameLong != realRankNameLong) {
                // most up to date rank in dossier movements and in main section is different -> add rank from main section
                log.Info("found rank discrepancy, adding real rank: '" + realRankNameLong + "'");
                var personRank = new PersonRank();
                personRank.NameLong = realRankNameLong;
                personRank.ValidFrom = DateTime.UtcNow;
                personRank.Person = person;
                personRank.TawId = -1; // special case, added from profile main section
                person.Ranks.Add(personRank);
            }

            // cleanup person ranks
            // remove ranks added from unit pages
            // NOTE(review): as soon as one rank has TawId == 0 this removes ALL ranks of the person, and the loop
            // only terminates if removing from data.PersonRanks also shrinks person.Ranks - confirm both are intended.
            if (person.Ranks == null || person.Ranks.Any(r => r.TawId == 0)) {
                while (person.Ranks?.Count > 0) {
                    data.PersonRanks.Remove(person.Ranks.First());
                }
            }

            await data.SaveChangesAsync();
        }
    } catch (Exception e) {
        log.Fatal("exception occured while parsing: " + stringData);
        log.FatalException(e);
    } finally {
        // close the parsing scope on every path, including the early returns above
        log.End();
    }

    logPerson.End("done, parsed and saved");
}