/// <summary>
/// Downloads the page of a single event and hands the response to <c>ParseEventData_1</c>
/// together with a fresh database context.
/// </summary>
/// <param name="_log">Parent log; a profile entry for this event is opened under it and is always ended before returning.</param>
/// <param name="eventTawId">Id of the event; passed to <c>Event.GetEventPage</c> to build the URL and forwarded to the parser.</param>
/// <returns>
/// The value returned by <c>ParseEventData_1</c>, or <c>ParseEventResult.ErrorenousEvent</c> when any exception is thrown
/// while downloading or parsing (the exception is logged, not rethrown).
/// </returns>
public async Task<ParseEventResult> ParseEventData(ILogEnd _log, long eventTawId)
{
    var log = _log.ProfileStart("working event data " + eventTawId);
    try
    {
        var url = Event.GetEventPage(eventTawId);
        var response = await sessionManager.GetUrlAsync(url, log.Profile("getting html"));

        var scope = log.ScopeStart("parsing html");
        try
        {
            using (var data = db.NewContext)
            {
                return await ParseEventData_1(scope, data, response, eventTawId);
            }
        }
        finally
        {
            // End the scope on failure as well; it used to be ended only when parsing succeeded.
            // Order on success is unchanged: context disposed, scope ended, then the profile entry below.
            scope.End();
        }
    }
    catch (Exception e)
    {
        log.Error("error");
        log.FatalException(e);
        return ParseEventResult.ErrorenousEvent;
    }
    finally
    {
        log.End();
    }
}
/// <summary>
/// Downloads the roster page of one unit, passes its roster list to <c>ParseUnitContents</c>, then finishes
/// parsing every entry of <c>personNameToPersonLines</c>, one task and one database context per person.
/// For each person, unit relations that are still valid but were not among the lines just processed
/// are marked as removed.
/// </summary>
/// <param name="log">Parent log; a profile entry for the unit is opened under it and ended when the method exits.</param>
/// <param name="sessionManager">Session used to download the roster page.</param>
/// <param name="tawUnitId">Id of the unit; passed to <c>Unit.GetUnitRoasterPage</c> to build the URL.</param>
/// <returns>A task that completes once all per-person tasks have completed.</returns>
public async Task Run(ILogEnd log, SessionMannager sessionManager, int tawUnitId)
{
    // Keep the caller's log untouched and work on the child entry, so it can be ended reliably below.
    var unitLog = log.ProfileStart($"unit {tawUnitId}");
    try
    {
        var url = Unit.GetUnitRoasterPage(tawUnitId);
        var response = await sessionManager.GetUrlAsync(url, unitLog.Profile("getting html"));
        var html = response.HtmlDocument;
        var roasterDiv = html.GetElementbyId("ctl00_bcr_UpdatePanel1").SelectSingleNode("./div/ul");

        using (unitLog.ScopeStart("parsing html"))
        {
            // NOTE(review): presumably this is what fills personNameToPersonLines - confirm in ParseUnitContents.
            await ParseUnitContents(unitLog, roasterDiv, null);
        }

        using (var peopleLog = unitLog.ScopeStart("processing people"))
        {
            var tasks = new List<Task>(personNameToPersonLines.Count);
            foreach (var kvp in personNameToPersonLines)
            {
                var personName = kvp.Key;
                var personLines = kvp.Value;

                tasks.Add(Task.Run(async () =>
                {
                    using (var personLog = peopleLog.ProfileStart(personName))
                    using (var data = Db.NewContext)
                    {
                        foreach (var personLine in personLines)
                        {
                            await personLine.FinishParsing(personLog, data);
                        }

                        var personUnitIds = personLines.Select(p => p.PersonToUnitId).ToArray();
                        var utcNow = DateTime.UtcNow;

                        // if some person to unit is still valid, and not one of those we just updated,
                        // mark it as not valid anymore
                        var staleRelations = data
                            .People
                            .First(p => p.Name == personName)
                            .Units
                            .Where(u => u.Removed > utcNow) // still valid, not removed
                            .Where(u => !personUnitIds.Contains(u.PersonUnitId)) // except those we found & updated
                            .ToArray(); // snapshot before mutating, as the filter reads Removed

                        foreach (var relation in staleRelations)
                        {
                            relation.Removed = utcNow; // remove it
                        }

                        try
                        {
                            await data.SaveChangesAsync();
                        }
                        catch (Exception e)
                        {
                            // a failed save for one person is logged and does not fault the task
                            personLog.Fatal(e);
                        }
                    }
                }));
            }

            await Task.WhenAll(tasks);
        }
    }
    finally
    {
        // the unit profile entry was never ended before; close it on success and on failure
        unitLog.End();
    }
}
/// <summary>
/// Refreshes the stored data of one person from their profile page and from the "dossier movements" service.
/// "dossier movements" DO NOT show demotions.
/// So we first need to parse "dossier movements", then if the rank differs from rank in "dossier next to picture" we add that one.
/// </summary>
/// <remarks>
/// Work is done in two database contexts: the first stores steam id, avatar, biography, location, status and
/// join date from the profile HTML; the second stores rank history and discharge state from the movements JSON.
/// Exceptions thrown while parsing movements are logged and swallowed; exceptions thrown while downloading or
/// while processing the profile HTML propagate to the caller.
/// </remarks>
/// <param name="logParent">Parent log; a profile entry for this person is opened under it and is always ended before returning.</param>
/// <param name="personName">Name (callsign) of the person; used to build the profile URL, to query the movements service and to look the person up in the database.</param>
/// <returns>A task that completes when the update has finished or was abandoned (person missing, no movements).</returns>
public async Task UpdateInfoFromProfilePage(ILogEnd logParent, string personName)
{
    var logPerson = logParent.ProfileStart("updating profile of " + personName);

    // Stays null on every early-return / exception path so the finally block can still end the log entry
    // without claiming the update succeeded.
    string endMessage = null;
    try
    {
        var url = Person.GetPersonProfilePageUrl(personName);
        var response = await sessionManager.GetUrlAsync(url, logPerson.Profile("getting html"));
        var html = response.HtmlDocument;

        string realRankNameLong = null;

        using (var data = db.NewContext)
        {
            var person = data.People.FirstOrDefault(p => p.Name == personName);
            if (person == null)
            {
                logPerson.Error("person not found in database");
                return;
            }

            // steam profile id
            var steamProfileLinkPrefix = "http://steamcommunity.com/profiles/";
            var steamProfileLinkElement = html.GetElementbyId("hfSteam");
            if (steamProfileLinkElement != null)
            {
                var steamProfileLink = steamProfileLinkElement.GetAttributeValue("href", steamProfileLinkPrefix + "-1");
                long steamId;
                // An unexpected link shape must not abort the whole profile update, so validate instead of
                // Substring + long.Parse which throw on anything that is not "<prefix><number>".
                if (steamProfileLink.StartsWith(steamProfileLinkPrefix, StringComparison.Ordinal)
                    && long.TryParse(steamProfileLink.Substring(steamProfileLinkPrefix.Length), out steamId))
                {
                    person.SteamId = steamId;
                }
                else
                {
                    logPerson.Error("unexpected steam profile link: " + steamProfileLink);
                }
            }

            // avatar image
            var avatarElement = html.DocumentNode.SelectSingleNode("//*[@class='dossieravatar']/img");
            if (avatarElement != null)
            {
                var avatarImageLink = avatarElement.GetAttributeValue("src", null);
                if (avatarImageLink != null)
                {
                    person.AvatarImageUrl = "http://taw.net" + avatarImageLink;
                }
            }

            // bio
            var biographyElement = html.DocumentNode.SelectSingleNode("//*[@id='dossierbio']");
            if (biographyElement != null)
            {
                var biography = biographyElement.InnerText.Trim();
                var bioTextHeader = "Bio:";
                if (biography.StartsWith(bioTextHeader))
                {
                    biography = biography.Substring(bioTextHeader.Length);
                }
                person.BiographyContents = biography;
            }

            var table = new HtmlTwoColsStringTable(html.DocumentNode.SelectNodes("//*[@class='dossiernexttopicture']/table//tr"));

            // country
            // The stored value is passed as the fallback and may itself be null, hence the null-conditional calls.
            person.CountryName = table.GetValue("Location:", person.CountryName)?.Trim();
            person.Status = table.GetValue("Status:", person.Status)?.Trim().ToLower();

            // joined
            var joined = table.GetValue("Joined:", "01-01-0001");
            person.DateJoinedTaw = ParseUSDateTime(joined);

            // real rank
            var rank = table.GetValue("Rank:", null);
            if (!rank.IsNullOrEmpty())
            {
                realRankNameLong = rank.TakeStringBefore("(").Trim();
            }

            person.LastProfileDataUpdatedDate = DateTime.UtcNow;

            await data.SaveChangesAsync();
        }

        // dossier movements
        // rank in time
        // position in unit in time
        var stringData = await sessionManager.PostJsonAsync("http://taw.net/services/JSONFactory.asmx/GetMovement", new { callsign = personName }, logPerson.Profile("getting movements"));

        var log = logPerson.ProfileStart("parsing movements");
        try
        {
            var jsonData = (string)JObject.Parse(stringData)["d"];
            var dossierMovements = JsonConvert.DeserializeObject<DossierMovements>(jsonData);
            if (dossierMovements?.Movements == null)
            {
                log.Fatal("no movements parsed, data: " + stringData);
                return;
            }

            using (var data = db.NewContext)
            {
                var person = data.People.FirstOrDefault(p => p.Name == personName);
                if (person == null)
                {
                    // the person was found in the first context; report a disappearance explicitly
                    // instead of failing later with a NullReferenceException
                    log.Fatal("person not found in database");
                    return;
                }

                var wasDischarged = false;

                foreach (var dossierMovement in dossierMovements.Movements)
                {
                    var timestamp = ParseUSDateTime(dossierMovement.timestamp);
                    var tawId = long.Parse(dossierMovement.id);
                    var description = dossierMovement.description;

                    if (description.Contains("was admitted to TAW"))
                    {
                        person.AdmittedToTaw = timestamp;
                    }
                    else if (description.Contains("was promoted to") || description.Contains("applied for TAW"))
                    {
                        // only add movements we have not stored yet
                        if (!person.Ranks.Any(r => r.TawId == tawId))
                        {
                            string rankNameLong = "unknown";
                            string byWho = null;
                            if (description.Contains("applied for TAW"))
                            {
                                person.AppliedForTaw = timestamp;
                                rankNameLong = "Recruit";
                            }
                            else
                            {
                                // aeroson was promoted to Sergeant by <a href="/member/Samblues.aspx">Samblues</a>.
                                // aeroson was promoted to Private First Class by <a href="/member/MaverickSabre.aspx">MaverickSabre</a>.
                                var rankByWho = description.TakeStringAfter("was promoted to").Trim();
                                byWho = description.TakeStringAfter(" by ").TakeStringBetween(">", "</a>").Trim();
                                while (byWho.EndsWith("."))
                                {
                                    byWho = byWho.RemoveFromEnd(1).Trim();
                                }
                                rankNameLong = rankByWho.TakeStringBefore("by").Trim();
                            }

                            var personRank = new PersonRank();
                            personRank.NameLong = rankNameLong;
                            personRank.ValidFrom = timestamp;
                            personRank.Person = person;
                            if (!byWho.IsNullOrWhiteSpace() && byWho.Length > 0)
                            {
                                personRank.PromotedBy = await GetPersonFromName(data, byWho);
                            }
                            personRank.TawId = tawId;
                            person.Ranks.Add(personRank);
                        }
                    }
                    else if (description.Contains("was joined to units"))
                    {
                        // aeroson was joined to units AM2 Charlie Squad by MaverickSabre.
                        // aeroson was joined to units AM2 Charlie FT by Samblues.
                        // <a href="/member/aeroson.aspx">aeroson</a> was joined to units <a href="/unit/3617.aspx">AM2 Charlie FT</a> by <a href="/member/Samblues.aspx">Samblues</a>.
                    }
                    else if (description.Contains("was removed from units"))
                    {
                        // aeroson was removed from units AM2 TI Office by MaverickSabre.
                        // <a href="/member/aeroson.aspx">aeroson</a> was removed from units <a href="/unit/1549.aspx">AM2 TI Office</a> by <a href="/member/MaverickSabre.aspx">MaverickSabre</a>.
                    }
                    else if (description.Contains("was assigned to position"))
                    {
                        // aeroson was assigned to position Training Instructor in unit AM2 TI Office by MaverickSabre.
                        // aeroson was assigned to position Squad Leader in unit AM2 Charlie Squad by MaverickSabre.
                        // <a href="/member/aeroson.aspx">aeroson</a> was assigned to position Squad Leader in unit <a href="/unit/1505.aspx">AM2 Charlie Squad</a> by <a href="/member/MaverickSabre.aspx">MaverickSabre</a>.
                    }
                    else if (description.Contains("was removed from position"))
                    {
                        // aeroson was removed from position Training Instructor in unit AM2 TI Office by MaverickSabre.
                        // <a href="/member/aeroson.aspx">aeroson</a> was removed from position Training Instructor in unit <a href="/unit/1549.aspx">AM2 TI Office</a> by MaverickSabre.
                    }
                    else if (description.Contains("was returned to active duty by"))
                    {
                        // <a href="/member/MaverickSabre.aspx">MaverickSabre</a> was returned to active duty by <a href="/member/Lucky.aspx">Lucky</a>.
                        wasDischarged = false;
                    }
                    else if (description.Contains("was put on leave by"))
                    {
                        // <a href="/member/MaverickSabre.aspx">MaverickSabre</a> was put on leave by <a href="/member/Juvenis.aspx">Juvenis</a>.
                    }
                    else if (description.Contains("was reinstated by"))
                    {
                        // <a href="/member/Dackey.aspx">Dackey</a> was reinstated by <a href="/member/Phenom.aspx">Phenom</a>
                        wasDischarged = false;
                    }
                    else if (description.Contains("was discharged by"))
                    {
                        // http://taw.net/member/gravedigger.aspx
                        // leave from unit that is before this
                        // <a href="/member/MaverickSabre.aspx">MaverickSabre</a> was discharged by <a href="/member/Lucid.aspx">Lucid</a>.
                        wasDischarged = true;
                    }
                    else if (description.Contains("was discharged honorable by"))
                    {
                        // <a href="/member/Xsage.aspx">Xsage</a> was discharged honorable by <a href="/member/TexasHillbilly.aspx">TexasHillbilly</a>.
                        wasDischarged = true;
                    }
                    else if (description.Contains("was discharged dishonorable by"))
                    {
                        // <a href="/member/Dackey.aspx">Dackey</a> was discharged dishonorable by <a href="/member/Juvenis.aspx">Juvenis</a>.
                        wasDischarged = true;
                    }
                    else if (description.Contains("Unknown was removed from unit Unknown by"))
                    {
                        // removed person from removed unit
                    }
                    else
                    {
                        log.Warn("unexpected dossier row: " + description);
                    }
                }

                // wasDischarged reflects the last discharge / reinstatement row seen in iteration order
                if (wasDischarged)
                {
                    // make sure all units relations are marked as not valid, so we are seen as not active
                    var utcNow = DateTime.UtcNow;
                    foreach (var u in person.Units.Where(u => u.Removed > utcNow).ToArray())
                    {
                        u.Removed = utcNow;
                    }
                }

                if (!realRankNameLong.IsNullOrEmpty() && person.Rank?.NameLong != realRankNameLong)
                {
                    // most up to date rank in dossier movements and in main section is different -> add rank from main section
                    log.Info("found rank discrepancy, adding real rank: '" + realRankNameLong + "'");
                    var personRank = new PersonRank();
                    personRank.NameLong = realRankNameLong;
                    personRank.ValidFrom = DateTime.UtcNow;
                    personRank.Person = person;
                    personRank.TawId = -1; // special case, added from profile main section
                    person.Ranks.Add(personRank);
                }

                // cleanup person ranks
                // remove ranks added from unit pages
                // NOTE(review): when any rank has TawId == 0 this removes ALL ranks of the person, including the
                // ones added above, and the loop relies on Remove() shrinking person.Ranks - confirm both are intended.
                if (person.Ranks == null || person.Ranks.Any(r => r.TawId == 0))
                {
                    while (person.Ranks?.Count > 0)
                    {
                        data.PersonRanks.Remove(person.Ranks.First());
                    }
                }

                await data.SaveChangesAsync();
            }
        }
        catch (Exception e)
        {
            log.Fatal("exception occured while parsing: " + stringData);
            log.FatalException(e);
        }
        finally
        {
            // also reached on the early returns above, which used to leave this entry open
            log.End();
        }

        endMessage = "done, parsed and saved";
    }
    finally
    {
        if (endMessage != null)
        {
            logPerson.End(endMessage);
        }
        else
        {
            // early return or exception: close the entry without the success message
            logPerson.End();
        }
    }
}