/// <summary>
/// Parses one unit node of the roster tree: updates the unit's type and parent, records every
/// person line found directly under it, and parses nested unit nodes recursively on the thread pool.
/// </summary>
/// <param name="log">Logger used for trace output; also handed to each person line and child parse.</param>
/// <param name="unitNamePlusUl">
/// Node containing an "li" or "span" child with the unit's type/name links and, optionally,
/// a "ul" child listing its members.
/// </param>
/// <param name="parentUnitId">Id of the parent unit, or null when the unit has no parent.</param>
/// <returns>A task that completes once this unit and all nested units have been parsed.</returns>
async Task ParseUnitContents(ILogEnd log, HtmlNode unitNamePlusUl, long? parentUnitId)
{
    var tasks = new List<Task>();

    using (var data = Db.NewContext)
    {
        var unitTypeNameElement = unitNamePlusUl.SelectSingleNode("li | span");
        var unitTypeA = unitTypeNameElement.SelectSingleNode("*/a[1] | a[1]");
        var unitNameA = unitTypeNameElement.SelectSingleNode("*/a[2] | a[2]");

        var type = unitTypeA.InnerText;
        var name = unitNameA.InnerText;

        // The id is machine-generated text (and the fallback is "-1"), so parse it with the
        // invariant culture; the negative sign is culture-dependent otherwise.
        var tawId = int.Parse(
            unitNameA.GetAttributeValue("href", "/unit/-1.aspx").TakeStringBetweenLast("/", ".aspx"),
            System.Globalization.CultureInfo.InvariantCulture);

        log.Trace("parsing unit " + tawId);

        var unit = await GetUnit(data, tawId, name);
        unit.Type = type;
        if (parentUnitId.HasValue)
        {
            unit.ParentUnit = data.Units.Find(parentUnitId.Value);
        }
        await data.SaveChangesAsync();

        // Read the id once, after saving, so the child tasks do not touch the entity
        // from other threads or after the context has been disposed.
        var unitId = unit.UnitId;

        // A unit without a member list has no "ul" child; treat that as "no members"
        // rather than dereferencing null.
        var children = unitNamePlusUl.SelectSingleNode("ul");
        if (children != null)
        {
            foreach (var child in children.ChildNodes)
            {
                var personA = child.SelectSingleNode("a");
                if (personA != null)
                {
                    // A direct "a" child marks a person entry.
                    var personLine = new UnitRoasterPersonLine(log, child.InnerText, unitId);

                    // Sibling units are parsed concurrently, so the shared map is guarded.
                    lock (personNameToPersonLines)
                    {
                        List<UnitRoasterPersonLine> personLines;
                        if (!personNameToPersonLines.TryGetValue(personLine.PersonName, out personLines))
                        {
                            personLines = new List<UnitRoasterPersonLine>();
                            personNameToPersonLines[personLine.PersonName] = personLines;
                        }
                        personLines.Add(personLine);
                    }
                }
                else
                {
                    // Anything else is handled as a nested unit; each gets its own task
                    // (and, inside the recursive call, its own context).
                    tasks.Add(Task.Run(() => ParseUnitContents(log, child, unitId)));
                }
            }
        }
    }

    await Task.WhenAll(tasks);
}
/// <summary>
/// Validates an event page response, makes sure a matching <c>Event</c> row exists, and hands
/// the page body to <c>ParseEventData_2</c> for detailed parsing.
/// </summary>
/// <param name="log">Logger for warnings and trace output.</param>
/// <param name="data">Database context used to look up or create the event and to save changes.</param>
/// <param name="response">HTTP response of the requested event page.</param>
/// <param name="eventTawId">Event id that was requested; must match the id found in the response URI.</param>
/// <returns>
/// <c>InvalidUriShouldRetry</c> when the response URI is not an event URI with a numeric id,
/// <c>BaseEvent</c> when the page is the "base event" placeholder,
/// otherwise <c>ValidEvent</c> after the event has been parsed and saved.
/// </returns>
/// <exception cref="Exception">The id in the response URI differs from <paramref name="eventTawId"/>.</exception>
/// <exception cref="InvalidOperationException">The response carries no body text to parse.</exception>
async Task<ParseEventResult> ParseEventData_1(ILogEnd log, MyDbContext data, MyHttpWebResponse response, long eventTawId)
{
    var uriPath = response.ResponseUri.AbsolutePath;
    if (!uriPath.Contains("event"))
    {
        log.Warn("the event you are trying to parse has invalid uri:" + uriPath + " should contain taw event id:" + eventTawId);
        return ParseEventResult.InvalidUriShouldRetry;
    }

    // The last path segment is expected to look like "<id>.aspx".
    var eventTawIdStr = uriPath.Split('/', '\\').Last().RemoveFromEnd(".aspx".Length);

    // Parse as long to match eventTawId, and treat a non-numeric segment as one more
    // kind of invalid URI instead of letting a FormatException escape.
    long parsedEventTawId;
    if (!long.TryParse(
            eventTawIdStr,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out parsedEventTawId))
    {
        log.Warn("the event you are trying to parse has invalid uri:" + uriPath + " should contain taw event id:" + eventTawId);
        return ParseEventResult.InvalidUriShouldRetry;
    }

    if (eventTawId != parsedEventTawId)
    {
        throw new Exception($"should not happen, looking for tat event id:{eventTawId} but found:{parsedEventTawId}");
    }

    var htmlText = response.ResponseText;
    htmlText = htmlText?.TakeStringAfter("ctl00_ctl00_bcr_bcr_UpdatePanel\">");
    if (htmlText == null)
    {
        // Fail with a clear message instead of a NullReferenceException on the check below.
        throw new InvalidOperationException("response for taw event id:" + eventTawId + " has no text to parse");
    }

    if (htmlText.Contains("This is a Base Event and should never be seen"))
    {
        // Example of such a page: http://taw.net/event/65132.aspx
        log.Trace("invalid 'base event', skipping");
        return ParseEventResult.BaseEvent;
    }

    var evt = data.Events.FirstOrDefault(e => e.TawId == eventTawId);
    if (evt == null)
    {
        // Persist the new event before ParseEventData_2 runs.
        evt = data.Events.Add(new Event { TawId = eventTawId });
        await data.SaveChangesAsync();
    }

    await ParseEventData_2(log, data, evt, htmlText, eventTawId);
    await data.SaveChangesAsync();

    return ParseEventResult.ValidEvent;
}