Пример #1
0
        /// <summary>
        /// Creates a new <see cref="UserProfileScrapingStatus"/>, adds it to
        /// <c>ProfileScrapingStatuses</c>, saves the pending changes and returns it.
        /// </summary>
        /// <returns>
        /// The status instance that was added and saved. Its <c>Id</c> is logged after
        /// the save (presumably store-generated at that point; confirm against the model).
        /// </returns>
        public UserProfileScrapingStatus NextProfile()
        {
            Log.Logger.Information("In next Profile");

            // Register a blank status row so the caller has something to work against.
            var freshStatus = new UserProfileScrapingStatus();
            this.ProfileScrapingStatuses.Add(freshStatus);
            Log.Logger.Information("adding s to profileScrapingStatus");

            // Save before logging the Id, matching the order the messages describe.
            this.SaveChanges();
            Log.Logger.Information($"saved changes, Starting work on {freshStatus.Id}");

            return freshStatus;
        }
Пример #2
0
 /// <summary>
 /// Inserts or updates the given scraping status: the entry is marked
 /// <c>Modified</c> when <c>ProfileStatusExists</c> reports its Id, otherwise
 /// <c>Added</c>, and the change is then saved.
 /// </summary>
 /// <param name="status">The status entity to persist.</param>
 /// <returns>
 /// <c>true</c> once the save completes. Failures surface as exceptions, so this
 /// method never returns <c>false</c>.
 /// </returns>
 /// <exception cref="Exception">
 /// Any exception raised while setting the entry state or saving is written to the
 /// console and rethrown unchanged.
 /// </exception>
 public bool SetStatusForId(UserProfileScrapingStatus status)
 {
     try
     {
         this.Entry(status).State = ProfileStatusExists(status.Id) ? EntityState.Modified : EntityState.Added;
         this.SaveChanges();
         return true;
     }
     catch (Exception e)
     {
         Console.WriteLine($"Error: {e.Message}");

         // Bare rethrow keeps the original stack trace; "throw e;" would reset it to this frame.
         throw;
     }
 }
Пример #3
0
        /// <summary>
        /// Builds a <see cref="UserPageModel"/> from a downloaded profile page.
        /// </summary>
        /// <param name="id">Identifier passed to the <see cref="UserPageModel"/> constructor.</param>
        /// <param name="doc">The parsed HTML document of the profile page.</param>
        /// <param name="userProfileStatus">
        /// Scraping status for this profile; set to <c>ProfileStatus.Error</c> and persisted
        /// when the page looks like a 403 response.
        /// </param>
        /// <returns>
        /// The populated model, or <c>null</c> when the page contains
        /// "An Error Has Occurred!" (treated as a non-existent profile).
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when the page text contains "403", or when the base node selected by
        /// <c>XpathSelectors.baseSelector</c> is missing.
        /// </exception>
        private UserPageModel Parse(int id, HtmlDocument doc, UserProfileScrapingStatus userProfileStatus)
        {
            if (doc.DocumentNode.InnerHtml.Contains("An Error Has Occurred!"))
            {
                Log.Information("parsing - Profile doesn't exist");

                return null;
            }

            // NOTE(review): this matches "403" anywhere in the page text, so an ordinary
            // profile whose content happens to contain "403" would also land here - confirm
            // whether a stricter check (e.g. on the HTTP status code) is available.
            if (doc.DocumentNode.InnerText.Contains("403"))
            {
                Log.Information("rate limited!!!! <- could be the root cause!");

                // "using" guarantees the context is disposed even if SetStatusForId throws;
                // the previous manual Dispose() call was skipped on that path.
                using (var context = new MariaContext())
                {
                    userProfileStatus.Status = ProfileStatus.Error;
                    context.SetStatusForId(userProfileStatus);
                }

                throw new Exception("Error! getting 403 response. Quitting so we don't get locked out for longer!");
            }

            var item = new UserPageModel(id);

            item.Name = handleItem(doc.DocumentNode.SelectNodes(XpathSelectors.NameSelector));
            var baseCol = doc.DocumentNode.SelectSingleNode(XpathSelectors.baseSelector);

            if (baseCol == null)
            {
                throw new Exception("Error, should never be null!");
            }

            // Each field is read from the second cell (td[2]) under the path that
            // DynamicXpath returns for the given label.
            item.Merit          = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Merit")}/td[2]"));
            item.Position       = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Position")}/td[2]"));
            item.Posts          = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Posts")}/td[2]"));
            item.Activity       = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Activity")}/td[2]"));
            item.DateRegistered = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Date Registered")}/td[2]"));
            item.LastActive     = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Last Active")}/td[2]"));
            item.Gender         = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Gender")}/td[2]"));
            item.Age            = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Age")}/td[2]"));
            item.Location       = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Location")}/td[2]"));
            item.LocalTime      = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Local Time")}/td[2]"));
            Log.Information("Finished successfully");

            return item;
        }