Beispiel #1
0
        /// <summary>
        /// Timer tick handler: asks the database context for the next profile to process,
        /// downloads and parses its page, and records the outcome.
        /// </summary>
        /// <remarks>
        /// When the returned profile id is not below <c>End</c>, the timer is stopped and
        /// <c>isRunning</c> is cleared. Exceptions raised while loading or parsing
        /// (for example the ones thrown by <c>Parse</c>) propagate to the caller; the
        /// context is disposed in every case.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Elapsed event data (unused).</param>
        private void Timer_Elapsed(object sender, ElapsedEventArgs e)
        {
            Log.Information("timer elapsed");

            // A fresh context is created per tick (the original author's note says this is
            // meant to keep the handler thread safe). The using statement guarantees the
            // context is released even when loading, parsing or saving throws.
            using (var context = new MariaContext())
            {
                var status = context.NextProfile();

                Log.Information("NextProfile finished successfully");
                if (status == null)
                {
                    // Nothing to process on this tick.
                    return;
                }

                if (status.Id < End)
                {
                    isWorking = true;
                    HtmlWeb web = new HtmlWeb();

                    var doc = web.Load(MakeUrl(status.Id));
                    Log.Information("Loaded page successfully");
                    var result = this.Parse(status.Id, doc, status);
                    Log.Information("parsed successfully");

                    if (result != null)
                    {
                        status.Status = core.models.ProfileStatus.Complete;
                        context.SetStatusForId(status);
                        context.Users.Add(result);
                        context.SaveChanges();
                    }
                    else
                    {
                        // Parse returned null: the profile page does not exist.
                        // NOTE(review): no SaveChanges here - presumably SetStatusForId
                        // persists on its own; confirm against MariaContext.
                        status.Status = core.models.ProfileStatus.ProfileNotPresent;
                        context.SetStatusForId(status);
                    }
                }
                else
                {
                    Log.Information("oh no, in a bad place! <- could be the root cause!");

                    // Reached the configured end of the id range: stop scheduling ticks.
                    timer.Stop();
                    isRunning = false;
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Extracts the profile fields from a downloaded profile page.
        /// </summary>
        /// <param name="id">Profile id used to construct the resulting model.</param>
        /// <param name="doc">The loaded HTML document of the profile page.</param>
        /// <param name="userProfileStatus">
        /// Scraping status record for this profile; it is marked as
        /// <c>ProfileStatus.Error</c> when a 403 marker is detected in the page.
        /// </param>
        /// <returns>
        /// The populated <c>UserPageModel</c>, or <c>null</c> when the page contains
        /// the "An Error Has Occurred!" marker (profile does not exist).
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when the page text contains "403", or when the base node selected by
        /// <c>XpathSelectors.baseSelector</c> is missing.
        /// </exception>
        private UserPageModel Parse(int id, HtmlDocument doc, UserProfileScrapingStatus userProfileStatus)
        {
            if (doc.DocumentNode.InnerHtml.Contains("An Error Has Occurred!"))
            {
                Log.Information("parsing - Profile doesn't exist");

                return null;
            }

            // NOTE(review): this matches "403" anywhere in the page text, so a legitimate
            // profile that merely displays the number 403 would also be treated as rate
            // limiting. The HTTP status code is not available here; consider checking it
            // at the call site instead.
            if (doc.DocumentNode.InnerText.Contains("403"))
            {
                Log.Information("rate limited!!!! <- could be the root cause!");

                // The using statement releases the context even if updating the status throws.
                using (var context = new MariaContext())
                {
                    userProfileStatus.Status = ProfileStatus.Error;
                    context.SetStatusForId(userProfileStatus);
                }

                throw new Exception("Error! getting 403 response. Quitting so we don't get locked out for longer!");
            }

            var item = new UserPageModel(id);

            item.Name = handleItem(doc.DocumentNode.SelectNodes(XpathSelectors.NameSelector));
            var baseCol = doc.DocumentNode.SelectSingleNode(XpathSelectors.baseSelector);

            if (baseCol == null)
            {
                throw new Exception("Error, should never be null!");
            }

            // Each field is looked up by its label via DynamicXpath; the value is read
            // from the second cell (td[2]) of the matched node.
            item.Merit          = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Merit")}/td[2]"));
            item.Position       = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Position")}/td[2]"));
            item.Posts          = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Posts")}/td[2]"));
            item.Activity       = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Activity")}/td[2]"));
            item.DateRegistered = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Date Registered")}/td[2]"));
            item.LastActive     = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Last Active")}/td[2]"));
            item.Gender         = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Gender")}/td[2]"));
            item.Age            = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Age")}/td[2]"));
            item.Location       = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Location")}/td[2]"));
            item.LocalTime      = handleItem(baseCol.SelectNodes($"{DynamicXpath(baseCol, "Local Time")}/td[2]"));
            Log.Information("Finished successfully");

            return item;
        }