Пример #1
0
        /// <summary>
        /// Downloads the page of the post with the given id, parses it and saves the post
        /// together with its comments, community, tags and links through a <c>DataContext</c>.
        /// </summary>
        /// <remarks>
        /// Returns without saving anything when the page contains no <c>.story</c> element or
        /// when the author name is exactly "ads". The rating, title, date, comments-count and
        /// toggle-button nodes are dereferenced without null checks, so a page that lacks any
        /// of them makes this method throw.
        /// </remarks>
        /// <param name="postId">
        /// Post identifier; appended to the post URL and stored as the id of the created entity.
        /// </param>
        public void GragPost(int postId)
        {
            var timer = Stopwatch.StartNew();

            // Values scraped from the page are machine-readable, so they are parsed with the
            // invariant culture rather than whatever culture the current thread happens to use.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // NOTE(review): an earlier version of this download converted the response from
            // windows-1251 - confirm that DownloadString decodes these pages correctly.
            using (var client = new WebClient()) {
                html.LoadHtml(client.DownloadString("http://pikabu.ru/" + UrlConstants.SpecifiedPost + postId));
            }

            var processingDateTime = DateTime.Now;

            var post = html.DocumentNode.QuerySelector(".story");
            if (post == null)
            {
                return;
            }

            var authorName = post.QuerySelector(".story__author").InnerText;
            if (authorName == "ads")
            {
                return;
            }

            using (var db = new DataContext()) {
                var postInfoNode = post.QuerySelector(".story__toggle-button");
                var postRate     = post.QuerySelector(".story__rating-count");

                // A trash icon inside the rating node marks a deleted post (e.g. post 4801869).
                var isDeleted = postRate.QuerySelectorAll("i.i-sprite--feed__rating-trash").Any();
                // Any <i> element inside the rating node means there is no numeric rating to parse.
                var rateNotAvailable = postRate.QuerySelectorAll("i").Any();

                var postTitle   = post.QuerySelector(".story__header-title a.story__title-link").InnerText;
                var postContent = post.QuerySelector(".b-story__content")?.InnerHtml;

                // The "title" attribute of the date node is passed to the Unix-timestamp helper.
                var postCreateDate = Helper.UnixTimeStampToDateTime(
                    double.Parse(
                        post.QuerySelector(".story__date").Attributes.AttributesWithName("title").FirstOrDefault().Value,
                        invariant));

                // Only the part of the counter text before the first space is parsed as the number.
                var commentsCount = Int32.Parse(
                    post.QuerySelector(".story__comments-count").InnerText.Split(' ')[0],
                    invariant);

                var postType = postInfoNode.Attributes.AttributesWithName("data-story-type").FirstOrDefault().Value;
                var isLong   = postInfoNode.Attributes.AttributesWithName("data-story-long").FirstOrDefault().Value
                               .Equals("true", StringComparison.OrdinalIgnoreCase);

                var dbPostEntry = new Post {
                    Id        = postId,
                    Title     = postTitle,
                    Content   = postContent,
                    Created   = postCreateDate,
                    LastCheck = processingDateTime,

                    CommentsCount = commentsCount,
                    IsDeleted     = isDeleted,
                    Rating        = rateNotAvailable ? (int?)null : int.Parse(postRate.InnerText, invariant),
                    AuthorName    = authorName,
                    Type          = postType,
                    IsLong        = isLong,
                    IsMine        = post.QuerySelectorAll(".story__header-title a.story__authors").Any(),
                    IsStraw       = post.QuerySelectorAll(".story__header-additional a.story__straw").Any(),

                    Tags         = new List <PostTag>(),
                    PostLinks    = new List <PostLink>(),
                    UserComments = new List <UserComment>()
                };
                db.Posts.Add(dbPostEntry);

                // Comments that are already present in the downloaded page.
                foreach (var comment in html.DocumentNode.QuerySelectorAll(".b-comments_type_main .b-comment"))
                {
                    CommentProcessing(comment, dbPostEntry);
                }

                // Fewer comments collected than the page counter reports: ask for the remainder.
                if (dbPostEntry.CommentsCount > dbPostEntry.UserComments.Count)
                {
                    AjaxUploadRestComments(dbPostEntry);
                }

                // The link that directly follows the author link is treated as the community.
                var postCommunity = post.QuerySelector(".story__author + a");
                if (postCommunity != null)
                {
                    // Read the name up front so the query below only captures a plain string.
                    var communityName = postCommunity.InnerText;
                    var community = db.Communities.FirstOrDefault(x => x.Name == communityName);
                    if (community == null)
                    {
                        community = new Community {
                            Name = communityName,
                            Link = postCommunity.GetAttributeValue("href", (string)null)
                        };
                        db.Communities.Add(community);
                    }
                    dbPostEntry.Community = community;
                }

                PostProcessingManager.ProcessingTags(db, dbPostEntry, post.QuerySelectorAll(".story__tag").Select(x => x.InnerText).ToList());

                // Images: the URL is the href of the parent of each [data-large-image] node.
                foreach (var imageNode in post.QuerySelectorAll("[data-large-image]"))
                {
                    dbPostEntry.PostLinks.Add(new PostLink {
                        Url  = imageNode.ParentNode.GetAttributeValue("href", null),
                        Type = LinkType.Image
                    });
                }

                // Videos: DataId is whatever follows the last '/' of the URL (the whole URL when
                // it contains no '/'). A player without a data-url attribute gets a null DataId
                // instead of failing the whole post with a NullReferenceException.
                foreach (var videoNode in post.QuerySelectorAll(".b-video"))
                {
                    var url = videoNode.GetAttributeValue("data-url", null);
                    dbPostEntry.PostLinks.Add(new PostLink {
                        Url    = url,
                        DataId = url?.Substring(url.LastIndexOf('/') + 1),
                        Type   = LinkType.Video
                    });
                }

                // Gif players.
                foreach (var gifNode in post.QuerySelectorAll(".b-gifx__player"))
                {
                    dbPostEntry.PostLinks.Add(new PostLink {
                        Url  = gifNode.GetAttributeValue("data-src", null),
                        Type = LinkType.Gif
                    });
                }

                // External nofollow links; the link object is built by GetRefLinkByUrl.
                foreach (var linkNode in post.QuerySelectorAll("noindex > a[rel=\"nofollow\"]"))
                {
                    var address = linkNode.GetAttributeValue("href", null);
                    dbPostEntry.PostLinks.Add(GetRefLinkByUrl <PostLink>(address));
                }

                // The timer is stopped before saving, so ProcessedTime excludes SaveChanges itself.
                timer.Stop();
                dbPostEntry.ProcessedTime = timer.Elapsed;
                db.SaveChanges();
            }
        }