Exemple #1
0
        /// <summary>
        /// Builds the pin-detail request URL for <paramref name="pinId"/> and stores whatever
        /// <c>getDataPinterestDetail</c> returns for it in <paramref name="model"/>.
        /// </summary>
        /// <param name="model">Overwritten with the value returned by <c>getDataPinterestDetail</c>.</param>
        /// <param name="pinId">Pin identifier; used both in the URL path and in the JSON options payload.</param>
        /// <returns>Always <c>false</c>; the outcome is delivered through <paramref name="model"/>.</returns>
        public static bool Get_Tagged_PinsDetail(ref PinsModels model, string pinId = "")
        {
            // Request options serialized into the "data" query argument.
            var payload = new
            {
                options = new
                {
                    field_set_key   = "detailed",
                    id              = pinId,
                    is_landing_page = false,
                },
                context = new
                {
                },
            };
            string serializedPayload = JsonConvert.SerializeObject(payload);

            string endpoint = Commons.HostApiPinDetail + "/pin/" + pinId + "/";

            // Strip line breaks and tabs from the serialized payload before it goes into the URL.
            // NOTE(review): the payload is appended without URL encoding - confirm that is intended.
            string compactPayload = Preg_replace(
                serializedPayload,
                new string[] { "\n", "\r", "\t" },
                new string[] { "", "", "" });

            var    cacheBuster    = GetTimestamp(DateTime.Now);
            var    requestUrl     = endpoint + "&data=" + compactPayload + "&_=" + cacheBuster;
            string bookmarkCursor = "";

            model = getDataPinterestDetail(requestUrl, pinId, ref bookmarkCursor);
            return(false);
        }
        /// <summary>
        /// Reads the creation time of a post from the <c>data-utime</c> attribute of the first
        /// <c>abbr</c> element below <paramref name="node"/> and stores it, converted to local time,
        /// in <c>pin.Created_At</c>.
        /// </summary>
        /// <param name="listFbId">Accepted ids; the id extracted from the node's <c>ajaxify</c> attribute must be in this list.</param>
        /// <param name="node">Anchor element to inspect. May be null (callers pass a <c>FirstOrDefault()</c> result).</param>
        /// <param name="pin">Pin whose <c>Created_At</c> is updated on success.</param>
        /// <returns><c>true</c> when <c>Created_At</c> was set; otherwise <c>false</c>.</returns>
        public static bool findCreateAt(List <string> listFbId, ref HtmlNode node, ref PinsModels pin)
        {
            if (node == null || listFbId == null)
            {
                return(false);
            }

            try
            {
                var ajaxify = node.GetAttributeValue("ajaxify", "");
                var fbID    = findFbId(ajaxify);
                if (!listFbId.Contains(fbID)) /* check fb id */
                {
                    return(false);
                }

                var abbr = node.Descendants("abbr").FirstOrDefault();
                if (abbr == null)
                {
                    return(false);
                }

                // data-utime is interpreted as seconds since 1970-01-01 UTC. It is machine-generated,
                // so parse it culture-independently and without throwing on a missing/empty attribute.
                var    timeStamp = abbr.GetAttributeValue("data-utime", "");
                double seconds;
                if (!double.TryParse(timeStamp,
                                     System.Globalization.NumberStyles.Float,
                                     System.Globalization.CultureInfo.InvariantCulture,
                                     out seconds))
                {
                    return(false);
                }

                var epoch = new DateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeKind.Utc);
                pin.Created_At = epoch.AddSeconds(seconds).ToLocalTime();
                return(true);
            }
            catch (Exception)
            {
                // Best effort: an unusable node or an out-of-range timestamp simply means
                // "no creation date found" for this pin.
                return(false);
            }
        }
        /// <summary>
        /// Renders the product detail view for one pin: loads the pin itself and the related
        /// ("other") pins into a <c>CMS_CrawlerModels</c> view model.
        /// </summary>
        /// <param name="id">Identifier of the pin to display.</param>
        /// <param name="Key">Key forwarded to <c>CrawlerHelper.Get_Tagged_OrtherPins</c>.</param>
        /// <returns>The view bound to the (possibly partially filled) crawler model.</returns>
        public ActionResult ProductDetail(string id, string Key)
        {
            var modelCrawler = new CMS_CrawlerModels();

            try
            {
                var model = new PinsModels();
                CrawlerHelper.Get_Tagged_PinsDetail(ref model, id);
                CrawlerHelper.Get_Tagged_OrtherPins(ref modelCrawler, Key, Commons.PinOrtherDefault, "", 1, id);
                modelCrawler.Pin = model;
            }
            catch (Exception ex)
            {
                // The page is still rendered with whatever was loaded, but the failure must not
                // disappear silently.
                System.Diagnostics.Trace.TraceError("ProductDetail failed for pin '{0}': {1}", id, ex);
            }
            return(View(modelCrawler));
        }
Exemple #4
0
        /// <summary>
        /// Downloads a Facebook detail page using the shared <c>Cookies</c> value and fills
        /// <paramref name="pin"/> with the creation date and the feedback counters found in it.
        /// </summary>
        /// <param name="Url">Path appended to <c>https://www.facebook.com</c>.</param>
        /// <param name="fb_id">Ids accepted as belonging to the pin; forwarded to <c>findCreateAt</c> and <c>findNode</c>.</param>
        /// <param name="pin">Pin that receives the extracted values.</param>
        /// <remarks>Failures are logged and not rethrown.</remarks>
        public static void CrawlerFBDetail(string Url, List <string> fb_id, ref PinsModels pin)
        {
            try
            {
                var uri            = new Uri("https://www.facebook.com" + Url);
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);
                httpWebRequest.Headers["Cookie"] = Cookies;
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                httpWebRequest.Timeout           = 100000;

                // Read the whole body first so the response (and its connection) is released
                // before the comparatively slow HTML parsing starts.
                string html;
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    html = streamReader.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                /* FIND FB CREATED DATE (created_at) */
                var tagA = doc.DocumentNode.Descendants("a").FirstOrDefault(n => n.GetAttributeValue("rel", "") == "theater");
                if (tagA != null)
                {
                    findCreateAt(fb_id, ref tagA, ref pin);
                }

                /* FIND FEEDBACK_TARGET */
                var innerScript = doc.DocumentNode.Descendants()
                                  .Where(n => n.Name == "script")
                                  .Select(n => n.InnerText)
                                  .FirstOrDefault(text => !string.IsNullOrEmpty(text) && text.Contains("require(\"TimeSlice\").guard(function() {require(\"ServerJSDefine\")"));
                if (!string.IsNullOrEmpty(innerScript))
                {
                    findNode(innerScript, "feedbacktarget", 0, fb_id, ref pin);
                }
            }
            catch (Exception ex)
            {
                LogHelper.WriteLogs("ErrorCrawlerFBDetail: ", JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("CrawlerFB Detail", ex);
            }
        }
        /// <summary>
        /// Downloads a Facebook detail page and passes the text of its eleventh <c>script</c>
        /// element to <c>findNode</c> so the feedback counters for <paramref name="fb_id"/> can be
        /// copied into <paramref name="pin"/>.
        /// </summary>
        /// <param name="Url">Path appended to <c>https://www.facebook.com</c>.</param>
        /// <param name="fb_id">Id forwarded to <c>findNode</c>.</param>
        /// <param name="pin">Pin that receives the extracted values.</param>
        /// <remarks>Failures are logged and not rethrown.</remarks>
        public static void CrawlerFBDetail(string Url, string fb_id, ref PinsModels pin)
        {
            // Index of the script element expected to carry the feedback JSON.
            const int feedbackScriptIndex = 10;

            try
            {
                var uri            = new Uri("https://www.facebook.com" + Url);
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);

                // NOTE(review): hard-coded session cookie and proxy digest credentials live in source
                // here; the sibling overloads read the cookie from elsewhere. Move these to
                // configuration (and rotate them) - left unchanged to keep the request identical.
                httpWebRequest.Headers["Cookie"] = "fr=0g932KaBNIHkPNSHd.AWUyWBwpX4_A_YKA4NhvmupYBkk.BbMcZu.uD.Fs5.0.0.BbOtXK.AWXncoeT; sb=NmI3W-ffluEtyFHleEWSjhBl; wd=1920x943; datr=NmI3WwtbosYtTwDtslqJtXZd; c_user=100003727776485; xs=136%3Au6XG_yUasjTeFQ%3A2%3A1530356294%3A6091%3A726; pl=n; spin=r.4066192_b.trunk_t.1530495666_s.1_v.2_; act=1530582591458%2F0; presence=EDvF3EtimeF1530582595EuserFA21B03727776485A2EstateFDutF1530582595488CEchFDp_5f1B03727776485F2CC";
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                httpWebRequest.Headers["Proxy-Authorization"] = "Digest username=\"54737357\", realm=\"anonymox.net\", nonce=\"rt86WwAAAABgQKLm21UAAAwL3yIAAAAA\", uri=\"www.facebook.com:443\", response=\"47dc76deffbdaef3fc92784579b19d65\", qop=auth, nc=0000021f, cnonce=\"d0a10718bc5ea4b9\"";
                httpWebRequest.Timeout = 100000;

                // Read the body and release the response before parsing.
                string html;
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    html = streamReader.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                var script = doc.DocumentNode.Descendants()
                             .Where(n => n.Name == "script").ToList();
                if (script.Count <= feedbackScriptIndex)
                {
                    return;
                }

                var nodeJson = script[feedbackScriptIndex].InnerText;
                if (!string.IsNullOrEmpty(nodeJson))
                {
                    findNode(nodeJson, "feedbacktarget", 0, fb_id, ref pin);
                }
            }
            catch (Exception ex)
            {
                NSLog.Logger.Error("CrawlerFB Detail", ex);
            }
        }
        /// <summary>
        /// Scans <paramref name="input"/> for <c>"feedbacktarget"</c> JSON fragments and copies the
        /// counters of the first fragment whose <c>entidentifier</c> is listed in
        /// <paramref name="fb_id"/> into <paramref name="pin"/>.
        /// </summary>
        /// <param name="input">Script text to search.</param>
        /// <param name="key">Not used for the lookup; the search key is always <c>"feedbacktarget"</c>.</param>
        /// <param name="start">Not used for the lookup; the search always starts at 0.</param>
        /// <param name="fb_id">Ids accepted as belonging to the pin.</param>
        /// <param name="pin">Receives comment/share/reaction counts and the matched id.</param>
        /// <returns><c>true</c> when a matching fragment was found and applied; otherwise <c>false</c>.</returns>
        public static bool findNode(string input, string key, int start, List <string> fb_id, ref PinsModels pin)
        {
            if (string.IsNullOrEmpty(input) || fb_id == null)
            {
                return(false);
            }

            try
            {
                // Iterative instead of recursive: every non-matching fragment is cut out of the
                // text and the remainder is searched again.
                while (true)
                {
                    var jsonfeedbacktarget = findElement(input, "feedbacktarget", 0);
                    if (string.IsNullOrEmpty(jsonfeedbacktarget))
                    {
                        return(false);
                    }

                    var dobj = JsonConvert.DeserializeObject <JsonObject_v2>(jsonfeedbacktarget);
                    if (dobj == null)
                    {
                        return(false);
                    }

                    if (fb_id.Contains(dobj.entidentifier))
                    {
                        pin.commentTotalCount = dobj.commentcount;
                        pin.sharecount        = dobj.sharecount;
                        pin.reactioncount     = dobj.reactioncount;
                        pin.ID = dobj.entidentifier;
                        return(true);
                    }

                    var remaining = input.Replace("\"feedbacktarget\":" + jsonfeedbacktarget, "");
                    if (remaining.Length == input.Length)
                    {
                        // Nothing was removed (the fragment is not preceded by the exact
                        // "feedbacktarget": prefix), so searching again would find the same
                        // fragment forever. Stop instead of looping/overflowing the stack.
                        return(false);
                    }
                    input = remaining;
                }
            }
            catch (Exception ex)
            {
                // Malformed fragments are expected in scraped pages: report "not found", but keep a trace.
                NSLog.Logger.Error("findNode", ex);
                return(false);
            }
        }
        /// <summary>
        /// Downloads a Facebook detail page with the supplied cookie and fills <paramref name="pin"/>
        /// with the creation date and feedback counters found in it. Transient failures are retried
        /// a bounded number of times with a short pause in between.
        /// </summary>
        /// <param name="Url">Absolute URL of the detail page.</param>
        /// <param name="fb_id">Ids accepted as belonging to the pin; forwarded to <c>findCreateAt</c> and <c>findNode</c>.</param>
        /// <param name="cookie">Value sent in the <c>Cookie</c> request header.</param>
        /// <param name="pin">Pin that receives the extracted values.</param>
        /// <remarks>
        /// Retried: non-200 responses, HTTP 404, non-protocol <c>WebException</c>s (e.g. timeouts) and
        /// <c>IOException</c>s. Other protocol errors and unexpected exceptions are logged and end the call.
        /// An invalid <paramref name="Url"/> still throws to the caller.
        /// </remarks>
        public static void CrawlerFBDetail(string Url, List <string> fb_id, string cookie, ref PinsModels pin)
        {
            const int maxAttempts  = 6;   // first try + 5 retries
            const int retryDelayMs = 500;

            Uri uri = new Uri(Url);

            // SSL 3.0 is deliberately not enabled: it is insecure and newer .NET Framework versions
            // reject it when assigned here.
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                if (attempt > 1)
                {
                    Thread.Sleep(retryDelayMs);
                }

                // A request object cannot be reused, so build a fresh one per attempt.
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);
                httpWebRequest.KeepAlive         = false;
                httpWebRequest.Headers["Cookie"] = cookie;
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Accept            = "*/*";
                httpWebRequest.Timeout           = 9000000;

                string html;
                try
                {
                    using (HttpWebResponse httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                    {
                        if (httpResponse.StatusCode != HttpStatusCode.OK)
                        {
                            continue; // retry; the using block releases the response first
                        }

                        using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                        {
                            html = streamReader.ReadToEnd();
                        }
                    }
                }
                catch (WebException ex)
                {
                    NSLog.Logger.Info("Crawl detail error : " + Url + " " + ex.Message);

                    bool retry = true;
                    if (ex.Status == WebExceptionStatus.ProtocolError && ex.Response != null)
                    {
                        // Of the HTTP error responses only 404 is retried.
                        using (var resp = (HttpWebResponse)ex.Response)
                        {
                            retry = resp.StatusCode == HttpStatusCode.NotFound;
                        }
                    }

                    if (retry)
                    {
                        continue;
                    }
                    return;
                }
                catch (IOException exIO)
                {
                    NSLog.Logger.Info("Crawl detail error io exception" + Url + " ", exIO.Message);
                    continue;
                }
                catch (Exception ex)
                {
                    NSLog.Logger.Error("Crawl detail error :", ex);
                    return;
                }

                // The page was downloaded; parsing problems are not retried.
                try
                {
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(html);

                    /* FIND FB CREATED DATE (created_at) */
                    var tagA = doc.DocumentNode.Descendants("a").FirstOrDefault(n => n.GetAttributeValue("rel", "") == "theater");
                    if (tagA != null)
                    {
                        findCreateAt(fb_id, ref tagA, ref pin);
                    }

                    /* FIND FEEDBACK_TARGET */
                    var innerScript = doc.DocumentNode.Descendants()
                                      .Where(n => n.Name == "script")
                                      .Select(n => n.InnerText)
                                      .FirstOrDefault(text => !string.IsNullOrEmpty(text) && text.Contains("require(\"TimeSlice\").guard(function() {require(\"ServerJSDefine\")"));
                    if (!string.IsNullOrEmpty(innerScript))
                    {
                        findNode(innerScript, "feedbacktarget", 0, fb_id, ref pin);
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.WriteLogs("ErrorCrawlerFBDetail: ", JsonConvert.SerializeObject(ex));
                    NSLog.Logger.Error("CrawlerFB Detail", ex);
                }
                return;
            }

            NSLog.Logger.Info("Crawl detail error : " + Url + " giving up after " + maxAttempts + " attempts");
        }
        /// <summary>
        /// Parses a Facebook page HTML fragment, builds one <c>PinsModels</c> per image found in its
        /// posts and appends them to <c>pins1.Pins</c>. On the first page the fan page id is also
        /// extracted into <paramref name="_pageId"/>.
        /// </summary>
        /// <param name="strHtml">HTML to parse; null or empty input is ignored.</param>
        /// <param name="IsNextPage">When <c>false</c>, the page id is read from the first post container.</param>
        /// <param name="pins1">Model whose <c>Pins</c> list receives the parsed pins.</param>
        /// <param name="_pageId">Set to the <c>landing_page_id</c> value when it can be read.</param>
        /// <remarks>Failures are logged and not rethrown; in that case no pins are appended.</remarks>
        public static void CrawlerDataFacebook(string strHtml, bool IsNextPage, ref CMS_CrawlerModels pins1, ref string _pageId)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return;
            }

            try
            {
                CMS_CrawlerModels pins = new CMS_CrawlerModels();
                var htmlDoc            = new HtmlDocument();
                htmlDoc.LoadHtml(strHtml);

                // Post containers; the first one also carries the page metadata.
                List <HtmlNode> postNodes = htmlDoc.DocumentNode.Descendants()
                                            .Where(x => x.Name == "div" && FbClassContains(x, "_643h")).ToList();

                //find page id of fan page
                if (!IsNextPage && postNodes.Count > 0)
                {
                    var pageId = FbReadLandingPageId(postNodes[0]);
                    if (pageId != null)
                    {
                        _pageId = pageId;
                    }
                }

                // Declared outside the loop on purpose: a post without its own description/owner
                // keeps the values of the previous post, as before.
                var description = "";
                var OwnerName   = "";
                foreach (var postNode in postNodes)
                {
                    var contentNode = postNode.Descendants("div")
                                      .FirstOrDefault(x => !x.InnerText.Equals("report") &&
                                                      x.InnerHtml.Contains("_5pbx userContent _3576") &&
                                                      x.InnerHtml.Contains("_6a _5u5j _6b") &&
                                                      x.InnerHtml.Contains("_5pcp _5lel _2jyu _232_") &&
                                                      x.InnerHtml.Contains("mtm"));
                    if (contentNode == null)
                    {
                        continue;
                    }

                    var contentHtml = contentNode.InnerHtml;
                    if (string.IsNullOrEmpty(contentHtml))
                    {
                        continue;
                    }

                    var contentDoc = new HtmlDocument();
                    contentDoc.LoadHtml(contentHtml);
                    var contentRoot = contentDoc.DocumentNode;

                    // Description
                    var descriptionNode = contentRoot.Descendants().FirstOrDefault(x => FbClassContains(x, "_5pbx userContent _3576"));
                    if (descriptionNode != null)
                    {
                        description = descriptionNode.InnerText;
                        if (!string.IsNullOrEmpty(description))
                        {
                            description = description.Replace("&quot;", "");
                        }
                    }

                    // Owner name: first anchor below the first owner block that actually has one.
                    var ownerAnchor = contentRoot.Descendants()
                                      .Where(x => FbClassContains(x, "_6a _5u5j _6b"))
                                      .Select(x => x.Descendants("a").FirstOrDefault())
                                      .FirstOrDefault(a => a != null);
                    if (ownerAnchor != null)
                    {
                        OwnerName = ownerAnchor.InnerText;
                        if (!string.IsNullOrEmpty(OwnerName))
                        {
                            OwnerName = OwnerName.Replace("&quot;", "");
                        }
                        else
                        {
                            pins.ErrorStatus = (byte)Commons.EErrorStatus.AccBlocked;
                        }
                    }

                    // fb_id
                    List <string> fb_ids = FbReadPostIds(contentRoot);

                    //  Image
                    var mediaNodes = contentRoot.Descendants().Where(x => FbClassContains(x, "mtm")).ToList();
                    foreach (var mediaNode in mediaNodes)
                    {
                        // post normal
                        // NOTE(review): anchors are taken from the whole post content, not from
                        // mediaNode, so a post with several "mtm" nodes yields the same pins more
                        // than once - confirm whether that is intended.
                        FbAddNormalPins(contentNode, fb_ids, OwnerName, description, pins);

                        //post dynamic
                        FbAddDynamicPins(mediaNode, fb_ids, OwnerName, pins);
                    }
                }

                if (pins.Pins != null && pins.Pins.Any())
                {
                    pins1.Pins.AddRange(pins.Pins);
                }
            }
            catch (Exception ex)
            {
                NSLog.Logger.Error("CrawlerDataFacebook", ex);
            }
        }

        /// <summary>True when the node has a class attribute whose value contains <paramref name="classFragment"/>.</summary>
        private static bool FbClassContains(HtmlNode node, string classFragment)
        {
            var classAttribute = node.Attributes["class"];
            return(classAttribute != null && classAttribute.Value.Contains(classFragment));
        }

        /// <summary>Reads <c>landing_page_id</c> from the node's HTML-encoded <c>data-report-meta</c> JSON; null when unavailable.</summary>
        private static string FbReadLandingPageId(HtmlNode postNode)
        {
            var reportMeta = postNode.GetAttributeValue("data-report-meta", "");
            if (string.IsNullOrEmpty(reportMeta))
            {
                return(null);
            }

            try
            {
                JObject meta        = JObject.Parse(System.Web.HttpUtility.HtmlDecode(reportMeta));
                var     pageIdToken = meta.SelectToken("landing_page_id");
                return(pageIdToken == null ? null : pageIdToken.ToString());
            }
            catch (Exception ex)
            {
                // Unreadable metadata must not prevent the posts themselves from being parsed.
                NSLog.Logger.Error("CrawlerDataFacebook page id", ex);
                return(null);
            }
        }

        /// <summary>Extracts the post ids from the element id of the first subtitle block that has one.</summary>
        private static List <string> FbReadPostIds(HtmlNode contentRoot)
        {
            var ids    = new List <string>();
            var idNode = contentRoot.Descendants()
                         .FirstOrDefault(x => FbClassContains(x, "_5pcp _5lel _2jyu _232_") && !string.IsNullOrEmpty(x.Id));
            if (idNode == null)
            {
                return(ids);
            }

            var charecter = "";
            var fb_id     = findFbId_v3(idNode.Id, "subtitle_", "_", ref charecter);
            if (!string.IsNullOrEmpty(fb_id))
            {
                ids.Add(fb_id);
            }
            // findFbId_v3 reports ";" through its ref argument when a second, ';'-delimited id should be read.
            if (!string.IsNullOrEmpty(charecter) && charecter.Equals(";"))
            {
                fb_id = findFbId_v3(idNode.Id, ";", ";", ref charecter);
                if (!string.IsNullOrEmpty(fb_id))
                {
                    ids.Add(fb_id);
                }
            }
            return(ids);
        }

        /// <summary>Adds one pin per anchor of a regular post that carries both a <c>data-ploi</c> image and an <c>href</c>.</summary>
        private static void FbAddNormalPins(HtmlNode contentNode, List <string> postFbIds, string ownerName, string description, CMS_CrawlerModels pins)
        {
            foreach (var anchor in contentNode.Descendants("a").ToList())
            {
                var image      = anchor.GetAttributeValue("data-ploi", "");
                var detailPath = anchor.GetAttributeValue("href", "");
                if (!string.IsNullOrEmpty(image))
                {
                    image = image.Replace("amp;", "");
                }
                if (string.IsNullOrEmpty(image) || string.IsNullOrEmpty(detailPath))
                {
                    continue;
                }

                // The fifth path segment of the link is used as the photo id, followed by the post ids.
                var pinFbIds = new List <string>();
                var segments = detailPath.Split('/');
                if (segments.Length >= 5)
                {
                    pinFbIds.Add(segments[4]);
                }
                pinFbIds.AddRange(postFbIds);

                var pin = new PinsModels();
                pin.LinkApi     = "https://www.facebook.com" + detailPath;
                pin.ImageURL    = image;
                pin.OwnerName   = ownerName;
                pin.Description = description;
                pin.FbIds       = pinFbIds;
                pins.Pins.Add(pin);
            }
        }

        /// <summary>Adds one pin per <c>_5ya</c> list item of a dynamic (carousel-style) post below <paramref name="mediaNode"/>.</summary>
        private static void FbAddDynamicPins(HtmlNode mediaNode, List <string> postFbIds, string ownerName, CMS_CrawlerModels pins)
        {
            var listNode = mediaNode.Descendants("ul").FirstOrDefault();
            if (listNode == null)
            {
                return;
            }

            var listDoc = new HtmlDocument();
            listDoc.LoadHtml(listNode.InnerHtml);
            var listItems = listDoc.DocumentNode.Descendants()
                            .Where(x => x.Name == "li" && FbClassContains(x, "_5ya")).ToList();

            string postFbId = postFbIds.Count > 0 ? postFbIds[0] : null;

            // Sequential on purpose: the pins are appended to a shared List, which is not safe to
            // mutate from parallel workers, and the per-item work is small.
            foreach (var listItem in listItems)
            {
                var pin       = new PinsModels();
                var imageNode = listItem.Descendants("img").FirstOrDefault();
                if (imageNode != null)
                {
                    var image = imageNode.GetAttributeValue("src", "");
                    if (!string.IsNullOrEmpty(image))
                    {
                        image        = image.Replace("amp;", "");
                        pin.ImageURL = image;

                        var imageKey = findFbOh(image);
                        if (string.IsNullOrEmpty(imageKey))
                        {
                            imageKey = findFbHash(image);
                        }

                        // A composite id needs both an image key and a post id; otherwise fall back
                        // to a random id.
                        if (!string.IsNullOrEmpty(imageKey) && postFbId != null)
                        {
                            pin.ID = imageKey + "_" + postFbId;
                        }
                        else
                        {
                            pin.ID = Guid.NewGuid().ToString();
                        }
                    }
                }

                var linkNode = listItem.Descendants("a").FirstOrDefault();
                if (linkNode != null)
                {
                    pin.Link = linkNode.GetAttributeValue("href", "");
                }

                //description: first div whose last child is a text node
                var descriptionNode = listItem.Descendants("div")
                                      .FirstOrDefault(x => x.LastChild != null && x.LastChild.Name.Equals("#text"));
                if (descriptionNode != null)
                {
                    pin.Description = descriptionNode.InnerText;
                }

                pin.OwnerName = ownerName;
                pin.IsDynamic = true;
                if (!string.IsNullOrEmpty(pin.ID))
                {
                    pins.Pins.Add(pin);
                }
            }
        }
Exemple #9
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and appends one pin per scraped
        /// post image to <c>pins.Pins</c>.
        /// </summary>
        /// <remarks>
        /// Descriptions, owner names and post ids are collected into three lists and paired
        /// with each image container (a div whose class contains "mtm") by position. When a
        /// list is shorter than the number of containers the corresponding value is simply
        /// not applied. If no owner-name container is found, <c>pins.ErrorStatus</c> is set
        /// to <c>Commons.EErrorStatus.AccBlocked</c>. Exceptions raised while crawling are
        /// logged and not rethrown.
        /// </remarks>
        /// <param name="url">Absolute URL of the page to crawl.</param>
        /// <param name="pins">Crawler result that receives the scraped pins and the error status.</param>
        public static void CrawlerFb(string url, ref CMS_CrawlerModels pins)
        {
            try
            {
                // A proxy is still picked on every call, but it is not applied while the
                // Proxy assignment below stays commented out.
                int    _port  = 0;
                string _proxy = CommonHelper.RamdomProxy(ref _port);

                var httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
                //httpWebRequest.Proxy = new WebProxy(_proxy,_port);
                /* request need cookie & user agent */
                httpWebRequest.Headers["Cookie"] = Cookies;
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                httpWebRequest.Timeout           = 100000;

                // Dispose the response as well as the reader so the connection is released.
                string html;
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    html = streamReader.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                // Description: text of the first <p> in every post-content container ("" when there is none).
                List<HtmlNode> nodeHtml = doc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("_5pbx userContent _3576")).ToList();

                var ListDescription = new List<string>();
                foreach (var item in nodeHtml)
                {
                    var firstParagraph = item.Descendants("p").FirstOrDefault();
                    var description    = firstParagraph != null ? firstParagraph.InnerText : "";
                    if (!string.IsNullOrEmpty(description))
                    {
                        description = description.Replace("&quot;", "");
                    }
                    ListDescription.Add(description);
                }

                // Name: text of the first <a> in every owner container ("" when there is none).
                List<HtmlNode> nodehtmlName = doc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("_6a _5u5j _6b")).ToList();

                var ListName = new List<string>();
                if (nodehtmlName.Count > 0)
                {
                    foreach (var item in nodehtmlName)
                    {
                        var firstAnchor = item.Descendants("a").FirstOrDefault();
                        var name        = firstAnchor != null ? firstAnchor.InnerText : "";
                        if (!string.IsNullOrEmpty(name))
                        {
                            name = name.Replace("&quot;", "");
                        }
                        ListName.Add(name);
                    }
                }
                else
                {
                    // No owner container at all is reported as a blocked account.
                    pins.ErrorStatus = (byte)Commons.EErrorStatus.AccBlocked;
                }

                // fb_id: one entry per container that carries a non-empty "id" attribute.
                var nodeFb_Id = doc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("_5pcp _5lel _2jyu _232_")).ToList();

                List<string> fb_ids = new List<string>();
                foreach (var item in nodeFb_Id)
                {
                    var strfb_id = item.GetAttributeValue("id", "");
                    if (string.IsNullOrEmpty(strfb_id))
                    {
                        continue;
                    }

                    var parsedId = findFbId_v2(strfb_id);
                    fb_ids.Add(string.IsNullOrEmpty(parsedId) ? "" : parsedId);
                }

                LogHelper.WriteLogs("fb_ids: " + url, JsonConvert.SerializeObject(fb_ids));

                // node html image
                List<HtmlNode> nodeHtmlImage = doc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("mtm")).ToList();

                var index = 0;
                foreach (var item in nodeHtmlImage)
                {
                    // Ids handed to the detail crawler; shared by every anchor of this container.
                    List<string> postFbIds = new List<string>();

                    // post normal
                    foreach (var itemImage in item.Descendants("a").ToList())
                    {
                        var _image     = itemImage.GetAttributeValue("data-ploi", "");
                        var _apiDetail = itemImage.GetAttributeValue("href", "");
                        if (!string.IsNullOrEmpty(_image))
                        {
                            _image = _image.Replace("amp;", "");
                        }

                        if (string.IsNullOrEmpty(_image) || string.IsNullOrEmpty(_apiDetail))
                        {
                            continue;
                        }

                        var Splits = _apiDetail.Split('/');
                        if (Splits.Length >= 5)
                        {
                            postFbIds.Add(Splits[4]);
                        }

                        // index must be strictly below Count before it is used as an indexer.
                        if (index < fb_ids.Count && !string.IsNullOrEmpty(fb_ids[index]))
                        {
                            postFbIds.Add(fb_ids[index]);
                        }

                        var Pin = new PinsModels();
                        CrawlerFBDetail(_apiDetail, postFbIds, ref Pin);
                        Pin.ImageURL = _image;
                        if (index < ListDescription.Count)
                        {
                            Pin.Description = ListDescription[index];
                        }

                        if (index < ListName.Count)
                        {
                            Pin.OwnerName = ListName[index];
                        }
                        pins.Pins.Add(Pin);
                    }

                    //post dynamic
                    var nodeChildDynamic = item.Descendants("ul").FirstOrDefault();
                    if (nodeChildDynamic != null)
                    {
                        var _doc = new HtmlDocument();
                        _doc.LoadHtml(nodeChildDynamic.InnerHtml);
                        var nodeLI = _doc.DocumentNode.Descendants().Where(
                            x => x.Name == "li" && x.Attributes["class"] != null &&
                                 x.Attributes["class"].Value.Contains("_5ya")).ToList();

                        foreach (var itemLI in nodeLI)
                        {
                            var Pin      = new PinsModels();
                            var firstImg = itemLI.Descendants("img").FirstOrDefault();
                            if (firstImg != null)
                            {
                                var _image = firstImg.GetAttributeValue("src", "");
                                if (!string.IsNullOrEmpty(_image))
                                {
                                    _image       = _image.Replace("amp;", "");
                                    Pin.ImageURL = _image;
                                    // A missing post id is treated like an unparsable one: empty suffix.
                                    var postId = index < fb_ids.Count ? fb_ids[index] : "";
                                    Pin.ID = findFbOh(_image) + "_" + postId;
                                }
                            }

                            var firstLink = itemLI.Descendants("a").FirstOrDefault();
                            if (firstLink != null)
                            {
                                Pin.Link = firstLink.GetAttributeValue("href", "");
                            }

                            //description: first <div> whose last child is a text node
                            var _description = itemLI.Descendants("div")
                                               .FirstOrDefault(x => x.LastChild != null && x.LastChild.Name.Equals("#text"));
                            if (_description != null)
                            {
                                Pin.Description = _description.InnerText;
                            }

                            if (index < ListName.Count)
                            {
                                Pin.OwnerName = ListName[index];
                            }

                            // Only items for which an image (and therefore an ID) was found are kept.
                            if (!string.IsNullOrEmpty(Pin.ID))
                            {
                                pins.Pins.Add(Pin);
                            }
                        }
                    }
                    index++;
                }
            }
            catch (Exception ex)
            {
                LogHelper.WriteLogs("ErrorCrawlerFB: " + url, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("Crawler Fb: ", ex);
            }
        }
Exemple #10
0
        /// <summary>
        /// Requests successive "pages/ads/more" fragments for a page, starting at
        /// <paramref name="cursor"/> and advancing by 8 per request, and appends the pins
        /// scraped from each fragment to <c>pins.Pins</c>.
        /// </summary>
        /// <remarks>
        /// Paging is done in a loop rather than by recursion, and it stops as soon as a
        /// response yields no HTML fragment (empty body, no "domops" payload, or an empty
        /// "__html" value). This method does not catch exceptions: request, JSON and parsing
        /// failures propagate to the caller.
        /// </remarks>
        /// <param name="pageId">Value sent as the <c>page_id</c> query parameter.</param>
        /// <param name="userId">Value sent as the <c>__user</c> query parameter.</param>
        /// <param name="cursor">Starting offset, sent as both <c>cursor</c> and <c>unit_count</c>.</param>
        /// <param name="referer">Value of the Referer request header.</param>
        /// <param name="pins">Crawler result that receives the scraped pins.</param>
        public static void CrawlerNextPage(string pageId, string userId, int cursor, string referer, ref CMS_CrawlerModels pins)
        {
            while (true)
            {
                // A proxy is still picked for every request, but it is not applied while the
                // Proxy assignment below stays commented out.
                int    _port  = 0;
                string _proxy = CommonHelper.RamdomProxy(ref _port);

                var url = "https://www.facebook.com/pages/ads/more/?cursor=" + cursor + "&surface=www_page_ads&unit_count=" + cursor + "&country=1&dpr=1&__user=" + userId + "&__a=1&__req=v&__be=1&__pc=PHASED%3ADEFAULT&__rev=4075583&__spin_r=4075583&__spin_b=trunk&__spin_t=1530846023&page_id=" + pageId;
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));

                //httpWebRequest.Proxy = new WebProxy(_proxy, _port);
                /* request need cookie & user agent */
                httpWebRequest.Headers["Cookie"] = Cookies;
                httpWebRequest.Referer           = referer;
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                httpWebRequest.Timeout           = 100000;

                // Dispose the response as well as the reader so the connection is released.
                string html;
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    html = streamReader.ReadToEnd();
                }

                if (string.IsNullOrEmpty(html))
                {
                    return;
                }

                // The JSON body is prefixed with a "for (;;);" guard that must be removed before parsing.
                html = html.Replace("for (;;);", "");
                JavaScriptSerializer jsonSerializer = new JavaScriptSerializer();
                dynamic dobj = jsonSerializer.Deserialize<dynamic>(html);
                if (dobj == null)
                {
                    return;
                }

                var domops = dobj["domops"];
                if (domops == null)
                {
                    return;
                }

                var _objhtmt = domops[0][3];
                if (_objhtmt == null)
                {
                    return;
                }

                string _html = _objhtmt["__html"];
                if (string.IsNullOrEmpty(_html))
                {
                    return;
                }

                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(_html);

                // Description: text of the first <p> in every post-content container ("" when there is none).
                List<HtmlNode> nodeHtml = htmlDoc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("_5pbx userContent _3576")).ToList();

                var ListDescription = new List<string>();
                foreach (var item in nodeHtml)
                {
                    var firstParagraph = item.Descendants("p").FirstOrDefault();
                    var description    = firstParagraph != null ? firstParagraph.InnerText : "";
                    if (!string.IsNullOrEmpty(description))
                    {
                        description = description.Replace("&quot;", "");
                    }
                    ListDescription.Add(description);
                }

                // Name: text of the first <a> in every owner container ("" when there is none).
                List<HtmlNode> nodehtmlName = htmlDoc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("_6a _5u5j _6b")).ToList();

                var ListName = new List<string>();
                foreach (var item in nodehtmlName)
                {
                    var firstAnchor = item.Descendants("a").FirstOrDefault();
                    var name        = firstAnchor != null ? firstAnchor.InnerText : "";
                    if (!string.IsNullOrEmpty(name))
                    {
                        name = name.Replace("&quot;", "");
                    }
                    ListName.Add(name);
                }

                // fb_id: one entry per container that carries a non-empty "id" attribute.
                var nodeFb_Id = htmlDoc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("_5pcp _5lel _2jyu _232_")).ToList();

                List<string> fb_ids = new List<string>();
                foreach (var item in nodeFb_Id)
                {
                    var strfb_id = item.GetAttributeValue("id", "");
                    if (string.IsNullOrEmpty(strfb_id))
                    {
                        continue;
                    }

                    var parsedId = findFbId_v2(strfb_id);
                    fb_ids.Add(string.IsNullOrEmpty(parsedId) ? "" : parsedId);
                }

                List<HtmlNode> nodeHtmlImage = htmlDoc.DocumentNode.Descendants().Where(
                    x => x.Name == "div" && x.Attributes["class"] != null &&
                         x.Attributes["class"].Value.Contains("mtm")).ToList();

                var index = 0;
                foreach (var item in nodeHtmlImage)
                {
                    // Ids handed to the detail crawler; shared by every anchor of this container.
                    List<string> postFbIds = new List<string>();

                    // post normal
                    foreach (var itemImage in item.Descendants("a").ToList())
                    {
                        var _image     = itemImage.GetAttributeValue("data-ploi", "");
                        var _apiDetail = itemImage.GetAttributeValue("href", "");
                        if (!string.IsNullOrEmpty(_image))
                        {
                            _image = _image.Replace("amp;", "");
                        }

                        if (string.IsNullOrEmpty(_image) || string.IsNullOrEmpty(_apiDetail))
                        {
                            continue;
                        }

                        var Splits = _apiDetail.Split('/');
                        if (Splits.Length >= 5)
                        {
                            postFbIds.Add(Splits[4]);
                        }

                        // index must be strictly below Count before it is used as an indexer.
                        if (index < fb_ids.Count && !string.IsNullOrEmpty(fb_ids[index]))
                        {
                            postFbIds.Add(fb_ids[index]);
                        }

                        var Pin = new PinsModels();
                        CrawlerFBDetail(_apiDetail, postFbIds, ref Pin);
                        Pin.ImageURL = _image;
                        if (index < ListDescription.Count)
                        {
                            Pin.Description = ListDescription[index];
                        }

                        if (index < ListName.Count)
                        {
                            Pin.OwnerName = ListName[index];
                        }
                        pins.Pins.Add(Pin);
                    }

                    //post dynamic
                    var nodeChildDynamic = item.Descendants("ul").FirstOrDefault();
                    if (nodeChildDynamic != null)
                    {
                        var _doc = new HtmlDocument();
                        _doc.LoadHtml(nodeChildDynamic.InnerHtml);
                        var nodeLI = _doc.DocumentNode.Descendants().Where(
                            x => x.Name == "li" && x.Attributes["class"] != null &&
                                 x.Attributes["class"].Value.Contains("_5ya")).ToList();

                        foreach (var itemLI in nodeLI)
                        {
                            var Pin      = new PinsModels();
                            var firstImg = itemLI.Descendants("img").FirstOrDefault();
                            if (firstImg != null)
                            {
                                var _image = firstImg.GetAttributeValue("src", "");
                                if (!string.IsNullOrEmpty(_image))
                                {
                                    _image       = _image.Replace("amp;", "");
                                    Pin.ImageURL = _image;
                                    // A missing post id is treated like an unparsable one: empty suffix.
                                    var postId = index < fb_ids.Count ? fb_ids[index] : "";
                                    Pin.ID = findFbOh(_image) + "_" + postId;
                                }
                            }

                            var firstLink = itemLI.Descendants("a").FirstOrDefault();
                            if (firstLink != null)
                            {
                                Pin.Link = firstLink.GetAttributeValue("href", "");
                            }

                            //description: first <div> whose last child is a text node
                            var _description = itemLI.Descendants("div")
                                               .FirstOrDefault(x => x.LastChild != null && x.LastChild.Name.Equals("#text"));
                            if (_description != null)
                            {
                                Pin.Description = _description.InnerText;
                            }

                            if (index < ListName.Count)
                            {
                                Pin.OwnerName = ListName[index];
                            }

                            // Only items for which an image (and therefore an ID) was found are kept.
                            if (!string.IsNullOrEmpty(Pin.ID))
                            {
                                pins.Pins.Add(Pin);
                            }
                        }
                    }
                    index++;
                }

                // Move on to the next page of results.
                cursor = cursor + 8;
            }
        }
Exemple #11
0
        public static CMS_CrawlerModels getDataPinterestHome(string url, CMS_CrawlerModels model, string pinId, ref string bookmarks)
        {
            try
            {
                Uri uri            = new Uri(url);
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);
                httpWebRequest.Headers["X-Requested-With"] = "XMLHttpRequest";
                httpWebRequest.Headers["Cookie"]           = "_auth=1; csrftoken=dMWi2a6L1DTFUHmyqem0oGrDmteiaETw; _pinterest_sess=\"TWc9PSZsSlA1dUF4QWlRWGRYVGR6Qm9mN3pwczUyUDk4ZDYvckduSjl4N3ZSRHlsU1VmWkhBTUsrMU9KNkxjS3pyUk1zREdDL2Rmb2VuT1dwRDhSTmxTOE1Ja0FjOUtreTJVc0o0SmthQ2xhN3lRa3BQVnRMcUF5dlN0Z255Syt4am56VnQvYVQwT0JyejBCSlk4YzFyQ0pEekZwNSs0YjZnMTBseEIvRkU0Um1XeWthZ1cvNGxpdDVyTEdrSHRzWFVLN244T25TaGVoYy93TGVSRjVxNzl5dnlZV1A5L3NlNnc5MWE4djl0ZjNoeEhqTTNuaGduRnZ2VkF1RTd6V1V3VnBCT3cyMksxMHJIdVE0TVVjc3FmWVozVllzekhpNFRGNDFBTERIVzdkcUNUS3NlWEJFdE1mSXJBbnNPVStHQXJiUWJRSENyVVVKTVJYNit5MkZTMFVNN3ptY09FNmFoaHk3Nk9MdUtuRmdDSWRWRVhPTWYrSXA4dFhlRU1hYW5paFNQMU5OcFNwY2xSZlJHZVlWWU03eHFsNWVmSWRHL0ZtN3NhdU9ubzhpUjZqMzNTTUxwMTlOQWRGa29zVUc1UXFqZ1BUYzhHL3M0YndDY2ZBN2ZMZnJQZTlGbXdPWjg5SXJVOEpUMEtPVnMzcjZPcytOVHRFUnlRUnoyNmJZdjl0YXJlOVp1WGQvM29SSi9xWUwvYmFPcDl5VFl1aEw2ZFBtMHlhZ0g4MXlIMXp1dnFXWWY1VytmY0ZPc0FSMzhqYXdhNTBqQjlYRHJ6OE9CY1ViMmljZkFhQkVydGxyVUtlNis4cnh3R3NPbXVTVjZCZUNTR1NKQ3JpWFJsajBsSEFGcytOMnptN2R2S1BXN1NocTFtZVlKMzF0Y1hyQXNseG9DdzdrQklxNnZXMkk2dXQ4azJJOTR4YWlIUDMvVzAwcmQ0SDVqNnhYc3NlTTNpK0ZHUU9xaUpCOER0N1pQaWFFTUhLRGxpdk1EVDlOYi9DdmRLcTQvdUROekpjRXNJSjVtcEl1bWVLUHhRdTVQQk91L1RWS0w0YkkzZDNwaW5mRnJFakRsck9aNTRBUXVsVFdFWVlTRHJ5OUxBWHdMa0V4Jk1FSHZIUWlQUlE2Q05OZWJydEZrV25SQ2tmND0 = \"; G_ENABLED_IDPS=google; _b=\"ATWTNNfXaINNj5j6VvA6 + rquchpAz7VF + IS8VabE7fJo7ragqOV82ASwCOgxcnxHC5k = \"; pnodepath=\" / 4\"; _ga=GA1.2.1908176321.1528170001; fba=True; cm_sub=none; sessionFunnelEventLogged=1; bei=false";
                httpWebRequest.Timeout = 100000;
                var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse();

                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    var answer = streamReader.ReadToEnd();
                    JavaScriptSerializer jsonSerializer = new JavaScriptSerializer();
                    dynamic dobj = jsonSerializer.Deserialize <dynamic>(answer);
                    if (dobj != null)
                    {
                        var resource_data_cache = dobj["resource_response"];
                        if (resource_data_cache != null)
                        {
                            var data = resource_data_cache["data"];
                            if (data != null)
                            {
                                var results = (dynamic)null;
                                results = data;
                                if (results != null)
                                {
                                    foreach (var item in results)
                                    {
                                        var  pin     = new PinsModels();
                                        var  itemPin = item as Dictionary <string, dynamic>;
                                        bool flag    = true;
                                        if (itemPin.ContainsKey("domain"))
                                        {
                                            pin.Domain = itemPin["domain"];
                                        }
                                        else
                                        {
                                            flag = false;
                                        }
                                        if (itemPin.ContainsKey("id"))
                                        {
                                            pin.ID = itemPin["id"];
                                        }
                                        else
                                        {
                                            flag = false;
                                        }
                                        if (itemPin.ContainsKey("link"))
                                        {
                                            pin.Link = itemPin["link"];
                                        }
                                        else
                                        {
                                            flag = false;
                                        }
                                        if (itemPin.ContainsKey("created_at"))
                                        {
                                            pin.Created_At = DateTime.Parse(itemPin["created_at"], new CultureInfo("en-US", true));
                                        }
                                        else
                                        {
                                            flag = false;
                                        }
                                        if (itemPin.ContainsKey("images"))
                                        {
                                            var Images = itemPin["images"] as Dictionary <string, dynamic>;
                                            if (Images != null)
                                            {
                                                foreach (var itemImg in Images)
                                                {
                                                    var Image       = itemImg.Value;
                                                    var _ImageModel = new ImageModels()
                                                    {
                                                        url    = Image["url"],
                                                        height = Convert.ToInt16(Image["height"]),
                                                        width  = Convert.ToInt16(Image["width"])
                                                    };
                                                    pin.Images.Add(_ImageModel);
                                                }
                                            }
                                        }
                                        else
                                        {
                                            flag = false;
                                        }
                                        if (flag)
                                        {
                                            model.Pins.Add(pin);
                                        }
                                    }
                                }
                            }

                            var dataBookmark = dobj["resource"]["options"];
                            if (dataBookmark != null)
                            {
                                bookmarks = dataBookmark["bookmarks"][0];
                            }
                        }
                    }

                    streamReader.Close();
                    streamReader.Dispose();
                }
            }
            catch (Exception ex) { }
            return(model);
        }
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, looks for a "payload" JSON element in the
        /// first two script tags, and appends one pin to <paramref name="model"/> for every anchor
        /// with rel="theater" found in the HTML embedded in that payload.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="model">Crawler model whose Pins list receives the discovered pins.</param>
        /// <param name="pinId">Not used by this method.</param>
        /// <param name="bookmarks">Only read for the error log; never assigned here.</param>
        /// <returns>
        /// The same <paramref name="model"/> instance. Any exception is logged and swallowed, so the
        /// model may be only partially filled.
        /// </returns>
        public static CMS_CrawlerModels getDataPinterest(string url, CMS_CrawlerModels model, string pinId, ref string bookmarks)
        {
            try
            {
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));

                /* request need cookie & user agent */
                /* NOTE(review): a session cookie is hard-coded here; consider moving it to configuration. */
                httpWebRequest.Headers["Cookie"] = "fr=0HZLfh0cIOmtmNqCq.AWXR_MW9yNog0CyLSTuvJhhdnGM.BajqQf.RE.AAA.0.0.BbOi7n.AWUh9bO8; sb=Cs05WwnlYymkzEg6Xn32mzc8; wd=1366x654; datr=Js05W_jbAaa1Ij5CurtBJmwC; locale=en_GB; c_user=100003324695675; xs=23%3AVia9gvMSQtiufw%3A2%3A1530514908%3A467%3A6165; pl=n; spin=r.4066324_b.trunk_t.1530514908_s.1_v.2_; act=1530541156947%2F6; presence=EDvF3EtimeF1530541148EuserFA21B03324695675A2EstateFDutF1530541148851CEchFDp_5f1B03324695675F4CC; x-src=%2Fpg%2Flifewithsunshine%2Fads%2F%7Ccontent_container; pnl_data2=eyJhIjoib25hZnRlcmxvYWQiLCJjIjoiWFBhZ2VzUHJvZmlsZUhvbWVDb250cm9sbGVyIiwiYiI6ZmFsc2UsImQiOiIvbGlmZXdpdGhzdW5zaGluZS9hZHMvIiwiZSI6W119";
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Timeout           = 100000;

                /* dispose the response as well as the reader so the connection is released */
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    var answer  = streamReader.ReadToEnd();
                    var htmlDoc = new HtmlDocument();
                    htmlDoc.LoadHtml(answer);

                    /* only the first two script tags are examined (the original loop stopped at the 3rd) */
                    var scripts        = htmlDoc.DocumentNode.Descendants("script").Take(2).ToList();
                    var jsonSerializer = new JavaScriptSerializer();

                    foreach (var script in scripts)
                    {
                        /* find pay load element */
                        var res = findElement(script.InnerHtml, "payload", 0);
                        if (string.IsNullOrEmpty(res))
                        {
                            continue;
                        }

                        dynamic dobj     = jsonSerializer.Deserialize <dynamic>(res);
                        var     htmlData = dobj["content"];
                        if (htmlData == null)
                        {
                            continue;
                        }

                        var xmlData = htmlData["content"];
                        if (xmlData == null)
                        {
                            continue;
                        }

                        /*
                         * Parse the embedded HTML into its own document. The script nodes being
                         * iterated still belong to htmlDoc, so reloading htmlDoc here would replace
                         * the document they were parsed from while the loop is still reading them.
                         */
                        var payloadDoc = new HtmlDocument();
                        payloadDoc.LoadHtml(xmlData);

                        /* get list tag a */
                        var lstA = payloadDoc.DocumentNode.Descendants("a").Where(n => n.GetAttributeValue("rel", "") == "theater").ToList();
                        foreach (var tagA in lstA)
                        {
                            /* GET DATA MODEL */
                            var href    = tagA.GetAttributeValue("href", "");
                            var ajaxify = tagA.GetAttributeValue("ajaxify", "");
                            var fbID    = findID(ajaxify);
                            var pin     = new PinsModels()
                            {
                                ID   = fbID,
                                Link = href,
                            };
                            model.Pins.Add(pin);

                            /* NOTE(review): pin is added before the detail call; if CrawlerFBDetail
                             * reassigns the ref instead of mutating it, the list keeps the old object. */
                            CrawlerFBDetail(href, fbID, ref pin);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                NSLog.Logger.Error("ErrorgetDataPinterest" + "\n url: " + url + "\nBookmarks:" + bookmarks, ex);
            }
            return(model);
        }
        /// <summary>
        /// Scans <paramref name="input"/> for "feedbacktarget" elements until it finds the one whose
        /// "entidentifier" equals <paramref name="fb_id"/>, then copies its comment, reaction and
        /// share counters onto <paramref name="pin"/>.
        /// </summary>
        /// <param name="input">Text containing one or more "feedbacktarget" JSON elements.</param>
        /// <param name="key">Not used by this method.</param>
        /// <param name="start">Not used by this method.</param>
        /// <param name="fb_id">Identifier compared against each element's "entidentifier".</param>
        /// <param name="pin">Pin whose counters are updated when a matching element is found.</param>
        /// <remarks>
        /// Returns without touching <paramref name="pin"/> when no element is found, when an element
        /// is not a JSON object, when it has no "entidentifier", or when no element matches.
        /// </remarks>
        public static void findNode(string input, string key, int start, string fb_id, ref PinsModels pin)
        {
            JavaScriptSerializer jsonSerializer = new JavaScriptSerializer();

            /* iterative form of the former self-recursion: each pass drops the element just rejected */
            while (!string.IsNullOrEmpty(input))
            {
                var jsonfeedbacktarget = findElement(input, "feedbacktarget", 0);
                if (string.IsNullOrEmpty(jsonfeedbacktarget))
                {
                    /* nothing (left) to inspect */
                    return;
                }

                dynamic dobj       = jsonSerializer.Deserialize <dynamic>(jsonfeedbacktarget);
                var     dictionary = dobj as Dictionary <string, dynamic>;
                if (dictionary == null || !dictionary.ContainsKey("entidentifier"))
                {
                    return;
                }

                var _fb_id = dictionary["entidentifier"];
                if (fb_id.Equals(_fb_id))
                {
                    /* NOTE(review): Convert.ToInt16 throws OverflowException above 32767;
                     * widen if the PinsModels counter properties allow it. */
                    if (dictionary.ContainsKey("commentTotalCount"))
                    {
                        pin.commentTotalCount = Convert.ToInt16(dictionary["commentTotalCount"]);
                    }
                    if (dictionary.ContainsKey("reactioncount"))
                    {
                        pin.reactioncount = Convert.ToInt16(dictionary["reactioncount"]);
                    }
                    if (dictionary.ContainsKey("sharecount"))
                    {
                        pin.sharecount = Convert.ToInt16(dictionary["sharecount"]);
                    }
                    return;
                }

                /* remove the rejected element so the next search finds the following one */
                var remaining = input.Replace("\"feedbacktarget\":" + jsonfeedbacktarget, "");
                if (remaining.Length == input.Length)
                {
                    /* nothing was removed (text did not match byte-for-byte): stop instead of
                     * searching the same input forever */
                    return;
                }
                input = remaining;
            }
        }
Exemple #14
0
        /// <summary>
        /// Crawls the photo search tab for the keyword identified by <paramref name="Id"/>, keeps the
        /// pins created within the last seven days and hands them to CMSPinFactory.CreateOrUpdate.
        /// </summary>
        /// <param name="Id">ID of the CMS_KeyWord row to crawl.</param>
        /// <param name="createdBy">Stored as UpdatedBy on the keyword and passed to CreateOrUpdate.</param>
        /// <param name="msg">Set to an error text when an exception occurs; also passed by ref to CreateOrUpdate.</param>
        /// <returns>
        /// false when an exception occurs, or when a crawl ran but produced no pins in range or
        /// CreateOrUpdate returned false; true otherwise (including when the keyword does not exist
        /// or was crawled within the last five minutes).
        /// </returns>
        public bool CrawlData(string Id, string createdBy, ref string msg)
        {
            NSLog.Logger.Info("CrawlData: " + Id);
            var      model    = new CMS_CrawlerModels();
            var      sequence = 0;
            var      key      = "";
            var      _cookie  = "";
            DateTime lastdate = DateTime.Now.AddDays(-7);
            DateTime datenow  = DateTime.Now;

            var result = true;

            try
            {
                using (var _db = new CMS_Context())
                {
                    /* get key by ID */
                    var keyWord = _db.CMS_KeyWord.Where(o => o.ID == Id).FirstOrDefault();
                    if (keyWord != null)
                    {
                        sequence = keyWord.Sequence;
                        key      = keyWord.KeyWord;
                        /* check time span crawl */
                        var timeSpanCrawl = DateTime.Now - keyWord.UpdatedDate;
                        /* a missing UpdatedDate yields a null time span: treat it as "never crawled"
                         * instead of throwing on .Value */
                        if (!timeSpanCrawl.HasValue || timeSpanCrawl.Value.TotalMinutes > 5 || keyWord.UpdatedDate == keyWord.CreatedDate) /* 5min to crawl data again */
                        {
                            /* update crawler date before the crawl starts */
                            keyWord.UpdatedDate = DateTime.Now;
                            keyWord.UpdatedBy   = createdBy;
                            _db.SaveChanges();

                            /* call crawler api to crawl data */
                            CMSPinFactory _fac = new CMSPinFactory();

                            var listAcc    = _db.CMS_Account.Where(o => o.Status == (byte)Commons.EStatus.Active && o.IsActive && !string.IsNullOrEmpty(o.Cookies)).ToList();
                            var listCookie = listAcc.Select(x => x.Cookies).ToList();
                            _cookie = CommonHelper.RamdomCookie(listCookie);

                            /* crawler tab post (the crawl call itself is currently disabled) */
                            var    PageSize  = Convert.ToInt32(Commons.PageSize);
                            var    modelPost = new CMS_CrawlerModels();
                            string q         = "keywords_search(" + keyWord.KeyWord.Replace(" ", "+") + ")";
                            string ref_path  = "/search/str/" + keyWord.KeyWord + "/stories-keyword/stories-public";
                            //CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "list", (byte)Commons.EType.Post, _cookie, PageSize, ref modelPost);
                            //string q = "stories-public(stories-keyword(" + keyWord.KeyWord + "))";
                            //string ref_path = "/search/str/" + keyWord.KeyWord + "/stories-keyword/stories-public";
                            NSLog.Logger.Info("done crawler tab post : ", modelPost.Pins.Count);
                            if (modelPost.Pins != null && modelPost.Pins.Any())
                            {
                                model.Pins.AddRange(modelPost.Pins);
                            }

                            /* crawler tab people (the crawl call itself is currently disabled) */
                            var modelPeople = new CMS_CrawlerModels();
                            q        = "stories-opinion(stories-keyword(" + keyWord.KeyWord + "))";
                            ref_path = "/search/str/" + keyWord.KeyWord + "/stories-keyword/stories-opinion";
                            //CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "list", (byte)Commons.EType.People, _cookie, PageSize, ref modelPeople);
                            NSLog.Logger.Info("done crawler tab people : ", modelPeople.Pins.Count);
                            if (modelPeople.Pins != null && modelPeople.Pins.Any())
                            {
                                model.Pins.AddRange(modelPeople.Pins);
                            }

                            /* crawler tab photo */
                            var modelPhoto = new CMS_CrawlerModels();
                            q        = "photos-keyword(" + keyWord.KeyWord.Replace(" ", "+") + ")";
                            ref_path = "/search/str/" + keyWord.KeyWord.Replace(" ", "+") + "/photos-keyword";
                            CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "grid", (byte)Commons.EType.Photo, _cookie, 70, ref modelPhoto);

                            /* crawler detail tab photo */
                            var options = new ParallelOptions {
                                MaxDegreeOfParallelism = 10
                            };
                            /* modelPhoto went through CrawlerNow by ref, so check Pins before using it */
                            var photoPins    = modelPhoto.Pins;
                            var hasPhotoPins = photoPins != null && photoPins.Any();
                            if (hasPhotoPins)
                            {
                                /* NOTE(review): pin is a lambda parameter; if CrawlerDetail reassigns the
                                 * ref instead of mutating the object, the list will not see the change. */
                                Parallel.ForEach(photoPins, options, pin =>
                                {
                                    CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Photo, ref pin);
                                });
                            }
                            NSLog.Logger.Info("done crawler tab photo : ", photoPins != null ? photoPins.Count : 0);
                            if (hasPhotoPins)
                            {
                                model.Pins.AddRange(photoPins);
                            }

                            var res = false;
                            if (model.Pins.Count > 0)
                            {
                                NSLog.Logger.Info("done crawler before 7 days ago : ", model.Pins.Count);
                                /* check 7 days ago */
                                model.Pins = model.Pins.Where(o => o.Created_At >= lastdate && o.Created_At <= datenow).ToList();
                                NSLog.Logger.Info("done crawler after 7 days ago : ", model.Pins.Count);

                                Parallel.ForEach(model.Pins, options, pin =>
                                {
                                    if (pin.Type != (byte)Commons.EType.Photo)
                                    {
                                        CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Post, ref pin);
                                    }
                                });

                                res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, keyWord.KeyWord, ref msg);
                            }

                            if (res == false)
                            {
                                /* the crawl timestamp saved above is not rolled back on failure */
                                result = false;
                            }
                            else
                            {
                                keyWord.UpdatedDate = DateTime.Now;
                                _db.SaveChanges();
                            }
                        }
                    }
                }

                LogHelper.WriteLogs(sequence.ToString() + " " + key, "Num post: " + model.Pins.Count.ToString());
                NSLog.Logger.Info("ResponseCrawlData", result.ToString());
            }
            catch (Exception ex)
            {
                msg    = "Crawl data is unsuccessfully.";
                result = false;

                LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
            }

            return(result);
        }